devsquad 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsquad-3.6.0.dist-info/METADATA +944 -0
- devsquad-3.6.0.dist-info/RECORD +95 -0
- devsquad-3.6.0.dist-info/WHEEL +5 -0
- devsquad-3.6.0.dist-info/entry_points.txt +2 -0
- devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
- devsquad-3.6.0.dist-info/top_level.txt +2 -0
- scripts/__init__.py +0 -0
- scripts/ai_semantic_matcher.py +512 -0
- scripts/alert_manager.py +505 -0
- scripts/api/__init__.py +43 -0
- scripts/api/models.py +386 -0
- scripts/api/routes/__init__.py +20 -0
- scripts/api/routes/dispatch.py +348 -0
- scripts/api/routes/lifecycle.py +330 -0
- scripts/api/routes/metrics_gates.py +347 -0
- scripts/api_server.py +318 -0
- scripts/auth.py +451 -0
- scripts/cli/__init__.py +1 -0
- scripts/cli/cli_visual.py +642 -0
- scripts/cli.py +1094 -0
- scripts/collaboration/__init__.py +212 -0
- scripts/collaboration/_version.py +1 -0
- scripts/collaboration/agent_briefing.py +656 -0
- scripts/collaboration/ai_semantic_matcher.py +260 -0
- scripts/collaboration/anchor_checker.py +281 -0
- scripts/collaboration/anti_rationalization.py +470 -0
- scripts/collaboration/async_integration_example.py +255 -0
- scripts/collaboration/batch_scheduler.py +149 -0
- scripts/collaboration/checkpoint_manager.py +561 -0
- scripts/collaboration/ci_feedback_adapter.py +351 -0
- scripts/collaboration/code_map_generator.py +247 -0
- scripts/collaboration/concern_pack_loader.py +352 -0
- scripts/collaboration/confidence_score.py +496 -0
- scripts/collaboration/config_loader.py +188 -0
- scripts/collaboration/consensus.py +244 -0
- scripts/collaboration/context_compressor.py +533 -0
- scripts/collaboration/coordinator.py +668 -0
- scripts/collaboration/dispatcher.py +1636 -0
- scripts/collaboration/dual_layer_context.py +128 -0
- scripts/collaboration/enhanced_worker.py +539 -0
- scripts/collaboration/feature_usage_tracker.py +206 -0
- scripts/collaboration/five_axis_consensus.py +334 -0
- scripts/collaboration/input_validator.py +401 -0
- scripts/collaboration/integration_example.py +287 -0
- scripts/collaboration/intent_workflow_mapper.py +350 -0
- scripts/collaboration/language_parsers.py +269 -0
- scripts/collaboration/lifecycle_protocol.py +1446 -0
- scripts/collaboration/llm_backend.py +453 -0
- scripts/collaboration/llm_cache.py +448 -0
- scripts/collaboration/llm_cache_async.py +347 -0
- scripts/collaboration/llm_retry.py +387 -0
- scripts/collaboration/llm_retry_async.py +389 -0
- scripts/collaboration/mce_adapter.py +597 -0
- scripts/collaboration/memory_bridge.py +1607 -0
- scripts/collaboration/models.py +537 -0
- scripts/collaboration/null_providers.py +297 -0
- scripts/collaboration/operation_classifier.py +289 -0
- scripts/collaboration/output_slicer.py +225 -0
- scripts/collaboration/performance_monitor.py +462 -0
- scripts/collaboration/permission_guard.py +865 -0
- scripts/collaboration/prompt_assembler.py +756 -0
- scripts/collaboration/prompt_variant_generator.py +483 -0
- scripts/collaboration/protocols.py +267 -0
- scripts/collaboration/report_formatter.py +352 -0
- scripts/collaboration/retrospective.py +279 -0
- scripts/collaboration/role_matcher.py +92 -0
- scripts/collaboration/role_template_market.py +352 -0
- scripts/collaboration/rule_collector.py +678 -0
- scripts/collaboration/scratchpad.py +346 -0
- scripts/collaboration/skill_registry.py +151 -0
- scripts/collaboration/skillifier.py +878 -0
- scripts/collaboration/standardized_role_template.py +317 -0
- scripts/collaboration/task_completion_checker.py +237 -0
- scripts/collaboration/test_quality_guard.py +695 -0
- scripts/collaboration/unified_gate_engine.py +598 -0
- scripts/collaboration/usage_tracker.py +309 -0
- scripts/collaboration/user_friendly_error.py +176 -0
- scripts/collaboration/verification_gate.py +312 -0
- scripts/collaboration/warmup_manager.py +635 -0
- scripts/collaboration/worker.py +513 -0
- scripts/collaboration/workflow_engine.py +684 -0
- scripts/dashboard.py +1088 -0
- scripts/generate_benchmark_report.py +786 -0
- scripts/history_manager.py +604 -0
- scripts/mcp_server.py +289 -0
- skills/__init__.py +32 -0
- skills/dispatch/handler.py +52 -0
- skills/intent/handler.py +59 -0
- skills/registry.py +67 -0
- skills/retrospective/__init__.py +0 -0
- skills/retrospective/handler.py +125 -0
- skills/review/handler.py +356 -0
- skills/security/handler.py +454 -0
- skills/test/__init__.py +0 -0
- skills/test/handler.py +78 -0
|
@@ -0,0 +1,1607 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
MemoryBridge - 记忆桥接系统
|
|
5
|
+
|
|
6
|
+
将协作系统(Coordinator/Skillifier/Scratchpad)与持久记忆层(memory-bank)连接,
|
|
7
|
+
实现跨会话的知识复用、经验捕获、反馈闭环和模式持久化。
|
|
8
|
+
|
|
9
|
+
核心能力:
|
|
10
|
+
- recall(): 任务前召回相关历史经验
|
|
11
|
+
- capture_execution(): 执行后自动捕获洞察
|
|
12
|
+
- record_feedback(): 用户反馈记录
|
|
13
|
+
- persist_pattern(): Skillifier 模式跨会话保留
|
|
14
|
+
- search_knowledge(): 知识库关键词搜索
|
|
15
|
+
- 生命周期: 遗忘曲线 / 自动压缩 / 清理
|
|
16
|
+
|
|
17
|
+
使用示例:
|
|
18
|
+
from collaboration.memory_bridge import MemoryBridge, MemoryConfig
|
|
19
|
+
|
|
20
|
+
bridge = MemoryBridge(config=MemoryConfig.default())
|
|
21
|
+
result = bridge.recall(MemoryQuery(query_text="微服务架构设计"))
|
|
22
|
+
for mem in result.memories:
|
|
23
|
+
print(f"[{mem.memory_type.value}] {mem.title}: {mem.content[:80]}")
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
import re
|
|
28
|
+
import json
|
|
29
|
+
import math
|
|
30
|
+
import time
|
|
31
|
+
import uuid
|
|
32
|
+
import threading
|
|
33
|
+
from enum import Enum
|
|
34
|
+
from abc import ABC, abstractmethod
|
|
35
|
+
from collections import Counter
|
|
36
|
+
from dataclasses import dataclass, field
|
|
37
|
+
from datetime import datetime, timedelta
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MemoryType(Enum):
    """Categories of memory records handled by this module.

    Each member's value is the lowercase string that represents the
    category when a record is serialized (see ``MemoryItem.to_dict``).
    """

    KNOWLEDGE = "knowledge"
    EPISODIC = "episodic"
    SEMANTIC = "semantic"
    FEEDBACK = "feedback"
    PATTERN = "pattern"
    ANALYSIS = "analysis"
    CORRECTION = "correction"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class MemoryItem:
    """A single memory record plus its retrieval bookkeeping.

    Instances round-trip through ``to_dict`` / ``from_dict``: timestamps are
    serialized as ISO-8601 strings and ``memory_type`` as its enum value.
    """

    id: str
    memory_type: MemoryType
    title: str
    content: str
    domain: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    source: str = ""
    relevance_score: float = 0.0
    created_at: datetime = field(default_factory=datetime.now)
    last_accessed: datetime = field(default_factory=datetime.now)
    access_count: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def age_days(self) -> float:
        """Return the time elapsed since ``created_at`` in fractional days."""
        elapsed = datetime.now() - self.created_at
        return elapsed.total_seconds() / 86400

    def to_dict(self) -> Dict:
        """Return a plain-dict snapshot of this item.

        ``tags`` and ``metadata`` are shallow-copied so that mutating the
        returned dict does not silently change this instance.
        """
        return {
            "id": self.id,
            "memory_type": self.memory_type.value,
            "title": self.title,
            "content": self.content,
            "domain": self.domain,
            "tags": list(self.tags),
            "source": self.source,
            "relevance_score": self.relevance_score,
            "created_at": self.created_at.isoformat(),
            "last_accessed": self.last_accessed.isoformat(),
            "access_count": self.access_count,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, d: Dict) -> 'MemoryItem':
        """Build an item from a dict shaped like the output of ``to_dict``.

        Args:
            d: Mapping with at least ``id``, ``memory_type``, ``title`` and
                ``content``. Every other key is optional.

        Returns:
            A new ``MemoryItem``. ``tags`` and ``metadata`` are copies, so the
            item never shares mutable state with ``d``; a null value for
            either is treated as empty.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``memory_type`` is not a known value, or a
                timestamp string is not valid ISO-8601.
        """

        def _timestamp(key: str) -> datetime:
            # Missing or non-string timestamps fall back to "now".
            raw = d.get(key)
            return datetime.fromisoformat(raw) if isinstance(raw, str) else datetime.now()

        return cls(
            id=d["id"],
            memory_type=MemoryType(d["memory_type"]),
            title=d["title"],
            content=d["content"],
            domain=d.get("domain"),
            tags=list(d.get("tags") or []),
            source=d.get("source", ""),
            relevance_score=d.get("relevance_score", 0.0),
            created_at=_timestamp("created_at"),
            last_accessed=_timestamp("last_accessed"),
            access_count=d.get("access_count", 0),
            metadata=dict(d.get("metadata") or {}),
        )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass
class MemoryQuery:
    """Parameters describing one memory lookup.

    The module docstring shows it being passed to ``MemoryBridge.recall``;
    how each field is interpreted is decided by that consumer, which is not
    part of this class.
    """

    query_text: str = ""
    domain: Optional[str] = None
    memory_type: Optional[MemoryType] = None
    limit: int = 5  # presumably the maximum number of results -- confirm in recall()
    min_relevance: float = 0.3  # presumably a score cut-off -- confirm in recall()
    time_range: Optional[Tuple[datetime, datetime]] = None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@dataclass
class MemoryRecallResult:
    """Result container for a recall; every field defaults to empty/zero.

    The module docstring iterates ``result.memories`` on the value returned
    by ``MemoryBridge.recall``.
    """

    memories: List[MemoryItem] = field(default_factory=list)
    total_found: int = 0
    query_time_ms: float = 0.0
    hit_memory_types: Dict[str, int] = field(default_factory=dict)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass
class MemoryConfig:
    """Settings bundle for the memory bridge, with three named presets.

    The fields are plain values; their effect is defined by the code that
    reads the config, which is outside this class.
    """

    enabled: bool = True
    base_dir: Optional[str] = None
    auto_capture: bool = True
    auto_index: bool = True
    max_episodic_memories: int = 1000
    max_knowledge_items: int = 5000
    index_rebuild_threshold: int = 50
    relevance_threshold: float = 0.3
    retention_days: int = 90
    compress_old_memories: bool = True
    enable_semantic_search: bool = False

    @classmethod
    def default(cls) -> 'MemoryConfig':
        """Return a config with every field at its declared default."""
        return cls()

    @classmethod
    def lightweight(cls) -> 'MemoryConfig':
        """Return a preset with auto capture/index off and 100 episodic memories."""
        overrides = {
            "auto_capture": False,
            "auto_index": False,
            "max_episodic_memories": 100,
        }
        return cls(**overrides)

    @classmethod
    def full(cls) -> 'MemoryConfig':
        """Return a preset with raised capacity limits and semantic search on."""
        overrides = {
            "max_episodic_memories": 5000,
            "max_knowledge_items": 20000,
            "enable_semantic_search": True,
        }
        return cls(**overrides)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class MemoryStats:
    """Snapshot of counters and status flags for the memory system.

    All fields default to zero / empty / ``None``; whoever produces the
    snapshot fills them in.
    """

    total_memories: int = 0
    by_type_counts: Dict[str, int] = field(default_factory=dict)
    oldest_memory: Optional[str] = None
    newest_memory: Optional[str] = None
    storage_size_kb: float = 0.0
    index_built: bool = False
    last_index_time: Optional[str] = None
    total_captures: int = 0
    total_recalls: int = 0
    # NOTE(review): presumably describe the WorkBuddy Claw source -- confirm.
    claw_enabled: bool = False
    claw_item_count: int = 0
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class KnowledgeItem:
    """Plain record for one knowledge entry.

    ``id``, ``domain``, ``title`` and ``content`` are required; the rest
    default to empty values. ``created_at`` is kept as a string.
    """

    id: str
    domain: str
    title: str
    content: str
    tags: List[str] = field(default_factory=list)
    created_at: str = ""
    source: str = ""
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@dataclass
class UserFeedback:
    """Plain record for one piece of user feedback.

    Only ``id`` is required. ``created_at`` is kept as a string.
    """

    id: str
    user_id: str = "default"
    feedback_type: str = "suggestion"
    content: str = ""
    rating: Optional[int] = None
    context: Dict[str, Any] = field(default_factory=dict)
    created_at: str = ""
    status: str = "pending"
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@dataclass
class EpisodicMemory:
    """Plain record pairing a task description with a finding.

    ``id``, ``task_description`` and ``finding`` are required.
    ``created_at`` is kept as a string.
    """

    id: str
    task_description: str
    finding: str
    worker_id: str = ""
    confidence: float = 0.0
    tags: List[str] = field(default_factory=list)
    created_at: str = ""
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclass
class PersistedPattern:
    """Plain record for one persisted pattern.

    ``id``, ``name``, ``slug`` and ``category`` are required.
    ``created_at`` is kept as a string.
    """

    id: str
    name: str
    slug: str
    category: str
    trigger_keywords: List[str] = field(default_factory=list)
    steps_template: List[Dict] = field(default_factory=list)
    confidence: float = 0.0
    quality_score: float = 0.0
    created_at: str = ""
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@dataclass
class AnalysisCase:
    """Plain record for one analysed problem.

    ``id`` and ``problem`` are required; ``status`` defaults to
    ``"completed"``. ``created_at`` is kept as a string.
    """

    id: str
    problem: str
    context: Dict[str, Any] = field(default_factory=dict)
    root_cause: str = ""
    solutions: List[str] = field(default_factory=list)
    status: str = "completed"
    created_at: str = ""
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@dataclass
class ErrorContext:
    """Plain record describing one error occurrence.

    Only ``error_message`` is required; the remaining string fields
    default to empty.
    """

    error_message: str
    task_description: str = ""
    worker_id: str = ""
    stack_trace: str = ""
    timestamp: str = ""
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class WorkBuddyClawSource:
    """
    Read-only bridge for the WorkBuddy (Claw) memory data source.

    Reads markdown memory files from ``<base>/.memory/`` and
    ``<base>/.workbuddy/memory/`` and converts them into MemoryItem lists.

    Data mapping rules:
        .memory/SOUL.md    -> MemoryType.SEMANTIC  (personality matrix)
        .memory/USER.md    -> MemoryType.KNOWLEDGE (user profile)
        .memory/MEMORY.md  -> MemoryType.KNOWLEDGE (core knowledge)
        .memory/EXP.md     -> MemoryType.EPISODIC  (experience system)
        .memory/PROMPT.md  -> MemoryType.PATTERN   (prompt optimization rules)
        .memory/HEALTH.md  -> MemoryType.SEMANTIC  (health monitoring status)
        .memory/INDEX.md   -> keyword index used by search_by_index
                              (not returned directly)

    Design constraints:
        - Read-only access: this class only reads files.
        - The base path comes from the WORKBUDDY_CLAW_PATH environment
          variable (read once, when the class is defined), falling back to a
          built-in default; it can be overridden per instance via the
          constructor.
        - The parsed INDEX.md is cached on the instance after the first
          search.
        - A file that cannot be read or decoded is skipped instead of
          raising, so one bad file does not break a load or a search.
    """

    CLAW_BASE_PATH = os.environ.get("WORKBUDDY_CLAW_PATH", "/Users/lin/WorkBuddy/Claw")
    MEMORY_DIR = ".memory"
    WORKBUDDY_MEMORY_DIR = ".workbuddy/memory"

    # Daily logs are named by date ("YYYY-MM-DD.md"); match any four-digit
    # year rather than one hard-coded year.
    DAILY_LOG_GLOB = "[0-9][0-9][0-9][0-9]-*.md"
    MAX_DAILY_LOGS = 30

    CORE_FILE_MAPPING = {
        "SOUL.md": ("AI Personality Matrix (OCEAN model)", MemoryType.SEMANTIC),
        "USER.md": ("User Profile (background/preferences/channels)", MemoryType.KNOWLEDGE),
        "MEMORY.md": ("Core Knowledge Base (lessons/decisions)", MemoryType.KNOWLEDGE),
        "EXP.md": ("Experience System", MemoryType.EPISODIC),
        "PROMPT.md": ("Prompt Optimization Rules", MemoryType.PATTERN),
        "HEALTH.md": ("Health Monitoring Status", MemoryType.SEMANTIC),
    }

    # INDEX.md header cells to ignore, in Chinese ("keyword" / "location")
    # and in the English form shown in the search_by_index docstring.
    _INDEX_HEADER_KEYWORDS = frozenset({"\u5173\u952e\u8bcd", "keyword", "keywords"})
    _INDEX_HEADER_LOCATIONS = frozenset({"\u4f4d\u7f6e", "location"})
    # One cell of a markdown table separator row: "---", ":---", "---:" ...
    _INDEX_RULE_CELL_RE = re.compile(r':?-+:?')

    # Automation log patterns. Labels are accepted in Chinese (information
    # source / core topics / execution status) and in English.
    _LOG_DATE_RE = re.compile(r'^## (\d{4}-\d{2}-\d{2})')
    _LOG_SOURCES_RE = re.compile(r'\*\*(?:\u4fe1\u606f\u6765\u6e90|Sources)\*\*:\s*(.+)')
    _LOG_TOPICS_RE = re.compile(r'\*\*(?:\u6838\u5fc3\u4e3b\u9898|Core Topics)\*\*:\s*(.*)')
    _LOG_STATUS_RE = re.compile(r'\*\*(?:\u6267\u884c\u72b6\u6001|Status)\*\*:\s*(\S+)')
    _LOG_BULLET_RE = re.compile(r'\s*[-*]\s+(.+)')

    def __init__(self, base_path: Optional[str] = None):
        """
        Initialize the Claw source with an optional custom base path.

        Args:
            base_path: Custom path to the Claw directory. Defaults to
                CLAW_BASE_PATH when omitted or empty.
        """
        self.base_path = Path(base_path or self.CLAW_BASE_PATH)
        self._memory_dir = self.base_path / self.MEMORY_DIR
        self._wb_memory_dir = self.base_path / self.WORKBUDDY_MEMORY_DIR
        self._index_cache: Optional[Dict[str, List[str]]] = None

    @property
    def is_available(self) -> bool:
        """Return True when both the base directory and its .memory/ exist."""
        return self.base_path.exists() and self._memory_dir.exists()

    @staticmethod
    def _read_text(path: Path) -> Optional[str]:
        """Return the UTF-8 text of *path*, or None if it cannot be read."""
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Missing, unreadable, a directory, or not valid UTF-8.
            return None

    def load_all_memories(self) -> List[MemoryItem]:
        """
        Load all available memories from the Claw directories.

        Returns:
            List[MemoryItem]: Core memories followed by daily logs, each with
            source='workbuddy-claw'. Empty when the source is unavailable.
        """
        if not self.is_available:
            return []
        items = self._load_core_memories()
        items.extend(self._load_workbuddy_daily_memories())
        for item in items:
            item.source = "workbuddy-claw"
        return items

    def _load_core_memories(self) -> List[MemoryItem]:
        """Load the files named in CORE_FILE_MAPPING from .memory/."""
        items = []
        for filename, (title, mtype) in self.CORE_FILE_MAPPING.items():
            content = self._read_text(self._memory_dir / filename)
            if content is None:
                continue
            items.append(MemoryItem(
                id=f"wb-core-{filename.replace('.md', '')}",
                memory_type=mtype,
                title=title,
                content=content,
                domain="user-profile" if "USER" in filename else "claw-core",
                tags=self._extract_tags(content),
                source="workbuddy-claw",
            ))
        return items

    def _load_workbuddy_daily_memories(self) -> List[MemoryItem]:
        """Load the newest MAX_DAILY_LOGS daily logs from .workbuddy/memory/.

        Files are ordered by name, descending, which is newest-first for
        date-named files.
        """
        items = []
        if not self._wb_memory_dir.exists():
            return items

        md_files = sorted(
            self._wb_memory_dir.glob(self.DAILY_LOG_GLOB),
            key=lambda p: p.name,
            reverse=True,
        )
        for filepath in md_files[:self.MAX_DAILY_LOGS]:
            content = self._read_text(filepath)
            if content is None:
                continue
            date_str = filepath.stem
            items.append(MemoryItem(
                id=f"wb-daily-{date_str}",
                memory_type=MemoryType.EPISODIC,
                title=f"Work Log {date_str}",
                content=content,
                domain="daily-log",
                tags=["workbuddy", "daily", date_str] + self._extract_tags(content),
                source="workbuddy-claw",
            ))
        return items

    def search_by_index(self, query: str, limit: int = 5) -> List[MemoryItem]:
        """
        Search using the keyword index in .memory/INDEX.md.

        INDEX.md format example:
            | Keyword          | Location            |
            | Fudan/Education  | USER.md#Background  |
            | QQ/WeChat        | USER.md#Channels    |

        Behaviour:
            - INDEX.md missing: full-text scan via _fallback_search.
            - Query has no tokens: empty list.
            - No query token is in the index: full-text scan.
            - Otherwise: the referenced fragments are loaded in the order the
              query tokens (then index entries) first mention them.

        Args:
            query: Search query; split on whitespace and lowercased.
            limit: Maximum number of results to return.

        Returns:
            List[MemoryItem]: Up to ``limit`` matched memories.
        """
        index_path = self._memory_dir / "INDEX.md"
        if not index_path.exists():
            return self._fallback_search(query, limit)

        if self._index_cache is None:
            self._index_cache = self._parse_index(index_path)

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result order no longer depends on set iteration order.
        tokens = list(dict.fromkeys(query.lower().split()))
        if not tokens:
            return []

        matched_refs: Dict[str, None] = {}
        for token in tokens:
            for ref in self._index_cache.get(token, ()):
                matched_refs.setdefault(ref, None)

        if not matched_refs:
            return self._fallback_search(query, limit)

        results: List[MemoryItem] = []
        for ref in matched_refs:
            # Check the limit against loaded items, not candidate refs, so a
            # dangling reference does not use up a result slot.
            if len(results) >= limit:
                break
            item = self._load_memory_by_index_ref(ref)
            if item:
                results.append(item)
        return results

    def _parse_index(self, index_path: Path) -> Dict[str, List[str]]:
        """
        Parse the INDEX.md table into a {keyword: [file_ref]} dictionary.

        Only lines starting with "|" are considered. Header rows (Chinese or
        English) and separator rows are skipped. A keyword cell may hold
        several "/"-separated keywords that share one file reference.

        Args:
            index_path: Path to the INDEX.md file.

        Returns:
            Dict mapping lowercase keywords to lists of file references;
            empty when the file cannot be read.
        """
        index: Dict[str, List[str]] = {}
        text = self._read_text(index_path)
        if text is None:
            return index

        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line.startswith("|") or line.startswith("|---"):
                continue
            cells = [cell.strip() for cell in line.split("|") if cell.strip()]
            if len(cells) < 2:
                continue
            if all(self._INDEX_RULE_CELL_RE.fullmatch(cell) for cell in cells):
                continue  # separator row such as "| --- | --- |"
            keywords, file_ref = cells[0], cells[1]
            if keywords.lower() in self._INDEX_HEADER_KEYWORDS:
                continue
            if file_ref.lower() in self._INDEX_HEADER_LOCATIONS:
                continue
            for keyword in keywords.split("/"):
                keyword = keyword.strip().lower()
                if not keyword:
                    continue
                refs = index.setdefault(keyword, [])
                if file_ref not in refs:
                    refs.append(file_ref)
        return index

    def _load_memory_by_index_ref(self, ref: str) -> Optional[MemoryItem]:
        """
        Load a memory fragment based on an INDEX reference.

        Supports both 'filename.md' and 'filename.md#section' formats. When a
        section is given but not found, the first 500 characters of the file
        are used instead.

        Args:
            ref: File reference from INDEX.md (e.g., USER.md#Background).

        Returns:
            MemoryItem for the referenced content, or None if the file is
            missing or unreadable.
        """
        filename, _, section = ref.partition("#")
        filename = filename.strip()
        section = section.strip()

        filepath = self._memory_dir / filename
        if not filepath.exists():
            return None
        content = self._read_text(filepath)
        if content is None:
            return None

        if section:
            extracted = self._extract_section(content, section)
            content = extracted if extracted is not None else content[:500]

        # Prefer the exact CORE_FILE_MAPPING entry so both load paths agree
        # on the type; otherwise fall back to substring matching.
        core_entry = self.CORE_FILE_MAPPING.get(filename)
        if core_entry is not None:
            mtype = core_entry[1]
        else:
            type_map = {
                "SOUL": MemoryType.SEMANTIC,
                "USER": MemoryType.KNOWLEDGE,
                "MEMORY": MemoryType.KNOWLEDGE,
                "EXP": MemoryType.EPISODIC,
                "PROMPT": MemoryType.PATTERN,
            }
            upper_name = filename.upper()
            mtype = next(
                (t for key, t in type_map.items() if key in upper_name),
                MemoryType.KNOWLEDGE,
            )

        return MemoryItem(
            id=f"wb-index-{filename.replace('.md', '').replace('/', '-')}",
            memory_type=mtype,
            title=f"[Claw] {ref}",
            content=content,
            source="workbuddy-claw",
            relevance_score=0.9,
        )

    @staticmethod
    def _extract_section(content: str, anchor: str) -> Optional[str]:
        """
        Extract a markdown section by its heading text.

        The section starts at the first heading line containing ``anchor``
        (case-insensitive) and ends just before the next heading of any
        level.

        Args:
            content: Full markdown text to search in.
            anchor: Section heading text to find.

        Returns:
            Extracted section text, or None if the anchor is not found.
        """
        pattern = rf'(?:^|\n)#+\s*.*{re.escape(anchor)}'
        match = re.search(pattern, content, re.MULTILINE | re.IGNORECASE)
        if not match:
            return None
        start = match.start()
        # Search from start + 1 so the matched heading's own leading newline
        # is not taken for the next heading.
        next_heading = re.search(r'\n#+\s+', content[start + 1:])
        end = (next_heading.start() + start + 1) if next_heading else len(content)
        return content[start:end].strip()

    @staticmethod
    def _extract_tags(text: str) -> List[str]:
        """
        Extract words from text to use as tags.

        Matches runs of two or more CJK characters and runs of three or more
        ASCII letters.

        Args:
            text: Source text to extract tags from.

        Returns:
            The first 15 distinct matches, in order of first appearance.
        """
        words = re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', text)
        # dict.fromkeys keeps first-seen order; a set would make the
        # surviving 15 tags differ from run to run.
        return list(dict.fromkeys(words))[:15]

    def _fallback_search(self, query: str, limit: int = 5) -> List[MemoryItem]:
        """
        Full-text search over everything load_all_memories() returns.

        Scores results by title match (+0.5), content match (+0.3),
        and tag overlap (+0.2).

        Args:
            query: Search query string.
            limit: Maximum results.

        Returns:
            Items with a non-zero score, highest score first.
        """
        needle = query.lower()
        terms = needle.split()
        ranked = []
        for item in self.load_all_memories():
            score = 0.0
            if needle in item.title.lower():
                score += 0.5
            if needle in item.content.lower():
                score += 0.3
            if any(term in tag.lower() for term in terms for tag in item.tags):
                score += 0.2
            if score > 0:
                item.relevance_score = min(score, 1.0)
                ranked.append(item)
        ranked.sort(key=lambda entry: entry.relevance_score, reverse=True)
        return ranked[:limit]

    # ========== Plan B: Automation News Feed Consumer ==========

    def get_latest_ai_news(self, days: int = 7) -> List[MemoryItem]:
        """
        Read daily AI news automation task execution records.

        Data source: <base>/.codebuddy/automations/ai/memory.md

        Each returned MemoryItem.metadata contains:
            - sources: list of source strings
            - core_topics: list of topic strings
            - status: execution status string

        Args:
            days: Number of days to look back (default 7).

        Returns:
            One MemoryItem per log entry dated within the window; empty when
            the file is missing or unreadable.
        """
        ai_memory_path = self.base_path / ".codebuddy" / "automations" / "ai" / "memory.md"
        if not ai_memory_path.exists():
            return []
        content = self._read_text(ai_memory_path)
        if content is None:
            return []

        cutoff = datetime.now() - timedelta(days=days)
        items = []
        for entry in self._parse_automation_log(content):
            if entry["date"] < cutoff:
                continue
            items.append(MemoryItem(
                id=f"wb-news-{entry['date'].strftime('%Y%m%d')}",
                memory_type=MemoryType.EPISODIC,
                title=f"AI News {entry['date'].strftime('%Y-%m-%d')}",
                content=entry["content"],
                domain="ai-news",
                tags=["ai-news", "daily-push", "automation"] + self._extract_tags(entry["content"]),
                source="workbuddy-claw-automation",
                metadata={
                    "sources": entry.get("sources", []),
                    "core_topics": entry.get("topics", []),
                    "status": entry.get("status", ""),
                },
            ))
        return items

    def _parse_automation_log(self, content: str) -> List[Dict]:
        """
        Parse the automation memory.md log into structured entries.

        Input format (labels may be English as below, or their Chinese
        equivalents):
            ## YYYY-MM-DD HH:MM
            **Status**: Success
            **Sources**: source1, source2
            **Push Count**: N
            **Core Topics**:
            - topic1
            - topic2
            **Notes**: additional notes

        Topics may be given inline after the label or as a bullet list on
        the lines that follow an empty label. A heading whose date is not a
        valid calendar date closes the previous entry, and the lines under it
        are ignored.

        Args:
            content: Raw markdown content from automation memory.md.

        Returns:
            List of dicts: {date: datetime, content: str, sources: [str],
            topics: [str], status: str}. Each Sources line is stored as one
            string.
        """
        entries: List[Dict] = []
        current: Optional[Dict] = None
        body_lines: List[str] = []
        in_topic_list = False

        def _close_current() -> None:
            """Finalize the open entry, if any, and append it to entries."""
            if current is not None:
                current["content"] = "".join(f"{text}\n" for text in body_lines)
                entries.append(current)

        for line in content.splitlines():
            header = self._LOG_DATE_RE.match(line)
            if header:
                _close_current()
                # Reset before parsing the date: leaving the closed entry in
                # place would append it a second time at the next heading.
                current = None
                body_lines = []
                in_topic_list = False
                try:
                    entry_date = datetime.strptime(header.group(1), "%Y-%m-%d")
                except ValueError:
                    continue
                current = {
                    "date": entry_date,
                    "content": "",
                    "sources": [],
                    "topics": [],
                    "status": "",
                }
                continue

            if current is None:
                continue
            body_lines.append(line)

            if in_topic_list:
                bullet = self._LOG_BULLET_RE.match(line)
                if bullet:
                    current["topics"].append(bullet.group(1).strip())
                    continue
                if line.strip():
                    in_topic_list = False  # first non-bullet line ends the list

            sources_match = self._LOG_SOURCES_RE.match(line)
            if sources_match:
                current["sources"].append(sources_match.group(1))

            topics_match = self._LOG_TOPICS_RE.match(line)
            if topics_match:
                inline_topics = topics_match.group(1)
                if inline_topics:
                    current["topics"].append(inline_topics)
                else:
                    in_topic_list = True

            status_match = self._LOG_STATUS_RE.match(line)
            if status_match:
                current["status"] = status_match.group(1)

        _close_current()
        return entries
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
class MemoryStore(ABC):
    """Abstract storage interface for memory records, addressed by type and id."""

    @abstractmethod
    def save(self, memory_type: MemoryType, data: Dict) -> str:
        """Store ``data`` under ``memory_type`` and return the item id."""

    @abstractmethod
    def load(self, memory_type: MemoryType, item_id: str) -> Optional[Dict]:
        """Return the stored record for ``item_id``, or None."""

    @abstractmethod
    def list_all(self, memory_type: MemoryType,
                 filters: Optional[Dict] = None) -> List[Dict]:
        """Return the records of ``memory_type``, optionally narrowed by ``filters``."""

    @abstractmethod
    def delete(self, memory_type: MemoryType, item_id: str) -> bool:
        """Remove the record for ``item_id`` and return a success flag."""
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
class JsonMemoryStore(MemoryStore):
|
|
659
|
+
def __init__(self, base_dir: str):
|
|
660
|
+
self.base_dir = Path(base_dir)
|
|
661
|
+
self._lock = threading.RLock()
|
|
662
|
+
self._type_dirs = {
|
|
663
|
+
MemoryType.KNOWLEDGE: self.base_dir / "knowledge_base" / "domains",
|
|
664
|
+
MemoryType.FEEDBACK: self.base_dir / "user_experience" / "feedback",
|
|
665
|
+
MemoryType.PATTERN: self.base_dir / "persisted_patterns",
|
|
666
|
+
MemoryType.ANALYSIS: self.base_dir / "analysis_cases",
|
|
667
|
+
MemoryType.EPISODIC: self.base_dir / "episodic",
|
|
668
|
+
MemoryType.SEMANTIC: self.base_dir / "semantic",
|
|
669
|
+
MemoryType.CORRECTION: self.base_dir / "corrections",
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
def _get_file_path(self, mtype: MemoryType, item_id: str) -> Path:
|
|
673
|
+
if '..' in item_id or '/' in item_id or '\\' in item_id:
|
|
674
|
+
raise ValueError(f"Invalid item_id (path traversal): {item_id}")
|
|
675
|
+
dir_path = self._type_dirs.get(mtype, self.base_dir / "other")
|
|
676
|
+
if mtype == MemoryType.KNOWLEDGE:
|
|
677
|
+
domain = "general"
|
|
678
|
+
path = dir_path / domain / f"{item_id}.json"
|
|
679
|
+
else:
|
|
680
|
+
path = dir_path / f"{item_id}.json"
|
|
681
|
+
if not path.resolve().is_relative_to(self.base_dir.resolve()):
|
|
682
|
+
raise ValueError(f"Path traversal detected: {item_id}")
|
|
683
|
+
return path
|
|
684
|
+
|
|
685
|
+
def save(self, memory_type: MemoryType, data: Dict) -> str:
|
|
686
|
+
item_id = data.get("id", f"{memory_type.value}_{uuid.uuid4().hex[:12]}_{int(time.time())}")
|
|
687
|
+
file_path = self._get_file_path(memory_type, item_id)
|
|
688
|
+
with self._lock:
|
|
689
|
+
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
690
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
691
|
+
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
692
|
+
return item_id
|
|
693
|
+
|
|
694
|
+
def load(self, memory_type: MemoryType, item_id: str) -> Optional[Dict]:
|
|
695
|
+
file_path = self._get_file_path(memory_type, item_id)
|
|
696
|
+
with self._lock:
|
|
697
|
+
if not file_path.exists():
|
|
698
|
+
return None
|
|
699
|
+
try:
|
|
700
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
701
|
+
return json.load(f)
|
|
702
|
+
except (json.JSONDecodeError, IOError):
|
|
703
|
+
return None
|
|
704
|
+
|
|
705
|
+
def list_all(self, memory_type: MemoryType,
             filters: Optional[Dict] = None) -> List[Dict]:
    """Return every stored record of ``memory_type`` that matches ``filters``.

    The type directory is scanned recursively for ``*.json`` files in sorted
    path order. Files that cannot be read, are not valid JSON, or whose
    top-level value is not a JSON object are skipped, so every returned
    element is a dict.

    Args:
        memory_type: Memory type; selects the directory from
            ``self._type_dirs`` (``<base_dir>/other`` when absent).
        filters: Optional ``{key: value}`` pairs; a record is kept only when
            ``record.get(key) == value`` holds for every pair.

    Returns:
        The matching records; empty when the directory does not exist.
    """
    results: List[Dict] = []
    dir_path = self._type_dirs.get(memory_type, self.base_dir / "other")
    with self._lock:
        if not dir_path.exists():
            return results
        for json_file in sorted(dir_path.glob("**/*.json")):
            try:
                with open(json_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (json.JSONDecodeError, IOError):
                continue
            # Valid JSON that is not an object (list, string, number, ...)
            # cannot be filtered or consumed by callers that expect a dict.
            if not isinstance(data, dict):
                continue
            if filters and any(data.get(k) != v for k, v in filters.items()):
                continue
            results.append(data)
    return results
|
|
729
|
+
|
|
730
|
+
def delete(self, memory_type: MemoryType, item_id: str) -> bool:
    """Remove the stored file for ``item_id``.

    Args:
        memory_type: Memory type the record was saved under.
        item_id: Identifier of the record.

    Returns:
        ``True`` if a file existed and was unlinked, ``False`` otherwise.

    Raises:
        ValueError: If the id is rejected by ``_get_file_path``.
    """
    target = self._get_file_path(memory_type, item_id)
    with self._lock:
        if not target.exists():
            return False
        target.unlink()
        return True
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
class MemoryIndexer:
    """In-memory inverted index over memory items with TF-IDF-style ranking.

    Token, domain, tag and type lookup tables map to sets of item ids. Every
    public method runs under one re-entrant lock.
    """

    def __init__(self):
        self._inverted_index: Dict[str, set] = {}    # token -> item ids
        self._domain_index: Dict[str, set] = {}      # domain -> item ids
        self._tag_index: Dict[str, set] = {}         # tag -> item ids
        self._type_index: Dict[MemoryType, set] = {}  # memory type -> item ids
        self._tf_cache: Dict[str, Counter] = {}      # item id -> token counts
        self._items_cache: Dict[str, MemoryItem] = {}
        self._index_built: bool = False
        self._write_count: int = 0
        self._lock = threading.RLock()
        self._doc_count: int = 0

    def build_index(self, items: List[MemoryItem]) -> None:
        """Discard the current index and rebuild it from ``items``."""
        with self._lock:
            self._inverted_index.clear()
            self._domain_index.clear()
            self._tag_index.clear()
            self._type_index.clear()
            self._tf_cache.clear()
            self._items_cache.clear()
            self._doc_count = 0
            for item in items:
                self._add_to_index_internal(item)
            self._index_built = True

    def add_to_index(self, item: MemoryItem) -> None:
        """Index one item incrementally.

        This does not mark the index as built, and ``search`` returns
        nothing until ``build_index`` has run at least once.
        """
        with self._lock:
            self._add_to_index_internal(item)
            self._write_count += 1

    def _add_to_index_internal(self, item: MemoryItem) -> None:
        """Insert ``item`` into every lookup table (caller holds the lock)."""
        mid = item.id
        if mid in self._items_cache:
            # Re-indexing a known id replaces the old entry; otherwise the
            # document count is inflated and stale postings keep matching.
            self._remove_from_index_internal(mid)
        self._items_cache[mid] = item
        self._doc_count += 1
        tokens = self._tokenize(item.title + " " + item.content)
        self._tf_cache[mid] = Counter(tokens)
        for token in set(tokens):
            self._inverted_index.setdefault(token, set()).add(mid)
        if item.domain:
            self._domain_index.setdefault(item.domain, set()).add(mid)
        for tag in item.tags:
            self._tag_index.setdefault(tag, set()).add(mid)
        self._type_index.setdefault(item.memory_type, set()).add(mid)

    def remove_from_index(self, memory_id: str) -> None:
        """Remove ``memory_id`` from the index; unknown ids are ignored."""
        with self._lock:
            self._remove_from_index_internal(memory_id)

    def _remove_from_index_internal(self, memory_id: str) -> None:
        """Drop every trace of ``memory_id`` (caller holds the lock)."""
        item = self._items_cache.pop(memory_id, None)
        if item is None:
            return
        self._doc_count -= 1
        for token in set(self._tokenize(item.title + " " + item.content)):
            self._discard(self._inverted_index, token, memory_id)
        if item.domain:
            self._discard(self._domain_index, item.domain, memory_id)
        for tag in item.tags:
            self._discard(self._tag_index, tag, memory_id)
        self._discard(self._type_index, item.memory_type, memory_id)
        self._tf_cache.pop(memory_id, None)

    @staticmethod
    def _discard(table: dict, key, memory_id: str) -> None:
        """Remove ``memory_id`` from ``table[key]``, pruning an emptied bucket."""
        bucket = table.get(key)
        if bucket is None:
            return
        bucket.discard(memory_id)
        if not bucket:
            del table[key]

    def search(self, query_text: str,
               type_filter: Optional[MemoryType] = None,
               domain_filter: Optional[str] = None,
               limit: int = 10) -> List[Tuple[str, float]]:
        """Rank indexed items against ``query_text``.

        Args:
            query_text: Free text; tokenised the same way as indexed items.
            type_filter: When set, keep only items of this memory type.
            domain_filter: When set, keep only items of this domain.
            limit: Maximum number of results.

        Returns:
            ``(item id, score)`` pairs sorted by descending score, ties
            broken by id. Empty when ``build_index`` has not run yet or the
            index holds no tokens.
        """
        with self._lock:
            if not self._index_built or not self._inverted_index:
                return []
            query_tokens = self._tokenize(query_text)
            candidates = set()
            for token in query_tokens:
                candidates |= self._inverted_index.get(token, set())
            if type_filter:
                candidates &= self._type_index.get(type_filter, set())
            if domain_filter:
                candidates &= self._domain_index.get(domain_filter, set())
            results = [
                (doc_id, self._compute_relevance(query_tokens, doc_id))
                for doc_id in candidates
            ]
            # Secondary key keeps the order of equal scores reproducible.
            results.sort(key=lambda pair: (-pair[1], pair[0]))
            return results[:limit]

    def keyword_search(self, keywords: List[str],
                       domain: Optional[str] = None) -> List[Tuple[str, float]]:
        """Return items that contain every token of every keyword.

        Keywords that produce no tokens are ignored. Unlike ``search`` this
        does not require ``build_index`` to have run.

        Returns:
            ``(item id, 1.0)`` pairs sorted by id; empty when no keyword
            yields a token.
        """
        with self._lock:
            if not keywords:
                return []
            candidate_sets = []
            for kw in keywords:
                tokens = self._tokenize(kw)
                if not tokens:
                    continue
                matching_ids = set(self._inverted_index.get(tokens[0], ()))
                for t in tokens[1:]:
                    matching_ids &= self._inverted_index.get(t, set())
                candidate_sets.append(matching_ids)
            if not candidate_sets:
                return []
            final_candidates = set.intersection(*candidate_sets)
            if domain:
                final_candidates &= self._domain_index.get(domain, set())
            return [(mid, 1.0) for mid in sorted(final_candidates)]

    def _compute_relevance(self, query_tokens: List[str], doc_id: str) -> float:
        """Score ``doc_id`` against the query tokens, capped at ``1.0``.

        Sums ``term frequency * idf`` over the query tokens (repeated query
        tokens contribute repeatedly), then divides by the product of the
        document and query term-count vector norms.
        """
        doc_tf = self._tf_cache.get(doc_id, Counter())
        query_tf = Counter(query_tokens)
        score = 0.0
        for token in query_tokens:
            if token in doc_tf:
                doc_freq = len(self._inverted_index.get(token, set()))
                idf = math.log((self._doc_count + 1) / (doc_freq + 1)) + 1
                score += doc_tf[token] * idf
        if score > 0:
            doc_norm = math.sqrt(sum(v ** 2 for v in doc_tf.values()))
            query_norm = math.sqrt(sum(v ** 2 for v in query_tf.values())) or 1
            score = score / (doc_norm * query_norm)
        return min(score, 1.0)

    @staticmethod
    def _tokenize(text: str) -> List[str]:
        """Split ``text`` into lower-cased index tokens.

        Non-word characters become separators. A chunk containing any
        character in U+4E00..U+9FFF is split into single characters; any
        other chunk is kept whole, preceded by its character bigrams when
        it is longer than three characters.
        """
        text = re.sub(r'[^\w\u4e00-\u9fff]', ' ', text.lower())
        result: List[str] = []
        for t in text.split():
            if len(t) <= 1:
                result.append(t)
            elif any('\u4e00' <= c <= '\u9fff' for c in t):
                result.extend(t)
            else:
                if len(t) > 3:
                    result.extend(t[i:i + 2] for i in range(len(t) - 1))
                result.append(t)
        return result

    @property
    def is_built(self) -> bool:
        """Whether ``build_index`` has completed at least once."""
        return self._index_built

    @property
    def size(self) -> int:
        """Number of items currently indexed."""
        return self._doc_count
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
class MemoryWriter:
    """Write typed memory records to a store and mirror them into an index.

    Each ``write_*`` method serialises its record to a dict, saves it through
    ``self.store`` and, when an indexer was supplied, adds a matching
    ``MemoryItem`` to it.
    """

    def __init__(self, store: MemoryStore, indexer: Optional[MemoryIndexer] = None):
        self.store = store
        self.indexer = indexer
        self._capture_count = 0  # number of episodic writes

    def write_knowledge(self, item: KnowledgeItem) -> str:
        """Store a knowledge item and return its id."""
        data = {
            "id": item.id, "domain": item.domain, "title": item.title,
            "content": item.content, "tags": item.tags,
            "source": item.source,
            "created_at": item.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.KNOWLEDGE, data)
        if self.indexer:
            self.indexer.add_to_index(MemoryItem(
                id=item_id, memory_type=MemoryType.KNOWLEDGE,
                title=item.title, content=item.content,
                domain=item.domain, tags=item.tags, source=item.source,
            ))
        return item_id

    def write_episodic(self, memory: EpisodicMemory) -> str:
        """Store an episodic memory and return its id.

        The index title is the first 60 characters of the finding.
        """
        data = {
            "id": memory.id, "task_description": memory.task_description,
            "finding": memory.finding, "worker_id": memory.worker_id,
            "confidence": memory.confidence, "tags": memory.tags,
            "created_at": memory.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.EPISODIC, data)
        if self.indexer:
            self.indexer.add_to_index(MemoryItem(
                id=item_id, memory_type=MemoryType.EPISODIC,
                title=memory.finding[:60], content=memory.finding,
                tags=memory.tags, source=memory.worker_id,
                metadata={"confidence": memory.confidence},
            ))
        self._capture_count += 1
        return item_id

    def write_feedback(self, feedback: UserFeedback) -> str:
        """Store a user feedback record and return its id.

        The feedback type is stored under the ``"type"`` key and doubles as
        the single index tag.
        """
        data = {
            "id": feedback.id, "user_id": feedback.user_id,
            "type": feedback.feedback_type, "content": feedback.content,
            "rating": feedback.rating, "context": feedback.context,
            "created_at": feedback.created_at or datetime.now().isoformat(),
            "status": feedback.status,
        }
        item_id = self.store.save(MemoryType.FEEDBACK, data)
        if self.indexer:
            self.indexer.add_to_index(MemoryItem(
                id=item_id, memory_type=MemoryType.FEEDBACK,
                title=f"[{feedback.feedback_type}] {feedback.content[:40]}",
                content=feedback.content,
                tags=[feedback.feedback_type],
                metadata={"rating": feedback.rating},
            ))
        return item_id

    def write_pattern(self, pattern: PersistedPattern) -> str:
        """Store a persisted pattern and return its id.

        The index content is the JSON dump of ``steps_template`` truncated
        to 500 characters; the category is used as the index domain.
        """
        data = {
            "id": pattern.id, "name": pattern.name, "slug": pattern.slug,
            "category": pattern.category, "trigger_keywords": pattern.trigger_keywords,
            "steps_template": pattern.steps_template,
            "confidence": pattern.confidence, "quality_score": pattern.quality_score,
            "created_at": pattern.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.PATTERN, data)
        if self.indexer:
            self.indexer.add_to_index(MemoryItem(
                id=item_id, memory_type=MemoryType.PATTERN,
                title=pattern.name,
                content=json.dumps(pattern.steps_template, ensure_ascii=False)[:500],
                domain=pattern.category, tags=pattern.trigger_keywords,
                metadata={"quality_score": pattern.quality_score, "confidence": pattern.confidence},
            ))
        return item_id

    def write_analysis(self, analysis: AnalysisCase) -> str:
        """Store an analysis case and return its id.

        Index tags are extracted from the problem text.
        """
        data = {
            "id": analysis.id, "problem": analysis.problem,
            "context": analysis.context, "root_cause": analysis.root_cause,
            "solutions": analysis.solutions, "status": analysis.status,
            "created_at": analysis.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.ANALYSIS, data)
        if self.indexer:
            self.indexer.add_to_index(MemoryItem(
                id=item_id, memory_type=MemoryType.ANALYSIS,
                title=analysis.problem[:60], content=analysis.root_cause,
                tags=self._extract_tags(analysis.problem),
                metadata={"solutions_count": len(analysis.solutions)},
            ))
        return item_id

    def batch_write(self, items: List[MemoryItem]) -> int:
        """Save and index several items, returning how many succeeded.

        Best effort: an item whose save or indexing raises is skipped and
        the remaining items are still processed.
        """
        success = 0
        for item in items:
            data = item.to_dict()
            try:
                self.store.save(item.memory_type, data)
                if self.indexer:
                    self.indexer.add_to_index(item)
            except Exception:
                # Deliberately tolerant; the return value reports failures.
                continue
            success += 1
        return success

    @staticmethod
    def _extract_tags(text: str) -> List[str]:
        """Return up to ten distinct tag words in order of first appearance.

        A tag is a run of two or more characters in U+4E00..U+9FFF or of
        three or more ASCII letters. Keeping first-seen order makes the
        selection reproducible when more than ten candidates exist.
        """
        words = re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', text)
        return list(dict.fromkeys(words))[:10]
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
class MemoryReader:
    """Read stored records back as typed memory objects.

    Fields missing from a stored record fall back to the defaults visible in
    each constructor call below.
    """

    def __init__(self, store: MemoryStore):
        self.store = store

    def read_knowledge(self, domain: Optional[str] = None) -> List[KnowledgeItem]:
        """Return knowledge items, optionally restricted to one ``domain``."""
        filters = {"domain": domain} if domain else None
        raw_list = self.store.list_all(MemoryType.KNOWLEDGE, filters)
        return [KnowledgeItem(
            id=r.get("id", ""), domain=r.get("domain", "general"),
            title=r.get("title", ""), content=r.get("content", ""),
            tags=r.get("tags", []), created_at=r.get("created_at", ""),
            source=r.get("source", ""),
        ) for r in raw_list]

    def read_episodic(self, limit: int = 50,
                      since: Optional[datetime] = None) -> List[EpisodicMemory]:
        """Return the most recent episodic memories, newest first.

        Records are ordered by their ``created_at`` string before ``limit``
        is applied. The store lists files in path order and file names embed
        a random component, so truncating in that order would return an
        arbitrary subset.

        Args:
            limit: Maximum number of memories to return.
            since: When set, drop records whose ``created_at`` sorts before
                ``since.isoformat()`` (string comparison).
        """
        def created(record: dict) -> str:
            # A missing or null timestamp sorts as the oldest value.
            return record.get("created_at") or ""

        raw_list = self.store.list_all(MemoryType.EPISODIC)
        if since:
            threshold = since.isoformat()
            raw_list = [r for r in raw_list if created(r) >= threshold]
        raw_list = sorted(raw_list, key=created, reverse=True)[:limit]
        return [EpisodicMemory(
            id=r.get("id", ""), task_description=r.get("task_description", ""),
            finding=r.get("finding", ""), worker_id=r.get("worker_id", ""),
            confidence=r.get("confidence", 0.0), tags=r.get("tags", []),
            created_at=r.get("created_at", ""),
        ) for r in raw_list]

    def read_feedback(self, status: Optional[str] = None,
                      feedback_type: Optional[str] = None) -> List[UserFeedback]:
        """Return feedback records, optionally filtered by status and type.

        ``feedback_type`` is matched against the stored ``"type"`` key.
        """
        filters = {}
        if status:
            filters["status"] = status
        if feedback_type:
            filters["type"] = feedback_type
        raw_list = self.store.list_all(MemoryType.FEEDBACK, filters or None)
        return [UserFeedback(
            id=r.get("id", ""), user_id=r.get("user_id", "default"),
            feedback_type=r.get("type", "suggestion"), content=r.get("content", ""),
            rating=r.get("rating"), context=r.get("context", {}),
            created_at=r.get("created_at", ""), status=r.get("status", "pending"),
        ) for r in raw_list]

    def read_patterns(self, category: Optional[str] = None) -> List[PersistedPattern]:
        """Return persisted patterns, optionally restricted to ``category``."""
        raw_list = self.store.list_all(MemoryType.PATTERN)
        if category:
            raw_list = [r for r in raw_list if r.get("category") == category]
        return [PersistedPattern(
            id=r.get("id", ""), name=r.get("name", ""), slug=r.get("slug", ""),
            category=r.get("category", ""), trigger_keywords=r.get("trigger_keywords", []),
            steps_template=r.get("steps_template", []),
            confidence=r.get("confidence", 0.0), quality_score=r.get("quality_score", 0.0),
            created_at=r.get("created_at", ""),
        ) for r in raw_list]

    def read_analysis_cases(self, status: Optional[str] = None) -> List[AnalysisCase]:
        """Return analysis cases, optionally restricted to one ``status``."""
        filters = {"status": status} if status else None
        raw_list = self.store.list_all(MemoryType.ANALYSIS, filters)
        return [AnalysisCase(
            id=r.get("id", ""), problem=r.get("problem", ""),
            context=r.get("context", {}), root_cause=r.get("root_cause", ""),
            solutions=r.get("solutions", []), status=r.get("status", "completed"),
            created_at=r.get("created_at", ""),
        ) for r in raw_list]
|
|
1091
|
+
|
|
1092
|
+
|
|
1093
|
+
class MemoryBridge:
|
|
1094
|
+
def __init__(self, base_dir: Optional[str] = None,
             config: Optional[MemoryConfig] = None,
             mce_adapter=None):
    """Initialise the memory bridge.

    Args:
        base_dir: Root directory for stored memories. Defaults to
            ``data/memory-bank`` two directory levels above this file.
        config: Memory configuration; ``MemoryConfig.default()`` when
            omitted.
        mce_adapter: Optional MCE (memory classification engine) adapter.
            It is used only when it exposes a truthy ``is_available``.
            When active, ``capture_execution()`` classifies scratchpad
            content with it and ``recall()`` classifies the query text to
            pick a memory-type filter. The original note also says
            ``shutdown()`` closes the MCE connection; that method is not
            part of this excerpt.
    """
    self.config = config or MemoryConfig.default()

    if base_dir is None:
        module_dir = os.path.dirname(__file__)
        base_dir = os.path.join(module_dir, '..', '..', 'data', 'memory-bank')
    self.base_dir = os.path.abspath(base_dir)

    # Storage, index and the reader/writer facades built on top of them.
    self.store: JsonMemoryStore = JsonMemoryStore(self.base_dir)
    self.indexer: MemoryIndexer = MemoryIndexer()
    self.writer: MemoryWriter = MemoryWriter(self.store, self.indexer)
    self.reader: MemoryReader = MemoryReader(self.store)
    self._stats = MemoryStats(total_captures=0, total_recalls=0)
    self._inner_lock = threading.RLock()

    self._mce_adapter = mce_adapter
    self._mce_enabled = mce_adapter is not None and getattr(mce_adapter, 'is_available', False)

    # The WorkBuddy claw source is optional: any failure while creating or
    # probing it leaves the integration disabled.
    self._claw_source: Optional[WorkBuddyClawSource] = None
    self._claw_enabled = False
    try:
        claw = WorkBuddyClawSource()
        self._claw_source = claw
        if claw.is_available:
            self._claw_enabled = True
    except Exception:
        pass
|
|
1132
|
+
|
|
1133
|
+
def recall(self, query: MemoryQuery) -> MemoryRecallResult:
    """Recall stored memories relevant to ``query``.

    Steps:
      1. Return an empty result when memory is disabled in the config or
         the query text is blank.
      2. If the caller gave no ``memory_type`` and an MCE adapter is active,
         classify the query text and map the result to a ``MemoryType``
         filter (preference -> FEEDBACK, decision/correction/task ->
         EPISODIC, fact -> KNOWLEDGE). Classification errors are ignored.
      3. Optionally fetch up to ``limit // 2`` items from the WorkBuddy claw
         source; errors are ignored.
      4. Search the index for ``limit * 3`` candidates, drop those scoring
         below ``query.min_relevance`` or no longer loadable, and keep at
         most ``limit``.
      5. Merge claw items, sort by relevance and truncate to ``limit``.

    Args:
        query: Query text plus optional type/domain filters, result limit
            and minimum relevance.

    Returns:
        The recalled memories, their count, the elapsed time in
        milliseconds and a per-type count of the memories returned.
    """
    start = time.perf_counter()
    self._stats.total_recalls += 1
    if not self.config.enabled or not query.query_text.strip():
        return MemoryRecallResult(
            query_time_ms=(time.perf_counter() - start) * 1000,
        )

    effective_type_filter = query.memory_type

    if self._mce_enabled and self._mce_adapter and not query.memory_type:
        try:
            mce_result = self._mce_adapter.classify(query.query_text, timeout_ms=300)
            if mce_result and mce_result.memory_type:
                # The indexer keys its type index by MemoryType members, so
                # the filter must be an enum member, not its name string.
                type_mapping = {
                    "preference": MemoryType.FEEDBACK,
                    "decision": MemoryType.EPISODIC,
                    "correction": MemoryType.EPISODIC,
                    "fact": MemoryType.KNOWLEDGE,
                    "task": MemoryType.EPISODIC,
                }
                mapped_type = type_mapping.get(mce_result.memory_type.lower())
                if mapped_type is not None:
                    effective_type_filter = mapped_type
        except Exception:
            # Classification is an optional refinement; search unfiltered.
            pass

    claw_items: List[MemoryItem] = []
    if self._claw_enabled and self._claw_source:
        try:
            claw_items = self._claw_source.search_by_index(query.query_text, limit=query.limit // 2)
        except Exception:
            pass

    # Over-fetch so that low-relevance or unloadable hits can be dropped.
    search_results = self.indexer.search(
        query.query_text,
        type_filter=effective_type_filter,
        domain_filter=query.domain,
        limit=query.limit * 3,
    )
    memories = []
    for mid, score in search_results:
        if score < query.min_relevance:
            continue
        item_data = self._load_any_type(mid)
        if item_data is None:
            continue
        item = MemoryItem.from_dict(item_data)
        item.relevance_score = score
        item.last_accessed = datetime.now()
        item.access_count += 1
        memories.append(item)
        if len(memories) >= query.limit:
            break
    elapsed = (time.perf_counter() - start) * 1000

    if claw_items:
        for ci in claw_items:
            ci.last_accessed = datetime.now()
            memories.append(ci)
        memories.sort(key=lambda x: x.relevance_score, reverse=True)
        memories = memories[:query.limit]

    # Count types only for what is returned, after the final truncation.
    hit_types: Dict[str, int] = {}
    for item in memories:
        mt = item.memory_type.value
        hit_types[mt] = hit_types.get(mt, 0) + 1

    return MemoryRecallResult(
        memories=memories,
        total_found=len(memories),
        query_time_ms=elapsed,
        hit_memory_types=hit_types,
    )
|
|
1220
|
+
|
|
1221
|
+
def capture_execution(self, execution_record=None,
                      scratchpad_entries=None) -> Optional[str]:
    """Persist high-confidence FINDING scratchpad entries as memories.

    Only entries whose ``entry_type`` is ``"FINDING"`` and whose confidence
    (0.8 when missing or falsy) is at least 0.7 are stored. Content longer
    than 5000 characters is truncated. When an MCE adapter is active the
    content is classified: ``fact`` is stored as knowledge, ``preference``
    as feedback, and everything else (including classification failures)
    as an episodic memory.

    Args:
        execution_record: Object whose ``task_description`` and
            ``worker_id`` are read when present; may be ``None``.
        scratchpad_entries: Iterable of entries exposing ``entry_type``,
            ``confidence`` and ``content``.

    Returns:
        The id of the last memory stored, or ``None`` when auto-capture is
        disabled, no entries were given, or nothing qualified.
    """
    if not self.config.auto_capture or scratchpad_entries is None:
        return None

    type_hint_map = {
        "preference": "FEEDBACK",
        "decision": "EPISODIC",
        "correction": "EPISODIC",
        "fact": "KNOWLEDGE",
    }
    task_desc = getattr(execution_record, 'task_description', '') or ''
    worker_id = getattr(execution_record, 'worker_id', '') or ''

    captured_id = None
    for entry in scratchpad_entries:
        entry_type = getattr(entry, 'entry_type', None)
        entry_type_val = entry_type.value if hasattr(entry_type, 'value') else str(entry_type)
        if entry_type_val != "FINDING":
            continue
        confidence = getattr(entry, 'confidence', 0.8) or 0.8
        if confidence < 0.7:
            continue
        content = getattr(entry, 'content', '') or ''
        if len(content) > 5000:
            content = content[:5000] + "...[TRUNCATED]"

        mce_memory_type = None
        mce_confidence = confidence
        if self._mce_enabled and self._mce_adapter and content:
            try:
                mce_result = self._mce_adapter.classify(content, timeout_ms=500)
                if mce_result:
                    mce_confidence = max(confidence, mce_result.confidence)
                    if mce_result.memory_type:
                        mce_memory_type = type_hint_map.get(mce_result.memory_type.lower())
            except Exception:
                # Classification is optional; fall back to episodic.
                pass

        tags = self._extract_tags(task_desc + " " + content)
        now = datetime.now().isoformat()
        stamp = f"{uuid.uuid4().hex[:12]}_{int(time.time())}"

        # The writer reads KnowledgeItem / UserFeedback attributes (title,
        # content, user_id, feedback_type, ...), so those are the record
        # types that must be built here.
        if mce_memory_type == "KNOWLEDGE":
            captured_id = self.writer.write_knowledge(KnowledgeItem(
                id=f"know_{stamp}",
                domain=task_desc[:100] if task_desc else "general",
                title=content[:60],
                content=content,
                tags=tags,
                created_at=now,
                source=worker_id or "multi-agent",
            ))
        elif mce_memory_type == "FEEDBACK":
            captured_id = self.writer.write_feedback(UserFeedback(
                id=f"feed_{stamp}",
                user_id=worker_id or "default",
                feedback_type="preference",
                content=content,
                rating=None,
                context={"task_description": task_desc[:200], "confidence": mce_confidence},
                created_at=now,
                status="pending",
            ))
        else:
            captured_id = self.writer.write_episodic(EpisodicMemory(
                id=f"epi_{stamp}",
                task_description=task_desc[:200],
                finding=content,
                worker_id=worker_id,
                confidence=mce_confidence,
                tags=tags,
                created_at=now,
            ))
        self._stats.total_captures += 1
    return captured_id
|
|
1312
|
+
|
|
1313
|
+
def record_feedback(self, feedback: UserFeedback) -> str:
    """Store a user feedback record, filling in a missing id and timestamp.

    An id equal to the empty string is replaced by a generated
    ``fb_<12 hex chars>_<unix time>`` id, and a falsy ``created_at`` by the
    current local time in ISO format. Both are written onto ``feedback``
    itself before it is handed to the writer.

    Returns:
        The id returned by ``self.writer.write_feedback``.
    """
    id_missing = feedback.id == ""
    if id_missing:
        suffix = uuid.uuid4().hex[:12]
        feedback.id = f"fb_{suffix}_{int(time.time())}"

    if not feedback.created_at:
        feedback.created_at = datetime.now().isoformat()

    stored_id = self.writer.write_feedback(feedback)
    return stored_id
|
|
1329
|
+
|
|
1330
|
+
def persist_pattern(self, pattern) -> Optional[str]:
    """Persist a skill pattern when it passes the quality gates.

    The pattern is rejected (``None`` is returned) when it lacks ``name`` or
    ``steps_template``, when its numeric confidence is below 0.7 (a missing
    confidence counts as 0), or when its quality score is below 70. A
    missing or falsy ``quality_score`` falls back to ``confidence * 100``.

    Args:
        pattern: Duck-typed pattern object. ``pattern_id``, ``category``
            and ``trigger_keywords`` are read when present.

    Returns:
        The id returned by ``self.writer.write_pattern``, or ``None`` when
        the pattern was rejected.
    """
    if not (hasattr(pattern, 'name') and hasattr(pattern, 'steps_template')):
        return None

    quality = getattr(pattern, 'confidence', 0) or 0
    if isinstance(quality, (int, float)) and quality < 0.7:
        return None

    scaled = quality * 100
    qs = getattr(pattern, 'quality_score', scaled) or (scaled if quality else 0)
    if qs < 70:
        return None

    default_slug = pattern.name.lower().replace(' ', '-')
    slug = getattr(pattern, 'pattern_id', default_slug) or ""

    # Steps that know how to serialise themselves are converted to dicts.
    raw_steps = getattr(pattern, 'steps_template', []) or []
    steps = [step.to_dict() if hasattr(step, 'to_dict') else step for step in raw_steps]

    raw_confidence = getattr(pattern, 'confidence', None)
    confidence = float(raw_confidence) if raw_confidence is not None else quality

    persisted = PersistedPattern(
        id=f"pat_{uuid.uuid4().hex[:12]}_{int(time.time())}",
        name=pattern.name,
        slug=slug,
        category=getattr(pattern, 'category', 'auto-generated'),
        trigger_keywords=getattr(pattern, 'trigger_keywords', []) or [],
        steps_template=steps,
        confidence=confidence,
        quality_score=qs,
        created_at=datetime.now().isoformat(),
    )
    return self.writer.write_pattern(persisted)
|
|
1363
|
+
|
|
1364
|
+
def learn_from_mistake(self, error_context: ErrorContext) -> str:
    """Record an execution error as a completed analysis case.

    The problem text is the first 200 characters of the error message; the
    root cause and the first suggested solution quote its first 100
    characters. The remaining solutions are fixed generic suggestions.

    Returns:
        The id returned by ``self.writer.write_analysis``.
    """
    message = error_context.error_message
    excerpt = message[:100]

    case_context = {
        "task": error_context.task_description[:200],
        "worker": error_context.worker_id,
        "timestamp": error_context.timestamp,
    }
    suggestions = [
        f"Review the error context: {excerpt}",
        "Check input parameters and dependencies",
        "Add validation to prevent recurrence",
        "Document the solution for future reference",
    ]

    case = AnalysisCase(
        id=f"anal_{uuid.uuid4().hex[:12]}_{int(time.time())}",
        problem=message[:200],
        context=case_context,
        root_cause=f"Error during execution: {excerpt}",
        solutions=suggestions,
        status="completed",
        created_at=datetime.now().isoformat(),
    )
    return self.writer.write_analysis(case)
|
|
1384
|
+
|
|
1385
|
+
def search_knowledge(self, keywords: List[str],
                     domain: Optional[str] = None) -> List[KnowledgeItem]:
    """Look up knowledge items through the index's keyword search.

    Index hits are loaded from the knowledge store; hits for which nothing
    can be loaded are skipped.

    Args:
        keywords: Keywords passed to ``self.indexer.keyword_search``.
        domain: Optional domain restriction passed to the same call.

    Returns:
        The matching knowledge items; empty when ``keywords`` is empty.
    """
    if not keywords:
        return []

    found = []
    for memory_id, _score in self.indexer.keyword_search(keywords, domain=domain):
        record = self.store.load(MemoryType.KNOWLEDGE, memory_id)
        if not record:
            continue
        found.append(KnowledgeItem(
            id=record.get("id", ""),
            domain=record.get("domain", "general"),
            title=record.get("title", ""),
            content=record.get("content", ""),
            tags=record.get("tags", []),
            created_at=record.get("created_at", ""),
            source=record.get("source", ""),
        ))
    return found
|
|
1401
|
+
|
|
1402
|
+
def get_statistics(self) -> MemoryStats:
    """Collect usage counters and per-type record counts.

    Every memory type is listed from the store to obtain counts and the
    oldest/newest ``created_at`` strings; a type whose listing fails is
    counted as 0. When the claw source is enabled, its item count is the
    number of existing core files plus the number of dated daily files,
    capped at 30.

    Returns:
        A freshly populated ``MemoryStats``.
    """
    stats = MemoryStats(
        total_captures=self._stats.total_captures,
        total_recalls=self._stats.total_recalls,
        index_built=self.indexer.is_built,
        # NOTE(review): this is the time of this call, not of the last index
        # build; no build timestamp is tracked in the code shown here.
        last_index_time=datetime.now().isoformat() if self.indexer.is_built else None,
    )

    type_counts: Dict[str, int] = {}
    all_items = []
    for mtype in MemoryType:
        try:
            raw = self.store.list_all(mtype)
            type_counts[mtype.value] = len(raw)
            all_items.extend(raw)
        except Exception:
            type_counts[mtype.value] = 0
    stats.by_type_counts = type_counts
    stats.total_memories = sum(type_counts.values())

    dates = [r.get("created_at", "") for r in all_items if r.get("created_at")]
    if dates:
        stats.newest_memory = max(dates)
        stats.oldest_memory = min(dates)

    stats.claw_enabled = self._claw_enabled
    stats.claw_item_count = 0
    if self._claw_source and self._claw_enabled:
        try:
            core_count = sum(
                1 for f in self._claw_source.CORE_FILE_MAPPING
                if (self._claw_source._memory_dir / f).exists()
            )
            daily_count = 0
            wb_dir = self._claw_source._wb_memory_dir
            if wb_dir.exists():
                # Match any four-digit year prefix instead of a single
                # hard-coded year, so daily files keep being counted.
                daily_files = wb_dir.glob("[0-9][0-9][0-9][0-9]-*.md")
                daily_count = min(30, sum(1 for _ in daily_files))
            stats.claw_item_count = core_count + daily_count
        except Exception:
            stats.claw_item_count = 0
    return stats
|
|
1437
|
+
|
|
1438
|
+
def get_recent_history(self, n: int = 10) -> List[EpisodicMemory]:
    """Return up to ``n`` episodic memories as provided by ``self.reader``.

    Args:
        n: Passed to the reader as its ``limit`` argument (default 10).

    Returns:
        Whatever ``self.reader.read_episodic`` yields for that limit.
    """
    recent = self.reader.read_episodic(limit=n)
    return recent
|
|
1440
|
+
|
|
1441
|
+
def get_workbuddy_ai_news(self, days: int = 7) -> List[MemoryItem]:
    """Fetch the WorkBuddy daily AI news feed through the Claw source.

    According to the original notes this is "Plan B", meant for the
    Coordinator to inject recent AI industry news as context for
    trend-analysis tasks.  This method only delegates to
    ``self._claw_source.get_latest_ai_news``; ordering and metadata of the
    entries are decided there.

    Args:
        days: Number of days to look back (default 7).

    Returns:
        The entries returned by the Claw source, or an empty list when the
        bridge is disabled, no source is configured, or the lookup raises.
    """
    if self._claw_enabled and self._claw_source:
        try:
            return self._claw_source.get_latest_ai_news(days)
        except Exception:
            # Best effort: a failing news feed degrades to "no news".
            pass
    return []
|
|
1461
|
+
|
|
1462
|
+
def rebuild_index(self) -> None:
    """Rebuild the search index from every record currently in the store.

    Each memory type is listed in turn and every raw record is converted
    with ``MemoryItem.from_dict``.  A record that fails to convert is
    skipped; a type whose listing (or iteration) fails is abandoned at that
    point, keeping whatever was already collected.  The collected items are
    then handed to ``self.indexer.build_index``.
    """
    collected: List[MemoryItem] = []
    for memory_type in MemoryType:
        try:
            for record in self.store.list_all(memory_type):
                try:
                    collected.append(MemoryItem.from_dict(record))
                except Exception:
                    # Skip the malformed record, keep going with the rest.
                    continue
        except Exception:
            continue
    self.indexer.build_index(collected)
|
|
1476
|
+
|
|
1477
|
+
def print_diagnostics(self) -> str:
    """Build a plain-text diagnostics report and return it.

    Nothing is written to stdout; the report is returned as a single
    newline-joined string.  It contains the totals from
    ``get_statistics()``, the index size, one line per memory type (sorted
    by type name) and a section describing the WorkBuddy (Claw) bridge.

    Returns:
        The report text.
    """
    s = self.get_statistics()
    lines = [
        "=== MemoryBridge Diagnostics ===",
        f"Total Memories: {s.total_memories}",
        f"By Type: {s.by_type_counts}",
        f"Index Built: {'Yes' if s.index_built else 'No'}",
        f"Captures: {s.total_captures} | Recalls: {s.total_recalls}",
        f"Index Size: {self.indexer.size} documents",
        "--- Memory Types ---",
    ]
    lines.extend(f" {t}: {count}" for t, count in sorted(s.by_type_counts.items()))
    lines.append("--- WorkBuddy (Claw) Bridge ---")
    lines.append(f" Available: {'Yes' if s.claw_enabled else 'No'}")
    if self._claw_source:
        # A diagnostics report should survive a broken bridge: the other
        # Claw accessors in this class guard the source the same way.
        try:
            all_claw = self._claw_source.load_all_memories()
            episodic = sum(1 for a in all_claw if a.memory_type == MemoryType.EPISODIC)
            lines.append(f" Items: {len(all_claw)} ({episodic} episodic)")
        except Exception as exc:
            lines.append(f" Items: unavailable ({type(exc).__name__})")
    return "\n".join(lines)
|
|
1496
|
+
|
|
1497
|
+
def forgetting_weight(self, memory: MemoryItem) -> float:
    """Return a retention weight for ``memory`` based on its age and usage.

    Memories younger than 7 days always weigh 1.0.  Older ones weigh
    ``scale * u / (u + damping)`` where ``u = log(access_count + 1)`` and
    ``(scale, damping)`` is (0.8, 1) below 30 days, (0.5, 2) below 60 days
    and (0.3, 3) from 60 days on.

    Args:
        memory: Object exposing ``age_days`` and ``access_count``.

    Returns:
        The computed weight.
    """
    age = memory.age_days
    # Evaluated before the early return on purpose, matching the original
    # evaluation order.
    usage = math.log(memory.access_count + 1)
    if age < 7:
        return 1.0
    # (exclusive age limit in days, scale, damping)
    for limit, scale, damping in ((30, 0.8, 1), (60, 0.5, 2)):
        if age < limit:
            return scale * (usage / (usage + damping))
    return 0.3 * (usage / (usage + 3))
|
|
1508
|
+
|
|
1509
|
+
def compress_old_memories(self) -> int:
    """Truncate episodic memories older than 60 days.

    Does nothing (returns 0) unless ``self.config.compress_old_memories`` is
    truthy.  For every episodic record whose ``created_at`` parses as an ISO
    timestamp older than the cutoff and whose metadata is not already
    flagged ``compressed``, the text (``finding``, falling back to
    ``content``) is cut to its first 200 characters plus a
    ``...[COMPRESSED]`` marker, the metadata records the original length and
    the compression time, and the record is saved back if it has an ``id``.

    Timezone-aware timestamps are converted to naive local time before the
    comparison; comparing them directly with the naive cutoff raises
    ``TypeError``, which previously aborted the whole pass silently.

    Returns:
        Number of records that were saved in compressed form.
    """
    if not self.config.compress_old_memories:
        return 0
    compressed = 0
    cutoff = datetime.now() - timedelta(days=60)
    try:
        raw_list = self.store.list_all(MemoryType.EPISODIC)
        for r in raw_list:
            created_str = r.get("created_at", "")
            if not created_str:
                continue
            try:
                created = datetime.fromisoformat(created_str)
                if created.tzinfo is not None:
                    # Bring aware timestamps onto the same naive local
                    # clock that ``datetime.now()`` uses for the cutoff.
                    created = created.astimezone().replace(tzinfo=None)
            except (ValueError, TypeError, OverflowError, OSError):
                continue
            # Tolerate records whose metadata is missing or stored as None.
            metadata = r.get("metadata") or {}
            if created < cutoff and not metadata.get("compressed"):
                content = r.get("finding", "") or r.get("content", "")
                summary = content[:200] + "...[COMPRESSED]"
                r["content"] = summary
                r["finding"] = summary
                metadata["compressed"] = True
                metadata["original_length"] = len(content)
                metadata["compressed_at"] = datetime.now().isoformat()
                r["metadata"] = metadata
                mid = r.get("id", "")
                if mid:
                    self.store.save(MemoryType.EPISODIC, r)
                    compressed += 1
    except Exception:
        # Best effort: a store failure ends the pass; the count of records
        # already saved is still returned.
        pass
    return compressed
|
|
1539
|
+
|
|
1540
|
+
def cleanup_expired_memories(self) -> int:
    """Delete episodic and feedback memories older than the retention period.

    The cutoff is ``now - self.config.retention_days``.  Records without a
    parseable ``created_at`` or without an ``id`` are left alone.  When the
    store reports a successful delete the record is also removed from the
    index (if an indexer is present).

    Timezone-aware timestamps are converted to naive local time before the
    comparison; comparing them directly with the naive cutoff raises
    ``TypeError``, which previously caused the rest of that memory type to
    be skipped silently.

    Returns:
        Number of records deleted from the store.
    """
    removed = 0
    cutoff = datetime.now() - timedelta(days=self.config.retention_days)
    for mtype in [MemoryType.EPISODIC, MemoryType.FEEDBACK]:
        try:
            raw_list = self.store.list_all(mtype)
            for r in raw_list:
                created_str = r.get("created_at", "")
                if not created_str:
                    continue
                try:
                    created = datetime.fromisoformat(created_str)
                    if created.tzinfo is not None:
                        # Bring aware timestamps onto the same naive local
                        # clock that ``datetime.now()`` uses for the cutoff.
                        created = created.astimezone().replace(tzinfo=None)
                except (ValueError, TypeError, OverflowError, OSError):
                    continue
                if created < cutoff:
                    mid = r.get("id", "")
                    if mid and self.store.delete(mtype, mid):
                        removed += 1
                        if self.indexer:
                            self.indexer.remove_from_index(mid)
        except Exception:
            # Best effort: a failure abandons this memory type only.
            continue
    return removed
|
|
1563
|
+
|
|
1564
|
+
def _guess_type(self, memory_id: str) -> MemoryType:
    """Work out which memory type ``memory_id`` belongs to.

    A known id prefix decides immediately.  Otherwise every memory type is
    probed in the store, in enum order, and the first type under which the
    id loads wins.  ``MemoryType.KNOWLEDGE`` is the fallback when nothing
    matches.
    """
    known_prefixes = (
        ("know_", MemoryType.KNOWLEDGE),
        ("fb_", MemoryType.FEEDBACK),
        ("epi_", MemoryType.EPISODIC),
        ("pat_", MemoryType.PATTERN),
        ("anal_", MemoryType.ANALYSIS),
    )
    for marker, kind in known_prefixes:
        if memory_id.startswith(marker):
            return kind
    # No recognisable prefix: ask the store which type holds this id.
    for kind in MemoryType:
        if self.store.load(kind, memory_id) is not None:
            return kind
    return MemoryType.KNOWLEDGE
|
|
1580
|
+
|
|
1581
|
+
def _load_any_type(self, memory_id: str) -> Optional[Dict]:
    """Load a record by id without knowing its memory type in advance.

    The type suggested by ``_guess_type`` is tried first, then every other
    memory type in enum order.  The first record found is returned; if it
    has no ``memory_type`` key, that key is set to the value of the type it
    was found under.

    Returns:
        The record, or ``None`` when no type holds the id.
    """
    first_choice = self._guess_type(memory_id)
    search_order = [first_choice]
    search_order.extend(kind for kind in MemoryType if kind != first_choice)
    for kind in search_order:
        record = self.store.load(kind, memory_id)
        if record is None:
            continue
        if "memory_type" not in record:
            record["memory_type"] = kind.value
        return record
    return None
|
|
1596
|
+
|
|
1597
|
+
@staticmethod
def _extract_tags(text: str) -> List[str]:
    """Extract up to ten distinct tag candidates from ``text``.

    A candidate is a run of two or more characters in the range
    U+4E00-U+9FFF, or a run of three or more ASCII letters.  Duplicates are
    dropped and candidates are returned in order of first appearance, so the
    result is the same on every run (a plain ``set`` would make the
    selection depend on hash ordering).

    Args:
        text: Text to scan; empty or ``None`` yields an empty list.

    Returns:
        At most ten unique candidates.
    """
    if not text:
        return []
    words = re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', text)
    # dict keys keep insertion order, giving an order-preserving de-dup.
    return list(dict.fromkeys(words))[:10]
|
|
1601
|
+
|
|
1602
|
+
def shutdown(self) -> None:
|
|
1603
|
+
if self._mce_adapter:
|
|
1604
|
+
try:
|
|
1605
|
+
self._mce_adapter.shutdown()
|
|
1606
|
+
except Exception:
|
|
1607
|
+
pass
|