devsquad 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. devsquad-3.6.0.dist-info/METADATA +944 -0
  2. devsquad-3.6.0.dist-info/RECORD +95 -0
  3. devsquad-3.6.0.dist-info/WHEEL +5 -0
  4. devsquad-3.6.0.dist-info/entry_points.txt +2 -0
  5. devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
  6. devsquad-3.6.0.dist-info/top_level.txt +2 -0
  7. scripts/__init__.py +0 -0
  8. scripts/ai_semantic_matcher.py +512 -0
  9. scripts/alert_manager.py +505 -0
  10. scripts/api/__init__.py +43 -0
  11. scripts/api/models.py +386 -0
  12. scripts/api/routes/__init__.py +20 -0
  13. scripts/api/routes/dispatch.py +348 -0
  14. scripts/api/routes/lifecycle.py +330 -0
  15. scripts/api/routes/metrics_gates.py +347 -0
  16. scripts/api_server.py +318 -0
  17. scripts/auth.py +451 -0
  18. scripts/cli/__init__.py +1 -0
  19. scripts/cli/cli_visual.py +642 -0
  20. scripts/cli.py +1094 -0
  21. scripts/collaboration/__init__.py +212 -0
  22. scripts/collaboration/_version.py +1 -0
  23. scripts/collaboration/agent_briefing.py +656 -0
  24. scripts/collaboration/ai_semantic_matcher.py +260 -0
  25. scripts/collaboration/anchor_checker.py +281 -0
  26. scripts/collaboration/anti_rationalization.py +470 -0
  27. scripts/collaboration/async_integration_example.py +255 -0
  28. scripts/collaboration/batch_scheduler.py +149 -0
  29. scripts/collaboration/checkpoint_manager.py +561 -0
  30. scripts/collaboration/ci_feedback_adapter.py +351 -0
  31. scripts/collaboration/code_map_generator.py +247 -0
  32. scripts/collaboration/concern_pack_loader.py +352 -0
  33. scripts/collaboration/confidence_score.py +496 -0
  34. scripts/collaboration/config_loader.py +188 -0
  35. scripts/collaboration/consensus.py +244 -0
  36. scripts/collaboration/context_compressor.py +533 -0
  37. scripts/collaboration/coordinator.py +668 -0
  38. scripts/collaboration/dispatcher.py +1636 -0
  39. scripts/collaboration/dual_layer_context.py +128 -0
  40. scripts/collaboration/enhanced_worker.py +539 -0
  41. scripts/collaboration/feature_usage_tracker.py +206 -0
  42. scripts/collaboration/five_axis_consensus.py +334 -0
  43. scripts/collaboration/input_validator.py +401 -0
  44. scripts/collaboration/integration_example.py +287 -0
  45. scripts/collaboration/intent_workflow_mapper.py +350 -0
  46. scripts/collaboration/language_parsers.py +269 -0
  47. scripts/collaboration/lifecycle_protocol.py +1446 -0
  48. scripts/collaboration/llm_backend.py +453 -0
  49. scripts/collaboration/llm_cache.py +448 -0
  50. scripts/collaboration/llm_cache_async.py +347 -0
  51. scripts/collaboration/llm_retry.py +387 -0
  52. scripts/collaboration/llm_retry_async.py +389 -0
  53. scripts/collaboration/mce_adapter.py +597 -0
  54. scripts/collaboration/memory_bridge.py +1607 -0
  55. scripts/collaboration/models.py +537 -0
  56. scripts/collaboration/null_providers.py +297 -0
  57. scripts/collaboration/operation_classifier.py +289 -0
  58. scripts/collaboration/output_slicer.py +225 -0
  59. scripts/collaboration/performance_monitor.py +462 -0
  60. scripts/collaboration/permission_guard.py +865 -0
  61. scripts/collaboration/prompt_assembler.py +756 -0
  62. scripts/collaboration/prompt_variant_generator.py +483 -0
  63. scripts/collaboration/protocols.py +267 -0
  64. scripts/collaboration/report_formatter.py +352 -0
  65. scripts/collaboration/retrospective.py +279 -0
  66. scripts/collaboration/role_matcher.py +92 -0
  67. scripts/collaboration/role_template_market.py +352 -0
  68. scripts/collaboration/rule_collector.py +678 -0
  69. scripts/collaboration/scratchpad.py +346 -0
  70. scripts/collaboration/skill_registry.py +151 -0
  71. scripts/collaboration/skillifier.py +878 -0
  72. scripts/collaboration/standardized_role_template.py +317 -0
  73. scripts/collaboration/task_completion_checker.py +237 -0
  74. scripts/collaboration/test_quality_guard.py +695 -0
  75. scripts/collaboration/unified_gate_engine.py +598 -0
  76. scripts/collaboration/usage_tracker.py +309 -0
  77. scripts/collaboration/user_friendly_error.py +176 -0
  78. scripts/collaboration/verification_gate.py +312 -0
  79. scripts/collaboration/warmup_manager.py +635 -0
  80. scripts/collaboration/worker.py +513 -0
  81. scripts/collaboration/workflow_engine.py +684 -0
  82. scripts/dashboard.py +1088 -0
  83. scripts/generate_benchmark_report.py +786 -0
  84. scripts/history_manager.py +604 -0
  85. scripts/mcp_server.py +289 -0
  86. skills/__init__.py +32 -0
  87. skills/dispatch/handler.py +52 -0
  88. skills/intent/handler.py +59 -0
  89. skills/registry.py +67 -0
  90. skills/retrospective/__init__.py +0 -0
  91. skills/retrospective/handler.py +125 -0
  92. skills/review/handler.py +356 -0
  93. skills/security/handler.py +454 -0
  94. skills/test/__init__.py +0 -0
  95. skills/test/handler.py +78 -0
@@ -0,0 +1,1607 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ MemoryBridge - 记忆桥接系统
5
+
6
+ 将协作系统(Coordinator/Skillifier/Scratchpad)与持久记忆层(memory-bank)连接,
7
+ 实现跨会话的知识复用、经验捕获、反馈闭环和模式持久化。
8
+
9
+ 核心能力:
10
+ - recall(): 任务前召回相关历史经验
11
+ - capture_execution(): 执行后自动捕获洞察
12
+ - record_feedback(): 用户反馈记录
13
+ - persist_pattern(): Skillifier 模式跨会话保留
14
+ - search_knowledge(): 知识库关键词搜索
15
+ - 生命周期: 遗忘曲线 / 自动压缩 / 清理
16
+
17
+ 使用示例:
18
+ from collaboration.memory_bridge import MemoryBridge, MemoryConfig
19
+
20
+ bridge = MemoryBridge(config=MemoryConfig.default())
21
+ result = bridge.recall(MemoryQuery(query_text="微服务架构设计"))
22
+ for mem in result.memories:
23
+ print(f"[{mem.memory_type.value}] {mem.title}: {mem.content[:80]}")
24
+ """
25
+
26
+ import os
27
+ import re
28
+ import json
29
+ import math
30
+ import time
31
+ import uuid
32
+ import threading
33
+ from enum import Enum
34
+ from abc import ABC, abstractmethod
35
+ from collections import Counter
36
+ from dataclasses import dataclass, field
37
+ from datetime import datetime, timedelta
38
+ from pathlib import Path
39
+ from typing import Any, Callable, Dict, List, Optional, Tuple
40
+
41
+
42
class MemoryType(Enum):
    """Closed set of memory categories used throughout this module.

    Each member's value is the lowercase string form of its name; that
    string is what ``MemoryItem.to_dict`` writes under ``"memory_type"``.
    """

    KNOWLEDGE = "knowledge"
    EPISODIC = "episodic"
    SEMANTIC = "semantic"
    FEEDBACK = "feedback"
    PATTERN = "pattern"
    ANALYSIS = "analysis"
    CORRECTION = "correction"
50
+
51
+
52
@dataclass
class MemoryItem:
    """A single memory record plus its bookkeeping fields.

    ``created_at`` and ``last_accessed`` default to the local time at
    construction. ``to_dict`` / ``from_dict`` convert to and from a plain
    dictionary in which the enum is stored by value and the timestamps as
    ISO-8601 strings.
    """

    id: str
    memory_type: MemoryType
    title: str
    content: str
    domain: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    source: str = ""
    relevance_score: float = 0.0
    created_at: datetime = field(default_factory=datetime.now)
    last_accessed: datetime = field(default_factory=datetime.now)
    access_count: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def age_days(self) -> float:
        """Return the time elapsed since ``created_at`` in (fractional) days."""
        elapsed = datetime.now() - self.created_at
        return elapsed.total_seconds() / 86400

    def to_dict(self) -> Dict:
        """Return a plain-dict form of this item.

        ``tags`` and ``metadata`` are passed through by reference, not copied.
        """
        return dict(
            id=self.id,
            memory_type=self.memory_type.value,
            title=self.title,
            content=self.content,
            domain=self.domain,
            tags=self.tags,
            source=self.source,
            relevance_score=self.relevance_score,
            created_at=self.created_at.isoformat(),
            last_accessed=self.last_accessed.isoformat(),
            access_count=self.access_count,
            metadata=self.metadata,
        )

    @classmethod
    def from_dict(cls, d: Dict) -> 'MemoryItem':
        """Build an item from a dictionary shaped like ``to_dict`` output.

        ``id``, ``memory_type``, ``title`` and ``content`` are required
        (``KeyError`` if absent; ``ValueError`` for an unknown memory type).
        A timestamp that is missing or not a string is replaced by the
        current time; a string that is not valid ISO-8601 raises
        ``ValueError``.
        """

        def parse_timestamp(key: str) -> datetime:
            raw = d.get(key)
            if isinstance(raw, str):
                return datetime.fromisoformat(raw)
            return datetime.now()

        return cls(
            id=d["id"],
            memory_type=MemoryType(d["memory_type"]),
            title=d["title"],
            content=d["content"],
            domain=d.get("domain"),
            tags=d.get("tags", []),
            source=d.get("source", ""),
            relevance_score=d.get("relevance_score", 0.0),
            created_at=parse_timestamp("created_at"),
            last_accessed=parse_timestamp("last_accessed"),
            access_count=d.get("access_count", 0),
            metadata=d.get("metadata", {}),
        )
103
+
104
+
105
@dataclass
class MemoryQuery:
    """Parameters describing a memory lookup.

    Every field is optional. How each one is applied is decided by the
    code that consumes the query, which is not part of this class.
    """

    query_text: str = ""
    # Optional restrictions; None means "not specified".
    domain: Optional[str] = None
    memory_type: Optional[MemoryType] = None
    limit: int = 5
    min_relevance: float = 0.3
    # (start, end) pair of datetimes, or None.
    time_range: Optional[Tuple[datetime, datetime]] = None
113
+
114
+
115
@dataclass
class MemoryRecallResult:
    """Container for the outcome of a recall: items plus summary numbers.

    All fields start empty/zero; the producer of the result fills them in.
    """

    memories: List[MemoryItem] = field(default_factory=list)
    total_found: int = 0
    query_time_ms: float = 0.0
    # Presumably memory-type value -> number of hits; confirm against the producer.
    hit_memory_types: Dict[str, int] = field(default_factory=dict)
121
+
122
+
123
@dataclass
class MemoryConfig:
    """Tunable settings for the memory subsystem, with three presets.

    The dataclass defaults are the "default" preset; ``lightweight`` and
    ``full`` override a handful of fields and inherit the rest.
    """

    enabled: bool = True
    base_dir: Optional[str] = None
    auto_capture: bool = True
    auto_index: bool = True
    max_episodic_memories: int = 1000
    max_knowledge_items: int = 5000
    index_rebuild_threshold: int = 50
    relevance_threshold: float = 0.3
    retention_days: int = 90
    compress_old_memories: bool = True
    enable_semantic_search: bool = False

    @classmethod
    def default(cls) -> 'MemoryConfig':
        """Return a configuration with every field at its declared default."""
        return cls()

    @classmethod
    def lightweight(cls) -> 'MemoryConfig':
        """Return a preset with auto-capture and auto-index off and a 100-item episodic cap."""
        overrides = {
            "auto_capture": False,
            "auto_index": False,
            "max_episodic_memories": 100,
        }
        return cls(**overrides)

    @classmethod
    def full(cls) -> 'MemoryConfig':
        """Return a preset with larger caps (5000 / 20000) and semantic search enabled."""
        overrides = {
            "max_episodic_memories": 5000,
            "max_knowledge_items": 20000,
            "enable_semantic_search": True,
        }
        return cls(**overrides)
151
+
152
+
153
@dataclass
class MemoryStats:
    """Snapshot of counters and status flags for the memory subsystem.

    Pure data holder: every field has a zero/empty default and is filled
    in by whoever assembles the statistics.
    """

    total_memories: int = 0
    by_type_counts: Dict[str, int] = field(default_factory=dict)
    # Timestamps are carried as strings (or None when unknown).
    oldest_memory: Optional[str] = None
    newest_memory: Optional[str] = None
    storage_size_kb: float = 0.0
    index_built: bool = False
    last_index_time: Optional[str] = None
    total_captures: int = 0
    total_recalls: int = 0
    # Presumably describe the WorkBuddy Claw source; confirm against the producer.
    claw_enabled: bool = False
    claw_item_count: int = 0
166
+
167
+
168
@dataclass
class KnowledgeItem:
    """A knowledge-base entry: identity, domain, title and content are required."""

    id: str
    domain: str
    title: str
    content: str
    tags: List[str] = field(default_factory=list)
    # Stored as a string; empty when not set.
    created_at: str = ""
    source: str = ""
177
+
178
+
179
@dataclass
class UserFeedback:
    """A piece of user feedback; only ``id`` is required."""

    id: str
    user_id: str = "default"
    feedback_type: str = "suggestion"
    content: str = ""
    # Optional numeric rating; None when the user gave none.
    rating: Optional[int] = None
    context: Dict[str, Any] = field(default_factory=dict)
    # Stored as a string; empty when not set.
    created_at: str = ""
    status: str = "pending"
189
+
190
+
191
@dataclass
class EpisodicMemory:
    """A finding recorded for a task; ``id``, ``task_description`` and ``finding`` are required."""

    id: str
    task_description: str
    finding: str
    worker_id: str = ""
    confidence: float = 0.0
    tags: List[str] = field(default_factory=list)
    # Stored as a string; empty when not set.
    created_at: str = ""
200
+
201
+
202
@dataclass
class PersistedPattern:
    """A stored pattern record; ``id``, ``name``, ``slug`` and ``category`` are required."""

    id: str
    name: str
    slug: str
    category: str
    trigger_keywords: List[str] = field(default_factory=list)
    # List of step dictionaries; their schema is not defined in this class.
    steps_template: List[Dict] = field(default_factory=list)
    confidence: float = 0.0
    quality_score: float = 0.0
    # Stored as a string; empty when not set.
    created_at: str = ""
213
+
214
+
215
@dataclass
class AnalysisCase:
    """A recorded problem analysis; ``id`` and ``problem`` are required."""

    id: str
    problem: str
    context: Dict[str, Any] = field(default_factory=dict)
    root_cause: str = ""
    solutions: List[str] = field(default_factory=list)
    # New cases default to "completed".
    status: str = "completed"
    # Stored as a string; empty when not set.
    created_at: str = ""
224
+
225
+
226
@dataclass
class ErrorContext:
    """Details captured about an error; only ``error_message`` is required."""

    error_message: str
    task_description: str = ""
    worker_id: str = ""
    stack_trace: str = ""
    # Stored as a string; empty when not set.
    timestamp: str = ""
233
+
234
+
235
class WorkBuddyClawSource:
    """
    Read-only bridge for the WorkBuddy (Claw) memory data source.

    Reads markdown memory files below a Claw root directory (``.memory/``,
    ``.workbuddy/memory/`` and ``.codebuddy/automations/ai/memory.md``) and
    converts them into ``MemoryItem`` objects.

    Data mapping rules (see ``CORE_FILE_MAPPING``):
        .memory/SOUL.md   -> MemoryType.SEMANTIC  (personality matrix)
        .memory/USER.md   -> MemoryType.KNOWLEDGE (user profile)
        .memory/MEMORY.md -> MemoryType.KNOWLEDGE (core knowledge)
        .memory/EXP.md    -> MemoryType.EPISODIC  (experience system)
        .memory/PROMPT.md -> MemoryType.PATTERN   (prompt optimization rules)
        .memory/HEALTH.md -> MemoryType.SEMANTIC  (health monitoring status)
        .memory/INDEX.md  -> keyword index used by search_by_index(); never
                             returned as a memory itself

    Design constraints:
        - Read-only access; nothing is ever written to the Claw directory.
        - The root defaults to the WORKBUDDY_CLAW_PATH environment variable
          (read once, when the class is defined) and otherwise to a
          hard-coded path; pass ``base_path`` to override it per instance.
        - INDEX.md is parsed once per instance and the result cached.
        - Files that cannot be read or decoded are skipped rather than
          raising, so a damaged Claw directory does not break the caller.
    """

    CLAW_BASE_PATH = os.environ.get("WORKBUDDY_CLAW_PATH", "/Users/lin/WorkBuddy/Claw")
    MEMORY_DIR = ".memory"
    WORKBUDDY_MEMORY_DIR = ".workbuddy/memory"

    CORE_FILE_MAPPING = {
        "SOUL.md": ("AI Personality Matrix (OCEAN model)", MemoryType.SEMANTIC),
        "USER.md": ("User Profile (background/preferences/channels)", MemoryType.KNOWLEDGE),
        "MEMORY.md": ("Core Knowledge Base (lessons/decisions)", MemoryType.KNOWLEDGE),
        "EXP.md": ("Experience System", MemoryType.EPISODIC),
        "PROMPT.md": ("Prompt Optimization Rules", MemoryType.PATTERN),
        "HEALTH.md": ("Health Monitoring Status", MemoryType.SEMANTIC),
    }

    # Daily logs are named "<YYYY>-...md". Matching any four-digit year keeps
    # the loader working beyond the single year that used to be hard-coded.
    DAILY_LOG_GLOB = "[0-9][0-9][0-9][0-9]-*.md"
    # Only the most recent N daily logs (by file name) are loaded.
    MAX_DAILY_LOGS = 30

    def __init__(self, base_path: Optional[str] = None):
        """
        Initialize the Claw source with an optional custom base path.

        Args:
            base_path: Custom path to the Claw directory. Defaults to
                CLAW_BASE_PATH.
        """
        self.base_path = Path(base_path or self.CLAW_BASE_PATH)
        self._memory_dir = self.base_path / self.MEMORY_DIR
        self._wb_memory_dir = self.base_path / self.WORKBUDDY_MEMORY_DIR
        self._index_cache: Optional[Dict[str, List[str]]] = None

    @property
    def is_available(self) -> bool:
        """Return True when both the Claw root and its .memory directory exist."""
        return self.base_path.exists() and self._memory_dir.exists()

    @staticmethod
    def _read_text(path: Path) -> Optional[str]:
        """Return the UTF-8 text of ``path``, or None if it cannot be read or decoded."""
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return None

    def load_all_memories(self) -> List[MemoryItem]:
        """
        Load all available memories from the Claw directories.

        Returns:
            List[MemoryItem]: Core memories followed by daily memories, each
            with source='workbuddy-claw'. Empty when the source is unavailable.
        """
        items: List[MemoryItem] = []
        if not self.is_available:
            return items
        items.extend(self._load_core_memories())
        items.extend(self._load_workbuddy_daily_memories())
        for item in items:
            item.source = "workbuddy-claw"
        return items

    def _load_core_memories(self) -> List[MemoryItem]:
        """Load the files listed in CORE_FILE_MAPPING from .memory/, skipping missing or unreadable ones."""
        items = []
        for filename, (title, mtype) in self.CORE_FILE_MAPPING.items():
            content = self._read_text(self._memory_dir / filename)
            if content is None:
                continue
            items.append(MemoryItem(
                id=f"wb-core-{filename.replace('.md', '')}",
                memory_type=mtype,
                title=title,
                content=content,
                domain="user-profile" if "USER" in filename else "claw-core",
                tags=self._extract_tags(content),
                source="workbuddy-claw",
            ))
        return items

    def _load_workbuddy_daily_memories(self) -> List[MemoryItem]:
        """Load the newest daily logs (by file name) from .workbuddy/memory/."""
        items: List[MemoryItem] = []
        if not self._wb_memory_dir.exists():
            return items

        try:
            md_files = sorted(
                self._wb_memory_dir.glob(self.DAILY_LOG_GLOB),
                key=lambda p: p.name,
                reverse=True,
            )
        except OSError:
            return items

        for filepath in md_files[:self.MAX_DAILY_LOGS]:
            content = self._read_text(filepath)
            if content is None:
                continue
            date_str = filepath.stem
            items.append(MemoryItem(
                id=f"wb-daily-{date_str}",
                memory_type=MemoryType.EPISODIC,
                title=f"Work Log {date_str}",
                content=content,
                domain="daily-log",
                tags=["workbuddy", "daily", date_str] + self._extract_tags(content),
                source="workbuddy-claw",
            ))
        return items

    def search_by_index(self, query: str, limit: int = 5) -> List[MemoryItem]:
        """
        Search using the keyword index stored in INDEX.md.

        INDEX.md is a markdown table of ``| keywords | file reference |``
        rows, where several keywords may be separated by "/", e.g.:
            | Fudan/Education | USER.md#Background |
            | QQ/WeChat | USER.md#Channels |

        The query is lowercased and split on whitespace; every token that is
        an index keyword contributes its file references. References are
        resolved in first-match order, so results are deterministic.

        Falls back to a full-text scan when INDEX.md does not exist, or when
        the query has tokens but none of them is in the index.

        Args:
            query: Search query string.
            limit: Maximum number of results to return.

        Returns:
            List[MemoryItem]: Matched memories. Index hits all carry the same
            fixed relevance score; fallback hits are sorted by score.
        """
        index_path = self._memory_dir / "INDEX.md"
        if not index_path.exists():
            return self._fallback_search(query, limit)

        if self._index_cache is None:
            self._index_cache = self._parse_index(index_path)

        query_tokens = list(dict.fromkeys(query.lower().split()))
        if not query_tokens:
            return []

        # A dict is used as an insertion-ordered set of file references.
        matched_refs: Dict[str, None] = {}
        for token in query_tokens:
            for ref in self._index_cache.get(token, ()):
                matched_refs.setdefault(ref, None)

        if not matched_refs:
            # Index miss: honour the documented full-text fallback.
            return self._fallback_search(query, limit)

        results: List[MemoryItem] = []
        for ref in matched_refs:
            if len(results) >= limit:
                break
            item = self._load_memory_by_index_ref(ref)
            if item:
                results.append(item)
        return results

    def _parse_index(self, index_path: Path) -> Dict[str, List[str]]:
        """
        Parse the INDEX.md table into a {keyword: [file_ref]} dictionary.

        Only lines starting with "|" are considered. Separator rows and the
        header row whose cells are the Chinese labels for "keyword" /
        "location" are skipped.

        Args:
            index_path: Path to the INDEX.md file.

        Returns:
            Dict mapping lowercase keywords to lists of file references;
            empty when the file cannot be read.
        """
        index: Dict[str, List[str]] = {}
        text = self._read_text(index_path)
        if text is None:
            return index

        separator_chars = set("-:")
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line.startswith("|") or line.startswith("|---"):
                continue
            cells = [c.strip() for c in line.split("|") if c.strip()]
            if len(cells) < 2:
                continue
            # Also skip separator rows written with spaces, e.g. "| --- | :-: |".
            if all(set(c) <= separator_chars for c in cells):
                continue
            keywords, file_ref = cells[0], cells[1]
            if keywords == "\u5173\u952e\u8bcd" or file_ref == "\u4f4d\u7f6e":
                continue
            for kw in keywords.split("/"):
                kw = kw.strip().lower()
                if kw:
                    index.setdefault(kw, []).append(file_ref)
        return index

    def _load_memory_by_index_ref(self, ref: str) -> Optional[MemoryItem]:
        """
        Load a memory fragment based on an INDEX reference.

        Supports both 'filename.md' and 'filename.md#section' formats; the
        file name is resolved relative to the .memory directory. References
        that resolve outside the Claw root, or to something that is not a
        readable file, yield None.

        Args:
            ref: File reference from INDEX.md (e.g., USER.md#Background).

        Returns:
            MemoryItem for the referenced content, or None if not found.
        """
        filename, _, section = ref.partition("#")

        filepath = self._memory_dir / filename
        try:
            inside_root = filepath.resolve().is_relative_to(self.base_path.resolve())
        except (OSError, RuntimeError):
            return None
        # INDEX.md content is data, not code: never let it point outside the root.
        if not inside_root or not filepath.is_file():
            return None

        content = self._read_text(filepath)
        if content is None:
            return None
        if section:
            extracted = self._extract_section(content, section)
            # Unknown anchor: return the head of the file rather than nothing.
            content = extracted if extracted is not None else content[:500]

        # Exact core files use the same type as CORE_FILE_MAPPING (this makes
        # HEALTH.md consistent); anything else falls back to a name heuristic.
        core_entry = self.CORE_FILE_MAPPING.get(Path(filename).name)
        if core_entry is not None:
            mtype = core_entry[1]
        else:
            type_map = {
                "SOUL": MemoryType.SEMANTIC,
                "USER": MemoryType.KNOWLEDGE,
                "MEMORY": MemoryType.KNOWLEDGE,
                "EXP": MemoryType.EPISODIC,
                "PROMPT": MemoryType.PATTERN,
            }
            upper_name = filename.upper()
            mtype = next((t for k, t in type_map.items() if k in upper_name), MemoryType.KNOWLEDGE)

        return MemoryItem(
            id=f"wb-index-{filename.replace('.md', '').replace('/', '-')}",
            memory_type=mtype,
            title=f"[Claw] {ref}",
            content=content,
            source="workbuddy-claw",
            relevance_score=0.9,
        )

    @staticmethod
    def _extract_section(content: str, anchor: str) -> Optional[str]:
        """
        Extract a markdown section by its heading anchor text.

        The first heading line containing ``anchor`` (case-insensitive) starts
        the section; it ends just before the next heading of any level, or at
        the end of the text.

        Args:
            content: Full markdown text to search in.
            anchor: Section heading text to find.

        Returns:
            Extracted section text (stripped), or None if anchor not found.
        """
        pattern = rf'(?:^|\n)#+\s*.*{re.escape(anchor)}'
        match = re.search(pattern, content, re.MULTILINE | re.IGNORECASE)
        if not match:
            return None
        start = match.start()
        # Search from start + 1 so the matched heading's own newline is skipped.
        next_heading = re.search(r'\n#+\s+', content[start + 1:])
        end = (next_heading.start() + start + 1) if next_heading else len(content)
        return content[start:end].strip()

    @staticmethod
    def _extract_tags(text: str) -> List[str]:
        """
        Extract words from text to use as tags.

        Picks runs of CJK characters (>= 2 chars) and ASCII-letter words
        (>= 3 chars), de-duplicated in order of first appearance so the
        result is the same on every run.

        Args:
            text: Source text to extract tags from.

        Returns:
            List of unique tag strings (max 15).
        """
        words = re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', text)
        return list(dict.fromkeys(words))[:15]

    def _fallback_search(self, query: str, limit: int = 5) -> List[MemoryItem]:
        """
        Full-text search over all loaded memories.

        Scores each item by whole-query match in the title (+0.5), whole-query
        match in the content (+0.3), and any query token contained in any tag
        (+0.2); items scoring 0 are dropped.

        Args:
            query: Search query string.
            limit: Maximum results.

        Returns:
            MemoryItem list sorted by descending relevance score.
        """
        query_lower = query.lower()
        query_tokens = query_lower.split()
        scored = []
        for item in self.load_all_memories():
            score = 0.0
            if query_lower in item.title.lower():
                score += 0.5
            if query_lower in item.content.lower():
                score += 0.3
            lowered_tags = [t.lower() for t in item.tags]
            if any(q in t for q in query_tokens for t in lowered_tags):
                score += 0.2
            if score > 0:
                item.relevance_score = min(score, 1.0)
                scored.append(item)
        scored.sort(key=lambda x: x.relevance_score, reverse=True)
        return scored[:limit]

    # ========== Plan B: Automation News Feed Consumer ==========

    def get_latest_ai_news(self, days: int = 7) -> List[MemoryItem]:
        """
        Read the daily AI news automation log.

        Data source: .codebuddy/automations/ai/memory.md under the Claw root.
        Each dated block of the log that is no older than ``days`` days
        becomes one MemoryItem.

        Each MemoryItem.metadata contains:
            - sources: list of source strings captured for the block
            - core_topics: list of topic strings captured for the block
            - status: execution status string

        Args:
            days: Number of days to look back (default 7).

        Returns:
            List of MemoryItems representing AI news entries; empty when the
            log is missing or unreadable.
        """
        ai_memory_path = self.base_path / ".codebuddy" / "automations" / "ai" / "memory.md"
        content = self._read_text(ai_memory_path)
        if content is None:
            return []

        cutoff = datetime.now() - timedelta(days=days)
        items = []
        for entry in self._parse_automation_log(content):
            if entry["date"] < cutoff:
                continue
            items.append(MemoryItem(
                id=f"wb-news-{entry['date'].strftime('%Y%m%d')}",
                memory_type=MemoryType.EPISODIC,
                title=f"AI News {entry['date'].strftime('%Y-%m-%d')}",
                content=entry["content"],
                domain="ai-news",
                tags=["ai-news", "daily-push", "automation"] + self._extract_tags(entry["content"]),
                source="workbuddy-claw-automation",
                metadata={
                    "sources": entry.get("sources", []),
                    "core_topics": entry.get("topics", []),
                    "status": entry.get("status", ""),
                },
            ))
        return items

    def _parse_automation_log(self, content: str) -> List[Dict]:
        """
        Parse the automation memory.md log into structured entries.

        A block starts at a ``## YYYY-MM-DD ...`` heading and runs to the next
        such heading. Inside a block, lines starting with these bold labels
        (matched literally, in Chinese, followed by an ASCII colon) are
        recognised:
            - 执行状态 ("execution status"): first non-space word -> status
            - 信息来源 ("information sources"): rest of the line -> one
              element of sources, stored verbatim
            - 核心主题 ("core topics"): rest of the line, if any, and every
              "- item" / "* item" bullet directly below -> topics

        A heading whose date is not a valid calendar date closes the previous
        block and its own lines are ignored.

        Args:
            content: Raw markdown content from automation memory.md.

        Returns:
            List of dicts with keys date (datetime), content (str),
            sources (list), topics (list) and status (str).
        """
        date_re = re.compile(r'^## (\d{4}-\d{2}-\d{2})')
        sources_re = re.compile(r'\*\*\u4fe1\u606f\u6765\u6e90\*\*:\s*(.+)')
        topics_re = re.compile(r'\*\*\u6838\u5fc3\u4e3b\u9898\*\*:\s*(.*)')
        status_re = re.compile(r'\*\*\u6267\u884c\u72b6\u6001\*\*:\s*(\S+)')
        bullet_re = re.compile(r'\s*[-*]\s+(.+)')

        entries: List[Dict] = []
        current: Optional[Dict] = None
        in_topic_list = False

        for line in content.splitlines():
            date_match = date_re.match(line)
            if date_match:
                if current is not None:
                    entries.append(current)
                # Reset first: if the date is invalid, the previous block must
                # not keep collecting lines or be appended a second time.
                current = None
                in_topic_list = False
                try:
                    parsed = datetime.strptime(date_match.group(1), "%Y-%m-%d")
                except ValueError:
                    continue
                current = {
                    "date": parsed,
                    "content": "",
                    "sources": [],
                    "topics": [],
                    "status": "",
                }
                continue

            if current is None:
                continue

            current["content"] += line + "\n"

            topics_match = topics_re.match(line)
            if topics_match:
                if topics_match.group(1).strip():
                    current["topics"].append(topics_match.group(1))
                in_topic_list = True
                continue

            if in_topic_list:
                bullet_match = bullet_re.match(line)
                if bullet_match:
                    current["topics"].append(bullet_match.group(1).strip())
                    continue
                if line.strip():
                    # Any other non-blank line ends the bullet list.
                    in_topic_list = False

            src_match = sources_re.match(line)
            if src_match:
                current["sources"].append(src_match.group(1))

            status_match = status_re.match(line)
            if status_match:
                current["status"] = status_match.group(1)

        if current is not None:
            entries.append(current)

        return entries
637
+
638
+
639
class MemoryStore(ABC):
    """Abstract storage interface for memory records, keyed by type and id.

    Concrete stores must implement all four operations.
    """

    @abstractmethod
    def save(self, memory_type: MemoryType, data: Dict) -> str:
        """Store ``data`` under ``memory_type`` and return the record's id."""

    @abstractmethod
    def load(self, memory_type: MemoryType, item_id: str) -> Optional[Dict]:
        """Return the stored record for ``item_id``, or None."""

    @abstractmethod
    def list_all(self, memory_type: MemoryType,
                 filters: Optional[Dict] = None) -> List[Dict]:
        """Return the records of ``memory_type``, optionally narrowed by ``filters``."""

    @abstractmethod
    def delete(self, memory_type: MemoryType, item_id: str) -> bool:
        """Remove the record for ``item_id`` and report the outcome as a bool."""
656
+
657
+
658
class JsonMemoryStore(MemoryStore):
    """File-backed MemoryStore that keeps one JSON file per record.

    Each memory type has its own directory below ``base_dir`` (see
    ``_type_dirs``); KNOWLEDGE records are written to a ``general``
    sub-directory of theirs. All file operations are serialized through a
    re-entrant lock held by the instance.
    """

    def __init__(self, base_dir: str):
        """Create a store rooted at ``base_dir``; directories are created lazily on save."""
        self.base_dir = Path(base_dir)
        self._lock = threading.RLock()
        self._type_dirs = {
            MemoryType.KNOWLEDGE: self.base_dir / "knowledge_base" / "domains",
            MemoryType.FEEDBACK: self.base_dir / "user_experience" / "feedback",
            MemoryType.PATTERN: self.base_dir / "persisted_patterns",
            MemoryType.ANALYSIS: self.base_dir / "analysis_cases",
            MemoryType.EPISODIC: self.base_dir / "episodic",
            MemoryType.SEMANTIC: self.base_dir / "semantic",
            MemoryType.CORRECTION: self.base_dir / "corrections",
        }

    def _get_file_path(self, mtype: MemoryType, item_id: str) -> Path:
        """Return the JSON file path for a record.

        Raises:
            ValueError: If ``item_id`` contains "..", "/" or a backslash, or
                if the resulting path resolves outside ``base_dir``.
        """
        if '..' in item_id or '/' in item_id or '\\' in item_id:
            raise ValueError(f"Invalid item_id (path traversal): {item_id}")
        dir_path = self._type_dirs.get(mtype, self.base_dir / "other")
        if mtype == MemoryType.KNOWLEDGE:
            domain = "general"
            path = dir_path / domain / f"{item_id}.json"
        else:
            path = dir_path / f"{item_id}.json"
        # Second line of defence, e.g. against symlinked directories.
        if not path.resolve().is_relative_to(self.base_dir.resolve()):
            raise ValueError(f"Path traversal detected: {item_id}")
        return path

    def save(self, memory_type: MemoryType, data: Dict) -> str:
        """Write ``data`` as JSON and return the id it was stored under.

        The id is ``data["id"]``; when that key is missing or None a fresh id
        is generated (it is returned but not inserted into ``data``).

        The JSON is first written to a temporary sibling file and then moved
        over the target, so a failed serialization or write never truncates
        or corrupts a previously saved record.

        Raises:
            ValueError: If the id is rejected by ``_get_file_path``.
            TypeError: If ``data`` contains values json cannot serialize.
            OSError: If the file cannot be written.
        """
        item_id = data.get("id")
        if item_id is None:
            item_id = f"{memory_type.value}_{uuid.uuid4().hex[:12]}_{int(time.time())}"
        file_path = self._get_file_path(memory_type, item_id)
        with self._lock:
            file_path.parent.mkdir(parents=True, exist_ok=True)
            # The ".tmp" suffix keeps the partial file invisible to list_all's "*.json" glob.
            tmp_path = file_path.with_name(f"{file_path.name}.{uuid.uuid4().hex}.tmp")
            try:
                with open(tmp_path, "w", encoding="utf-8") as f:
                    json.dump(data, f, ensure_ascii=False, indent=2)
                os.replace(tmp_path, file_path)
            finally:
                # After a successful replace the temp file is already gone;
                # after a failure this removes the partial file (best effort).
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    pass
        return item_id

    def load(self, memory_type: MemoryType, item_id: str) -> Optional[Dict]:
        """Return the stored record, or None.

        None is returned when the file does not exist, cannot be read, is not
        valid UTF-8 JSON, or does not contain a JSON object.

        Raises:
            ValueError: If the id is rejected by ``_get_file_path``.
        """
        file_path = self._get_file_path(memory_type, item_id)
        with self._lock:
            if not file_path.exists():
                return None
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (ValueError, OSError):
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                return None
            return data if isinstance(data, dict) else None

    def list_all(self, memory_type: MemoryType,
                 filters: Optional[Dict] = None) -> List[Dict]:
        """Return every readable record of ``memory_type``, in path order.

        Files are found recursively below the type's directory. Files that
        cannot be read, decoded or parsed, or whose top-level JSON value is
        not an object, are skipped.

        Args:
            memory_type: Which type's directory to scan.
            filters: Optional mapping; a record is kept only if
                ``record.get(key) == value`` for every entry.
        """
        results: List[Dict] = []
        dir_path = self._type_dirs.get(memory_type, self.base_dir / "other")
        with self._lock:
            if not dir_path.exists():
                return results
            for json_file in sorted(dir_path.glob("**/*.json")):
                try:
                    with open(json_file, "r", encoding="utf-8") as f:
                        data = json.load(f)
                except (ValueError, OSError):
                    continue
                if not isinstance(data, dict):
                    continue
                if filters and any(data.get(k) != v for k, v in filters.items()):
                    continue
                results.append(data)
        return results

    def delete(self, memory_type: MemoryType, item_id: str) -> bool:
        """Delete the record's file; return True if it existed, False otherwise.

        Raises:
            ValueError: If the id is rejected by ``_get_file_path``.
        """
        file_path = self._get_file_path(memory_type, item_id)
        with self._lock:
            try:
                file_path.unlink()
            except FileNotFoundError:
                return False
            return True
737
+
738
+
739
+ class MemoryIndexer:
740
+ def __init__(self):
741
+ self._inverted_index: Dict[str, set] = {}
742
+ self._domain_index: Dict[str, set] = {}
743
+ self._tag_index: Dict[str, set] = {}
744
+ self._type_index: Dict[MemoryType, set] = {}
745
+ self._tf_cache: Dict[str, Counter] = {}
746
+ self._items_cache: Dict[str, MemoryItem] = {}
747
+ self._index_built: bool = False
748
+ self._write_count: int = 0
749
+ self._lock = threading.RLock()
750
+ self._doc_count: int = 0
751
+
752
+ def build_index(self, items: List[MemoryItem]) -> None:
753
+ with self._lock:
754
+ self._inverted_index.clear()
755
+ self._domain_index.clear()
756
+ self._tag_index.clear()
757
+ self._type_index.clear()
758
+ self._tf_cache.clear()
759
+ self._items_cache.clear()
760
+ self._doc_count = 0
761
+ for item in items:
762
+ self._add_to_index_internal(item)
763
+ self._index_built = True
764
+
765
+ def add_to_index(self, item: MemoryItem) -> None:
766
+ with self._lock:
767
+ self._add_to_index_internal(item)
768
+ self._write_count += 1
769
+ if self._write_count >= 50 and not self._index_built:
770
+ pass
771
+
772
+ def _add_to_index_internal(self, item: MemoryItem) -> None:
773
+ mid = item.id
774
+ self._items_cache[mid] = item
775
+ self._doc_count += 1
776
+ tokens = self._tokenize(item.title + " " + item.content)
777
+ self._tf_cache[mid] = Counter(tokens)
778
+ for token in set(tokens):
779
+ self._inverted_index.setdefault(token, set()).add(mid)
780
+ if item.domain:
781
+ self._domain_index.setdefault(item.domain, set()).add(mid)
782
+ for tag in item.tags:
783
+ self._tag_index.setdefault(tag, set()).add(mid)
784
+ self._type_index.setdefault(item.memory_type, set()).add(mid)
785
+
786
+ def remove_from_index(self, memory_id: str) -> None:
787
+ with self._lock:
788
+ item = self._items_cache.pop(memory_id, None)
789
+ if item is None:
790
+ return
791
+ self._doc_count -= 1
792
+ tokens = self._tokenize(item.title + " " + item.content)
793
+ for token in set(tokens):
794
+ ids = self._inverted_index.get(token)
795
+ if ids:
796
+ ids.discard(memory_id)
797
+ if not ids:
798
+ del self._inverted_index[token]
799
+ if item.domain:
800
+ ids = self._domain_index.get(item.domain)
801
+ if ids:
802
+ ids.discard(memory_id)
803
+ for tag in item.tags:
804
+ ids = self._tag_index.get(tag)
805
+ if ids:
806
+ ids.discard(memory_id)
807
+ type_set = self._type_index.get(item.memory_type)
808
+ if type_set:
809
+ type_set.discard(memory_id)
810
+ self._tf_cache.pop(memory_id, None)
811
+
812
def search(self, query_text: str,
           type_filter: Optional[MemoryType] = None,
           domain_filter: Optional[str] = None,
           limit: int = 10) -> List[Tuple[str, float]]:
    """Rank indexed documents against a free-text query.

    Every document sharing at least one token with the query becomes a
    candidate; candidates are optionally narrowed by memory type and domain,
    scored with ``_compute_relevance`` and returned best-first.

    Args:
        query_text: Text to tokenize and look up.
        type_filter: If truthy, keep only ids registered under this type.
        domain_filter: If truthy, keep only ids registered under this domain.
        limit: Maximum number of ``(doc_id, score)`` pairs to return.

    Returns:
        Up to ``limit`` pairs sorted by descending score; an empty list when
        the index is not flagged as built or holds no tokens.
    """
    with self._lock:
        if not (self._index_built and self._inverted_index):
            return []

        terms = self._tokenize(query_text)

        # A dict (not a set) keeps first-seen order, which decides how the
        # stable sort below breaks ties between equal scores.
        hits: Dict[str, float] = {}
        for term in terms:
            posting = self._inverted_index.get(term)
            if not posting:
                continue
            for doc_id in posting:
                hits[doc_id] = hits.get(doc_id, 0) + 1

        if type_filter:
            allowed_by_type = self._type_index.get(type_filter, set())
            hits = {d: c for d, c in hits.items() if d in allowed_by_type}
        if domain_filter:
            allowed_by_domain = self._domain_index.get(domain_filter, set())
            hits = {d: c for d, c in hits.items() if d in allowed_by_domain}

        scored = [(doc_id, self._compute_relevance(terms, doc_id)) for doc_id in hits]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:limit]
838
+
839
def keyword_search(self, keywords: List[str],
                   domain: Optional[str] = None) -> List[Tuple[str, float]]:
    """Return the documents that contain every token of every keyword.

    Each keyword is tokenized and all resulting tokens must be present in a
    document (logical AND across tokens and across keywords).  Keywords that
    produce no tokens are ignored.

    Args:
        keywords: Keywords to match; an empty list yields no results.
        domain: If truthy, restrict matches to ids registered under it.

    Returns:
        ``(doc_id, 1.0)`` pairs ordered by ``doc_id``.  All matches share
        the same score, so ordering by id keeps the output reproducible
        instead of depending on set iteration order.
    """
    with self._lock:
        if not keywords:
            return []

        matches = None  # None until the first token has been looked up
        for keyword in keywords:
            for token in self._tokenize(keyword):
                posting = self._inverted_index.get(token) or set()
                # Always build a new set so the index itself is never mutated.
                matches = set(posting) if matches is None else matches & posting

        if matches is None:
            return []
        if domain:
            matches = matches & self._domain_index.get(domain, set())
        return [(mid, 1.0) for mid in sorted(matches)]
869
+
870
def _compute_relevance(self, query_tokens: List[str], doc_id: str) -> float:
    """Score one document against the query tokens, capped at 1.0.

    For each query token present in the document, its term frequency is
    multiplied by a smoothed inverse document frequency
    ``log((N + 1) / (df + 1)) + 1`` and accumulated.  A positive total is
    divided by the product of the document and query term-frequency vector
    norms.

    Args:
        query_tokens: Tokens of the query (duplicates count repeatedly).
        doc_id: Id whose cached term frequencies are scored.

    Returns:
        The normalized score, never greater than 1.0; 0.0 when the document
        shares no token with the query or has no cached term frequencies.
    """
    term_freqs = self._tf_cache.get(doc_id, Counter())
    total = 0.0
    for term in query_tokens:
        if term not in term_freqs:
            continue
        doc_freq = len(self._inverted_index.get(term, set()))
        idf = math.log((self._doc_count + 1) / (doc_freq + 1)) + 1
        total += term_freqs[term] * idf

    if not (total > 0):
        return min(total, 1.0)

    doc_norm = math.sqrt(sum(count ** 2 for count in term_freqs.values()))
    # "or 1" guards the division when the query norm evaluates to zero.
    query_norm = math.sqrt(sum(count ** 2 for count in Counter(query_tokens).values())) or 1
    return min(total / (doc_norm * query_norm), 1.0)
883
+
884
@staticmethod
def _tokenize(text: str) -> List[str]:
    """Split text into the tokens used by the index.

    The text is lower-cased and every character that is neither a word
    character nor in U+4E00..U+9FFF is replaced by a space.  Each remaining
    word then yields:

    * itself, if it is a single character;
    * its individual characters, if it contains any U+4E00..U+9FFF character;
    * otherwise the word itself, preceded by all of its 2-character slices
      when the word is longer than three characters.

    Args:
        text: Raw text.

    Returns:
        Tokens in emission order (duplicates preserved).
    """
    normalized = re.sub(r'[^\w\u4e00-\u9fff]', ' ', text.lower())
    pieces: List[str] = []
    for word in normalized.split():
        if len(word) <= 1:
            pieces.append(word)
            continue
        if any('\u4e00' <= ch <= '\u9fff' for ch in word):
            pieces.extend(word)
            continue
        if len(word) > 3:
            pieces.extend(word[i:i + 2] for i in range(len(word) - 1))
        pieces.append(word)
    return [piece for piece in pieces if piece]
901
+
902
@property
def is_built(self) -> bool:
    """Current value of the ``_index_built`` flag."""
    built = self._index_built
    return built
905
+
906
@property
def size(self) -> int:
    """Number of documents currently counted by the index (``_doc_count``)."""
    count = self._doc_count
    return count
909
+
910
+
911
class MemoryWriter:
    """Persist typed memory records and mirror them into an optional index.

    Every ``write_*`` method serializes its record to a plain dict, saves it
    with ``store.save(memory_type, data)`` and, when an indexer was supplied,
    registers a ``MemoryItem`` projection under the id the store returned.
    """

    def __init__(self, store: MemoryStore, indexer: Optional[MemoryIndexer] = None):
        """Bind the writer to a store and, optionally, an indexer.

        Args:
            store: Backend providing ``save(memory_type, data)``.
            indexer: Index kept in sync on every write; ``None`` disables
                indexing.
        """
        self.store = store
        self.indexer = indexer
        self._capture_count = 0  # incremented once per write_episodic call

    def write_knowledge(self, item: KnowledgeItem) -> str:
        """Save a knowledge item and index it; return the stored id."""
        data = {
            "id": item.id, "domain": item.domain, "title": item.title,
            "content": item.content, "tags": item.tags,
            "source": item.source, "created_at": item.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.KNOWLEDGE, data)
        if self.indexer:
            mem_item = MemoryItem(
                id=item_id, memory_type=MemoryType.KNOWLEDGE,
                title=item.title, content=item.content,
                domain=item.domain, tags=item.tags, source=item.source,
            )
            self.indexer.add_to_index(mem_item)
        return item_id

    def write_episodic(self, memory: EpisodicMemory) -> str:
        """Save an episodic memory and index it; return the stored id.

        The index title is the first 60 characters of the finding.
        """
        data = {
            "id": memory.id, "task_description": memory.task_description,
            "finding": memory.finding, "worker_id": memory.worker_id,
            "confidence": memory.confidence, "tags": memory.tags,
            "created_at": memory.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.EPISODIC, data)
        if self.indexer:
            mem_item = MemoryItem(
                id=item_id, memory_type=MemoryType.EPISODIC,
                title=memory.finding[:60], content=memory.finding,
                tags=memory.tags, source=memory.worker_id,
                metadata={"confidence": memory.confidence},
            )
            self.indexer.add_to_index(mem_item)
        self._capture_count += 1
        return item_id

    def write_feedback(self, feedback: UserFeedback) -> str:
        """Save a user feedback record and index it; return the stored id.

        The feedback type is used both as the title prefix and as the only
        index tag.
        """
        data = {
            "id": feedback.id, "user_id": feedback.user_id,
            "type": feedback.feedback_type, "content": feedback.content,
            "rating": feedback.rating, "context": feedback.context,
            "created_at": feedback.created_at or datetime.now().isoformat(),
            "status": feedback.status,
        }
        item_id = self.store.save(MemoryType.FEEDBACK, data)
        if self.indexer:
            mem_item = MemoryItem(
                id=item_id, memory_type=MemoryType.FEEDBACK,
                title=f"[{feedback.feedback_type}] {feedback.content[:40]}",
                content=feedback.content,
                tags=[feedback.feedback_type],
                metadata={"rating": feedback.rating},
            )
            self.indexer.add_to_index(mem_item)
        return item_id

    def write_pattern(self, pattern: PersistedPattern) -> str:
        """Save a persisted pattern and index it; return the stored id.

        The indexed content is the JSON dump of ``steps_template`` cut to
        500 characters; the category doubles as the index domain.
        """
        data = {
            "id": pattern.id, "name": pattern.name, "slug": pattern.slug,
            "category": pattern.category, "trigger_keywords": pattern.trigger_keywords,
            "steps_template": pattern.steps_template,
            "confidence": pattern.confidence, "quality_score": pattern.quality_score,
            "created_at": pattern.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.PATTERN, data)
        if self.indexer:
            mem_item = MemoryItem(
                id=item_id, memory_type=MemoryType.PATTERN,
                title=pattern.name, content=json.dumps(pattern.steps_template, ensure_ascii=False)[:500],
                domain=pattern.category, tags=pattern.trigger_keywords,
                metadata={"quality_score": pattern.quality_score, "confidence": pattern.confidence},
            )
            self.indexer.add_to_index(mem_item)
        return item_id

    def write_analysis(self, analysis: AnalysisCase) -> str:
        """Save an analysis case and index it; return the stored id.

        Index tags are extracted from the problem text.
        """
        data = {
            "id": analysis.id, "problem": analysis.problem,
            "context": analysis.context, "root_cause": analysis.root_cause,
            "solutions": analysis.solutions, "status": analysis.status,
            "created_at": analysis.created_at or datetime.now().isoformat(),
        }
        item_id = self.store.save(MemoryType.ANALYSIS, data)
        if self.indexer:
            mem_item = MemoryItem(
                id=item_id, memory_type=MemoryType.ANALYSIS,
                title=analysis.problem[:60], content=analysis.root_cause,
                tags=self._extract_tags(analysis.problem),
                metadata={"solutions_count": len(analysis.solutions)},
            )
            self.indexer.add_to_index(mem_item)
        return item_id

    def batch_write(self, items: List[MemoryItem]) -> int:
        """Save and index many items, skipping the ones that fail.

        A failure while saving or indexing one item is logged and does not
        stop the batch.

        Args:
            items: Items to persist; each provides ``to_dict()`` and
                ``memory_type``.

        Returns:
            Number of items that were both saved and (if an indexer is set)
            indexed without error.
        """
        import logging  # local import: keeps this block self-contained

        log = logging.getLogger(__name__)
        success = 0
        for item in items:
            data = item.to_dict()
            try:
                self.store.save(item.memory_type, data)
                if self.indexer:
                    self.indexer.add_to_index(item)
            except Exception:
                # Best-effort batch: record the failure instead of hiding it.
                log.warning("batch_write: skipping memory %r",
                            getattr(item, "id", None), exc_info=True)
                continue
            success += 1
        return success

    @staticmethod
    def _extract_tags(text: str) -> List[str]:
        """Return up to ten distinct tag candidates from *text*.

        Candidates are runs of two or more U+4E00..U+9FFF characters or of
        three or more ASCII letters.  Duplicates are dropped while keeping
        first-occurrence order, so the same text always yields the same tags.
        """
        words = re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', text)
        return list(dict.fromkeys(words))[:10]
1026
+
1027
+
1028
class MemoryReader:
    """Read raw records from a store and rebuild them as typed objects.

    Every reader calls ``store.list_all`` for one memory type and maps each
    raw dict to the matching record class, substituting defaults for keys
    that are absent.
    """

    def __init__(self, store: MemoryStore):
        """Keep a reference to the store that is queried by every reader."""
        self.store = store

    def read_knowledge(self, domain: Optional[str] = None) -> List[KnowledgeItem]:
        """Return knowledge items, optionally filtered by domain in the store."""
        filters = None
        if domain:
            filters = {"domain": domain}
        items = []
        for row in self.store.list_all(MemoryType.KNOWLEDGE, filters):
            items.append(KnowledgeItem(
                id=row.get("id", ""), domain=row.get("domain", "general"),
                title=row.get("title", ""), content=row.get("content", ""),
                tags=row.get("tags", []), created_at=row.get("created_at", ""),
                source=row.get("source", ""),
            ))
        return items

    def read_episodic(self, limit: int = 50,
                      since: Optional[datetime] = None) -> List[EpisodicMemory]:
        """Return at most ``limit`` episodic memories in store order.

        When ``since`` is given, only records whose ``created_at`` string
        compares greater than or equal to ``since.isoformat()`` are kept.
        """
        rows = self.store.list_all(MemoryType.EPISODIC)
        if since:
            rows = [row for row in rows if row.get("created_at", "") >= since.isoformat()]
        memories = []
        for row in rows[:limit]:
            memories.append(EpisodicMemory(
                id=row.get("id", ""), task_description=row.get("task_description", ""),
                finding=row.get("finding", ""), worker_id=row.get("worker_id", ""),
                confidence=row.get("confidence", 0.0), tags=row.get("tags", []),
                created_at=row.get("created_at", ""),
            ))
        return memories

    def read_feedback(self, status: Optional[str] = None,
                      feedback_type: Optional[str] = None) -> List[UserFeedback]:
        """Return feedback records, optionally filtered by status and/or type."""
        filters = {}
        if status:
            filters["status"] = status
        if feedback_type:
            filters["type"] = feedback_type
        # An empty filter dict is passed to the store as None.
        rows = self.store.list_all(MemoryType.FEEDBACK, filters or None)
        records = []
        for row in rows:
            records.append(UserFeedback(
                id=row.get("id", ""), user_id=row.get("user_id", "default"),
                feedback_type=row.get("type", "suggestion"), content=row.get("content", ""),
                rating=row.get("rating"), context=row.get("context", {}),
                created_at=row.get("created_at", ""), status=row.get("status", "pending"),
            ))
        return records

    def read_patterns(self, category: Optional[str] = None) -> List[PersistedPattern]:
        """Return persisted patterns, optionally filtered by category.

        The category filter is applied here, after loading, not by the store.
        """
        rows = self.store.list_all(MemoryType.PATTERN)
        if category:
            rows = [row for row in rows if row.get("category") == category]
        patterns = []
        for row in rows:
            patterns.append(PersistedPattern(
                id=row.get("id", ""), name=row.get("name", ""), slug=row.get("slug", ""),
                category=row.get("category", ""), trigger_keywords=row.get("trigger_keywords", []),
                steps_template=row.get("steps_template", []),
                confidence=row.get("confidence", 0.0), quality_score=row.get("quality_score", 0.0),
                created_at=row.get("created_at", ""),
            ))
        return patterns

    def read_analysis_cases(self, status: Optional[str] = None) -> List[AnalysisCase]:
        """Return analysis cases, optionally filtered by status in the store."""
        filters = None
        if status:
            filters = {"status": status}
        cases = []
        for row in self.store.list_all(MemoryType.ANALYSIS, filters):
            cases.append(AnalysisCase(
                id=row.get("id", ""), problem=row.get("problem", ""),
                context=row.get("context", {}), root_cause=row.get("root_cause", ""),
                solutions=row.get("solutions", []), status=row.get("status", "completed"),
                created_at=row.get("created_at", ""),
            ))
        return cases
1091
+
1092
+
1093
+ class MemoryBridge:
1094
def __init__(self, base_dir: Optional[str] = None,
             config: Optional[MemoryConfig] = None,
             mce_adapter=None):
    """
    Initialize the memory bridge.

    Args:
        base_dir: Root directory of the memory store (default:
            data/memory-bank, resolved relative to this file).
        config: Memory configuration (MemoryConfig; the default
            configuration is used when omitted).
        mce_adapter: Optional adapter for the MCE memory classification
            engine (v3.2 integration).  When supplied and available it
            enables these enhancements:
            - capture_execution(): scratchpad content is classified by MCE,
              preference→FEEDBACK, decision→EPISODIC, fact→KNOWLEDGE
            - recall(): the query text is intent-classified by MCE and the
              results are filtered accordingly
            - shutdown(): the MCE connection is closed as well
    """
    self.config = config or MemoryConfig.default()

    root = base_dir
    if root is None:
        root = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'memory-bank')
    self.base_dir = os.path.abspath(root)

    self.store: JsonMemoryStore = JsonMemoryStore(self.base_dir)
    self.indexer: MemoryIndexer = MemoryIndexer()
    self.writer: MemoryWriter = MemoryWriter(self.store, self.indexer)
    self.reader: MemoryReader = MemoryReader(self.store)
    self._stats = MemoryStats(total_captures=0, total_recalls=0)
    self._inner_lock = threading.RLock()

    self._mce_adapter = mce_adapter
    self._mce_enabled = mce_adapter is not None and getattr(mce_adapter, 'is_available', False)

    # The WorkBuddy (Claw) source is optional: any failure while creating or
    # probing it simply leaves the bridge with Claw disabled.
    self._claw_source: Optional[WorkBuddyClawSource] = None
    self._claw_enabled = False
    try:
        source = WorkBuddyClawSource()
        self._claw_source = source
        if source.is_available:
            self._claw_enabled = True
    except Exception:
        pass
1132
+
1133
def recall(self, query: MemoryQuery) -> MemoryRecallResult:
    """
    [MCE integration point, Phase B] Cross-session memory recall.

    Runs a TF-IDF search over the index, loads every hit that reaches
    ``query.min_relevance`` from the store and returns the best
    ``query.limit`` memories.  When the WorkBuddy (Claw) source is enabled,
    up to ``query.limit // 2`` of its items are merged in before the final
    sort and truncation.

    MCE filtering: if an MCE adapter is enabled and the query carries no
    explicit ``memory_type``, the query text is classified and the result is
    mapped onto a ``MemoryType`` used as the search filter (for example a
    "preference" classification searches only FEEDBACK memories).  The
    mapping produces ``MemoryType`` members because the indexer's type index
    is keyed by the ``memory_type`` of the indexed items.  Classification
    errors are ignored and the unfiltered search is used.

    Args:
        query: Recall request; ``query_text``, ``memory_type``, ``domain``,
            ``limit`` and ``min_relevance`` are read.

    Returns:
        MemoryRecallResult with the memories, their count, the elapsed time
        in milliseconds and a per-type hit count.  ``hit_memory_types``
        describes exactly the memories that are returned.  A result with
        only ``query_time_ms`` set is returned when memory is disabled in
        the config or the query text is blank.
    """
    start = time.perf_counter()
    self._stats.total_recalls += 1
    if not self.config.enabled or not query.query_text.strip():
        return MemoryRecallResult(
            query_time_ms=(time.perf_counter() - start) * 1000,
        )

    effective_type_filter = query.memory_type

    if self._mce_enabled and self._mce_adapter and not query.memory_type:
        try:
            mce_result = self._mce_adapter.classify(query.query_text, timeout_ms=300)
            if mce_result and mce_result.memory_type:
                type_mapping = {
                    "preference": MemoryType.FEEDBACK,
                    "decision": MemoryType.EPISODIC,
                    "correction": MemoryType.EPISODIC,
                    "fact": MemoryType.KNOWLEDGE,
                    "task": MemoryType.EPISODIC,
                }
                mapped_type = type_mapping.get(mce_result.memory_type.lower())
                if mapped_type:
                    effective_type_filter = mapped_type
        except Exception:
            # Classification is an optional refinement; fall back to the
            # caller-provided (possibly empty) filter.
            pass

    claw_items: List[MemoryItem] = []
    if self._claw_enabled and self._claw_source:
        try:
            claw_items = self._claw_source.search_by_index(query.query_text, limit=query.limit // 2)
        except Exception:
            pass

    # Over-fetch so that hits dropped below (low score, missing record)
    # still leave enough candidates to fill the limit.
    search_results = self.indexer.search(
        query.query_text,
        type_filter=effective_type_filter,
        domain_filter=query.domain,
        limit=query.limit * 3,
    )
    memories = []
    for mid, score in search_results:
        if score < query.min_relevance:
            continue
        item_data = self._load_any_type(mid)
        if item_data is None:
            continue
        item = MemoryItem.from_dict(item_data)
        item.relevance_score = score
        item.last_accessed = datetime.now()
        item.access_count += 1
        memories.append(item)
        if len(memories) >= query.limit:
            break

    if claw_items:
        for ci in claw_items:
            ci.last_accessed = datetime.now()
            memories.append(ci)
        memories.sort(key=lambda x: x.relevance_score, reverse=True)
        memories = memories[:query.limit]

    # Count types only after the final truncation so the statistics match
    # the memories that are actually handed back.
    hit_types: Dict[str, int] = {}
    for memory in memories:
        mt = memory.memory_type.value
        hit_types[mt] = hit_types.get(mt, 0) + 1

    elapsed = (time.perf_counter() - start) * 1000
    return MemoryRecallResult(
        memories=memories,
        total_found=len(memories),
        query_time_ms=elapsed,
        hit_memory_types=hit_types,
    )
1220
+
1221
def capture_execution(self, execution_record=None,
                      scratchpad_entries=None) -> Optional[str]:
    """
    [MCE integration point, Phase A] Turn worker execution results into
    stored memories.

    Only scratchpad entries whose ``entry_type`` is "FINDING" and whose
    confidence is at least 0.7 are captured.  A missing (``None``)
    confidence defaults to 0.8; an explicit 0.0 is respected and the entry
    is skipped.  Content longer than 5000 characters is truncated.

    When an MCE adapter is enabled, the content is classified and the result
    selects the target memory type (preference→FEEDBACK, fact→KNOWLEDGE,
    everything else→EPISODIC) and may raise the stored confidence.
    Classification errors are ignored and the entry is stored as EPISODIC.

    Args:
        execution_record: Object providing ``task_description`` and
            ``worker_id`` (both optional).
        scratchpad_entries: Iterable of entries with ``entry_type``,
            ``confidence`` and ``content`` attributes.

    Returns:
        Id of the last memory captured, or ``None`` when auto-capture is
        disabled, no entries were given, or nothing qualified.
    """
    if not self.config.auto_capture or scratchpad_entries is None:
        return None

    type_hint_map = {
        "preference": "FEEDBACK",
        "decision": "EPISODIC",
        "correction": "EPISODIC",
        "fact": "KNOWLEDGE",
    }
    captured_id = None
    for entry in scratchpad_entries:
        entry_type = getattr(entry, 'entry_type', None)
        entry_type_val = entry_type.value if hasattr(entry_type, 'value') else str(entry_type)
        if entry_type_val != "FINDING":
            continue

        # Only a missing confidence falls back to the default; a real 0.0
        # must not be promoted to 0.8.
        confidence = getattr(entry, 'confidence', None)
        if confidence is None:
            confidence = 0.8
        if confidence < 0.7:
            continue

        content = getattr(entry, 'content', '') or ''
        if len(content) > 5000:
            content = content[:5000] + "...[TRUNCATED]"
        task_desc = getattr(execution_record, 'task_description', '') or ''
        worker_id = getattr(execution_record, 'worker_id', '') or ''

        mce_memory_type = None
        mce_confidence = confidence
        if self._mce_enabled and self._mce_adapter and content:
            try:
                mce_result = self._mce_adapter.classify(content, timeout_ms=500)
                if mce_result:
                    mce_confidence = max(confidence, mce_result.confidence)
                    if mce_result.memory_type:
                        mce_memory_type = type_hint_map.get(mce_result.memory_type.lower())
            except Exception:
                pass

        tags = self._extract_tags(task_desc + " " + content)

        if mce_memory_type == "KNOWLEDGE":
            # NOTE(review): write_knowledge reads title/content/domain/source
            # from its argument; confirm KnowledgeMemory exposes them.
            knowledge = KnowledgeMemory(
                id=f"know_{uuid.uuid4().hex[:12]}_{int(time.time())}",
                domain=task_desc[:100] if task_desc else "general",
                fact=content,
                source=worker_id or "multi-agent",
                confidence=mce_confidence,
                tags=tags,
                created_at=datetime.now().isoformat(),
            )
            self.writer.write_knowledge(knowledge)
            captured_id = knowledge.id
        elif mce_memory_type == "FEEDBACK":
            # NOTE(review): write_feedback reads user_id/feedback_type/
            # rating/context/status; confirm FeedbackMemory exposes them.
            feedback = FeedbackMemory(
                id=f"feed_{uuid.uuid4().hex[:12]}_{int(time.time())}",
                category="preference",
                content=content,
                source=worker_id or "user",
                severity="info",
                tags=tags,
                created_at=datetime.now().isoformat(),
            )
            self.writer.write_feedback(feedback)
            captured_id = feedback.id
        else:
            episodic = EpisodicMemory(
                id=f"epi_{uuid.uuid4().hex[:12]}_{int(time.time())}",
                task_description=task_desc[:200],
                finding=content,
                worker_id=worker_id,
                confidence=mce_confidence,
                tags=tags,
                created_at=datetime.now().isoformat(),
            )
            captured_id = self.writer.write_episodic(episodic)
        self._stats.total_captures += 1
    return captured_id
1312
+
1313
def record_feedback(self, feedback: UserFeedback) -> str:
    """
    [MCE integration point, Phase A] Record a piece of user feedback.

    Current behavior: the feedback is written directly as a FEEDBACK memory.
    A missing id (empty or ``None``) is replaced with a generated
    ``fb_<hex>_<timestamp>`` id and a missing ``created_at`` with the
    current time; both are set on the passed object.

    Planned once MCE is ready: classify sentiment and intent of
    ``feedback.content``, tag it as positive/negative/neutral and link it
    to related decision/correction memories.

    Args:
        feedback: Feedback record; mutated in place when id or timestamp
            are missing.

    Returns:
        The id returned by the writer.
    """
    if not feedback.id:
        feedback.id = f"fb_{uuid.uuid4().hex[:12]}_{int(time.time())}"
    if not feedback.created_at:
        feedback.created_at = datetime.now().isoformat()
    return self.writer.write_feedback(feedback)
1329
+
1330
def persist_pattern(self, pattern) -> Optional[str]:
    """
    [MCE integration point, Phase D] Persist a skill pattern.

    Current behavior: the pattern is written directly as a PATTERN memory,
    provided it passes the quality gates below.

    Planned once MCE is ready: classify ``pattern.name`` and
    ``steps_template`` to mark the key decision steps and link them to
    earlier correction/decision memories.

    Gates (a pattern failing any of them is not stored):
    - it must expose ``name`` and ``steps_template``;
    - a numeric ``confidence`` must be at least 0.7 (missing counts as 0);
    - ``quality_score`` (default: confidence * 100) must be at least 70.

    The slug is ``pattern.pattern_id`` when that is set to a non-empty
    value, otherwise it is derived from the name (lower-case, spaces
    replaced by hyphens).

    Args:
        pattern: Duck-typed pattern object.

    Returns:
        Id returned by the writer, or ``None`` when the pattern was rejected.
    """
    if not hasattr(pattern, 'name') or not hasattr(pattern, 'steps_template'):
        return None
    quality = getattr(pattern, 'confidence', 0) or 0
    if isinstance(quality, (int, float)) and quality < 0.7:
        return None
    qs = getattr(pattern, 'quality_score', quality * 100) or (quality * 100 if quality else 0)
    if qs < 70:
        return None

    # A present-but-empty pattern_id must not produce an empty slug.
    slug = getattr(pattern, 'pattern_id', None) or pattern.name.lower().replace(' ', '-')

    raw_confidence = getattr(pattern, 'confidence', None)
    confidence = float(raw_confidence) if raw_confidence is not None else quality

    persisted = PersistedPattern(
        id=f"pat_{uuid.uuid4().hex[:12]}_{int(time.time())}",
        name=pattern.name,
        slug=slug,
        category=getattr(pattern, 'category', 'auto-generated'),
        trigger_keywords=getattr(pattern, 'trigger_keywords', []) or [],
        steps_template=[s.to_dict() if hasattr(s, 'to_dict') else s
                        for s in getattr(pattern, 'steps_template', []) or []],
        confidence=confidence,
        quality_score=qs,
        created_at=datetime.now().isoformat(),
    )
    return self.writer.write_pattern(persisted)
1363
+
1364
def learn_from_mistake(self, error_context: ErrorContext) -> str:
    """Record an execution error as a completed analysis case.

    The problem is the first 200 characters of the error message; the root
    cause and the first suggested solution quote its first 100 characters,
    followed by three fixed generic remediation steps.

    Args:
        error_context: Provides ``error_message``, ``task_description``,
            ``worker_id`` and ``timestamp``.

    Returns:
        Id returned by the writer for the stored analysis.
    """
    message = error_context.error_message
    case_id = f"anal_{uuid.uuid4().hex[:12]}_{int(time.time())}"
    case_context = {
        "task": error_context.task_description[:200],
        "worker": error_context.worker_id,
        "timestamp": error_context.timestamp,
    }
    suggestions = [
        f"Review the error context: {message[:100]}",
        "Check input parameters and dependencies",
        "Add validation to prevent recurrence",
        "Document the solution for future reference",
    ]
    analysis = AnalysisCase(
        id=case_id,
        problem=message[:200],
        context=case_context,
        root_cause=f"Error during execution: {message[:100]}",
        solutions=suggestions,
        status="completed",
        created_at=datetime.now().isoformat(),
    )
    return self.writer.write_analysis(analysis)
1384
+
1385
def search_knowledge(self, keywords: List[str],
                     domain: Optional[str] = None) -> List[KnowledgeItem]:
    """Find knowledge items that match every keyword.

    The index is queried with ``keyword_search``; each matching id is then
    loaded from the KNOWLEDGE store.  Ids without a knowledge record are
    skipped.

    Args:
        keywords: Keywords that must all match; empty yields no results.
        domain: Optional domain restriction passed to the index.

    Returns:
        Matching knowledge items in the order the index returned them.
    """
    if not keywords:
        return []
    found = []
    for mid, _score in self.indexer.keyword_search(keywords, domain=domain):
        record = self.store.load(MemoryType.KNOWLEDGE, mid)
        if not record:
            continue
        found.append(KnowledgeItem(
            id=record.get("id", ""), domain=record.get("domain", "general"),
            title=record.get("title", ""), content=record.get("content", ""),
            tags=record.get("tags", []), created_at=record.get("created_at", ""),
            source=record.get("source", ""),
        ))
    return found
1401
+
1402
def get_statistics(self) -> MemoryStats:
    """Collect a snapshot of store, index and Claw statistics.

    Counts the records of every memory type (a type whose listing fails is
    reported as 0), derives the oldest/newest ``created_at`` strings, and,
    when the WorkBuddy (Claw) source is enabled, estimates its item count as
    existing core files plus at most 30 daily note files.

    Daily notes are matched as ``<four digits>-*.md`` so the count is not
    tied to one calendar year.

    Returns:
        A freshly built MemoryStats instance.
    """
    stats = MemoryStats(
        total_captures=self._stats.total_captures,
        total_recalls=self._stats.total_recalls,
        index_built=self.indexer.is_built,
        # NOTE(review): this is the time of the call, not of the last index
        # build; no build timestamp is available in this method.
        last_index_time=datetime.now().isoformat() if self.indexer.is_built else None,
    )
    type_counts: Dict[str, int] = {}
    all_items = []
    for mtype in MemoryType:
        try:
            raw = self.store.list_all(mtype)
            type_counts[mtype.value] = len(raw)
            all_items.extend(raw)
        except Exception:
            type_counts[mtype.value] = 0
    stats.by_type_counts = type_counts
    stats.total_memories = sum(type_counts.values())
    if all_items:
        dates = [r.get("created_at", "") for r in all_items if r.get("created_at")]
        if dates:
            stats.newest_memory = max(dates)
            stats.oldest_memory = min(dates)
    stats.claw_enabled = self._claw_enabled
    if self._claw_source and self._claw_enabled:
        try:
            source = self._claw_source
            core_count = sum(1 for f in source.CORE_FILE_MAPPING
                             if (source._memory_dir / f).exists())
            if source._wb_memory_dir.exists():
                daily_files = source._wb_memory_dir.glob("[0-9][0-9][0-9][0-9]-*.md")
                daily_count = min(30, sum(1 for _ in daily_files))
            else:
                daily_count = 0
            stats.claw_item_count = core_count + daily_count
        except Exception:
            stats.claw_item_count = 0
    else:
        stats.claw_item_count = 0
    return stats
1437
+
1438
def get_recent_history(self, n: int = 10) -> List[EpisodicMemory]:
    """Return up to ``n`` episodic memories as delivered by the reader."""
    history = self.reader.read_episodic(limit=n)
    return history
1440
+
1441
def get_workbuddy_ai_news(self, days: int = 7) -> List[MemoryItem]:
    """
    Plan B: fetch the WorkBuddy daily AI news feed.

    Intended for the Coordinator, which injects recent AI industry news as
    context for technology-trend or industry-dynamics tasks.

    Args:
        days: How many days to look back (default 7).

    Returns:
        List[MemoryItem]: Whatever the Claw source's ``get_latest_ai_news``
        returns; an empty list when the Claw bridge is disabled or the
        lookup raises.
    """
    source = self._claw_source
    if self._claw_enabled and source:
        try:
            return source.get_latest_ai_news(days)
        except Exception:
            # Best-effort feed: a failing source yields no news.
            return []
    return []
1461
+
1462
def rebuild_index(self) -> None:
    """Rebuild the search index from everything currently in the store.

    Records of every memory type are listed and converted with
    ``MemoryItem.from_dict``.  Records that fail to convert are skipped, and
    a type whose listing fails is skipped as a whole; the collected items
    are then handed to ``indexer.build_index``.
    """
    collected: List[MemoryItem] = []
    for memory_type in MemoryType:
        try:
            for record in self.store.list_all(memory_type):
                try:
                    collected.append(MemoryItem.from_dict(record))
                except Exception:
                    continue
        except Exception:
            continue
    self.indexer.build_index(collected)
1476
+
1477
def print_diagnostics(self) -> str:
    """Build a human-readable diagnostics report.

    Despite the name, nothing is printed: the report is returned as one
    newline-joined string covering totals, per-type counts, index state and
    the WorkBuddy (Claw) bridge.

    Loading the Claw memories is best-effort, like the other Claw accesses
    in this class: if it fails, the report says so instead of raising.

    Returns:
        The multi-line report.
    """
    s = self.get_statistics()
    lines = [
        "=== MemoryBridge Diagnostics ===",
        f"Total Memories: {s.total_memories}",
        f"By Type: {s.by_type_counts}",
        f"Index Built: {'Yes' if s.index_built else 'No'}",
        f"Captures: {s.total_captures} | Recalls: {s.total_recalls}",
        f"Index Size: {self.indexer.size} documents",
        "--- Memory Types ---",
    ]
    for t, count in sorted(s.by_type_counts.items()):
        lines.append(f" {t}: {count}")
    lines.append("--- WorkBuddy (Claw) Bridge ---")
    lines.append(f" Available: {'Yes' if s.claw_enabled else 'No'}")
    if self._claw_source:
        try:
            all_claw = self._claw_source.load_all_memories()
            episodic = sum(1 for a in all_claw if a.memory_type == MemoryType.EPISODIC)
            lines.append(f" Items: {len(all_claw)} ({episodic} episodic)")
        except Exception:
            lines.append(" Items: unavailable")
    return "\n".join(lines)
1496
+
1497
def forgetting_weight(self, memory: MemoryItem) -> float:
    """Compute a retention weight from a memory's age and access count.

    Memories younger than 7 days weigh 1.0.  Older ones weigh
    ``base * a / (a + k)`` with ``a = log(access_count + 1)`` and
    ``(base, k)`` equal to (0.8, 1) below 30 days, (0.5, 2) below 60 days
    and (0.3, 3) beyond that.

    Args:
        memory: Provides ``age_days`` and ``access_count``.

    Returns:
        The weight; 0.0 for a memory of 7+ days that was never accessed.
    """
    age = memory.age_days
    boost = math.log(memory.access_count + 1)
    if age < 7:
        return 1.0
    for age_limit, base, damping in ((30, 0.8, 1), (60, 0.5, 2)):
        if age < age_limit:
            return base * (boost / (boost + damping))
    return 0.3 * (boost / (boost + 3))
1508
+
1509
def compress_old_memories(self) -> int:
    """Shorten episodic memories older than 60 days.

    For every episodic record created before the cutoff and not yet flagged
    as compressed, the finding is cut to its first 200 characters plus a
    "...[COMPRESSED]" marker, compression metadata is recorded and the
    record is saved back.

    Records are handled independently: a malformed record (for example
    ``metadata`` set to ``None`` or a timestamp that cannot be compared with
    the cutoff) is skipped without stopping the remaining records.

    Returns:
        Number of records compressed and saved; 0 when compression is
        disabled in the config or the store cannot be listed.
    """
    if not self.config.compress_old_memories:
        return 0
    cutoff = datetime.now() - timedelta(days=60)
    try:
        raw_list = list(self.store.list_all(MemoryType.EPISODIC))
    except Exception:
        return 0

    compressed = 0
    for r in raw_list:
        try:
            created_str = r.get("created_at", "")
            if not created_str:
                continue
            try:
                created = datetime.fromisoformat(created_str)
            except (ValueError, TypeError):
                continue
            metadata = r.get("metadata") or {}
            if not (created < cutoff) or metadata.get("compressed"):
                continue
            content = r.get("finding", "") or r.get("content", "")
            # NOTE(review): content shorter than 200 characters still gets
            # the marker appended; original_length keeps the true size.
            summary = content[:200] + "...[COMPRESSED]"
            r["content"] = summary
            r["finding"] = summary
            metadata["compressed"] = True
            metadata["original_length"] = len(content)
            metadata["compressed_at"] = datetime.now().isoformat()
            r["metadata"] = metadata
            mid = r.get("id", "")
            if mid:
                self.store.save(MemoryType.EPISODIC, r)
                compressed += 1
        except Exception:
            # Best-effort maintenance: one bad record must not abort the pass.
            continue
    return compressed
1539
+
1540
def cleanup_expired_memories(self) -> int:
    """Delete episodic and feedback memories older than the retention window.

    A record is expired when its ``created_at`` parses as an ISO timestamp
    earlier than ``now - config.retention_days``.  Records without a
    parseable timestamp are left alone.  Each successfully deleted record is
    also removed from the index.  Any other error while processing a memory
    type ends the cleanup of that type and moves on to the next one.

    Returns:
        Number of records deleted from the store.
    """
    deleted = 0
    threshold = datetime.now() - timedelta(days=self.config.retention_days)
    for memory_type in (MemoryType.EPISODIC, MemoryType.FEEDBACK):
        try:
            for record in self.store.list_all(memory_type):
                stamp = record.get("created_at", "")
                if not stamp:
                    continue
                try:
                    created = datetime.fromisoformat(stamp)
                except (ValueError, TypeError):
                    continue
                if not (created < threshold):
                    continue
                mid = record.get("id", "")
                if not mid or not self.store.delete(memory_type, mid):
                    continue
                deleted += 1
                if self.indexer:
                    self.indexer.remove_from_index(mid)
        except Exception:
            continue
    return deleted
1563
+
1564
def _guess_type(self, memory_id: str) -> MemoryType:
    """Infer which memory type an id belongs to.

    The id prefix is checked first.  Both ``fb_`` (minted by
    ``record_feedback``) and ``feed_`` (minted by ``capture_execution``)
    denote feedback.  Ids with an unknown prefix are looked up in each
    store in turn; if none holds the id, KNOWLEDGE is returned as the
    fallback.

    Args:
        memory_id: Id to classify.

    Returns:
        The inferred memory type.
    """
    prefix_map = {
        "know_": MemoryType.KNOWLEDGE,
        "fb_": MemoryType.FEEDBACK,
        "feed_": MemoryType.FEEDBACK,
        "epi_": MemoryType.EPISODIC,
        "pat_": MemoryType.PATTERN,
        "anal_": MemoryType.ANALYSIS,
    }
    for prefix, mtype in prefix_map.items():
        if memory_id.startswith(prefix):
            return mtype
    # Unknown prefix: probe the stores rather than guessing blindly.
    for mtype in MemoryType:
        data = self.store.load(mtype, memory_id)
        if data is not None:
            return mtype
    return MemoryType.KNOWLEDGE
1580
+
1581
def _load_any_type(self, memory_id: str) -> Optional[Dict]:
    """Load a record by id without knowing its memory type.

    The type suggested by ``_guess_type`` is tried first, then every other
    memory type.  The returned dict is given a ``memory_type`` key (the
    value of the type it was found under) if it does not have one.

    Args:
        memory_id: Id of the record.

    Returns:
        The record dict, or ``None`` when no store holds the id.
    """
    first_choice = self._guess_type(memory_id)
    found_in = first_choice
    record = self.store.load(first_choice, memory_id)
    if record is None:
        for candidate in MemoryType:
            if candidate != first_choice:
                record = self.store.load(candidate, memory_id)
                if record is not None:
                    found_in = candidate
                    break
        else:
            return None
    if "memory_type" not in record:
        record["memory_type"] = found_in.value
    return record
1596
+
1597
@staticmethod
def _extract_tags(text: str) -> List[str]:
    """Return up to ten distinct tag candidates from *text*.

    Candidates are runs of two or more U+4E00..U+9FFF characters or of three
    or more ASCII letters.  Duplicates are removed while preserving
    first-occurrence order, so the same text always yields the same tags
    (a plain ``set`` would pick an arbitrary subset and order per process).

    Args:
        text: Text to scan.

    Returns:
        At most ten tags in order of first appearance.
    """
    words = re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', text)
    return list(dict.fromkeys(words))[:10]
1601
+
1602
def shutdown(self) -> None:
    """Shut down the MCE adapter, if one is attached.

    Errors raised by the adapter's ``shutdown`` are ignored so that closing
    the bridge never fails because of the adapter.
    """
    adapter = self._mce_adapter
    if adapter:
        try:
            adapter.shutdown()
        except Exception:
            pass