openclaw-xache 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,532 @@
1
+ """
2
+ Memory Extraction for OpenClaw
3
+ Extract structured learnings from conversation traces using LLM
4
+
5
+ This module analyzes OpenClaw conversation logs (markdown files) and extracts:
6
+ - Domain heuristics (patterns and best practices)
7
+ - User preferences
8
+ - Error fixes and solutions
9
+ - Successful patterns
10
+ - Optimization insights
11
+
12
+ Extracted learnings can be:
13
+ 1. Stored to Xache with verifiable receipts
14
+ 2. Contributed to collective intelligence
15
+ 3. Used to improve future agent performance
16
+ """
17
+
18
+ import json
19
+ import re
20
+ from typing import List, Optional, Dict, Any, Callable
21
+ from dataclasses import dataclass, field
22
+ from enum import Enum
23
+
24
+
25
class MemoryType(str, Enum):
    """Standard memory types for extraction.

    Each value is a namespaced identifier ("xache.<area>.<kind>") matching
    the type strings the extraction prompt asks the LLM to emit; type
    strings from the LLM that are not listed here are skipped during
    extraction.
    """
    USER_PREFERENCE = "xache.user.preference"        # user settings / communication style
    ERROR_FIX = "xache.error.fix"                    # error-to-solution mappings
    SUCCESSFUL_PATTERN = "xache.pattern.success"     # approaches that worked well
    FAILED_APPROACH = "xache.pattern.failure"        # approaches to avoid repeating
    TOOL_CONFIG = "xache.tool.config"                # tool settings and configurations
    CONVERSATION_SUMMARY = "xache.conversation.summary"  # multi-turn conversation summaries
    DOMAIN_HEURISTIC = "xache.domain.heuristic"      # domain-specific insights and heuristics
    OPTIMIZATION_INSIGHT = "xache.optimization.insight"  # performance optimizations
35
+
36
+
37
@dataclass
class ExtractedMemory:
    """A single extracted memory/learning."""
    type: MemoryType                 # which standard memory category this belongs to
    confidence: float                # extractor-assigned confidence score
    data: Dict[str, Any]             # structured payload for the memory type
    reasoning: str                   # why the extractor considered this worth keeping
    evidence: Optional[str] = None   # supporting quote from the trace, if any
    suggested_method: str = "store"  # storage-method hint (see TYPE_TO_METHOD)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, collapsing the enum type to its string value."""
        type_value = self.type.value if isinstance(self.type, MemoryType) else self.type
        return {
            "type": type_value,
            "confidence": self.confidence,
            "data": self.data,
            "reasoning": self.reasoning,
            "evidence": self.evidence,
            "suggested_method": self.suggested_method,
        }
56
+
57
+
58
# Mapping from memory type to suggested storage method.
# NOTE: SUCCESSFUL_PATTERN and FAILED_APPROACH share "rememberPattern";
# any type missing from this map falls back to the generic "store" method
# (see the TYPE_TO_METHOD.get(..., "store") lookup in MemoryExtractor.extract).
TYPE_TO_METHOD = {
    MemoryType.USER_PREFERENCE: "rememberPreference",
    MemoryType.ERROR_FIX: "rememberFix",
    MemoryType.SUCCESSFUL_PATTERN: "rememberPattern",
    MemoryType.FAILED_APPROACH: "rememberPattern",
    MemoryType.TOOL_CONFIG: "rememberToolConfig",
    MemoryType.CONVERSATION_SUMMARY: "rememberConversation",
    MemoryType.DOMAIN_HEURISTIC: "rememberHeuristic",
    MemoryType.OPTIMIZATION_INSIGHT: "rememberOptimization",
}
69
+
70
+
71
# Prompt template sent to the extraction LLM.
#
# NOTE: this template contains many literal '{' / '}' characters (the JSON
# examples in the OUTPUT FORMAT / EXAMPLE OUTPUT sections), so it must NOT be
# passed through str.format(); build_extraction_prompt() fills the {trace} and
# {agent_context} placeholders with str.replace() instead.
EXTRACTION_PROMPT = '''You are a memory extraction specialist analyzing agent execution traces to identify learnings worth remembering.

AGENT EXECUTION TRACE:
{trace}

{agent_context}

YOUR TASK:
Analyze the trace and extract learnings that would help this agent (or similar agents) perform better in future executions.

STANDARD MEMORY TYPES:
1. USER_PREFERENCE (xache.user.preference)
   - User settings, preferences, communication styles
   - Example: "User prefers concise responses", "User timezone is PST"

2. ERROR_FIX (xache.error.fix)
   - Error-to-solution mappings
   - Example: "TypeError: undefined → added null check"

3. SUCCESSFUL_PATTERN (xache.pattern.success)
   - Approaches and patterns that worked well
   - Example: "Exponential backoff improved API reliability"

4. FAILED_APPROACH (xache.pattern.failure)
   - Approaches that didn't work (to avoid repeating)
   - Example: "WHERE clause optimization didn't improve query time"

5. TOOL_CONFIG (xache.tool.config)
   - Tool settings and configurations
   - Example: "WeatherAPI uses metric units, 5s timeout"

6. CONVERSATION_SUMMARY (xache.conversation.summary)
   - Multi-turn conversation summaries
   - Example: "5-turn conversation about restaurant recommendations"

7. DOMAIN_HEURISTIC (xache.domain.heuristic)
   - Domain-specific insights and heuristics
   - Example: "In code reviews, functions >50 lines should be refactored"

8. OPTIMIZATION_INSIGHT (xache.optimization.insight)
   - Performance optimizations and improvements
   - Example: "Adding index on user_id reduced query time 94%"

EXTRACTION GUIDELINES:
- Focus on actionable, reusable learnings
- Be specific and concrete (avoid vague generalizations)
- Include metrics when available
- Assign confidence scores based on evidence strength
- Extract multiple learnings if multiple patterns exist
- Only extract high-value learnings (not trivial facts)

OUTPUT FORMAT:
Return a JSON array of extractions. Each extraction must have:
{
  "type": "xache.context.type",
  "confidence": 0.85,
  "data": { /* structured data matching the memory type */ },
  "reasoning": "Why this is worth remembering",
  "evidence": "Direct quote from trace supporting this"
}

EXAMPLE OUTPUT:
[
  {
    "type": "xache.domain.heuristic",
    "confidence": 0.88,
    "data": {
      "domain": "api-integration",
      "pattern": "Rate limiting with exponential backoff prevents 429 errors",
      "evidence": "Reduced errors by 95% in testing"
    },
    "reasoning": "Proven pattern with measurable improvement",
    "evidence": "After implementing backoff, 429 errors dropped from 50/hour to 2/hour"
  }
]

NOW ANALYZE THE TRACE ABOVE:
Return ONLY valid JSON array. If no learnings found, return empty array: []
'''


def build_extraction_prompt(trace: str, agent_context: Optional[str] = None) -> str:
    """Build the full extraction prompt for a conversation trace.

    Args:
        trace: Raw conversation/execution trace text to analyze.
        agent_context: Optional domain hint (e.g. 'research'); when given, an
            AGENT CONTEXT section is injected into the prompt.

    Returns:
        The completed prompt string with both placeholders filled in.

    Bug fix: the template contains literal JSON braces, so str.format()
    raised ``ValueError: Single '{' encountered in format string`` on every
    call. Placeholders are now substituted with str.replace() instead.
    """
    context_section = ""
    if agent_context:
        context_section = f'''AGENT CONTEXT:
This agent operates in the "{agent_context}" domain. Consider domain-specific patterns and best practices when extracting learnings.
'''

    # Substitute the agent-context placeholder first so a trace that happens
    # to contain the literal text "{agent_context}" is not rewritten.
    prompt = EXTRACTION_PROMPT.replace("{agent_context}", context_section)
    return prompt.replace("{trace}", trace)
161
+
162
+
163
def parse_extraction_response(response: str) -> List[Dict[str, Any]]:
    """Parse an LLM response into a list of raw extraction dicts.

    LLMs often wrap JSON in explanatory prose, so three strategies are tried
    in order:
      1. the whole (stripped) response parses as a JSON array;
      2. the greedy bracketed span (first '[' .. last ']') parses as an array;
      3. individual flat JSON objects carrying 'type' and 'confidence' keys
         are collected into a list.

    Returns an empty list when nothing parseable is found.
    """
    # Strategy 1: the entire response is the array.
    try:
        direct = json.loads(response.strip())
    except json.JSONDecodeError:
        direct = None
    if isinstance(direct, list):
        return direct

    # Strategy 2: greedy span between the first '[' and the last ']'.
    span = re.search(r'\[[\s\S]*\]', response)
    if span is not None:
        try:
            candidate = json.loads(span.group())
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, list):
            return candidate

    # Strategy 3: salvage flat (non-nested) objects that look like extractions.
    collected: List[Dict[str, Any]] = []
    for match in re.finditer(r'\{[^{}]*\}', response):
        try:
            obj = json.loads(match.group())
        except json.JSONDecodeError:
            continue
        if 'type' in obj and 'confidence' in obj:
            collected.append(obj)
    return collected
197
+
198
+
199
def validate_extraction(raw: Dict[str, Any]) -> bool:
    """Return True when a raw extraction carries every mandatory field."""
    for field_name in ('type', 'confidence', 'data', 'reasoning'):
        if field_name not in raw:
            return False
    return True
203
+
204
+
205
@dataclass
class MemoryExtractor:
    """
    Extract learnings from conversation traces using LLM.

    Example:
        ```python
        from xache_openclaw.extraction import MemoryExtractor

        # Create extractor with your LLM function
        extractor = MemoryExtractor(
            llm=lambda prompt: my_llm.complete(prompt)
        )

        # Extract from conversation
        learnings = extractor.extract(
            trace=conversation_text,
            agent_context="research"
        )

        # Filter high-confidence learnings
        for learning in learnings:
            if learning.confidence > 0.8:
                print(f"Found: {learning.data}")
        ```
    """
    llm: Callable[[str], str]          # prompt -> completion function
    debug: bool = False                # print progress diagnostics when True
    confidence_threshold: float = 0.7  # default minimum confidence to keep

    def extract(
        self,
        trace: str,
        agent_context: Optional[str] = None,
        confidence_threshold: Optional[float] = None,
    ) -> List[ExtractedMemory]:
        """
        Extract learnings from a conversation trace.

        Args:
            trace: The conversation text to analyze
            agent_context: Optional domain hint (e.g., 'research', 'coding')
            confidence_threshold: Override default threshold (0.0 is honored)

        Returns:
            List of ExtractedMemory objects that passed validation and the
            confidence threshold; malformed or unknown-type entries from the
            LLM are silently skipped.
        """
        prompt = build_extraction_prompt(trace, agent_context)

        if self.debug:
            print(f"[Extractor] Trace length: {len(trace)}")
            print(f"[Extractor] Context: {agent_context}")

        response = self.llm(prompt)

        if self.debug:
            print(f"[Extractor] Response length: {len(response)}")

        raw_extractions = parse_extraction_response(response)

        if self.debug:
            print(f"[Extractor] Parsed {len(raw_extractions)} extractions")

        # Bug fix: use an explicit None check so a caller-supplied threshold
        # of 0.0 is honored (the old `x or default` treated 0.0 as "unset").
        threshold = (
            confidence_threshold
            if confidence_threshold is not None
            else self.confidence_threshold
        )
        results: List[ExtractedMemory] = []

        for raw in raw_extractions:
            # The parser may surface arbitrary JSON values; only dicts with
            # all required fields are usable.
            if not isinstance(raw, dict) or not validate_extraction(raw):
                continue

            # Skip entries whose confidence is non-numeric or below threshold.
            confidence = raw.get('confidence', 0)
            if not isinstance(confidence, (int, float)) or confidence < threshold:
                continue

            # Map the type string to the enum; unknown types are skipped.
            try:
                mem_type = MemoryType(raw['type'])
            except ValueError:
                continue

            results.append(ExtractedMemory(
                type=mem_type,
                confidence=confidence,
                data=raw['data'],
                reasoning=raw['reasoning'],
                evidence=raw.get('evidence'),
                suggested_method=TYPE_TO_METHOD.get(mem_type, "store"),
            ))

        if self.debug:
            print(f"[Extractor] Valid extractions: {len(results)}")

        return results

    def batch_extract(
        self,
        traces: List[Dict[str, Any]],
    ) -> List[List[ExtractedMemory]]:
        """
        Extract from multiple traces.

        Args:
            traces: List of dicts with 'trace' and optional 'agent_context'
                and 'confidence_threshold' keys

        Returns:
            List of extraction results (one per trace)
        """
        return [
            self.extract(
                item.get('trace', ''),
                item.get('agent_context'),
                item.get('confidence_threshold'),
            )
            for item in traces
        ]
326
+
327
+
328
def extract_from_openclaw_memory(
    memory_file: str,
    llm: Callable[[str], str],
    agent_context: Optional[str] = None,
    confidence_threshold: float = 0.7,
) -> List[ExtractedMemory]:
    """
    Extract learnings from an OpenClaw memory file.

    OpenClaw stores memories in markdown files like:
    - memory/YYYY-MM-DD.md (daily memories)
    - MEMORY.md (long-term memory)

    Args:
        memory_file: Path to markdown file
        llm: LLM function for extraction
        agent_context: Optional domain hint
        confidence_threshold: Minimum confidence

    Returns:
        List of extracted memories

    Example:
        ```python
        from xache_openclaw.extraction import extract_from_openclaw_memory

        learnings = extract_from_openclaw_memory(
            memory_file="memory/2024-01-15.md",
            llm=lambda p: my_llm.complete(p),
            agent_context="coding-assistant"
        )
        ```
    """
    with open(memory_file, 'r', encoding='utf-8') as handle:
        trace_text = handle.read()

    extractor = MemoryExtractor(llm=llm, confidence_threshold=confidence_threshold)
    return extractor.extract(trace_text, agent_context)
370
+
371
+
372
def extract_and_contribute(
    trace: str,
    llm: Callable[[str], str],
    agent_context: Optional[str] = None,
    confidence_threshold: float = 0.8,
    auto_contribute: bool = True,
) -> Dict[str, Any]:
    """
    Extract learnings from a trace and optionally share them with the collective.

    Pipeline:
    1. Run MemoryExtractor over the trace.
    2. Keep only heuristic / success-pattern / optimization learnings.
    3. Contribute those at or above `confidence_threshold` via
       collective_contribute, recording per-item failures instead of raising.

    Args:
        trace: Conversation text
        llm: LLM function for extraction
        agent_context: Domain hint
        confidence_threshold: Minimum confidence for contribution
        auto_contribute: Whether to auto-contribute to collective

    Returns:
        Dict with keys "extractions", "contributions", and "errors".

    Example:
        ```python
        from xache_openclaw.extraction import extract_and_contribute
        from xache_openclaw import set_config

        set_config(wallet_address="0x...", private_key="0x...")

        result = extract_and_contribute(
            trace=conversation_text,
            llm=lambda p: my_llm.complete(p),
            agent_context="research",
            auto_contribute=True
        )

        print(f"Extracted: {len(result['extractions'])}")
        print(f"Contributed: {len(result['contributions'])}")
        ```
    """
    from .tools import collective_contribute

    extractor = MemoryExtractor(llm=llm, confidence_threshold=confidence_threshold)
    extractions = extractor.extract(trace, agent_context)

    report: Dict[str, Any] = {
        "extractions": [item.to_dict() for item in extractions],
        "contributions": [],
        "errors": [],
    }

    if not auto_contribute:
        return report

    # Only heuristic/pattern/optimization learnings are shared with the collective.
    shareable = (
        MemoryType.DOMAIN_HEURISTIC,
        MemoryType.SUCCESSFUL_PATTERN,
        MemoryType.OPTIMIZATION_INSIGHT,
    )

    for extraction in extractions:
        if extraction.type not in shareable:
            continue
        if extraction.confidence < confidence_threshold:
            continue

        try:
            # Build the insight from the extraction data; failures here (or in
            # the contribution call) are recorded per item, not raised.
            payload = extraction.data
            pattern = payload.get('pattern') or payload.get('improvement') or str(payload)
            domain = payload.get('domain') or agent_context or 'general'
            evidence = extraction.evidence or payload.get('evidence')

            contribution = collective_contribute(
                insight=pattern,
                domain=domain,
                evidence=evidence,
                tags=[extraction.type.value, f"confidence:{extraction.confidence:.2f}"],
            )
        except Exception as exc:
            report["errors"].append({
                "extraction_type": extraction.type.value,
                "error": str(exc),
            })
        else:
            report["contributions"].append({
                "extraction_type": extraction.type.value,
                "heuristic_id": contribution.get("heuristicId"),
                "pattern": pattern,
                "domain": domain,
            })

    return report
474
+
475
+
476
@dataclass
class XacheExtractionTool:
    """
    OpenClaw tool for extracting and contributing learnings.

    Example:
        ```python
        tool = XacheExtractionTool(
            llm=lambda p: my_llm.complete(p)
        )
        result = tool.run(
            trace="conversation text...",
            agent_context="research"
        )
        ```
    """
    name: str = "xache_extract_learnings"
    description: str = (
        "Extract valuable learnings from conversations and contribute to collective intelligence. "
        "Use this to analyze completed conversations and share insights with other agents."
    )
    # Fix: annotation is now Optional to match the None default; run() returns
    # a clear error string instead of crashing when no LLM has been wired in.
    llm: Optional[Callable[[str], str]] = field(default=None)
    confidence_threshold: float = 0.8
    auto_contribute: bool = True

    def run(
        self,
        trace: str,
        agent_context: Optional[str] = None,
    ) -> str:
        """Extract learnings from `trace` and return a human-readable summary.

        Args:
            trace: Conversation text to analyze.
            agent_context: Optional domain hint passed to the extractor.

        Returns:
            A summary string covering extractions, contributions, and errors,
            or an error message when no LLM is configured.
        """
        if self.llm is None:
            return "Error: LLM not configured. Pass llm function when creating tool."

        result = extract_and_contribute(
            trace=trace,
            llm=self.llm,
            agent_context=agent_context,
            confidence_threshold=self.confidence_threshold,
            auto_contribute=self.auto_contribute,
        )

        num_extracted = len(result['extractions'])
        num_contributed = len(result['contributions'])

        output = f"Extracted {num_extracted} learnings"

        if num_contributed > 0:
            output += f", contributed {num_contributed} to collective:\n"
            for c in result['contributions']:
                output += f"- [{c['domain']}] {c['pattern'][:100]}...\n"
        else:
            output += " (none met contribution threshold)"

        if result['errors']:
            output += f"\nErrors: {len(result['errors'])}"

        return output