openclaw-langcache 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agent-integration.py (new file, 453 lines)
#!/usr/bin/env python3
"""
agent-integration.py - LangCache integration pattern for OpenClaw agents

This example demonstrates how to integrate Redis LangCache semantic caching
into an OpenClaw agent workflow with the default caching policy enforced.

Requirements:
    pip install langcache httpx

Environment variables:
    LANGCACHE_HOST     - LangCache API host
    LANGCACHE_CACHE_ID - Cache ID
    LANGCACHE_API_KEY  - API key
    OPENAI_API_KEY     - OpenAI API key (or your LLM provider)
"""

import os
import re
import asyncio
import logging
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum

# LangCache SDK
from langcache import LangCache
from langcache.models import SearchStrategy

# Your LLM client (example uses OpenAI)
import httpx

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class BlockReason(Enum):
    """Reasons why content was blocked from caching."""
    TEMPORAL = "temporal_info"
    CREDENTIALS = "credentials"
    IDENTIFIERS = "identifiers"
    PERSONAL = "personal_context"


# =============================================================================
# HARD BLOCK PATTERNS - These NEVER get cached
# =============================================================================

BLOCK_PATTERNS = {
    BlockReason.TEMPORAL: [
        r"\b(today|tomorrow|tonight|yesterday)\b",
        r"\b(this|next|last)\s+(week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
        r"\bin\s+\d+\s+(minutes?|hours?|days?|weeks?)\b",
        r"\b(deadline|eta|appointment|scheduled?|meeting\s+at)\b",
        r"\b(right\s+now|at\s+\d{1,2}(:\d{2})?\s*(am|pm)?)\b",
        r"\b(this\s+morning|this\s+afternoon|this\s+evening)\b",
    ],
    BlockReason.CREDENTIALS: [
        r"\b(api[\s_-]?key|api[\s_-]?secret|access[\s_-]?token)\b",  # "API key", "api_key", "api-key"
        r"\b(password|passwd|pwd)\b\s*(is\b|[:=])",  # "password is ...", "password: ...", "password="
        r"\b(secret[_-]?key|private[_-]?key)\b",
        r"\b(otp|2fa|totp|authenticator)\s*(code|token)?\b",
        r"\bbearer\s+[a-zA-Z0-9_-]+",
        r"\b(sk|pk)[_-][a-zA-Z0-9]{20,}\b",  # API key patterns like sk-xxx
    ],
    BlockReason.IDENTIFIERS: [
        r"\b\d{10,15}\b",  # Phone numbers, long numeric IDs
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",  # Emails
        r"\b(order|account|message|chat|user|customer)[\s_-]?id\s*[:=]?\s*\w+",  # "Order ID: 123", "user_id=42"
        r"\b[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\b",  # UUIDs
        r"\b\d{1,5}\s+\w+\s+(street|st|avenue|ave|road|rd|boulevard|blvd)\b",  # Addresses
        r"\b\d{5}(-\d{4})?\b",  # ZIP codes
        r"@[a-zA-Z0-9_]{1,15}\b",  # Social handles / JIDs
    ],
    BlockReason.PERSONAL: [
        r"\bmy\s+(wife|husband|partner|girlfriend|boyfriend|spouse)\b",
        r"\bmy\s+(mom|dad|mother|father|brother|sister|son|daughter|child|kid)\b",
        r"\bmy\s+(friend|colleague|coworker|boss|manager)\s+\w+",  # "my friend John"
        r"\b(said\s+to\s+me|told\s+me|asked\s+me|between\s+us)\b",
        r"\b(private|confidential|secret)\s+(conversation|chat|message)\b",
        r"\bin\s+(our|my)\s+(chat|conversation|thread|group)\b",
        r"\b(he|she|they)\s+(said|told|asked|mentioned)\b",  # Referencing specific people
    ],
}
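# For example, "What's on my calendar today?" trips the TEMPORAL pattern
# r"\b(today|tomorrow|tonight|yesterday)\b" and "Send email to john@example.com"
# trips the email pattern under IDENTIFIERS, so neither the prompt nor its
# response is ever written to the cache (see the demo queries in main()).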


@dataclass
class CacheConfig:
    """Configuration for semantic caching behavior."""
    enabled: bool = True
    model_id: str = "gpt-5"
    cache_ttl_seconds: int = 86400  # 24 hours

    # Thresholds by category
    thresholds: dict = field(default_factory=lambda: {
        "factual": 0.90,
        "template": 0.88,
        "style": 0.85,
        "command": 0.92,
        "default": 0.90,
    })
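    # Note: passing `thresholds` replaces the whole dict, so to tighten a single
    # category you currently supply all five keys, e.g.:
    #   CacheConfig(thresholds={"factual": 0.95, "template": 0.88, "style": 0.85,
    #                           "command": 0.92, "default": 0.95})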


@dataclass
class CacheResult:
    """Result from cache lookup."""
    hit: bool
    response: Optional[str] = None
    similarity: Optional[float] = None
    entry_id: Optional[str] = None


@dataclass
class BlockCheckResult:
    """Result from block pattern check."""
    blocked: bool
    reason: Optional[BlockReason] = None
    matched_pattern: Optional[str] = None


class CachedAgent:
    """
    An agent wrapper that adds semantic caching to LLM calls.
    Enforces the default caching policy with hard blocks.

    Usage:
        agent = CachedAgent(config=CacheConfig())
        response = await agent.complete("What is Redis?")
    """

    def __init__(self, config: Optional[CacheConfig] = None):
        self.config = config or CacheConfig()

        # Initialize LangCache client
        self.cache = LangCache(
            server_url=f"https://{os.environ['LANGCACHE_HOST']}",
            cache_id=os.environ["LANGCACHE_CACHE_ID"],
            api_key=os.environ["LANGCACHE_API_KEY"],
        )

        # Initialize LLM client (example: OpenAI-compatible API)
        self.llm_client = httpx.AsyncClient(
            base_url="https://api.openai.com/v1",
            headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
            timeout=60.0,
        )

        # Metrics
        self.cache_hits = 0
        self.cache_misses = 0
        self.blocked_requests = {reason: 0 for reason in BlockReason}

    def _check_hard_blocks(self, text: str) -> BlockCheckResult:
        """
        Check if text contains any hard-blocked patterns.
        Returns BlockCheckResult with reason if blocked.
        """
        text_lower = text.lower()

        for reason, patterns in BLOCK_PATTERNS.items():
            for pattern in patterns:
                if re.search(pattern, text_lower, re.IGNORECASE):
                    return BlockCheckResult(
                        blocked=True,
                        reason=reason,
                        matched_pattern=pattern,
                    )

        return BlockCheckResult(blocked=False)

    def _normalize_prompt(self, prompt: str) -> str:
        """Normalize prompt for better cache hit rates."""
        normalized = prompt.strip().lower()
        normalized = re.sub(r'\s+', ' ', normalized)

        # Remove common filler phrases
        fillers = [
            r'^(please |can you |could you |would you |hey |hi |hello )',
            r'^(i want to |i need to |i\'d like to )',
            r'( please| thanks| thank you)$',
        ]
        for pattern in fillers:
            normalized = re.sub(pattern, '', normalized)

        return normalized.strip()
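
    # For instance, "Can you explain semantic caching please" and
    # "Please explain semantic caching" both normalize to
    # "explain semantic caching", so they share a single cache key.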

    def _detect_category(self, prompt: str) -> str:
        """Detect the category of a prompt for threshold selection."""
        prompt_lower = prompt.lower()

        # Style / template patterns
        if re.search(r"(polite|professional|formal|warmer|shorter|firmer|rewrite|rephrase)", prompt_lower):
            return "style"

        if re.search(r"(template|draft|write a|compose a|reply to)", prompt_lower):
            return "template"

        # Command patterns
        if re.search(r"(what does|how do i|explain|command|flag|option|syntax)", prompt_lower):
            return "command"

        # Default to factual
        return "factual"

    def _is_cacheable(self, prompt: str, response: str = "") -> tuple[bool, Optional[str]]:
        """
        Check if prompt/response pair should be cached.
        Returns (is_cacheable, block_reason).
        """
        if not self.config.enabled:
            return False, "caching_disabled"

        # Check prompt for hard blocks
        prompt_check = self._check_hard_blocks(prompt)
        if prompt_check.blocked:
            self.blocked_requests[prompt_check.reason] += 1
            logger.info(
                f"BLOCKED ({prompt_check.reason.value}): {prompt[:50]}... "
                f"[pattern: {prompt_check.matched_pattern}]"
            )
            return False, prompt_check.reason.value

        # Check response for hard blocks (don't cache responses with sensitive data)
        if response:
            response_check = self._check_hard_blocks(response)
            if response_check.blocked:
                self.blocked_requests[response_check.reason] += 1
                logger.info(
                    f"BLOCKED response ({response_check.reason.value}): "
                    f"[pattern: {response_check.matched_pattern}]"
                )
                return False, response_check.reason.value

        return True, None

    async def _search_cache(self, prompt: str, category: str) -> CacheResult:
        """Search for cached response with category-specific threshold."""
        try:
            threshold = self.config.thresholds.get(
                category,
                self.config.thresholds["default"]
            )

            result = await asyncio.to_thread(
                self.cache.search,
                prompt=prompt,
                similarity_threshold=threshold,
                search_strategies=[SearchStrategy.EXACT, SearchStrategy.SEMANTIC],
                attributes={"model": self.config.model_id},
            )

            if result.hit:
                # Verify cached response doesn't contain blocked content
                response_check = self._check_hard_blocks(result.response)
                if response_check.blocked:
                    logger.warning(
                        f"Cached response contains blocked content, skipping: "
                        f"{response_check.reason.value}"
                    )
                    return CacheResult(hit=False)

                return CacheResult(
                    hit=True,
                    response=result.response,
                    similarity=result.similarity,
                    entry_id=result.entry_id,
                )
        except Exception as e:
            logger.warning(f"Cache search failed: {e}")

        return CacheResult(hit=False)

    async def _store_in_cache(
        self,
        prompt: str,
        response: str,
        category: str
    ) -> None:
        """Store response in cache (fire-and-forget) if allowed."""
        # Final safety check before storing
        cacheable, reason = self._is_cacheable(prompt, response)
        if not cacheable:
            logger.debug(f"Not storing in cache: {reason}")
            return

        try:
            await asyncio.to_thread(
                self.cache.set,
                prompt=prompt,
                response=response,
                attributes={
                    "model": self.config.model_id,
                    "category": category,
                },
            )
            logger.debug(f"Cached [{category}]: {prompt[:50]}...")
        except Exception as e:
            logger.warning(f"Cache store failed: {e}")

    async def _call_llm(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """Call the LLM API."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = await self.llm_client.post(
            "/chat/completions",
            json={
                "model": self.config.model_id,
                "messages": messages,
                "max_tokens": 1024,
            },
        )
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]

    async def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        force_refresh: bool = False,
    ) -> str:
        """
        Complete a prompt with semantic caching.
        Enforces caching policy with hard blocks.

        Args:
            prompt: The user prompt
            system_prompt: Optional system prompt (not included in cache key)
            force_refresh: Skip cache and call LLM directly

        Returns:
            The LLM response (from cache or fresh)
        """
        normalized_prompt = self._normalize_prompt(prompt)
        category = self._detect_category(prompt)

        # Check if cacheable (hard blocks)
        cacheable, block_reason = self._is_cacheable(prompt)

        if not force_refresh and cacheable:
            cache_result = await self._search_cache(normalized_prompt, category)

            if cache_result.hit:
                self.cache_hits += 1
                logger.info(
                    f"Cache HIT [{category}] (similarity={cache_result.similarity:.3f}): "
                    f"{prompt[:50]}..."
                )
                return cache_result.response

        # Cache miss, blocked, or force refresh - call LLM
        self.cache_misses += 1
        if block_reason:
            logger.info(f"Cache SKIP (blocked: {block_reason}): {prompt[:50]}...")
        else:
            logger.info(f"Cache MISS [{category}]: {prompt[:50]}...")

        response = await self._call_llm(prompt, system_prompt)

        # Store in cache if allowed (async, don't block response)
        if cacheable:
            asyncio.create_task(
                self._store_in_cache(normalized_prompt, response, category)
            )

        return response

    def get_stats(self) -> dict:
        """Get cache statistics including block reasons."""
        total = self.cache_hits + self.cache_misses
        hit_rate = self.cache_hits / total if total > 0 else 0
        return {
            "hits": self.cache_hits,
            "misses": self.cache_misses,
            "total": total,
            "hit_rate": f"{hit_rate:.1%}",
            "blocked": {
                reason.value: count
                for reason, count in self.blocked_requests.items()
            },
        }


# =============================================================================
# Example usage
# =============================================================================

async def main():
    """Demonstrate cached agent with policy enforcement."""

    agent = CachedAgent(config=CacheConfig(enabled=True, model_id="gpt-5"))

    test_queries = [
        # CACHEABLE - Factual Q&A
        ("What is Redis?", "Should cache"),
        ("Explain semantic caching", "Should cache"),

        # CACHEABLE - Style transforms
        ("Make this message warmer: Thanks for your email", "Should cache"),
        ("Rewrite this to be more professional", "Should cache"),

        # CACHEABLE - Templates
        ("Write a polite decline email", "Should cache"),

        # BLOCKED - Temporal
        ("What's on my calendar today?", "BLOCKED: temporal"),
        ("Remind me in 20 minutes", "BLOCKED: temporal"),
        ("What's the deadline for this week?", "BLOCKED: temporal"),

        # BLOCKED - Credentials
        ("Store my API key sk-abc123xyz", "BLOCKED: credentials"),
        ("My password is hunter2", "BLOCKED: credentials"),

        # BLOCKED - Identifiers
        ("Send email to john@example.com", "BLOCKED: identifiers"),
        ("Call me at 5551234567", "BLOCKED: identifiers"),
        ("Order ID: 12345678", "BLOCKED: identifiers"),

        # BLOCKED - Personal context
        ("My wife said we should...", "BLOCKED: personal"),
        ("In our private chat, he mentioned...", "BLOCKED: personal"),
    ]

    print("=" * 60)
    print("LangCache Policy Enforcement Demo")
    print("=" * 60)

    for query, expected in test_queries:
        print(f"\nQuery: {query}")
        print(f"Expected: {expected}")
        try:
            response = await agent.complete(query)
            print(f"Response: {response[:80]}...")
        except Exception as e:
            print(f"Error: {e}")

    print("\n" + "=" * 60)
    print("Cache Statistics:")
    print("=" * 60)
    stats = agent.get_stats()
    print(f"Hits: {stats['hits']}")
    print(f"Misses: {stats['misses']}")
    print(f"Hit Rate: {stats['hit_rate']}")
    print("Blocked by reason:")
    for reason, count in stats['blocked'].items():
        print(f"  - {reason}: {count}")


if __name__ == "__main__":
    asyncio.run(main())
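
The file above is a self-contained demo, but the same wrapper is meant to sit between an OpenClaw agent and its LLM client: every model call goes through CachedAgent.complete() so the hard-block policy and cache lookups apply uniformly. A minimal sketch, assuming the file is importable as a module named agent_integration (for instance after renaming it) and using a hypothetical handle_user_message hook that is not part of this package:

# Illustrative sketch only: the module name (agent_integration) and the
# handle_user_message hook are assumptions, not part of this package.
from agent_integration import CachedAgent, CacheConfig

_agent = CachedAgent(config=CacheConfig())


async def handle_user_message(text: str) -> str:
    # Route every model call through the cache-aware wrapper; hard-blocked
    # or novel prompts fall through to a live LLM call automatically.
    return await _agent.complete(text)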
basic-caching.sh (new file, 56 lines)
#!/usr/bin/env bash
#
# basic-caching.sh - Demonstrates basic LangCache workflow
#
# This example shows the cache-aside pattern:
#   1. Check cache for existing response
#   2. If miss, call LLM and store result
#   3. If hit, use cached response

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
LANGCACHE="${SCRIPT_DIR}/scripts/langcache.sh"

# Example prompt
PROMPT="What is semantic caching and why is it useful?"

echo "=== LangCache Basic Caching Example ==="
echo ""
echo "Prompt: ${PROMPT}"
echo ""

# Step 1: Search cache
echo "Step 1: Searching cache..."
CACHE_RESULT=$("$LANGCACHE" search "$PROMPT" --threshold 0.9 2>/dev/null || echo '{"hit": false}')

# Check if we got a hit
HIT=$(echo "$CACHE_RESULT" | jq -r '.hit // false')

if [[ "$HIT" == "true" ]]; then
    echo "Cache HIT!"
    echo ""
    SIMILARITY=$(echo "$CACHE_RESULT" | jq -r '.similarity')
    RESPONSE=$(echo "$CACHE_RESULT" | jq -r '.response')
    echo "Similarity: ${SIMILARITY}"
    echo "Response: ${RESPONSE}"
else
    echo "Cache MISS - would call LLM here"
    echo ""

    # Simulated LLM response (in real usage, call your LLM API)
    LLM_RESPONSE="Semantic caching stores LLM responses and returns them for semantically similar queries. It reduces API costs and latency by avoiding redundant LLM calls for questions that have already been answered."

    echo "LLM Response: ${LLM_RESPONSE}"
    echo ""

    # Step 2: Store in cache for future use
    echo "Step 2: Storing response in cache..."
    STORE_RESULT=$("$LANGCACHE" store "$PROMPT" "$LLM_RESPONSE" --attr "example=basic" 2>/dev/null || echo '{"error": "store failed"}')

    ENTRY_ID=$(echo "$STORE_RESULT" | jq -r '.entryId // "unknown"')
    echo "Stored with entry ID: ${ENTRY_ID}"
fi

echo ""
echo "=== Done ==="