lynkr 8.0.0 → 8.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/api/openai-router.js +34 -2
  4. package/src/clients/standard-tools.js +23 -0
  5. package/src/config/index.js +20 -0
  6. package/src/orchestrator/index.js +2 -2
  7. package/src/server.js +2 -12
  8. package/src/tools/index.js +4 -0
  9. package/src/tools/lazy-loader.js +7 -0
  10. package/src/tools/tinyfish.js +358 -0
  11. package/src/tools/truncate.js +1 -0
  12. package/.github/FUNDING.yml +0 -15
  13. package/.github/workflows/README.md +0 -215
  14. package/.github/workflows/ci.yml +0 -69
  15. package/.github/workflows/index.yml +0 -62
  16. package/.github/workflows/web-tools-tests.yml +0 -56
  17. package/CITATIONS.bib +0 -6
  18. package/DEPLOYMENT.md +0 -1001
  19. package/LYNKR-TUI-PLAN.md +0 -984
  20. package/PERFORMANCE-REPORT.md +0 -866
  21. package/PLAN-per-client-model-routing.md +0 -252
  22. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  23. package/docs/BingSiteAuth.xml +0 -4
  24. package/docs/docs-style.css +0 -478
  25. package/docs/docs.html +0 -198
  26. package/docs/google5be250e608e6da39.html +0 -1
  27. package/docs/index.html +0 -577
  28. package/docs/index.md +0 -584
  29. package/docs/robots.txt +0 -4
  30. package/docs/sitemap.xml +0 -44
  31. package/docs/style.css +0 -1223
  32. package/docs/toon-integration-spec.md +0 -130
  33. package/documentation/README.md +0 -101
  34. package/documentation/api.md +0 -806
  35. package/documentation/claude-code-cli.md +0 -679
  36. package/documentation/codex-cli.md +0 -397
  37. package/documentation/contributing.md +0 -571
  38. package/documentation/cursor-integration.md +0 -734
  39. package/documentation/docker.md +0 -874
  40. package/documentation/embeddings.md +0 -762
  41. package/documentation/faq.md +0 -713
  42. package/documentation/features.md +0 -403
  43. package/documentation/headroom.md +0 -519
  44. package/documentation/installation.md +0 -758
  45. package/documentation/memory-system.md +0 -476
  46. package/documentation/production.md +0 -636
  47. package/documentation/providers.md +0 -1009
  48. package/documentation/routing.md +0 -476
  49. package/documentation/testing.md +0 -629
  50. package/documentation/token-optimization.md +0 -325
  51. package/documentation/tools.md +0 -697
  52. package/documentation/troubleshooting.md +0 -969
  53. package/final-test.js +0 -33
  54. package/headroom-sidecar/config.py +0 -93
  55. package/headroom-sidecar/requirements.txt +0 -14
  56. package/headroom-sidecar/server.py +0 -451
  57. package/monitor-agents.sh +0 -31
  58. package/scripts/audit-log-reader.js +0 -399
  59. package/scripts/compact-dictionary.js +0 -204
  60. package/scripts/test-deduplication.js +0 -448
  61. package/src/db/database.sqlite +0 -0
  62. package/te +0 -11622
  63. package/test/README.md +0 -212
  64. package/test/azure-openai-config.test.js +0 -213
  65. package/test/azure-openai-error-resilience.test.js +0 -238
  66. package/test/azure-openai-format-conversion.test.js +0 -354
  67. package/test/azure-openai-integration.test.js +0 -287
  68. package/test/azure-openai-routing.test.js +0 -175
  69. package/test/azure-openai-streaming.test.js +0 -171
  70. package/test/bedrock-integration.test.js +0 -457
  71. package/test/comprehensive-test-suite.js +0 -928
  72. package/test/config-validation.test.js +0 -207
  73. package/test/cursor-integration.test.js +0 -484
  74. package/test/format-conversion.test.js +0 -578
  75. package/test/hybrid-routing-integration.test.js +0 -269
  76. package/test/hybrid-routing-performance.test.js +0 -428
  77. package/test/llamacpp-integration.test.js +0 -882
  78. package/test/lmstudio-integration.test.js +0 -347
  79. package/test/memory/extractor.test.js +0 -398
  80. package/test/memory/retriever.test.js +0 -613
  81. package/test/memory/retriever.test.js.bak +0 -585
  82. package/test/memory/search.test.js +0 -537
  83. package/test/memory/search.test.js.bak +0 -389
  84. package/test/memory/store.test.js +0 -344
  85. package/test/memory/store.test.js.bak +0 -312
  86. package/test/memory/surprise.test.js +0 -300
  87. package/test/memory-performance.test.js +0 -472
  88. package/test/openai-integration.test.js +0 -683
  89. package/test/openrouter-error-resilience.test.js +0 -418
  90. package/test/passthrough-mode.test.js +0 -385
  91. package/test/performance-benchmark.js +0 -351
  92. package/test/performance-tests.js +0 -528
  93. package/test/routing.test.js +0 -225
  94. package/test/toon-compression.test.js +0 -131
  95. package/test/web-tools.test.js +0 -329
  96. package/test-agents-simple.js +0 -43
  97. package/test-cli-connection.sh +0 -33
  98. package/test-learning-unit.js +0 -126
  99. package/test-learning.js +0 -112
  100. package/test-parallel-agents.sh +0 -124
  101. package/test-parallel-direct.js +0 -155
  102. package/test-subagents.sh +0 -117
package/final-test.js DELETED
@@ -1,33 +0,0 @@
1
- const http = require('http');
2
-
3
- const data = JSON.stringify({
4
- model: "claude-sonnet-4-5",
5
- max_tokens: 100,
6
- messages: [{ role: "user", content: "Say hello" }]
7
- });
8
-
9
- const req = http.request({
10
- hostname: 'localhost',
11
- port: 8081,
12
- path: '/v1/messages',
13
- method: 'POST',
14
- headers: { 'Content-Type': 'application/json', 'Content-Length': data.length }
15
- }, (res) => {
16
- let body = '';
17
- res.on('data', chunk => body += chunk);
18
- res.on('end', () => {
19
- console.log('Status:', res.statusCode);
20
- if (res.statusCode === 200) {
21
- const json = JSON.parse(body);
22
- console.log('✅ SUCCESS!');
23
- console.log('Model:', json.model);
24
- console.log('Response:', json.content[0].text.substring(0, 150));
25
- } else {
26
- console.log('❌ Error:', body.substring(0, 300));
27
- }
28
- });
29
- });
30
-
31
- req.on('error', e => console.error('Request failed:', e.message));
32
- req.write(data);
33
- req.end();
package/headroom-sidecar/config.py DELETED
@@ -1,93 +0,0 @@
1
- """
2
- Headroom Sidecar Configuration
3
- Loads settings from environment variables
4
- """
5
-
6
- import os
7
- from typing import Optional
8
-
9
-
10
- def str_to_bool(value: str) -> bool:
11
- """Convert string to boolean"""
12
- return value.lower() in ("true", "1", "yes", "on")
13
-
14
-
15
- class HeadroomConfig:
16
- """Configuration for Headroom sidecar"""
17
-
18
- def __init__(self):
19
- # Server settings
20
- self.host = os.environ.get("HEADROOM_HOST", "0.0.0.0")
21
- self.port = int(os.environ.get("HEADROOM_PORT", "8787"))
22
- self.log_level = os.environ.get("HEADROOM_LOG_LEVEL", "info")
23
-
24
- # Operating mode
25
- self.mode = os.environ.get("HEADROOM_MODE", "optimize")
26
- self.provider = os.environ.get("HEADROOM_PROVIDER", "anthropic")
27
-
28
- # Smart Crusher settings
29
- self.smart_crusher_enabled = str_to_bool(
30
- os.environ.get("HEADROOM_SMART_CRUSHER", "true")
31
- )
32
- self.smart_crusher_min_tokens = int(
33
- os.environ.get("HEADROOM_SMART_CRUSHER_MIN_TOKENS", "200")
34
- )
35
- self.smart_crusher_max_items = int(
36
- os.environ.get("HEADROOM_SMART_CRUSHER_MAX_ITEMS", "15")
37
- )
38
-
39
- # Tool Crusher settings
40
- self.tool_crusher_enabled = str_to_bool(
41
- os.environ.get("HEADROOM_TOOL_CRUSHER", "true")
42
- )
43
-
44
- # Cache Aligner settings
45
- self.cache_aligner_enabled = str_to_bool(
46
- os.environ.get("HEADROOM_CACHE_ALIGNER", "true")
47
- )
48
-
49
- # Rolling Window settings
50
- self.rolling_window_enabled = str_to_bool(
51
- os.environ.get("HEADROOM_ROLLING_WINDOW", "true")
52
- )
53
- self.keep_turns = int(os.environ.get("HEADROOM_KEEP_TURNS", "3"))
54
-
55
- # CCR settings
56
- self.ccr_enabled = str_to_bool(os.environ.get("HEADROOM_CCR", "true"))
57
- self.ccr_ttl = int(os.environ.get("HEADROOM_CCR_TTL", "300"))
58
-
59
- # LLMLingua settings
60
- self.llmlingua_enabled = str_to_bool(
61
- os.environ.get("HEADROOM_LLMLINGUA", "false")
62
- )
63
- self.llmlingua_device = os.environ.get("HEADROOM_LLMLINGUA_DEVICE", "auto")
64
-
65
- def to_dict(self) -> dict:
66
- """Return configuration as dictionary"""
67
- return {
68
- "host": self.host,
69
- "port": self.port,
70
- "log_level": self.log_level,
71
- "mode": self.mode,
72
- "provider": self.provider,
73
- "smart_crusher": {
74
- "enabled": self.smart_crusher_enabled,
75
- "min_tokens": self.smart_crusher_min_tokens,
76
- "max_items": self.smart_crusher_max_items,
77
- },
78
- "tool_crusher": {"enabled": self.tool_crusher_enabled},
79
- "cache_aligner": {"enabled": self.cache_aligner_enabled},
80
- "rolling_window": {
81
- "enabled": self.rolling_window_enabled,
82
- "keep_turns": self.keep_turns,
83
- },
84
- "ccr": {"enabled": self.ccr_enabled, "ttl": self.ccr_ttl},
85
- "llmlingua": {
86
- "enabled": self.llmlingua_enabled,
87
- "device": self.llmlingua_device,
88
- },
89
- }
90
-
91
-
92
- # Global config instance
93
- config = HeadroomConfig()
package/headroom-sidecar/requirements.txt DELETED
@@ -1,14 +0,0 @@
1
- # Headroom Sidecar Dependencies
2
-
3
- # Core framework
4
- fastapi>=0.109.0
5
- uvicorn[standard]>=0.27.0
6
- pydantic>=2.5.0
7
-
8
- # Headroom SDK
9
- headroom-ai>=0.1.0
10
-
11
- # Optional: LLMLingua support (uncomment for ML compression)
12
- # llmlingua>=0.2.0
13
- # torch>=2.0.0
14
- # transformers>=4.36.0
package/headroom-sidecar/server.py DELETED
@@ -1,451 +0,0 @@
1
- """
2
- Headroom Sidecar Server
3
- FastAPI application providing context compression via HTTP API
4
- """
5
-
6
- import logging
7
- import time
8
- import hashlib
9
- import json
10
- from typing import Any, Dict, List, Optional
11
- from datetime import datetime
12
-
13
- from fastapi import FastAPI, HTTPException
14
- from fastapi.responses import JSONResponse
15
- from pydantic import BaseModel
16
- import uvicorn
17
-
18
- from config import config
19
-
20
- # Setup logging
21
- logging.basicConfig(
22
- level=getattr(logging, config.log_level.upper()),
23
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
24
- )
25
- logger = logging.getLogger("headroom-sidecar")
26
-
27
- # Initialize FastAPI app
28
- app = FastAPI(
29
- title="Headroom Sidecar",
30
- description="Context compression service for LLM requests",
31
- version="1.0.0",
32
- )
33
-
34
- # Try to import headroom, fallback to basic compression if not available
35
- try:
36
- from headroom import (
37
- TransformPipeline,
38
- SmartCrusher,
39
- SmartCrusherConfig,
40
- ToolCrusher,
41
- ToolCrusherConfig,
42
- RollingWindow,
43
- RollingWindowConfig,
44
- AnthropicProvider,
45
- OpenAIProvider,
46
- )
47
- import warnings
48
- warnings.filterwarnings("ignore", message=".*tiktoken approximation.*")
49
-
50
- # Create transforms based on config
51
- transforms = []
52
-
53
- if config.smart_crusher_enabled:
54
- transforms.append(SmartCrusher(SmartCrusherConfig(
55
- enabled=True,
56
- min_tokens_to_crush=config.smart_crusher_min_tokens,
57
- max_items_after_crush=config.smart_crusher_max_items,
58
- )))
59
- logger.info("SmartCrusher enabled")
60
-
61
- if config.tool_crusher_enabled:
62
- transforms.append(ToolCrusher(ToolCrusherConfig(
63
- enabled=True,
64
- )))
65
- logger.info("ToolCrusher enabled")
66
-
67
- if config.rolling_window_enabled:
68
- transforms.append(RollingWindow(RollingWindowConfig(
69
- enabled=True,
70
- keep_last_turns=config.keep_turns,
71
- )))
72
- logger.info("RollingWindow enabled")
73
-
74
- # Create provider based on config
75
- if config.provider == "openai":
76
- headroom_provider = OpenAIProvider()
77
- else:
78
- headroom_provider = AnthropicProvider()
79
-
80
- headroom_pipeline = TransformPipeline(transforms=transforms, provider=headroom_provider) if transforms else None
81
- HEADROOM_AVAILABLE = headroom_pipeline is not None
82
- logger.info(f"Headroom SDK loaded successfully with {len(transforms)} transforms (provider: {config.provider})")
83
- except ImportError as e:
84
- logger.warning(f"Headroom SDK not available: {e}. Using basic compression.")
85
- headroom_pipeline = None
86
- HEADROOM_AVAILABLE = False
87
-
88
- # CCR Store (in-memory with TTL)
89
- ccr_store: Dict[str, Dict[str, Any]] = {}
90
-
91
- # Metrics
92
- metrics = {
93
- "requests_total": 0,
94
- "compressions_applied": 0,
95
- "compressions_skipped": 0,
96
- "errors": 0,
97
- "ccr_stores": 0,
98
- "ccr_retrievals": 0,
99
- "total_tokens_before": 0,
100
- "total_tokens_after": 0,
101
- "start_time": datetime.utcnow().isoformat(),
102
- }
103
-
104
-
105
- # Request/Response models
106
- class CompressRequest(BaseModel):
107
- messages: List[Dict[str, Any]]
108
- tools: Optional[List[Dict[str, Any]]] = None
109
- model: Optional[str] = "claude-3-5-sonnet-20241022"
110
- model_limit: Optional[int] = 200000
111
- mode: Optional[str] = None
112
- token_budget: Optional[int] = None
113
- query_context: Optional[str] = None
114
- preserve_recent_turns: Optional[int] = None
115
- target_ratio: Optional[float] = None
116
-
117
-
118
- class CompressResponse(BaseModel):
119
- messages: List[Dict[str, Any]]
120
- tools: Optional[List[Dict[str, Any]]] = None
121
- compressed: bool
122
- stats: Dict[str, Any]
123
-
124
-
125
- class CCRRetrieveRequest(BaseModel):
126
- hash: str
127
- query: Optional[str] = None
128
- max_results: Optional[int] = 20
129
-
130
-
131
- class CCRRetrieveResponse(BaseModel):
132
- success: bool
133
- content: Optional[Any] = None
134
- items_retrieved: int = 0
135
- was_search: bool = False
136
- error: Optional[str] = None
137
-
138
-
139
- def estimate_tokens(data: Any) -> int:
140
- """Estimate token count (rough approximation: ~4 chars per token)"""
141
- text = json.dumps(data) if not isinstance(data, str) else data
142
- return len(text) // 4
143
-
144
-
145
- def generate_hash(content: Any) -> str:
146
- """Generate hash for CCR storage"""
147
- text = json.dumps(content, sort_keys=True)
148
- return hashlib.sha256(text.encode()).hexdigest()[:12]
149
-
150
-
151
- def cleanup_expired_ccr():
152
- """Remove expired CCR entries"""
153
- now = time.time()
154
- expired = [k for k, v in ccr_store.items() if now - v["timestamp"] > config.ccr_ttl]
155
- for key in expired:
156
- del ccr_store[key]
157
-
158
-
159
- def basic_compress(messages: List[Dict], tools: Optional[List] = None) -> Dict:
160
- """Basic compression when Headroom SDK is not available"""
161
- tokens_before = estimate_tokens(messages)
162
- compressed_messages = []
163
-
164
- for msg in messages:
165
- compressed_msg = msg.copy()
166
-
167
- # Compress large tool results
168
- if msg.get("role") == "user" and isinstance(msg.get("content"), list):
169
- new_content = []
170
- for block in msg["content"]:
171
- if block.get("type") == "tool_result":
172
- content = block.get("content", "")
173
- if isinstance(content, str) and len(content) > 2000:
174
- # Store in CCR and replace with reference
175
- hash_key = generate_hash(content)
176
- ccr_store[hash_key] = {
177
- "content": content,
178
- "timestamp": time.time(),
179
- "tool_name": block.get("tool_use_id", "unknown"),
180
- }
181
- metrics["ccr_stores"] += 1
182
- block = block.copy()
183
- block["content"] = (
184
- f"[CCR:{hash_key}] Content compressed ({len(content)} chars). "
185
- f"Use ccr_retrieve to access full content."
186
- )
187
- new_content.append(block)
188
- compressed_msg["content"] = new_content
189
- compressed_messages.append(compressed_msg)
190
-
191
- tokens_after = estimate_tokens(compressed_messages)
192
-
193
- return {
194
- "messages": compressed_messages,
195
- "tools": tools,
196
- "compressed": tokens_after < tokens_before,
197
- "stats": {
198
- "tokens_before": tokens_before,
199
- "tokens_after": tokens_after,
200
- "tokens_saved": tokens_before - tokens_after,
201
- "savings_percent": round(
202
- (1 - tokens_after / tokens_before) * 100, 1
203
- ) if tokens_before > 0 else 0,
204
- "transforms_applied": ["basic_ccr"] if tokens_after < tokens_before else [],
205
- "latency_ms": 0,
206
- },
207
- }
208
-
209
-
210
- @app.get("/health")
211
- async def health_check():
212
- """Health check endpoint"""
213
- cleanup_expired_ccr()
214
- return {
215
- "status": "healthy",
216
- "headroom_loaded": HEADROOM_AVAILABLE,
217
- "ccr_enabled": config.ccr_enabled,
218
- "llmlingua_enabled": config.llmlingua_enabled,
219
- "entries_cached": len(ccr_store),
220
- "config": config.to_dict(),
221
- }
222
-
223
-
224
- @app.get("/metrics")
225
- async def get_metrics():
226
- """Get compression metrics"""
227
- return {
228
- **metrics,
229
- "average_compression_ratio": (
230
- round(metrics["total_tokens_after"] / metrics["total_tokens_before"], 3)
231
- if metrics["total_tokens_before"] > 0
232
- else 1.0
233
- ),
234
- "ccr_entries": len(ccr_store),
235
- "uptime_seconds": (
236
- datetime.utcnow() - datetime.fromisoformat(metrics["start_time"])
237
- ).total_seconds(),
238
- }
239
-
240
-
241
- @app.post("/compress", response_model=CompressResponse)
242
- async def compress_messages(request: CompressRequest):
243
- """Compress messages and tools"""
244
- start_time = time.time()
245
- metrics["requests_total"] += 1
246
-
247
- try:
248
- tokens_before = estimate_tokens(request.messages)
249
- metrics["total_tokens_before"] += tokens_before
250
-
251
- # Skip if below minimum tokens
252
- if tokens_before < config.smart_crusher_min_tokens:
253
- metrics["compressions_skipped"] += 1
254
- return CompressResponse(
255
- messages=request.messages,
256
- tools=request.tools,
257
- compressed=False,
258
- stats={
259
- "skipped": True,
260
- "reason": f"Below threshold ({tokens_before} < {config.smart_crusher_min_tokens})",
261
- },
262
- )
263
-
264
- # Use Headroom SDK if available
265
- if HEADROOM_AVAILABLE and headroom_pipeline:
266
- try:
267
- result = headroom_pipeline.apply(
268
- request.messages,
269
- model=request.model,
270
- model_limit=request.model_limit,
271
- )
272
-
273
- # Extract messages from TransformResult
274
- if hasattr(result, 'messages'):
275
- compressed_messages = result.messages
276
- # transforms_applied may be strings or objects with .name
277
- if hasattr(result, 'transforms_applied'):
278
- transforms_applied = [t if isinstance(t, str) else getattr(t, 'name', str(t)) for t in result.transforms_applied]
279
- else:
280
- transforms_applied = []
281
- elif isinstance(result, dict):
282
- compressed_messages = result.get("messages", request.messages)
283
- transforms_applied = result.get("transforms", [])
284
- else:
285
- compressed_messages = result if isinstance(result, list) else request.messages
286
- transforms_applied = []
287
-
288
- tokens_after = estimate_tokens(compressed_messages)
289
- metrics["total_tokens_after"] += tokens_after
290
- metrics["compressions_applied"] += 1
291
-
292
- return CompressResponse(
293
- messages=compressed_messages,
294
- tools=request.tools, # Tools not modified by current transforms
295
- compressed=tokens_after < tokens_before,
296
- stats={
297
- "tokens_before": tokens_before,
298
- "tokens_after": tokens_after,
299
- "tokens_saved": tokens_before - tokens_after,
300
- "savings_percent": round(
301
- (1 - tokens_after / tokens_before) * 100, 1
302
- ) if tokens_before > 0 else 0,
303
- "transforms_applied": transforms_applied,
304
- "latency_ms": round((time.time() - start_time) * 1000, 1),
305
- },
306
- )
307
- except Exception as e:
308
- logger.warning(f"Headroom SDK error, falling back to basic: {e}")
309
-
310
- # Fallback to basic compression
311
- result = basic_compress(request.messages, request.tools)
312
- metrics["total_tokens_after"] += result["stats"]["tokens_after"]
313
- if result["compressed"]:
314
- metrics["compressions_applied"] += 1
315
- else:
316
- metrics["compressions_skipped"] += 1
317
-
318
- result["stats"]["latency_ms"] = round((time.time() - start_time) * 1000, 1)
319
- return CompressResponse(**result)
320
-
321
- except Exception as e:
322
- metrics["errors"] += 1
323
- logger.error(f"Compression error: {e}")
324
- raise HTTPException(status_code=500, detail=str(e))
325
-
326
-
327
- @app.post("/ccr/retrieve", response_model=CCRRetrieveResponse)
328
- async def ccr_retrieve(request: CCRRetrieveRequest):
329
- """Retrieve content from CCR store"""
330
- cleanup_expired_ccr()
331
-
332
- if request.hash not in ccr_store:
333
- return CCRRetrieveResponse(
334
- success=False,
335
- error=f"Hash {request.hash} not found or expired",
336
- )
337
-
338
- entry = ccr_store[request.hash]
339
- content = entry["content"]
340
- metrics["ccr_retrievals"] += 1
341
-
342
- # If query provided, search within content
343
- if request.query:
344
- if isinstance(content, list):
345
- # Filter list items by query
346
- filtered = [
347
- item
348
- for item in content
349
- if request.query.lower() in json.dumps(item).lower()
350
- ][: request.max_results]
351
- return CCRRetrieveResponse(
352
- success=True,
353
- content=filtered,
354
- items_retrieved=len(filtered),
355
- was_search=True,
356
- )
357
- elif isinstance(content, str):
358
- # Return content if query matches
359
- if request.query.lower() in content.lower():
360
- return CCRRetrieveResponse(
361
- success=True,
362
- content=content,
363
- items_retrieved=1,
364
- was_search=True,
365
- )
366
- return CCRRetrieveResponse(
367
- success=False,
368
- error="Query not found in content",
369
- )
370
-
371
- # Return full content
372
- return CCRRetrieveResponse(
373
- success=True,
374
- content=content,
375
- items_retrieved=1 if not isinstance(content, list) else len(content),
376
- was_search=False,
377
- )
378
-
379
-
380
- @app.post("/ccr/track")
381
- async def ccr_track(
382
- hash_key: str,
383
- turn_number: int,
384
- tool_name: str,
385
- sample: str,
386
- ):
387
- """Track compression for proactive expansion"""
388
- return {"tracked": True, "hash_key": hash_key}
389
-
390
-
391
- @app.post("/ccr/analyze")
392
- async def ccr_analyze(query: str, turn_number: int):
393
- """Analyze query for proactive CCR expansion"""
394
- # Simple keyword matching for expansion suggestions
395
- expansions = []
396
- for hash_key, entry in ccr_store.items():
397
- if query.lower() in json.dumps(entry["content"]).lower():
398
- expansions.append(
399
- {
400
- "hash": hash_key,
401
- "tool_name": entry.get("tool_name", "unknown"),
402
- "relevance": 0.8,
403
- }
404
- )
405
- return {"expansions": expansions[:5]}
406
-
407
-
408
- @app.post("/compress/llmlingua")
409
- async def llmlingua_compress(
410
- text: str,
411
- target_ratio: float = 0.5,
412
- force_tokens: Optional[str] = None,
413
- ):
414
- """Compress text using LLMLingua (if available)"""
415
- if not config.llmlingua_enabled:
416
- raise HTTPException(status_code=400, detail="LLMLingua is not enabled")
417
-
418
- try:
419
- # Try to import and use llmlingua
420
- from llmlingua import PromptCompressor
421
-
422
- compressor = PromptCompressor(device_map=config.llmlingua_device)
423
- result = compressor.compress_prompt(
424
- text,
425
- rate=target_ratio,
426
- force_tokens=json.loads(force_tokens) if force_tokens else None,
427
- )
428
- return {
429
- "compressed": result["compressed_prompt"],
430
- "original_tokens": result.get("origin_tokens", len(text) // 4),
431
- "compressed_tokens": result.get("compressed_tokens", len(result["compressed_prompt"]) // 4),
432
- "ratio": result.get("rate", target_ratio),
433
- }
434
- except ImportError:
435
- raise HTTPException(
436
- status_code=501,
437
- detail="LLMLingua not installed. Add llmlingua to requirements.txt",
438
- )
439
- except Exception as e:
440
- raise HTTPException(status_code=500, detail=str(e))
441
-
442
-
443
- if __name__ == "__main__":
444
- logger.info(f"Starting Headroom sidecar on {config.host}:{config.port}")
445
- logger.info(f"Configuration: {json.dumps(config.to_dict(), indent=2)}")
446
- uvicorn.run(
447
- app,
448
- host=config.host,
449
- port=config.port,
450
- log_level=config.log_level,
451
- )
package/monitor-agents.sh DELETED
@@ -1,31 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Monitor agent activity in real-time
4
-
5
- echo "🔍 Monitoring Agent Activity"
6
- echo "=============================="
7
- echo ""
8
-
9
- while true; do
10
- clear
11
- echo "🔍 Agent Statistics (refreshing every 3s)"
12
- echo "=========================================="
13
- echo ""
14
-
15
- # Get stats
16
- curl -s http://localhost:8080/v1/agents/stats | jq -r '.stats[] |
17
- "Agent: \(.agent_type)
18
- Executions: \(.total_executions) (\(.completed) completed, \(.failed) failed)
19
- Avg Duration: \(.avg_duration_ms)ms
20
- Tokens: \(.total_input_tokens) in / \(.total_output_tokens) out
21
- "' || echo "Proxy not responding..."
22
-
23
- echo ""
24
- echo "Latest transcripts:"
25
- ls -lt data/agent-transcripts/*.jsonl 2>/dev/null | head -3 || echo "No transcripts yet"
26
-
27
- echo ""
28
- echo "Press Ctrl+C to stop monitoring"
29
-
30
- sleep 3
31
- done