lynkr 7.2.5 → 8.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +3 -3
  2. package/config/model-tiers.json +89 -0
  3. package/install.sh +6 -1
  4. package/package.json +4 -2
  5. package/scripts/setup.js +0 -1
  6. package/src/agents/executor.js +14 -6
  7. package/src/api/middleware/session.js +15 -2
  8. package/src/api/openai-router.js +162 -37
  9. package/src/api/providers-handler.js +15 -1
  10. package/src/api/router.js +107 -2
  11. package/src/budget/index.js +4 -3
  12. package/src/clients/databricks.js +431 -234
  13. package/src/clients/gpt-utils.js +181 -0
  14. package/src/clients/ollama-utils.js +66 -140
  15. package/src/clients/routing.js +0 -1
  16. package/src/clients/standard-tools.js +99 -3
  17. package/src/config/index.js +133 -35
  18. package/src/context/toon.js +173 -0
  19. package/src/logger/index.js +23 -0
  20. package/src/orchestrator/index.js +688 -213
  21. package/src/routing/agentic-detector.js +320 -0
  22. package/src/routing/complexity-analyzer.js +202 -2
  23. package/src/routing/cost-optimizer.js +305 -0
  24. package/src/routing/index.js +168 -159
  25. package/src/routing/model-tiers.js +365 -0
  26. package/src/server.js +4 -14
  27. package/src/sessions/cleanup.js +3 -3
  28. package/src/sessions/record.js +10 -1
  29. package/src/sessions/store.js +7 -2
  30. package/src/tools/agent-task.js +48 -1
  31. package/src/tools/index.js +19 -2
  32. package/src/tools/lazy-loader.js +7 -0
  33. package/src/tools/tinyfish.js +358 -0
  34. package/src/tools/truncate.js +1 -0
  35. package/.github/FUNDING.yml +0 -15
  36. package/.github/workflows/README.md +0 -215
  37. package/.github/workflows/ci.yml +0 -69
  38. package/.github/workflows/index.yml +0 -62
  39. package/.github/workflows/web-tools-tests.yml +0 -56
  40. package/CITATIONS.bib +0 -6
  41. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  42. package/DEPLOYMENT.md +0 -1001
  43. package/LYNKR-TUI-PLAN.md +0 -984
  44. package/PERFORMANCE-REPORT.md +0 -866
  45. package/PLAN-per-client-model-routing.md +0 -252
  46. package/ROUTER_COMPARISON.md +0 -173
  47. package/TIER_ROUTING_PLAN.md +0 -771
  48. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  49. package/docs/BingSiteAuth.xml +0 -4
  50. package/docs/docs-style.css +0 -478
  51. package/docs/docs.html +0 -197
  52. package/docs/google5be250e608e6da39.html +0 -1
  53. package/docs/index.html +0 -577
  54. package/docs/index.md +0 -577
  55. package/docs/robots.txt +0 -4
  56. package/docs/sitemap.xml +0 -44
  57. package/docs/style.css +0 -1223
  58. package/documentation/README.md +0 -100
  59. package/documentation/api.md +0 -806
  60. package/documentation/claude-code-cli.md +0 -672
  61. package/documentation/codex-cli.md +0 -397
  62. package/documentation/contributing.md +0 -571
  63. package/documentation/cursor-integration.md +0 -731
  64. package/documentation/docker.md +0 -867
  65. package/documentation/embeddings.md +0 -760
  66. package/documentation/faq.md +0 -659
  67. package/documentation/features.md +0 -396
  68. package/documentation/headroom.md +0 -519
  69. package/documentation/installation.md +0 -706
  70. package/documentation/memory-system.md +0 -476
  71. package/documentation/production.md +0 -601
  72. package/documentation/providers.md +0 -906
  73. package/documentation/testing.md +0 -629
  74. package/documentation/token-optimization.md +0 -323
  75. package/documentation/tools.md +0 -697
  76. package/documentation/troubleshooting.md +0 -893
  77. package/final-test.js +0 -33
  78. package/headroom-sidecar/config.py +0 -93
  79. package/headroom-sidecar/requirements.txt +0 -14
  80. package/headroom-sidecar/server.py +0 -451
  81. package/monitor-agents.sh +0 -31
  82. package/scripts/audit-log-reader.js +0 -399
  83. package/scripts/compact-dictionary.js +0 -204
  84. package/scripts/test-deduplication.js +0 -448
  85. package/src/db/database.sqlite +0 -0
  86. package/test/README.md +0 -212
  87. package/test/azure-openai-config.test.js +0 -204
  88. package/test/azure-openai-error-resilience.test.js +0 -238
  89. package/test/azure-openai-format-conversion.test.js +0 -354
  90. package/test/azure-openai-integration.test.js +0 -281
  91. package/test/azure-openai-routing.test.js +0 -177
  92. package/test/azure-openai-streaming.test.js +0 -171
  93. package/test/bedrock-integration.test.js +0 -471
  94. package/test/comprehensive-test-suite.js +0 -928
  95. package/test/config-validation.test.js +0 -207
  96. package/test/cursor-integration.test.js +0 -484
  97. package/test/format-conversion.test.js +0 -578
  98. package/test/hybrid-routing-integration.test.js +0 -254
  99. package/test/hybrid-routing-performance.test.js +0 -418
  100. package/test/llamacpp-integration.test.js +0 -863
  101. package/test/lmstudio-integration.test.js +0 -335
  102. package/test/memory/extractor.test.js +0 -398
  103. package/test/memory/retriever.test.js +0 -613
  104. package/test/memory/retriever.test.js.bak +0 -585
  105. package/test/memory/search.test.js +0 -537
  106. package/test/memory/search.test.js.bak +0 -389
  107. package/test/memory/store.test.js +0 -344
  108. package/test/memory/store.test.js.bak +0 -312
  109. package/test/memory/surprise.test.js +0 -300
  110. package/test/memory-performance.test.js +0 -472
  111. package/test/openai-integration.test.js +0 -686
  112. package/test/openrouter-error-resilience.test.js +0 -418
  113. package/test/passthrough-mode.test.js +0 -385
  114. package/test/performance-benchmark.js +0 -351
  115. package/test/performance-tests.js +0 -528
  116. package/test/routing.test.js +0 -219
  117. package/test/web-tools.test.js +0 -329
  118. package/test-agents-simple.js +0 -43
  119. package/test-cli-connection.sh +0 -33
  120. package/test-learning-unit.js +0 -126
  121. package/test-learning.js +0 -112
  122. package/test-parallel-agents.sh +0 -124
  123. package/test-parallel-direct.js +0 -155
  124. package/test-subagents.sh +0 -117
package/final-test.js DELETED
@@ -1,33 +0,0 @@
1
- const http = require('http');
2
-
3
- const data = JSON.stringify({
4
- model: "claude-sonnet-4-5",
5
- max_tokens: 100,
6
- messages: [{ role: "user", content: "Say hello" }]
7
- });
8
-
9
- const req = http.request({
10
- hostname: 'localhost',
11
- port: 8081,
12
- path: '/v1/messages',
13
- method: 'POST',
14
- headers: { 'Content-Type': 'application/json', 'Content-Length': data.length }
15
- }, (res) => {
16
- let body = '';
17
- res.on('data', chunk => body += chunk);
18
- res.on('end', () => {
19
- console.log('Status:', res.statusCode);
20
- if (res.statusCode === 200) {
21
- const json = JSON.parse(body);
22
- console.log('✅ SUCCESS!');
23
- console.log('Model:', json.model);
24
- console.log('Response:', json.content[0].text.substring(0, 150));
25
- } else {
26
- console.log('❌ Error:', body.substring(0, 300));
27
- }
28
- });
29
- });
30
-
31
- req.on('error', e => console.error('Request failed:', e.message));
32
- req.write(data);
33
- req.end();
package/headroom-sidecar/config.py DELETED
@@ -1,93 +0,0 @@
1
- """
2
- Headroom Sidecar Configuration
3
- Loads settings from environment variables
4
- """
5
-
6
- import os
7
- from typing import Optional
8
-
9
-
10
- def str_to_bool(value: str) -> bool:
11
- """Convert string to boolean"""
12
- return value.lower() in ("true", "1", "yes", "on")
13
-
14
-
15
- class HeadroomConfig:
16
- """Configuration for Headroom sidecar"""
17
-
18
- def __init__(self):
19
- # Server settings
20
- self.host = os.environ.get("HEADROOM_HOST", "0.0.0.0")
21
- self.port = int(os.environ.get("HEADROOM_PORT", "8787"))
22
- self.log_level = os.environ.get("HEADROOM_LOG_LEVEL", "info")
23
-
24
- # Operating mode
25
- self.mode = os.environ.get("HEADROOM_MODE", "optimize")
26
- self.provider = os.environ.get("HEADROOM_PROVIDER", "anthropic")
27
-
28
- # Smart Crusher settings
29
- self.smart_crusher_enabled = str_to_bool(
30
- os.environ.get("HEADROOM_SMART_CRUSHER", "true")
31
- )
32
- self.smart_crusher_min_tokens = int(
33
- os.environ.get("HEADROOM_SMART_CRUSHER_MIN_TOKENS", "200")
34
- )
35
- self.smart_crusher_max_items = int(
36
- os.environ.get("HEADROOM_SMART_CRUSHER_MAX_ITEMS", "15")
37
- )
38
-
39
- # Tool Crusher settings
40
- self.tool_crusher_enabled = str_to_bool(
41
- os.environ.get("HEADROOM_TOOL_CRUSHER", "true")
42
- )
43
-
44
- # Cache Aligner settings
45
- self.cache_aligner_enabled = str_to_bool(
46
- os.environ.get("HEADROOM_CACHE_ALIGNER", "true")
47
- )
48
-
49
- # Rolling Window settings
50
- self.rolling_window_enabled = str_to_bool(
51
- os.environ.get("HEADROOM_ROLLING_WINDOW", "true")
52
- )
53
- self.keep_turns = int(os.environ.get("HEADROOM_KEEP_TURNS", "3"))
54
-
55
- # CCR settings
56
- self.ccr_enabled = str_to_bool(os.environ.get("HEADROOM_CCR", "true"))
57
- self.ccr_ttl = int(os.environ.get("HEADROOM_CCR_TTL", "300"))
58
-
59
- # LLMLingua settings
60
- self.llmlingua_enabled = str_to_bool(
61
- os.environ.get("HEADROOM_LLMLINGUA", "false")
62
- )
63
- self.llmlingua_device = os.environ.get("HEADROOM_LLMLINGUA_DEVICE", "auto")
64
-
65
- def to_dict(self) -> dict:
66
- """Return configuration as dictionary"""
67
- return {
68
- "host": self.host,
69
- "port": self.port,
70
- "log_level": self.log_level,
71
- "mode": self.mode,
72
- "provider": self.provider,
73
- "smart_crusher": {
74
- "enabled": self.smart_crusher_enabled,
75
- "min_tokens": self.smart_crusher_min_tokens,
76
- "max_items": self.smart_crusher_max_items,
77
- },
78
- "tool_crusher": {"enabled": self.tool_crusher_enabled},
79
- "cache_aligner": {"enabled": self.cache_aligner_enabled},
80
- "rolling_window": {
81
- "enabled": self.rolling_window_enabled,
82
- "keep_turns": self.keep_turns,
83
- },
84
- "ccr": {"enabled": self.ccr_enabled, "ttl": self.ccr_ttl},
85
- "llmlingua": {
86
- "enabled": self.llmlingua_enabled,
87
- "device": self.llmlingua_device,
88
- },
89
- }
90
-
91
-
92
- # Global config instance
93
- config = HeadroomConfig()
package/headroom-sidecar/requirements.txt DELETED
@@ -1,14 +0,0 @@
1
- # Headroom Sidecar Dependencies
2
-
3
- # Core framework
4
- fastapi>=0.109.0
5
- uvicorn[standard]>=0.27.0
6
- pydantic>=2.5.0
7
-
8
- # Headroom SDK
9
- headroom-ai>=0.1.0
10
-
11
- # Optional: LLMLingua support (uncomment for ML compression)
12
- # llmlingua>=0.2.0
13
- # torch>=2.0.0
14
- # transformers>=4.36.0
package/headroom-sidecar/server.py DELETED
@@ -1,451 +0,0 @@
1
- """
2
- Headroom Sidecar Server
3
- FastAPI application providing context compression via HTTP API
4
- """
5
-
6
- import logging
7
- import time
8
- import hashlib
9
- import json
10
- from typing import Any, Dict, List, Optional
11
- from datetime import datetime
12
-
13
- from fastapi import FastAPI, HTTPException
14
- from fastapi.responses import JSONResponse
15
- from pydantic import BaseModel
16
- import uvicorn
17
-
18
- from config import config
19
-
20
- # Setup logging
21
- logging.basicConfig(
22
- level=getattr(logging, config.log_level.upper()),
23
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
24
- )
25
- logger = logging.getLogger("headroom-sidecar")
26
-
27
- # Initialize FastAPI app
28
- app = FastAPI(
29
- title="Headroom Sidecar",
30
- description="Context compression service for LLM requests",
31
- version="1.0.0",
32
- )
33
-
34
- # Try to import headroom, fallback to basic compression if not available
35
- try:
36
- from headroom import (
37
- TransformPipeline,
38
- SmartCrusher,
39
- SmartCrusherConfig,
40
- ToolCrusher,
41
- ToolCrusherConfig,
42
- RollingWindow,
43
- RollingWindowConfig,
44
- AnthropicProvider,
45
- OpenAIProvider,
46
- )
47
- import warnings
48
- warnings.filterwarnings("ignore", message=".*tiktoken approximation.*")
49
-
50
- # Create transforms based on config
51
- transforms = []
52
-
53
- if config.smart_crusher_enabled:
54
- transforms.append(SmartCrusher(SmartCrusherConfig(
55
- enabled=True,
56
- min_tokens_to_crush=config.smart_crusher_min_tokens,
57
- max_items_after_crush=config.smart_crusher_max_items,
58
- )))
59
- logger.info("SmartCrusher enabled")
60
-
61
- if config.tool_crusher_enabled:
62
- transforms.append(ToolCrusher(ToolCrusherConfig(
63
- enabled=True,
64
- )))
65
- logger.info("ToolCrusher enabled")
66
-
67
- if config.rolling_window_enabled:
68
- transforms.append(RollingWindow(RollingWindowConfig(
69
- enabled=True,
70
- keep_last_turns=config.keep_turns,
71
- )))
72
- logger.info("RollingWindow enabled")
73
-
74
- # Create provider based on config
75
- if config.provider == "openai":
76
- headroom_provider = OpenAIProvider()
77
- else:
78
- headroom_provider = AnthropicProvider()
79
-
80
- headroom_pipeline = TransformPipeline(transforms=transforms, provider=headroom_provider) if transforms else None
81
- HEADROOM_AVAILABLE = headroom_pipeline is not None
82
- logger.info(f"Headroom SDK loaded successfully with {len(transforms)} transforms (provider: {config.provider})")
83
- except ImportError as e:
84
- logger.warning(f"Headroom SDK not available: {e}. Using basic compression.")
85
- headroom_pipeline = None
86
- HEADROOM_AVAILABLE = False
87
-
88
- # CCR Store (in-memory with TTL)
89
- ccr_store: Dict[str, Dict[str, Any]] = {}
90
-
91
- # Metrics
92
- metrics = {
93
- "requests_total": 0,
94
- "compressions_applied": 0,
95
- "compressions_skipped": 0,
96
- "errors": 0,
97
- "ccr_stores": 0,
98
- "ccr_retrievals": 0,
99
- "total_tokens_before": 0,
100
- "total_tokens_after": 0,
101
- "start_time": datetime.utcnow().isoformat(),
102
- }
103
-
104
-
105
- # Request/Response models
106
- class CompressRequest(BaseModel):
107
- messages: List[Dict[str, Any]]
108
- tools: Optional[List[Dict[str, Any]]] = None
109
- model: Optional[str] = "claude-3-5-sonnet-20241022"
110
- model_limit: Optional[int] = 200000
111
- mode: Optional[str] = None
112
- token_budget: Optional[int] = None
113
- query_context: Optional[str] = None
114
- preserve_recent_turns: Optional[int] = None
115
- target_ratio: Optional[float] = None
116
-
117
-
118
- class CompressResponse(BaseModel):
119
- messages: List[Dict[str, Any]]
120
- tools: Optional[List[Dict[str, Any]]] = None
121
- compressed: bool
122
- stats: Dict[str, Any]
123
-
124
-
125
- class CCRRetrieveRequest(BaseModel):
126
- hash: str
127
- query: Optional[str] = None
128
- max_results: Optional[int] = 20
129
-
130
-
131
- class CCRRetrieveResponse(BaseModel):
132
- success: bool
133
- content: Optional[Any] = None
134
- items_retrieved: int = 0
135
- was_search: bool = False
136
- error: Optional[str] = None
137
-
138
-
139
- def estimate_tokens(data: Any) -> int:
140
- """Estimate token count (rough approximation: ~4 chars per token)"""
141
- text = json.dumps(data) if not isinstance(data, str) else data
142
- return len(text) // 4
143
-
144
-
145
- def generate_hash(content: Any) -> str:
146
- """Generate hash for CCR storage"""
147
- text = json.dumps(content, sort_keys=True)
148
- return hashlib.sha256(text.encode()).hexdigest()[:12]
149
-
150
-
151
- def cleanup_expired_ccr():
152
- """Remove expired CCR entries"""
153
- now = time.time()
154
- expired = [k for k, v in ccr_store.items() if now - v["timestamp"] > config.ccr_ttl]
155
- for key in expired:
156
- del ccr_store[key]
157
-
158
-
159
- def basic_compress(messages: List[Dict], tools: Optional[List] = None) -> Dict:
160
- """Basic compression when Headroom SDK is not available"""
161
- tokens_before = estimate_tokens(messages)
162
- compressed_messages = []
163
-
164
- for msg in messages:
165
- compressed_msg = msg.copy()
166
-
167
- # Compress large tool results
168
- if msg.get("role") == "user" and isinstance(msg.get("content"), list):
169
- new_content = []
170
- for block in msg["content"]:
171
- if block.get("type") == "tool_result":
172
- content = block.get("content", "")
173
- if isinstance(content, str) and len(content) > 2000:
174
- # Store in CCR and replace with reference
175
- hash_key = generate_hash(content)
176
- ccr_store[hash_key] = {
177
- "content": content,
178
- "timestamp": time.time(),
179
- "tool_name": block.get("tool_use_id", "unknown"),
180
- }
181
- metrics["ccr_stores"] += 1
182
- block = block.copy()
183
- block["content"] = (
184
- f"[CCR:{hash_key}] Content compressed ({len(content)} chars). "
185
- f"Use ccr_retrieve to access full content."
186
- )
187
- new_content.append(block)
188
- compressed_msg["content"] = new_content
189
- compressed_messages.append(compressed_msg)
190
-
191
- tokens_after = estimate_tokens(compressed_messages)
192
-
193
- return {
194
- "messages": compressed_messages,
195
- "tools": tools,
196
- "compressed": tokens_after < tokens_before,
197
- "stats": {
198
- "tokens_before": tokens_before,
199
- "tokens_after": tokens_after,
200
- "tokens_saved": tokens_before - tokens_after,
201
- "savings_percent": round(
202
- (1 - tokens_after / tokens_before) * 100, 1
203
- ) if tokens_before > 0 else 0,
204
- "transforms_applied": ["basic_ccr"] if tokens_after < tokens_before else [],
205
- "latency_ms": 0,
206
- },
207
- }
208
-
209
-
210
- @app.get("/health")
211
- async def health_check():
212
- """Health check endpoint"""
213
- cleanup_expired_ccr()
214
- return {
215
- "status": "healthy",
216
- "headroom_loaded": HEADROOM_AVAILABLE,
217
- "ccr_enabled": config.ccr_enabled,
218
- "llmlingua_enabled": config.llmlingua_enabled,
219
- "entries_cached": len(ccr_store),
220
- "config": config.to_dict(),
221
- }
222
-
223
-
224
- @app.get("/metrics")
225
- async def get_metrics():
226
- """Get compression metrics"""
227
- return {
228
- **metrics,
229
- "average_compression_ratio": (
230
- round(metrics["total_tokens_after"] / metrics["total_tokens_before"], 3)
231
- if metrics["total_tokens_before"] > 0
232
- else 1.0
233
- ),
234
- "ccr_entries": len(ccr_store),
235
- "uptime_seconds": (
236
- datetime.utcnow() - datetime.fromisoformat(metrics["start_time"])
237
- ).total_seconds(),
238
- }
239
-
240
-
241
- @app.post("/compress", response_model=CompressResponse)
242
- async def compress_messages(request: CompressRequest):
243
- """Compress messages and tools"""
244
- start_time = time.time()
245
- metrics["requests_total"] += 1
246
-
247
- try:
248
- tokens_before = estimate_tokens(request.messages)
249
- metrics["total_tokens_before"] += tokens_before
250
-
251
- # Skip if below minimum tokens
252
- if tokens_before < config.smart_crusher_min_tokens:
253
- metrics["compressions_skipped"] += 1
254
- return CompressResponse(
255
- messages=request.messages,
256
- tools=request.tools,
257
- compressed=False,
258
- stats={
259
- "skipped": True,
260
- "reason": f"Below threshold ({tokens_before} < {config.smart_crusher_min_tokens})",
261
- },
262
- )
263
-
264
- # Use Headroom SDK if available
265
- if HEADROOM_AVAILABLE and headroom_pipeline:
266
- try:
267
- result = headroom_pipeline.apply(
268
- request.messages,
269
- model=request.model,
270
- model_limit=request.model_limit,
271
- )
272
-
273
- # Extract messages from TransformResult
274
- if hasattr(result, 'messages'):
275
- compressed_messages = result.messages
276
- # transforms_applied may be strings or objects with .name
277
- if hasattr(result, 'transforms_applied'):
278
- transforms_applied = [t if isinstance(t, str) else getattr(t, 'name', str(t)) for t in result.transforms_applied]
279
- else:
280
- transforms_applied = []
281
- elif isinstance(result, dict):
282
- compressed_messages = result.get("messages", request.messages)
283
- transforms_applied = result.get("transforms", [])
284
- else:
285
- compressed_messages = result if isinstance(result, list) else request.messages
286
- transforms_applied = []
287
-
288
- tokens_after = estimate_tokens(compressed_messages)
289
- metrics["total_tokens_after"] += tokens_after
290
- metrics["compressions_applied"] += 1
291
-
292
- return CompressResponse(
293
- messages=compressed_messages,
294
- tools=request.tools, # Tools not modified by current transforms
295
- compressed=tokens_after < tokens_before,
296
- stats={
297
- "tokens_before": tokens_before,
298
- "tokens_after": tokens_after,
299
- "tokens_saved": tokens_before - tokens_after,
300
- "savings_percent": round(
301
- (1 - tokens_after / tokens_before) * 100, 1
302
- ) if tokens_before > 0 else 0,
303
- "transforms_applied": transforms_applied,
304
- "latency_ms": round((time.time() - start_time) * 1000, 1),
305
- },
306
- )
307
- except Exception as e:
308
- logger.warning(f"Headroom SDK error, falling back to basic: {e}")
309
-
310
- # Fallback to basic compression
311
- result = basic_compress(request.messages, request.tools)
312
- metrics["total_tokens_after"] += result["stats"]["tokens_after"]
313
- if result["compressed"]:
314
- metrics["compressions_applied"] += 1
315
- else:
316
- metrics["compressions_skipped"] += 1
317
-
318
- result["stats"]["latency_ms"] = round((time.time() - start_time) * 1000, 1)
319
- return CompressResponse(**result)
320
-
321
- except Exception as e:
322
- metrics["errors"] += 1
323
- logger.error(f"Compression error: {e}")
324
- raise HTTPException(status_code=500, detail=str(e))
325
-
326
-
327
- @app.post("/ccr/retrieve", response_model=CCRRetrieveResponse)
328
- async def ccr_retrieve(request: CCRRetrieveRequest):
329
- """Retrieve content from CCR store"""
330
- cleanup_expired_ccr()
331
-
332
- if request.hash not in ccr_store:
333
- return CCRRetrieveResponse(
334
- success=False,
335
- error=f"Hash {request.hash} not found or expired",
336
- )
337
-
338
- entry = ccr_store[request.hash]
339
- content = entry["content"]
340
- metrics["ccr_retrievals"] += 1
341
-
342
- # If query provided, search within content
343
- if request.query:
344
- if isinstance(content, list):
345
- # Filter list items by query
346
- filtered = [
347
- item
348
- for item in content
349
- if request.query.lower() in json.dumps(item).lower()
350
- ][: request.max_results]
351
- return CCRRetrieveResponse(
352
- success=True,
353
- content=filtered,
354
- items_retrieved=len(filtered),
355
- was_search=True,
356
- )
357
- elif isinstance(content, str):
358
- # Return content if query matches
359
- if request.query.lower() in content.lower():
360
- return CCRRetrieveResponse(
361
- success=True,
362
- content=content,
363
- items_retrieved=1,
364
- was_search=True,
365
- )
366
- return CCRRetrieveResponse(
367
- success=False,
368
- error="Query not found in content",
369
- )
370
-
371
- # Return full content
372
- return CCRRetrieveResponse(
373
- success=True,
374
- content=content,
375
- items_retrieved=1 if not isinstance(content, list) else len(content),
376
- was_search=False,
377
- )
378
-
379
-
380
- @app.post("/ccr/track")
381
- async def ccr_track(
382
- hash_key: str,
383
- turn_number: int,
384
- tool_name: str,
385
- sample: str,
386
- ):
387
- """Track compression for proactive expansion"""
388
- return {"tracked": True, "hash_key": hash_key}
389
-
390
-
391
- @app.post("/ccr/analyze")
392
- async def ccr_analyze(query: str, turn_number: int):
393
- """Analyze query for proactive CCR expansion"""
394
- # Simple keyword matching for expansion suggestions
395
- expansions = []
396
- for hash_key, entry in ccr_store.items():
397
- if query.lower() in json.dumps(entry["content"]).lower():
398
- expansions.append(
399
- {
400
- "hash": hash_key,
401
- "tool_name": entry.get("tool_name", "unknown"),
402
- "relevance": 0.8,
403
- }
404
- )
405
- return {"expansions": expansions[:5]}
406
-
407
-
408
- @app.post("/compress/llmlingua")
409
- async def llmlingua_compress(
410
- text: str,
411
- target_ratio: float = 0.5,
412
- force_tokens: Optional[str] = None,
413
- ):
414
- """Compress text using LLMLingua (if available)"""
415
- if not config.llmlingua_enabled:
416
- raise HTTPException(status_code=400, detail="LLMLingua is not enabled")
417
-
418
- try:
419
- # Try to import and use llmlingua
420
- from llmlingua import PromptCompressor
421
-
422
- compressor = PromptCompressor(device_map=config.llmlingua_device)
423
- result = compressor.compress_prompt(
424
- text,
425
- rate=target_ratio,
426
- force_tokens=json.loads(force_tokens) if force_tokens else None,
427
- )
428
- return {
429
- "compressed": result["compressed_prompt"],
430
- "original_tokens": result.get("origin_tokens", len(text) // 4),
431
- "compressed_tokens": result.get("compressed_tokens", len(result["compressed_prompt"]) // 4),
432
- "ratio": result.get("rate", target_ratio),
433
- }
434
- except ImportError:
435
- raise HTTPException(
436
- status_code=501,
437
- detail="LLMLingua not installed. Add llmlingua to requirements.txt",
438
- )
439
- except Exception as e:
440
- raise HTTPException(status_code=500, detail=str(e))
441
-
442
-
443
- if __name__ == "__main__":
444
- logger.info(f"Starting Headroom sidecar on {config.host}:{config.port}")
445
- logger.info(f"Configuration: {json.dumps(config.to_dict(), indent=2)}")
446
- uvicorn.run(
447
- app,
448
- host=config.host,
449
- port=config.port,
450
- log_level=config.log_level,
451
- )
package/monitor-agents.sh DELETED
@@ -1,31 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Monitor agent activity in real-time
4
-
5
- echo "🔍 Monitoring Agent Activity"
6
- echo "=============================="
7
- echo ""
8
-
9
- while true; do
10
- clear
11
- echo "🔍 Agent Statistics (refreshing every 3s)"
12
- echo "=========================================="
13
- echo ""
14
-
15
- # Get stats
16
- curl -s http://localhost:8080/v1/agents/stats | jq -r '.stats[] |
17
- "Agent: \(.agent_type)
18
- Executions: \(.total_executions) (\(.completed) completed, \(.failed) failed)
19
- Avg Duration: \(.avg_duration_ms)ms
20
- Tokens: \(.total_input_tokens) in / \(.total_output_tokens) out
21
- "' || echo "Proxy not responding..."
22
-
23
- echo ""
24
- echo "Latest transcripts:"
25
- ls -lt data/agent-transcripts/*.jsonl 2>/dev/null | head -3 || echo "No transcripts yet"
26
-
27
- echo ""
28
- echo "Press Ctrl+C to stop monitoring"
29
-
30
- sleep 3
31
- done