lynkr 8.0.0 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/.lynkr/telemetry.db +0 -0
  2. package/.lynkr/telemetry.db-shm +0 -0
  3. package/.lynkr/telemetry.db-wal +0 -0
  4. package/README.md +196 -322
  5. package/lynkr-skill.tar.gz +0 -0
  6. package/package.json +4 -3
  7. package/src/api/openai-router.js +64 -13
  8. package/src/api/providers-handler.js +171 -3
  9. package/src/api/router.js +9 -2
  10. package/src/clients/circuit-breaker.js +10 -247
  11. package/src/clients/codex-process.js +342 -0
  12. package/src/clients/codex-utils.js +143 -0
  13. package/src/clients/databricks.js +210 -63
  14. package/src/clients/resilience.js +540 -0
  15. package/src/clients/retry.js +22 -167
  16. package/src/clients/standard-tools.js +23 -0
  17. package/src/config/index.js +77 -0
  18. package/src/context/compression.js +42 -9
  19. package/src/context/distill.js +492 -0
  20. package/src/orchestrator/index.js +48 -8
  21. package/src/routing/complexity-analyzer.js +258 -5
  22. package/src/routing/index.js +12 -2
  23. package/src/routing/latency-tracker.js +148 -0
  24. package/src/routing/model-tiers.js +2 -0
  25. package/src/routing/quality-scorer.js +113 -0
  26. package/src/routing/telemetry.js +464 -0
  27. package/src/server.js +13 -12
  28. package/src/tools/code-graph.js +538 -0
  29. package/src/tools/code-mode.js +304 -0
  30. package/src/tools/index.js +4 -0
  31. package/src/tools/lazy-loader.js +18 -0
  32. package/src/tools/mcp-remote.js +7 -0
  33. package/src/tools/smart-selection.js +11 -0
  34. package/src/tools/tinyfish.js +358 -0
  35. package/src/tools/truncate.js +1 -0
  36. package/src/utils/payload.js +206 -0
  37. package/src/utils/perf-timer.js +80 -0
  38. package/.github/FUNDING.yml +0 -15
  39. package/.github/workflows/README.md +0 -215
  40. package/.github/workflows/ci.yml +0 -69
  41. package/.github/workflows/index.yml +0 -62
  42. package/.github/workflows/web-tools-tests.yml +0 -56
  43. package/CITATIONS.bib +0 -6
  44. package/DEPLOYMENT.md +0 -1001
  45. package/LYNKR-TUI-PLAN.md +0 -984
  46. package/PERFORMANCE-REPORT.md +0 -866
  47. package/PLAN-per-client-model-routing.md +0 -252
  48. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  49. package/docs/BingSiteAuth.xml +0 -4
  50. package/docs/docs-style.css +0 -478
  51. package/docs/docs.html +0 -198
  52. package/docs/google5be250e608e6da39.html +0 -1
  53. package/docs/index.html +0 -577
  54. package/docs/index.md +0 -584
  55. package/docs/robots.txt +0 -4
  56. package/docs/sitemap.xml +0 -44
  57. package/docs/style.css +0 -1223
  58. package/docs/toon-integration-spec.md +0 -130
  59. package/documentation/README.md +0 -101
  60. package/documentation/api.md +0 -806
  61. package/documentation/claude-code-cli.md +0 -679
  62. package/documentation/codex-cli.md +0 -397
  63. package/documentation/contributing.md +0 -571
  64. package/documentation/cursor-integration.md +0 -734
  65. package/documentation/docker.md +0 -874
  66. package/documentation/embeddings.md +0 -762
  67. package/documentation/faq.md +0 -713
  68. package/documentation/features.md +0 -403
  69. package/documentation/headroom.md +0 -519
  70. package/documentation/installation.md +0 -758
  71. package/documentation/memory-system.md +0 -476
  72. package/documentation/production.md +0 -636
  73. package/documentation/providers.md +0 -1009
  74. package/documentation/routing.md +0 -476
  75. package/documentation/testing.md +0 -629
  76. package/documentation/token-optimization.md +0 -325
  77. package/documentation/tools.md +0 -697
  78. package/documentation/troubleshooting.md +0 -969
  79. package/final-test.js +0 -33
  80. package/headroom-sidecar/config.py +0 -93
  81. package/headroom-sidecar/requirements.txt +0 -14
  82. package/headroom-sidecar/server.py +0 -451
  83. package/monitor-agents.sh +0 -31
  84. package/scripts/audit-log-reader.js +0 -399
  85. package/scripts/compact-dictionary.js +0 -204
  86. package/scripts/test-deduplication.js +0 -448
  87. package/src/db/database.sqlite +0 -0
  88. package/te +0 -11622
  89. package/test/README.md +0 -212
  90. package/test/azure-openai-config.test.js +0 -213
  91. package/test/azure-openai-error-resilience.test.js +0 -238
  92. package/test/azure-openai-format-conversion.test.js +0 -354
  93. package/test/azure-openai-integration.test.js +0 -287
  94. package/test/azure-openai-routing.test.js +0 -175
  95. package/test/azure-openai-streaming.test.js +0 -171
  96. package/test/bedrock-integration.test.js +0 -457
  97. package/test/comprehensive-test-suite.js +0 -928
  98. package/test/config-validation.test.js +0 -207
  99. package/test/cursor-integration.test.js +0 -484
  100. package/test/format-conversion.test.js +0 -578
  101. package/test/hybrid-routing-integration.test.js +0 -269
  102. package/test/hybrid-routing-performance.test.js +0 -428
  103. package/test/llamacpp-integration.test.js +0 -882
  104. package/test/lmstudio-integration.test.js +0 -347
  105. package/test/memory/extractor.test.js +0 -398
  106. package/test/memory/retriever.test.js +0 -613
  107. package/test/memory/retriever.test.js.bak +0 -585
  108. package/test/memory/search.test.js +0 -537
  109. package/test/memory/search.test.js.bak +0 -389
  110. package/test/memory/store.test.js +0 -344
  111. package/test/memory/store.test.js.bak +0 -312
  112. package/test/memory/surprise.test.js +0 -300
  113. package/test/memory-performance.test.js +0 -472
  114. package/test/openai-integration.test.js +0 -683
  115. package/test/openrouter-error-resilience.test.js +0 -418
  116. package/test/passthrough-mode.test.js +0 -385
  117. package/test/performance-benchmark.js +0 -351
  118. package/test/performance-tests.js +0 -528
  119. package/test/routing.test.js +0 -225
  120. package/test/toon-compression.test.js +0 -131
  121. package/test/web-tools.test.js +0 -329
  122. package/test-agents-simple.js +0 -43
  123. package/test-cli-connection.sh +0 -33
  124. package/test-learning-unit.js +0 -126
  125. package/test-learning.js +0 -112
  126. package/test-parallel-agents.sh +0 -124
  127. package/test-parallel-direct.js +0 -155
  128. package/test-subagents.sh +0 -117
package/final-test.js DELETED
@@ -1,33 +0,0 @@
1
- const http = require('http');
2
-
3
- const data = JSON.stringify({
4
- model: "claude-sonnet-4-5",
5
- max_tokens: 100,
6
- messages: [{ role: "user", content: "Say hello" }]
7
- });
8
-
9
- const req = http.request({
10
- hostname: 'localhost',
11
- port: 8081,
12
- path: '/v1/messages',
13
- method: 'POST',
14
- headers: { 'Content-Type': 'application/json', 'Content-Length': data.length }
15
- }, (res) => {
16
- let body = '';
17
- res.on('data', chunk => body += chunk);
18
- res.on('end', () => {
19
- console.log('Status:', res.statusCode);
20
- if (res.statusCode === 200) {
21
- const json = JSON.parse(body);
22
- console.log('✅ SUCCESS!');
23
- console.log('Model:', json.model);
24
- console.log('Response:', json.content[0].text.substring(0, 150));
25
- } else {
26
- console.log('❌ Error:', body.substring(0, 300));
27
- }
28
- });
29
- });
30
-
31
- req.on('error', e => console.error('Request failed:', e.message));
32
- req.write(data);
33
- req.end();
package/headroom-sidecar/config.py DELETED
@@ -1,93 +0,0 @@
1
- """
2
- Headroom Sidecar Configuration
3
- Loads settings from environment variables
4
- """
5
-
6
- import os
7
- from typing import Optional
8
-
9
-
10
- def str_to_bool(value: str) -> bool:
11
- """Convert string to boolean"""
12
- return value.lower() in ("true", "1", "yes", "on")
13
-
14
-
15
- class HeadroomConfig:
16
- """Configuration for Headroom sidecar"""
17
-
18
- def __init__(self):
19
- # Server settings
20
- self.host = os.environ.get("HEADROOM_HOST", "0.0.0.0")
21
- self.port = int(os.environ.get("HEADROOM_PORT", "8787"))
22
- self.log_level = os.environ.get("HEADROOM_LOG_LEVEL", "info")
23
-
24
- # Operating mode
25
- self.mode = os.environ.get("HEADROOM_MODE", "optimize")
26
- self.provider = os.environ.get("HEADROOM_PROVIDER", "anthropic")
27
-
28
- # Smart Crusher settings
29
- self.smart_crusher_enabled = str_to_bool(
30
- os.environ.get("HEADROOM_SMART_CRUSHER", "true")
31
- )
32
- self.smart_crusher_min_tokens = int(
33
- os.environ.get("HEADROOM_SMART_CRUSHER_MIN_TOKENS", "200")
34
- )
35
- self.smart_crusher_max_items = int(
36
- os.environ.get("HEADROOM_SMART_CRUSHER_MAX_ITEMS", "15")
37
- )
38
-
39
- # Tool Crusher settings
40
- self.tool_crusher_enabled = str_to_bool(
41
- os.environ.get("HEADROOM_TOOL_CRUSHER", "true")
42
- )
43
-
44
- # Cache Aligner settings
45
- self.cache_aligner_enabled = str_to_bool(
46
- os.environ.get("HEADROOM_CACHE_ALIGNER", "true")
47
- )
48
-
49
- # Rolling Window settings
50
- self.rolling_window_enabled = str_to_bool(
51
- os.environ.get("HEADROOM_ROLLING_WINDOW", "true")
52
- )
53
- self.keep_turns = int(os.environ.get("HEADROOM_KEEP_TURNS", "3"))
54
-
55
- # CCR settings
56
- self.ccr_enabled = str_to_bool(os.environ.get("HEADROOM_CCR", "true"))
57
- self.ccr_ttl = int(os.environ.get("HEADROOM_CCR_TTL", "300"))
58
-
59
- # LLMLingua settings
60
- self.llmlingua_enabled = str_to_bool(
61
- os.environ.get("HEADROOM_LLMLINGUA", "false")
62
- )
63
- self.llmlingua_device = os.environ.get("HEADROOM_LLMLINGUA_DEVICE", "auto")
64
-
65
- def to_dict(self) -> dict:
66
- """Return configuration as dictionary"""
67
- return {
68
- "host": self.host,
69
- "port": self.port,
70
- "log_level": self.log_level,
71
- "mode": self.mode,
72
- "provider": self.provider,
73
- "smart_crusher": {
74
- "enabled": self.smart_crusher_enabled,
75
- "min_tokens": self.smart_crusher_min_tokens,
76
- "max_items": self.smart_crusher_max_items,
77
- },
78
- "tool_crusher": {"enabled": self.tool_crusher_enabled},
79
- "cache_aligner": {"enabled": self.cache_aligner_enabled},
80
- "rolling_window": {
81
- "enabled": self.rolling_window_enabled,
82
- "keep_turns": self.keep_turns,
83
- },
84
- "ccr": {"enabled": self.ccr_enabled, "ttl": self.ccr_ttl},
85
- "llmlingua": {
86
- "enabled": self.llmlingua_enabled,
87
- "device": self.llmlingua_device,
88
- },
89
- }
90
-
91
-
92
- # Global config instance
93
- config = HeadroomConfig()
package/headroom-sidecar/requirements.txt DELETED
@@ -1,14 +0,0 @@
1
- # Headroom Sidecar Dependencies
2
-
3
- # Core framework
4
- fastapi>=0.109.0
5
- uvicorn[standard]>=0.27.0
6
- pydantic>=2.5.0
7
-
8
- # Headroom SDK
9
- headroom-ai>=0.1.0
10
-
11
- # Optional: LLMLingua support (uncomment for ML compression)
12
- # llmlingua>=0.2.0
13
- # torch>=2.0.0
14
- # transformers>=4.36.0
package/headroom-sidecar/server.py DELETED
@@ -1,451 +0,0 @@
1
- """
2
- Headroom Sidecar Server
3
- FastAPI application providing context compression via HTTP API
4
- """
5
-
6
- import logging
7
- import time
8
- import hashlib
9
- import json
10
- from typing import Any, Dict, List, Optional
11
- from datetime import datetime
12
-
13
- from fastapi import FastAPI, HTTPException
14
- from fastapi.responses import JSONResponse
15
- from pydantic import BaseModel
16
- import uvicorn
17
-
18
- from config import config
19
-
20
- # Setup logging
21
- logging.basicConfig(
22
- level=getattr(logging, config.log_level.upper()),
23
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
24
- )
25
- logger = logging.getLogger("headroom-sidecar")
26
-
27
- # Initialize FastAPI app
28
- app = FastAPI(
29
- title="Headroom Sidecar",
30
- description="Context compression service for LLM requests",
31
- version="1.0.0",
32
- )
33
-
34
- # Try to import headroom, fallback to basic compression if not available
35
- try:
36
- from headroom import (
37
- TransformPipeline,
38
- SmartCrusher,
39
- SmartCrusherConfig,
40
- ToolCrusher,
41
- ToolCrusherConfig,
42
- RollingWindow,
43
- RollingWindowConfig,
44
- AnthropicProvider,
45
- OpenAIProvider,
46
- )
47
- import warnings
48
- warnings.filterwarnings("ignore", message=".*tiktoken approximation.*")
49
-
50
- # Create transforms based on config
51
- transforms = []
52
-
53
- if config.smart_crusher_enabled:
54
- transforms.append(SmartCrusher(SmartCrusherConfig(
55
- enabled=True,
56
- min_tokens_to_crush=config.smart_crusher_min_tokens,
57
- max_items_after_crush=config.smart_crusher_max_items,
58
- )))
59
- logger.info("SmartCrusher enabled")
60
-
61
- if config.tool_crusher_enabled:
62
- transforms.append(ToolCrusher(ToolCrusherConfig(
63
- enabled=True,
64
- )))
65
- logger.info("ToolCrusher enabled")
66
-
67
- if config.rolling_window_enabled:
68
- transforms.append(RollingWindow(RollingWindowConfig(
69
- enabled=True,
70
- keep_last_turns=config.keep_turns,
71
- )))
72
- logger.info("RollingWindow enabled")
73
-
74
- # Create provider based on config
75
- if config.provider == "openai":
76
- headroom_provider = OpenAIProvider()
77
- else:
78
- headroom_provider = AnthropicProvider()
79
-
80
- headroom_pipeline = TransformPipeline(transforms=transforms, provider=headroom_provider) if transforms else None
81
- HEADROOM_AVAILABLE = headroom_pipeline is not None
82
- logger.info(f"Headroom SDK loaded successfully with {len(transforms)} transforms (provider: {config.provider})")
83
- except ImportError as e:
84
- logger.warning(f"Headroom SDK not available: {e}. Using basic compression.")
85
- headroom_pipeline = None
86
- HEADROOM_AVAILABLE = False
87
-
88
- # CCR Store (in-memory with TTL)
89
- ccr_store: Dict[str, Dict[str, Any]] = {}
90
-
91
- # Metrics
92
- metrics = {
93
- "requests_total": 0,
94
- "compressions_applied": 0,
95
- "compressions_skipped": 0,
96
- "errors": 0,
97
- "ccr_stores": 0,
98
- "ccr_retrievals": 0,
99
- "total_tokens_before": 0,
100
- "total_tokens_after": 0,
101
- "start_time": datetime.utcnow().isoformat(),
102
- }
103
-
104
-
105
- # Request/Response models
106
- class CompressRequest(BaseModel):
107
- messages: List[Dict[str, Any]]
108
- tools: Optional[List[Dict[str, Any]]] = None
109
- model: Optional[str] = "claude-3-5-sonnet-20241022"
110
- model_limit: Optional[int] = 200000
111
- mode: Optional[str] = None
112
- token_budget: Optional[int] = None
113
- query_context: Optional[str] = None
114
- preserve_recent_turns: Optional[int] = None
115
- target_ratio: Optional[float] = None
116
-
117
-
118
- class CompressResponse(BaseModel):
119
- messages: List[Dict[str, Any]]
120
- tools: Optional[List[Dict[str, Any]]] = None
121
- compressed: bool
122
- stats: Dict[str, Any]
123
-
124
-
125
- class CCRRetrieveRequest(BaseModel):
126
- hash: str
127
- query: Optional[str] = None
128
- max_results: Optional[int] = 20
129
-
130
-
131
- class CCRRetrieveResponse(BaseModel):
132
- success: bool
133
- content: Optional[Any] = None
134
- items_retrieved: int = 0
135
- was_search: bool = False
136
- error: Optional[str] = None
137
-
138
-
139
- def estimate_tokens(data: Any) -> int:
140
- """Estimate token count (rough approximation: ~4 chars per token)"""
141
- text = json.dumps(data) if not isinstance(data, str) else data
142
- return len(text) // 4
143
-
144
-
145
- def generate_hash(content: Any) -> str:
146
- """Generate hash for CCR storage"""
147
- text = json.dumps(content, sort_keys=True)
148
- return hashlib.sha256(text.encode()).hexdigest()[:12]
149
-
150
-
151
- def cleanup_expired_ccr():
152
- """Remove expired CCR entries"""
153
- now = time.time()
154
- expired = [k for k, v in ccr_store.items() if now - v["timestamp"] > config.ccr_ttl]
155
- for key in expired:
156
- del ccr_store[key]
157
-
158
-
159
- def basic_compress(messages: List[Dict], tools: Optional[List] = None) -> Dict:
160
- """Basic compression when Headroom SDK is not available"""
161
- tokens_before = estimate_tokens(messages)
162
- compressed_messages = []
163
-
164
- for msg in messages:
165
- compressed_msg = msg.copy()
166
-
167
- # Compress large tool results
168
- if msg.get("role") == "user" and isinstance(msg.get("content"), list):
169
- new_content = []
170
- for block in msg["content"]:
171
- if block.get("type") == "tool_result":
172
- content = block.get("content", "")
173
- if isinstance(content, str) and len(content) > 2000:
174
- # Store in CCR and replace with reference
175
- hash_key = generate_hash(content)
176
- ccr_store[hash_key] = {
177
- "content": content,
178
- "timestamp": time.time(),
179
- "tool_name": block.get("tool_use_id", "unknown"),
180
- }
181
- metrics["ccr_stores"] += 1
182
- block = block.copy()
183
- block["content"] = (
184
- f"[CCR:{hash_key}] Content compressed ({len(content)} chars). "
185
- f"Use ccr_retrieve to access full content."
186
- )
187
- new_content.append(block)
188
- compressed_msg["content"] = new_content
189
- compressed_messages.append(compressed_msg)
190
-
191
- tokens_after = estimate_tokens(compressed_messages)
192
-
193
- return {
194
- "messages": compressed_messages,
195
- "tools": tools,
196
- "compressed": tokens_after < tokens_before,
197
- "stats": {
198
- "tokens_before": tokens_before,
199
- "tokens_after": tokens_after,
200
- "tokens_saved": tokens_before - tokens_after,
201
- "savings_percent": round(
202
- (1 - tokens_after / tokens_before) * 100, 1
203
- ) if tokens_before > 0 else 0,
204
- "transforms_applied": ["basic_ccr"] if tokens_after < tokens_before else [],
205
- "latency_ms": 0,
206
- },
207
- }
208
-
209
-
210
- @app.get("/health")
211
- async def health_check():
212
- """Health check endpoint"""
213
- cleanup_expired_ccr()
214
- return {
215
- "status": "healthy",
216
- "headroom_loaded": HEADROOM_AVAILABLE,
217
- "ccr_enabled": config.ccr_enabled,
218
- "llmlingua_enabled": config.llmlingua_enabled,
219
- "entries_cached": len(ccr_store),
220
- "config": config.to_dict(),
221
- }
222
-
223
-
224
- @app.get("/metrics")
225
- async def get_metrics():
226
- """Get compression metrics"""
227
- return {
228
- **metrics,
229
- "average_compression_ratio": (
230
- round(metrics["total_tokens_after"] / metrics["total_tokens_before"], 3)
231
- if metrics["total_tokens_before"] > 0
232
- else 1.0
233
- ),
234
- "ccr_entries": len(ccr_store),
235
- "uptime_seconds": (
236
- datetime.utcnow() - datetime.fromisoformat(metrics["start_time"])
237
- ).total_seconds(),
238
- }
239
-
240
-
241
- @app.post("/compress", response_model=CompressResponse)
242
- async def compress_messages(request: CompressRequest):
243
- """Compress messages and tools"""
244
- start_time = time.time()
245
- metrics["requests_total"] += 1
246
-
247
- try:
248
- tokens_before = estimate_tokens(request.messages)
249
- metrics["total_tokens_before"] += tokens_before
250
-
251
- # Skip if below minimum tokens
252
- if tokens_before < config.smart_crusher_min_tokens:
253
- metrics["compressions_skipped"] += 1
254
- return CompressResponse(
255
- messages=request.messages,
256
- tools=request.tools,
257
- compressed=False,
258
- stats={
259
- "skipped": True,
260
- "reason": f"Below threshold ({tokens_before} < {config.smart_crusher_min_tokens})",
261
- },
262
- )
263
-
264
- # Use Headroom SDK if available
265
- if HEADROOM_AVAILABLE and headroom_pipeline:
266
- try:
267
- result = headroom_pipeline.apply(
268
- request.messages,
269
- model=request.model,
270
- model_limit=request.model_limit,
271
- )
272
-
273
- # Extract messages from TransformResult
274
- if hasattr(result, 'messages'):
275
- compressed_messages = result.messages
276
- # transforms_applied may be strings or objects with .name
277
- if hasattr(result, 'transforms_applied'):
278
- transforms_applied = [t if isinstance(t, str) else getattr(t, 'name', str(t)) for t in result.transforms_applied]
279
- else:
280
- transforms_applied = []
281
- elif isinstance(result, dict):
282
- compressed_messages = result.get("messages", request.messages)
283
- transforms_applied = result.get("transforms", [])
284
- else:
285
- compressed_messages = result if isinstance(result, list) else request.messages
286
- transforms_applied = []
287
-
288
- tokens_after = estimate_tokens(compressed_messages)
289
- metrics["total_tokens_after"] += tokens_after
290
- metrics["compressions_applied"] += 1
291
-
292
- return CompressResponse(
293
- messages=compressed_messages,
294
- tools=request.tools, # Tools not modified by current transforms
295
- compressed=tokens_after < tokens_before,
296
- stats={
297
- "tokens_before": tokens_before,
298
- "tokens_after": tokens_after,
299
- "tokens_saved": tokens_before - tokens_after,
300
- "savings_percent": round(
301
- (1 - tokens_after / tokens_before) * 100, 1
302
- ) if tokens_before > 0 else 0,
303
- "transforms_applied": transforms_applied,
304
- "latency_ms": round((time.time() - start_time) * 1000, 1),
305
- },
306
- )
307
- except Exception as e:
308
- logger.warning(f"Headroom SDK error, falling back to basic: {e}")
309
-
310
- # Fallback to basic compression
311
- result = basic_compress(request.messages, request.tools)
312
- metrics["total_tokens_after"] += result["stats"]["tokens_after"]
313
- if result["compressed"]:
314
- metrics["compressions_applied"] += 1
315
- else:
316
- metrics["compressions_skipped"] += 1
317
-
318
- result["stats"]["latency_ms"] = round((time.time() - start_time) * 1000, 1)
319
- return CompressResponse(**result)
320
-
321
- except Exception as e:
322
- metrics["errors"] += 1
323
- logger.error(f"Compression error: {e}")
324
- raise HTTPException(status_code=500, detail=str(e))
325
-
326
-
327
- @app.post("/ccr/retrieve", response_model=CCRRetrieveResponse)
328
- async def ccr_retrieve(request: CCRRetrieveRequest):
329
- """Retrieve content from CCR store"""
330
- cleanup_expired_ccr()
331
-
332
- if request.hash not in ccr_store:
333
- return CCRRetrieveResponse(
334
- success=False,
335
- error=f"Hash {request.hash} not found or expired",
336
- )
337
-
338
- entry = ccr_store[request.hash]
339
- content = entry["content"]
340
- metrics["ccr_retrievals"] += 1
341
-
342
- # If query provided, search within content
343
- if request.query:
344
- if isinstance(content, list):
345
- # Filter list items by query
346
- filtered = [
347
- item
348
- for item in content
349
- if request.query.lower() in json.dumps(item).lower()
350
- ][: request.max_results]
351
- return CCRRetrieveResponse(
352
- success=True,
353
- content=filtered,
354
- items_retrieved=len(filtered),
355
- was_search=True,
356
- )
357
- elif isinstance(content, str):
358
- # Return content if query matches
359
- if request.query.lower() in content.lower():
360
- return CCRRetrieveResponse(
361
- success=True,
362
- content=content,
363
- items_retrieved=1,
364
- was_search=True,
365
- )
366
- return CCRRetrieveResponse(
367
- success=False,
368
- error="Query not found in content",
369
- )
370
-
371
- # Return full content
372
- return CCRRetrieveResponse(
373
- success=True,
374
- content=content,
375
- items_retrieved=1 if not isinstance(content, list) else len(content),
376
- was_search=False,
377
- )
378
-
379
-
380
- @app.post("/ccr/track")
381
- async def ccr_track(
382
- hash_key: str,
383
- turn_number: int,
384
- tool_name: str,
385
- sample: str,
386
- ):
387
- """Track compression for proactive expansion"""
388
- return {"tracked": True, "hash_key": hash_key}
389
-
390
-
391
- @app.post("/ccr/analyze")
392
- async def ccr_analyze(query: str, turn_number: int):
393
- """Analyze query for proactive CCR expansion"""
394
- # Simple keyword matching for expansion suggestions
395
- expansions = []
396
- for hash_key, entry in ccr_store.items():
397
- if query.lower() in json.dumps(entry["content"]).lower():
398
- expansions.append(
399
- {
400
- "hash": hash_key,
401
- "tool_name": entry.get("tool_name", "unknown"),
402
- "relevance": 0.8,
403
- }
404
- )
405
- return {"expansions": expansions[:5]}
406
-
407
-
408
- @app.post("/compress/llmlingua")
409
- async def llmlingua_compress(
410
- text: str,
411
- target_ratio: float = 0.5,
412
- force_tokens: Optional[str] = None,
413
- ):
414
- """Compress text using LLMLingua (if available)"""
415
- if not config.llmlingua_enabled:
416
- raise HTTPException(status_code=400, detail="LLMLingua is not enabled")
417
-
418
- try:
419
- # Try to import and use llmlingua
420
- from llmlingua import PromptCompressor
421
-
422
- compressor = PromptCompressor(device_map=config.llmlingua_device)
423
- result = compressor.compress_prompt(
424
- text,
425
- rate=target_ratio,
426
- force_tokens=json.loads(force_tokens) if force_tokens else None,
427
- )
428
- return {
429
- "compressed": result["compressed_prompt"],
430
- "original_tokens": result.get("origin_tokens", len(text) // 4),
431
- "compressed_tokens": result.get("compressed_tokens", len(result["compressed_prompt"]) // 4),
432
- "ratio": result.get("rate", target_ratio),
433
- }
434
- except ImportError:
435
- raise HTTPException(
436
- status_code=501,
437
- detail="LLMLingua not installed. Add llmlingua to requirements.txt",
438
- )
439
- except Exception as e:
440
- raise HTTPException(status_code=500, detail=str(e))
441
-
442
-
443
- if __name__ == "__main__":
444
- logger.info(f"Starting Headroom sidecar on {config.host}:{config.port}")
445
- logger.info(f"Configuration: {json.dumps(config.to_dict(), indent=2)}")
446
- uvicorn.run(
447
- app,
448
- host=config.host,
449
- port=config.port,
450
- log_level=config.log_level,
451
- )
package/monitor-agents.sh DELETED
@@ -1,31 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Monitor agent activity in real-time
4
-
5
- echo "🔍 Monitoring Agent Activity"
6
- echo "=============================="
7
- echo ""
8
-
9
- while true; do
10
- clear
11
- echo "🔍 Agent Statistics (refreshing every 3s)"
12
- echo "=========================================="
13
- echo ""
14
-
15
- # Get stats
16
- curl -s http://localhost:8080/v1/agents/stats | jq -r '.stats[] |
17
- "Agent: \(.agent_type)
18
- Executions: \(.total_executions) (\(.completed) completed, \(.failed) failed)
19
- Avg Duration: \(.avg_duration_ms)ms
20
- Tokens: \(.total_input_tokens) in / \(.total_output_tokens) out
21
- "' || echo "Proxy not responding..."
22
-
23
- echo ""
24
- echo "Latest transcripts:"
25
- ls -lt data/agent-transcripts/*.jsonl 2>/dev/null | head -3 || echo "No transcripts yet"
26
-
27
- echo ""
28
- echo "Press Ctrl+C to stop monitoring"
29
-
30
- sleep 3
31
- done