entroplain 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,349 +1,561 @@
1
- """
2
- Entropy Monitoring Proxy for OpenClaw/Claude Code.
3
-
4
- This proxy intercepts LLM API calls and adds entropy monitoring,
5
- enabling early exit without modifying the agent framework itself.
6
-
7
- Usage:
8
- # Set as your API endpoint
9
- export OPENAI_BASE_URL=http://localhost:8765
10
-
11
- # Run the proxy
12
- python -m entroplain.proxy --port 8765 --provider openai
13
- """
14
-
15
- import json
16
- import asyncio
17
- import logging
18
- from typing import Optional, Dict, Any, AsyncIterator
19
- from dataclasses import dataclass
20
- import httpx
21
- from fastapi import FastAPI, Request, Response
22
- from fastapi.responses import StreamingResponse
23
- import uvicorn
24
-
25
- from .monitor import EntropyMonitor
26
- from .cost_tracker import CostTracker, format_cost_report
27
-
28
- logger = logging.getLogger(__name__)
29
-
30
-
31
- @dataclass
32
- class ProxyConfig:
33
- """Configuration for the entropy proxy."""
34
- port: int = 8765
35
- provider: str = "openai" # openai, anthropic, nvidia
36
- api_base: str = "https://api.openai.com/v1"
37
- model: str = "default" # For cost tracking
38
- entropy_threshold: float = 0.15
39
- min_valleys: int = 2
40
- min_tokens: int = 50
41
- velocity_threshold: float = 0.05
42
- enable_early_exit: bool = True
43
- log_entropy: bool = True
44
- track_cost: bool = True
45
-
46
-
47
- class EntropyProxy:
48
- """
49
- Proxy that adds entropy monitoring to LLM API calls.
50
-
51
- Intercepts streaming responses, calculates entropy, and can
52
- terminate early when reasoning has converged.
53
- """
54
-
55
- def __init__(self, config: ProxyConfig):
56
- self.config = config
57
- self.monitor = EntropyMonitor(
58
- entropy_threshold=config.entropy_threshold,
59
- min_valleys=config.min_valleys,
60
- min_tokens=config.min_tokens,
61
- velocity_threshold=config.velocity_threshold
62
- )
63
- self.cost_tracker = CostTracker(model=config.model) if config.track_cost else None
64
- self.app = FastAPI(title="Entroplain Proxy")
65
- self._setup_routes()
66
-
67
- def _setup_routes(self):
68
- @self.app.post("/v1/chat/completions")
69
- async def chat_completions(request: Request):
70
- return await self._handle_chat(request)
71
-
72
- @self.app.get("/health")
73
- async def health():
74
- stats = self.monitor.get_stats()
75
- if self.cost_tracker and self.cost_tracker.output_tokens > 0:
76
- stats["cost"] = self.cost_tracker.get_stats()
77
- return {"status": "ok", "monitor": stats}
78
-
79
- @self.app.post("/reset")
80
- async def reset():
81
- self.monitor.reset()
82
- if self.cost_tracker:
83
- self.cost_tracker.reset()
84
- return {"status": "reset"}
85
-
86
- async def _handle_chat(self, request: Request):
87
- """Handle chat completion requests with entropy monitoring."""
88
- body = await request.json()
89
-
90
- # Extract model for cost tracking
91
- model = body.get("model", "default")
92
- if self.cost_tracker:
93
- self.cost_tracker = CostTracker(model=model)
94
-
95
- # Estimate input tokens
96
- input_tokens = self._estimate_tokens(body.get("messages", []))
97
- if self.cost_tracker:
98
- self.cost_tracker.track_input(input_tokens)
99
-
100
- # Ensure logprobs are enabled for entropy calculation
101
- if "logprobs" not in body:
102
- body["logprobs"] = True
103
- if "top_logprobs" not in body:
104
- body["top_logprobs"] = 5
105
-
106
- # Reset monitor for new request
107
- self.monitor.reset()
108
-
109
- # Forward request to actual API
110
- async with httpx.AsyncClient() as client:
111
- response = await client.post(
112
- f"{self.config.api_base}/chat/completions",
113
- json=body,
114
- headers={
115
- "Content-Type": "application/json",
116
- "Authorization": request.headers.get("Authorization", "")
117
- },
118
- timeout=120.0
119
- )
120
-
121
- if not body.get("stream", False):
122
- # Non-streaming: just return response
123
- return Response(
124
- content=response.content,
125
- status_code=response.status_code,
126
- headers=dict(response.headers)
127
- )
128
-
129
- # Streaming: monitor entropy and potentially exit early
130
- return StreamingResponse(
131
- self._stream_with_entropy(response),
132
- media_type="text/event-stream"
133
- )
134
-
135
- def _estimate_tokens(self, messages: list) -> int:
136
- """Rough estimate of input tokens from messages."""
137
- total = 0
138
- for msg in messages:
139
- content = msg.get("content", "")
140
- if isinstance(content, str):
141
- # Rough estimate: ~4 chars per token
142
- total += len(content) // 4
143
- elif isinstance(content, list):
144
- for part in content:
145
- if isinstance(part, dict) and part.get("type") == "text":
146
- total += len(part.get("text", "")) // 4
147
- return max(total, 10) # Minimum 10 tokens
148
-
149
- async def _stream_with_entropy(
150
- self, response: httpx.Response
151
- ) -> AsyncIterator[str]:
152
- """Stream response with entropy monitoring."""
153
- exited_early = False
154
- full_content = ""
155
-
156
- async for line in response.aiter_lines():
157
- if not line.startswith("data: "):
158
- yield line + "\n"
159
- continue
160
-
161
- data = line[6:] # Remove "data: " prefix
162
- if data == "[DONE]":
163
- yield line + "\n"
164
- break
165
-
166
- try:
167
- chunk = json.loads(data)
168
- except json.JSONDecodeError:
169
- yield line + "\n"
170
- continue
171
-
172
- # Extract token and logprobs
173
- if chunk.get("choices"):
174
- choice = chunk["choices"][0]
175
-
176
- # Get token content
177
- if choice.get("delta", {}).get("content"):
178
- token = choice["delta"]["content"]
179
- full_content += token
180
-
181
- # Track output token for cost
182
- if self.cost_tracker:
183
- self.cost_tracker.track_output(1)
184
-
185
- # Calculate entropy from logprobs (handle null)
186
- logprobs = choice.get("logprobs")
187
- if logprobs and logprobs.get("content"):
188
- logprobs_data = logprobs["content"]
189
- if logprobs_data:
190
- entropy = self._calculate_entropy(logprobs_data[0])
191
-
192
- # Get confidence (top token probability)
193
- confidence = 0.0
194
- if logprobs_data[0].get("top_logprobs"):
195
- confidence = math.exp(logprobs_data[0]["top_logprobs"][0]["logprob"])
196
-
197
- self.monitor.track(token, entropy, confidence)
198
-
199
- if self.config.log_entropy:
200
- logger.info(
201
- f"Token: {repr(token)}, Entropy: {entropy:.4f}, "
202
- f"Confidence: {confidence:.2%}, "
203
- f"Valleys: {len(self.monitor.get_valleys())}"
204
- )
205
-
206
- # Check for early exit
207
- if (
208
- self.config.enable_early_exit
209
- and self.monitor.should_exit()
210
- ):
211
- exit_reason = self.monitor._get_exit_reason()
212
- logger.info(
213
- f"Early exit triggered! "
214
- f"Reason: {exit_reason}, "
215
- f"Tokens: {len(full_content)}, "
216
- f"Valleys: {len(self.monitor.get_valleys())}"
217
- )
218
-
219
- # Log cost savings
220
- if self.cost_tracker:
221
- # Estimate what full output would have been
222
- # Typically 2-3x for reasoning tasks
223
- estimated_full = len(full_content) * 2.5
224
- self.cost_tracker.set_full_estimate(int(estimated_full))
225
- estimate = self.cost_tracker.get_estimate()
226
- logger.info(f"Cost savings: ${estimate.cost_saved_usd:.4f} ({estimate.savings_percent:.1f}%)")
227
-
228
- exited_early = True
229
- yield "data: [DONE]\n\n"
230
- break
231
-
232
- yield line + "\n"
233
-
234
- if not exited_early:
235
- logger.info(
236
- f"Stream completed. "
237
- f"Tokens: {self.monitor.get_stats()['token_count']}, "
238
- f"Valleys: {len(self.monitor.get_valleys())}"
239
- )
240
-
241
- def _calculate_entropy(self, logprobs_data: Dict) -> float:
242
- """Calculate Shannon entropy from logprobs."""
243
- import math
244
-
245
- if not logprobs_data or "top_logprobs" not in logprobs_data:
246
- return 0.0
247
-
248
- entropy = 0.0
249
- for lp in logprobs_data["top_logprobs"]:
250
- prob = math.exp(lp["logprob"])
251
- if prob > 0:
252
- entropy -= prob * math.log2(prob + 1e-10)
253
-
254
- return entropy
255
-
256
- def run(self):
257
- """Start the proxy server."""
258
- uvicorn.run(self.app, host="0.0.0.0", port=self.config.port)
259
-
260
-
261
- def main():
262
- """CLI entry point for running the proxy."""
263
- import argparse
264
- import math # Needed for entropy calculation
265
-
266
- parser = argparse.ArgumentParser(description="Entropy Monitoring Proxy")
267
- parser.add_argument("--port", type=int, default=8765, help="Proxy port")
268
- parser.add_argument(
269
- "--provider",
270
- default="openai",
271
- choices=["openai", "anthropic", "nvidia"],
272
- help="LLM provider"
273
- )
274
- parser.add_argument(
275
- "--api-base",
276
- default="https://api.openai.com/v1",
277
- help="API base URL"
278
- )
279
- parser.add_argument(
280
- "--model",
281
- default="default",
282
- help="Model name for cost tracking"
283
- )
284
- parser.add_argument(
285
- "--entropy-threshold",
286
- type=float,
287
- default=0.15,
288
- help="Entropy threshold for early exit"
289
- )
290
- parser.add_argument(
291
- "--min-valleys",
292
- type=int,
293
- default=2,
294
- help="Minimum valleys before early exit"
295
- )
296
- parser.add_argument(
297
- "--no-early-exit",
298
- action="store_true",
299
- help="Disable early exit (monitor only)"
300
- )
301
- parser.add_argument(
302
- "--log-entropy",
303
- action="store_true",
304
- help="Log entropy values to console"
305
- )
306
- parser.add_argument(
307
- "--no-cost-tracking",
308
- action="store_true",
309
- help="Disable cost tracking"
310
- )
311
-
312
- args = parser.parse_args()
313
-
314
- config = ProxyConfig(
315
- port=args.port,
316
- provider=args.provider,
317
- api_base=args.api_base,
318
- model=args.model,
319
- entropy_threshold=args.entropy_threshold,
320
- min_valleys=args.min_valleys,
321
- enable_early_exit=not args.no_early_exit,
322
- log_entropy=args.log_entropy,
323
- track_cost=not args.no_cost_tracking
324
- )
325
-
326
- proxy = EntropyProxy(config)
327
-
328
- # Clean banner with fixed formatting
329
- print("\n" + "="*62)
330
- print(" ENTROPPLAIN ENTROPY MONITORING PROXY")
331
- print("="*62)
332
- print(f" Proxy: http://localhost:{args.port}")
333
- print(f" Provider: {args.provider}")
334
- print(f" API Base: {args.api_base}")
335
- print(f" Model: {args.model}")
336
- print(f" Early Exit: {'ENABLED' if not args.no_early_exit else 'DISABLED'}")
337
- print(f" Cost Track: {'DISABLED' if args.no_cost_tracking else 'ENABLED'}")
338
- print("="*62)
339
- print(" Usage:")
340
- print(f" export OPENAI_BASE_URL=http://localhost:{args.port}")
341
- print(" # or for NVIDIA:")
342
- print(f" export NVIDIA_BASE_URL=http://localhost:{args.port}")
343
- print("="*62 + "\n")
344
-
345
- proxy.run()
346
-
347
-
348
- if __name__ == "__main__":
349
- main()
1
+ """
2
+ Entropy Monitoring Proxy with built-in Dashboard.
3
+
4
+ This proxy intercepts LLM API calls and adds entropy monitoring,
5
+ enabling early exit without modifying the agent framework itself.
6
+
7
+ Usage:
8
+ # Set as your API endpoint
9
+ export OPENAI_BASE_URL=http://localhost:8765
10
+
11
+ # Run the proxy (includes dashboard at /dashboard)
12
+ python -m entroplain.proxy --port 8765 --provider openai
13
+ """
14
+
15
+ import json
16
+ import asyncio
17
+ import logging
18
+ from typing import Optional, Dict, Any, AsyncIterator, List
19
+ from dataclasses import dataclass
20
+ import httpx
21
+ from fastapi import FastAPI, Request, Response, WebSocket, WebSocketDisconnect
22
+ from fastapi.responses import StreamingResponse, HTMLResponse
23
+ import uvicorn
24
+
25
+ from .monitor import EntropyMonitor
26
+ from .cost_tracker import CostTracker, format_cost_report
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ # Dashboard HTML - embedded in proxy
32
+ DASHBOARD_HTML = """
33
+ <!DOCTYPE html>
34
+ <html>
35
+ <head>
36
+ <title>Entroplain Dashboard</title>
37
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
38
+ <style>
39
+ * { box-sizing: border-box; margin: 0; padding: 0; }
40
+ html, body {
41
+ height: 100%;
42
+ overflow: hidden;
43
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
44
+ background: #0a0a0a;
45
+ color: #e0e0e0;
46
+ }
47
+ .app {
48
+ height: 100vh;
49
+ display: flex;
50
+ flex-direction: column;
51
+ padding: 20px;
52
+ max-width: 1400px;
53
+ margin: 0 auto;
54
+ }
55
+ h1 {
56
+ font-size: 20px;
57
+ margin-bottom: 15px;
58
+ color: #4ade80;
59
+ flex-shrink: 0;
60
+ }
61
+ .main-grid {
62
+ flex: 1;
63
+ display: grid;
64
+ grid-template-columns: 1fr 280px;
65
+ gap: 15px;
66
+ min-height: 0;
67
+ }
68
+ .chart-container {
69
+ background: #1a1a1a;
70
+ border-radius: 8px;
71
+ padding: 15px;
72
+ display: flex;
73
+ flex-direction: column;
74
+ min-height: 0;
75
+ }
76
+ .chart-wrapper {
77
+ flex: 1;
78
+ position: relative;
79
+ min-height: 200px;
80
+ }
81
+ .chart-wrapper canvas {
82
+ position: absolute;
83
+ top: 0;
84
+ left: 0;
85
+ width: 100%;
86
+ height: 100%;
87
+ }
88
+ .legend {
89
+ display: flex;
90
+ gap: 15px;
91
+ margin-top: 10px;
92
+ font-size: 11px;
93
+ flex-shrink: 0;
94
+ }
95
+ .legend-item { display: flex; align-items: center; gap: 5px; }
96
+ .legend-dot { width: 8px; height: 8px; border-radius: 50%; }
97
+ .dot-entropy { background: #60a5fa; }
98
+ .dot-valley { background: #f59e0b; }
99
+ .dot-threshold { background: #ef4444; }
100
+
101
+ .stats-panel {
102
+ display: flex;
103
+ flex-direction: column;
104
+ gap: 10px;
105
+ overflow-y: auto;
106
+ }
107
+ .stat-card {
108
+ background: #1a1a1a;
109
+ border-radius: 8px;
110
+ padding: 12px;
111
+ flex-shrink: 0;
112
+ }
113
+ .stat-label {
114
+ font-size: 10px;
115
+ color: #888;
116
+ text-transform: uppercase;
117
+ letter-spacing: 0.05em;
118
+ }
119
+ .stat-value {
120
+ font-size: 24px;
121
+ font-weight: 600;
122
+ color: #fff;
123
+ margin-top: 4px;
124
+ }
125
+ .stat-value.savings { color: #4ade80; }
126
+ .stat-value.cost { color: #fbbf24; }
127
+ .stat-value.valleys { color: #60a5fa; }
128
+
129
+ .status-badge {
130
+ display: inline-block;
131
+ padding: 3px 10px;
132
+ border-radius: 10px;
133
+ font-size: 11px;
134
+ font-weight: 500;
135
+ }
136
+ .status-active { background: #22c55e; color: #000; }
137
+ .status-idle { background: #374151; color: #888; }
138
+ .status-exited { background: #f59e0b; color: #000; }
139
+
140
+ .connection-status {
141
+ padding: 6px 10px;
142
+ border-radius: 6px;
143
+ font-size: 11px;
144
+ margin-bottom: 10px;
145
+ }
146
+ .connected { background: #166534; color: #4ade80; }
147
+ .disconnected { background: #7f1d1d; color: #fca5a5; }
148
+
149
+ .waiting-message {
150
+ position: absolute;
151
+ top: 50%;
152
+ left: 50%;
153
+ transform: translate(-50%, -50%);
154
+ text-align: center;
155
+ color: #666;
156
+ }
157
+ .waiting-message h2 { font-size: 14px; margin-bottom: 5px; }
158
+ .waiting-message p { font-size: 11px; }
159
+ </style>
160
+ </head>
161
+ <body>
162
+ <div class="app">
163
+ <h1>🎯 Entroplain Dashboard</h1>
164
+
165
+ <div class="main-grid">
166
+ <div class="chart-container">
167
+ <div id="connectionStatus" class="connection-status disconnected">Connecting...</div>
168
+ <div class="chart-wrapper">
169
+ <canvas id="entropyChart"></canvas>
170
+ <div id="waitingMessage" class="waiting-message">
171
+ <h2>Waiting for data...</h2>
172
+ <p>Make a request through the proxy to see entropy visualization</p>
173
+ </div>
174
+ </div>
175
+ <div class="legend">
176
+ <div class="legend-item"><div class="legend-dot dot-entropy"></div><span>Entropy</span></div>
177
+ <div class="legend-item"><div class="legend-dot dot-valley"></div><span>Valley</span></div>
178
+ <div class="legend-item"><div class="legend-dot dot-threshold"></div><span>Threshold</span></div>
179
+ </div>
180
+ </div>
181
+
182
+ <div class="stats-panel">
183
+ <div class="stat-card">
184
+ <div class="stat-label">Status</div>
185
+ <div class="stat-value" id="status"><span class="status-badge status-idle">Idle</span></div>
186
+ </div>
187
+ <div class="stat-card">
188
+ <div class="stat-label">Tokens</div>
189
+ <div class="stat-value" id="tokens">0</div>
190
+ </div>
191
+ <div class="stat-card">
192
+ <div class="stat-label">Valleys</div>
193
+ <div class="stat-value valleys" id="valleys">0</div>
194
+ </div>
195
+ <div class="stat-card">
196
+ <div class="stat-label">Entropy</div>
197
+ <div class="stat-value" id="currentEntropy">-</div>
198
+ </div>
199
+ <div class="stat-card">
200
+ <div class="stat-label">Mean</div>
201
+ <div class="stat-value" id="meanEntropy">-</div>
202
+ </div>
203
+ <div class="stat-card">
204
+ <div class="stat-label">Saved</div>
205
+ <div class="stat-value savings" id="saved">-</div>
206
+ </div>
207
+ </div>
208
+ </div>
209
+ </div>
210
+
211
+ <script>
212
+ let chart = null;
213
+ let hasData = false;
214
+ const maxPoints = 200;
215
+
216
+ function initChart() {
217
+ const ctx = document.getElementById('entropyChart').getContext('2d');
218
+ chart = new Chart(ctx, {
219
+ type: 'line',
220
+ data: {
221
+ labels: [],
222
+ datasets: [
223
+ { label: 'Entropy', data: [], borderColor: '#60a5fa', backgroundColor: 'rgba(96, 165, 250, 0.1)', fill: true, tension: 0.3, pointRadius: 0, borderWidth: 2 },
224
+ { label: 'Threshold', data: [], borderColor: '#ef4444', borderDash: [5, 5], pointRadius: 0, fill: false, borderWidth: 1 },
225
+ { label: 'Valleys', data: [], borderColor: '#f59e0b', pointBackgroundColor: '#f59e0b', pointRadius: 5, showLine: false }
226
+ ]
227
+ },
228
+ options: {
229
+ responsive: true,
230
+ maintainAspectRatio: false,
231
+ animation: { duration: 0 },
232
+ scales: {
233
+ x: { title: { display: true, text: 'Tokens', color: '#888' }, grid: { color: '#222' }, ticks: { color: '#666', maxTicksLimit: 10 } },
234
+ y: { title: { display: true, text: 'Entropy', color: '#888' }, min: 0, max: 1, grid: { color: '#222' }, ticks: { color: '#666' } }
235
+ },
236
+ plugins: { legend: { display: false } }
237
+ }
238
+ });
239
+ }
240
+
241
+ function updateChart(data) {
242
+ if (!chart || !data.trajectory || data.trajectory.length === 0) return;
243
+
244
+ if (!hasData) {
245
+ hasData = true;
246
+ document.getElementById('waitingMessage').style.display = 'none';
247
+ document.getElementById('connectionStatus').className = 'connection-status connected';
248
+ document.getElementById('connectionStatus').textContent = 'Live';
249
+ }
250
+
251
+ let traj = data.trajectory.slice(-maxPoints);
252
+ chart.data.labels = traj.map((_, i) => i);
253
+ chart.data.datasets[0].data = traj.map(p => p.entropy);
254
+ chart.data.datasets[1].data = traj.map(() => 0.15);
255
+ chart.data.datasets[2].data = traj.map(p => p.is_valley ? p.entropy : null);
256
+ chart.update('none');
257
+ }
258
+
259
+ function updateStats(data) {
260
+ document.getElementById('tokens').textContent = data.token_count || 0;
261
+ document.getElementById('valleys').textContent = data.valley_count || 0;
262
+ if (data.current_entropy !== undefined) document.getElementById('currentEntropy').textContent = data.current_entropy.toFixed(3);
263
+ if (data.mean_entropy !== undefined) document.getElementById('meanEntropy').textContent = data.mean_entropy.toFixed(3);
264
+
265
+ if (data.exited_early) {
266
+ document.getElementById('status').innerHTML = '<span class="status-badge status-exited">Exited</span>';
267
+ if (data.cost_saved) document.getElementById('saved').textContent = '$' + data.cost_saved.toFixed(4);
268
+ } else if (data.active) {
269
+ document.getElementById('status').innerHTML = '<span class="status-badge status-active">Active</span>';
270
+ document.getElementById('saved').textContent = '-';
271
+ } else {
272
+ document.getElementById('status').innerHTML = '<span class="status-badge status-idle">Idle</span>';
273
+ }
274
+ }
275
+
276
+ initChart();
277
+
278
+ const ws = new WebSocket((location.protocol === 'https:' ? 'wss:' : 'ws:') + '//' + location.host + '/ws');
279
+ ws.onopen = () => { document.getElementById('connectionStatus').className = 'connection-status connected'; document.getElementById('connectionStatus').textContent = 'Connected'; };
280
+ ws.onclose = () => { document.getElementById('connectionStatus').className = 'connection-status disconnected'; document.getElementById('connectionStatus').textContent = 'Disconnected'; };
281
+ ws.onmessage = (e) => { try { const d = JSON.parse(e.data); updateChart(d); updateStats(d); } catch(err) {} };
282
+ </script>
283
+ </body>
284
+ </html>
285
+ """
286
+
287
+
288
+ @dataclass
289
+ class ProxyConfig:
290
+ """Configuration for the entropy proxy."""
291
+ port: int = 8765
292
+ provider: str = "openai"
293
+ api_base: str = "https://api.openai.com/v1"
294
+ model: str = "default"
295
+ entropy_threshold: float = 0.15
296
+ min_valleys: int = 2
297
+ min_tokens: int = 50
298
+ velocity_threshold: float = 0.05
299
+ enable_early_exit: bool = True
300
+ log_entropy: bool = True
301
+ track_cost: bool = True
302
+
303
+
304
+ class EntropyProxy:
305
+ """Proxy that adds entropy monitoring to LLM API calls."""
306
+
307
+ def __init__(self, config: ProxyConfig):
308
+ self.config = config
309
+ self.monitor = EntropyMonitor(
310
+ entropy_threshold=config.entropy_threshold,
311
+ min_valleys=config.min_valleys,
312
+ min_tokens=config.min_tokens,
313
+ velocity_threshold=config.velocity_threshold
314
+ )
315
+ self.cost_tracker = CostTracker(model=config.model) if config.track_cost else None
316
+ self.app = FastAPI(title="Entroplain Proxy")
317
+ self._ws_clients: List[WebSocket] = []
318
+ self._current_data: Dict[str, Any] = {"trajectory": [], "token_count": 0, "valley_count": 0, "active": False}
319
+ self._setup_routes()
320
+
321
+ def _setup_routes(self):
322
+ @self.app.get("/")
323
+ async def root():
324
+ return {"service": "Entroplain Proxy", "dashboard": "/dashboard", "health": "/health"}
325
+
326
+ @self.app.get("/dashboard")
327
+ async def dashboard():
328
+ return HTMLResponse(content=DASHBOARD_HTML)
329
+
330
+ @self.app.websocket("/ws")
331
+ async def ws_endpoint(websocket: WebSocket):
332
+ await websocket.accept()
333
+ self._ws_clients.append(websocket)
334
+ try:
335
+ await websocket.send_json(self._current_data)
336
+ while True:
337
+ await websocket.receive_text()
338
+ except WebSocketDisconnect:
339
+ if websocket in self._ws_clients:
340
+ self._ws_clients.remove(websocket)
341
+
342
+ @self.app.post("/v1/chat/completions")
343
+ async def chat_completions(request: Request):
344
+ return await self._handle_chat(request)
345
+
346
+ @self.app.get("/health")
347
+ async def health():
348
+ stats = self.monitor.get_stats()
349
+ if self.cost_tracker and self.cost_tracker.output_tokens > 0:
350
+ stats["cost"] = self.cost_tracker.get_stats()
351
+ return {"status": "ok", "monitor": stats}
352
+
353
+ @self.app.post("/reset")
354
+ async def reset():
355
+ self.monitor.reset()
356
+ if self.cost_tracker:
357
+ self.cost_tracker.reset()
358
+ return {"status": "reset"}
359
+
360
+ async def _broadcast(self, data: Dict[str, Any]):
361
+ """Broadcast data to all WebSocket clients."""
362
+ self._current_data = data
363
+ for ws in self._ws_clients[:]:
364
+ try:
365
+ await ws.send_json(data)
366
+ except Exception:
367
+ self._ws_clients.remove(ws)
368
+
369
+ async def _handle_chat(self, request: Request):
370
+ """Handle chat completion requests with entropy monitoring."""
371
+ body = await request.json()
372
+
373
+ model = body.get("model", "default")
374
+ if self.cost_tracker:
375
+ self.cost_tracker = CostTracker(model=model)
376
+
377
+ input_tokens = self._estimate_tokens(body.get("messages", []))
378
+ if self.cost_tracker:
379
+ self.cost_tracker.track_input(input_tokens)
380
+
381
+ if "logprobs" not in body:
382
+ body["logprobs"] = True
383
+ if "top_logprobs" not in body:
384
+ body["top_logprobs"] = 5
385
+
386
+ self.monitor.reset()
387
+
388
+ # Broadcast active state
389
+ await self._broadcast({"active": True, "trajectory": [], "token_count": 0, "valley_count": 0})
390
+
391
+ async with httpx.AsyncClient() as client:
392
+ response = await client.post(
393
+ f"{self.config.api_base}/chat/completions",
394
+ json=body,
395
+ headers={
396
+ "Content-Type": "application/json",
397
+ "Authorization": request.headers.get("Authorization", "")
398
+ },
399
+ timeout=120.0
400
+ )
401
+
402
+ if not body.get("stream", False):
403
+ return Response(content=response.content, status_code=response.status_code, headers=dict(response.headers))
404
+
405
+ return StreamingResponse(self._stream_with_entropy(response), media_type="text/event-stream")
406
+
407
+ def _estimate_tokens(self, messages: list) -> int:
408
+ total = 0
409
+ for msg in messages:
410
+ content = msg.get("content", "")
411
+ if isinstance(content, str):
412
+ total += len(content) // 4
413
+ elif isinstance(content, list):
414
+ for part in content:
415
+ if isinstance(part, dict) and part.get("type") == "text":
416
+ total += len(part.get("text", "")) // 4
417
+ return max(total, 10)
418
+
419
+ async def _stream_with_entropy(self, response: httpx.Response) -> AsyncIterator[str]:
420
+ import math
421
+ exited_early = False
422
+ full_content = ""
423
+
424
+ async for line in response.aiter_lines():
425
+ if not line.startswith("data: "):
426
+ yield line + "\n"
427
+ continue
428
+
429
+ data = line[6:]
430
+ if data == "[DONE]":
431
+ yield line + "\n"
432
+ break
433
+
434
+ try:
435
+ chunk = json.loads(data)
436
+ except json.JSONDecodeError:
437
+ yield line + "\n"
438
+ continue
439
+
440
+ if chunk.get("choices"):
441
+ choice = chunk["choices"][0]
442
+
443
+ if choice.get("delta", {}).get("content"):
444
+ token = choice["delta"]["content"]
445
+ full_content += token
446
+ if self.cost_tracker:
447
+ self.cost_tracker.track_output(1)
448
+
449
+ logprobs = choice.get("logprobs")
450
+ if logprobs and logprobs.get("content"):
451
+ logprobs_data = logprobs["content"]
452
+ if logprobs_data:
453
+ entropy = self._calculate_entropy(logprobs_data[0])
454
+ confidence = 0.0
455
+ if logprobs_data[0].get("top_logprobs"):
456
+ confidence = math.exp(logprobs_data[0]["top_logprobs"][0]["logprob"])
457
+
458
+ self.monitor.track(token, entropy, confidence)
459
+
460
+ if self.config.log_entropy:
461
+ logger.info(f"Token: {repr(token)}, Entropy: {entropy:.4f}, Valleys: {len(self.monitor.get_valleys())}")
462
+
463
+ # Broadcast update to dashboard
464
+ stats = self.monitor.get_stats()
465
+ await self._broadcast({
466
+ "active": True,
467
+ "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
468
+ "token_count": stats["token_count"],
469
+ "valley_count": stats["valley_count"],
470
+ "current_entropy": stats["current_entropy"],
471
+ "mean_entropy": stats["mean_entropy"],
472
+ "exited_early": False
473
+ })
474
+
475
+ if self.config.enable_early_exit and self.monitor.should_exit():
476
+ logger.info(f"Early exit! Tokens: {len(full_content)}, Valleys: {len(self.monitor.get_valleys())}")
477
+ if self.cost_tracker:
478
+ estimated_full = len(full_content) * 2.5
479
+ self.cost_tracker.set_full_estimate(int(estimated_full))
480
+ estimate = self.cost_tracker.get_estimate()
481
+ logger.info(f"Cost savings: ${estimate.cost_saved_usd:.4f}")
482
+ await self._broadcast({
483
+ "active": False,
484
+ "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
485
+ "token_count": stats["token_count"],
486
+ "valley_count": stats["valley_count"],
487
+ "current_entropy": stats["current_entropy"],
488
+ "mean_entropy": stats["mean_entropy"],
489
+ "exited_early": True,
490
+ "cost_saved": estimate.cost_saved_usd
491
+ })
492
+ exited_early = True
493
+ yield "data: [DONE]\n\n"
494
+ break
495
+
496
+ yield line + "\n"
497
+
498
+ if not exited_early:
499
+ stats = self.monitor.get_stats()
500
+ await self._broadcast({
501
+ "active": False,
502
+ "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
503
+ "token_count": stats["token_count"],
504
+ "valley_count": stats["valley_count"],
505
+ "current_entropy": stats["current_entropy"],
506
+ "mean_entropy": stats["mean_entropy"],
507
+ "exited_early": False
508
+ })
509
+
510
+ def _calculate_entropy(self, logprobs_data: Dict) -> float:
511
+ import math
512
+ if not logprobs_data or "top_logprobs" not in logprobs_data:
513
+ return 0.0
514
+ entropy = 0.0
515
+ for lp in logprobs_data["top_logprobs"]:
516
+ prob = math.exp(lp["logprob"])
517
+ if prob > 0:
518
+ entropy -= prob * math.log2(prob + 1e-10)
519
+ return entropy
520
+
521
+ def run(self):
522
+ uvicorn.run(self.app, host="0.0.0.0", port=self.config.port)
523
+
524
+
525
+ def main():
526
+ import argparse
527
+ parser = argparse.ArgumentParser(description="Entropy Monitoring Proxy")
528
+ parser.add_argument("--port", type=int, default=8765)
529
+ parser.add_argument("--provider", default="openai", choices=["openai", "anthropic", "nvidia"])
530
+ parser.add_argument("--api-base", default=None, help="API base URL (auto-set based on provider)")
531
+ parser.add_argument("--model", default="default")
532
+ parser.add_argument("--entropy-threshold", type=float, default=0.15)
533
+ parser.add_argument("--min-valleys", type=int, default=2)
534
+ parser.add_argument("--no-early-exit", action="store_true")
535
+ parser.add_argument("--log-entropy", action="store_true")
536
+ parser.add_argument("--no-cost-tracking", action="store_true")
537
+ args = parser.parse_args()
538
+
539
+ # Auto-set API base URL based on provider
+ api_base = args.api_base
+ if api_base is None:
+     if args.provider == "openai":
+         api_base = "https://api.openai.com/v1"
+     elif args.provider == "anthropic":
+         api_base = "https://api.anthropic.com/v1"
+     elif args.provider == "nvidia":
+         api_base = "https://integrate.api.nvidia.com/v1"
+
+ config = ProxyConfig(
+     port=args.port,
+     provider=args.provider,
+     api_base=api_base,
540
+ model=args.model,
541
+ entropy_threshold=args.entropy_threshold,
542
+ min_valleys=args.min_valleys,
543
+ enable_early_exit=not args.no_early_exit,
544
+ log_entropy=args.log_entropy,
545
+ track_cost=not args.no_cost_tracking
546
+ )
547
+
548
+ proxy = EntropyProxy(config)
549
+
550
+ print(f"\n{'='*62}\n ENTROPPLAIN PROXY WITH DASHBOARD\n{'='*62}")
551
+ print(f" Proxy: http://localhost:{args.port}")
552
+ print(f" Dashboard: http://localhost:{args.port}/dashboard")
553
+ print(f" API Base: {args.api_base}")
554
+ print(f"{'='*62}\n")
555
+
556
+ proxy.run()
557
+
558
+
559
+ if __name__ == "__main__":
560
+ main()
561
+