entroplain 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,278 +1,561 @@
1
- """
2
- Entropy Monitoring Proxy for OpenClaw/Claude Code.
3
-
4
- This proxy intercepts LLM API calls and adds entropy monitoring,
5
- enabling early exit without modifying the agent framework itself.
6
-
7
- Usage:
8
- # Set as your API endpoint
9
- export OPENAI_BASE_URL=http://localhost:8765
10
-
11
- # Run the proxy
12
- python -m entroplain.proxy --port 8765 --provider openai
13
- """
14
-
15
- import json
16
- import asyncio
17
- import logging
18
- from typing import Optional, Dict, Any, AsyncIterator
19
- from dataclasses import dataclass
20
- import httpx
21
- from fastapi import FastAPI, Request, Response
22
- from fastapi.responses import StreamingResponse
23
- import uvicorn
24
-
25
- from .monitor import EntropyMonitor
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
-
30
- @dataclass
31
- class ProxyConfig:
32
- """Configuration for the entropy proxy."""
33
- port: int = 8765
34
- provider: str = "openai" # openai, anthropic, nvidia
35
- api_base: str = "https://api.openai.com/v1"
36
- entropy_threshold: float = 0.15
37
- min_valleys: int = 2
38
- min_tokens: int = 50
39
- velocity_threshold: float = 0.05
40
- enable_early_exit: bool = True
41
- log_entropy: bool = True
42
-
43
-
44
- class EntropyProxy:
45
- """
46
- Proxy that adds entropy monitoring to LLM API calls.
47
-
48
- Intercepts streaming responses, calculates entropy, and can
49
- terminate early when reasoning has converged.
50
- """
51
-
52
- def __init__(self, config: ProxyConfig):
53
- self.config = config
54
- self.monitor = EntropyMonitor(
55
- entropy_threshold=config.entropy_threshold,
56
- min_valleys=config.min_valleys,
57
- min_tokens=config.min_tokens,
58
- velocity_threshold=config.velocity_threshold
59
- )
60
- self.app = FastAPI(title="Entroplain Proxy")
61
- self._setup_routes()
62
-
63
- def _setup_routes(self):
64
- @self.app.post("/v1/chat/completions")
65
- async def chat_completions(request: Request):
66
- return await self._handle_chat(request)
67
-
68
- @self.app.get("/health")
69
- async def health():
70
- return {"status": "ok", "monitor": self.monitor.get_stats()}
71
-
72
- @self.app.post("/reset")
73
- async def reset():
74
- self.monitor.reset()
75
- return {"status": "reset"}
76
-
77
- async def _handle_chat(self, request: Request):
78
- """Handle chat completion requests with entropy monitoring."""
79
- body = await request.json()
80
-
81
- # Ensure logprobs are enabled for entropy calculation
82
- if "logprobs" not in body:
83
- body["logprobs"] = True
84
- if "top_logprobs" not in body:
85
- body["top_logprobs"] = 5
86
-
87
- # Reset monitor for new request
88
- self.monitor.reset()
89
-
90
- # Forward request to actual API
91
- async with httpx.AsyncClient() as client:
92
- response = await client.post(
93
- f"{self.config.api_base}/chat/completions",
94
- json=body,
95
- headers={
96
- "Content-Type": "application/json",
97
- "Authorization": request.headers.get("Authorization", "")
98
- },
99
- timeout=120.0
100
- )
101
-
102
- if not body.get("stream", False):
103
- # Non-streaming: just return response
104
- return Response(
105
- content=response.content,
106
- status_code=response.status_code,
107
- headers=dict(response.headers)
108
- )
109
-
110
- # Streaming: monitor entropy and potentially exit early
111
- return StreamingResponse(
112
- self._stream_with_entropy(response),
113
- media_type="text/event-stream"
114
- )
115
-
116
- async def _stream_with_entropy(
117
- self, response: httpx.Response
118
- ) -> AsyncIterator[str]:
119
- """Stream response with entropy monitoring."""
120
- exited_early = False
121
- full_content = ""
122
-
123
- async for line in response.aiter_lines():
124
- if not line.startswith("data: "):
125
- yield line + "\n"
126
- continue
127
-
128
- data = line[6:] # Remove "data: " prefix
129
- if data == "[DONE]":
130
- yield line + "\n"
131
- break
132
-
133
- try:
134
- chunk = json.loads(data)
135
- except json.JSONDecodeError:
136
- yield line + "\n"
137
- continue
138
-
139
- # Extract token and logprobs
140
- if chunk.get("choices"):
141
- choice = chunk["choices"][0]
142
-
143
- # Get token content
144
- if choice.get("delta", {}).get("content"):
145
- token = choice["delta"]["content"]
146
- full_content += token
147
-
148
- # Calculate entropy from logprobs
149
- if choice.get("logprobs", {}).get("content"):
150
- logprobs_data = choice["logprobs"]["content"]
151
- if logprobs_data:
152
- entropy = self._calculate_entropy(logprobs_data[0])
153
- self.monitor.track(token, entropy)
154
-
155
- if self.config.log_entropy:
156
- logger.info(
157
- f"Token: {repr(token)}, Entropy: {entropy:.4f}, "
158
- f"Valleys: {len(self.monitor.get_valleys())}"
159
- )
160
-
161
- # Check for early exit
162
- if (
163
- self.config.enable_early_exit
164
- and self.monitor.should_exit()
165
- ):
166
- logger.info(
167
- f"Early exit triggered! "
168
- f"Tokens: {len(full_content)}, "
169
- f"Valleys: {len(self.monitor.get_valleys())}"
170
- )
171
- exited_early = True
172
- yield "data: [DONE]\n\n"
173
- break
174
-
175
- yield line + "\n"
176
-
177
- if not exited_early:
178
- logger.info(
179
- f"Stream completed. "
180
- f"Tokens: {self.monitor.get_stats()['token_count']}, "
181
- f"Valleys: {len(self.monitor.get_valleys())}"
182
- )
183
-
184
- def _calculate_entropy(self, logprobs_data: Dict) -> float:
185
- """Calculate Shannon entropy from logprobs."""
186
- import math
187
-
188
- if not logprobs_data or "top_logprobs" not in logprobs_data:
189
- return 0.0
190
-
191
- entropy = 0.0
192
- for lp in logprobs_data["top_logprobs"]:
193
- prob = math.exp(lp["logprob"])
194
- if prob > 0:
195
- entropy -= prob * math.log2(prob + 1e-10)
196
-
197
- return entropy
198
-
199
- def run(self):
200
- """Start the proxy server."""
201
- uvicorn.run(self.app, host="0.0.0.0", port=self.config.port)
202
-
203
-
204
- def main():
205
- """CLI entry point for running the proxy."""
206
- import argparse
207
-
208
- parser = argparse.ArgumentParser(description="Entropy Monitoring Proxy")
209
- parser.add_argument("--port", type=int, default=8765, help="Proxy port")
210
- parser.add_argument(
211
- "--provider",
212
- default="openai",
213
- choices=["openai", "anthropic", "nvidia"],
214
- help="LLM provider"
215
- )
216
- parser.add_argument(
217
- "--api-base",
218
- default="https://api.openai.com/v1",
219
- help="API base URL"
220
- )
221
- parser.add_argument(
222
- "--entropy-threshold",
223
- type=float,
224
- default=0.15,
225
- help="Entropy threshold for early exit"
226
- )
227
- parser.add_argument(
228
- "--min-valleys",
229
- type=int,
230
- default=2,
231
- help="Minimum valleys before early exit"
232
- )
233
- parser.add_argument(
234
- "--no-early-exit",
235
- action="store_true",
236
- help="Disable early exit (monitor only)"
237
- )
238
- parser.add_argument(
239
- "--log-entropy",
240
- action="store_true",
241
- help="Log entropy values to console"
242
- )
243
-
244
- args = parser.parse_args()
245
-
246
- config = ProxyConfig(
247
- port=args.port,
248
- provider=args.provider,
249
- api_base=args.api_base,
250
- entropy_threshold=args.entropy_threshold,
251
- min_valleys=args.min_valleys,
252
- enable_early_exit=not args.no_early_exit,
253
- log_entropy=args.log_entropy
254
- )
255
-
256
- proxy = EntropyProxy(config)
257
-
258
- print(f"""
259
- ╔═══════════════════════════════════════════════════════════╗
260
- ║ ENTROPPLAIN ENTROPY MONITORING PROXY ║
261
- ╠═══════════════════════════════════════════════════════════╣
262
- ║ Proxy running on: http://localhost:{args.port} ║
263
- ║ Provider: {args.provider:<10} ║
264
- ║ API Base: {args.api_base:<30} ║
265
- ║ Early Exit: {'DISABLED' if args.no_early_exit else 'ENABLED'} ║
266
- ╠═══════════════════════════════════════════════════════════╣
267
- ║ Set your agent's API endpoint to: ║
268
- ║ export OPENAI_BASE_URL=http://localhost:{args.port} ║
269
- ║ # or for NVIDIA: ║
270
- ║ export NVIDIA_BASE_URL=http://localhost:{args.port} ║
271
- ╚═══════════════════════════════════════════════════════════╝
272
- """)
273
-
274
- proxy.run()
275
-
276
-
277
- if __name__ == "__main__":
278
- main()
1
+ """
2
+ Entropy Monitoring Proxy with built-in Dashboard.
3
+
4
+ This proxy intercepts LLM API calls and adds entropy monitoring,
5
+ enabling early exit without modifying the agent framework itself.
6
+
7
+ Usage:
8
+ # Set as your API endpoint
9
+ export OPENAI_BASE_URL=http://localhost:8765
10
+
11
+ # Run the proxy (includes dashboard at /dashboard)
12
+ python -m entroplain.proxy --port 8765 --provider openai
13
+ """
14
+
15
+ import json
16
+ import asyncio
17
+ import logging
18
+ from typing import Optional, Dict, Any, AsyncIterator, List
19
+ from dataclasses import dataclass
20
+ import httpx
21
+ from fastapi import FastAPI, Request, Response, WebSocket, WebSocketDisconnect
22
+ from fastapi.responses import StreamingResponse, HTMLResponse
23
+ import uvicorn
24
+
25
+ from .monitor import EntropyMonitor
26
+ from .cost_tracker import CostTracker, format_cost_report
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ # Dashboard HTML - embedded in proxy
32
+ DASHBOARD_HTML = """
33
+ <!DOCTYPE html>
34
+ <html>
35
+ <head>
36
+ <title>Entroplain Dashboard</title>
37
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
38
+ <style>
39
+ * { box-sizing: border-box; margin: 0; padding: 0; }
40
+ html, body {
41
+ height: 100%;
42
+ overflow: hidden;
43
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
44
+ background: #0a0a0a;
45
+ color: #e0e0e0;
46
+ }
47
+ .app {
48
+ height: 100vh;
49
+ display: flex;
50
+ flex-direction: column;
51
+ padding: 20px;
52
+ max-width: 1400px;
53
+ margin: 0 auto;
54
+ }
55
+ h1 {
56
+ font-size: 20px;
57
+ margin-bottom: 15px;
58
+ color: #4ade80;
59
+ flex-shrink: 0;
60
+ }
61
+ .main-grid {
62
+ flex: 1;
63
+ display: grid;
64
+ grid-template-columns: 1fr 280px;
65
+ gap: 15px;
66
+ min-height: 0;
67
+ }
68
+ .chart-container {
69
+ background: #1a1a1a;
70
+ border-radius: 8px;
71
+ padding: 15px;
72
+ display: flex;
73
+ flex-direction: column;
74
+ min-height: 0;
75
+ }
76
+ .chart-wrapper {
77
+ flex: 1;
78
+ position: relative;
79
+ min-height: 200px;
80
+ }
81
+ .chart-wrapper canvas {
82
+ position: absolute;
83
+ top: 0;
84
+ left: 0;
85
+ width: 100%;
86
+ height: 100%;
87
+ }
88
+ .legend {
89
+ display: flex;
90
+ gap: 15px;
91
+ margin-top: 10px;
92
+ font-size: 11px;
93
+ flex-shrink: 0;
94
+ }
95
+ .legend-item { display: flex; align-items: center; gap: 5px; }
96
+ .legend-dot { width: 8px; height: 8px; border-radius: 50%; }
97
+ .dot-entropy { background: #60a5fa; }
98
+ .dot-valley { background: #f59e0b; }
99
+ .dot-threshold { background: #ef4444; }
100
+
101
+ .stats-panel {
102
+ display: flex;
103
+ flex-direction: column;
104
+ gap: 10px;
105
+ overflow-y: auto;
106
+ }
107
+ .stat-card {
108
+ background: #1a1a1a;
109
+ border-radius: 8px;
110
+ padding: 12px;
111
+ flex-shrink: 0;
112
+ }
113
+ .stat-label {
114
+ font-size: 10px;
115
+ color: #888;
116
+ text-transform: uppercase;
117
+ letter-spacing: 0.05em;
118
+ }
119
+ .stat-value {
120
+ font-size: 24px;
121
+ font-weight: 600;
122
+ color: #fff;
123
+ margin-top: 4px;
124
+ }
125
+ .stat-value.savings { color: #4ade80; }
126
+ .stat-value.cost { color: #fbbf24; }
127
+ .stat-value.valleys { color: #60a5fa; }
128
+
129
+ .status-badge {
130
+ display: inline-block;
131
+ padding: 3px 10px;
132
+ border-radius: 10px;
133
+ font-size: 11px;
134
+ font-weight: 500;
135
+ }
136
+ .status-active { background: #22c55e; color: #000; }
137
+ .status-idle { background: #374151; color: #888; }
138
+ .status-exited { background: #f59e0b; color: #000; }
139
+
140
+ .connection-status {
141
+ padding: 6px 10px;
142
+ border-radius: 6px;
143
+ font-size: 11px;
144
+ margin-bottom: 10px;
145
+ }
146
+ .connected { background: #166534; color: #4ade80; }
147
+ .disconnected { background: #7f1d1d; color: #fca5a5; }
148
+
149
+ .waiting-message {
150
+ position: absolute;
151
+ top: 50%;
152
+ left: 50%;
153
+ transform: translate(-50%, -50%);
154
+ text-align: center;
155
+ color: #666;
156
+ }
157
+ .waiting-message h2 { font-size: 14px; margin-bottom: 5px; }
158
+ .waiting-message p { font-size: 11px; }
159
+ </style>
160
+ </head>
161
+ <body>
162
+ <div class="app">
163
+ <h1>🎯 Entroplain Dashboard</h1>
164
+
165
+ <div class="main-grid">
166
+ <div class="chart-container">
167
+ <div id="connectionStatus" class="connection-status disconnected">Connecting...</div>
168
+ <div class="chart-wrapper">
169
+ <canvas id="entropyChart"></canvas>
170
+ <div id="waitingMessage" class="waiting-message">
171
+ <h2>Waiting for data...</h2>
172
+ <p>Make a request through the proxy to see entropy visualization</p>
173
+ </div>
174
+ </div>
175
+ <div class="legend">
176
+ <div class="legend-item"><div class="legend-dot dot-entropy"></div><span>Entropy</span></div>
177
+ <div class="legend-item"><div class="legend-dot dot-valley"></div><span>Valley</span></div>
178
+ <div class="legend-item"><div class="legend-dot dot-threshold"></div><span>Threshold</span></div>
179
+ </div>
180
+ </div>
181
+
182
+ <div class="stats-panel">
183
+ <div class="stat-card">
184
+ <div class="stat-label">Status</div>
185
+ <div class="stat-value" id="status"><span class="status-badge status-idle">Idle</span></div>
186
+ </div>
187
+ <div class="stat-card">
188
+ <div class="stat-label">Tokens</div>
189
+ <div class="stat-value" id="tokens">0</div>
190
+ </div>
191
+ <div class="stat-card">
192
+ <div class="stat-label">Valleys</div>
193
+ <div class="stat-value valleys" id="valleys">0</div>
194
+ </div>
195
+ <div class="stat-card">
196
+ <div class="stat-label">Entropy</div>
197
+ <div class="stat-value" id="currentEntropy">-</div>
198
+ </div>
199
+ <div class="stat-card">
200
+ <div class="stat-label">Mean</div>
201
+ <div class="stat-value" id="meanEntropy">-</div>
202
+ </div>
203
+ <div class="stat-card">
204
+ <div class="stat-label">Saved</div>
205
+ <div class="stat-value savings" id="saved">-</div>
206
+ </div>
207
+ </div>
208
+ </div>
209
+ </div>
210
+
211
+ <script>
212
+ let chart = null;
213
+ let hasData = false;
214
+ const maxPoints = 200;
215
+
216
+ function initChart() {
217
+ const ctx = document.getElementById('entropyChart').getContext('2d');
218
+ chart = new Chart(ctx, {
219
+ type: 'line',
220
+ data: {
221
+ labels: [],
222
+ datasets: [
223
+ { label: 'Entropy', data: [], borderColor: '#60a5fa', backgroundColor: 'rgba(96, 165, 250, 0.1)', fill: true, tension: 0.3, pointRadius: 0, borderWidth: 2 },
224
+ { label: 'Threshold', data: [], borderColor: '#ef4444', borderDash: [5, 5], pointRadius: 0, fill: false, borderWidth: 1 },
225
+ { label: 'Valleys', data: [], borderColor: '#f59e0b', pointBackgroundColor: '#f59e0b', pointRadius: 5, showLine: false }
226
+ ]
227
+ },
228
+ options: {
229
+ responsive: true,
230
+ maintainAspectRatio: false,
231
+ animation: { duration: 0 },
232
+ scales: {
233
+ x: { title: { display: true, text: 'Tokens', color: '#888' }, grid: { color: '#222' }, ticks: { color: '#666', maxTicksLimit: 10 } },
234
+ y: { title: { display: true, text: 'Entropy', color: '#888' }, min: 0, max: 1, grid: { color: '#222' }, ticks: { color: '#666' } }
235
+ },
236
+ plugins: { legend: { display: false } }
237
+ }
238
+ });
239
+ }
240
+
241
+ function updateChart(data) {
242
+ if (!chart || !data.trajectory || data.trajectory.length === 0) return;
243
+
244
+ if (!hasData) {
245
+ hasData = true;
246
+ document.getElementById('waitingMessage').style.display = 'none';
247
+ document.getElementById('connectionStatus').className = 'connection-status connected';
248
+ document.getElementById('connectionStatus').textContent = 'Live';
249
+ }
250
+
251
+ let traj = data.trajectory.slice(-maxPoints);
252
+ chart.data.labels = traj.map((_, i) => i);
253
+ chart.data.datasets[0].data = traj.map(p => p.entropy);
254
+ chart.data.datasets[1].data = traj.map(() => 0.15);
255
+ chart.data.datasets[2].data = traj.map(p => p.is_valley ? p.entropy : null);
256
+ chart.update('none');
257
+ }
258
+
259
+ function updateStats(data) {
260
+ document.getElementById('tokens').textContent = data.token_count || 0;
261
+ document.getElementById('valleys').textContent = data.valley_count || 0;
262
+ if (data.current_entropy !== undefined) document.getElementById('currentEntropy').textContent = data.current_entropy.toFixed(3);
263
+ if (data.mean_entropy !== undefined) document.getElementById('meanEntropy').textContent = data.mean_entropy.toFixed(3);
264
+
265
+ if (data.exited_early) {
266
+ document.getElementById('status').innerHTML = '<span class="status-badge status-exited">Exited</span>';
267
+ if (data.cost_saved) document.getElementById('saved').textContent = '$' + data.cost_saved.toFixed(4);
268
+ } else if (data.active) {
269
+ document.getElementById('status').innerHTML = '<span class="status-badge status-active">Active</span>';
270
+ document.getElementById('saved').textContent = '-';
271
+ } else {
272
+ document.getElementById('status').innerHTML = '<span class="status-badge status-idle">Idle</span>';
273
+ }
274
+ }
275
+
276
+ initChart();
277
+
278
+ const ws = new WebSocket((location.protocol === 'https:' ? 'wss:' : 'ws:') + '//' + location.host + '/ws');
279
+ ws.onopen = () => { document.getElementById('connectionStatus').className = 'connection-status connected'; document.getElementById('connectionStatus').textContent = 'Connected'; };
280
+ ws.onclose = () => { document.getElementById('connectionStatus').className = 'connection-status disconnected'; document.getElementById('connectionStatus').textContent = 'Disconnected'; };
281
+ ws.onmessage = (e) => { try { const d = JSON.parse(e.data); updateChart(d); updateStats(d); } catch(err) {} };
282
+ </script>
283
+ </body>
284
+ </html>
285
+ """
286
+
287
+
288
+ @dataclass
289
+ class ProxyConfig:
290
+ """Configuration for the entropy proxy."""
291
+ port: int = 8765
292
+ provider: str = "openai"
293
+ api_base: str = "https://api.openai.com/v1"
294
+ model: str = "default"
295
+ entropy_threshold: float = 0.15
296
+ min_valleys: int = 2
297
+ min_tokens: int = 50
298
+ velocity_threshold: float = 0.05
299
+ enable_early_exit: bool = True
300
+ log_entropy: bool = True
301
+ track_cost: bool = True
302
+
303
+
304
+ class EntropyProxy:
305
+ """Proxy that adds entropy monitoring to LLM API calls."""
306
+
307
+ def __init__(self, config: ProxyConfig):
308
+ self.config = config
309
+ self.monitor = EntropyMonitor(
310
+ entropy_threshold=config.entropy_threshold,
311
+ min_valleys=config.min_valleys,
312
+ min_tokens=config.min_tokens,
313
+ velocity_threshold=config.velocity_threshold
314
+ )
315
+ self.cost_tracker = CostTracker(model=config.model) if config.track_cost else None
316
+ self.app = FastAPI(title="Entroplain Proxy")
317
+ self._ws_clients: List[WebSocket] = []
318
+ self._current_data: Dict[str, Any] = {"trajectory": [], "token_count": 0, "valley_count": 0, "active": False}
319
+ self._setup_routes()
320
+
321
+ def _setup_routes(self):
322
+ @self.app.get("/")
323
+ async def root():
324
+ return {"service": "Entroplain Proxy", "dashboard": "/dashboard", "health": "/health"}
325
+
326
+ @self.app.get("/dashboard")
327
+ async def dashboard():
328
+ return HTMLResponse(content=DASHBOARD_HTML)
329
+
330
+ @self.app.websocket("/ws")
331
+ async def ws_endpoint(websocket: WebSocket):
332
+ await websocket.accept()
333
+ self._ws_clients.append(websocket)
334
+ try:
335
+ await websocket.send_json(self._current_data)
336
+ while True:
337
+ await websocket.receive_text()
338
+ except WebSocketDisconnect:
339
+ if websocket in self._ws_clients:
340
+ self._ws_clients.remove(websocket)
341
+
342
+ @self.app.post("/v1/chat/completions")
343
+ async def chat_completions(request: Request):
344
+ return await self._handle_chat(request)
345
+
346
+ @self.app.get("/health")
347
+ async def health():
348
+ stats = self.monitor.get_stats()
349
+ if self.cost_tracker and self.cost_tracker.output_tokens > 0:
350
+ stats["cost"] = self.cost_tracker.get_stats()
351
+ return {"status": "ok", "monitor": stats}
352
+
353
+ @self.app.post("/reset")
354
+ async def reset():
355
+ self.monitor.reset()
356
+ if self.cost_tracker:
357
+ self.cost_tracker.reset()
358
+ return {"status": "reset"}
359
+
360
+ async def _broadcast(self, data: Dict[str, Any]):
361
+ """Broadcast data to all WebSocket clients."""
362
+ self._current_data = data
363
+ for ws in self._ws_clients[:]:
364
+ try:
365
+ await ws.send_json(data)
366
+ except Exception:
367
+ self._ws_clients.remove(ws)
368
+
369
+ async def _handle_chat(self, request: Request):
370
+ """Handle chat completion requests with entropy monitoring."""
371
+ body = await request.json()
372
+
373
+ model = body.get("model", "default")
374
+ if self.cost_tracker:
375
+ self.cost_tracker = CostTracker(model=model)
376
+
377
+ input_tokens = self._estimate_tokens(body.get("messages", []))
378
+ if self.cost_tracker:
379
+ self.cost_tracker.track_input(input_tokens)
380
+
381
+ if "logprobs" not in body:
382
+ body["logprobs"] = True
383
+ if "top_logprobs" not in body:
384
+ body["top_logprobs"] = 5
385
+
386
+ self.monitor.reset()
387
+
388
+ # Broadcast active state
389
+ await self._broadcast({"active": True, "trajectory": [], "token_count": 0, "valley_count": 0})
390
+
391
+ async with httpx.AsyncClient() as client:
392
+ response = await client.post(
393
+ f"{self.config.api_base}/chat/completions",
394
+ json=body,
395
+ headers={
396
+ "Content-Type": "application/json",
397
+ "Authorization": request.headers.get("Authorization", "")
398
+ },
399
+ timeout=120.0
400
+ )
401
+
402
+ if not body.get("stream", False):
403
+ return Response(content=response.content, status_code=response.status_code, headers=dict(response.headers))
404
+
405
+ return StreamingResponse(self._stream_with_entropy(response), media_type="text/event-stream")
406
+
407
+ def _estimate_tokens(self, messages: list) -> int:
408
+ total = 0
409
+ for msg in messages:
410
+ content = msg.get("content", "")
411
+ if isinstance(content, str):
412
+ total += len(content) // 4
413
+ elif isinstance(content, list):
414
+ for part in content:
415
+ if isinstance(part, dict) and part.get("type") == "text":
416
+ total += len(part.get("text", "")) // 4
417
+ return max(total, 10)
418
+
419
+ async def _stream_with_entropy(self, response: httpx.Response) -> AsyncIterator[str]:
420
+ import math
421
+ exited_early = False
422
+ full_content = ""
423
+
424
+ async for line in response.aiter_lines():
425
+ if not line.startswith("data: "):
426
+ yield line + "\n"
427
+ continue
428
+
429
+ data = line[6:]
430
+ if data == "[DONE]":
431
+ yield line + "\n"
432
+ break
433
+
434
+ try:
435
+ chunk = json.loads(data)
436
+ except json.JSONDecodeError:
437
+ yield line + "\n"
438
+ continue
439
+
440
+ if chunk.get("choices"):
441
+ choice = chunk["choices"][0]
442
+
443
+ if choice.get("delta", {}).get("content"):
444
+ token = choice["delta"]["content"]
445
+ full_content += token
446
+ if self.cost_tracker:
447
+ self.cost_tracker.track_output(1)
448
+
449
+ logprobs = choice.get("logprobs")
450
+ if logprobs and logprobs.get("content"):
451
+ logprobs_data = logprobs["content"]
452
+ if logprobs_data:
453
+ entropy = self._calculate_entropy(logprobs_data[0])
454
+ confidence = 0.0
455
+ if logprobs_data[0].get("top_logprobs"):
456
+ confidence = math.exp(logprobs_data[0]["top_logprobs"][0]["logprob"])
457
+
458
+ self.monitor.track(token, entropy, confidence)
459
+
460
+ if self.config.log_entropy:
461
+ logger.info(f"Token: {repr(token)}, Entropy: {entropy:.4f}, Valleys: {len(self.monitor.get_valleys())}")
462
+
463
+ # Broadcast update to dashboard
464
+ stats = self.monitor.get_stats()
465
+ await self._broadcast({
466
+ "active": True,
467
+ "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
468
+ "token_count": stats["token_count"],
469
+ "valley_count": stats["valley_count"],
470
+ "current_entropy": stats["current_entropy"],
471
+ "mean_entropy": stats["mean_entropy"],
472
+ "exited_early": False
473
+ })
474
+
475
+ if self.config.enable_early_exit and self.monitor.should_exit():
476
+ logger.info(f"Early exit! Tokens: {len(full_content)}, Valleys: {len(self.monitor.get_valleys())}")
477
+ if self.cost_tracker:
478
+ estimated_full = len(full_content) * 2.5
479
+ self.cost_tracker.set_full_estimate(int(estimated_full))
480
+ estimate = self.cost_tracker.get_estimate()
481
+ logger.info(f"Cost savings: ${estimate.cost_saved_usd:.4f}")
482
+ await self._broadcast({
483
+ "active": False,
484
+ "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
485
+ "token_count": stats["token_count"],
486
+ "valley_count": stats["valley_count"],
487
+ "current_entropy": stats["current_entropy"],
488
+ "mean_entropy": stats["mean_entropy"],
489
+ "exited_early": True,
490
+ "cost_saved": estimate.cost_saved_usd
491
+ })
492
+ exited_early = True
493
+ yield "data: [DONE]\n\n"
494
+ break
495
+
496
+ yield line + "\n"
497
+
498
+ if not exited_early:
499
+ stats = self.monitor.get_stats()
500
+ await self._broadcast({
501
+ "active": False,
502
+ "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
503
+ "token_count": stats["token_count"],
504
+ "valley_count": stats["valley_count"],
505
+ "current_entropy": stats["current_entropy"],
506
+ "mean_entropy": stats["mean_entropy"],
507
+ "exited_early": False
508
+ })
509
+
510
+ def _calculate_entropy(self, logprobs_data: Dict) -> float:
511
+ import math
512
+ if not logprobs_data or "top_logprobs" not in logprobs_data:
513
+ return 0.0
514
+ entropy = 0.0
515
+ for lp in logprobs_data["top_logprobs"]:
516
+ prob = math.exp(lp["logprob"])
517
+ if prob > 0:
518
+ entropy -= prob * math.log2(prob + 1e-10)
519
+ return entropy
520
+
521
+ def run(self):
522
+ uvicorn.run(self.app, host="0.0.0.0", port=self.config.port)
523
+
524
+
525
+ def main():
526
+ import argparse
527
+ parser = argparse.ArgumentParser(description="Entropy Monitoring Proxy")
528
+ parser.add_argument("--port", type=int, default=8765)
529
+ parser.add_argument("--provider", default="openai", choices=["openai", "anthropic", "nvidia"])
530
+ parser.add_argument("--api-base", default=None, help="API base URL (auto-set based on provider)")
531
+ parser.add_argument("--model", default="default")
532
+ parser.add_argument("--entropy-threshold", type=float, default=0.15)
533
+ parser.add_argument("--min-valleys", type=int, default=2)
534
+ parser.add_argument("--no-early-exit", action="store_true")
535
+ parser.add_argument("--log-entropy", action="store_true")
536
+ parser.add_argument("--no-cost-tracking", action="store_true")
537
+ args = parser.parse_args()
538
+
539
+ # Auto-set API base URL based on provider
+ api_base = args.api_base
+ if api_base is None:
+ if args.provider == "openai":
+ api_base = "https://api.openai.com/v1"
+ elif args.provider == "anthropic":
+ api_base = "https://api.anthropic.com/v1"
+ elif args.provider == "nvidia":
+ api_base = "https://integrate.api.nvidia.com/v1"
+
+ config = ProxyConfig(
+ port=args.port,
+ provider=args.provider,
+ api_base=api_base,
540
+ model=args.model,
541
+ entropy_threshold=args.entropy_threshold,
542
+ min_valleys=args.min_valleys,
543
+ enable_early_exit=not args.no_early_exit,
544
+ log_entropy=args.log_entropy,
545
+ track_cost=not args.no_cost_tracking
546
+ )
547
+
548
+ proxy = EntropyProxy(config)
549
+
550
+ print(f"\n{'='*62}\n ENTROPPLAIN PROXY WITH DASHBOARD\n{'='*62}")
551
+ print(f" Proxy: http://localhost:{args.port}")
552
+ print(f" Dashboard: http://localhost:{args.port}/dashboard")
553
+ print(f" API Base: {args.api_base}")
554
+ print(f"{'='*62}\n")
555
+
556
+ proxy.run()
557
+
558
+
559
+ if __name__ == "__main__":
560
+ main()
561
+