entroplain 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/26.0.1 +0 -0
- package/CONTRIBUTING.md +103 -103
- package/DEPLOY.md +41 -0
- package/README.md +478 -389
- package/dist/entroplain-0.2.2-py3-none-any.whl +0 -0
- package/dist/entroplain-0.2.2.tar.gz +0 -0
- package/dist/entroplain-0.2.3-py3-none-any.whl +0 -0
- package/dist/entroplain-0.2.3.tar.gz +0 -0
- package/docs/AGENT_USAGE.md +178 -178
- package/docs/USAGE.md +302 -302
- package/entroplain/__init__.py +32 -33
- package/entroplain/cost_tracker.py +231 -0
- package/entroplain/dashboard.py +480 -0
- package/entroplain/monitor.py +390 -272
- package/entroplain/providers.py +626 -626
- package/entroplain/proxy.py +561 -278
- package/entroplain/shared_state.py +72 -0
- package/entroplain-proxy +0 -0
- package/package.json +47 -44
- package/paper.md +299 -0
- package/pip +0 -0
- package/pyproject.toml +96 -89
- package/scripts/setup.bat +89 -0
- package/scripts/setup.sh +98 -0
- package/test_nvidia.py +56 -0
- package/test_proxy.py +16 -0
- package/vercel.json +6 -0
- package/website/README.md +14 -0
- package/website/app/globals.css +88 -0
- package/website/app/layout.tsx +34 -0
- package/website/app/page.tsx +537 -0
- package/website/package-lock.json +520 -0
- package/website/package.json +25 -0
- package/website/tsconfig.json +40 -0
- package/website/vercel.json +3 -0
- package/dist/entroplain-0.1.1-py3-none-any.whl +0 -0
- package/dist/entroplain-0.1.1.tar.gz +0 -0
package/entroplain/proxy.py
CHANGED
|
@@ -1,278 +1,561 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Entropy Monitoring Proxy
|
|
3
|
-
|
|
4
|
-
This proxy intercepts LLM API calls and adds entropy monitoring,
|
|
5
|
-
enabling early exit without modifying the agent framework itself.
|
|
6
|
-
|
|
7
|
-
Usage:
|
|
8
|
-
# Set as your API endpoint
|
|
9
|
-
export OPENAI_BASE_URL=http://localhost:8765
|
|
10
|
-
|
|
11
|
-
# Run the proxy
|
|
12
|
-
python -m entroplain.proxy --port 8765 --provider openai
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import json
|
|
16
|
-
import asyncio
|
|
17
|
-
import logging
|
|
18
|
-
from typing import Optional, Dict, Any, AsyncIterator
|
|
19
|
-
from dataclasses import dataclass
|
|
20
|
-
import httpx
|
|
21
|
-
from fastapi import FastAPI, Request, Response
|
|
22
|
-
from fastapi.responses import StreamingResponse
|
|
23
|
-
import uvicorn
|
|
24
|
-
|
|
25
|
-
from .monitor import EntropyMonitor
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
1
|
+
"""
|
|
2
|
+
Entropy Monitoring Proxy with built-in Dashboard.
|
|
3
|
+
|
|
4
|
+
This proxy intercepts LLM API calls and adds entropy monitoring,
|
|
5
|
+
enabling early exit without modifying the agent framework itself.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# Set as your API endpoint
|
|
9
|
+
export OPENAI_BASE_URL=http://localhost:8765
|
|
10
|
+
|
|
11
|
+
# Run the proxy (includes dashboard at /dashboard)
|
|
12
|
+
python -m entroplain.proxy --port 8765 --provider openai
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Optional, Dict, Any, AsyncIterator, List
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
import httpx
|
|
21
|
+
from fastapi import FastAPI, Request, Response, WebSocket, WebSocketDisconnect
|
|
22
|
+
from fastapi.responses import StreamingResponse, HTMLResponse
|
|
23
|
+
import uvicorn
|
|
24
|
+
|
|
25
|
+
from .monitor import EntropyMonitor
|
|
26
|
+
from .cost_tracker import CostTracker, format_cost_report
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Dashboard HTML - embedded in proxy
|
|
32
|
+
DASHBOARD_HTML = """
|
|
33
|
+
<!DOCTYPE html>
|
|
34
|
+
<html>
|
|
35
|
+
<head>
|
|
36
|
+
<title>Entroplain Dashboard</title>
|
|
37
|
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
|
38
|
+
<style>
|
|
39
|
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
40
|
+
html, body {
|
|
41
|
+
height: 100%;
|
|
42
|
+
overflow: hidden;
|
|
43
|
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
|
44
|
+
background: #0a0a0a;
|
|
45
|
+
color: #e0e0e0;
|
|
46
|
+
}
|
|
47
|
+
.app {
|
|
48
|
+
height: 100vh;
|
|
49
|
+
display: flex;
|
|
50
|
+
flex-direction: column;
|
|
51
|
+
padding: 20px;
|
|
52
|
+
max-width: 1400px;
|
|
53
|
+
margin: 0 auto;
|
|
54
|
+
}
|
|
55
|
+
h1 {
|
|
56
|
+
font-size: 20px;
|
|
57
|
+
margin-bottom: 15px;
|
|
58
|
+
color: #4ade80;
|
|
59
|
+
flex-shrink: 0;
|
|
60
|
+
}
|
|
61
|
+
.main-grid {
|
|
62
|
+
flex: 1;
|
|
63
|
+
display: grid;
|
|
64
|
+
grid-template-columns: 1fr 280px;
|
|
65
|
+
gap: 15px;
|
|
66
|
+
min-height: 0;
|
|
67
|
+
}
|
|
68
|
+
.chart-container {
|
|
69
|
+
background: #1a1a1a;
|
|
70
|
+
border-radius: 8px;
|
|
71
|
+
padding: 15px;
|
|
72
|
+
display: flex;
|
|
73
|
+
flex-direction: column;
|
|
74
|
+
min-height: 0;
|
|
75
|
+
}
|
|
76
|
+
.chart-wrapper {
|
|
77
|
+
flex: 1;
|
|
78
|
+
position: relative;
|
|
79
|
+
min-height: 200px;
|
|
80
|
+
}
|
|
81
|
+
.chart-wrapper canvas {
|
|
82
|
+
position: absolute;
|
|
83
|
+
top: 0;
|
|
84
|
+
left: 0;
|
|
85
|
+
width: 100%;
|
|
86
|
+
height: 100%;
|
|
87
|
+
}
|
|
88
|
+
.legend {
|
|
89
|
+
display: flex;
|
|
90
|
+
gap: 15px;
|
|
91
|
+
margin-top: 10px;
|
|
92
|
+
font-size: 11px;
|
|
93
|
+
flex-shrink: 0;
|
|
94
|
+
}
|
|
95
|
+
.legend-item { display: flex; align-items: center; gap: 5px; }
|
|
96
|
+
.legend-dot { width: 8px; height: 8px; border-radius: 50%; }
|
|
97
|
+
.dot-entropy { background: #60a5fa; }
|
|
98
|
+
.dot-valley { background: #f59e0b; }
|
|
99
|
+
.dot-threshold { background: #ef4444; }
|
|
100
|
+
|
|
101
|
+
.stats-panel {
|
|
102
|
+
display: flex;
|
|
103
|
+
flex-direction: column;
|
|
104
|
+
gap: 10px;
|
|
105
|
+
overflow-y: auto;
|
|
106
|
+
}
|
|
107
|
+
.stat-card {
|
|
108
|
+
background: #1a1a1a;
|
|
109
|
+
border-radius: 8px;
|
|
110
|
+
padding: 12px;
|
|
111
|
+
flex-shrink: 0;
|
|
112
|
+
}
|
|
113
|
+
.stat-label {
|
|
114
|
+
font-size: 10px;
|
|
115
|
+
color: #888;
|
|
116
|
+
text-transform: uppercase;
|
|
117
|
+
letter-spacing: 0.05em;
|
|
118
|
+
}
|
|
119
|
+
.stat-value {
|
|
120
|
+
font-size: 24px;
|
|
121
|
+
font-weight: 600;
|
|
122
|
+
color: #fff;
|
|
123
|
+
margin-top: 4px;
|
|
124
|
+
}
|
|
125
|
+
.stat-value.savings { color: #4ade80; }
|
|
126
|
+
.stat-value.cost { color: #fbbf24; }
|
|
127
|
+
.stat-value.valleys { color: #60a5fa; }
|
|
128
|
+
|
|
129
|
+
.status-badge {
|
|
130
|
+
display: inline-block;
|
|
131
|
+
padding: 3px 10px;
|
|
132
|
+
border-radius: 10px;
|
|
133
|
+
font-size: 11px;
|
|
134
|
+
font-weight: 500;
|
|
135
|
+
}
|
|
136
|
+
.status-active { background: #22c55e; color: #000; }
|
|
137
|
+
.status-idle { background: #374151; color: #888; }
|
|
138
|
+
.status-exited { background: #f59e0b; color: #000; }
|
|
139
|
+
|
|
140
|
+
.connection-status {
|
|
141
|
+
padding: 6px 10px;
|
|
142
|
+
border-radius: 6px;
|
|
143
|
+
font-size: 11px;
|
|
144
|
+
margin-bottom: 10px;
|
|
145
|
+
}
|
|
146
|
+
.connected { background: #166534; color: #4ade80; }
|
|
147
|
+
.disconnected { background: #7f1d1d; color: #fca5a5; }
|
|
148
|
+
|
|
149
|
+
.waiting-message {
|
|
150
|
+
position: absolute;
|
|
151
|
+
top: 50%;
|
|
152
|
+
left: 50%;
|
|
153
|
+
transform: translate(-50%, -50%);
|
|
154
|
+
text-align: center;
|
|
155
|
+
color: #666;
|
|
156
|
+
}
|
|
157
|
+
.waiting-message h2 { font-size: 14px; margin-bottom: 5px; }
|
|
158
|
+
.waiting-message p { font-size: 11px; }
|
|
159
|
+
</style>
|
|
160
|
+
</head>
|
|
161
|
+
<body>
|
|
162
|
+
<div class="app">
|
|
163
|
+
<h1>🎯 Entroplain Dashboard</h1>
|
|
164
|
+
|
|
165
|
+
<div class="main-grid">
|
|
166
|
+
<div class="chart-container">
|
|
167
|
+
<div id="connectionStatus" class="connection-status disconnected">Connecting...</div>
|
|
168
|
+
<div class="chart-wrapper">
|
|
169
|
+
<canvas id="entropyChart"></canvas>
|
|
170
|
+
<div id="waitingMessage" class="waiting-message">
|
|
171
|
+
<h2>Waiting for data...</h2>
|
|
172
|
+
<p>Make a request through the proxy to see entropy visualization</p>
|
|
173
|
+
</div>
|
|
174
|
+
</div>
|
|
175
|
+
<div class="legend">
|
|
176
|
+
<div class="legend-item"><div class="legend-dot dot-entropy"></div><span>Entropy</span></div>
|
|
177
|
+
<div class="legend-item"><div class="legend-dot dot-valley"></div><span>Valley</span></div>
|
|
178
|
+
<div class="legend-item"><div class="legend-dot dot-threshold"></div><span>Threshold</span></div>
|
|
179
|
+
</div>
|
|
180
|
+
</div>
|
|
181
|
+
|
|
182
|
+
<div class="stats-panel">
|
|
183
|
+
<div class="stat-card">
|
|
184
|
+
<div class="stat-label">Status</div>
|
|
185
|
+
<div class="stat-value" id="status"><span class="status-badge status-idle">Idle</span></div>
|
|
186
|
+
</div>
|
|
187
|
+
<div class="stat-card">
|
|
188
|
+
<div class="stat-label">Tokens</div>
|
|
189
|
+
<div class="stat-value" id="tokens">0</div>
|
|
190
|
+
</div>
|
|
191
|
+
<div class="stat-card">
|
|
192
|
+
<div class="stat-label">Valleys</div>
|
|
193
|
+
<div class="stat-value valleys" id="valleys">0</div>
|
|
194
|
+
</div>
|
|
195
|
+
<div class="stat-card">
|
|
196
|
+
<div class="stat-label">Entropy</div>
|
|
197
|
+
<div class="stat-value" id="currentEntropy">-</div>
|
|
198
|
+
</div>
|
|
199
|
+
<div class="stat-card">
|
|
200
|
+
<div class="stat-label">Mean</div>
|
|
201
|
+
<div class="stat-value" id="meanEntropy">-</div>
|
|
202
|
+
</div>
|
|
203
|
+
<div class="stat-card">
|
|
204
|
+
<div class="stat-label">Saved</div>
|
|
205
|
+
<div class="stat-value savings" id="saved">-</div>
|
|
206
|
+
</div>
|
|
207
|
+
</div>
|
|
208
|
+
</div>
|
|
209
|
+
</div>
|
|
210
|
+
|
|
211
|
+
<script>
|
|
212
|
+
let chart = null;
|
|
213
|
+
let hasData = false;
|
|
214
|
+
const maxPoints = 200;
|
|
215
|
+
|
|
216
|
+
function initChart() {
|
|
217
|
+
const ctx = document.getElementById('entropyChart').getContext('2d');
|
|
218
|
+
chart = new Chart(ctx, {
|
|
219
|
+
type: 'line',
|
|
220
|
+
data: {
|
|
221
|
+
labels: [],
|
|
222
|
+
datasets: [
|
|
223
|
+
{ label: 'Entropy', data: [], borderColor: '#60a5fa', backgroundColor: 'rgba(96, 165, 250, 0.1)', fill: true, tension: 0.3, pointRadius: 0, borderWidth: 2 },
|
|
224
|
+
{ label: 'Threshold', data: [], borderColor: '#ef4444', borderDash: [5, 5], pointRadius: 0, fill: false, borderWidth: 1 },
|
|
225
|
+
{ label: 'Valleys', data: [], borderColor: '#f59e0b', pointBackgroundColor: '#f59e0b', pointRadius: 5, showLine: false }
|
|
226
|
+
]
|
|
227
|
+
},
|
|
228
|
+
options: {
|
|
229
|
+
responsive: true,
|
|
230
|
+
maintainAspectRatio: false,
|
|
231
|
+
animation: { duration: 0 },
|
|
232
|
+
scales: {
|
|
233
|
+
x: { title: { display: true, text: 'Tokens', color: '#888' }, grid: { color: '#222' }, ticks: { color: '#666', maxTicksLimit: 10 } },
|
|
234
|
+
y: { title: { display: true, text: 'Entropy', color: '#888' }, min: 0, max: 1, grid: { color: '#222' }, ticks: { color: '#666' } }
|
|
235
|
+
},
|
|
236
|
+
plugins: { legend: { display: false } }
|
|
237
|
+
}
|
|
238
|
+
});
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
function updateChart(data) {
|
|
242
|
+
if (!chart || !data.trajectory || data.trajectory.length === 0) return;
|
|
243
|
+
|
|
244
|
+
if (!hasData) {
|
|
245
|
+
hasData = true;
|
|
246
|
+
document.getElementById('waitingMessage').style.display = 'none';
|
|
247
|
+
document.getElementById('connectionStatus').className = 'connection-status connected';
|
|
248
|
+
document.getElementById('connectionStatus').textContent = 'Live';
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
let traj = data.trajectory.slice(-maxPoints);
|
|
252
|
+
chart.data.labels = traj.map((_, i) => i);
|
|
253
|
+
chart.data.datasets[0].data = traj.map(p => p.entropy);
|
|
254
|
+
chart.data.datasets[1].data = traj.map(() => 0.15);
|
|
255
|
+
chart.data.datasets[2].data = traj.map(p => p.is_valley ? p.entropy : null);
|
|
256
|
+
chart.update('none');
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
function updateStats(data) {
|
|
260
|
+
document.getElementById('tokens').textContent = data.token_count || 0;
|
|
261
|
+
document.getElementById('valleys').textContent = data.valley_count || 0;
|
|
262
|
+
if (data.current_entropy !== undefined) document.getElementById('currentEntropy').textContent = data.current_entropy.toFixed(3);
|
|
263
|
+
if (data.mean_entropy !== undefined) document.getElementById('meanEntropy').textContent = data.mean_entropy.toFixed(3);
|
|
264
|
+
|
|
265
|
+
if (data.exited_early) {
|
|
266
|
+
document.getElementById('status').innerHTML = '<span class="status-badge status-exited">Exited</span>';
|
|
267
|
+
if (data.cost_saved) document.getElementById('saved').textContent = '$' + data.cost_saved.toFixed(4);
|
|
268
|
+
} else if (data.active) {
|
|
269
|
+
document.getElementById('status').innerHTML = '<span class="status-badge status-active">Active</span>';
|
|
270
|
+
document.getElementById('saved').textContent = '-';
|
|
271
|
+
} else {
|
|
272
|
+
document.getElementById('status').innerHTML = '<span class="status-badge status-idle">Idle</span>';
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
initChart();
|
|
277
|
+
|
|
278
|
+
const ws = new WebSocket((location.protocol === 'https:' ? 'wss:' : 'ws:') + '//' + location.host + '/ws');
|
|
279
|
+
ws.onopen = () => { document.getElementById('connectionStatus').className = 'connection-status connected'; document.getElementById('connectionStatus').textContent = 'Connected'; };
|
|
280
|
+
ws.onclose = () => { document.getElementById('connectionStatus').className = 'connection-status disconnected'; document.getElementById('connectionStatus').textContent = 'Disconnected'; };
|
|
281
|
+
ws.onmessage = (e) => { try { const d = JSON.parse(e.data); updateChart(d); updateStats(d); } catch(err) {} };
|
|
282
|
+
</script>
|
|
283
|
+
</body>
|
|
284
|
+
</html>
|
|
285
|
+
"""
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
@dataclass
class ProxyConfig:
    """Settings for the entropy-monitoring proxy server.

    Defaults target the OpenAI chat-completions API with the proxy
    listening on localhost:8765.
    """
    port: int = 8765                       # local port the proxy listens on
    provider: str = "openai"               # upstream provider name
    api_base: str = "https://api.openai.com/v1"   # upstream API base URL
    model: str = "default"                 # model name used for cost tracking
    entropy_threshold: float = 0.15        # entropy below this counts toward a valley
    min_valleys: int = 2                   # valleys required before early exit
    min_tokens: int = 50                   # never exit before this many tokens
    velocity_threshold: float = 0.05       # entropy-change rate threshold
    enable_early_exit: bool = True         # cut the stream when the monitor says so
    log_entropy: bool = True               # log per-token entropy values
    track_cost: bool = True                # estimate dollar savings from early exit
|
|
303
|
+
|
|
304
|
+
class EntropyProxy:
    """Proxy that adds entropy monitoring to LLM API calls."""

    def __init__(self, config: ProxyConfig):
        # Immutable runtime configuration for this proxy instance.
        self.config = config
        # Shared entropy monitor; reset at the start of every request
        # (see _handle_chat), so the proxy handles one request at a time.
        self.monitor = EntropyMonitor(
            entropy_threshold=config.entropy_threshold,
            min_valleys=config.min_valleys,
            min_tokens=config.min_tokens,
            velocity_threshold=config.velocity_threshold
        )
        # Replaced with a model-specific tracker per request; None disables
        # cost tracking entirely.
        self.cost_tracker = CostTracker(model=config.model) if config.track_cost else None
        self.app = FastAPI(title="Entroplain Proxy")
        # Currently connected dashboard WebSocket clients.
        self._ws_clients: List[WebSocket] = []
        # Latest state snapshot, replayed to newly connecting dashboard clients.
        self._current_data: Dict[str, Any] = {"trajectory": [], "token_count": 0, "valley_count": 0, "active": False}
        self._setup_routes()
|
|
321
|
+
    def _setup_routes(self):
        """Register HTTP and WebSocket routes on the FastAPI app."""

        @self.app.get("/")
        async def root():
            # Service discovery: points clients at the dashboard and health check.
            return {"service": "Entroplain Proxy", "dashboard": "/dashboard", "health": "/health"}

        @self.app.get("/dashboard")
        async def dashboard():
            # Serve the embedded single-page dashboard (DASHBOARD_HTML constant).
            return HTMLResponse(content=DASHBOARD_HTML)

        @self.app.websocket("/ws")
        async def ws_endpoint(websocket: WebSocket):
            await websocket.accept()
            self._ws_clients.append(websocket)
            try:
                # Send the latest snapshot immediately so the UI isn't blank
                # until the next broadcast.
                await websocket.send_json(self._current_data)
                # Keep the connection open; incoming messages are ignored.
                while True:
                    await websocket.receive_text()
            except WebSocketDisconnect:
                # Membership check: _broadcast may already have removed us.
                if websocket in self._ws_clients:
                    self._ws_clients.remove(websocket)

        @self.app.post("/v1/chat/completions")
        async def chat_completions(request: Request):
            return await self._handle_chat(request)

        @self.app.get("/health")
        async def health():
            stats = self.monitor.get_stats()
            # Only attach cost info once some output tokens have been tracked.
            if self.cost_tracker and self.cost_tracker.output_tokens > 0:
                stats["cost"] = self.cost_tracker.get_stats()
            return {"status": "ok", "monitor": stats}

        @self.app.post("/reset")
        async def reset():
            # Clear monitor (and cost tracker, if enabled) state between runs.
            self.monitor.reset()
            if self.cost_tracker:
                self.cost_tracker.reset()
            return {"status": "reset"}
|
|
360
|
+
    async def _broadcast(self, data: Dict[str, Any]):
        """Broadcast data to all WebSocket clients."""
        # Cache the snapshot so clients connecting later get state on accept.
        self._current_data = data
        # Iterate over a copy: dead clients are removed from the list mid-loop.
        for ws in self._ws_clients[:]:
            try:
                await ws.send_json(data)
            except Exception:
                # Any send failure is treated as a gone client; drop it.
                self._ws_clients.remove(ws)
|
369
|
+
    async def _handle_chat(self, request: Request):
        """Handle chat completion requests with entropy monitoring."""
        body = await request.json()

        model = body.get("model", "default")
        # Rebuild the tracker per request so pricing matches the requested model.
        if self.cost_tracker:
            self.cost_tracker = CostTracker(model=model)

        input_tokens = self._estimate_tokens(body.get("messages", []))
        if self.cost_tracker:
            self.cost_tracker.track_input(input_tokens)

        # Force logprobs on so entropy can be computed from the response stream;
        # caller-supplied values are left untouched.
        if "logprobs" not in body:
            body["logprobs"] = True
        if "top_logprobs" not in body:
            body["top_logprobs"] = 5

        self.monitor.reset()

        # Broadcast active state
        await self._broadcast({"active": True, "trajectory": [], "token_count": 0, "valley_count": 0})

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.config.api_base}/chat/completions",
                json=body,
                headers={
                    "Content-Type": "application/json",
                    # Forward the caller's credentials verbatim to the upstream API.
                    "Authorization": request.headers.get("Authorization", "")
                },
                timeout=120.0
            )

            if not body.get("stream", False):
                return Response(content=response.content, status_code=response.status_code, headers=dict(response.headers))

            # NOTE(review): client.post() buffers the entire upstream body
            # before returning, so the "stream" below replays a buffered
            # response rather than relaying tokens live; true incremental
            # streaming would need client.stream(...) — confirm intended.
            return StreamingResponse(self._stream_with_entropy(response), media_type="text/event-stream")
|
|
407
|
+
def _estimate_tokens(self, messages: list) -> int:
|
|
408
|
+
total = 0
|
|
409
|
+
for msg in messages:
|
|
410
|
+
content = msg.get("content", "")
|
|
411
|
+
if isinstance(content, str):
|
|
412
|
+
total += len(content) // 4
|
|
413
|
+
elif isinstance(content, list):
|
|
414
|
+
for part in content:
|
|
415
|
+
if isinstance(part, dict) and part.get("type") == "text":
|
|
416
|
+
total += len(part.get("text", "")) // 4
|
|
417
|
+
return max(total, 10)
|
|
418
|
+
|
|
419
|
+
    async def _stream_with_entropy(self, response: httpx.Response) -> AsyncIterator[str]:
        """Relay an SSE stream while tracking per-token entropy; cut it short on early exit."""
        import math
        exited_early = False
        full_content = ""

        async for line in response.aiter_lines():
            # Pass through anything that isn't an SSE data line.
            if not line.startswith("data: "):
                yield line + "\n"
                continue

            data = line[6:]
            if data == "[DONE]":
                yield line + "\n"
                break

            try:
                chunk = json.loads(data)
            except json.JSONDecodeError:
                # Forward unparseable payloads untouched.
                yield line + "\n"
                continue

            if chunk.get("choices"):
                choice = chunk["choices"][0]

                if choice.get("delta", {}).get("content"):
                    token = choice["delta"]["content"]
                    full_content += token
                    if self.cost_tracker:
                        self.cost_tracker.track_output(1)

                # NOTE(review): `token` is only bound when the delta carried
                # content; a chunk with logprobs but no content would hit a
                # NameError at monitor.track() below — confirm the upstream
                # API always pairs them.
                logprobs = choice.get("logprobs")
                if logprobs and logprobs.get("content"):
                    logprobs_data = logprobs["content"]
                    if logprobs_data:
                        entropy = self._calculate_entropy(logprobs_data[0])
                        confidence = 0.0
                        if logprobs_data[0].get("top_logprobs"):
                            # Probability of the single most likely alternative.
                            confidence = math.exp(logprobs_data[0]["top_logprobs"][0]["logprob"])

                        self.monitor.track(token, entropy, confidence)

                        if self.config.log_entropy:
                            logger.info(f"Token: {repr(token)}, Entropy: {entropy:.4f}, Valleys: {len(self.monitor.get_valleys())}")

                        # Broadcast update to dashboard
                        stats = self.monitor.get_stats()
                        await self._broadcast({
                            "active": True,
                            "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
                            "token_count": stats["token_count"],
                            "valley_count": stats["valley_count"],
                            "current_entropy": stats["current_entropy"],
                            "mean_entropy": stats["mean_entropy"],
                            "exited_early": False
                        })

                        if self.config.enable_early_exit and self.monitor.should_exit():
                            logger.info(f"Early exit! Tokens: {len(full_content)}, Valleys: {len(self.monitor.get_valleys())}")
                            if self.cost_tracker:
                                # Heuristic: assume the full response would have
                                # run ~2.5x the content generated so far.
                                estimated_full = len(full_content) * 2.5
                                self.cost_tracker.set_full_estimate(int(estimated_full))
                                estimate = self.cost_tracker.get_estimate()
                                logger.info(f"Cost savings: ${estimate.cost_saved_usd:.4f}")
                                await self._broadcast({
                                    "active": False,
                                    "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
                                    "token_count": stats["token_count"],
                                    "valley_count": stats["valley_count"],
                                    "current_entropy": stats["current_entropy"],
                                    "mean_entropy": stats["mean_entropy"],
                                    "exited_early": True,
                                    "cost_saved": estimate.cost_saved_usd
                                })
                            exited_early = True
                            # Terminate the client-facing stream cleanly.
                            yield "data: [DONE]\n\n"
                            break

            yield line + "\n"

        if not exited_early:
            # Final snapshot for the dashboard after a normal completion.
            stats = self.monitor.get_stats()
            await self._broadcast({
                "active": False,
                "trajectory": [{"entropy": p.entropy, "is_valley": p.is_valley} for p in self.monitor._trajectory],
                "token_count": stats["token_count"],
                "valley_count": stats["valley_count"],
                "current_entropy": stats["current_entropy"],
                "mean_entropy": stats["mean_entropy"],
                "exited_early": False
            })
|
|
510
|
+
def _calculate_entropy(self, logprobs_data: Dict) -> float:
|
|
511
|
+
import math
|
|
512
|
+
if not logprobs_data or "top_logprobs" not in logprobs_data:
|
|
513
|
+
return 0.0
|
|
514
|
+
entropy = 0.0
|
|
515
|
+
for lp in logprobs_data["top_logprobs"]:
|
|
516
|
+
prob = math.exp(lp["logprob"])
|
|
517
|
+
if prob > 0:
|
|
518
|
+
entropy -= prob * math.log2(prob + 1e-10)
|
|
519
|
+
return entropy
|
|
520
|
+
|
|
521
|
+
    def run(self):
        """Start the proxy server (blocking) on the configured port.

        Binds to 0.0.0.0, i.e. all interfaces — the proxy is reachable from
        the network, not just localhost.
        """
        uvicorn.run(self.app, host="0.0.0.0", port=self.config.port)
|
525
|
+
def main():
    """CLI entry point: parse arguments, build a ProxyConfig, and run the proxy.

    Fixes vs. previous revision: the provider-to-URL resolution block had been
    collapsed onto one line with literal `` `n `` sequences (PowerShell newline
    escapes leaked into the source), making the file syntactically invalid;
    the banner misspelled the project name; and the API-base line printed the
    raw --api-base flag (None when auto-resolved) instead of the resolved URL.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Entropy Monitoring Proxy")
    parser.add_argument("--port", type=int, default=8765)
    parser.add_argument("--provider", default="openai", choices=["openai", "anthropic", "nvidia"])
    parser.add_argument("--api-base", default=None, help="API base URL (auto-set based on provider)")
    parser.add_argument("--model", default="default")
    parser.add_argument("--entropy-threshold", type=float, default=0.15)
    parser.add_argument("--min-valleys", type=int, default=2)
    parser.add_argument("--no-early-exit", action="store_true")
    parser.add_argument("--log-entropy", action="store_true")
    parser.add_argument("--no-cost-tracking", action="store_true")
    args = parser.parse_args()

    # Auto-set API base URL based on provider (argparse `choices` guarantees
    # args.provider is one of these three keys).
    api_base = args.api_base
    if api_base is None:
        api_base = {
            "openai": "https://api.openai.com/v1",
            "anthropic": "https://api.anthropic.com/v1",
            "nvidia": "https://integrate.api.nvidia.com/v1",
        }[args.provider]

    config = ProxyConfig(
        port=args.port,
        provider=args.provider,
        api_base=api_base,
        model=args.model,
        entropy_threshold=args.entropy_threshold,
        min_valleys=args.min_valleys,
        enable_early_exit=not args.no_early_exit,
        log_entropy=args.log_entropy,
        track_cost=not args.no_cost_tracking
    )

    proxy = EntropyProxy(config)

    print(f"\n{'='*62}\n ENTROPLAIN PROXY WITH DASHBOARD\n{'='*62}")
    print(f" Proxy: http://localhost:{args.port}")
    print(f" Dashboard: http://localhost:{args.port}/dashboard")
    # Print the resolved URL, not the raw flag (which is None when auto-set).
    print(f" API Base: {api_base}")
    print(f"{'='*62}\n")

    proxy.run()


if __name__ == "__main__":
    main()