@askalf/dario 4.0.1 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -5
- package/dist/cli.js +128 -1
- package/dist/config-file.d.ts +26 -0
- package/dist/config-file.js +23 -0
- package/dist/notify.d.ts +48 -0
- package/dist/notify.js +120 -0
- package/dist/overage-guard.d.ts +102 -0
- package/dist/overage-guard.js +189 -0
- package/dist/proxy.d.ts +14 -0
- package/dist/proxy.js +106 -1
- package/dist/tui/proxy-client.d.ts +44 -1
- package/dist/tui/proxy-client.js +66 -2
- package/dist/tui/tabs/analytics.js +13 -0
- package/dist/tui/tabs/config.js +35 -0
- package/dist/tui/tabs/hits.d.ts +14 -0
- package/dist/tui/tabs/hits.js +54 -4
- package/dist/tui/tabs/status.d.ts +14 -0
- package/dist/tui/tabs/status.js +109 -3
- package/package.json +1 -1
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Overage-guard — halt the proxy on the first `representative-claim: overage`
|
|
3
|
+
* response to prevent silent API-rate bleed.
|
|
4
|
+
*
|
|
5
|
+
* Subscribers should never see a single overage hit during normal
|
|
6
|
+
* operation. One means something is wrong (wire-shape drift, classifier
|
|
7
|
+
* change, account misconfig, billing-flip after a CC release) and
|
|
8
|
+
* continuing to forward requests bleeds against per-token billing.
|
|
9
|
+
*
|
|
10
|
+
* The guard subscribes to the Analytics record stream — every completed
|
|
11
|
+
* request emits a record carrying its `claim` (raw representative-claim
|
|
12
|
+
* value). When `claim === 'overage'` lands, the guard transitions to a
|
|
13
|
+
* halted state and emits a `'halt'` event. The HTTP request path checks
|
|
14
|
+
* `isHalted()` on every incoming request and returns 503 with an
|
|
15
|
+
* Anthropic-shaped error body when halted.
|
|
16
|
+
*
|
|
17
|
+
* Resume paths:
|
|
18
|
+
* - explicit: `dario resume` CLI → POST /admin/resume → `clear('manual')`
|
|
19
|
+
* - automatic: cooldown expires (default 30 min) → `clear('cooldown')`
|
|
20
|
+
* - TUI: `r` key on Status tab → POST /admin/resume (same as CLI)
|
|
21
|
+
*
|
|
22
|
+
* Behavior:
|
|
23
|
+
* - `halt` (default) — record halted state + return 503 on subsequent requests
|
|
24
|
+
* - `warn` — emit events + notify only; proxy keeps forwarding (visibility-only mode)
|
|
25
|
+
*
|
|
26
|
+
* See dario#288.
|
|
27
|
+
*/
|
|
28
|
+
import { EventEmitter } from 'node:events';
|
|
29
|
+
export class OverageGuard extends EventEmitter {
|
|
30
|
+
opts;
|
|
31
|
+
halted = null;
|
|
32
|
+
cooldownTimer = null;
|
|
33
|
+
analyticsListener = null;
|
|
34
|
+
constructor(opts) {
|
|
35
|
+
super();
|
|
36
|
+
// /analytics/stream + TUI tabs each register a listener; the in-proc
|
|
37
|
+
// event listeners ceiling matches the Analytics class's choice.
|
|
38
|
+
this.setMaxListeners(100);
|
|
39
|
+
this.opts = opts;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Subscribe to an Analytics instance. Every record emitted with
|
|
43
|
+
* `claim === 'overage'` triggers halt (when behavior === 'halt') or a
|
|
44
|
+
* warn-only event (when behavior === 'warn').
|
|
45
|
+
*
|
|
46
|
+
* Idempotent — calling attach() a second time replaces the listener
|
|
47
|
+
* rather than stacking; useful for tests.
|
|
48
|
+
*/
|
|
49
|
+
attach(analytics) {
|
|
50
|
+
if (this.analyticsListener) {
|
|
51
|
+
analytics.off('record', this.analyticsListener);
|
|
52
|
+
}
|
|
53
|
+
if (!this.opts.enabled) {
|
|
54
|
+
// Guard fully disabled — don't even register the listener. No
|
|
55
|
+
// detection, no halt, no events.
|
|
56
|
+
this.analyticsListener = null;
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
this.analyticsListener = (r) => {
|
|
60
|
+
if (r.claim === 'overage') {
|
|
61
|
+
this.onOverageDetected(r);
|
|
62
|
+
}
|
|
63
|
+
};
|
|
64
|
+
analytics.on('record', this.analyticsListener);
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Synthesize a halt event from a record. Public for the test harness;
|
|
68
|
+
* production code reaches this via attach() + the live Analytics stream.
|
|
69
|
+
*/
|
|
70
|
+
onOverageDetected(r) {
|
|
71
|
+
if (this.halted) {
|
|
72
|
+
// Already halted — don't re-fire halt events. The original halt
|
|
73
|
+
// state stays in place until cleared. A second overage hit while
|
|
74
|
+
// halted is expected (the client may not have noticed the 503
|
|
75
|
+
// yet); silent.
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
const state = {
|
|
79
|
+
since: Date.now(),
|
|
80
|
+
cooldownUntil: Date.now() + this.opts.cooldownMs,
|
|
81
|
+
reason: 'overage_detected',
|
|
82
|
+
request: {
|
|
83
|
+
timestamp: r.timestamp,
|
|
84
|
+
model: r.model,
|
|
85
|
+
account: r.account,
|
|
86
|
+
claim: r.claim,
|
|
87
|
+
},
|
|
88
|
+
};
|
|
89
|
+
if (this.opts.behavior === 'halt') {
|
|
90
|
+
this.halted = state;
|
|
91
|
+
// Schedule auto-resume. Timer reference is held so we can cancel
|
|
92
|
+
// it on a manual resume — otherwise a manual resume followed by
|
|
93
|
+
// continued use, then the original cooldown firing, would emit a
|
|
94
|
+
// spurious second 'resume' event.
|
|
95
|
+
this.cooldownTimer = setTimeout(() => {
|
|
96
|
+
if (this.halted && this.halted.since === state.since) {
|
|
97
|
+
this.clear('cooldown');
|
|
98
|
+
}
|
|
99
|
+
}, this.opts.cooldownMs);
|
|
100
|
+
this.cooldownTimer.unref();
|
|
101
|
+
}
|
|
102
|
+
// Always fire 'halt' (or 'warn') so SSE subscribers see the event
|
|
103
|
+
// even in warn-only mode — the TUI's job is to surface this to the
|
|
104
|
+
// user regardless of whether the proxy chose to block traffic.
|
|
105
|
+
const eventName = this.opts.behavior === 'halt' ? 'halt' : 'warn';
|
|
106
|
+
try {
|
|
107
|
+
this.emit(eventName, state);
|
|
108
|
+
}
|
|
109
|
+
catch (err) {
|
|
110
|
+
// A subscriber threw — log + swallow, don't crash on event side-effects.
|
|
111
|
+
console.error('[dario] overage-guard subscriber threw:', err.message);
|
|
112
|
+
}
|
|
113
|
+
if (this.opts.notifyOs && this.opts.notifier) {
|
|
114
|
+
try {
|
|
115
|
+
const title = this.opts.behavior === 'halt' ? 'dario halted' : 'dario warning';
|
|
116
|
+
const msg = `Request classified as 'overage' (per-token billing)${this.opts.behavior === 'halt' ? '. Proxy halted. Run `dario resume` to continue.' : ''}`;
|
|
117
|
+
this.opts.notifier(title, msg);
|
|
118
|
+
}
|
|
119
|
+
catch {
|
|
120
|
+
// Native notification failure is non-fatal. Already emitted to
|
|
121
|
+
// SSE / TUI; the user gets the in-app banner regardless.
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Resume the proxy. Emits a 'resume' event with the reason.
|
|
127
|
+
*
|
|
128
|
+
* No-op when not currently halted. Safe to call from any path
|
|
129
|
+
* (CLI, /admin/resume HTTP endpoint, TUI `r` key, cooldown timer).
|
|
130
|
+
*/
|
|
131
|
+
clear(reason) {
|
|
132
|
+
if (!this.halted)
|
|
133
|
+
return;
|
|
134
|
+
const wasHaltedAt = this.halted.since;
|
|
135
|
+
this.halted = null;
|
|
136
|
+
if (this.cooldownTimer) {
|
|
137
|
+
clearTimeout(this.cooldownTimer);
|
|
138
|
+
this.cooldownTimer = null;
|
|
139
|
+
}
|
|
140
|
+
try {
|
|
141
|
+
this.emit('resume', { reason, previousSince: wasHaltedAt });
|
|
142
|
+
}
|
|
143
|
+
catch (err) {
|
|
144
|
+
console.error('[dario] overage-guard resume subscriber threw:', err.message);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
/** Current halt state, or `null` if not halted. */
|
|
148
|
+
state() {
|
|
149
|
+
return this.halted;
|
|
150
|
+
}
|
|
151
|
+
/** Quick boolean for the request hot-path. */
|
|
152
|
+
isHalted() {
|
|
153
|
+
return this.halted !== null && this.opts.behavior === 'halt';
|
|
154
|
+
}
|
|
155
|
+
/** Detach from Analytics. Used by tests and by graceful shutdown. */
|
|
156
|
+
destroy() {
|
|
157
|
+
this.removeAllListeners();
|
|
158
|
+
if (this.cooldownTimer) {
|
|
159
|
+
clearTimeout(this.cooldownTimer);
|
|
160
|
+
this.cooldownTimer = null;
|
|
161
|
+
}
|
|
162
|
+
this.halted = null;
|
|
163
|
+
this.analyticsListener = null;
|
|
164
|
+
}
|
|
165
|
+
/** Expose options for the /status endpoint + TUI Status tab. */
|
|
166
|
+
config() {
|
|
167
|
+
return this.opts;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
/**
 * Build the JSON error body returned with halted-503 responses.
 *
 * The envelope is `{ type: 'error', error: { type, message } }` — the shape
 * this package treats as Anthropic's error format, so clients can surface
 * `error.message` to the user without dario-specific handling.
 *
 * @param state Halt state; only `cooldownUntil` (epoch milliseconds) is read.
 * @returns Plain object ready for `JSON.stringify`.
 *
 * If `cooldownUntil` cannot be represented as a date (NaN, ±Infinity or out
 * of range) the "wait until …" hint is omitted instead of letting
 * `toISOString()` throw on the request path.
 */
export function buildHaltErrorBody(state) {
    const cooldownDate = new Date(state.cooldownUntil);
    const hasValidCooldown = !Number.isNaN(cooldownDate.getTime());
    const resumeHint = hasValidCooldown
        ? `To resume: run \`dario resume\` in another terminal, or wait until ` +
            `${cooldownDate.toISOString()} for the cooldown to auto-clear. `
        : `To resume: run \`dario resume\` in another terminal. `;
    return {
        type: 'error',
        error: {
            type: 'dario_overage_guard',
            message: `dario halted to prevent API-rate bleed. A request was classified ` +
                `as 'overage' (per-token billing) instead of your subscription pool. ` +
                resumeHint +
                `Details: github.com/askalf/dario/issues/288`,
        },
    };
}
|
package/dist/proxy.d.ts
CHANGED
|
@@ -162,6 +162,20 @@ interface ProxyOptions {
|
|
|
162
162
|
* Sourced from `--system-prompt=<value>` or DARIO_SYSTEM_PROMPT.
|
|
163
163
|
*/
|
|
164
164
|
systemPrompt?: string;
|
|
165
|
+
/**
|
|
166
|
+
* Overage-guard — halt the proxy on the first response carrying
|
|
167
|
+
* `representative-claim: overage`. Subscribers should never see a
|
|
168
|
+
* single overage hit during normal operation; one means something
|
|
169
|
+
* is wrong (wire-shape drift, classifier change, account misconfig)
|
|
170
|
+
* and continuing to forward bleeds against per-token billing.
|
|
171
|
+
*
|
|
172
|
+
* Default: enabled, halt behavior, 30-min cooldown, OS-notify on.
|
|
173
|
+
* See dario#288.
|
|
174
|
+
*/
|
|
175
|
+
overageGuardEnabled?: boolean;
|
|
176
|
+
overageGuardBehavior?: 'halt' | 'warn';
|
|
177
|
+
overageGuardCooldownMs?: number;
|
|
178
|
+
overageGuardNotifyOs?: boolean;
|
|
165
179
|
}
|
|
166
180
|
/**
|
|
167
181
|
* One JSON-ND record per completed request. Field set kept narrow to
|
package/dist/proxy.js
CHANGED
|
@@ -10,6 +10,8 @@ import { buildCCRequest, reverseMapResponse, createStreamingReverseMapper, order
|
|
|
10
10
|
import { describeTemplate, detectDrift, checkCCCompat } from './live-fingerprint.js';
|
|
11
11
|
import { AccountPool, computeStickyKey, parseRateLimits, modelFamily, isInAuthCooldown, authCooldownMs } from './pool.js';
|
|
12
12
|
import { Analytics, billingBucketFromClaim } from './analytics.js';
|
|
13
|
+
import { OverageGuard, buildHaltErrorBody } from './overage-guard.js';
|
|
14
|
+
import { notify as osNotify } from './notify.js';
|
|
13
15
|
import { loadAllAccounts, loadAccount, refreshAccountToken, resyncLoginFromCredentialsIfStale } from './accounts.js';
|
|
14
16
|
import { getOpenAIBackend, isOpenAIModel, forwardToOpenAI } from './openai-backend.js';
|
|
15
17
|
import { RequestQueue, QueueFullError, QueueTimeoutError, DEFAULT_MAX_CONCURRENT, DEFAULT_MAX_QUEUED, DEFAULT_QUEUE_TIMEOUT_MS } from './request-queue.js';
|
|
@@ -573,6 +575,31 @@ export async function startProxy(opts = {}) {
|
|
|
573
575
|
// endpoint, but burn-rate / per-request visibility is useful for
|
|
574
576
|
// single-account users too.
|
|
575
577
|
const analytics = new Analytics();
|
|
578
|
+
// Overage-guard (v4.1, dario#288). Resolved from opts with built-in
|
|
579
|
+
// defaults (enabled=true, behavior='halt', cooldown=30min, notifyOs=true)
|
|
580
|
+
// so an opts-less proxy still gets protection. The notifier is wired
|
|
581
|
+
// separately below once notify.ts is loaded.
|
|
582
|
+
const overageGuard = new OverageGuard({
|
|
583
|
+
enabled: opts.overageGuardEnabled ?? true,
|
|
584
|
+
behavior: opts.overageGuardBehavior ?? 'halt',
|
|
585
|
+
cooldownMs: opts.overageGuardCooldownMs ?? 30 * 60 * 1000,
|
|
586
|
+
notifyOs: opts.overageGuardNotifyOs ?? true,
|
|
587
|
+
notifier: osNotify,
|
|
588
|
+
});
|
|
589
|
+
overageGuard.attach(analytics);
|
|
590
|
+
// Surface halt + resume to the foreground startup banner so an
|
|
591
|
+
// operator running `dario proxy` directly sees the event even without
|
|
592
|
+
// a TUI attached. -v / --verbose is not required — this is loud by
|
|
593
|
+
// design.
|
|
594
|
+
overageGuard.on('halt', (state) => {
|
|
595
|
+
console.error(`[dario] OVERAGE-GUARD HALTED: ${state.request.model} on account=${state.request.account} returned representative-claim=overage at ${new Date(state.request.timestamp).toISOString()}. Returning 503 to new requests until \`dario resume\` or cooldown expires (${new Date(state.cooldownUntil).toISOString()}). See dario#288.`);
|
|
596
|
+
});
|
|
597
|
+
overageGuard.on('warn', (state) => {
|
|
598
|
+
console.error(`[dario] OVERAGE-GUARD WARN: ${state.request.model} on account=${state.request.account} returned representative-claim=overage at ${new Date(state.request.timestamp).toISOString()}. Behavior=warn — proxy continuing to forward; investigate before bill bleeds. See dario#288.`);
|
|
599
|
+
});
|
|
600
|
+
overageGuard.on('resume', (info) => {
|
|
601
|
+
console.error(`[dario] overage-guard resumed (${info.reason}). Normal request handling restored.`);
|
|
602
|
+
});
|
|
576
603
|
let status;
|
|
577
604
|
if (pool) {
|
|
578
605
|
for (const acc of accountsList) {
|
|
@@ -955,7 +982,16 @@ export async function startProxy(opts = {}) {
|
|
|
955
982
|
for (const past of analytics.recent(50)) {
|
|
956
983
|
res.write(`data: ${JSON.stringify(past)}\n\n`);
|
|
957
984
|
}
|
|
958
|
-
//
|
|
985
|
+
// Backlog the current halt state if any — a TUI attaching mid-halt
|
|
986
|
+
// needs to see the banner immediately without waiting for the
|
|
987
|
+
// next overage hit (which won't come, because the proxy is halted).
|
|
988
|
+
const haltedNow = overageGuard.state();
|
|
989
|
+
if (haltedNow) {
|
|
990
|
+
res.write(`event: overage_halt\ndata: ${JSON.stringify(haltedNow)}\n\n`);
|
|
991
|
+
}
|
|
992
|
+
// Live tail — request records on default 'message' event, halt /
|
|
993
|
+
// warn / resume on named events so the TUI can route on event type
|
|
994
|
+
// without changing the existing record shape.
|
|
959
995
|
const onRecord = (r) => {
|
|
960
996
|
// Use try/catch so a broken socket (peer hung up between events)
|
|
961
997
|
// doesn't crash the request hot-path — Analytics already wraps
|
|
@@ -965,7 +1001,28 @@ export async function startProxy(opts = {}) {
|
|
|
965
1001
|
}
|
|
966
1002
|
catch { /* ignored */ }
|
|
967
1003
|
};
|
|
1004
|
+
const onHalt = (state) => {
|
|
1005
|
+
try {
|
|
1006
|
+
res.write(`event: overage_halt\ndata: ${JSON.stringify(state)}\n\n`);
|
|
1007
|
+
}
|
|
1008
|
+
catch { /* ignored */ }
|
|
1009
|
+
};
|
|
1010
|
+
const onWarn = (state) => {
|
|
1011
|
+
try {
|
|
1012
|
+
res.write(`event: overage_warn\ndata: ${JSON.stringify(state)}\n\n`);
|
|
1013
|
+
}
|
|
1014
|
+
catch { /* ignored */ }
|
|
1015
|
+
};
|
|
1016
|
+
const onResume = (info) => {
|
|
1017
|
+
try {
|
|
1018
|
+
res.write(`event: overage_resume\ndata: ${JSON.stringify(info)}\n\n`);
|
|
1019
|
+
}
|
|
1020
|
+
catch { /* ignored */ }
|
|
1021
|
+
};
|
|
968
1022
|
analytics.on('record', onRecord);
|
|
1023
|
+
overageGuard.on('halt', onHalt);
|
|
1024
|
+
overageGuard.on('warn', onWarn);
|
|
1025
|
+
overageGuard.on('resume', onResume);
|
|
969
1026
|
// Heartbeat every 25s — SSE comments are ignored by clients but
|
|
970
1027
|
// keep middle-boxes (CDNs, dev-proxies) from closing the pipe.
|
|
971
1028
|
const heartbeat = setInterval(() => {
|
|
@@ -977,10 +1034,39 @@ export async function startProxy(opts = {}) {
|
|
|
977
1034
|
heartbeat.unref?.();
|
|
978
1035
|
req.on('close', () => {
|
|
979
1036
|
analytics.off('record', onRecord);
|
|
1037
|
+
overageGuard.off('halt', onHalt);
|
|
1038
|
+
overageGuard.off('warn', onWarn);
|
|
1039
|
+
overageGuard.off('resume', onResume);
|
|
980
1040
|
clearInterval(heartbeat);
|
|
981
1041
|
});
|
|
982
1042
|
return;
|
|
983
1043
|
}
|
|
1044
|
+
// POST /admin/resume — clear overage-guard halt state (v4.1, dario#288).
|
|
1045
|
+
// Idempotent: returns 200 with `wasHalted: false` if the proxy is
|
|
1046
|
+
// already running normally. Auth gating is the same as every other
|
|
1047
|
+
// endpoint (loopback-bind by default; DARIO_API_KEY needed for
|
|
1048
|
+
// non-loopback). GET returns the current state for read-only queries.
|
|
1049
|
+
if (urlPath === '/admin/resume' && req.method === 'GET') {
|
|
1050
|
+
const state = overageGuard.state();
|
|
1051
|
+
res.writeHead(200, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
|
|
1052
|
+
res.end(JSON.stringify({
|
|
1053
|
+
halted: state !== null,
|
|
1054
|
+
state,
|
|
1055
|
+
config: overageGuard.config(),
|
|
1056
|
+
}));
|
|
1057
|
+
return;
|
|
1058
|
+
}
|
|
1059
|
+
if (urlPath === '/admin/resume' && req.method === 'POST') {
|
|
1060
|
+
const wasHalted = overageGuard.state() !== null;
|
|
1061
|
+
overageGuard.clear('manual');
|
|
1062
|
+
res.writeHead(200, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
|
|
1063
|
+
res.end(JSON.stringify({
|
|
1064
|
+
ok: true,
|
|
1065
|
+
wasHalted,
|
|
1066
|
+
resumedAt: new Date().toISOString(),
|
|
1067
|
+
}));
|
|
1068
|
+
return;
|
|
1069
|
+
}
|
|
984
1070
|
if (urlPath === '/v1/models' && req.method === 'GET') {
|
|
985
1071
|
requestCount++;
|
|
986
1072
|
res.writeHead(200, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
|
|
@@ -1006,6 +1092,25 @@ export async function startProxy(opts = {}) {
|
|
|
1006
1092
|
res.end(ERR_METHOD);
|
|
1007
1093
|
return;
|
|
1008
1094
|
}
|
|
1095
|
+
// Overage-guard halt check (v4.1, dario#288). Subscribers should never
|
|
1096
|
+
// see a single `representative-claim: overage` response during normal
|
|
1097
|
+
// operation; one means traffic is being reclassified to per-token
|
|
1098
|
+
// billing. Block upstream forwarding with a 503 + Anthropic-shaped
|
|
1099
|
+
// error body until the user runs `dario resume` or the cooldown
|
|
1100
|
+
// auto-expires. Health / status / analytics / admin endpoints above
|
|
1101
|
+
// bypass this check intentionally — the TUI needs them to surface
|
|
1102
|
+
// the halt and the user needs /admin/resume to clear it.
|
|
1103
|
+
if (overageGuard.isHalted()) {
|
|
1104
|
+
requestCount++;
|
|
1105
|
+
const state = overageGuard.state();
|
|
1106
|
+
writeLogLine(logFileStream, {
|
|
1107
|
+
ts: new Date().toISOString(), req: requestCount,
|
|
1108
|
+
method: req.method ?? '', path: urlPath, status: 503, reject: 'overage-halt',
|
|
1109
|
+
});
|
|
1110
|
+
res.writeHead(503, { ...JSON_HEADERS, 'Access-Control-Allow-Origin': corsOrigin });
|
|
1111
|
+
res.end(JSON.stringify(buildHaltErrorBody(state)));
|
|
1112
|
+
return;
|
|
1113
|
+
}
|
|
1009
1114
|
// Proxy to Anthropic (with concurrency control). The bounded queue
|
|
1010
1115
|
// replaces the v3.30.x-and-earlier unbounded semaphore — dario#80. A
|
|
1011
1116
|
// queue-full condition returns an explicit 429 with a `"queue-full"`
|
|
@@ -48,10 +48,53 @@ export declare class ProxyClient {
|
|
|
48
48
|
* Auto-reconnect is intentionally NOT included. The Hits tab decides
|
|
49
49
|
* when to retry (and how often) — pushing that policy into here would
|
|
50
50
|
* couple the client to UI semantics.
|
|
51
|
+
*
|
|
52
|
+
* v4.1 (dario#288): the proxy emits named events alongside the default
|
|
53
|
+
* 'message' event — `event: overage_halt`, `event: overage_warn`,
|
|
54
|
+
* `event: overage_resume`. The `eventType` passed to `onMessage` is
|
|
55
|
+
* the value of the `event:` line on the frame (or `'message'` for an
|
|
56
|
+
* unlabeled / default frame). Existing consumers that pass a
|
|
57
|
+
* single-arg callback continue to work unchanged.
|
|
58
|
+
*/
|
|
59
|
+
subscribeAnalyticsStream<T = unknown>(onMessage: (msg: T, eventType?: string) => void, onError?: (err: Error) => void): () => void;
|
|
60
|
+
/**
|
|
61
|
+
* Query the overage-guard state (v4.1, dario#288). Returns the current
|
|
62
|
+
* halt state + configuration. Returns null on any error so the Status
|
|
63
|
+
* tab can render "unknown" without crashing.
|
|
51
64
|
*/
|
|
52
|
-
|
|
65
|
+
getOverageGuard(): Promise<OverageGuardStatus | null>;
|
|
66
|
+
/**
|
|
67
|
+
* Clear the overage-guard halt state. POSTs /admin/resume. Returns the
|
|
68
|
+
* server's response (`wasHalted` indicates whether the call actually
|
|
69
|
+
* cleared a halt vs no-op'd on already-clear state).
|
|
70
|
+
*/
|
|
71
|
+
resume(): Promise<{
|
|
72
|
+
ok: boolean;
|
|
73
|
+
wasHalted: boolean;
|
|
74
|
+
resumedAt: string;
|
|
75
|
+
}>;
|
|
53
76
|
private headers;
|
|
54
77
|
}
|
|
78
|
+
/**
 * Response body of `GET /admin/resume`: the overage-guard's current halt
 * state together with the options it was constructed with.
 */
export interface OverageGuardStatus {
|
|
79
|
+
/** `true` exactly when `state` is non-null. */
halted: boolean;
|
|
80
|
+
/** Stored halt state; `null` while the proxy is running normally. */
state: {
|
|
81
|
+
/** `Date.now()` value captured when the halt began (epoch milliseconds). */
since: number;
|
|
82
|
+
/** `since` plus the configured `cooldownMs` — when auto-resume is due. */
cooldownUntil: number;
|
|
83
|
+
/** Why the guard halted; the guard sets `'overage_detected'`. */
reason: string;
|
|
84
|
+
/** Fields copied from the analytics record that triggered the halt. */
request: {
|
|
85
|
+
timestamp: number;
|
|
86
|
+
model: string;
|
|
87
|
+
account: string;
|
|
88
|
+
/** Raw claim value of the record (`'overage'` for a guard-triggered halt). */
claim: string;
|
|
89
|
+
};
|
|
90
|
+
} | null;
|
|
91
|
+
/** Options the guard is running with. */
config: {
|
|
92
|
+
/** When `false` the guard never subscribes to analytics records. */
enabled: boolean;
|
|
93
|
+
/** `'halt'` blocks new requests with 503; `'warn'` only emits events. */
behavior: 'halt' | 'warn';
|
|
94
|
+
/** Auto-resume delay in milliseconds. */
cooldownMs: number;
|
|
95
|
+
/** Whether the OS notifier is invoked on an overage hit. */
notifyOs: boolean;
|
|
96
|
+
};
|
|
97
|
+
}
|
|
55
98
|
export interface HealthResponse {
|
|
56
99
|
status: string;
|
|
57
100
|
oauth: string;
|
package/dist/tui/proxy-client.js
CHANGED
|
@@ -86,6 +86,13 @@ export class ProxyClient {
|
|
|
86
86
|
* Auto-reconnect is intentionally NOT included. The Hits tab decides
|
|
87
87
|
* when to retry (and how often) — pushing that policy into here would
|
|
88
88
|
* couple the client to UI semantics.
|
|
89
|
+
*
|
|
90
|
+
* v4.1 (dario#288): the proxy emits named events alongside the default
|
|
91
|
+
* 'message' event — `event: overage_halt`, `event: overage_warn`,
|
|
92
|
+
* `event: overage_resume`. The `eventType` passed to `onMessage` is
|
|
93
|
+
* the value of the `event:` line on the frame (or `'message'` for an
|
|
94
|
+
* unlabeled / default frame). Existing consumers that pass a
|
|
95
|
+
* single-arg callback continue to work unchanged.
|
|
89
96
|
*/
|
|
90
97
|
subscribeAnalyticsStream(onMessage, onError) {
|
|
91
98
|
const url = new URL(this.baseUrl + '/analytics/stream');
|
|
@@ -121,15 +128,19 @@ export class ProxyClient {
|
|
|
121
128
|
while ((idx = buf.indexOf('\n\n')) >= 0) {
|
|
122
129
|
const frame = buf.slice(0, idx);
|
|
123
130
|
buf = buf.slice(idx + 2);
|
|
124
|
-
const
|
|
131
|
+
const lines = frame.split('\n');
|
|
132
|
+
const dataLines = lines
|
|
125
133
|
.filter(l => l.startsWith('data:'))
|
|
126
134
|
.map(l => l.slice(5).replace(/^ /, ''));
|
|
127
135
|
if (dataLines.length === 0)
|
|
128
136
|
continue;
|
|
137
|
+
// Pull the `event:` line if present. Default is 'message' per SSE spec.
|
|
138
|
+
const eventLine = lines.find(l => l.startsWith('event:'));
|
|
139
|
+
const eventType = eventLine ? eventLine.slice(6).trim() : 'message';
|
|
129
140
|
const payload = dataLines.join('\n');
|
|
130
141
|
try {
|
|
131
142
|
const parsed = JSON.parse(payload);
|
|
132
|
-
onMessage(parsed);
|
|
143
|
+
onMessage(parsed, eventType);
|
|
133
144
|
}
|
|
134
145
|
catch (e) {
|
|
135
146
|
onError?.(new Error(`SSE parse: ${e.message}`));
|
|
@@ -157,6 +168,59 @@ export class ProxyClient {
|
|
|
157
168
|
catch { /* ignored */ }
|
|
158
169
|
};
|
|
159
170
|
}
|
|
171
|
+
/**
|
|
172
|
+
* Query the overage-guard state (v4.1, dario#288). Returns the current
|
|
173
|
+
* halt state + configuration. Returns null on any error so the Status
|
|
174
|
+
* tab can render "unknown" without crashing.
|
|
175
|
+
*/
|
|
176
|
+
async getOverageGuard() {
|
|
177
|
+
try {
|
|
178
|
+
return await this.getJson('/admin/resume');
|
|
179
|
+
}
|
|
180
|
+
catch {
|
|
181
|
+
return null;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Clear the overage-guard halt state. POSTs /admin/resume. Returns the
|
|
186
|
+
* server's response (`wasHalted` indicates whether the call actually
|
|
187
|
+
* cleared a halt vs no-op'd on already-clear state).
|
|
188
|
+
*/
|
|
189
|
+
async resume() {
|
|
190
|
+
const url = new URL(this.baseUrl + '/admin/resume');
|
|
191
|
+
return new Promise((resolve, reject) => {
|
|
192
|
+
const req = httpRequest({
|
|
193
|
+
hostname: url.hostname,
|
|
194
|
+
port: url.port || 80,
|
|
195
|
+
path: url.pathname,
|
|
196
|
+
method: 'POST',
|
|
197
|
+
headers: { ...this.headers(), 'Content-Type': 'application/json', 'Content-Length': '2' },
|
|
198
|
+
}, (res) => {
|
|
199
|
+
const chunks = [];
|
|
200
|
+
res.on('data', (c) => chunks.push(c));
|
|
201
|
+
res.on('end', () => {
|
|
202
|
+
const body = Buffer.concat(chunks).toString('utf-8');
|
|
203
|
+
if (!res.statusCode || res.statusCode < 200 || res.statusCode >= 300) {
|
|
204
|
+
reject(new Error(`HTTP ${res.statusCode}: ${body.slice(0, 200)}`));
|
|
205
|
+
return;
|
|
206
|
+
}
|
|
207
|
+
try {
|
|
208
|
+
resolve(JSON.parse(body));
|
|
209
|
+
}
|
|
210
|
+
catch (e) {
|
|
211
|
+
reject(new Error(`JSON parse: ${e.message}`));
|
|
212
|
+
}
|
|
213
|
+
});
|
|
214
|
+
res.on('error', reject);
|
|
215
|
+
});
|
|
216
|
+
req.on('error', reject);
|
|
217
|
+
req.setTimeout(this.timeoutMs, () => {
|
|
218
|
+
req.destroy(new Error(`timeout after ${this.timeoutMs}ms`));
|
|
219
|
+
});
|
|
220
|
+
req.write('{}');
|
|
221
|
+
req.end();
|
|
222
|
+
});
|
|
223
|
+
}
|
|
160
224
|
headers() {
|
|
161
225
|
const h = {};
|
|
162
226
|
if (this.apiKey)
|
|
@@ -102,6 +102,19 @@ export const AnalyticsTab = {
|
|
|
102
102
|
lines.push(' ' + pad('7d', 6) +
|
|
103
103
|
fg('cyan', progressBar(s.utilization.lastUtil7d, barWidth)) +
|
|
104
104
|
' ' + dim(`${(s.utilization.lastUtil7d * 100).toFixed(0)}%`));
|
|
105
|
+
// Overage bucket (v4.1, dario#288). Count of requests that landed in
|
|
106
|
+
// the overage bucket within the rolling window. Empty bar in normal
|
|
107
|
+
// operation; non-zero count renders in red. Hard zero IS the success
|
|
108
|
+
// signal here — anything else is "investigate immediately."
|
|
109
|
+
const overageCount = s.window.billingBucketBreakdown?.extra_usage ?? 0;
|
|
110
|
+
const totalCount = Object.values(s.window.billingBucketBreakdown ?? {}).reduce((a, b) => a + b, 0);
|
|
111
|
+
const overageFrac = totalCount > 0 ? overageCount / totalCount : 0;
|
|
112
|
+
const overageColor = overageCount > 0 ? 'red' : 'cyan';
|
|
113
|
+
lines.push(' ' + pad('Overage', 6) +
|
|
114
|
+
fg(overageColor, progressBar(overageFrac, barWidth)) +
|
|
115
|
+
' ' + (overageCount > 0
|
|
116
|
+
? fg('red', `${overageCount} req`) + dim(` of ${totalCount}`)
|
|
117
|
+
: dim('0 ← clean')));
|
|
105
118
|
// ── Billing buckets ───────────────────────────────────────
|
|
106
119
|
const buckets = s.window.billingBucketBreakdown;
|
|
107
120
|
const totalBucketCount = Object.values(buckets).reduce((a, b) => a + b, 0);
|
package/dist/tui/tabs/config.js
CHANGED
|
@@ -38,6 +38,11 @@ const FIELDS = [
|
|
|
38
38
|
{ path: 'thinkTime.maxMs', label: 'Think-time cap (ms)', type: 'number', hint: 'upper bound for the whole formula' },
|
|
39
39
|
{ path: 'sessionStart.minMs', label: 'Session-start min', type: 'number', hint: 'first-request delay floor' },
|
|
40
40
|
{ path: 'sessionStart.jitterMs', label: 'Session-start jitter', type: 'number' },
|
|
41
|
+
// ── Overage-guard (v4.1, dario#288) ─────────────────────────
|
|
42
|
+
{ path: 'overageGuard.enabled', label: 'Overage-guard', type: 'bool', hint: 'halt proxy on any representative-claim=overage' },
|
|
43
|
+
{ path: 'overageGuard.behavior', label: 'Overage behavior', type: 'string', hint: '"halt" (default) or "warn"' },
|
|
44
|
+
{ path: 'overageGuard.cooldownMs', label: 'Overage cooldown (ms)', type: 'number', hint: 'auto-resume delay; default 1800000 (30 min)' },
|
|
45
|
+
{ path: 'overageGuard.notifyOs', label: 'Overage OS-notify', type: 'bool', hint: 'native desktop notification on halt' },
|
|
41
46
|
];
|
|
42
47
|
export const ConfigTab = {
|
|
43
48
|
id: 'config',
|
|
@@ -212,15 +217,45 @@ function commitEdit(state) {
|
|
|
212
217
|
if (!Number.isFinite(n)) {
|
|
213
218
|
return { ...state, editBuffer: null, statusMessage: `Not a number: "${state.editBuffer}"`, statusKind: 'error' };
|
|
214
219
|
}
|
|
220
|
+
// Path-specific guards. cooldownMs must be non-negative — silently
|
|
221
|
+
// dropping a bad value on next config-file load is correct but lets
|
|
222
|
+
// the user save an invalid file. Surface immediately. (v4.1.1)
|
|
223
|
+
if (f.path === 'overageGuard.cooldownMs' && n < 0) {
|
|
224
|
+
return { ...state, editBuffer: null, statusMessage: `overageGuard.cooldownMs must be >= 0 (got ${n})`, statusKind: 'error' };
|
|
225
|
+
}
|
|
215
226
|
parsed = n;
|
|
216
227
|
}
|
|
217
228
|
}
|
|
229
|
+
else if (f.type === 'string') {
|
|
230
|
+
// String enums: validate so we reject bad input at commit time rather
|
|
231
|
+
// than let the proxy's sanitize() silently drop it on next load. v4.1.1
|
|
232
|
+
// adds the overageGuard.behavior enum; future enums register here.
|
|
233
|
+
const enumValues = STRING_ENUMS[f.path];
|
|
234
|
+
if (enumValues && !enumValues.includes(state.editBuffer)) {
|
|
235
|
+
return {
|
|
236
|
+
...state,
|
|
237
|
+
editBuffer: null,
|
|
238
|
+
statusMessage: `${f.label} must be one of: ${enumValues.join(', ')} (got "${state.editBuffer}")`,
|
|
239
|
+
statusKind: 'error',
|
|
240
|
+
};
|
|
241
|
+
}
|
|
242
|
+
parsed = state.editBuffer;
|
|
243
|
+
}
|
|
218
244
|
else {
|
|
219
245
|
parsed = state.editBuffer;
|
|
220
246
|
}
|
|
221
247
|
const next = setByPath(state.config, f.path, parsed);
|
|
222
248
|
return { ...state, config: next, editBuffer: null, statusMessage: `Updated ${f.label}.`, statusKind: 'success' };
|
|
223
249
|
}
|
|
250
|
+
/**
 * Allowed values for string-enum config fields, keyed by the field's FIELDS
 * `path`. When a `type: 'string'` field has an entry here, commitEdit rejects
 * any edit whose text is not in the list and shows an error status instead of
 * storing it. Paths with no entry are treated as free text.
 * Added in v4.1.1 for `overageGuard.behavior`; register further enums here.
 */
|
|
256
|
+
const STRING_ENUMS = {
|
|
257
|
+
'overageGuard.behavior': ['halt', 'warn'],
|
|
258
|
+
};
|
|
224
259
|
function doSave(state) {
|
|
225
260
|
try {
|
|
226
261
|
saveConfig(undefined, { ...state.config, version: CONFIG_SCHEMA_VERSION });
|
package/dist/tui/tabs/hits.d.ts
CHANGED
|
@@ -25,10 +25,24 @@
|
|
|
25
25
|
*/
|
|
26
26
|
import type { Tab } from '../tab.js';
|
|
27
27
|
import type { RequestRecord } from '../../analytics.js';
|
|
28
|
+
/**
 * Live overage-halt state — populated from SSE event:overage_halt frames.
 * The proxy serialises the guard's halt state as the frame payload, so these
 * fields mirror that object.
 */
|
|
29
|
+
interface HitsHaltState {
|
|
30
|
+
/** `Date.now()` value captured when the halt began (epoch milliseconds). */
since: number;
|
|
31
|
+
/** `since` plus the guard's configured cooldown — when auto-resume is due. */
cooldownUntil: number;
|
|
32
|
+
/** Fields copied from the analytics record that triggered the halt. */
request: {
|
|
33
|
+
timestamp: number;
|
|
34
|
+
model: string;
|
|
35
|
+
account: string;
|
|
36
|
+
claim: string;
|
|
37
|
+
};
|
|
38
|
+
}
|
|
28
39
|
export interface HitsState {
|
|
29
40
|
buffer: RequestRecord[];
|
|
30
41
|
selectedIdx: number;
|
|
31
42
|
subscribed: boolean;
|
|
32
43
|
connectionError: string | null;
|
|
44
|
+
/** Overage-guard halt banner (v4.1, dario#288). Null when running normally. */
|
|
45
|
+
halt: HitsHaltState | null;
|
|
33
46
|
}
|
|
34
47
|
export declare const HitsTab: Tab<HitsState>;
|
|
48
|
+
export {};
|