free-coding-models 0.3.55 → 0.3.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -56
- package/README.md +214 -160
- package/bin/free-coding-models.js +46 -0
- package/package.json +2 -2
- package/sources.js +134 -310
- package/src/analysis.js +23 -10
- package/src/app.js +66 -27
- package/src/cache.js +1 -1
- package/src/cli-help.js +9 -0
- package/src/command-palette.js +15 -13
- package/src/config.js +201 -35
- package/src/constants.js +4 -4
- package/src/endpoint-installer.js +45 -1
- package/src/favorites.js +22 -0
- package/src/graphify-out/cache/089db1c1def873cf6d112f1590da4490e61e691aff0db41e006aa2fb15ba0656.json +1 -0
- package/src/graphify-out/cache/0b510b53cf1a1393fb52b1fc3bbbf88b63938e961ec5b82119a2e9715fee8bd7.json +1 -0
- package/src/graphify-out/cache/0ec9a95a326bde58e0316889018b278062d06d494d0f31ba177c9de71e5fed2d.json +1 -0
- package/src/graphify-out/cache/1548663a24a68dce740ebab1bd1d3091048c9604e9d067a1650a42a6d82541d4.json +1 -0
- package/src/graphify-out/cache/1783af63cb6d0dfb4d469009f71ac83a74ba0b33d48186ff2c6e63f9429e900a.json +1 -0
- package/src/graphify-out/cache/1e109f5eb5dc4fd285871c3613e32b6b14a8c225f4080ee34b51c7e1a1764571.json +1 -0
- package/src/graphify-out/cache/1eb24dbeb69b46c8bc1caf925df2f2a964af0f33aea143adf8ddf88e017db6ca.json +1 -0
- package/src/graphify-out/cache/21e1bcfed11685e8347243f9d8516072dda183266a4bfe22c52fb31753a446c8.json +1 -0
- package/src/graphify-out/cache/2327473478b9c4b1940bf7ef66c9ee960b3cba8d5302e56b625df8274246e0b4.json +1 -0
- package/src/graphify-out/cache/25955b81fd25454c8fa90fb71a47db8d1215cf621beb8ff3cbd580aaf011b4f3.json +1 -0
- package/src/graphify-out/cache/2739677f19c702f88f3de0a0bac475066adbda98709907ad3de967aef689f86d.json +1 -0
- package/src/graphify-out/cache/2bba03422f6b3ee7f5b5d29cc90314a064d259e5822a176657bda3e04505cf00.json +1 -0
- package/src/graphify-out/cache/2ddf1d2c6d10147b0402446bc71a7988187b79b6210dd7e7250be8c555b9ff35.json +1 -0
- package/src/graphify-out/cache/2ee07457a5767c95a57f8e9eb95b28f800044f35666e0715e9d88ad1103a092e.json +1 -0
- package/src/graphify-out/cache/2fe9f75dc2951c417f2c8dd22749092cf550dc67599f1c8d1866900dc6e9154e.json +1 -0
- package/src/graphify-out/cache/41c4b7c27e7fc3e2948d3a4bf95a72de2ed9a6f0463994babdce8ed2cc84598c.json +1 -0
- package/src/graphify-out/cache/5028defd54b7fbd3c7e444973e493de036e097e9b1d2a7cae7f19b88d68aacde.json +1 -0
- package/src/graphify-out/cache/5b133aba3fb16410c5b1fdbd1730039fc7fa1ac93abd99d7be08f60da70fc8d4.json +1 -0
- package/src/graphify-out/cache/74252e5b0978d85ab3421a3de1a9384aa282ffd2be2cfe7db2530139089f4275.json +1 -0
- package/src/graphify-out/cache/7695ebeea056095edd14332963cc43354ef3a097caf46f1e28d0f01369642901.json +1 -0
- package/src/graphify-out/cache/777aa7085c395a935c6556bbde182cd871edb61f3a685ed8068ec0c8f6fb0075.json +1 -0
- package/src/graphify-out/cache/82a723881980e82273c113def8315533d7da28827e300413d9ad30f27b7407df.json +1 -0
- package/src/graphify-out/cache/86b87c9603e6cd188f42c7eed3b86c291d48a781c223a707e74f3e7ed0c02a21.json +1 -0
- package/src/graphify-out/cache/890fead9a78cadaed560a2d2453916121fa605c3e43a334910ac4bc951a9ef6d.json +1 -0
- package/src/graphify-out/cache/89d3ea66f52783caa775ef9a30923d7d6225e1d8ae9e962f4741b8c7785dab1e.json +1 -0
- package/src/graphify-out/cache/8cc82cd9edce41f0e1c092f14a94fd52bf847addf3237b616dc5a9e505bd05bd.json +1 -0
- package/src/graphify-out/cache/93ba2e25e3ff7ad525f397902345fbd375df7315de7b402e20cc803c14eccde8.json +1 -0
- package/src/graphify-out/cache/99beed29580b9c7bfecfee794cb3d8e535fcf0eb3b92113108f88bdd0a8e79b3.json +1 -0
- package/src/graphify-out/cache/aeeb931fa477c65ce2e51d8149957350fa54225c613222bbbe8448998d1afd3d.json +1 -0
- package/src/graphify-out/cache/baf91bef5b5ecb2a476433b6cc0c48c563c54ee2d07fc3c192e543685e3e7222.json +1 -0
- package/src/graphify-out/cache/bd98b94ac4e9b92b6336d47b26e0366b51a4eaf0711d722f05f98dfae23ab42b.json +1 -0
- package/src/graphify-out/cache/bfcb51e9328e9cbfbee4f6fee0f56635d7b03488addc9f6c4e4b190b70a73362.json +1 -0
- package/src/graphify-out/cache/c0d3dabeb093aa758c49eadf41b87ecc96a16c1449c2670aaf48cbfc891d8da6.json +1 -0
- package/src/graphify-out/cache/c20d6630236f473c1406068c3ae205853e649b216495c93dfec055dd222c55cf.json +1 -0
- package/src/graphify-out/cache/c22b9122816bebce0a2f79af41a986559d01e00163dbcd579c5755621b4cb483.json +1 -0
- package/src/graphify-out/cache/ca556ec14453ddb8f9e0c5a832dac90d77111b9bad5f8c2d80d272e2e7a06371.json +1 -0
- package/src/graphify-out/cache/d6dbc9135dfa35a756b3b09b06700e4bc229fdccba11bb963f2ba44028e0bbae.json +1 -0
- package/src/graphify-out/cache/e1cf71276f1779d0fa075f79bd7c8a9fd0b8eef6932ac043137451b7c7fa7cbe.json +1 -0
- package/src/graphify-out/cache/e4b3be14494467df2d2ed389bc4f18f099021cb5bc355b901fa88387b2d8b8a2.json +1 -0
- package/src/graphify-out/cache/eaea0dded097f6f9553b654220046c6ec0c9be592a5973d906564ee60af34e0d.json +1 -0
- package/src/graphify-out/cache/ef07d0cd2675d1f79d2a2fdbf3bc3319687638751e9ce89b0d0d97ed1cd9f7e1.json +1 -0
- package/src/graphify-out/cache/f81272d6eb8aaff9e96d5a1d9f06777db70ac3652a646b951ded51f79871d733.json +1 -0
- package/src/graphify-out/cache/f9619dd92186f75a6dbda937e0c606647153918524cdb5763f956e6ec2a9e386.json +1 -0
- package/src/graphify-out/cache/fd88b1b2ff4bfcae08559d9c2aaeeb9a3f1e2f5cd8928762c311196956c170a5.json +1 -0
- package/src/key-handler.js +322 -114
- package/src/kilo.js +20 -1
- package/src/opencode.js +23 -2
- package/src/overlays.js +199 -98
- package/src/provider-metadata.js +26 -17
- package/src/quota-capabilities.js +6 -10
- package/src/render-helpers.js +38 -8
- package/src/render-table.js +119 -248
- package/src/router-daemon.js +1986 -0
- package/src/router-dashboard.js +902 -0
- package/src/sync-set.js +479 -0
- package/src/theme.js +4 -0
- package/src/tool-launchers.js +1 -0
- package/src/tool-metadata.js +6 -2
- package/src/utils.js +30 -6
- package/web/dist/assets/{index-C03JjCgA.js → index-DKHCzbK1.js} +2 -2
- package/web/dist/index.html +1 -1
|
@@ -0,0 +1,1986 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file router-daemon.js
|
|
3
|
+
* @description Smart Model Router daemon for local OpenAI-compatible failover routing.
|
|
4
|
+
*
|
|
5
|
+
* @details
|
|
6
|
+
* 📖 The router daemon is the persistent part of FCM: coding tools point at
|
|
7
|
+
* `http://localhost:19280/v1`, send `model: "fcm"`, and this server forwards
|
|
8
|
+
* the request to the healthiest configured provider/model in the active set.
|
|
9
|
+
*
|
|
10
|
+
* 📖 It deliberately uses only Node built-ins and the existing provider catalog
|
|
11
|
+
* so the npm package keeps its tiny dependency surface. The daemon stores only
|
|
12
|
+
* metadata (latency, status, token counts); request and response bodies are
|
|
13
|
+
* never written to logs or telemetry.
|
|
14
|
+
*
|
|
15
|
+
* @functions
|
|
16
|
+
* → runRouterDaemon() — Start the foreground daemon HTTP server
|
|
17
|
+
* → startRouterDaemonBackground() — Spawn the daemon detached from the TUI
|
|
18
|
+
* → stopRouterDaemon() — Send SIGTERM to the recorded daemon process
|
|
19
|
+
* → getRouterDaemonStatus() — Discover and read `/health` from a running daemon
|
|
20
|
+
* → buildDefaultRouterSet() — Create the first priority-ordered model set
|
|
21
|
+
* → formatOpenAiError() — Build OpenAI-compatible error response payloads
|
|
22
|
+
* → createRouterRuntimeForTest() — Build an isolated runtime for mock-upstream tests
|
|
23
|
+
*
|
|
24
|
+
* @exports runRouterDaemon, startRouterDaemonBackground, stopRouterDaemon
|
|
25
|
+
* @exports getRouterDaemonStatus, buildDefaultRouterSet, formatOpenAiError
|
|
26
|
+
* @exports createRouterRuntimeForTest
|
|
27
|
+
*
|
|
28
|
+
* @see ./config.js — router config is persisted under `router`
|
|
29
|
+
* @see ../sources.js — provider URLs and model IDs are resolved from the catalog
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { createServer } from 'node:http'
|
|
33
|
+
import { fork } from 'node:child_process'
|
|
34
|
+
import { randomUUID } from 'node:crypto'
|
|
35
|
+
import { appendFileSync, existsSync, readFileSync, renameSync, statSync, unlinkSync, writeFileSync } from 'node:fs'
|
|
36
|
+
import { homedir } from 'node:os'
|
|
37
|
+
import { dirname, join } from 'node:path'
|
|
38
|
+
import { fileURLToPath } from 'node:url'
|
|
39
|
+
import { sources } from '../sources.js'
|
|
40
|
+
import {
|
|
41
|
+
CONFIG_PATH,
|
|
42
|
+
DEFAULT_ROUTER_SETTINGS,
|
|
43
|
+
getApiKey,
|
|
44
|
+
loadConfig,
|
|
45
|
+
normalizeRouterConfig,
|
|
46
|
+
saveConfig,
|
|
47
|
+
} from './config.js'
|
|
48
|
+
import { resolveCloudflareUrl } from './ping.js'
|
|
49
|
+
import { sendUsageTelemetry } from './telemetry.js'
|
|
50
|
+
|
|
51
|
+
// 📖 Production port window (see getRouterPortRange).
export const ROUTER_DEFAULT_PORT = 19280
export const ROUTER_MAX_PORT = 19289
// 📖 Dev port window, kept apart from the production one.
export const ROUTER_DEFAULT_PORT_DEV = 29280
export const ROUTER_MAX_PORT_DEV = 29289

// 📖 Dev mode is on whenever FCM_DEV is set to a non-empty value. Dev state
// 📖 files carry a `-dev` suffix so a local dev daemon never clashes with a
// 📖 production install running on the same machine.
const _dev = Boolean(process.env.FCM_DEV)
const _devSuffix = _dev ? '-dev' : ''
export const ROUTER_PID_PATH = join(homedir(), `.free-coding-models-daemon${_devSuffix}.pid`)
export const ROUTER_PORT_PATH = join(homedir(), `.free-coding-models-daemon${_devSuffix}.port`)
export const ROUTER_LOG_PATH = join(homedir(), `.free-coding-models-daemon${_devSuffix}.log`)
export const ROUTER_TOKENS_PATH = join(homedir(), `.free-coding-models-tokens${_devSuffix}.json`)
|
|
63
|
+
|
|
64
|
+
/**
 * 📖 Pick the port window for the current mode.
 *
 * @returns {{ defaultPort: number, maxPort: number }} The dev window when dev
 *   mode is on, the production window otherwise.
 */
export function getRouterPortRange() {
  if (_dev) {
    return { defaultPort: ROUTER_DEFAULT_PORT_DEV, maxPort: ROUTER_MAX_PORT_DEV }
  }
  return { defaultPort: ROUTER_DEFAULT_PORT, maxPort: ROUTER_MAX_PORT }
}
|
|
70
|
+
|
|
71
|
+
// 📖 ES modules have no built-in __dirname, so derive it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url))
const CLI_ENTRY_PATH = join(__dirname, '..', 'bin', 'free-coding-models.js')

// 📖 Size and count ceilings.
const MAX_BODY_BYTES = 10 * 1024 * 1024 // 10 MiB
const MAX_REQUEST_LOG = 200
const MAX_SSE_CLIENTS = 10
const MAX_CONCURRENT_REQUESTS = 50
const MAX_PROBE_WINDOW = 20

// 📖 Timer periods (milliseconds) and stats retention (days).
const TOKEN_FLUSH_INTERVAL_MS = 60_000
const CONFIG_RELOAD_INTERVAL_MS = 60_000
const STATS_RETENTION_DAYS = 90

const TIER_ORDER = ['S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']

// 📖 Upstream status code groups.
const RETRYABLE_STATUS_CODES = new Set([429, 500, 502, 503])
const AUTH_STATUS_CODES = new Set([401, 403])

// 📖 Response headers copied into quota diagnostics. Order is preserved in the
// 📖 extracted object, so keep this list stable.
const RATE_LIMIT_HEADER_NAMES = [
  'retry-after',
  // generic x-ratelimit-* family
  'x-ratelimit-limit',
  'x-ratelimit-remaining',
  'x-ratelimit-reset',
  // unprefixed ratelimit-* family
  'ratelimit-limit',
  'ratelimit-remaining',
  'ratelimit-reset',
  // per-request budget
  'x-ratelimit-limit-requests',
  'x-ratelimit-remaining-requests',
  'x-ratelimit-reset-requests',
  // per-token budget
  'x-ratelimit-limit-tokens',
  'x-ratelimit-remaining-tokens',
  'x-ratelimit-reset-tokens',
]
|
|
99
|
+
|
|
100
|
+
/**
 * Current wall-clock time as an ISO-8601 UTC string.
 *
 * @returns {string} e.g. `2024-01-31T12:34:56.789Z`
 */
function nowIso() {
  const now = new Date()
  return now.toISOString()
}
|
|
103
|
+
|
|
104
|
+
/**
 * Build the `provider/model` key string for a provider/model pair.
 *
 * @param {string} provider - Provider key.
 * @param {string} model - Model id.
 * @returns {string} `provider/model`
 */
function modelKey(provider, model) {
  // 📖 String.prototype.concat stringifies its arguments the same way a template literal does.
  return ''.concat(provider, '/', model)
}
|
|
107
|
+
|
|
108
|
+
/**
 * Parse JSON without throwing.
 *
 * @param {string} raw - JSON text.
 * @param {*} [fallback=null] - Value returned when `raw` is not valid JSON.
 * @returns {*} The parsed value, or `fallback` on a parse error.
 */
function safeJsonParse(raw, fallback = null) {
  let result = fallback
  try {
    result = JSON.parse(raw)
  } catch {
    // 📖 Parse failure: keep the fallback.
  }
  return result
}
|
|
115
|
+
|
|
116
|
+
/**
 * Parse JSON into a result object instead of throwing.
 *
 * @param {string} raw - JSON text.
 * @returns {{ ok: true, value: * } | { ok: false, error: Error }} Parsed value
 *   on success, the thrown error on failure.
 */
function parseJsonResult(raw) {
  let value
  try {
    value = JSON.parse(raw)
  } catch (error) {
    return { ok: false, error }
  }
  return { ok: true, value }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Write `data` as pretty-printed JSON by writing a temp file next to `path`
 * and renaming it over the target, so readers never see a half-written file.
 *
 * @param {string} path - Destination file path.
 * @param {*} data - JSON-serializable value.
 * @param {number} [mode=0o600] - Permission bits used when creating the temp file.
 * @throws Re-throws any serialization, write, or rename error after removing
 *   the temp file.
 */
function atomicWriteJson(path, data, mode = 0o600) {
  const tempPath = `${path}.tmp-${process.pid}-${Date.now()}`
  try {
    writeFileSync(tempPath, JSON.stringify(data, null, 2), { mode })
    renameSync(tempPath, path)
  } catch (error) {
    // 📖 Do not leave orphaned `.tmp-*` files behind when the write or rename
    // 📖 fails; cleanup is best-effort and must not mask the original error.
    try { unlinkSync(tempPath) } catch {}
    throw error
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms)
  })
}
|
|
133
|
+
|
|
134
|
+
/**
 * Check whether a process with the given PID currently exists.
 *
 * @param {number} pid - Candidate process id; anything but a positive integer
 *   is reported as not alive.
 * @returns {boolean} `true` when the process exists, `false` otherwise.
 */
function isProcessAlive(pid) {
  if (!Number.isInteger(pid) || pid <= 0) return false
  try {
    // 📖 Signal 0 only tests for existence; nothing is delivered to the target.
    process.kill(pid, 0)
    return true
  } catch (error) {
    // 📖 EPERM means the process exists but we lack permission to signal it,
    // 📖 so it is alive. Every other failure (ESRCH, ...) counts as dead.
    return error?.code === 'EPERM'
  }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Read a file expected to hold a single base-10 integer.
 *
 * @param {string} path - File to read.
 * @returns {number|null} The parsed integer, or `null` when the file cannot be
 *   read or does not start with a number.
 */
function readNumberFile(path) {
  let text
  try {
    text = readFileSync(path, 'utf8')
  } catch {
    return null
  }
  const parsed = Number.parseInt(text.trim(), 10)
  if (!Number.isFinite(parsed)) return null
  return parsed
}
|
|
152
|
+
|
|
153
|
+
/**
 * Copy headers into a plain object with lower-cased names, leaving out the
 * connection and body-framing headers listed below.
 *
 * @param {{ forEach: Function }|null|undefined} headers - Any object exposing
 *   `forEach((value, key) => ...)`.
 * @returns {Object<string, string>} Header map; empty when `headers` is
 *   missing or has no `forEach`.
 */
function headerEntries(headers) {
  if (!headers || typeof headers.forEach !== 'function') return {}
  const skipped = new Set(['connection', 'content-encoding', 'content-length', 'transfer-encoding'])
  const copied = {}
  headers.forEach((value, name) => {
    const lower = name.toLowerCase()
    if (!skipped.has(lower)) copied[lower] = value
  })
  return copied
}
|
|
163
|
+
|
|
164
|
+
/**
 * Read one header from an object exposing `get(name)`.
 *
 * @param {{ get: Function }|null|undefined} headers - Header container.
 * @param {string} name - Header name.
 * @returns {string} The header value, or `''` when the container is unusable
 *   or the value is falsy.
 */
function getHeaderValue(headers, name) {
  const readable = Boolean(headers) && typeof headers.get === 'function'
  if (!readable) return ''
  const value = headers.get(name)
  return value || ''
}
|
|
168
|
+
|
|
169
|
+
/**
 * Collect the rate-limit headers named in RATE_LIMIT_HEADER_NAMES.
 *
 * @param {{ get: Function }|null|undefined} headers - Upstream response headers.
 * @returns {Object<string, string>} Only the headers that are present and non-empty.
 */
function extractRateLimitHeaders(headers) {
  return RATE_LIMIT_HEADER_NAMES.reduce((found, name) => {
    const value = getHeaderValue(headers, name)
    if (value) found[name] = value
    return found
  }, {})
}
|
|
177
|
+
|
|
178
|
+
/**
 * Convert a `Retry-After` header value into a delay in milliseconds.
 *
 * @param {string|undefined|null} value - Either a number of seconds or a date string.
 * @returns {number|null} Non-negative delay in ms, or `null` when the value is
 *   missing or unparseable.
 */
function parseRetryAfterMs(value) {
  if (!value) return null
  const clampToZero = (ms) => Math.max(0, ms)

  // 📖 Seconds form first ("120"), then the date form.
  const asSeconds = Number(value)
  if (Number.isFinite(asSeconds)) return clampToZero(Math.round(asSeconds * 1000))

  const asDateMs = Date.parse(value)
  return Number.isFinite(asDateMs) ? clampToZero(asDateMs - Date.now()) : null
}
|
|
186
|
+
|
|
187
|
+
/**
 * Tell whether any `*remaining*` rate-limit header reports zero or less.
 *
 * @param {Object<string, string>} rateLimitHeaders - Header name to value.
 * @returns {boolean} `true` when at least one "remaining" header is numeric and <= 0.
 */
function hasZeroRemainingQuota(rateLimitHeaders) {
  for (const [name, value] of Object.entries(rateLimitHeaders)) {
    if (!name.includes('remaining')) continue
    const remaining = Number(value)
    if (Number.isFinite(remaining) && remaining <= 0) return true
  }
  return false
}
|
|
194
|
+
|
|
195
|
+
/**
 * Heuristic: does the text start (after optional whitespace) with an HTML
 * doctype or an `<html>`, `<head>` or `<body>` tag?
 *
 * @param {string|null|undefined} text - Body text to inspect.
 * @returns {boolean}
 */
function isLikelyHtmlText(text) {
  const htmlOpening = /^\s*(<!doctype\s+html|<html[\s>]|<head[\s>]|<body[\s>])/i
  return htmlOpening.test(text || '')
}
|
|
198
|
+
|
|
199
|
+
/**
 * Decide whether a response looks like HTML, from its `content-type` header
 * or from the body text.
 *
 * @param {{ get: Function }|null|undefined} headers - Response headers.
 * @param {string} [text=''] - Response body text, if available.
 * @returns {boolean}
 */
function isLikelyHtmlResponse(headers, text = '') {
  const declaredType = getHeaderValue(headers, 'content-type').toLowerCase()
  if (declaredType.includes('text/html')) return true
  return isLikelyHtmlText(text)
}
|
|
203
|
+
|
|
204
|
+
/**
 * Derive quota diagnostics from an upstream response.
 *
 * 📖 Diagnostics stay structural: only headers, retry timing and a boolean are
 * 📖 returned. The body text is tested against a pattern but never copied out.
 *
 * @param {{ status: number, headers: object }} response - Upstream response.
 * @param {string} [text=''] - Response body text, if it was read.
 * @returns {{ retryAfterMs: number|null, rateLimitHeaders: Object<string, string>, quotaExhausted: boolean }}
 */
function buildUpstreamMeta(response, text = '') {
  const rateLimitHeaders = extractRateLimitHeaders(response.headers)
  const retryAfterMs = parseRetryAfterMs(rateLimitHeaders['retry-after'])

  // 📖 Cheapest signal first: status code, then headers, then the body pattern.
  let quotaExhausted = response.status === 429
  if (!quotaExhausted) quotaExhausted = hasZeroRemainingQuota(rateLimitHeaders)
  if (!quotaExhausted) quotaExhausted = /\b(quota|rate[_ -]?limit|too many requests)\b/i.test(text || '')

  return { retryAfterMs, rateLimitHeaders, quotaExhausted }
}
|
|
218
|
+
|
|
219
|
+
/**
 * Tie the lifetime of an upstream request to the client connection.
 *
 * 📖 If the coding tool disconnects, abort the upstream call right away so no
 * 📖 more provider quota is spent. The `aborted` flag lets callers tell a
 * 📖 client disconnect from an upstream failure.
 *
 * @param {object} req - Incoming request (listened on for `aborted`).
 * @param {object} res - Outgoing response (listened on for `close`).
 * @param {AbortController} controller - Controller of the upstream request.
 * @returns {{ readonly aborted: boolean, dispose: () => void }} Handle whose
 *   `aborted` getter reports a client disconnect and whose `dispose()` removes
 *   both listeners.
 */
function attachClientAbort(req, res, controller) {
  const state = { clientGone: false }

  function onClientGone() {
    // 📖 A `close` after the response was fully sent is not a disconnect.
    if (res.writableEnded) return
    state.clientGone = true
    try {
      controller.abort(new Error('client_disconnected'))
    } catch {
      controller.abort()
    }
  }

  req.on('aborted', onClientGone)
  res.on('close', onClientGone)

  return {
    get aborted() {
      return state.clientGone
    },
    dispose() {
      req.off('aborted', onClientGone)
      res.off('close', onClientGone)
    },
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Build the header set sent to the upstream provider from the client's
 * request headers.
 *
 * 📖 Dropped: `host`, `content-length` and `authorization` (replaced with the
 * 📖 provider key), every hop-by-hop header, and any header nominated by the
 * 📖 client's `Connection` header. Those apply to the client<->daemon
 * 📖 connection only and must not be forwarded by an intermediary.
 *
 * @param {Object<string, string|string[]|undefined>} reqHeaders - Incoming
 *   request headers; non-string values are skipped.
 * @param {string} apiKey - Provider API key sent as a Bearer token.
 * @param {string} providerKey - Provider id; `openrouter` gets attribution headers.
 * @returns {Object<string, string>} Headers for the upstream request.
 */
export function cloneHeadersForUpstream(reqHeaders, apiKey, providerKey) {
  const incoming = Object.entries(reqHeaders || {})
  const dropped = new Set([
    'host',
    'content-length',
    'authorization',
    // 📖 Hop-by-hop headers.
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'proxy-connection',
    'te',
    'trailer',
    'transfer-encoding',
    'upgrade',
  ])

  // 📖 Headers listed in `Connection: a, b` are hop-by-hop for this request too.
  for (const [key, value] of incoming) {
    if (key.toLowerCase() !== 'connection' || typeof value !== 'string') continue
    for (const token of value.split(',')) {
      const option = token.trim().toLowerCase()
      if (option) dropped.add(option)
    }
  }

  const headers = {}
  for (const [key, value] of incoming) {
    const lower = key.toLowerCase()
    if (dropped.has(lower)) continue
    if (typeof value !== 'string') continue
    if (lower === 'content-type') {
      headers['Content-Type'] = value
      continue
    }
    headers[key] = value
  }

  headers['Content-Type'] = headers['Content-Type'] || 'application/json'
  headers.Authorization = `Bearer ${apiKey}`
  if (providerKey === 'openrouter') {
    headers['HTTP-Referer'] = 'https://github.com/vava-nessa/free-coding-models'
    headers['X-Title'] = 'free-coding-models'
  }
  return headers
}
|
|
265
|
+
|
|
266
|
+
/**
 * Map a catalog model id to the id sent to the provider API.
 *
 * @param {string} providerKey - Provider id.
 * @param {string} modelId - Catalog model id.
 * @returns {string} For `zai`, the id without a leading `zai/`; otherwise unchanged.
 */
function getApiModelId(providerKey, modelId) {
  if (providerKey !== 'zai') return modelId
  return modelId.replace(/^zai\//, '')
}
|
|
269
|
+
|
|
270
|
+
/**
 * Tell whether a provider can be routed to: it needs a URL that contains
 * `/chat/completions` and must not be flagged `cliOnly`.
 *
 * @param {string} providerKey - Key into the `sources` catalog.
 * @returns {boolean}
 */
function isRouteableProvider(providerKey) {
  const source = sources[providerKey]
  if (!source?.url || source.cliOnly) return false
  return Boolean(source.url.includes('/chat/completions'))
}
|
|
274
|
+
|
|
275
|
+
/**
 * Look up the endpoint URL for a provider.
 *
 * @param {string} providerKey - Key into the `sources` catalog.
 * @returns {string|null} The catalog URL (passed through resolveCloudflareUrl
 *   for `cloudflare`), or `null` when the provider has no URL.
 */
function resolveProviderUrl(providerKey) {
  const catalogUrl = sources[providerKey]?.url
  if (!catalogUrl) return null
  if (providerKey === 'cloudflare') return resolveCloudflareUrl(catalogUrl)
  return catalogUrl
}
|
|
280
|
+
|
|
281
|
+
/**
 * Derive a provider's `/models` URL from its chat-completions URL.
 *
 * 📖 The path must END with `/chat/completions` (an optional trailing slash
 * 📖 and query/fragment are kept). A looser `includes` guard would let a URL
 * 📖 such as `.../chat/completions?api-version=1` through unchanged, handing
 * 📖 callers the chat endpoint as if it were the models endpoint.
 *
 * @param {string} providerKey - Key into the `sources` catalog.
 * @returns {string|null} The models URL, or `null` when it cannot be derived.
 */
function buildProviderModelsUrl(providerKey) {
  const url = resolveProviderUrl(providerKey)
  if (typeof url !== 'string') return null
  const match = /^([^?#]*)\/chat\/completions\/?([?#].*)?$/.exec(url)
  if (!match) return null
  const [, base, suffix = ''] = match
  return `${base}/models${suffix}`
}
|
|
286
|
+
|
|
287
|
+
/**
 * Pull token usage out of an OpenAI-style response payload.
 *
 * @param {object|null|undefined} payload - Parsed response body.
 * @returns {{ prompt_tokens: number, completion_tokens: number, total_tokens: number }|null}
 *   Rounded, non-negative counts; `null` when usage is absent, non-numeric,
 *   or all counts are zero or less.
 */
function extractUsage(payload) {
  const usage = payload?.usage
  if (!usage || typeof usage !== 'object') return null

  const prompt = Number(usage.prompt_tokens ?? 0)
  const completion = Number(usage.completion_tokens ?? 0)
  // 📖 Some payloads omit total_tokens; fall back to the sum of the parts.
  const total = Number(usage.total_tokens ?? prompt + completion)

  const counts = [prompt, completion, total]
  if (counts.some((count) => !Number.isFinite(count))) return null
  if (counts.every((count) => count <= 0)) return null

  const [prompt_tokens, completion_tokens, total_tokens] = counts.map((count) => Math.max(0, Math.round(count)))
  return { prompt_tokens, completion_tokens, total_tokens }
}
|
|
301
|
+
|
|
302
|
+
/**
 * Build an OpenAI-compatible error payload.
 *
 * @param {string} message - Human-readable description.
 * @param {string} type - Error type.
 * @param {string} code - Error code.
 * @param {string} requestId - Request id, emitted as `request_id`.
 * @param {object} [extra={}] - Extra fields merged last, so they can override
 *   the standard ones.
 * @returns {{ error: object }}
 */
export function formatOpenAiError(message, type, code, requestId, extra = {}) {
  const error = { message, type, code, request_id: requestId, ...extra }
  return { error }
}
|
|
313
|
+
|
|
314
|
+
/**
 * Send a JSON response with a correct `Content-Length`.
 *
 * @param {object} res - Server response.
 * @param {number} statusCode - HTTP status code.
 * @param {*} payload - JSON-serializable body.
 * @param {object} [headers={}] - Extra headers; they override the defaults.
 */
function sendJson(res, statusCode, payload, headers = {}) {
  if (res.writableEnded) return
  if (res.headersSent) {
    // 📖 The status line is already on the wire (e.g. a stream started before
    // 📖 the failure). writeHead() would throw here, so finishing the response
    // 📖 is the only thing left to do.
    res.end()
    return
  }
  const body = JSON.stringify(payload)
  res.writeHead(statusCode, {
    'Content-Type': 'application/json; charset=utf-8',
    'Content-Length': Buffer.byteLength(body),
    ...headers,
  })
  res.end(body)
}
|
|
324
|
+
|
|
325
|
+
/**
 * Send an OpenAI-compatible error as a JSON response.
 *
 * @param {object} res - Server response.
 * @param {number} statusCode - HTTP status code.
 * @param {string} message - Error message.
 * @param {string} type - Error type.
 * @param {string} code - Error code.
 * @param {string} requestId - Request id.
 * @param {object} [extra={}] - Extra fields for the error object.
 */
function sendError(res, statusCode, message, type, code, requestId, extra = {}) {
  const payload = formatOpenAiError(message, type, code, requestId, extra)
  sendJson(res, statusCode, payload)
}
|
|
328
|
+
|
|
329
|
+
/**
 * Buffer a request body into a UTF-8 string, enforcing a size cap.
 *
 * @param {object} req - Readable request emitting `data`, `end` and `error`.
 * @param {number} [limit=MAX_BODY_BYTES] - Maximum accepted body size in bytes.
 * @returns {Promise<string>} The body text. Rejects with `code: 'BODY_TOO_LARGE'`
 *   (and destroys the request) when the cap is exceeded, or with the stream
 *   error.
 */
function readRequestBody(req, limit = MAX_BODY_BYTES) {
  return new Promise((resolve, reject) => {
    const received = []
    let receivedBytes = 0

    const onData = (chunk) => {
      receivedBytes += chunk.length
      if (receivedBytes <= limit) {
        received.push(chunk)
        return
      }
      reject(Object.assign(new Error('Request body too large'), { code: 'BODY_TOO_LARGE' }))
      // 📖 Stop reading: nothing past the cap will be used.
      req.destroy()
    }
    const onEnd = () => resolve(Buffer.concat(received).toString('utf8'))

    req.on('data', onData)
    req.on('end', onEnd)
    req.on('error', reject)
  })
}
|
|
346
|
+
|
|
347
|
+
/**
 * Read and parse a JSON request body.
 *
 * @param {object} req - Incoming request.
 * @returns {Promise<*>} The parsed body; `{}` when the body is empty or
 *   whitespace. Rejects with `code: 'INVALID_JSON'` when parsing yields `null`
 *   (malformed JSON, or a literal `null` body), and with whatever
 *   readRequestBody rejects with.
 */
async function readJsonBody(req) {
  const raw = await readRequestBody(req)
  if (raw.trim() === '') return {}
  const parsed = safeJsonParse(raw)
  if (parsed !== null) return parsed
  throw Object.assign(new Error('Invalid JSON'), { code: 'INVALID_JSON' })
}
|
|
357
|
+
|
|
358
|
+
/**
 * Level-filtered, append-only file logger for the router daemon.
 *
 * 📖 Every failure inside the logger is swallowed on purpose: logging must
 * 📖 never be able to take the daemon down.
 */
class RouterLogger {
  /**
   * @param {string} logPath - File that log lines are appended to.
   * @param {string} [level='info'] - Most verbose level to emit
   *   (`error` < `warn` < `info` < `debug`).
   */
  constructor(logPath, level = 'info') {
    this.logPath = logPath
    this.level = level
    this.levelRank = { error: 0, warn: 1, info: 2, debug: 3 }
  }

  /**
   * Numeric rank of a level name.
   * @param {string} level
   * @returns {number|undefined} `undefined` for unknown names.
   */
  rankOf(level) {
    const rank = this.levelRank[level]
    return typeof rank === 'number' ? rank : undefined
  }

  /**
   * Decide whether a message at `level` passes the configured threshold.
   *
   * 📖 An unknown configured level (e.g. a typo in the config) falls back to
   * 📖 `info`, so errors and warnings are not silently dropped.
   *
   * @param {string} level - Level of the message.
   * @returns {boolean} `false` for unknown message levels.
   */
  shouldLog(level) {
    const rank = this.rankOf(level)
    if (rank === undefined) return false
    const threshold = this.rankOf(this.level) ?? this.levelRank.info
    return rank <= threshold
  }

  /**
   * Once the log file reaches 5 MiB, move it to `<logPath>.1`, replacing any
   * earlier rotated file. Errors are ignored.
   */
  rotateIfNeeded() {
    try {
      if (!existsSync(this.logPath)) return
      const stat = statSync(this.logPath)
      if (stat.size < 5 * 1024 * 1024) return
      const rotatedPath = `${this.logPath}.1`
      try { unlinkSync(rotatedPath) } catch {}
      renameSync(this.logPath, rotatedPath)
    } catch {
      // 📖 Logging should never be capable of taking the daemon down.
    }
  }

  /**
   * Append one formatted line to the log file, falling back to stderr when
   * the file cannot be written.
   *
   * @param {string} level - One of `error`, `warn`, `info`, `debug`.
   * @param {string} message - Log message.
   * @param {*} [meta=null] - Optional metadata, serialized as JSON after the message.
   */
  write(level, message, meta = null) {
    if (!this.shouldLog(level)) return
    const suffix = meta ? ` ${this.safeStringify(meta)}` : ''
    const line = `[${nowIso()}] [${level.toUpperCase()}] ${message}${suffix}\n`
    try {
      this.rotateIfNeeded()
      // 📖 `mode` only applies when this call creates the file.
      appendFileSync(this.logPath, line, { mode: 0o600 })
    } catch {
      try { process.stderr.write(line) } catch {}
    }
  }

  /**
   * JSON-serialize metadata without throwing (cycles, BigInt, ...).
   * @param {*} meta
   * @returns {string}
   */
  safeStringify(meta) {
    try {
      return JSON.stringify(meta)
    } catch {
      return '[unserializable-meta]'
    }
  }

  error(message, meta = null) { this.write('error', message, meta) }
  warn(message, meta = null) { this.write('warn', message, meta) }
  info(message, meta = null) { this.write('info', message, meta) }
  debug(message, meta = null) { this.write('debug', message, meta) }
}
|
|
407
|
+
|
|
408
|
+
/**
 * In-memory token usage counters, persisted as JSON.
 *
 * 📖 Only counts are stored: per-day totals, a per-model breakdown for each
 * 📖 day, and all-time totals. Values read from disk are sanitized on load and
 * 📖 again on first use, so a hand-edited or truncated stats file cannot
 * 📖 poison the counters with NaN or crash the tracker.
 */
class TokenTracker {
  /**
   * @param {string} path - JSON stats file.
   * @param {{ warn: Function }} logger - Logger used for read/write failures.
   */
  constructor(path, logger) {
    this.path = path
    this.logger = logger
    this.stats = this.load()
    this.dirty = false
    this.flushFailures = 0
  }

  /** @returns {object} A fresh stats object with zeroed counters. */
  static emptyStats() {
    return {
      daily: {},
      all_time: {
        total_tokens: 0,
        prompt_tokens: 0,
        completion_tokens: 0,
        requests: 0,
        first_tracked: nowIso(),
      },
    }
  }

  /**
   * Coerce a stored counter to a usable number.
   * @param {*} value
   * @returns {number} `value` as a number, or 0 when it is missing,
   *   non-numeric, non-finite or negative.
   */
  static toCount(value) {
    const count = Number(value ?? 0)
    return Number.isFinite(count) && count > 0 ? count : 0
  }

  /** @returns {boolean} `true` for plain (non-array, non-null) objects. */
  static isRecord(value) {
    return Boolean(value) && typeof value === 'object' && !Array.isArray(value)
  }

  /**
   * Load stats from disk.
   * @returns {object} Stored stats with sanitized all-time counters, or fresh
   *   stats when the file is missing, unreadable or not a JSON object.
   */
  load() {
    try {
      if (!existsSync(this.path)) return TokenTracker.emptyStats()
      const parsed = safeJsonParse(readFileSync(this.path, 'utf8'), null)
      if (!TokenTracker.isRecord(parsed)) throw new Error('Token stats JSON is invalid')
      const allTime = TokenTracker.isRecord(parsed.all_time) ? parsed.all_time : {}
      return {
        daily: TokenTracker.isRecord(parsed.daily) ? parsed.daily : {},
        all_time: {
          total_tokens: TokenTracker.toCount(allTime.total_tokens),
          prompt_tokens: TokenTracker.toCount(allTime.prompt_tokens),
          completion_tokens: TokenTracker.toCount(allTime.completion_tokens),
          requests: TokenTracker.toCount(allTime.requests),
          first_tracked: allTime.first_tracked || nowIso(),
        },
      }
    } catch (error) {
      this.logger.warn('Token stats read failed; starting fresh counters', { error: error.message })
      return TokenTracker.emptyStats()
    }
  }

  /** @returns {string} Today's UTC date as `YYYY-MM-DD`. */
  todayKey() {
    return new Date().toISOString().slice(0, 10)
  }

  /**
   * Get, creating or repairing it as needed, the bucket for one day.
   * @param {string} dateKey - `YYYY-MM-DD`.
   * @returns {object} Bucket with numeric counters and a `by_model` object.
   */
  ensureDaily(dateKey) {
    const stored = this.stats.daily[dateKey]
    // 📖 Entries may come straight from disk, so repair their shape in place.
    const day = TokenTracker.isRecord(stored) ? stored : {}
    for (const field of ['total_tokens', 'prompt_tokens', 'completion_tokens', 'requests']) {
      day[field] = TokenTracker.toCount(day[field])
    }
    if (!TokenTracker.isRecord(day.by_model)) day.by_model = {}
    this.stats.daily[dateKey] = day
    return day
  }

  /**
   * Add one request's usage to today's bucket and the all-time totals.
   * @param {string} provider - Provider key.
   * @param {string} model - Model id.
   * @param {{ prompt_tokens: number, completion_tokens: number, total_tokens: number }|null} usage -
   *   Token counts; a falsy value is ignored.
   */
  record(provider, model, usage) {
    if (!usage) return
    const daily = this.ensureDaily(this.todayKey())
    const key = modelKey(provider, model)
    const perModel = TokenTracker.isRecord(daily.by_model[key]) ? daily.by_model[key] : {}
    perModel.total = TokenTracker.toCount(perModel.total) + usage.total_tokens
    perModel.requests = TokenTracker.toCount(perModel.requests) + 1
    daily.by_model[key] = perModel

    daily.total_tokens += usage.total_tokens
    daily.prompt_tokens += usage.prompt_tokens
    daily.completion_tokens += usage.completion_tokens
    daily.requests += 1

    this.stats.all_time.total_tokens += usage.total_tokens
    this.stats.all_time.prompt_tokens += usage.prompt_tokens
    this.stats.all_time.completion_tokens += usage.completion_tokens
    this.stats.all_time.requests += 1
    this.dirty = true
  }

  /** Drop daily buckets older than STATS_RETENTION_DAYS; unparseable keys are kept. */
  prune() {
    const cutoff = Date.now() - STATS_RETENTION_DAYS * 24 * 60 * 60 * 1000
    for (const dateKey of Object.keys(this.stats.daily)) {
      const time = Date.parse(`${dateKey}T00:00:00.000Z`)
      if (Number.isFinite(time) && time < cutoff) delete this.stats.daily[dateKey]
    }
  }

  /**
   * Persist stats when they changed (or when forced). A failed write is
   * logged and counted; the in-memory counters are kept for the next attempt.
   * @param {{ force?: boolean }} [options]
   */
  flush({ force = false } = {}) {
    if (!this.dirty && !force) return
    try {
      this.prune()
      atomicWriteJson(this.path, this.stats, 0o600)
      this.dirty = false
      this.flushFailures = 0
    } catch (error) {
      this.flushFailures += 1
      this.logger.warn('Token stats write failed; keeping counters in memory', {
        error: error.message,
        failures: this.flushFailures,
      })
    }
  }

  /**
   * @returns {{ today: object, all_time: object, daily: object }} Live
   *   references to the counters (not copies). Creates today's bucket when it
   *   does not exist yet.
   */
  summary() {
    const today = this.ensureDaily(this.todayKey())
    return {
      today,
      all_time: this.stats.all_time,
      daily: this.stats.daily,
    }
  }
}
|
|
532
|
+
|
|
533
|
+
class RouterRuntime {
|
|
534
|
+
constructor({ config, port, logger, tokenPath = ROUTER_TOKENS_PATH, persistConfig = true }) {
|
|
535
|
+
this.config = config
|
|
536
|
+
this.port = port
|
|
537
|
+
this.logger = logger
|
|
538
|
+
this.persistConfig = persistConfig
|
|
539
|
+
this.startedAt = Date.now()
|
|
540
|
+
this.inFlight = 0
|
|
541
|
+
this.shuttingDown = false
|
|
542
|
+
this.crashRecovered = 0
|
|
543
|
+
this.uncaughtTimestamps = []
|
|
544
|
+
this.server = null
|
|
545
|
+
this.configReloadTimer = null
|
|
546
|
+
this.tokenFlushTimer = null
|
|
547
|
+
this.probeTimer = null
|
|
548
|
+
this.probeTimeouts = new Set()
|
|
549
|
+
this.tokenTracker = new TokenTracker(tokenPath, logger)
|
|
550
|
+
this.modelCatalog = this.buildModelCatalog()
|
|
551
|
+
this.probeWindows = new Map()
|
|
552
|
+
this.circuit = new Map()
|
|
553
|
+
this.requestLog = []
|
|
554
|
+
this.sseClients = new Set()
|
|
555
|
+
this.lastProbeAt = null
|
|
556
|
+
this.totalRequestsRouted = 0
|
|
557
|
+
this.quotaExhausted = new Set()
|
|
558
|
+
this.quotaDetails = new Map()
|
|
559
|
+
this.staleNotifications = new Set()
|
|
560
|
+
this.refreshRouteState()
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
buildModelCatalog() {
|
|
564
|
+
const catalog = new Map()
|
|
565
|
+
for (const [providerKey, source] of Object.entries(sources)) {
|
|
566
|
+
if (!Array.isArray(source.models)) continue
|
|
567
|
+
for (const [modelId, label, tier, sweScore, ctx] of source.models) {
|
|
568
|
+
catalog.set(modelKey(providerKey, modelId), {
|
|
569
|
+
providerKey,
|
|
570
|
+
modelId,
|
|
571
|
+
label,
|
|
572
|
+
tier,
|
|
573
|
+
sweScore,
|
|
574
|
+
ctx,
|
|
575
|
+
routeable: isRouteableProvider(providerKey),
|
|
576
|
+
})
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
return catalog
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
refreshRouteState() {
|
|
583
|
+
const router = this.routerConfig()
|
|
584
|
+
this.logger.level = router.logLevel
|
|
585
|
+
for (const set of Object.values(router.sets || {})) {
|
|
586
|
+
for (const model of set.models || []) {
|
|
587
|
+
const key = modelKey(model.provider, model.model)
|
|
588
|
+
if (!this.probeWindows.has(key)) this.probeWindows.set(key, [])
|
|
589
|
+
if (!this.circuit.has(key)) {
|
|
590
|
+
this.circuit.set(key, {
|
|
591
|
+
state: 'CLOSED',
|
|
592
|
+
consecutiveFailures: 0,
|
|
593
|
+
cooldownMs: router.circuitBreaker.initialCooldownMs,
|
|
594
|
+
openedAt: null,
|
|
595
|
+
lastError: null,
|
|
596
|
+
authError: false,
|
|
597
|
+
stale: false,
|
|
598
|
+
})
|
|
599
|
+
}
|
|
600
|
+
const entry = this.circuit.get(key)
|
|
601
|
+
entry.stale = !this.modelCatalog.has(key)
|
|
602
|
+
const catalogEntry = this.modelCatalog.get(key)
|
|
603
|
+
entry.unsupported = Boolean(catalogEntry && !catalogEntry.routeable)
|
|
604
|
+
if (entry.stale && !this.staleNotifications.has(key)) {
|
|
605
|
+
this.staleNotifications.add(key)
|
|
606
|
+
this.logger.warn(`${key} is no longer available and will be skipped`)
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
routerConfig() {
|
|
613
|
+
const normalized = normalizeRouterConfig(this.config.router)
|
|
614
|
+
if (normalized) return normalized
|
|
615
|
+
const defaultSet = buildDefaultRouterSet(this.config)
|
|
616
|
+
return normalizeRouterConfig({
|
|
617
|
+
...DEFAULT_ROUTER_SETTINGS,
|
|
618
|
+
enabled: true,
|
|
619
|
+
onboardingSeen: true,
|
|
620
|
+
activeSet: defaultSet.name,
|
|
621
|
+
sets: { [defaultSet.name]: defaultSet },
|
|
622
|
+
})
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
setRouterConfig(router) {
|
|
626
|
+
this.config.router = normalizeRouterConfig(router)
|
|
627
|
+
this.refreshRouteState()
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
saveRouterConfig() {
|
|
631
|
+
if (this.persistConfig === false) return { success: true, backupCreated: false }
|
|
632
|
+
const result = saveConfig(this.config)
|
|
633
|
+
if (!result.success) this.logger.warn('Router config write failed', { error: result.error })
|
|
634
|
+
return result
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
reloadConfigFromDisk() {
|
|
638
|
+
try {
|
|
639
|
+
const nextConfig = loadConfig()
|
|
640
|
+
if (!nextConfig.router) nextConfig.router = this.routerConfig()
|
|
641
|
+
this.config = nextConfig
|
|
642
|
+
this.refreshRouteState()
|
|
643
|
+
this.scheduleProbeLoop()
|
|
644
|
+
this.broadcast('config', { activeSet: this.routerConfig().activeSet })
|
|
645
|
+
this.logger.debug('Router config reloaded from disk')
|
|
646
|
+
} catch (error) {
|
|
647
|
+
this.logger.warn('Config reload failed; keeping in-memory config', { error: error.message })
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
getApiKeyForProvider(providerKey) {
|
|
652
|
+
// 📖 Router background startup should work without inherited shell env, so
|
|
653
|
+
// 📖 config keys are primary. Env is only a fallback for headless sessions.
|
|
654
|
+
const configured = this.config?.apiKeys?.[providerKey]
|
|
655
|
+
if (Array.isArray(configured)) return configured.find(Boolean) || null
|
|
656
|
+
if (typeof configured === 'string' && configured.trim()) return configured.trim()
|
|
657
|
+
return getApiKey({ apiKeys: {}, providers: {} }, providerKey)
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
getSet(setName = null) {
|
|
661
|
+
const router = this.routerConfig()
|
|
662
|
+
const name = setName || router.activeSet
|
|
663
|
+
return router.sets?.[name] || null
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
listSetModels(set) {
|
|
667
|
+
return [...(set?.models || [])].sort((a, b) => a.priority - b.priority)
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
updateCircuitForCooldown(key) {
|
|
671
|
+
const state = this.circuit.get(key)
|
|
672
|
+
if (!state || state.state !== 'OPEN') return state
|
|
673
|
+
const elapsed = Date.now() - (state.openedAt || 0)
|
|
674
|
+
if (elapsed >= state.cooldownMs) {
|
|
675
|
+
const oldState = state.state
|
|
676
|
+
state.state = 'HALF_OPEN'
|
|
677
|
+
this.broadcast('circuit', { model: key, old_state: oldState, new_state: state.state, cooldown_ms: state.cooldownMs })
|
|
678
|
+
}
|
|
679
|
+
return state
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
recordProbeResult(key, result) {
|
|
683
|
+
const window = this.probeWindows.get(key) || []
|
|
684
|
+
window.push({ ...result, at: Date.now() })
|
|
685
|
+
while (window.length > MAX_PROBE_WINDOW) window.shift()
|
|
686
|
+
this.probeWindows.set(key, window)
|
|
687
|
+
this.lastProbeAt = Date.now()
|
|
688
|
+
this.broadcast('probe', {
|
|
689
|
+
model: key,
|
|
690
|
+
status: result.ok ? 'ok' : 'fail',
|
|
691
|
+
latency_ms: result.latencyMs ?? null,
|
|
692
|
+
circuit_state: this.circuit.get(key)?.state || 'UNKNOWN',
|
|
693
|
+
})
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
markAuthError(key, detail = 'authentication failed') {
|
|
697
|
+
const state = this.circuit.get(key)
|
|
698
|
+
if (!state) return
|
|
699
|
+
state.authError = true
|
|
700
|
+
state.lastError = detail
|
|
701
|
+
this.broadcast('circuit', { model: key, old_state: state.state, new_state: 'AUTH_ERROR', cooldown_ms: 0 })
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
markSuccess(key, latencyMs = null) {
|
|
705
|
+
const state = this.circuit.get(key)
|
|
706
|
+
if (!state) return
|
|
707
|
+
const oldState = state.state
|
|
708
|
+
state.state = 'CLOSED'
|
|
709
|
+
state.consecutiveFailures = 0
|
|
710
|
+
state.cooldownMs = this.routerConfig().circuitBreaker.initialCooldownMs
|
|
711
|
+
state.openedAt = null
|
|
712
|
+
state.lastError = null
|
|
713
|
+
state.authError = false
|
|
714
|
+
this.quotaExhausted.delete(key)
|
|
715
|
+
this.quotaDetails.delete(key)
|
|
716
|
+
if (oldState !== state.state) {
|
|
717
|
+
this.broadcast('circuit', { model: key, old_state: oldState, new_state: state.state, cooldown_ms: state.cooldownMs })
|
|
718
|
+
}
|
|
719
|
+
if (latencyMs !== null) this.recordProbeResult(key, { ok: true, latencyMs, code: 200 })
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
markFailure(key, detail, statusCode = null, meta = {}) {
|
|
723
|
+
const state = this.circuit.get(key)
|
|
724
|
+
if (!state) return
|
|
725
|
+
state.authError = false
|
|
726
|
+
state.consecutiveFailures += 1
|
|
727
|
+
state.lastError = detail
|
|
728
|
+
if (statusCode === 429 || meta.quotaExhausted) {
|
|
729
|
+
this.quotaExhausted.add(key)
|
|
730
|
+
this.quotaDetails.set(key, {
|
|
731
|
+
model: key,
|
|
732
|
+
status: statusCode,
|
|
733
|
+
retry_after_ms: meta.retryAfterMs ?? null,
|
|
734
|
+
rate_limit_headers: meta.rateLimitHeaders || {},
|
|
735
|
+
last_seen: nowIso(),
|
|
736
|
+
})
|
|
737
|
+
}
|
|
738
|
+
const router = this.routerConfig()
|
|
739
|
+
if (state.state === 'HALF_OPEN' || state.consecutiveFailures >= router.circuitBreaker.failureThreshold) {
|
|
740
|
+
const oldState = state.state
|
|
741
|
+
state.state = 'OPEN'
|
|
742
|
+
state.openedAt = Date.now()
|
|
743
|
+
state.cooldownMs = Math.min(
|
|
744
|
+
router.circuitBreaker.maxCooldownMs,
|
|
745
|
+
Math.max(router.circuitBreaker.initialCooldownMs, state.cooldownMs * router.circuitBreaker.backoffMultiplier),
|
|
746
|
+
)
|
|
747
|
+
this.broadcast('circuit', { model: key, old_state: oldState, new_state: state.state, cooldown_ms: state.cooldownMs })
|
|
748
|
+
this.logger.warn(`Circuit opened for ${key}`, { reason: detail, cooldown_ms: state.cooldownMs })
|
|
749
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
750
|
+
event: 'app_router_circuit_open',
|
|
751
|
+
mode: 'daemon',
|
|
752
|
+
properties: {
|
|
753
|
+
model: key,
|
|
754
|
+
consecutive_failures: state.consecutiveFailures,
|
|
755
|
+
cooldown_ms: state.cooldownMs,
|
|
756
|
+
},
|
|
757
|
+
})
|
|
758
|
+
}
|
|
759
|
+
this.recordProbeResult(key, { ok: false, latencyMs: null, code: statusCode || 'ERR', error: detail })
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
quotaDetailsForKeys(keys) {
|
|
763
|
+
return keys
|
|
764
|
+
.filter((key) => this.quotaExhausted.has(key))
|
|
765
|
+
.map((key) => this.quotaDetails.get(key) || {
|
|
766
|
+
model: key,
|
|
767
|
+
status: 429,
|
|
768
|
+
retry_after_ms: null,
|
|
769
|
+
rate_limit_headers: {},
|
|
770
|
+
last_seen: null,
|
|
771
|
+
})
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
recordRouterError(kind, requestId, properties = {}) {
|
|
775
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
776
|
+
event: 'app_router_error',
|
|
777
|
+
mode: 'daemon',
|
|
778
|
+
properties: {
|
|
779
|
+
kind,
|
|
780
|
+
request_id: requestId,
|
|
781
|
+
...properties,
|
|
782
|
+
},
|
|
783
|
+
})
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
getWindowStats(key) {
|
|
787
|
+
const window = this.probeWindows.get(key) || []
|
|
788
|
+
const successes = window.filter((entry) => entry.ok && Number.isFinite(entry.latencyMs))
|
|
789
|
+
const sortedLatencies = successes.map((entry) => entry.latencyMs).sort((a, b) => a - b)
|
|
790
|
+
const p95 = sortedLatencies.length > 0
|
|
791
|
+
? sortedLatencies[Math.max(0, Math.ceil(sortedLatencies.length * 0.95) - 1)]
|
|
792
|
+
: null
|
|
793
|
+
return {
|
|
794
|
+
total: window.length,
|
|
795
|
+
successful: successes.length,
|
|
796
|
+
uptime: window.length > 0 ? successes.length / window.length : null,
|
|
797
|
+
p95,
|
|
798
|
+
last: window[window.length - 1] || null,
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
scoreCandidates(set) {
|
|
803
|
+
const models = this.listSetModels(set)
|
|
804
|
+
const maxP95 = Math.max(
|
|
805
|
+
1,
|
|
806
|
+
...models
|
|
807
|
+
.map((entry) => this.getWindowStats(modelKey(entry.provider, entry.model)).p95)
|
|
808
|
+
.filter((value) => Number.isFinite(value)),
|
|
809
|
+
)
|
|
810
|
+
const router = this.routerConfig()
|
|
811
|
+
const setSize = Math.max(1, models.length)
|
|
812
|
+
const weights = router.scoring
|
|
813
|
+
|
|
814
|
+
return models.map((entry) => {
|
|
815
|
+
const key = modelKey(entry.provider, entry.model)
|
|
816
|
+
const stats = this.getWindowStats(key)
|
|
817
|
+
const hasData = stats.total > 0
|
|
818
|
+
const latencyScore = stats.p95 === null ? 0.5 : Math.max(0, 1 - (stats.p95 / maxP95))
|
|
819
|
+
const uptimeScore = stats.uptime === null ? 0.5 : stats.uptime
|
|
820
|
+
const priorityBonus = 1 - ((entry.priority - 1) / setSize)
|
|
821
|
+
const score = hasData
|
|
822
|
+
? (weights.latencyWeight * latencyScore) + (weights.uptimeWeight * uptimeScore) + (weights.priorityWeight * priorityBonus)
|
|
823
|
+
: priorityBonus
|
|
824
|
+
const state = this.updateCircuitForCooldown(key) || {}
|
|
825
|
+
return {
|
|
826
|
+
...entry,
|
|
827
|
+
key,
|
|
828
|
+
score,
|
|
829
|
+
stats,
|
|
830
|
+
circuit: state,
|
|
831
|
+
catalog: this.modelCatalog.get(key) || null,
|
|
832
|
+
}
|
|
833
|
+
})
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
getRoutingCandidates(set) {
|
|
837
|
+
const scored = this.scoreCandidates(set)
|
|
838
|
+
const usable = scored.filter((candidate) => {
|
|
839
|
+
if (!candidate.catalog || candidate.circuit?.stale) return false
|
|
840
|
+
if (!candidate.catalog.routeable || candidate.circuit?.unsupported) return false
|
|
841
|
+
if (candidate.circuit?.authError) return false
|
|
842
|
+
if (!this.getApiKeyForProvider(candidate.provider)) return false
|
|
843
|
+
return candidate.circuit?.state === 'CLOSED' || candidate.circuit?.state === 'HALF_OPEN'
|
|
844
|
+
})
|
|
845
|
+
const closed = usable.filter((candidate) => candidate.circuit.state === 'CLOSED')
|
|
846
|
+
const halfOpen = usable.filter((candidate) => candidate.circuit.state === 'HALF_OPEN')
|
|
847
|
+
const byScore = (a, b) => b.score - a.score || a.priority - b.priority
|
|
848
|
+
return [...closed.sort(byScore), ...halfOpen.sort(byScore)]
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
getModelHealth(set = this.getSet()) {
|
|
852
|
+
return this.scoreCandidates(set || { models: [] }).map((candidate) => ({
|
|
853
|
+
provider: candidate.provider,
|
|
854
|
+
model: candidate.model,
|
|
855
|
+
key: candidate.key,
|
|
856
|
+
priority: candidate.priority,
|
|
857
|
+
state: candidate.circuit?.authError
|
|
858
|
+
? 'AUTH_ERROR'
|
|
859
|
+
: candidate.circuit?.stale
|
|
860
|
+
? 'STALE'
|
|
861
|
+
: candidate.circuit?.unsupported
|
|
862
|
+
? 'UNSUPPORTED'
|
|
863
|
+
: candidate.circuit?.state || 'UNKNOWN',
|
|
864
|
+
score: Number(candidate.score.toFixed(4)),
|
|
865
|
+
last_latency_ms: candidate.stats.last?.latencyMs ?? null,
|
|
866
|
+
uptime: candidate.stats.uptime,
|
|
867
|
+
last_error: candidate.circuit?.lastError || null,
|
|
868
|
+
}))
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
addRequestLog(entry) {
|
|
872
|
+
this.requestLog.unshift({ ...entry, at: nowIso() })
|
|
873
|
+
while (this.requestLog.length > MAX_REQUEST_LOG) this.requestLog.pop()
|
|
874
|
+
this.broadcast('request', entry)
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
broadcast(event, payload) {
|
|
878
|
+
const message = `event: ${event}\ndata: ${JSON.stringify(payload)}\n\n`
|
|
879
|
+
for (const client of [...this.sseClients]) {
|
|
880
|
+
try {
|
|
881
|
+
client.write(message)
|
|
882
|
+
} catch {
|
|
883
|
+
this.sseClients.delete(client)
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
statusPayload() {
|
|
889
|
+
const router = this.routerConfig()
|
|
890
|
+
const activeSet = this.getSet(router.activeSet)
|
|
891
|
+
return {
|
|
892
|
+
ok: true,
|
|
893
|
+
pid: process.pid,
|
|
894
|
+
port: this.port,
|
|
895
|
+
enabled: router.enabled,
|
|
896
|
+
activeSet: router.activeSet,
|
|
897
|
+
activeModelCount: activeSet?.models?.length || 0,
|
|
898
|
+
setCount: Object.keys(router.sets || {}).length,
|
|
899
|
+
uptimeSeconds: Math.floor((Date.now() - this.startedAt) / 1000),
|
|
900
|
+
requestsRouted: this.totalRequestsRouted,
|
|
901
|
+
inFlight: this.inFlight,
|
|
902
|
+
shuttingDown: this.shuttingDown,
|
|
903
|
+
probeMode: router.probeMode,
|
|
904
|
+
lastProbeAt: this.lastProbeAt ? new Date(this.lastProbeAt).toISOString() : null,
|
|
905
|
+
crashRecovered: this.crashRecovered,
|
|
906
|
+
configPath: CONFIG_PATH,
|
|
907
|
+
tokenStatsPath: ROUTER_TOKENS_PATH,
|
|
908
|
+
logPath: ROUTER_LOG_PATH,
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
statsPayload() {
|
|
913
|
+
const router = this.routerConfig()
|
|
914
|
+
const activeSet = this.getSet(router.activeSet)
|
|
915
|
+
return {
|
|
916
|
+
...this.statusPayload(),
|
|
917
|
+
tokens: this.tokenTracker.summary(),
|
|
918
|
+
models: this.getModelHealth(activeSet),
|
|
919
|
+
requestLog: this.requestLog.slice(0, 20),
|
|
920
|
+
circuitBreakers: Object.fromEntries([...this.circuit.entries()].map(([key, value]) => [key, {
|
|
921
|
+
state: value.authError ? 'AUTH_ERROR' : value.stale ? 'STALE' : value.unsupported ? 'UNSUPPORTED' : value.state,
|
|
922
|
+
consecutiveFailures: value.consecutiveFailures,
|
|
923
|
+
cooldownMs: value.cooldownMs,
|
|
924
|
+
openedAt: value.openedAt ? new Date(value.openedAt).toISOString() : null,
|
|
925
|
+
lastError: value.lastError,
|
|
926
|
+
}])),
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
async probeCandidate(candidate, { eco = false } = {}) {
|
|
931
|
+
const key = modelKey(candidate.provider, candidate.model)
|
|
932
|
+
const apiKey = this.getApiKeyForProvider(candidate.provider)
|
|
933
|
+
if (!apiKey) {
|
|
934
|
+
this.markAuthError(key, 'missing API key')
|
|
935
|
+
return
|
|
936
|
+
}
|
|
937
|
+
// 📖 Guard: skip probe if the provider URL cannot be resolved (e.g. missing account ID)
|
|
938
|
+
const providerUrl = resolveProviderUrl(candidate.provider)
|
|
939
|
+
if (!providerUrl) {
|
|
940
|
+
this.markAuthError(key, 'provider URL unresolvable')
|
|
941
|
+
return
|
|
942
|
+
}
|
|
943
|
+
const controller = new AbortController()
|
|
944
|
+
const timeout = setTimeout(() => controller.abort(), 10000)
|
|
945
|
+
const started = performance.now()
|
|
946
|
+
try {
|
|
947
|
+
const modelsUrl = eco ? buildProviderModelsUrl(candidate.provider) : null
|
|
948
|
+
const response = modelsUrl
|
|
949
|
+
? await fetch(modelsUrl, {
|
|
950
|
+
method: 'GET',
|
|
951
|
+
headers: cloneHeadersForUpstream({}, apiKey, candidate.provider),
|
|
952
|
+
signal: controller.signal,
|
|
953
|
+
})
|
|
954
|
+
: await fetch(providerUrl, {
|
|
955
|
+
method: 'POST',
|
|
956
|
+
headers: cloneHeadersForUpstream({}, apiKey, candidate.provider),
|
|
957
|
+
body: JSON.stringify({
|
|
958
|
+
model: getApiModelId(candidate.provider, candidate.model),
|
|
959
|
+
messages: [{ role: 'user', content: 'hi' }],
|
|
960
|
+
max_tokens: 1,
|
|
961
|
+
stream: false,
|
|
962
|
+
}),
|
|
963
|
+
signal: controller.signal,
|
|
964
|
+
})
|
|
965
|
+
const latencyMs = Math.round(performance.now() - started)
|
|
966
|
+
if (response.ok) {
|
|
967
|
+
this.markSuccess(key)
|
|
968
|
+
this.recordProbeResult(key, { ok: true, latencyMs, code: response.status })
|
|
969
|
+
this.logger.info(`Probe ok ${key} — ${latencyMs}ms`)
|
|
970
|
+
} else if (AUTH_STATUS_CODES.has(response.status)) {
|
|
971
|
+
this.markAuthError(key, `HTTP ${response.status}`)
|
|
972
|
+
this.recordProbeResult(key, { ok: false, latencyMs, code: response.status })
|
|
973
|
+
} else if (RETRYABLE_STATUS_CODES.has(response.status)) {
|
|
974
|
+
this.markFailure(key, `HTTP ${response.status}`, response.status)
|
|
975
|
+
} else {
|
|
976
|
+
this.recordProbeResult(key, { ok: false, latencyMs, code: response.status })
|
|
977
|
+
}
|
|
978
|
+
} catch (error) {
|
|
979
|
+
const detail = error.name === 'AbortError' ? 'probe timeout' : error.message
|
|
980
|
+
this.markFailure(key, detail)
|
|
981
|
+
} finally {
|
|
982
|
+
clearTimeout(timeout)
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
|
|
986
|
+
async runProbeBurst() {
|
|
987
|
+
const set = this.getSet()
|
|
988
|
+
if (!set) return
|
|
989
|
+
const candidates = this.scoreCandidates(set)
|
|
990
|
+
.filter((candidate) => candidate.catalog?.routeable && !candidate.circuit?.stale)
|
|
991
|
+
await Promise.allSettled(candidates.map((candidate) => this.probeCandidate(candidate, {
|
|
992
|
+
eco: this.routerConfig().probeMode === 'eco',
|
|
993
|
+
})))
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
scheduleProbeLoop() {
|
|
997
|
+
if (this.probeTimer) clearInterval(this.probeTimer)
|
|
998
|
+
for (const timeout of this.probeTimeouts) clearTimeout(timeout)
|
|
999
|
+
this.probeTimeouts.clear()
|
|
1000
|
+
const router = this.routerConfig()
|
|
1001
|
+
const interval = router.probeIntervals[router.probeMode] || DEFAULT_ROUTER_SETTINGS.probeIntervals.balanced
|
|
1002
|
+
this.probeTimer = setInterval(() => {
|
|
1003
|
+
const set = this.getSet()
|
|
1004
|
+
if (!set || this.shuttingDown) return
|
|
1005
|
+
const candidates = this.scoreCandidates(set)
|
|
1006
|
+
.filter((candidate) => candidate.catalog?.routeable && !candidate.circuit?.stale)
|
|
1007
|
+
const stagger = candidates.length > 0 ? Math.max(250, Math.floor(interval / candidates.length)) : interval
|
|
1008
|
+
candidates.forEach((candidate, index) => {
|
|
1009
|
+
const timeout = setTimeout(() => {
|
|
1010
|
+
this.probeTimeouts.delete(timeout)
|
|
1011
|
+
void this.probeCandidate(candidate, { eco: router.probeMode === 'eco' })
|
|
1012
|
+
}, index * stagger)
|
|
1013
|
+
timeout.unref?.()
|
|
1014
|
+
this.probeTimeouts.add(timeout)
|
|
1015
|
+
})
|
|
1016
|
+
}, interval)
|
|
1017
|
+
this.probeTimer.unref?.()
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
async routeRequest({ req, res, body, setName, requestId }) {
|
|
1021
|
+
if (this.shuttingDown) {
|
|
1022
|
+
sendError(res, 503, 'Daemon is shutting down', 'service_unavailable', 'daemon_shutting_down', requestId)
|
|
1023
|
+
return
|
|
1024
|
+
}
|
|
1025
|
+
if (this.inFlight >= MAX_CONCURRENT_REQUESTS) {
|
|
1026
|
+
sendError(res, 503, 'Router overloaded, too many concurrent requests', 'service_unavailable', 'router_overloaded', requestId)
|
|
1027
|
+
return
|
|
1028
|
+
}
|
|
1029
|
+
if (!body || typeof body !== 'object' || Array.isArray(body)) {
|
|
1030
|
+
sendError(res, 400, 'Request body must be a JSON object', 'invalid_request_error', 'invalid_json_object', requestId)
|
|
1031
|
+
return
|
|
1032
|
+
}
|
|
1033
|
+
if (typeof body.model !== 'string' || !body.model.trim()) {
|
|
1034
|
+
sendError(res, 400, 'Missing required field: model', 'invalid_request_error', 'missing_model', requestId)
|
|
1035
|
+
return
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
const set = this.getSet(setName)
|
|
1039
|
+
if (!set) {
|
|
1040
|
+
sendError(res, 404, `Router set not found: ${setName || this.routerConfig().activeSet}`, 'invalid_request_error', 'set_not_found', requestId)
|
|
1041
|
+
return
|
|
1042
|
+
}
|
|
1043
|
+
|
|
1044
|
+
const candidates = this.getRoutingCandidates(set)
|
|
1045
|
+
const maxRetries = this.routerConfig().failover.maxRetries
|
|
1046
|
+
const maxAttempts = Math.max(1, maxRetries)
|
|
1047
|
+
if (candidates.length === 0) {
|
|
1048
|
+
const health = this.getModelHealth(set)
|
|
1049
|
+
const quotaExhausted = [...this.quotaExhausted].filter((key) => set.models.some((model) => modelKey(model.provider, model.model) === key))
|
|
1050
|
+
|
|
1051
|
+
let statusCode = 503
|
|
1052
|
+
let errorCode = 'all_models_unavailable'
|
|
1053
|
+
let errorType = 'service_unavailable'
|
|
1054
|
+
if (health.length > 0) {
|
|
1055
|
+
if (health.every((h) => h.state === 'AUTH_ERROR')) {
|
|
1056
|
+
statusCode = 401
|
|
1057
|
+
errorCode = 'invalid_api_key'
|
|
1058
|
+
errorType = 'invalid_request_error'
|
|
1059
|
+
} else if (health.every((h) => h.state === 'AUTH_ERROR' || quotaExhausted.includes(h.key))) {
|
|
1060
|
+
statusCode = 429
|
|
1061
|
+
errorCode = 'insufficient_quota'
|
|
1062
|
+
errorType = 'insufficient_quota'
|
|
1063
|
+
} else if (health.every((h) => h.state === 'STALE' || h.state === 'UNSUPPORTED')) {
|
|
1064
|
+
statusCode = 400
|
|
1065
|
+
errorCode = 'invalid_model'
|
|
1066
|
+
errorType = 'invalid_request_error'
|
|
1067
|
+
}
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
sendError(res, statusCode, `All models in set are unavailable: ${set.name}`, errorType, errorCode, requestId, {
|
|
1071
|
+
set: set.name,
|
|
1072
|
+
models_tried: [],
|
|
1073
|
+
quota_exhausted: quotaExhausted,
|
|
1074
|
+
quota_exhausted_details: this.quotaDetailsForKeys(quotaExhausted),
|
|
1075
|
+
model_health: health,
|
|
1076
|
+
})
|
|
1077
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
1078
|
+
event: 'app_router_all_down',
|
|
1079
|
+
mode: 'daemon',
|
|
1080
|
+
properties: {
|
|
1081
|
+
set_name: set.name,
|
|
1082
|
+
models_tried: [],
|
|
1083
|
+
quota_exhausted_count: quotaExhausted.length,
|
|
1084
|
+
},
|
|
1085
|
+
})
|
|
1086
|
+
return
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
this.inFlight += 1
|
|
1090
|
+
try {
|
|
1091
|
+
const tried = []
|
|
1092
|
+
const blockedProviders = new Set()
|
|
1093
|
+
let attemptIndex = 0
|
|
1094
|
+
for (const candidate of candidates) {
|
|
1095
|
+
if (attemptIndex >= maxAttempts) break
|
|
1096
|
+
if (blockedProviders.has(candidate.provider)) continue
|
|
1097
|
+
tried.push(candidate.key)
|
|
1098
|
+
const result = body.stream === true
|
|
1099
|
+
? await this.proxyStreamingRequest({ req, res, body, candidate, requestId, attemptIndex })
|
|
1100
|
+
: await this.proxyJsonRequest({ req, res, body, candidate, requestId, attemptIndex })
|
|
1101
|
+
if (result.done) return
|
|
1102
|
+
attemptIndex += 1
|
|
1103
|
+
if (result.authFailure) blockedProviders.add(candidate.provider)
|
|
1104
|
+
if (result.failoverToNext && attemptIndex < maxAttempts) {
|
|
1105
|
+
const next = candidates.find((entry) => !tried.includes(entry.key) && !blockedProviders.has(entry.provider))
|
|
1106
|
+
this.logger.warn(`Failover ${candidate.key}${next ? ` -> ${next.key}` : ''}`, { request_id: requestId, reason: result.reason })
|
|
1107
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
1108
|
+
event: 'app_router_failover',
|
|
1109
|
+
mode: 'daemon',
|
|
1110
|
+
properties: {
|
|
1111
|
+
from_model: candidate.key,
|
|
1112
|
+
to_model: next?.key || null,
|
|
1113
|
+
reason: result.reason,
|
|
1114
|
+
attempt_number: attemptIndex,
|
|
1115
|
+
},
|
|
1116
|
+
})
|
|
1117
|
+
continue
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
const quotaExhausted = [...this.quotaExhausted].filter((key) => tried.includes(key))
|
|
1122
|
+
const allAuthError = tried.every((key) => {
|
|
1123
|
+
const [provider] = key.split('/')
|
|
1124
|
+
return blockedProviders.has(provider)
|
|
1125
|
+
})
|
|
1126
|
+
const allQuotaError = tried.length > 0 && quotaExhausted.length === tried.length
|
|
1127
|
+
const allAuthOrQuota = tried.every((key) => {
|
|
1128
|
+
const [provider] = key.split('/')
|
|
1129
|
+
return blockedProviders.has(provider) || quotaExhausted.includes(key)
|
|
1130
|
+
})
|
|
1131
|
+
|
|
1132
|
+
let statusCode = 503
|
|
1133
|
+
let errorCode = 'all_models_failed'
|
|
1134
|
+
let errorType = 'service_unavailable'
|
|
1135
|
+
|
|
1136
|
+
if (tried.length > 0) {
|
|
1137
|
+
if (allAuthError) {
|
|
1138
|
+
statusCode = 401
|
|
1139
|
+
errorCode = 'invalid_api_key'
|
|
1140
|
+
errorType = 'invalid_request_error'
|
|
1141
|
+
} else if (allQuotaError || allAuthOrQuota) {
|
|
1142
|
+
statusCode = 429
|
|
1143
|
+
errorCode = 'insufficient_quota'
|
|
1144
|
+
errorType = 'insufficient_quota'
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
sendError(res, statusCode, `All routed models failed for set: ${set.name}`, errorType, errorCode, requestId, {
|
|
1149
|
+
set: set.name,
|
|
1150
|
+
models_tried: tried,
|
|
1151
|
+
quota_exhausted: quotaExhausted,
|
|
1152
|
+
quota_exhausted_details: this.quotaDetailsForKeys(quotaExhausted),
|
|
1153
|
+
})
|
|
1154
|
+
} finally {
|
|
1155
|
+
this.inFlight -= 1
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
|
|
1159
|
+
async proxyJsonRequest({ req, res, body, candidate, requestId, attemptIndex }) {
|
|
1160
|
+
const key = candidate.key
|
|
1161
|
+
const apiKey = this.getApiKeyForProvider(candidate.provider)
|
|
1162
|
+
// 📖 Guard: bail early if provider URL cannot be resolved
|
|
1163
|
+
const providerUrl = resolveProviderUrl(candidate.provider)
|
|
1164
|
+
if (!providerUrl) {
|
|
1165
|
+
this.markFailure(key, 'provider URL unresolvable')
|
|
1166
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 'ERR', latency_ms: null, tokens: 0, failover: attemptIndex > 0, error: 'provider_url_unresolvable' })
|
|
1167
|
+
return { done: false, failoverToNext: true, reason: 'provider_url_unresolvable' }
|
|
1168
|
+
}
|
|
1169
|
+
const controller = new AbortController()
|
|
1170
|
+
const timeout = setTimeout(() => controller.abort(), this.routerConfig().failover.requestTimeoutMs)
|
|
1171
|
+
const started = performance.now()
|
|
1172
|
+
const upstreamBody = {
|
|
1173
|
+
...body,
|
|
1174
|
+
model: getApiModelId(candidate.provider, candidate.model),
|
|
1175
|
+
stream: false,
|
|
1176
|
+
}
|
|
1177
|
+
// 📖 Some providers/models fail if we send custom internal params, so strip them
|
|
1178
|
+
if (upstreamBody.add_generation_prompt !== undefined) delete upstreamBody.add_generation_prompt
|
|
1179
|
+
if (upstreamBody.continue_final_message !== undefined) delete upstreamBody.continue_final_message
|
|
1180
|
+
if (upstreamBody.tools?.length === 0) delete upstreamBody.tools
|
|
1181
|
+
|
|
1182
|
+
const clientAbort = attachClientAbort(req, res, controller)
|
|
1183
|
+
try {
|
|
1184
|
+
const response = await fetch(providerUrl, {
|
|
1185
|
+
method: 'POST',
|
|
1186
|
+
headers: {
|
|
1187
|
+
...cloneHeadersForUpstream(req.headers, apiKey, candidate.provider),
|
|
1188
|
+
'X-Request-Id': requestId,
|
|
1189
|
+
},
|
|
1190
|
+
body: JSON.stringify(upstreamBody),
|
|
1191
|
+
signal: controller.signal,
|
|
1192
|
+
})
|
|
1193
|
+
clearTimeout(timeout)
|
|
1194
|
+
const latencyMs = Math.round(performance.now() - started)
|
|
1195
|
+
const text = await response.text()
|
|
1196
|
+
const upstreamMeta = buildUpstreamMeta(response, text)
|
|
1197
|
+
|
|
1198
|
+
if (isLikelyHtmlResponse(response.headers, text)) {
|
|
1199
|
+
this.markFailure(key, 'upstream_html_maintenance', 503, upstreamMeta)
|
|
1200
|
+
this.recordRouterError('upstream_html_maintenance', requestId, { model: key, status: response.status })
|
|
1201
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 503, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: 'upstream_html_maintenance' })
|
|
1202
|
+
return { done: false, failoverToNext: true, reason: 'upstream_html_maintenance' }
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
if (response.ok) {
|
|
1206
|
+
const parsed = parseJsonResult(text)
|
|
1207
|
+
if (!parsed.ok || !parsed.value || typeof parsed.value !== 'object') {
|
|
1208
|
+
this.markFailure(key, 'upstream_invalid_json', 502, upstreamMeta)
|
|
1209
|
+
this.recordRouterError('upstream_invalid_json', requestId, { model: key, status: response.status })
|
|
1210
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 502, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: 'upstream_invalid_json' })
|
|
1211
|
+
return { done: false, failoverToNext: true, reason: 'upstream_invalid_json' }
|
|
1212
|
+
}
|
|
1213
|
+
this.markSuccess(key, latencyMs)
|
|
1214
|
+
const usage = extractUsage(parsed.value)
|
|
1215
|
+
this.tokenTracker.record(candidate.provider, candidate.model, usage)
|
|
1216
|
+
this.totalRequestsRouted += 1
|
|
1217
|
+
// 📖 Fire app_router_use telemetry once per 10 routed requests
|
|
1218
|
+
if (this.totalRequestsRouted % 10 === 0) {
|
|
1219
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
1220
|
+
event: 'app_router_use',
|
|
1221
|
+
mode: 'daemon',
|
|
1222
|
+
properties: {
|
|
1223
|
+
total_requests: this.totalRequestsRouted,
|
|
1224
|
+
active_set: this.routerConfig().activeSet,
|
|
1225
|
+
},
|
|
1226
|
+
})
|
|
1227
|
+
}
|
|
1228
|
+
this.addRequestLog({
|
|
1229
|
+
request_id: requestId,
|
|
1230
|
+
model: key,
|
|
1231
|
+
status: response.status,
|
|
1232
|
+
latency_ms: latencyMs,
|
|
1233
|
+
tokens: usage?.total_tokens || 0,
|
|
1234
|
+
failover: attemptIndex > 0,
|
|
1235
|
+
})
|
|
1236
|
+
this.logger.info(`Routed to ${key} — ${latencyMs}ms`, { request_id: requestId, status: response.status })
|
|
1237
|
+
if (!res.writableEnded) {
|
|
1238
|
+
res.writeHead(response.status, {
|
|
1239
|
+
...headerEntries(response.headers),
|
|
1240
|
+
'x-fcm-router-model': key,
|
|
1241
|
+
'x-request-id': requestId,
|
|
1242
|
+
})
|
|
1243
|
+
res.end(text)
|
|
1244
|
+
}
|
|
1245
|
+
return { done: true }
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
if (AUTH_STATUS_CODES.has(response.status)) {
|
|
1249
|
+
this.markAuthError(key, `HTTP ${response.status}`)
|
|
1250
|
+
this.addRequestLog({ request_id: requestId, model: key, status: response.status, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: 'auth_error' })
|
|
1251
|
+
return { done: false, failoverToNext: true, reason: `auth_${response.status}`, authFailure: true }
|
|
1252
|
+
}
|
|
1253
|
+
|
|
1254
|
+
if (RETRYABLE_STATUS_CODES.has(response.status)) {
|
|
1255
|
+
this.markFailure(key, `HTTP ${response.status}`, response.status, upstreamMeta)
|
|
1256
|
+
this.addRequestLog({ request_id: requestId, model: key, status: response.status, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: `http_${response.status}` })
|
|
1257
|
+
return { done: false, failoverToNext: true, reason: `http_${response.status}` }
|
|
1258
|
+
}
|
|
1259
|
+
|
|
1260
|
+
// 📖 Provide failover fallback for non-retryable errors from the provider (like 400 Bad Request)
|
|
1261
|
+
// when they are caused by format idiosyncrasies (e.g. empty tools array that another model might accept)
|
|
1262
|
+
if (response.status >= 400 && response.status < 500) {
|
|
1263
|
+
this.recordRouterError(`http_${response.status}`, requestId, { model: key, status: response.status, body: text })
|
|
1264
|
+
this.markFailure(key, `HTTP ${response.status}`)
|
|
1265
|
+
this.addRequestLog({ request_id: requestId, model: key, status: response.status, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: `http_${response.status}` })
|
|
1266
|
+
return { done: false, failoverToNext: true, reason: `http_${response.status}` }
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1269
|
+
if (!res.writableEnded) {
|
|
1270
|
+
res.writeHead(response.status, {
|
|
1271
|
+
...headerEntries(response.headers),
|
|
1272
|
+
'x-fcm-router-model': key,
|
|
1273
|
+
'x-request-id': requestId,
|
|
1274
|
+
})
|
|
1275
|
+
res.end(text)
|
|
1276
|
+
}
|
|
1277
|
+
return { done: true }
|
|
1278
|
+
} catch (error) {
|
|
1279
|
+
if (clientAbort.aborted) {
|
|
1280
|
+
this.logger.info(`Client disconnected before upstream response from ${key}`, { request_id: requestId })
|
|
1281
|
+
return { done: true }
|
|
1282
|
+
}
|
|
1283
|
+
const reason = error.name === 'AbortError' ? 'timeout' : (error.message || String(error))
|
|
1284
|
+
this.markFailure(key, reason)
|
|
1285
|
+
this.recordRouterError('upstream_transport_error', requestId, { model: key, reason })
|
|
1286
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 'ERR', latency_ms: null, tokens: 0, failover: attemptIndex > 0, error: reason })
|
|
1287
|
+
return { done: false, failoverToNext: true, reason }
|
|
1288
|
+
} finally {
|
|
1289
|
+
clearTimeout(timeout)
|
|
1290
|
+
clientAbort.dispose()
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
async proxyStreamingRequest({ req, res, body, candidate, requestId, attemptIndex }) {
|
|
1295
|
+
const key = candidate.key
|
|
1296
|
+
const apiKey = this.getApiKeyForProvider(candidate.provider)
|
|
1297
|
+
// 📖 Guard: bail early if provider URL cannot be resolved
|
|
1298
|
+
const providerUrl = resolveProviderUrl(candidate.provider)
|
|
1299
|
+
if (!providerUrl) {
|
|
1300
|
+
this.markFailure(key, 'provider URL unresolvable')
|
|
1301
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 'ERR', latency_ms: null, tokens: 0, failover: attemptIndex > 0, error: 'provider_url_unresolvable', stream: true })
|
|
1302
|
+
return { done: false, failoverToNext: true, reason: 'provider_url_unresolvable' }
|
|
1303
|
+
}
|
|
1304
|
+
const controller = new AbortController()
|
|
1305
|
+
const started = performance.now()
|
|
1306
|
+
const upstreamBody = {
|
|
1307
|
+
...body,
|
|
1308
|
+
model: getApiModelId(candidate.provider, candidate.model),
|
|
1309
|
+
stream: true,
|
|
1310
|
+
}
|
|
1311
|
+
// 📖 Some providers/models fail if we send custom internal params, so strip them
|
|
1312
|
+
if (upstreamBody.add_generation_prompt !== undefined) delete upstreamBody.add_generation_prompt
|
|
1313
|
+
if (upstreamBody.continue_final_message !== undefined) delete upstreamBody.continue_final_message
|
|
1314
|
+
if (upstreamBody.tools?.length === 0) delete upstreamBody.tools
|
|
1315
|
+
|
|
1316
|
+
const timeout = setTimeout(() => controller.abort(), this.routerConfig().failover.requestTimeoutMs)
|
|
1317
|
+
let sentToClient = false
|
|
1318
|
+
const clientAbort = attachClientAbort(req, res, controller)
|
|
1319
|
+
try {
|
|
1320
|
+
const response = await fetch(providerUrl, {
|
|
1321
|
+
method: 'POST',
|
|
1322
|
+
headers: {
|
|
1323
|
+
...cloneHeadersForUpstream(req.headers, apiKey, candidate.provider),
|
|
1324
|
+
'X-Request-Id': requestId,
|
|
1325
|
+
},
|
|
1326
|
+
body: JSON.stringify(upstreamBody),
|
|
1327
|
+
signal: controller.signal,
|
|
1328
|
+
})
|
|
1329
|
+
clearTimeout(timeout)
|
|
1330
|
+
const latencyMs = Math.round(performance.now() - started)
|
|
1331
|
+
const upstreamMeta = buildUpstreamMeta(response)
|
|
1332
|
+
if (isLikelyHtmlResponse(response.headers)) {
|
|
1333
|
+
this.markFailure(key, 'upstream_html_maintenance', 503, upstreamMeta)
|
|
1334
|
+
this.recordRouterError('upstream_html_maintenance', requestId, { model: key, status: response.status, stream: true })
|
|
1335
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 503, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: 'upstream_html_maintenance', stream: true })
|
|
1336
|
+
return { done: false, failoverToNext: true, reason: 'upstream_html_maintenance' }
|
|
1337
|
+
}
|
|
1338
|
+
if (!response.ok) {
|
|
1339
|
+
if (AUTH_STATUS_CODES.has(response.status)) {
|
|
1340
|
+
this.markAuthError(key, `HTTP ${response.status}`)
|
|
1341
|
+
this.addRequestLog({ request_id: requestId, model: key, status: response.status, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: 'auth_error', stream: true })
|
|
1342
|
+
return { done: false, failoverToNext: true, reason: `auth_${response.status}`, authFailure: true }
|
|
1343
|
+
}
|
|
1344
|
+
if (RETRYABLE_STATUS_CODES.has(response.status)) {
|
|
1345
|
+
this.markFailure(key, `HTTP ${response.status}`, response.status, upstreamMeta)
|
|
1346
|
+
this.addRequestLog({ request_id: requestId, model: key, status: response.status, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: `http_${response.status}`, stream: true })
|
|
1347
|
+
return { done: false, failoverToNext: true, reason: `http_${response.status}` }
|
|
1348
|
+
}
|
|
1349
|
+
|
|
1350
|
+
// 📖 Provide failover fallback for non-retryable errors from the provider (like 400 Bad Request)
|
|
1351
|
+
// when they are caused by format idiosyncrasies (e.g. empty tools array that another model might accept)
|
|
1352
|
+
if (response.status >= 400 && response.status < 500) {
|
|
1353
|
+
const rawErr = await response.text()
|
|
1354
|
+
this.recordRouterError(`http_${response.status}`, requestId, { model: key, status: response.status, body: rawErr, stream: true })
|
|
1355
|
+
this.markFailure(key, `HTTP ${response.status}`)
|
|
1356
|
+
this.addRequestLog({ request_id: requestId, model: key, status: response.status, latency_ms: latencyMs, tokens: 0, failover: attemptIndex > 0, error: `http_${response.status}`, stream: true })
|
|
1357
|
+
return { done: false, failoverToNext: true, reason: `http_${response.status}` }
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
if (!res.writableEnded) {
|
|
1361
|
+
res.writeHead(response.status, {
|
|
1362
|
+
...headerEntries(response.headers),
|
|
1363
|
+
'x-fcm-router-model': key,
|
|
1364
|
+
'x-request-id': requestId,
|
|
1365
|
+
})
|
|
1366
|
+
try { res.end(await response.text()) } catch {}
|
|
1367
|
+
}
|
|
1368
|
+
return { done: true }
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1371
|
+
const reader = response.body?.getReader()
|
|
1372
|
+
if (!reader) {
|
|
1373
|
+
this.markFailure(key, 'empty stream')
|
|
1374
|
+
return { done: false, failoverToNext: true, reason: 'empty_stream' }
|
|
1375
|
+
}
|
|
1376
|
+
|
|
1377
|
+
const firstChunk = await this.readStreamChunkWithTimeout(reader)
|
|
1378
|
+
if (firstChunk.done || !firstChunk.value) {
|
|
1379
|
+
this.markFailure(key, 'stream ended before first chunk')
|
|
1380
|
+
return { done: false, failoverToNext: true, reason: 'empty_stream' }
|
|
1381
|
+
}
|
|
1382
|
+
// 📖 Guard: ensure value is a valid buffer source before conversion
|
|
1383
|
+
const firstChunkBuffer = Buffer.isBuffer(firstChunk.value) ? firstChunk.value : Buffer.from(firstChunk.value)
|
|
1384
|
+
if (isLikelyHtmlText(firstChunkBuffer.toString('utf8'))) {
|
|
1385
|
+
this.markFailure(key, 'upstream_html_maintenance', 503, upstreamMeta)
|
|
1386
|
+
this.recordRouterError('upstream_html_maintenance', requestId, { model: key, status: response.status, stream: true })
|
|
1387
|
+
return { done: false, failoverToNext: true, reason: 'upstream_html_maintenance' }
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
if (res.writableEnded) return { done: true }
|
|
1391
|
+
res.writeHead(response.status, {
|
|
1392
|
+
...headerEntries(response.headers),
|
|
1393
|
+
'x-fcm-router-model': key,
|
|
1394
|
+
'x-request-id': requestId,
|
|
1395
|
+
})
|
|
1396
|
+
sentToClient = true
|
|
1397
|
+
res.write(firstChunkBuffer)
|
|
1398
|
+
|
|
1399
|
+
while (!res.writableEnded) {
|
|
1400
|
+
const chunk = await this.readStreamChunkWithTimeout(reader)
|
|
1401
|
+
if (chunk.done || !chunk.value) break
|
|
1402
|
+
// 📖 Guard: ensure chunk value is safe for Buffer conversion
|
|
1403
|
+
const buf = Buffer.isBuffer(chunk.value) ? chunk.value : Buffer.from(chunk.value)
|
|
1404
|
+
res.write(buf)
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1407
|
+
this.markSuccess(key, latencyMs)
|
|
1408
|
+
this.totalRequestsRouted += 1
|
|
1409
|
+
this.addRequestLog({
|
|
1410
|
+
request_id: requestId,
|
|
1411
|
+
model: key,
|
|
1412
|
+
status: response.status,
|
|
1413
|
+
latency_ms: latencyMs,
|
|
1414
|
+
tokens: 0,
|
|
1415
|
+
failover: attemptIndex > 0,
|
|
1416
|
+
stream: true,
|
|
1417
|
+
})
|
|
1418
|
+
if (!res.writableEnded) res.end()
|
|
1419
|
+
return { done: true }
|
|
1420
|
+
} catch (error) {
|
|
1421
|
+
try { controller.abort() } catch {}
|
|
1422
|
+
if (clientAbort.aborted) {
|
|
1423
|
+
this.logger.info(`Client disconnected during streaming response from ${key}`, { request_id: requestId })
|
|
1424
|
+
return { done: true }
|
|
1425
|
+
}
|
|
1426
|
+
const reason = error.name === 'AbortError' ? 'timeout' : (error.message || String(error))
|
|
1427
|
+
this.markFailure(key, reason)
|
|
1428
|
+
if (reason !== 'timeout') {
|
|
1429
|
+
this.recordRouterError('upstream_stream_error', requestId, { model: key, reason, partial: sentToClient })
|
|
1430
|
+
} else {
|
|
1431
|
+
this.recordRouterError('timeout', requestId, { model: key, reason, partial: sentToClient })
|
|
1432
|
+
}
|
|
1433
|
+
this.addRequestLog({ request_id: requestId, model: key, status: 'ERR', latency_ms: null, tokens: 0, failover: attemptIndex > 0, error: reason, stream: true })
|
|
1434
|
+
if (sentToClient) {
|
|
1435
|
+
this.logger.warn(`Streaming failure after partial response from ${key}`, { request_id: requestId, reason })
|
|
1436
|
+
try { if (!res.writableEnded) res.end() } catch {}
|
|
1437
|
+
return { done: true }
|
|
1438
|
+
}
|
|
1439
|
+
return { done: false, failoverToNext: true, reason }
|
|
1440
|
+
} finally {
|
|
1441
|
+
clearTimeout(timeout)
|
|
1442
|
+
clientAbort.dispose()
|
|
1443
|
+
}
|
|
1444
|
+
}
|
|
1445
|
+
|
|
1446
|
+
readStreamChunkWithTimeout(reader) {
|
|
1447
|
+
const timeoutMs = this.routerConfig().failover.streamStallTimeoutMs
|
|
1448
|
+
let timeout = null
|
|
1449
|
+
return Promise.race([
|
|
1450
|
+
reader.read().finally(() => {
|
|
1451
|
+
if (timeout) clearTimeout(timeout)
|
|
1452
|
+
}),
|
|
1453
|
+
new Promise((_, reject) => {
|
|
1454
|
+
timeout = setTimeout(() => reject(new Error('stream_stall_timeout')), timeoutMs)
|
|
1455
|
+
}),
|
|
1456
|
+
])
|
|
1457
|
+
}
|
|
1458
|
+
|
|
1459
|
+
async handleSetsRequest(req, res, url, requestId) {
|
|
1460
|
+
const router = this.routerConfig()
|
|
1461
|
+
const setNameMatch = url.pathname.match(/^\/sets\/([^/]+)$/)
|
|
1462
|
+
const activateMatch = url.pathname.match(/^\/sets\/([^/]+)\/activate$/)
|
|
1463
|
+
|
|
1464
|
+
if (req.method === 'GET' && url.pathname === '/sets') {
|
|
1465
|
+
sendJson(res, 200, { activeSet: router.activeSet, sets: router.sets })
|
|
1466
|
+
return
|
|
1467
|
+
}
|
|
1468
|
+
|
|
1469
|
+
if (req.method === 'POST' && url.pathname === '/sets') {
|
|
1470
|
+
const body = await readJsonBody(req)
|
|
1471
|
+
const name = typeof body.name === 'string' ? body.name.trim() : ''
|
|
1472
|
+
if (!name) {
|
|
1473
|
+
sendError(res, 400, 'Set name is required', 'invalid_request_error', 'missing_set_name', requestId)
|
|
1474
|
+
return
|
|
1475
|
+
}
|
|
1476
|
+
const normalized = normalizeRouterConfig({
|
|
1477
|
+
...router,
|
|
1478
|
+
sets: {
|
|
1479
|
+
...router.sets,
|
|
1480
|
+
[name]: {
|
|
1481
|
+
name,
|
|
1482
|
+
models: Array.isArray(body.models) ? body.models : [],
|
|
1483
|
+
created: nowIso(),
|
|
1484
|
+
},
|
|
1485
|
+
},
|
|
1486
|
+
})
|
|
1487
|
+
this.setRouterConfig(normalized)
|
|
1488
|
+
this.saveRouterConfig()
|
|
1489
|
+
this.broadcast('set_change', { old_set: router.activeSet, new_set: normalized.activeSet })
|
|
1490
|
+
sendJson(res, 201, { set: normalized.sets[normalized.activeSet] || normalized.sets[name], router: normalized })
|
|
1491
|
+
return
|
|
1492
|
+
}
|
|
1493
|
+
|
|
1494
|
+
if (activateMatch && req.method === 'POST') {
|
|
1495
|
+
const name = decodeURIComponent(activateMatch[1])
|
|
1496
|
+
if (!router.sets[name]) {
|
|
1497
|
+
sendError(res, 404, `Router set not found: ${name}`, 'invalid_request_error', 'set_not_found', requestId)
|
|
1498
|
+
return
|
|
1499
|
+
}
|
|
1500
|
+
this.setRouterConfig({ ...router, activeSet: name })
|
|
1501
|
+
this.saveRouterConfig()
|
|
1502
|
+
this.broadcast('set_change', { old_set: router.activeSet, new_set: name })
|
|
1503
|
+
void this.runProbeBurst()
|
|
1504
|
+
sendJson(res, 200, { activeSet: name })
|
|
1505
|
+
return
|
|
1506
|
+
}
|
|
1507
|
+
|
|
1508
|
+
if (setNameMatch && req.method === 'PUT') {
|
|
1509
|
+
const name = decodeURIComponent(setNameMatch[1])
|
|
1510
|
+
if (!router.sets[name]) {
|
|
1511
|
+
sendError(res, 404, `Router set not found: ${name}`, 'invalid_request_error', 'set_not_found', requestId)
|
|
1512
|
+
return
|
|
1513
|
+
}
|
|
1514
|
+
const body = await readJsonBody(req)
|
|
1515
|
+
const nextName = typeof body.name === 'string' && body.name.trim() ? body.name.trim() : name
|
|
1516
|
+
const nextSets = { ...router.sets }
|
|
1517
|
+
delete nextSets[name]
|
|
1518
|
+
nextSets[nextName] = {
|
|
1519
|
+
...router.sets[name],
|
|
1520
|
+
...body,
|
|
1521
|
+
name: nextName,
|
|
1522
|
+
models: Array.isArray(body.models) ? body.models : router.sets[name].models,
|
|
1523
|
+
}
|
|
1524
|
+
const nextActiveSet = router.activeSet === name ? nextName : router.activeSet
|
|
1525
|
+
const normalized = normalizeRouterConfig({ ...router, activeSet: nextActiveSet, sets: nextSets })
|
|
1526
|
+
this.setRouterConfig(normalized)
|
|
1527
|
+
this.saveRouterConfig()
|
|
1528
|
+
sendJson(res, 200, { set: normalized.sets[nextName], router: normalized })
|
|
1529
|
+
return
|
|
1530
|
+
}
|
|
1531
|
+
|
|
1532
|
+
if (setNameMatch && req.method === 'DELETE') {
|
|
1533
|
+
const name = decodeURIComponent(setNameMatch[1])
|
|
1534
|
+
if (!router.sets[name]) {
|
|
1535
|
+
sendError(res, 404, `Router set not found: ${name}`, 'invalid_request_error', 'set_not_found', requestId)
|
|
1536
|
+
return
|
|
1537
|
+
}
|
|
1538
|
+
const nextSets = { ...router.sets }
|
|
1539
|
+
delete nextSets[name]
|
|
1540
|
+
const nextActiveSet = router.activeSet === name ? (Object.keys(nextSets)[0] || DEFAULT_ROUTER_SETTINGS.activeSet) : router.activeSet
|
|
1541
|
+
this.setRouterConfig({ ...router, activeSet: nextActiveSet, sets: nextSets })
|
|
1542
|
+
this.saveRouterConfig()
|
|
1543
|
+
sendJson(res, 200, { deleted: name, activeSet: this.routerConfig().activeSet })
|
|
1544
|
+
return
|
|
1545
|
+
}
|
|
1546
|
+
|
|
1547
|
+
sendError(res, 404, 'Not found', 'invalid_request_error', 'not_found', requestId)
|
|
1548
|
+
}
|
|
1549
|
+
|
|
1550
|
+
async handleProbeModeRequest(req, res, requestId) {
|
|
1551
|
+
const body = await readJsonBody(req)
|
|
1552
|
+
const nextProbeMode = typeof body.probeMode === 'string'
|
|
1553
|
+
? body.probeMode.trim().toLowerCase()
|
|
1554
|
+
: typeof body.mode === 'string'
|
|
1555
|
+
? body.mode.trim().toLowerCase()
|
|
1556
|
+
: ''
|
|
1557
|
+
if (!['eco', 'balanced', 'aggressive'].includes(nextProbeMode)) {
|
|
1558
|
+
sendError(res, 400, 'probeMode must be one of: eco, balanced, aggressive', 'invalid_request_error', 'invalid_probe_mode', requestId)
|
|
1559
|
+
return
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1562
|
+
const router = this.routerConfig()
|
|
1563
|
+
const previousProbeMode = router.probeMode
|
|
1564
|
+
this.setRouterConfig({ ...router, probeMode: nextProbeMode })
|
|
1565
|
+
this.saveRouterConfig()
|
|
1566
|
+
this.scheduleProbeLoop()
|
|
1567
|
+
this.broadcast('config', {
|
|
1568
|
+
activeSet: this.routerConfig().activeSet,
|
|
1569
|
+
old_probe_mode: previousProbeMode,
|
|
1570
|
+
probe_mode: nextProbeMode,
|
|
1571
|
+
})
|
|
1572
|
+
void this.runProbeBurst()
|
|
1573
|
+
sendJson(res, 200, {
|
|
1574
|
+
ok: true,
|
|
1575
|
+
previousProbeMode,
|
|
1576
|
+
probeMode: nextProbeMode,
|
|
1577
|
+
}, { 'x-request-id': requestId })
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1580
|
+
async handleHttp(req, res) {
|
|
1581
|
+
const requestId = req.headers['x-request-id'] || `req-${randomUUID()}`
|
|
1582
|
+
const url = new URL(req.url, `http://localhost:${this.port}`)
|
|
1583
|
+
try {
|
|
1584
|
+
if (req.method === 'GET' && url.pathname === '/health') {
|
|
1585
|
+
sendJson(res, 200, this.statusPayload(), { 'x-request-id': requestId })
|
|
1586
|
+
return
|
|
1587
|
+
}
|
|
1588
|
+
if (req.method === 'GET' && url.pathname === '/stats') {
|
|
1589
|
+
sendJson(res, 200, this.statsPayload(), { 'x-request-id': requestId })
|
|
1590
|
+
return
|
|
1591
|
+
}
|
|
1592
|
+
if (req.method === 'GET' && url.pathname === '/stats/tokens') {
|
|
1593
|
+
sendJson(res, 200, this.tokenTracker.summary(), { 'x-request-id': requestId })
|
|
1594
|
+
return
|
|
1595
|
+
}
|
|
1596
|
+
if (req.method === 'GET' && url.pathname.startsWith('/stats/tokens/daily/')) {
|
|
1597
|
+
const date = decodeURIComponent(url.pathname.replace('/stats/tokens/daily/', ''))
|
|
1598
|
+
sendJson(res, 200, { date, usage: this.tokenTracker.stats.daily[date] || null }, { 'x-request-id': requestId })
|
|
1599
|
+
return
|
|
1600
|
+
}
|
|
1601
|
+
if (req.method === 'GET' && url.pathname === '/v1/models') {
|
|
1602
|
+
const router = this.routerConfig()
|
|
1603
|
+
sendJson(res, 200, {
|
|
1604
|
+
object: 'list',
|
|
1605
|
+
data: [
|
|
1606
|
+
{ id: 'fcm', object: 'model', owned_by: 'fcm-router' },
|
|
1607
|
+
...Object.keys(router.sets || {}).map((name) => ({ id: `fcm:${name}`, object: 'model', owned_by: 'fcm-router' })),
|
|
1608
|
+
],
|
|
1609
|
+
}, { 'x-request-id': requestId })
|
|
1610
|
+
return
|
|
1611
|
+
}
|
|
1612
|
+
if (req.method === 'GET' && url.pathname === '/stream/events') {
|
|
1613
|
+
if (this.sseClients.size >= MAX_SSE_CLIENTS) {
|
|
1614
|
+
sendError(res, 503, 'Too many dashboard clients', 'service_unavailable', 'too_many_sse_clients', requestId)
|
|
1615
|
+
return
|
|
1616
|
+
}
|
|
1617
|
+
res.writeHead(200, {
|
|
1618
|
+
'Content-Type': 'text/event-stream',
|
|
1619
|
+
'Cache-Control': 'no-cache',
|
|
1620
|
+
Connection: 'keep-alive',
|
|
1621
|
+
'x-request-id': requestId,
|
|
1622
|
+
})
|
|
1623
|
+
res.write(`event: hello\ndata: ${JSON.stringify(this.statusPayload())}\n\n`)
|
|
1624
|
+
this.sseClients.add(res)
|
|
1625
|
+
req.on('close', () => this.sseClients.delete(res))
|
|
1626
|
+
return
|
|
1627
|
+
}
|
|
1628
|
+
if (url.pathname === '/daemon/shutdown' && req.method === 'POST') {
|
|
1629
|
+
sendJson(res, 200, { ok: true, message: 'Daemon shutting down' }, { 'x-request-id': requestId })
|
|
1630
|
+
setTimeout(() => this.shutdown(0), 50)
|
|
1631
|
+
return
|
|
1632
|
+
}
|
|
1633
|
+
if (url.pathname === '/daemon/probe-mode' && req.method === 'POST') {
|
|
1634
|
+
await this.handleProbeModeRequest(req, res, requestId)
|
|
1635
|
+
return
|
|
1636
|
+
}
|
|
1637
|
+
if (url.pathname === '/sets' || url.pathname.startsWith('/sets/')) {
|
|
1638
|
+
await this.handleSetsRequest(req, res, url, requestId)
|
|
1639
|
+
return
|
|
1640
|
+
}
|
|
1641
|
+
if (url.pathname === '/v1/chat/completions' || url.pathname.match(/^\/v1\/sets\/[^/]+\/chat\/completions$/)) {
|
|
1642
|
+
if (req.method !== 'POST') {
|
|
1643
|
+
sendError(res, 405, 'Method not allowed', 'invalid_request_error', 'method_not_allowed', requestId, { allowed: ['POST'] })
|
|
1644
|
+
return
|
|
1645
|
+
}
|
|
1646
|
+
const setMatch = url.pathname.match(/^\/v1\/sets\/([^/]+)\/chat\/completions$/)
|
|
1647
|
+
const body = await readJsonBody(req)
|
|
1648
|
+
await this.routeRequest({ req, res, body, setName: setMatch ? decodeURIComponent(setMatch[1]) : null, requestId })
|
|
1649
|
+
return
|
|
1650
|
+
}
|
|
1651
|
+
sendError(res, 404, 'Not found', 'invalid_request_error', 'not_found', requestId)
|
|
1652
|
+
} catch (error) {
|
|
1653
|
+
if (error.code === 'BODY_TOO_LARGE') {
|
|
1654
|
+
sendError(res, 413, 'Request body too large', 'invalid_request_error', 'request_body_too_large', requestId, { max_bytes: MAX_BODY_BYTES })
|
|
1655
|
+
return
|
|
1656
|
+
}
|
|
1657
|
+
if (error.code === 'INVALID_JSON') {
|
|
1658
|
+
sendError(res, 400, 'Invalid JSON', 'invalid_request_error', 'invalid_json', requestId, { detail: error.message })
|
|
1659
|
+
return
|
|
1660
|
+
}
|
|
1661
|
+
this.logger.error('Internal router error', { request_id: requestId, error: error?.stack || error?.message || String(error) })
|
|
1662
|
+
this.recordRouterError('internal_router_error', requestId, { message: error?.message || String(error) })
|
|
1663
|
+
if (!res.writableEnded) {
|
|
1664
|
+
sendError(res, 500, 'Internal router error', 'server_error', 'internal_router_error', requestId)
|
|
1665
|
+
}
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1668
|
+
|
|
1669
|
+
installProcessSafety() {
|
|
1670
|
+
process.on('uncaughtException', (error) => {
|
|
1671
|
+
this.crashRecovered += 1
|
|
1672
|
+
this.uncaughtTimestamps.push(Date.now())
|
|
1673
|
+
this.uncaughtTimestamps = this.uncaughtTimestamps.filter((ts) => Date.now() - ts < 5 * 60 * 1000)
|
|
1674
|
+
this.logger.error('Recovered uncaught exception', { error: error.stack || error.message })
|
|
1675
|
+
if (this.uncaughtTimestamps.length >= 10) {
|
|
1676
|
+
this.logger.error('Too many uncaught exceptions; shutting down for external restart')
|
|
1677
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
1678
|
+
event: 'app_router_self_restart',
|
|
1679
|
+
mode: 'daemon',
|
|
1680
|
+
properties: {
|
|
1681
|
+
uncaught_count: this.uncaughtTimestamps.length,
|
|
1682
|
+
uptime_before_restart: Math.floor((Date.now() - this.startedAt) / 1000),
|
|
1683
|
+
strategy: 'exit_for_service_restart',
|
|
1684
|
+
},
|
|
1685
|
+
})
|
|
1686
|
+
void this.shutdown(1)
|
|
1687
|
+
}
|
|
1688
|
+
})
|
|
1689
|
+
process.on('unhandledRejection', (reason) => {
|
|
1690
|
+
this.crashRecovered += 1
|
|
1691
|
+
this.uncaughtTimestamps.push(Date.now())
|
|
1692
|
+
this.uncaughtTimestamps = this.uncaughtTimestamps.filter((ts) => Date.now() - ts < 5 * 60 * 1000)
|
|
1693
|
+
this.logger.error('Recovered unhandled rejection', { error: reason?.stack || String(reason) })
|
|
1694
|
+
if (this.uncaughtTimestamps.length >= 10) {
|
|
1695
|
+
this.logger.error('Too many uncaught exceptions/rejections; shutting down for external restart')
|
|
1696
|
+
void this.shutdown(1)
|
|
1697
|
+
}
|
|
1698
|
+
})
|
|
1699
|
+
process.on('SIGTERM', () => void this.shutdown(0))
|
|
1700
|
+
process.on('SIGINT', () => void this.shutdown(0))
|
|
1701
|
+
process.on('SIGHUP', () => this.reloadConfigFromDisk())
|
|
1702
|
+
}
|
|
1703
|
+
|
|
1704
|
+
async shutdown(exitCode = 0) {
|
|
1705
|
+
if (this.shuttingDown) return
|
|
1706
|
+
this.shuttingDown = true
|
|
1707
|
+
this.logger.info('Router daemon stopping')
|
|
1708
|
+
if (this.probeTimer) clearInterval(this.probeTimer)
|
|
1709
|
+
if (this.configReloadTimer) clearInterval(this.configReloadTimer)
|
|
1710
|
+
if (this.tokenFlushTimer) clearInterval(this.tokenFlushTimer)
|
|
1711
|
+
for (const timeout of this.probeTimeouts) clearTimeout(timeout)
|
|
1712
|
+
const started = Date.now()
|
|
1713
|
+
while (this.inFlight > 0 && Date.now() - started < 30000) {
|
|
1714
|
+
await sleep(100)
|
|
1715
|
+
}
|
|
1716
|
+
this.tokenTracker.flush({ force: true })
|
|
1717
|
+
try { this.server?.close() } catch {}
|
|
1718
|
+
try { unlinkSync(ROUTER_PID_PATH) } catch {}
|
|
1719
|
+
try { unlinkSync(ROUTER_PORT_PATH) } catch {}
|
|
1720
|
+
void sendUsageTelemetry(this.config, {}, {
|
|
1721
|
+
event: 'app_daemon_stop',
|
|
1722
|
+
mode: 'daemon',
|
|
1723
|
+
properties: {
|
|
1724
|
+
uptime_seconds: Math.floor((Date.now() - this.startedAt) / 1000),
|
|
1725
|
+
total_requests_routed: this.totalRequestsRouted,
|
|
1726
|
+
total_tokens: this.tokenTracker.stats.all_time.total_tokens,
|
|
1727
|
+
},
|
|
1728
|
+
})
|
|
1729
|
+
setTimeout(() => process.exit(exitCode), 20)
|
|
1730
|
+
}
|
|
1731
|
+
}
|
|
1732
|
+
|
|
1733
|
+
// 📖 Models pinned to the front of the default router set, in this order.
// 📖 All of them are served by the `nvidia` provider.
const PREFERRED_DEFAULT_MODELS = [
  'minimaxai/minimax-m2.7',
  'z-ai/glm-5.1',
  'deepseek-ai/deepseek-v4-flash',
  'openai/gpt-oss-120b',
].map((model) => ({ provider: 'nvidia', model }))
|
|
1739
|
+
|
|
1740
|
+
/**
 * 📖 Build the default router set from the model catalog in `sources`.
 * 📖 Models listed in PREFERRED_DEFAULT_MODELS come first (whether or not their
 * 📖 provider has an API key), followed by the remaining models ordered by
 * 📖 tier and then by descending SWE score. When at least one provider has a
 * 📖 key, the non-pinned part is restricted to providers with a key.
 *
 * @param {object} [config={}] - App config; only `apiKeys` is read.
 * @param {number} [maxModels=5] - Maximum number of models in the set.
 * @returns {{name: string, models: Array<{provider: string, model: string, priority: number}>, created: string}}
 */
export function buildDefaultRouterSet(config = {}, maxModels = 5) {
  // 📖 A provider counts as keyed when its entry is a non-empty array or a
  // 📖 non-blank string.
  const keyedProviders = new Set(Object.entries(config.apiKeys || {})
    .filter(([, value]) => (Array.isArray(value) ? value.length > 0 : typeof value === 'string' && value.trim()))
    .map(([provider]) => provider))

  const entries = []
  for (const [providerKey, source] of Object.entries(sources)) {
    if (!isRouteableProvider(providerKey)) continue
    for (const [model, label, tier, sweScore, ctx] of source.models || []) {
      entries.push({
        provider: providerKey,
        model,
        label,
        tier,
        sweScore,
        ctx,
        hasKey: keyedProviders.has(providerKey),
      })
    }
  }

  // 📖 With no keys configured at all, every catalog model stays eligible.
  const preferred = entries.some((entry) => entry.hasKey)
    ? entries.filter((entry) => entry.hasKey)
    : entries

  const pinned = []
  const allRemaining = [...entries]
  for (const pref of PREFERRED_DEFAULT_MODELS) {
    const idx = allRemaining.findIndex((e) => e.provider === pref.provider && e.model === pref.model)
    if (idx >= 0) {
      pinned.push(allRemaining.splice(idx, 1)[0])
    }
  }

  // 📖 A tier missing from TIER_ORDER ranks after every known tier. A raw
  // 📖 indexOf would yield -1 and sort those models ahead of the best tier.
  const tierRank = (tier) => {
    const position = TIER_ORDER.indexOf(tier)
    return position === -1 ? TIER_ORDER.length : position
  }

  const remaining = preferred.filter((e) => !pinned.some((p) => p.provider === e.provider && p.model === e.model))
  remaining.sort((a, b) => {
    const tierCmp = tierRank(a.tier) - tierRank(b.tier)
    if (tierCmp !== 0) return tierCmp
    // 📖 Unparseable scores count as 0.
    const sweA = Number.parseFloat(a.sweScore) || 0
    const sweB = Number.parseFloat(b.sweScore) || 0
    return sweB - sweA
  })

  const ordered = [...pinned, ...remaining]
  return {
    name: DEFAULT_ROUTER_SETTINGS.activeSet,
    models: ordered.slice(0, maxModels).map((entry, index) => ({
      provider: entry.provider,
      model: entry.model,
      priority: index + 1,
    })),
    created: nowIso(),
  }
}
|
|
1789
|
+
|
|
1790
|
+
/**
 * 📖 Factory used by tests to exercise the real HTTP router against local
 * 📖 fake providers without spawning a daemon or touching user token files.
 * 📖 Router config persistence is disabled so set/probe-mode endpoint tests
 * 📖 cannot write fixture router sets into ~/.free-coding-models.json.
 *
 * @param {object} [options]
 * @param {object} [options.config] - App config; an empty object when falsy.
 * @param {number} [options.port=0] - Port handed to the runtime.
 * @param {object|null} [options.logger=null] - Logger; a silent one is used when falsy.
 * @param {string} [options.tokenPath=ROUTER_TOKENS_PATH] - Token stats file path.
 * @returns {RouterRuntime}
 */
export function createRouterRuntimeForTest({ config, port = 0, logger = null, tokenPath = ROUTER_TOKENS_PATH } = {}) {
  // 📖 Silent stand-in logger: every level is a no-op.
  const silentLogger = {
    level: 'error',
    ...Object.fromEntries(['error', 'warn', 'info', 'debug'].map((method) => [method, () => {}])),
  }

  const runtimeOptions = {
    config: config ? config : {},
    port,
    logger: logger ? logger : silentLogger,
    tokenPath,
    persistConfig: false,
  }
  return new RouterRuntime(runtimeOptions)
}
|
|
1810
|
+
|
|
1811
|
+
/**
 * 📖 Rebuild `config.router` for a daemon start and persist it.
 * 📖 The single set comes from the user's favorites when they yield any model,
 * 📖 otherwise from the default set — no more manual set management. All other
 * 📖 router settings start from DEFAULT_ROUTER_SETTINGS, not from any router
 * 📖 section already present in `config`.
 *
 * @param {object} config - App config; `router` is overwritten in place.
 * @returns {object} The normalized router config now stored on `config.router`.
 */
function ensureRouterConfigForDaemon(config) {
  const rebuiltSet = buildRouterSetFromFavorites(config) || buildDefaultRouterSet(config)
  const { name: setName } = rebuiltSet

  const routerSettings = {
    ...DEFAULT_ROUTER_SETTINGS,
    enabled: true,
    onboardingSeen: true,
    activeSet: setName,
    sets: { [setName]: rebuiltSet },
  }

  config.router = normalizeRouterConfig(routerSettings)
  saveConfig(config)
  return config.router
}
|
|
1825
|
+
|
|
1826
|
+
/**
 * 📖 Build a router set from the user's favorites list.
 * 📖 Each favorite is a "providerKey/modelId" string, split at the first slash.
 * 📖 Entries that are not strings, have no slash, or name a provider that is
 * 📖 not routeable or not present in `sources` are skipped. A model id missing
 * 📖 from the provider's catalog is still included as-is.
 * 📖 Priority is the favorite's 1-based position in the list, so skipped
 * 📖 entries leave gaps.
 *
 * @param {object} config - App config; only `favorites` is read.
 * @returns {{name: string, models: Array<{provider: string, model: string, priority: number}>, created: string}|null}
 *   The set, or null when no favorite yields a model (the caller decides the fallback).
 */
function buildRouterSetFromFavorites(config) {
  const favorites = config?.favorites
  if (!Array.isArray(favorites) || favorites.length === 0) return null

  const models = []
  for (const [index, fav] of favorites.entries()) {
    // 📖 Hand-edited config files can hold anything; skip non-string entries
    // 📖 rather than crashing daemon startup on `fav.indexOf`.
    if (typeof fav !== 'string') continue
    const slashIdx = fav.indexOf('/')
    if (slashIdx < 0) continue
    const providerKey = fav.slice(0, slashIdx)
    const modelId = fav.slice(slashIdx + 1)
    if (!isRouteableProvider(providerKey)) continue
    if (!sources[providerKey]) continue
    models.push({ provider: providerKey, model: modelId, priority: index + 1 })
  }

  if (models.length === 0) return null
  return {
    name: DEFAULT_ROUTER_SETTINGS.activeSet,
    models,
    created: nowIso(),
  }
}
|
|
1858
|
+
|
|
1859
|
+
/**
 * 📖 Promise wrapper around `server.listen(port, '127.0.0.1')`.
 * 📖 Exactly one of the two one-shot listeners fires; it removes the other so
 * 📖 the same server can be retried on another port without leftover handlers.
 *
 * @param {object} server - Server exposing `once`, `off`, `listen` and (optionally) `address`.
 * @param {number} port - Port to bind on the loopback interface.
 * @returns {Promise<number>} The port actually bound. This equals `port`,
 *   except for port 0 where it is the port reported by `server.address()`.
 *   Rejects with the server's `error` event payload when binding fails.
 */
function listenOnPort(server, port) {
  return new Promise((resolve, reject) => {
    const onError = (error) => {
      server.off('listening', onListening)
      reject(error)
    }
    const onListening = () => {
      server.off('error', onError)
      // 📖 Report the bound port, not the requested one. Fall back to the
      // 📖 request when the server exposes no usable address object.
      const address = typeof server.address === 'function' ? server.address() : null
      const boundPort = address !== null && typeof address === 'object' ? address.port : undefined
      resolve(Number.isInteger(boundPort) ? boundPort : port)
    }
    server.once('error', onError)
    server.once('listening', onListening)
    server.listen(port, '127.0.0.1')
  })
}
|
|
1874
|
+
|
|
1875
|
+
/**
 * 📖 Bind `server` to the first available port.
 * 📖 Order tried: the preferred port, then every port above it up to the end
 * 📖 of the router range, then the rest of the router range from its default
 * 📖 port. Each port is attempted at most once.
 *
 * @param {object} server - Server passed through to listenOnPort.
 * @param {number} preferredPort - Port to try first; falsy means the default port.
 * @param {object} logger - Logger; `warn` is called for each unavailable port.
 * @returns {Promise<number>} The port that was bound.
 * @throws The last bind error when every candidate fails.
 */
async function listenWithFallback(server, preferredPort, logger) {
  const { defaultPort, maxPort } = getRouterPortRange()
  const start = Math.max(1, preferredPort || defaultPort)

  // 📖 A Set keeps insertion order and drops duplicates. That avoids retrying
  // 📖 ports that just failed when the preferred port lies above the default
  // 📖 port, and seeding it with `start` makes sure a preferred port above the
  // 📖 router range is still attempted.
  const candidates = new Set([start])
  for (let port = start; port <= maxPort; port += 1) candidates.add(port)
  for (let port = defaultPort; port <= maxPort; port += 1) candidates.add(port)

  let lastError = null
  for (const port of candidates) {
    try {
      await listenOnPort(server, port)
      return port
    } catch (error) {
      lastError = error
      logger.warn(`Port ${port} unavailable`, { error: error.code || error.message })
    }
  }
  throw lastError || new Error('No router ports available')
}
|
|
1895
|
+
|
|
1896
|
+
/**
 * 📖 Boot the router daemon in the current process: rebuild the router config,
 * 📖 start the HTTP server on the first free port, persist the chosen port,
 * 📖 write the PID/port files, send the start telemetry event and kick off the
 * 📖 config-reload, token-flush and probe loops.
 *
 * @returns {Promise<RouterRuntime>} The running runtime.
 */
export async function runRouterDaemon() {
  const config = loadConfig()
  const router = ensureRouterConfigForDaemon(config)
  const logger = new RouterLogger(ROUTER_LOG_PATH, router.logLevel)
  const runtime = new RouterRuntime({ config, port: router.port, logger })
  runtime.installProcessSafety()

  // 📖 handleHttp answers errors itself, so its promise is deliberately not awaited.
  const server = createServer((req, res) => void runtime.handleHttp(req, res))
  runtime.server = server

  // 📖 The bound port can differ from the configured one; record what was used.
  const port = await listenWithFallback(server, router.port, logger)
  runtime.port = port
  runtime.config.router.port = port
  saveConfig(runtime.config)

  // 📖 State files are owner-only; a failed write is logged but does not stop startup.
  const writeStateFile = (path, value, failureMessage) => {
    try {
      writeFileSync(path, String(value), { mode: 0o600 })
    } catch (error) {
      logger.warn(failureMessage, { error: error.message })
    }
  }
  writeStateFile(ROUTER_PID_PATH, process.pid, 'PID file write failed')
  writeStateFile(ROUTER_PORT_PATH, port, 'Port file write failed')

  logger.info('Router daemon started', { pid: process.pid, port, activeSet: runtime.routerConfig().activeSet })

  const startProperties = {
    port,
    set_count: Object.keys(runtime.routerConfig().sets || {}).length,
    models_in_active_set: runtime.getSet()?.models?.length || 0,
    auto_start: false,
    probe_mode: runtime.routerConfig().probeMode,
  }
  void sendUsageTelemetry(runtime.config, {}, {
    event: 'app_daemon_start',
    mode: 'daemon',
    properties: startProperties,
  })

  runtime.configReloadTimer = setInterval(() => runtime.reloadConfigFromDisk(), CONFIG_RELOAD_INTERVAL_MS)
  runtime.tokenFlushTimer = setInterval(() => runtime.tokenTracker.flush(), TOKEN_FLUSH_INTERVAL_MS)

  // 📖 Run one probe burst right away, then hand over to the scheduled loop.
  void runtime.runProbeBurst()
  runtime.scheduleProbeLoop()
  return runtime
}
|
|
1928
|
+
|
|
1929
|
+
/**
 * 📖 Find a running router daemon by calling GET /health on 127.0.0.1.
 * 📖 The port recorded in the port file is tried first, then every port of the
 * 📖 router range, each with a 1 second timeout.
 *
 * @returns {Promise<object>} The first healthy daemon's /health payload, or
 *   `{ ok: false, running: false, stalePid, pid, port }` when none answers.
 */
export async function getRouterDaemonStatus() {
  const { defaultPort, maxPort } = getRouterPortRange()
  const recordedPort = readNumberFile(ROUTER_PORT_PATH)

  // 📖 Insertion-ordered and de-duplicated: recorded port first, then the range.
  const portsToScan = new Set()
  if (recordedPort) portsToScan.add(recordedPort)
  for (let candidate = defaultPort; candidate <= maxPort; candidate += 1) {
    portsToScan.add(candidate)
  }

  for (const candidate of portsToScan) {
    try {
      const health = await fetch(`http://127.0.0.1:${candidate}/health`, { signal: AbortSignal.timeout(1000) })
      if (health.ok) return await health.json()
    } catch {
      // 📖 Keep scanning the small discovery range.
    }
  }

  // 📖 Nothing answered: report what the state files say. A PID whose process
  // 📖 is gone is surfaced separately as `stalePid`.
  const pid = readNumberFile(ROUTER_PID_PATH)
  let stalePid = null
  if (pid && !isProcessAlive(pid)) stalePid = pid

  return {
    ok: false,
    running: false,
    stalePid,
    pid: pid || null,
    port: recordedPort || null,
  }
}
|
|
1954
|
+
|
|
1955
|
+
/**
 * 📖 Start the router daemon as a detached background process, unless one is
 * 📖 already healthy. After forking, /health is polled every 250 ms, up to
 * 📖 40 times, until the daemon answers.
 *
 * @returns {Promise<object>} The daemon status plus `alreadyRunning`, or
 *   `{ ok: false, running: false, pid, error }` when it never became healthy.
 */
export async function startRouterDaemonBackground() {
  const running = await getRouterDaemonStatus()
  if (running.ok) return { ...running, alreadyRunning: true }

  const daemon = fork(CLI_ENTRY_PATH, ['--daemon'], {
    detached: true,
    stdio: 'ignore',
    env: process.env,
  })
  daemon.unref()

  let attemptsLeft = 40
  while (attemptsLeft > 0) {
    attemptsLeft -= 1
    await sleep(250)
    const health = await getRouterDaemonStatus()
    if (health.ok) return { ...health, alreadyRunning: false }
  }

  return { ok: false, running: false, pid: daemon.pid, error: 'Daemon did not become healthy before timeout' }
}
|
|
1972
|
+
|
|
1973
|
+
/**
 * 📖 Stop the daemon named in the PID file by sending SIGTERM, then check
 * 📖 every 250 ms, up to 60 times, whether the process has exited.
 * 📖 A PID file pointing at a dead process is removed and reported as stale.
 *
 * @returns {Promise<object>} `{ ok, stopped, ... }` describing the outcome:
 *   `stalePid` for a dead PID, `pid` once stopped, `error` when there is no
 *   PID file or the process outlived the wait.
 */
export async function stopRouterDaemon() {
  const pid = readNumberFile(ROUTER_PID_PATH)
  if (!pid) {
    return { ok: false, stopped: false, error: 'No daemon PID file found' }
  }

  const alreadyGone = !isProcessAlive(pid)
  if (alreadyGone) {
    try { unlinkSync(ROUTER_PID_PATH) } catch {}
    return { ok: true, stopped: false, stalePid: pid }
  }

  process.kill(pid, 'SIGTERM')

  let checksLeft = 60
  while (checksLeft > 0) {
    checksLeft -= 1
    await sleep(250)
    if (!isProcessAlive(pid)) return { ok: true, stopped: true, pid }
  }

  return { ok: false, stopped: false, pid, error: 'Daemon did not stop before timeout' }
}
|