@starlink-awaken/agentmesh 1.2.8 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/gateway.yaml +212 -250
- package/dist/src/cli.js +41 -2
- package/dist/src/model-gateway/metrics.d.ts +28 -0
- package/dist/src/model-gateway/metrics.js +60 -0
- package/dist/src/model-gateway/providers.d.ts +0 -1
- package/dist/src/model-gateway/providers.js +240 -34
- package/dist/src/model-gateway/router.d.ts +1 -0
- package/dist/src/model-gateway/router.js +21 -1
- package/dist/src/model-gateway/routes.js +26 -29
- package/package.json +1 -1
package/config/gateway.yaml
CHANGED
|
@@ -1,268 +1,230 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
- name: code-review
|
|
24
|
-
keywords:
|
|
25
|
-
- review
|
|
26
|
-
- code review
|
|
27
|
-
- pr review
|
|
28
|
-
- review code
|
|
29
|
-
- 代码审查
|
|
30
|
-
- review code
|
|
31
|
-
agent: claude-code
|
|
32
|
-
priority: 15
|
|
33
|
-
|
|
34
|
-
- name: debugging
|
|
35
|
-
keywords:
|
|
36
|
-
- debug
|
|
37
|
-
- fix bug
|
|
38
|
-
- error
|
|
39
|
-
- bug
|
|
40
|
-
- 调试
|
|
41
|
-
- 修复错误
|
|
42
|
-
agent: claude-code
|
|
43
|
-
priority: 15
|
|
44
|
-
|
|
45
|
-
- name: refactoring
|
|
46
|
-
keywords:
|
|
47
|
-
- refactor
|
|
48
|
-
- 重构
|
|
49
|
-
- improve code
|
|
50
|
-
agent: claude-code
|
|
51
|
-
priority: 10
|
|
52
|
-
|
|
53
|
-
- name: documentation
|
|
54
|
-
keywords:
|
|
55
|
-
- docs
|
|
56
|
-
- document
|
|
57
|
-
- 文档
|
|
58
|
-
- write docs
|
|
59
|
-
agent: claude-code
|
|
60
|
-
priority: 10
|
|
61
|
-
|
|
62
|
-
# OpenClaw - 浏览器自动化
|
|
63
|
-
- name: browser-automation
|
|
64
|
-
keywords:
|
|
65
|
-
- browser
|
|
66
|
-
- scrape
|
|
67
|
-
- click
|
|
68
|
-
- screenshot
|
|
69
|
-
- web automation
|
|
70
|
-
- 浏览器
|
|
71
|
-
- 爬虫
|
|
72
|
-
- 截图
|
|
73
|
-
agent: openclaw
|
|
74
|
-
priority: 15
|
|
75
|
-
|
|
76
|
-
- name: web-scraping
|
|
77
|
-
keywords:
|
|
78
|
-
- scrap
|
|
79
|
-
- crawl
|
|
80
|
-
- extract data
|
|
81
|
-
- 抓取
|
|
82
|
-
- 采集
|
|
83
|
-
agent: openclaw
|
|
84
|
-
priority: 15
|
|
85
|
-
|
|
86
|
-
# Cursor - AI 编程
|
|
87
|
-
- name: cursor-task
|
|
88
|
-
keywords:
|
|
89
|
-
- cursor
|
|
90
|
-
- cursor task
|
|
91
|
-
agent: cursor
|
|
92
|
-
priority: 12
|
|
93
|
-
|
|
94
|
-
# Windsurf - Flow 状态编程
|
|
95
|
-
- name: windsurf-task
|
|
96
|
-
keywords:
|
|
97
|
-
- windsurf
|
|
98
|
-
- flow state
|
|
99
|
-
agent: windsurf
|
|
100
|
-
priority: 12
|
|
101
|
-
|
|
102
|
-
# Qwen Code - 中文编程
|
|
103
|
-
- name: qwen-task
|
|
104
|
-
keywords:
|
|
105
|
-
- qwen
|
|
106
|
-
- 通义千问
|
|
107
|
-
agent: qwen-code
|
|
108
|
-
priority: 12
|
|
109
|
-
|
|
110
|
-
# Gemini - 多模态
|
|
111
|
-
- name: gemini-task
|
|
112
|
-
keywords:
|
|
113
|
-
- gemini
|
|
114
|
-
- multimodal
|
|
115
|
-
- 多模态
|
|
116
|
-
agent: gemini
|
|
117
|
-
priority: 12
|
|
118
|
-
|
|
119
|
-
# Droid - Android 开发
|
|
120
|
-
- name: android-development
|
|
121
|
-
keywords:
|
|
122
|
-
- android
|
|
123
|
-
- apk
|
|
124
|
-
- mobile
|
|
125
|
-
- 安卓
|
|
126
|
-
- 手机应用
|
|
127
|
-
agent: droid
|
|
128
|
-
priority: 15
|
|
129
|
-
|
|
130
|
-
# Aider - Git 集成编辑
|
|
131
|
-
- name: aider-task
|
|
132
|
-
keywords:
|
|
133
|
-
- aider
|
|
134
|
-
- git edit
|
|
135
|
-
- refactor git
|
|
136
|
-
agent: aider
|
|
137
|
-
priority: 12
|
|
138
|
-
|
|
139
|
-
# Cline - 自主编程
|
|
140
|
-
- name: cline-task
|
|
141
|
-
keywords:
|
|
142
|
-
- cline
|
|
143
|
-
- autonomous
|
|
144
|
-
agent: cline
|
|
145
|
-
priority: 12
|
|
146
|
-
|
|
147
|
-
# Roo Code
|
|
148
|
-
- name: roo-code-task
|
|
149
|
-
keywords:
|
|
150
|
-
- roo-code
|
|
151
|
-
- roo
|
|
152
|
-
agent: roo-code
|
|
153
|
-
priority: 12
|
|
154
|
-
|
|
155
|
-
# 多 Agent 协作
|
|
156
|
-
- name: multi-agent
|
|
157
|
-
keywords:
|
|
158
|
-
- collaborate
|
|
159
|
-
- team
|
|
160
|
-
- together
|
|
161
|
-
- multiple agents
|
|
162
|
-
- 协作
|
|
163
|
-
- 多个
|
|
164
|
-
strategy: broadcast
|
|
165
|
-
agents:
|
|
166
|
-
- claude-code
|
|
167
|
-
- openclaw
|
|
168
|
-
priority: 20
|
|
169
|
-
|
|
170
|
-
# =============================================================================
|
|
171
|
-
# 模型网关配置 — 多 Provider 路由 + 配额感知 Fallback
|
|
172
|
-
# =============================================================================
|
|
1
|
+
agents:
|
|
2
|
+
- capabilities:
|
|
3
|
+
- code-generation
|
|
4
|
+
- code-review
|
|
5
|
+
- debugging
|
|
6
|
+
- refactoring
|
|
7
|
+
- documentation
|
|
8
|
+
- file-operations
|
|
9
|
+
id: claude-code
|
|
10
|
+
name: Claude Code
|
|
11
|
+
type: claude-code
|
|
12
|
+
- capabilities:
|
|
13
|
+
- browser-automation
|
|
14
|
+
- web-scraping
|
|
15
|
+
- form-filling
|
|
16
|
+
- ui-testing
|
|
17
|
+
id: openclaw
|
|
18
|
+
name: OpenClaw
|
|
19
|
+
type: openclaw
|
|
20
|
+
dataDir: ./data
|
|
21
|
+
host: 0.0.0.0
|
|
22
|
+
logDir: ./logs
|
|
173
23
|
models:
|
|
174
24
|
default_model: deepseek-chat
|
|
175
|
-
|
|
176
|
-
# 默认配置(各 Provider 可覆盖)
|
|
177
25
|
defaults:
|
|
178
26
|
circuit_breaker:
|
|
179
27
|
failure_threshold: 3
|
|
180
|
-
reset_timeout_ms: 30000
|
|
181
28
|
half_open_max_requests: 1
|
|
29
|
+
reset_timeout_ms: 30000
|
|
182
30
|
retry:
|
|
183
|
-
max_retries: 3
|
|
184
31
|
base_delay_ms: 500
|
|
185
32
|
max_delay_ms: 10000
|
|
186
|
-
|
|
187
|
-
|
|
33
|
+
max_retries: 3
|
|
34
|
+
retryable_statuses:
|
|
35
|
+
- 429
|
|
36
|
+
- 500
|
|
37
|
+
- 502
|
|
38
|
+
- 503
|
|
39
|
+
- 504
|
|
40
|
+
fallback_chain:
|
|
41
|
+
- deepseek
|
|
42
|
+
- openrouter
|
|
43
|
+
- ollama
|
|
44
|
+
model_routing:
|
|
45
|
+
claude:
|
|
46
|
+
- openrouter
|
|
47
|
+
codestral:
|
|
48
|
+
- ollama
|
|
49
|
+
deepseek:
|
|
50
|
+
- deepseek
|
|
51
|
+
gemini:
|
|
52
|
+
- openrouter
|
|
53
|
+
gpt-:
|
|
54
|
+
- openai
|
|
55
|
+
- deepseek
|
|
56
|
+
gpt-5.3-codex:
|
|
57
|
+
- deepseek
|
|
58
|
+
llama:
|
|
59
|
+
- ollama
|
|
60
|
+
o1:
|
|
61
|
+
- openai
|
|
62
|
+
- deepseek
|
|
63
|
+
o4:
|
|
64
|
+
- openai
|
|
65
|
+
- deepseek
|
|
66
|
+
qwen:
|
|
67
|
+
- ollama
|
|
188
68
|
providers:
|
|
189
69
|
deepseek:
|
|
190
|
-
base_url: https://api.deepseek.com/v1
|
|
191
70
|
api_key_env: DEEPSEEK_API_KEY
|
|
71
|
+
base_url: https://api.deepseek.com/v1
|
|
72
|
+
models:
|
|
73
|
+
- deepseek-chat
|
|
74
|
+
- deepseek-reasoner
|
|
75
|
+
- deepseek-v4-pro
|
|
76
|
+
- deepseek-v4-flash
|
|
77
|
+
ollama:
|
|
78
|
+
api_key: ollama
|
|
79
|
+
base_url: http://127.0.0.1:11434/v1
|
|
192
80
|
models:
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
- deepseek-v4-flash
|
|
81
|
+
- qwen3:14b
|
|
82
|
+
- codestral:22b
|
|
83
|
+
- llama3.1:8b
|
|
197
84
|
openai:
|
|
198
|
-
base_url: https://api.openai.com/v1
|
|
199
85
|
api_key_env: OPENAI_API_KEY
|
|
86
|
+
base_url: https://api.openai.com/v1
|
|
200
87
|
models:
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
88
|
+
- gpt-5.1
|
|
89
|
+
- gpt-5.1-codex
|
|
90
|
+
- o4-mini
|
|
204
91
|
openrouter:
|
|
205
|
-
base_url: https://openrouter.ai/api/v1
|
|
206
92
|
api_key_env: OPENROUTER_API_KEY
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
-
|
|
218
|
-
-
|
|
219
|
-
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
93
|
+
base_url: https://openrouter.ai/api/v1
|
|
94
|
+
port: 3000
|
|
95
|
+
routing:
|
|
96
|
+
defaultAgent: claude-code
|
|
97
|
+
rules:
|
|
98
|
+
- agent: claude-code
|
|
99
|
+
keywords:
|
|
100
|
+
- write code
|
|
101
|
+
- generate code
|
|
102
|
+
- create function
|
|
103
|
+
- create class
|
|
104
|
+
- 写代码
|
|
105
|
+
- 生成代码
|
|
106
|
+
name: code-generation
|
|
107
|
+
priority: 10
|
|
108
|
+
- agent: claude-code
|
|
109
|
+
keywords:
|
|
110
|
+
- review
|
|
111
|
+
- code review
|
|
112
|
+
- pr review
|
|
113
|
+
- review code
|
|
114
|
+
- 代码审查
|
|
115
|
+
- review code
|
|
116
|
+
name: code-review
|
|
117
|
+
priority: 15
|
|
118
|
+
- agent: claude-code
|
|
119
|
+
keywords:
|
|
120
|
+
- debug
|
|
121
|
+
- fix bug
|
|
122
|
+
- error
|
|
123
|
+
- bug
|
|
124
|
+
- 调试
|
|
125
|
+
- 修复错误
|
|
126
|
+
name: debugging
|
|
127
|
+
priority: 15
|
|
128
|
+
- agent: claude-code
|
|
129
|
+
keywords:
|
|
130
|
+
- refactor
|
|
131
|
+
- 重构
|
|
132
|
+
- improve code
|
|
133
|
+
name: refactoring
|
|
134
|
+
priority: 10
|
|
135
|
+
- agent: claude-code
|
|
136
|
+
keywords:
|
|
137
|
+
- docs
|
|
138
|
+
- document
|
|
139
|
+
- 文档
|
|
140
|
+
- write docs
|
|
141
|
+
name: documentation
|
|
142
|
+
priority: 10
|
|
143
|
+
- agent: openclaw
|
|
144
|
+
keywords:
|
|
145
|
+
- browser
|
|
146
|
+
- scrape
|
|
147
|
+
- click
|
|
148
|
+
- screenshot
|
|
149
|
+
- web automation
|
|
150
|
+
- 浏览器
|
|
151
|
+
- 爬虫
|
|
152
|
+
- 截图
|
|
153
|
+
name: browser-automation
|
|
154
|
+
priority: 15
|
|
155
|
+
- agent: openclaw
|
|
156
|
+
keywords:
|
|
157
|
+
- scrap
|
|
158
|
+
- crawl
|
|
159
|
+
- extract data
|
|
160
|
+
- 抓取
|
|
161
|
+
- 采集
|
|
162
|
+
name: web-scraping
|
|
163
|
+
priority: 15
|
|
164
|
+
- agent: cursor
|
|
165
|
+
keywords:
|
|
166
|
+
- cursor
|
|
167
|
+
- cursor task
|
|
168
|
+
name: cursor-task
|
|
169
|
+
priority: 12
|
|
170
|
+
- agent: windsurf
|
|
171
|
+
keywords:
|
|
172
|
+
- windsurf
|
|
173
|
+
- flow state
|
|
174
|
+
name: windsurf-task
|
|
175
|
+
priority: 12
|
|
176
|
+
- agent: qwen-code
|
|
177
|
+
keywords:
|
|
178
|
+
- qwen
|
|
179
|
+
- 通义千问
|
|
180
|
+
name: qwen-task
|
|
181
|
+
priority: 12
|
|
182
|
+
- agent: gemini
|
|
183
|
+
keywords:
|
|
184
|
+
- gemini
|
|
185
|
+
- multimodal
|
|
186
|
+
- 多模态
|
|
187
|
+
name: gemini-task
|
|
188
|
+
priority: 12
|
|
189
|
+
- agent: droid
|
|
190
|
+
keywords:
|
|
191
|
+
- android
|
|
192
|
+
- apk
|
|
193
|
+
- mobile
|
|
194
|
+
- 安卓
|
|
195
|
+
- 手机应用
|
|
196
|
+
name: android-development
|
|
197
|
+
priority: 15
|
|
198
|
+
- agent: aider
|
|
199
|
+
keywords:
|
|
200
|
+
- aider
|
|
201
|
+
- git edit
|
|
202
|
+
- refactor git
|
|
203
|
+
name: aider-task
|
|
204
|
+
priority: 12
|
|
205
|
+
- agent: cline
|
|
206
|
+
keywords:
|
|
207
|
+
- cline
|
|
208
|
+
- autonomous
|
|
209
|
+
name: cline-task
|
|
210
|
+
priority: 12
|
|
211
|
+
- agent: roo-code
|
|
212
|
+
keywords:
|
|
213
|
+
- roo-code
|
|
214
|
+
- roo
|
|
215
|
+
name: roo-code-task
|
|
216
|
+
priority: 12
|
|
217
|
+
- agents:
|
|
218
|
+
- claude-code
|
|
219
|
+
- openclaw
|
|
220
|
+
keywords:
|
|
221
|
+
- collaborate
|
|
222
|
+
- team
|
|
223
|
+
- together
|
|
224
|
+
- multiple agents
|
|
225
|
+
- 协作
|
|
226
|
+
- 多个
|
|
227
|
+
name: multi-agent
|
|
228
|
+
priority: 20
|
|
229
|
+
strategy: broadcast
|
|
230
|
+
wsPort: 3001
|
package/dist/src/cli.js
CHANGED
|
@@ -7,7 +7,7 @@ import { existsSync, readFileSync } from 'node:fs';
|
|
|
7
7
|
import { resolve, dirname, join } from 'node:path';
|
|
8
8
|
import { initLogger } from './core/logger.js';
|
|
9
9
|
const PROJECT_ROOT = resolve(dirname(import.meta.dir), '..');
|
|
10
|
-
const VERSION = '1.2.8';
|
|
10
|
+
const VERSION = '1.3.2';
|
|
11
11
|
const BANNER = `
|
|
12
12
|
█████╗ ██████╗ ███████╗███╗ ██╗████████╗
|
|
13
13
|
██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝
|
|
@@ -250,6 +250,42 @@ async function cmdConfig(args) {
|
|
|
250
250
|
}
|
|
251
251
|
}
|
|
252
252
|
}
|
|
253
|
+
async function cmdStatus() {
|
|
254
|
+
try {
|
|
255
|
+
const [health, models, stats] = await Promise.all([
|
|
256
|
+
apiRequest('/health'),
|
|
257
|
+
apiRequest('/v1/models').catch(() => ({ data: [] })),
|
|
258
|
+
apiRequest('/model-gateway/stats').catch(() => null),
|
|
259
|
+
]);
|
|
260
|
+
console.log(`
|
|
261
|
+
╔═══════════════════════════════════════════════════╗
|
|
262
|
+
║ Agent Mesh Gateway Status ║
|
|
263
|
+
╠═══════════════════════════════════════════════════╣
|
|
264
|
+
║ Status: ${health.status} ║
|
|
265
|
+
║ Agents: ${String(health.agents?.length || 0).padStart(2)} online ║
|
|
266
|
+
║ Models: ${String(models.data?.length || 0).padStart(2)} available ║
|
|
267
|
+
║ Uptime: ${stats?.uptime_seconds ? Math.floor(stats.uptime_seconds) + 's' : 'N/A'} ║
|
|
268
|
+
╠═══════════════════════════════════════════════════╣`);
|
|
269
|
+
if (stats?.providers) {
|
|
270
|
+
console.log('║ Provider Metrics: ║');
|
|
271
|
+
for (const [name, m] of Object.entries(stats.providers)) {
|
|
272
|
+
console.log(`║ ${name.padEnd(12)} reqs:${String(m.requests).padStart(5)} ok:${(m.success_rate || 'N/A').padStart(6)} avg:${String(m.avg_latency_ms || 0).padStart(4)}ms ║`);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
if (stats?.recent?.length) {
|
|
276
|
+
console.log('╠═══════════════════════════════════════════════════╣');
|
|
277
|
+
console.log('║ Recent: ║');
|
|
278
|
+
for (const r of stats.recent.slice(0, 5)) {
|
|
279
|
+
const time = new Date(r.time).toLocaleTimeString();
|
|
280
|
+
console.log(`║ ${r.status >= 400 ? '❌' : '✅'} ${time} ${r.model} → ${r.actual} ${r.latency_ms}ms ║`);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
console.log('╚═══════════════════════════════════════════════════╝\n');
|
|
284
|
+
}
|
|
285
|
+
catch {
|
|
286
|
+
console.error('\n ❌ Gateway not reachable. Start: agentmesh start\n');
|
|
287
|
+
}
|
|
288
|
+
}
|
|
253
289
|
async function cmdDoctor() {
|
|
254
290
|
console.log('\n 🔍 Agent Mesh Gateway Diagnostics\n');
|
|
255
291
|
const checks = [];
|
|
@@ -326,9 +362,12 @@ async function main() {
|
|
|
326
362
|
await runSetup();
|
|
327
363
|
break;
|
|
328
364
|
case 'health':
|
|
329
|
-
case 'status':
|
|
330
365
|
await cmdHealth();
|
|
331
366
|
break;
|
|
367
|
+
case 'status':
|
|
368
|
+
case 'info':
|
|
369
|
+
await cmdStatus();
|
|
370
|
+
break;
|
|
332
371
|
case 'models':
|
|
333
372
|
case 'model':
|
|
334
373
|
await cmdModels();
|
|
package/dist/src/model-gateway/metrics.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
interface RequestLog {
|
|
2
|
+
timestamp: number;
|
|
3
|
+
model: string;
|
|
4
|
+
provider: string;
|
|
5
|
+
actualModel: string;
|
|
6
|
+
latencyMs: number;
|
|
7
|
+
status: number;
|
|
8
|
+
error?: string;
|
|
9
|
+
streaming: boolean;
|
|
10
|
+
}
|
|
11
|
+
export declare function recordRequest(log: RequestLog): void;
|
|
12
|
+
export declare function getMetrics(): {
|
|
13
|
+
uptime_seconds: number;
|
|
14
|
+
total_requests: number;
|
|
15
|
+
total_failures: number;
|
|
16
|
+
providers: Record<string, any>;
|
|
17
|
+
recent: {
|
|
18
|
+
time: string;
|
|
19
|
+
model: string;
|
|
20
|
+
provider: string;
|
|
21
|
+
actual: string;
|
|
22
|
+
latency_ms: number;
|
|
23
|
+
status: number;
|
|
24
|
+
streaming: boolean;
|
|
25
|
+
error: string | undefined;
|
|
26
|
+
}[];
|
|
27
|
+
};
|
|
28
|
+
export {};
|
|
package/dist/src/model-gateway/metrics.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// 网关运行时指标收集
|
|
2
|
+
const providerMetrics = new Map();
|
|
3
|
+
const recentRequests = [];
|
|
4
|
+
const MAX_RECENT = 200;
|
|
5
|
+
function getOrInitProvider(name) {
|
|
6
|
+
if (!providerMetrics.has(name)) {
|
|
7
|
+
providerMetrics.set(name, { requests: 0, success: 0, failures: 0, totalLatencyMs: 0 });
|
|
8
|
+
}
|
|
9
|
+
return providerMetrics.get(name);
|
|
10
|
+
}
|
|
11
|
+
export function recordRequest(log) {
|
|
12
|
+
const m = getOrInitProvider(log.provider);
|
|
13
|
+
m.requests++;
|
|
14
|
+
m.totalLatencyMs += log.latencyMs;
|
|
15
|
+
if (log.status >= 200 && log.status < 400) {
|
|
16
|
+
m.success++;
|
|
17
|
+
m.lastSuccessTime = log.timestamp;
|
|
18
|
+
}
|
|
19
|
+
else {
|
|
20
|
+
m.failures++;
|
|
21
|
+
m.lastError = log.error;
|
|
22
|
+
m.lastErrorTime = log.timestamp;
|
|
23
|
+
}
|
|
24
|
+
recentRequests.unshift(log);
|
|
25
|
+
if (recentRequests.length > MAX_RECENT)
|
|
26
|
+
recentRequests.pop();
|
|
27
|
+
}
|
|
28
|
+
export function getMetrics() {
|
|
29
|
+
const providers = {};
|
|
30
|
+
let totalRequests = 0;
|
|
31
|
+
let totalFailures = 0;
|
|
32
|
+
for (const [name, m] of providerMetrics) {
|
|
33
|
+
totalRequests += m.requests;
|
|
34
|
+
totalFailures += m.failures;
|
|
35
|
+
providers[name] = {
|
|
36
|
+
requests: m.requests,
|
|
37
|
+
success_rate: m.requests > 0 ? ((m.success / m.requests) * 100).toFixed(1) + '%' : 'N/A',
|
|
38
|
+
avg_latency_ms: m.requests > 0 ? Math.round(m.totalLatencyMs / m.requests) : 0,
|
|
39
|
+
last_success: m.lastSuccessTime ? new Date(m.lastSuccessTime).toISOString() : null,
|
|
40
|
+
last_error: m.lastError || null,
|
|
41
|
+
last_error_time: m.lastErrorTime ? new Date(m.lastErrorTime).toISOString() : null,
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
return {
|
|
45
|
+
uptime_seconds: Math.round(process.uptime()),
|
|
46
|
+
total_requests: totalRequests,
|
|
47
|
+
total_failures: totalFailures,
|
|
48
|
+
providers,
|
|
49
|
+
recent: recentRequests.slice(0, 20).map(r => ({
|
|
50
|
+
time: new Date(r.timestamp).toISOString(),
|
|
51
|
+
model: r.model,
|
|
52
|
+
provider: r.provider,
|
|
53
|
+
actual: r.actualModel,
|
|
54
|
+
latency_ms: r.latencyMs,
|
|
55
|
+
status: r.status,
|
|
56
|
+
streaming: r.streaming,
|
|
57
|
+
error: r.error,
|
|
58
|
+
})),
|
|
59
|
+
};
|
|
60
|
+
}
|
|
package/dist/src/model-gateway/providers.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
1
|
import type { ChatCompletionRequest, ResolvedProvider } from './types.js';
|
|
2
2
|
export declare function callChatCompletions(provider: ResolvedProvider, request: ChatCompletionRequest): Promise<Response>;
|
|
3
3
|
export declare function callResponsesApi(provider: ResolvedProvider, body: Record<string, any>): Promise<Response>;
|
|
4
|
-
export declare function buildStreamingResponse(upstreamResp: Response): Response;
|
|
package/dist/src/model-gateway/providers.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { circuitBreakerRegistry } from './circuit-breaker.js';
|
|
2
2
|
import { withRetry, isRetryable } from './retry.js';
|
|
3
|
-
// 所有目标 Provider 都兼容 OpenAI API 格式,统一客户端即可
|
|
4
3
|
export async function callChatCompletions(provider, request) {
|
|
5
4
|
const { base_url, api_key, name: providerName } = provider;
|
|
6
5
|
const { model, messages, stream, temperature, max_tokens, tools, tool_choice } = request;
|
|
@@ -19,22 +18,18 @@ export async function callChatCompletions(provider, request) {
|
|
|
19
18
|
'Content-Type': 'application/json',
|
|
20
19
|
Authorization: `Bearer ${api_key}`,
|
|
21
20
|
};
|
|
22
|
-
// OpenRouter 需要额外的头部
|
|
23
21
|
if (providerName === 'openrouter') {
|
|
24
22
|
headers['HTTP-Referer'] = 'http://127.0.0.1:3000';
|
|
25
23
|
headers['X-Title'] = 'Agent Mesh Gateway';
|
|
26
24
|
}
|
|
27
25
|
const url = `${base_url.replace(/\/$/, '')}/chat/completions`;
|
|
28
|
-
// 熔断器检查
|
|
29
26
|
if (!circuitBreakerRegistry.canRequest(providerName)) {
|
|
30
27
|
throw new Error(`Circuit breaker open for ${providerName}`);
|
|
31
28
|
}
|
|
32
29
|
try {
|
|
33
30
|
const resp = await withRetry(providerName, async () => {
|
|
34
31
|
const r = await fetch(url, {
|
|
35
|
-
method: 'POST',
|
|
36
|
-
headers,
|
|
37
|
-
body: JSON.stringify(body),
|
|
32
|
+
method: 'POST', headers, body: JSON.stringify(body),
|
|
38
33
|
signal: AbortSignal.timeout(120_000),
|
|
39
34
|
});
|
|
40
35
|
return r;
|
|
@@ -42,7 +37,6 @@ export async function callChatCompletions(provider, request) {
|
|
|
42
37
|
console.warn(`[Retry] ${providerName} attempt ${attempt} after ${status} — retrying in ${delayMs}ms`);
|
|
43
38
|
});
|
|
44
39
|
if (!resp.ok && isRetryable(resp.status)) {
|
|
45
|
-
// Retry logic already handled in withRetry, but if we get here after max retries:
|
|
46
40
|
circuitBreakerRegistry.recordFailure(providerName);
|
|
47
41
|
}
|
|
48
42
|
else if (resp.ok) {
|
|
@@ -58,38 +52,260 @@ export async function callChatCompletions(provider, request) {
|
|
|
58
52
|
throw err;
|
|
59
53
|
}
|
|
60
54
|
}
|
|
55
|
+
// ============================================================================
|
|
56
|
+
// Responses API → Chat Completions 双向转换(含 tool_calls 往返)
|
|
57
|
+
// ============================================================================
|
|
61
58
|
export async function callResponsesApi(provider, body) {
|
|
62
|
-
|
|
63
|
-
const messages = convertResponsesInputToMessages(body.input || []);
|
|
59
|
+
const messages = convertInputToMessages(body.input || []);
|
|
64
60
|
if (body.instructions) {
|
|
65
61
|
messages.unshift({ role: 'system', content: body.instructions });
|
|
66
62
|
}
|
|
67
|
-
|
|
63
|
+
// 转换 tools 定义(Codex 的 tool schema → OpenAI format)
|
|
64
|
+
const tools = convertToolSchemas(body.tools);
|
|
65
|
+
const chatResp = await callChatCompletions(provider, {
|
|
68
66
|
model: body.model,
|
|
69
67
|
messages,
|
|
70
68
|
stream: body.stream,
|
|
71
|
-
tools
|
|
69
|
+
tools,
|
|
70
|
+
tool_choice: body.tool_choice,
|
|
71
|
+
});
|
|
72
|
+
// 非流式:直接转换响应
|
|
73
|
+
if (!body.stream) {
|
|
74
|
+
const ccData = (await chatResp.json());
|
|
75
|
+
return new Response(JSON.stringify(convertChatToResponses(ccData)), {
|
|
76
|
+
status: 200,
|
|
77
|
+
headers: { 'Content-Type': 'application/json' },
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
// 流式:解析 SSE → 转换 → 重新打包 SSE
|
|
81
|
+
const transformed = transformSSEStream(chatResp.body);
|
|
82
|
+
return new Response(transformed, {
|
|
83
|
+
status: 200,
|
|
84
|
+
headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache' },
|
|
72
85
|
});
|
|
73
86
|
}
|
|
74
|
-
|
|
87
|
+
// ============================================================================
|
|
88
|
+
// 输入转换: Responses input[] → Chat messages[]
|
|
89
|
+
// ============================================================================
|
|
90
|
+
function convertInputToMessages(input) {
|
|
75
91
|
const messages = [];
|
|
76
92
|
for (const item of input) {
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
93
|
+
switch (item.type || item.role) {
|
|
94
|
+
// 标准消息类型
|
|
95
|
+
case 'message':
|
|
96
|
+
messages.push({ role: item.role || 'user', content: extractTextContent(item.content) });
|
|
97
|
+
break;
|
|
98
|
+
// Function call(Assistant 侧发起工具调用)
|
|
99
|
+
case 'function_call': {
|
|
100
|
+
messages.push({
|
|
101
|
+
role: 'assistant',
|
|
102
|
+
content: null,
|
|
103
|
+
tool_calls: [{
|
|
104
|
+
id: item.call_id,
|
|
105
|
+
type: 'function',
|
|
106
|
+
function: {
|
|
107
|
+
name: item.name,
|
|
108
|
+
arguments: typeof item.arguments === 'string' ? item.arguments : JSON.stringify(item.arguments),
|
|
109
|
+
},
|
|
110
|
+
}],
|
|
111
|
+
});
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
114
|
+
// Function call output(Tool 返回结果)
|
|
115
|
+
case 'function_call_output':
|
|
116
|
+
messages.push({
|
|
117
|
+
role: 'tool',
|
|
118
|
+
tool_call_id: item.call_id,
|
|
119
|
+
content: typeof item.output === 'string' ? item.output : JSON.stringify(item.output),
|
|
120
|
+
});
|
|
121
|
+
break;
|
|
122
|
+
// 简单角色
|
|
123
|
+
case 'system':
|
|
124
|
+
messages.push({ role: 'system', content: extractTextContent(item.content) });
|
|
125
|
+
break;
|
|
126
|
+
case 'user':
|
|
127
|
+
messages.push({ role: 'user', content: extractTextContent(item.content) });
|
|
128
|
+
break;
|
|
129
|
+
case 'assistant':
|
|
130
|
+
messages.push({ role: 'assistant', content: extractTextContent(item.content) });
|
|
131
|
+
break;
|
|
132
|
+
default:
|
|
133
|
+
// 回退: role 字段
|
|
134
|
+
if (item.role) {
|
|
135
|
+
messages.push({ role: item.role, content: extractTextContent(item.content) });
|
|
136
|
+
}
|
|
89
137
|
}
|
|
90
138
|
}
|
|
91
139
|
return messages;
|
|
92
140
|
}
|
|
141
|
+
// ============================================================================
|
|
142
|
+
// 输出转换: Chat completions response → Responses API response
|
|
143
|
+
// ============================================================================
|
|
144
|
+
function convertChatToResponses(ccData) {
|
|
145
|
+
const choice = ccData.choices?.[0];
|
|
146
|
+
if (!choice) {
|
|
147
|
+
return { id: ccData.id, object: 'response', model: ccData.model, output: [], usage: ccData.usage };
|
|
148
|
+
}
|
|
149
|
+
const output = [];
|
|
150
|
+
// 文本回复
|
|
151
|
+
if (choice.message?.content) {
|
|
152
|
+
output.push({
|
|
153
|
+
type: 'message',
|
|
154
|
+
role: 'assistant',
|
|
155
|
+
content: [{ type: 'output_text', text: choice.message.content }],
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
// 工具调用
|
|
159
|
+
if (choice.message?.tool_calls) {
|
|
160
|
+
for (const tc of choice.message.tool_calls) {
|
|
161
|
+
output.push({
|
|
162
|
+
type: 'function_call',
|
|
163
|
+
call_id: tc.id,
|
|
164
|
+
name: tc.function?.name,
|
|
165
|
+
arguments: tc.function?.arguments,
|
|
166
|
+
});
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
// finish_reason = 'tool_calls' 表示等待 tool 结果(Codex 需要此信息)
|
|
170
|
+
const status = choice.finish_reason === 'tool_calls' ? 'requires_action' : 'completed';
|
|
171
|
+
return {
|
|
172
|
+
id: ccData.id,
|
|
173
|
+
object: 'response',
|
|
174
|
+
model: ccData.model,
|
|
175
|
+
status,
|
|
176
|
+
output,
|
|
177
|
+
usage: ccData.usage,
|
|
178
|
+
};
|
|
179
|
+
}
|
|
180
|
+
// ============================================================================
|
|
181
|
+
// SSE 流式转换: Chat SSE → Responses SSE
|
|
182
|
+
// ============================================================================
|
|
183
|
+
function transformSSEStream(upstreamBody) {
|
|
184
|
+
const encoder = new TextEncoder();
|
|
185
|
+
let buffer = '';
|
|
186
|
+
let responseId = '';
|
|
187
|
+
let modelName = '';
|
|
188
|
+
let contentBuffer = '';
|
|
189
|
+
let toolCallAccum = {};
|
|
190
|
+
return new ReadableStream({
|
|
191
|
+
async start(controller) {
|
|
192
|
+
const reader = upstreamBody.getReader();
|
|
193
|
+
const decoder = new TextDecoder();
|
|
194
|
+
try {
|
|
195
|
+
while (true) {
|
|
196
|
+
const { done, value } = await reader.read();
|
|
197
|
+
if (done)
|
|
198
|
+
break;
|
|
199
|
+
buffer += decoder.decode(value, { stream: true });
|
|
200
|
+
const lines = buffer.split('\n');
|
|
201
|
+
buffer = lines.pop() || '';
|
|
202
|
+
for (const line of lines) {
|
|
203
|
+
if (!line.startsWith('data: '))
|
|
204
|
+
continue;
|
|
205
|
+
const data = line.slice(6).trim();
|
|
206
|
+
if (data === '[DONE]') {
|
|
207
|
+
// 发送最终事件
|
|
208
|
+
const finalEvt = buildResponseEvent(responseId, modelName, contentBuffer, toolCallAccum, true);
|
|
209
|
+
controller.enqueue(encoder.encode(finalEvt));
|
|
210
|
+
controller.enqueue(encoder.encode('data: [DONE]\n\n'));
|
|
211
|
+
continue;
|
|
212
|
+
}
|
|
213
|
+
try {
|
|
214
|
+
const chunk = JSON.parse(data);
|
|
215
|
+
responseId = chunk.id || responseId;
|
|
216
|
+
modelName = chunk.model || modelName;
|
|
217
|
+
const delta = chunk.choices?.[0]?.delta;
|
|
218
|
+
if (!delta)
|
|
219
|
+
continue;
|
|
220
|
+
// 文本增量
|
|
221
|
+
if (delta.content) {
|
|
222
|
+
contentBuffer += delta.content;
|
|
223
|
+
const evt = `data: ${JSON.stringify({
|
|
224
|
+
type: 'response.output_text.delta',
|
|
225
|
+
delta: delta.content,
|
|
226
|
+
})}\n\n`;
|
|
227
|
+
controller.enqueue(encoder.encode(evt));
|
|
228
|
+
}
|
|
229
|
+
// 工具调用增量
|
|
230
|
+
if (delta.tool_calls) {
|
|
231
|
+
for (const tc of delta.tool_calls) {
|
|
232
|
+
if (!toolCallAccum[tc.index]) {
|
|
233
|
+
toolCallAccum[tc.index] = {
|
|
234
|
+
id: tc.id || '',
|
|
235
|
+
type: 'function',
|
|
236
|
+
function: { name: tc.function?.name || '', arguments: '' },
|
|
237
|
+
};
|
|
238
|
+
}
|
|
239
|
+
if (tc.function?.arguments) {
|
|
240
|
+
toolCallAccum[tc.index].function.arguments += tc.function.arguments;
|
|
241
|
+
}
|
|
242
|
+
if (tc.id)
|
|
243
|
+
toolCallAccum[tc.index].id = tc.id;
|
|
244
|
+
if (tc.function?.name)
|
|
245
|
+
toolCallAccum[tc.index].function.name = tc.function.name;
|
|
246
|
+
}
|
|
247
|
+
// 发送 tool_call delta 事件
|
|
248
|
+
const tcEvt = `data: ${JSON.stringify({
|
|
249
|
+
type: 'response.function_call_arguments.delta',
|
|
250
|
+
tool_calls: Object.values(toolCallAccum),
|
|
251
|
+
})}\n\n`;
|
|
252
|
+
controller.enqueue(encoder.encode(tcEvt));
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
catch {
|
|
256
|
+
// 非 JSON 行直接透传
|
|
257
|
+
controller.enqueue(encoder.encode(line + '\n'));
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
catch (err) {
|
|
263
|
+
controller.error(err);
|
|
264
|
+
}
|
|
265
|
+
finally {
|
|
266
|
+
reader.releaseLock();
|
|
267
|
+
controller.close();
|
|
268
|
+
}
|
|
269
|
+
},
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
function buildResponseEvent(id, model, content, toolCalls, isFinal) {
|
|
273
|
+
const output = [];
|
|
274
|
+
if (content) {
|
|
275
|
+
output.push({ type: 'message', role: 'assistant', content: [{ type: 'output_text', text: content }] });
|
|
276
|
+
}
|
|
277
|
+
for (const tc of Object.values(toolCalls)) {
|
|
278
|
+
output.push({
|
|
279
|
+
type: 'function_call',
|
|
280
|
+
call_id: tc.id,
|
|
281
|
+
name: tc.function.name,
|
|
282
|
+
arguments: tc.function.arguments,
|
|
283
|
+
});
|
|
284
|
+
}
|
|
285
|
+
return `data: ${JSON.stringify({
|
|
286
|
+
type: isFinal ? 'response.completed' : 'response.output_text.delta',
|
|
287
|
+
response: isFinal ? { id, model, object: 'response', status: 'completed', output } : undefined,
|
|
288
|
+
delta: isFinal ? undefined : content,
|
|
289
|
+
})}\n\n`;
|
|
290
|
+
}
|
|
291
|
+
// ============================================================================
|
|
292
|
+
// 工具定义转换
|
|
293
|
+
// ============================================================================
|
|
294
|
+
function convertToolSchemas(tools) {
|
|
295
|
+
if (!tools || !Array.isArray(tools))
|
|
296
|
+
return undefined;
|
|
297
|
+
return tools.map(t => ({
|
|
298
|
+
type: 'function',
|
|
299
|
+
function: {
|
|
300
|
+
name: t.name,
|
|
301
|
+
description: t.description,
|
|
302
|
+
parameters: t.parameters || t.input_schema,
|
|
303
|
+
},
|
|
304
|
+
}));
|
|
305
|
+
}
|
|
306
|
+
// ============================================================================
|
|
307
|
+
// 辅助函数
|
|
308
|
+
// ============================================================================
|
|
93
309
|
function extractTextContent(content) {
|
|
94
310
|
if (typeof content === 'string')
|
|
95
311
|
return content;
|
|
@@ -101,13 +317,3 @@ function extractTextContent(content) {
|
|
|
101
317
|
}
|
|
102
318
|
return String(content || '');
|
|
103
319
|
}
|
|
104
|
-
export function buildStreamingResponse(upstreamResp) {
|
|
105
|
-
return new Response(upstreamResp.body, {
|
|
106
|
-
status: upstreamResp.status,
|
|
107
|
-
headers: {
|
|
108
|
-
'Content-Type': 'text/event-stream',
|
|
109
|
-
'Cache-Control': 'no-cache',
|
|
110
|
-
Connection: 'keep-alive',
|
|
111
|
-
},
|
|
112
|
-
});
|
|
113
|
-
}
|
|
package/dist/src/model-gateway/router.d.ts
CHANGED
|
@@ -2,3 +2,4 @@ import type { ModelGatewayConfig, ResolvedProvider } from './types.js';
|
|
|
2
2
|
export declare function initModelRouter(cfg: ModelGatewayConfig): void;
|
|
3
3
|
export declare function getConfig(): ModelGatewayConfig;
|
|
4
4
|
export declare function resolveProvider(model: string): ResolvedProvider | null;
|
|
5
|
+
export declare function remapModel(model: string, providerName: string): string;
|
|
package/dist/src/model-gateway/router.js
CHANGED
|
@@ -1,8 +1,25 @@
|
|
|
1
1
|
import { isProviderAvailable } from './quota.js';
|
|
2
2
|
import { circuitBreakerRegistry } from './circuit-breaker.js';
|
|
3
|
+
// Model-name remapping: public-facing model name -> the model name the actual
// provider expects. These are the built-in defaults; entries from the config
// are layered on top of them by initModelRouter.
const DEFAULT_MODEL_ALIASES = Object.freeze({
    deepseek: Object.freeze({
        'gpt-5.3-codex': 'deepseek-v4-pro',
        'gpt-5.4': 'deepseek-v4-pro',
        'gpt-5.5': 'deepseek-v4-pro',
        'o4-mini': 'deepseek-v4-flash',
        'claude-sonnet-4-6': 'deepseek-v4-pro',
    }),
});
// Active alias table (defaults + config overrides); rebuilt on every init.
let modelAliases = { deepseek: { ...DEFAULT_MODEL_ALIASES.deepseek } };
let config;
/**
 * Store the gateway config and rebuild the active model-alias table.
 *
 * Entries in `cfg.model_aliases` override (or extend) the built-in aliases of
 * the `deepseek` provider. The table is rebuilt from the frozen defaults on
 * each call, so overrides from a previous config do not leak into the next
 * one when the router is re-initialized.
 *
 * @param {object} cfg - Gateway configuration; `cfg.model_aliases` is an
 *   optional map of public model name -> provider model name.
 * @returns {void}
 */
export function initModelRouter(cfg) {
    config = cfg;
    modelAliases = {
        ...DEFAULT_MODEL_ALIASES,
        // Spreading a missing/falsy `model_aliases` contributes no keys, which
        // matches the previous "skip when not configured" behavior.
        deepseek: { ...DEFAULT_MODEL_ALIASES.deepseek, ...cfg.model_aliases },
    };
}
|
|
7
24
|
export function getConfig() {
|
|
8
25
|
return config;
|
|
@@ -14,7 +31,7 @@ export function resolveProvider(model) {
|
|
|
14
31
|
// 1. 按 model_routing 配置查找
|
|
15
32
|
const routingEntries = Object.entries(config.model_routing);
|
|
16
33
|
for (const [pattern, providers] of routingEntries) {
|
|
17
|
-
if (model.
|
|
34
|
+
if (model.startsWith(pattern)) {
|
|
18
35
|
for (const providerName of providers) {
|
|
19
36
|
const providerCfg = config.providers[providerName];
|
|
20
37
|
if (!providerCfg)
|
|
@@ -68,6 +85,9 @@ export function resolveProvider(model) {
|
|
|
68
85
|
}
|
|
69
86
|
return null;
|
|
70
87
|
}
|
|
88
|
+
/**
 * Translate a public-facing model name into the model name used by the given
 * provider, based on the module-level `modelAliases` table.
 *
 * Only own properties of the alias table are consulted. `model` is a
 * caller-supplied string, so a plain `table[provider]?.[model]` lookup would
 * resolve names such as `constructor` or `toString` through the prototype
 * chain and return a function instead of a model name.
 *
 * @param {string} model - Model name as requested by the client.
 * @param {string} providerName - Name of the provider the request is routed to.
 * @returns {string} The aliased model name, or `model` unchanged when the
 *   provider has no (non-empty) alias for it.
 */
export function remapModel(model, providerName) {
    const hasOwn = (obj, key) => obj != null && Object.prototype.hasOwnProperty.call(obj, key);
    if (!hasOwn(modelAliases, providerName)) {
        return model;
    }
    const providerAliases = modelAliases[providerName];
    if (!hasOwn(providerAliases, model)) {
        return model;
    }
    // An empty/falsy alias value falls back to the requested name, as before.
    return providerAliases[model] || model;
}
|
|
71
91
|
function resolveApiKey(_name, providerCfg) {
|
|
72
92
|
if (providerCfg.api_key && providerCfg.api_key !== '') {
|
|
73
93
|
return providerCfg.api_key;
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import { resolveProvider, getConfig } from './router.js';
|
|
1
|
+
import { resolveProvider, getConfig, remapModel } from './router.js';
|
|
2
2
|
import { callChatCompletions, callResponsesApi } from './providers.js';
|
|
3
3
|
import { getQuotaSummary, probeQuota } from './quota.js';
|
|
4
4
|
import { circuitBreakerRegistry } from './circuit-breaker.js';
|
|
5
5
|
import { checkAllProviders } from './health.js';
|
|
6
|
+
import { getMetrics, recordRequest } from './metrics.js';
|
|
6
7
|
export async function modelGatewayRoutes(fastify) {
|
|
7
8
|
// 健康检查 + 配额总览
|
|
8
9
|
fastify.get('/model-gateway/health', async (_req, _reply) => {
|
|
@@ -44,14 +45,16 @@ export async function modelGatewayRoutes(fastify) {
|
|
|
44
45
|
if (!body || !body.messages) {
|
|
45
46
|
return reply.code(400).send({ error: { message: 'messages is required' } });
|
|
46
47
|
}
|
|
47
|
-
const
|
|
48
|
-
const provider = resolveProvider(
|
|
48
|
+
const originalModel = body.model || 'deepseek-chat';
|
|
49
|
+
const provider = resolveProvider(originalModel);
|
|
49
50
|
if (!provider) {
|
|
50
51
|
return reply.code(503).send({
|
|
51
52
|
error: { message: 'No available provider. Check API keys and quota.' },
|
|
52
53
|
});
|
|
53
54
|
}
|
|
54
|
-
|
|
55
|
+
const model = remapModel(originalModel, provider.name);
|
|
56
|
+
const reqStart = Date.now();
|
|
57
|
+
console.log(`[ModelGW] ${originalModel} → ${provider.name}/${model} (${body.stream ? 'stream' : 'sync'})`);
|
|
55
58
|
try {
|
|
56
59
|
const upstreamResp = await callChatCompletions(provider, {
|
|
57
60
|
model,
|
|
@@ -64,12 +67,14 @@ export async function modelGatewayRoutes(fastify) {
|
|
|
64
67
|
});
|
|
65
68
|
if (!upstreamResp.ok && upstreamResp.status !== 200) {
|
|
66
69
|
const errText = await upstreamResp.text();
|
|
70
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart, status: upstreamResp.status, error: errText.slice(0, 200), streaming: !!body.stream });
|
|
67
71
|
console.error(`[ModelGW] ${provider.name} error ${upstreamResp.status}: ${errText.slice(0, 200)}`);
|
|
68
72
|
return reply.code(upstreamResp.status).send({
|
|
69
73
|
error: { message: `${provider.name}: ${errText.slice(0, 500)}` },
|
|
70
74
|
});
|
|
71
75
|
}
|
|
72
76
|
if (body.stream) {
|
|
77
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart, status: 200, streaming: true });
|
|
73
78
|
return reply.headers({
|
|
74
79
|
'Content-Type': 'text/event-stream',
|
|
75
80
|
'Cache-Control': 'no-cache',
|
|
@@ -77,9 +82,11 @@ export async function modelGatewayRoutes(fastify) {
|
|
|
77
82
|
}).send(upstreamResp.body);
|
|
78
83
|
}
|
|
79
84
|
const data = await upstreamResp.json();
|
|
85
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart, status: 200, streaming: false });
|
|
80
86
|
reply.send(data);
|
|
81
87
|
}
|
|
82
88
|
catch (err) {
|
|
89
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart, status: 502, error: err.message, streaming: !!body.stream });
|
|
83
90
|
console.error(`[ModelGW] Error calling ${provider.name}:`, err.message);
|
|
84
91
|
reply.code(502).send({
|
|
85
92
|
error: { message: `Provider error: ${err.message}` },
|
|
@@ -92,58 +99,48 @@ export async function modelGatewayRoutes(fastify) {
|
|
|
92
99
|
if (!body || !body.input) {
|
|
93
100
|
return reply.code(400).send({ error: { message: 'input is required' } });
|
|
94
101
|
}
|
|
95
|
-
const
|
|
96
|
-
const provider = resolveProvider(
|
|
102
|
+
const originalModel = body.model || 'deepseek-chat';
|
|
103
|
+
const provider = resolveProvider(originalModel);
|
|
97
104
|
if (!provider) {
|
|
98
105
|
return reply.code(503).send({
|
|
99
106
|
error: { message: 'No available provider. Check API keys and quota.' },
|
|
100
107
|
});
|
|
101
108
|
}
|
|
102
|
-
|
|
109
|
+
const model = remapModel(originalModel, provider.name);
|
|
110
|
+
body.model = model;
|
|
111
|
+
console.log(`[ModelGW:Responses] ${originalModel} → ${provider.name}/${model}`);
|
|
112
|
+
const reqStart2 = Date.now();
|
|
103
113
|
try {
|
|
104
114
|
const upstreamResp = await callResponsesApi(provider, body);
|
|
105
115
|
if (!upstreamResp.ok) {
|
|
106
116
|
const errText = await upstreamResp.text();
|
|
117
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart2, status: upstreamResp.status, error: errText.slice(0, 200), streaming: !!body.stream });
|
|
107
118
|
return reply.code(upstreamResp.status).send({
|
|
108
119
|
error: { message: `${provider.name}: ${errText.slice(0, 500)}` },
|
|
109
120
|
});
|
|
110
121
|
}
|
|
122
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart2, status: 200, streaming: !!body.stream });
|
|
111
123
|
if (body.stream) {
|
|
112
124
|
return reply.headers({
|
|
113
125
|
'Content-Type': 'text/event-stream',
|
|
114
126
|
'Cache-Control': 'no-cache',
|
|
115
127
|
}).send(upstreamResp.body);
|
|
116
128
|
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
const choice = ccData.choices?.[0];
|
|
120
|
-
const responsesData = {
|
|
121
|
-
id: ccData.id,
|
|
122
|
-
object: 'response',
|
|
123
|
-
model: ccData.model,
|
|
124
|
-
output: [
|
|
125
|
-
{
|
|
126
|
-
type: 'message',
|
|
127
|
-
role: 'assistant',
|
|
128
|
-
content: [
|
|
129
|
-
{
|
|
130
|
-
type: 'output_text',
|
|
131
|
-
text: choice?.message?.content || '',
|
|
132
|
-
},
|
|
133
|
-
],
|
|
134
|
-
},
|
|
135
|
-
],
|
|
136
|
-
usage: ccData.usage,
|
|
137
|
-
};
|
|
138
|
-
reply.send(responsesData);
|
|
129
|
+
const data = await upstreamResp.json();
|
|
130
|
+
reply.send(data);
|
|
139
131
|
}
|
|
140
132
|
catch (err) {
|
|
133
|
+
recordRequest({ timestamp: Date.now(), model: originalModel, provider: provider.name, actualModel: model, latencyMs: Date.now() - reqStart2, status: 502, error: err.message, streaming: !!body.stream });
|
|
141
134
|
console.error(`[ModelGW:Responses] Error:`, err.message);
|
|
142
135
|
reply.code(502).send({
|
|
143
136
|
error: { message: `Provider error: ${err.message}` },
|
|
144
137
|
});
|
|
145
138
|
}
|
|
146
139
|
});
|
|
140
|
+
// 运行时统计
|
|
141
|
+
fastify.get('/model-gateway/stats', async (_request, reply) => {
|
|
142
|
+
reply.send(getMetrics());
|
|
143
|
+
});
|
|
147
144
|
// Provider 健康检查 + 熔断器状态
|
|
148
145
|
fastify.get('/model-gateway/health/:provider', async (request, reply) => {
|
|
149
146
|
const { provider } = request.params;
|