llmapi-v2 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/.env.example +40 -0
  2. package/Dockerfile +17 -0
  3. package/dist/config.d.ts +48 -0
  4. package/dist/config.js +98 -0
  5. package/dist/config.js.map +1 -0
  6. package/dist/converter/request.d.ts +6 -0
  7. package/dist/converter/request.js +184 -0
  8. package/dist/converter/request.js.map +1 -0
  9. package/dist/converter/response.d.ts +6 -0
  10. package/dist/converter/response.js +76 -0
  11. package/dist/converter/response.js.map +1 -0
  12. package/dist/converter/stream.d.ts +54 -0
  13. package/dist/converter/stream.js +318 -0
  14. package/dist/converter/stream.js.map +1 -0
  15. package/dist/converter/types.d.ts +239 -0
  16. package/dist/converter/types.js +6 -0
  17. package/dist/converter/types.js.map +1 -0
  18. package/dist/data/posts.d.ts +19 -0
  19. package/dist/data/posts.js +462 -0
  20. package/dist/data/posts.js.map +1 -0
  21. package/dist/index.d.ts +1 -0
  22. package/dist/index.js +233 -0
  23. package/dist/index.js.map +1 -0
  24. package/dist/middleware/api-key-auth.d.ts +6 -0
  25. package/dist/middleware/api-key-auth.js +76 -0
  26. package/dist/middleware/api-key-auth.js.map +1 -0
  27. package/dist/middleware/quota-guard.d.ts +10 -0
  28. package/dist/middleware/quota-guard.js +27 -0
  29. package/dist/middleware/quota-guard.js.map +1 -0
  30. package/dist/middleware/rate-limiter.d.ts +5 -0
  31. package/dist/middleware/rate-limiter.js +50 -0
  32. package/dist/middleware/rate-limiter.js.map +1 -0
  33. package/dist/middleware/request-logger.d.ts +6 -0
  34. package/dist/middleware/request-logger.js +37 -0
  35. package/dist/middleware/request-logger.js.map +1 -0
  36. package/dist/middleware/session-auth.d.ts +19 -0
  37. package/dist/middleware/session-auth.js +99 -0
  38. package/dist/middleware/session-auth.js.map +1 -0
  39. package/dist/providers/aliyun.d.ts +13 -0
  40. package/dist/providers/aliyun.js +20 -0
  41. package/dist/providers/aliyun.js.map +1 -0
  42. package/dist/providers/base-provider.d.ts +36 -0
  43. package/dist/providers/base-provider.js +133 -0
  44. package/dist/providers/base-provider.js.map +1 -0
  45. package/dist/providers/deepseek.d.ts +11 -0
  46. package/dist/providers/deepseek.js +18 -0
  47. package/dist/providers/deepseek.js.map +1 -0
  48. package/dist/providers/registry.d.ts +18 -0
  49. package/dist/providers/registry.js +98 -0
  50. package/dist/providers/registry.js.map +1 -0
  51. package/dist/providers/types.d.ts +17 -0
  52. package/dist/providers/types.js +3 -0
  53. package/dist/providers/types.js.map +1 -0
  54. package/dist/routes/admin.d.ts +1 -0
  55. package/dist/routes/admin.js +153 -0
  56. package/dist/routes/admin.js.map +1 -0
  57. package/dist/routes/auth.d.ts +2 -0
  58. package/dist/routes/auth.js +318 -0
  59. package/dist/routes/auth.js.map +1 -0
  60. package/dist/routes/blog.d.ts +1 -0
  61. package/dist/routes/blog.js +29 -0
  62. package/dist/routes/blog.js.map +1 -0
  63. package/dist/routes/dashboard.d.ts +1 -0
  64. package/dist/routes/dashboard.js +184 -0
  65. package/dist/routes/dashboard.js.map +1 -0
  66. package/dist/routes/messages.d.ts +1 -0
  67. package/dist/routes/messages.js +309 -0
  68. package/dist/routes/messages.js.map +1 -0
  69. package/dist/routes/models.d.ts +1 -0
  70. package/dist/routes/models.js +39 -0
  71. package/dist/routes/models.js.map +1 -0
  72. package/dist/routes/payment.d.ts +1 -0
  73. package/dist/routes/payment.js +150 -0
  74. package/dist/routes/payment.js.map +1 -0
  75. package/dist/routes/sitemap.d.ts +1 -0
  76. package/dist/routes/sitemap.js +38 -0
  77. package/dist/routes/sitemap.js.map +1 -0
  78. package/dist/services/alipay.d.ts +27 -0
  79. package/dist/services/alipay.js +106 -0
  80. package/dist/services/alipay.js.map +1 -0
  81. package/dist/services/database.d.ts +4 -0
  82. package/dist/services/database.js +170 -0
  83. package/dist/services/database.js.map +1 -0
  84. package/dist/services/health-checker.d.ts +13 -0
  85. package/dist/services/health-checker.js +95 -0
  86. package/dist/services/health-checker.js.map +1 -0
  87. package/dist/services/mailer.d.ts +3 -0
  88. package/dist/services/mailer.js +91 -0
  89. package/dist/services/mailer.js.map +1 -0
  90. package/dist/services/metrics.d.ts +56 -0
  91. package/dist/services/metrics.js +94 -0
  92. package/dist/services/metrics.js.map +1 -0
  93. package/dist/services/remote-control.d.ts +20 -0
  94. package/dist/services/remote-control.js +209 -0
  95. package/dist/services/remote-control.js.map +1 -0
  96. package/dist/services/remote-ws.d.ts +5 -0
  97. package/dist/services/remote-ws.js +143 -0
  98. package/dist/services/remote-ws.js.map +1 -0
  99. package/dist/services/usage.d.ts +13 -0
  100. package/dist/services/usage.js +39 -0
  101. package/dist/services/usage.js.map +1 -0
  102. package/dist/utils/errors.d.ts +27 -0
  103. package/dist/utils/errors.js +48 -0
  104. package/dist/utils/errors.js.map +1 -0
  105. package/dist/utils/logger.d.ts +2 -0
  106. package/dist/utils/logger.js +14 -0
  107. package/dist/utils/logger.js.map +1 -0
  108. package/docker-compose.yml +19 -0
  109. package/package.json +39 -0
  110. package/public/robots.txt +8 -0
  111. package/src/config.ts +140 -0
  112. package/src/converter/request.ts +207 -0
  113. package/src/converter/response.ts +85 -0
  114. package/src/converter/stream.ts +373 -0
  115. package/src/converter/types.ts +257 -0
  116. package/src/data/posts.ts +474 -0
  117. package/src/index.ts +219 -0
  118. package/src/middleware/api-key-auth.ts +82 -0
  119. package/src/middleware/quota-guard.ts +28 -0
  120. package/src/middleware/rate-limiter.ts +61 -0
  121. package/src/middleware/request-logger.ts +36 -0
  122. package/src/middleware/session-auth.ts +91 -0
  123. package/src/providers/aliyun.ts +16 -0
  124. package/src/providers/base-provider.ts +148 -0
  125. package/src/providers/deepseek.ts +14 -0
  126. package/src/providers/registry.ts +111 -0
  127. package/src/providers/types.ts +26 -0
  128. package/src/routes/admin.ts +169 -0
  129. package/src/routes/auth.ts +369 -0
  130. package/src/routes/blog.ts +28 -0
  131. package/src/routes/dashboard.ts +208 -0
  132. package/src/routes/messages.ts +346 -0
  133. package/src/routes/models.ts +37 -0
  134. package/src/routes/payment.ts +189 -0
  135. package/src/routes/sitemap.ts +40 -0
  136. package/src/services/alipay.ts +116 -0
  137. package/src/services/database.ts +187 -0
  138. package/src/services/health-checker.ts +115 -0
  139. package/src/services/mailer.ts +90 -0
  140. package/src/services/metrics.ts +104 -0
  141. package/src/services/remote-control.ts +226 -0
  142. package/src/services/remote-ws.ts +145 -0
  143. package/src/services/usage.ts +57 -0
  144. package/src/types/express.d.ts +46 -0
  145. package/src/utils/errors.ts +44 -0
  146. package/src/utils/logger.ts +8 -0
  147. package/tsconfig.json +17 -0
  148. package/views/pages/404.ejs +14 -0
  149. package/views/pages/admin.ejs +307 -0
  150. package/views/pages/blog-post.ejs +378 -0
  151. package/views/pages/blog.ejs +148 -0
  152. package/views/pages/dashboard.ejs +441 -0
  153. package/views/pages/docs.ejs +807 -0
  154. package/views/pages/index.ejs +416 -0
  155. package/views/pages/login.ejs +170 -0
  156. package/views/pages/orders.ejs +111 -0
  157. package/views/pages/pricing.ejs +379 -0
  158. package/views/pages/register.ejs +397 -0
  159. package/views/pages/remote.ejs +334 -0
  160. package/views/pages/settings.ejs +373 -0
  161. package/views/partials/header.ejs +70 -0
  162. package/views/partials/nav.ejs +140 -0
@@ -0,0 +1,28 @@
1
+ import { Router } from 'express';
2
+ import { getRecentPosts, getPostBySlug, getAllCategories } from '../data/posts';
3
+
4
export const blogRouter = Router();

/**
 * GET /blog
 * Renders the blog index page with the posts returned by `getRecentPosts(20)`
 * and the category list from `getAllCategories()`.
 */
blogRouter.get('/blog', (req, res) => {
  const recentPosts = getRecentPosts(20);
  const allCategories = getAllCategories();

  // `viewUser` is null for anonymous visitors so the template can branch on it.
  const viewModel = {
    viewUser: req.user || null,
    posts: recentPosts,
    categories: allCategories,
  };

  res.render('pages/blog', viewModel);
});
15
+
16
/**
 * GET /blog/:slug
 * Renders a single post, or the 404 page (with HTTP status 404) when
 * `getPostBySlug` finds nothing for the requested slug.
 */
blogRouter.get('/blog/:slug', (req, res) => {
  const post = getPostBySlug(req.params.slug);

  if (post) {
    // Sidebar: at most three posts other than the current one, taken from the
    // five returned by getRecentPosts.
    const otherPosts = getRecentPosts(5).filter((candidate) => candidate.slug !== post.slug);
    const recentPosts = otherPosts.slice(0, 3);

    res.render('pages/blog-post', {
      viewUser: req.user || null,
      post,
      recentPosts,
    });
    return;
  }

  res.status(404).render('pages/404', { viewUser: req.user || null });
});
@@ -0,0 +1,208 @@
1
+ import { Router } from 'express';
2
+ import type { Request, Response } from 'express';
3
+ import crypto from 'crypto';
4
+ import { v4 as uuidv4 } from 'uuid';
5
+ import { getPool } from '../services/database';
6
+ import { sessionAuth } from '../middleware/session-auth';
7
+ import { logger } from '../utils/logger';
8
+
9
export const dashboardRouter = Router();

// All routes require session auth
dashboardRouter.use(sessionAuth);

/**
 * GET /api/dashboard/stats
 *
 * Returns the caller's most recent subscription (joined with its plan),
 * aggregate totals over their `usage_logs` rows, today's request/token counts
 * and the number of active API keys.
 *
 * Responds with `{ success: true, subscription, usage, today, apiKeyCount }`,
 * or HTTP 500 `{ success: false, error: 'Internal error' }` if any query throws.
 */
dashboardRouter.get('/stats', async (req: Request, res: Response) => {
  // SQL aggregates (SUM/AVG/COUNT) are commonly returned by database drivers as
  // strings; coerce before doing arithmetic so `+` adds instead of concatenating.
  const toNumber = (value: unknown): number => Number(value) || 0;

  try {
    const pool = getPool();
    const userId = req.userId!;

    // Subscription info
    const [subs] = await pool.execute(`
      SELECT s.*, p.name as plan_name, p.display_name, p.token_limit_monthly, p.rate_limit_rpm, p.max_api_keys, p.price_monthly
      FROM subscriptions s JOIN plans p ON s.plan_id = p.id
      WHERE s.user_id = ? ORDER BY s.period_start DESC LIMIT 1
    `, [userId]);
    const sub = (subs as any[])[0];

    // Token breakdown
    // NOTE(review): this aggregates over ALL of the user's usage_logs rows, yet
    // percentUsed below compares it with a *monthly* limit — confirm whether the
    // query should be restricted to the current billing period.
    const [tokens] = await pool.execute(`
      SELECT
        COALESCE(SUM(input_tokens), 0) as total_input,
        COALESCE(SUM(output_tokens), 0) as total_output,
        COALESCE(SUM(thinking_tokens), 0) as total_thinking,
        COALESCE(SUM(provider_cost), 0) as total_cost,
        COUNT(*) as total_requests,
        COALESCE(AVG(ttft_ms), 0) as avg_ttft,
        COALESCE(AVG(tokens_per_sec), 0) as avg_tps,
        COALESCE(AVG(duration_ms), 0) as avg_duration
      FROM usage_logs WHERE user_id = ?
    `, [userId]);

    // Today's requests
    const [today] = await pool.execute(`
      SELECT COUNT(*) as cnt, COALESCE(SUM(input_tokens + output_tokens), 0) as tokens
      FROM usage_logs WHERE user_id = ? AND DATE(created_at) = CURRENT_DATE
    `, [userId]);

    // API key count
    const [keys] = await pool.execute(
      'SELECT COUNT(*) as cnt FROM api_keys WHERE user_id = ? AND status = ?',
      [userId, 'active'],
    );

    const tokenData = (tokens as any[])[0];
    const totalInput = toNumber(tokenData.total_input);
    const totalOutput = toNumber(tokenData.total_output);
    const totalUsed = totalInput + totalOutput;
    const limit = toNumber(sub?.token_limit_monthly);

    res.json({
      success: true,
      subscription: sub,
      usage: {
        totalInput,
        totalOutput,
        totalThinking: toNumber(tokenData.total_thinking),
        totalCost: parseFloat(tokenData.total_cost),
        totalRequests: tokenData.total_requests,
        avgTtft: Math.round(tokenData.avg_ttft),
        avgTps: Math.round(tokenData.avg_tps * 100) / 100,
        avgDuration: Math.round(tokenData.avg_duration),
        totalUsed,
        // Percentage with two decimals; 0 when the plan has no (or an unknown) limit.
        percentUsed: limit > 0 ? Math.round((totalUsed / limit) * 10000) / 100 : 0,
      },
      today: (today as any[])[0],
      apiKeyCount: (keys as any[])[0].cnt,
    });
  } catch (err) {
    logger.error({ err }, 'dashboard stats error');
    res.status(500).json({ success: false, error: 'Internal error' });
  }
});
84
+
85
/**
 * GET /api/dashboard/usage
 * Daily token and request totals for the authenticated user; the query keeps
 * only rows created in the 30 days before the current date, ordered by date.
 */
dashboardRouter.get('/usage', async (req: Request, res: Response) => {
  try {
    const db = getPool();
    const queryResult = await db.execute(`
      SELECT
        DATE(created_at) as date,
        COALESCE(SUM(input_tokens), 0) as input_tokens,
        COALESCE(SUM(output_tokens), 0) as output_tokens,
        COUNT(*) as requests
      FROM usage_logs
      WHERE user_id = ? AND created_at >= CURRENT_DATE - INTERVAL '30 days'
      GROUP BY DATE(created_at)
      ORDER BY date
    `, [req.userId!]);

    // The first tuple element holds the result rows.
    const daily = queryResult[0];
    res.json({ success: true, daily });
  } catch (err) {
    logger.error({ err }, 'dashboard usage error');
    res.status(500).json({ success: false, error: 'Internal error' });
  }
});
110
+
111
/**
 * GET /api/dashboard/api-keys
 * Lists the authenticated user's API keys whose status is 'active', newest
 * first. Only the prefix and metadata columns are selected, never the hash.
 */
dashboardRouter.get('/api-keys', async (req: Request, res: Response) => {
  try {
    const db = getPool();
    const queryResult = await db.execute(
      'SELECT id, key_prefix, name, status, last_used_at, created_at FROM api_keys WHERE user_id = ? AND status = ? ORDER BY created_at DESC',
      [req.userId!, 'active'],
    );
    const activeKeys = queryResult[0];
    res.json({ success: true, keys: activeKeys });
  } catch (err) {
    logger.error({ err }, 'list keys error');
    res.status(500).json({ success: false, error: 'Internal error' });
  }
});
128
+
129
/**
 * POST /api/dashboard/api-keys
 *
 * Creates a new API key for the authenticated user.
 *
 * Body: `{ name?: string }` — optional label; defaults to 'Default' when
 * missing, null or blank. A `name` of any other type is rejected with HTTP 400.
 *
 * Responds with `{ success: true, key, prefix }` where `key` is the full secret
 * (only its SHA-256 hash and 12-character prefix are inserted into the table),
 * HTTP 400 when the plan's key limit is reached, or HTTP 500 on query failure.
 */
dashboardRouter.post('/api-keys', async (req: Request, res: Response) => {
  try {
    const pool = getPool();

    // `req.body` is untrusted and may be absent; only accept a plain string label
    // so that objects/arrays never reach the SQL parameter binding.
    const rawName: unknown = req.body?.name;
    let name = 'Default';
    if (typeof rawName === 'string') {
      name = rawName.trim() || 'Default';
    } else if (rawName !== undefined && rawName !== null) {
      res.status(400).json({ success: false, error: 'API key name must be a string' });
      return;
    }

    // Check plan limit
    const [subs] = await pool.execute(`
      SELECT p.max_api_keys FROM subscriptions s JOIN plans p ON s.plan_id = p.id
      WHERE s.user_id = ? ORDER BY s.period_start DESC LIMIT 1
    `, [req.userId!]);
    // Users without a subscription row (or with a falsy limit) get one key.
    const maxKeys = (subs as any[])[0]?.max_api_keys || 1;

    // NOTE(review): count-then-insert is not atomic; two concurrent requests can
    // both pass this check. Confirm whether a DB-level constraint covers this.
    const [existing] = await pool.execute(
      'SELECT COUNT(*) as cnt FROM api_keys WHERE user_id = ? AND status = ?',
      [req.userId!, 'active'],
    );
    if ((existing as any[])[0].cnt >= maxKeys) {
      res.status(400).json({ success: false, error: `Maximum ${maxKeys} API keys allowed on your plan` });
      return;
    }

    // Generate key: sk-relay-<48 hex chars>
    const rawKey = `sk-relay-${crypto.randomBytes(24).toString('hex')}`;
    const keyPrefix = rawKey.substring(0, 12);
    const keyHash = crypto.createHash('sha256').update(rawKey).digest('hex');

    await pool.execute(
      'INSERT INTO api_keys (user_id, key_prefix, key_hash, name) VALUES (?, ?, ?, ?)',
      [req.userId!, keyPrefix, keyHash, name],
    );

    // Return full key ONCE (never stored in plain text)
    res.json({ success: true, key: rawKey, prefix: keyPrefix });
  } catch (err) {
    logger.error({ err }, 'create key error');
    res.status(500).json({ success: false, error: 'Internal error' });
  }
});
171
+
172
/**
 * DELETE /api/dashboard/api-keys/:id
 * Marks the given key as 'revoked'. The UPDATE is scoped to the authenticated
 * user's id, and the handler answers `{ success: true }` whenever the
 * statement itself does not throw.
 */
dashboardRouter.delete('/api-keys/:id', async (req: Request, res: Response) => {
  try {
    const db = getPool();
    const revokeParams = ['revoked', req.params.id, req.userId!];
    await db.execute('UPDATE api_keys SET status = ? WHERE id = ? AND user_id = ?', revokeParams);
    res.json({ success: true });
  } catch (err) {
    logger.error({ err }, 'revoke key error');
    res.status(500).json({ success: false, error: 'Internal error' });
  }
});
189
+
190
/**
 * GET /api/dashboard/recent
 * The authenticated user's ten most recently created usage_logs rows
 * (model, provider, token counts, timing, status).
 */
dashboardRouter.get('/recent', async (req: Request, res: Response) => {
  try {
    const db = getPool();
    const queryResult = await db.execute(`
      SELECT model, provider_name, input_tokens, output_tokens, thinking_tokens,
        ttft_ms, tokens_per_sec, duration_ms, status, created_at
      FROM usage_logs WHERE user_id = ?
      ORDER BY created_at DESC LIMIT 10
    `, [req.userId!]);
    const recent = queryResult[0];
    res.json({ success: true, recent });
  } catch (err) {
    logger.error({ err }, 'recent error');
    res.status(500).json({ success: false, error: 'Internal error' });
  }
});
@@ -0,0 +1,346 @@
1
+ import { Router } from 'express';
2
+ import type { Request, Response } from 'express';
3
+ import type { IncomingMessage } from 'http';
4
+ import { apiKeyAuth } from '../middleware/api-key-auth';
5
+ import { rateLimiter } from '../middleware/rate-limiter';
6
+ import { quotaGuard } from '../middleware/quota-guard';
7
+ import { smartResolve, resolveWithFailover } from '../providers/registry';
8
+ import { recordUsage } from '../services/usage';
9
+ import { metrics } from '../services/metrics';
10
+ import { OverloadedError } from '../utils/errors';
11
+ import { detectRemoteTrigger, handleRemoteTrigger } from '../services/remote-control';
12
+ import { logger } from '../utils/logger';
13
+ /// <reference path="../types/express.d.ts" />
14
+
15
export const messagesRouter = Router();

/**
 * POST /v1/messages
 *
 * Transparent proxy to a provider's native Anthropic endpoint.
 *
 * Architecture:
 *   Claude Code -> this proxy -> provider's Anthropic endpoint -> Claude Code
 *
 * We do NOT convert formats. The provider handles all Anthropic protocol
 * details. This handler only:
 *   1. Runs behind apiKeyAuth, rateLimiter and quotaGuard
 *   2. Hands remote-control triggers to handleRemoteTrigger
 *   3. Builds the provider list: smartResolve first, then resolveWithFailover
 *   4. Rewrites the model name in the request body and forwards it
 *   5. Extracts usage from the response for recordUsage
 *   6. Sends the provider's response back to the client
 *
 * Error responses:
 *   - 400 (Anthropic-style `invalid_request_error`) when the body is not a JSON
 *     object or cannot be normalized. These are client errors and must not
 *     affect provider health.
 *   - OverloadedError when no provider is available or every provider failed.
 */
messagesRouter.post('/messages', apiKeyAuth, rateLimiter, quotaGuard, async (req: Request, res: Response) => {
  const startTime = Date.now();

  const rejectInvalidRequest = (message: string): void => {
    res.status(400).json({ type: 'error', error: { type: 'invalid_request_error', message } });
  };

  // Parse just the fields we need for routing (don't fully parse the body)
  const body = req.body;
  if (body === null || typeof body !== 'object' || Array.isArray(body)) {
    // Without this guard the property reads below would throw inside an async
    // handler and surface as an unhandled rejection instead of a response.
    rejectInvalidRequest('Request body must be a JSON object.');
    return;
  }

  // Command interception: remote control trigger
  if (detectRemoteTrigger(body)) {
    await handleRemoteTrigger(req, res);
    return;
  }

  const claudeModel = body.model || 'claude-sonnet-4-6';
  const isStream = !!body.stream;
  const hasTools = Array.isArray(body.tools) && body.tools.length > 0;
  const messageCount = Array.isArray(body.messages) ? body.messages.length : 0;

  logger.info({
    userId: req.userId,
    model: claudeModel,
    stream: isStream,
    tools: hasTools,
    messages: messageCount,
  }, 'API request');

  // Normalize once, before any provider is involved: normalization does not
  // depend on the backend model, and a failure here is the client's fault, so
  // it must not mark providers unhealthy.
  let cacheReadyBody: any;
  try {
    cacheReadyBody = normalizeForCache(body);
  } catch (err) {
    logger.warn({ err: (err as Error).message }, 'Request body could not be normalized');
    rejectInvalidRequest('Malformed request body.');
    return;
  }

  // Smart routing: pick the best provider based on task characteristics
  const resolved = smartResolve(claudeModel, hasTools, messageCount);

  // Try smart resolution first, then fall back to priority-based routing
  const providersToTry: Array<{ provider: any; backendModel: string }> = [];
  if (resolved) {
    providersToTry.push(resolved);
  }
  // Always add fallback providers
  for await (const route of resolveWithFailover(claudeModel)) {
    // Skip if already the smart-resolved provider
    if (resolved && route.provider.name === resolved.provider.name) continue;
    providersToTry.push(route);
  }

  if (providersToTry.length === 0) {
    const err = new OverloadedError('No providers available for this model.');
    res.status(err.statusCode).json(err.toJSON());
    return;
  }

  let lastError: Error | null = null;

  for (const { provider, backendModel } of providersToTry) {
    try {
      // Rewrite the model name in the (already normalized) request body
      const payload = JSON.stringify({ ...cacheReadyBody, model: backendModel });

      logger.debug({ provider: provider.name, model: backendModel, smart: !!resolved }, 'Routing to provider');

      if (isStream) {
        metrics.streamStarted();
      }

      // Forward to provider's native Anthropic endpoint
      // NOTE(review): 'anthropic-beta' is sent as an empty string when the client
      // did not supply it — confirm provider.proxy and upstreams tolerate that.
      const backendRes = await provider.proxy(
        '/v1/messages',
        payload,
        {
          'anthropic-version': req.headers['anthropic-version'] as string || '2023-06-01',
          'anthropic-beta': req.headers['anthropic-beta'] as string || '',
        },
        isStream,
      );

      // Check for error status from provider
      // NOTE(review): provider 4xx answers also trigger failover and
      // markUnhealthy below — confirm that is intended for client-caused errors.
      if (backendRes.statusCode && backendRes.statusCode >= 400) {
        const errorBody = await bufferResponse(backendRes);
        const errMsg = extractErrorMessage(errorBody, backendRes.statusCode, provider.name);
        throw new Error(errMsg);
      }

      provider.markHealthy();

      if (isStream) {
        // Streaming: pipe response directly, intercept usage from SSE
        await pipeStreamingResponse(backendRes, res, {
          userId: req.userId!,
          apiKeyId: req.apiKey?.id ?? null,
          displayModel: claudeModel,
          providerName: provider.name,
          backendModel,
          startTime,
        });
      } else {
        // Non-streaming: buffer, extract usage, forward
        const responseBody = await bufferResponse(backendRes);
        const parsed = JSON.parse(responseBody);

        // Record usage from the response
        const usage = parsed.usage || {};
        const elapsedMs = Date.now() - startTime;
        recordUsage(req.userId!, req.apiKey?.id ?? null, claudeModel, provider.name, backendModel, {
          inputTokens: usage.input_tokens || 0,
          outputTokens: usage.output_tokens || 0,
          thinkingTokens: 0,
          // The whole body arrives at once, so time-to-first-token equals duration.
          ttftMs: elapsedMs,
          tokensPerSec: 0,
          durationMs: elapsedMs,
        });
        metrics.recordRequest(true, false);
        metrics.recordProviderRequest(provider.name, true);

        // Forward the provider's body verbatim as JSON
        res.setHeader('Content-Type', 'application/json');
        res.status(backendRes.statusCode || 200).send(responseBody);
      }

      return; // Success

    } catch (err) {
      lastError = err as Error;
      provider.markUnhealthy(lastError.message);
      metrics.recordProviderRequest(provider.name, false);
      if (isStream) metrics.streamEnded();
      logger.warn({ provider: provider.name, err: lastError.message }, 'Provider failed, trying next');
    }
  }

  // All providers failed
  metrics.recordRequest(false, isStream);
  logger.error({ model: claudeModel, lastError: lastError?.message }, 'All providers failed');

  if (res.headersSent) {
    // A response was already started; a status line/JSON error can no longer
    // be written, so just terminate it.
    if (!res.writableEnded) res.end();
    return;
  }

  const error = new OverloadedError(lastError?.message || 'All providers unavailable.');
  res.status(error.statusCode).json(error.toJSON());
});
164
+
165
/**
 * Pipe a streaming (SSE) response from the provider to the client unchanged,
 * while parsing the forwarded events to extract token usage for billing.
 *
 * The returned promise always resolves (never rejects) exactly once, when the
 * backend stream ends, errors, or is closed early (e.g. because the client
 * disconnected and the backend response was destroyed).
 *
 * @param backendRes Provider response; consumed as UTF-8 text.
 * @param clientRes  Express response to write the SSE bytes to.
 * @param meta       Identifiers and the request start time used for
 *                   recordUsage, metrics and logging.
 */
async function pipeStreamingResponse(
  backendRes: IncomingMessage,
  clientRes: Response,
  meta: {
    userId: number;
    apiKeyId: number | null;
    displayModel: string;
    providerName: string;
    backendModel: string;
    startTime: number;
  },
): Promise<void> {
  return new Promise((resolve) => {
    // Forward headers
    clientRes.setHeader('Content-Type', 'text/event-stream');
    clientRes.setHeader('Cache-Control', 'no-cache');
    clientRes.setHeader('Connection', 'keep-alive');
    clientRes.flushHeaders();

    const usage = { inputTokens: 0, outputTokens: 0 };
    let firstChunkTime = 0;
    let buffer = '';
    let settled = false;

    /** Update `usage` from one SSE line; lines without usage data are ignored. */
    const absorbSseLine = (line: string): void => {
      // Accept both "data: {...}" and "data:{...}".
      if (!line.startsWith('data:')) return;
      const payload = line.slice(5).trim();
      if (!payload || payload === '[DONE]') return;

      try {
        const data = JSON.parse(payload);
        // Extract usage from message_start or message_delta events
        if (data.type === 'message_start' && data.message?.usage) {
          usage.inputTokens = data.message.usage.input_tokens || 0;
        }
        if (data.type === 'message_delta' && data.usage) {
          usage.outputTokens = data.usage.output_tokens || 0;
        }
      } catch {
        // Not JSON (or not an object): already forwarded verbatim, carries no usage.
      }
    };

    /**
     * Single exit point. 'end', 'error' and 'close' can fire in combination,
     * so everything here runs at most once.
     */
    const finish = (outcome: 'completed' | 'aborted' | 'failed'): void => {
      if (settled) return;
      settled = true;

      // A final line without a trailing newline would otherwise be dropped.
      if (buffer) {
        absorbSseLine(buffer);
        buffer = '';
      }

      if (!clientRes.writableEnded) {
        try { clientRes.end(); } catch { /* client already gone */ }
      }

      const duration = Date.now() - meta.startTime;
      const ttft = firstChunkTime ? firstChunkTime - meta.startTime : duration;
      const tps = duration > 0 && usage.outputTokens > 0
        ? Math.round((usage.outputTokens / (duration / 1000)) * 100) / 100
        : 0;

      // Record usage for billing. For interrupted streams, still bill whatever
      // token counts the provider reported before the interruption.
      const sawTokens = usage.inputTokens > 0 || usage.outputTokens > 0;
      if (outcome === 'completed' || sawTokens) {
        recordUsage(meta.userId, meta.apiKeyId, meta.displayModel, meta.providerName, meta.backendModel, {
          inputTokens: usage.inputTokens,
          outputTokens: usage.outputTokens,
          thinkingTokens: 0,
          ttftMs: ttft,
          tokensPerSec: tps,
          durationMs: duration,
        });
      }

      if (outcome === 'completed') {
        metrics.recordRequest(true, true);
        metrics.recordProviderRequest(meta.providerName, true);
        metrics.recordLatency(duration);
        if (ttft > 0) metrics.recordTTFT(ttft);
      }
      // Always balance the streamStarted() made by the caller.
      metrics.streamEnded();

      const summary = {
        userId: meta.userId,
        provider: meta.providerName,
        model: meta.backendModel,
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        ttftMs: ttft,
        durationMs: duration,
      };
      if (outcome === 'completed') {
        logger.info(summary, 'Stream completed');
      } else {
        logger.warn({ ...summary, outcome }, 'Stream ended before completion');
      }

      resolve();
    };

    // Handle client disconnect: stop reading from the provider. destroy() makes
    // the backend emit 'close' (not 'end'), which is handled below.
    clientRes.on('close', () => {
      backendRes.destroy();
    });

    backendRes.setEncoding('utf8');

    backendRes.on('data', (chunk: string) => {
      if (!firstChunkTime) firstChunkTime = Date.now();

      // Write chunk directly to client (transparent proxy)
      if (!clientRes.writableEnded) {
        try {
          // Respect backpressure: stop reading from the provider while the
          // client's write buffer is full, resume once it drains.
          if (!clientRes.write(chunk)) {
            backendRes.pause();
            clientRes.once('drain', () => backendRes.resume());
          }
        } catch {
          // Client went away mid-write; its 'close' handler tears down the backend.
        }
      }

      // Parse SSE to extract usage (for billing)
      buffer += chunk;
      const lines = buffer.split('\n');
      // The last element is an incomplete line (or ''), kept for the next chunk.
      buffer = lines.pop() || '';

      for (const line of lines) {
        absorbSseLine(line);
      }
    });

    backendRes.on('end', () => finish('completed'));

    backendRes.on('error', (err) => {
      logger.error({ err }, 'Backend stream error');
      finish('failed');
    });

    // Fires after 'end'/'error' too (then a no-op); on its own it means the
    // stream was destroyed early. Without this the promise would never settle
    // after a client disconnect.
    backendRes.on('close', () => finish('aborted'));
  });
}
280
+
281
/**
 * Prepare a request body for provider-side prompt caching.
 *
 * Returns a shallow copy of `body` in which `system` is rewritten:
 *   - a string `system` becomes a one-element array of a text block carrying
 *     `cache_control: { type: 'ephemeral' }`;
 *   - in an array `system`, every `{ type: 'text', text: string }` block gets
 *     its text scrubbed, and the last block (if it is such a text block) also
 *     gets `cache_control: { type: 'ephemeral' }`.
 * "Scrubbing" replaces ISO-style dates/timestamps with `DATE_PLACEHOLDER` and
 * standalone 10-13 digit numbers with `TIMESTAMP`, so that otherwise identical
 * prompts share the same prefix.
 *
 * Any other `system` value, and any array entry that is not a text block
 * (including `null`/primitives), is passed through untouched. The input object
 * is never mutated.
 *
 * Per the original author's notes this targets DashScope's explicit cache
 * (cached prefix reused within minutes at reduced cost) — not verified here.
 *
 * @param body Parsed request body (untrusted shape).
 * @returns A shallow copy of `body` with a cache-friendly `system` field.
 */
function normalizeForCache(body: any): any {
  const scrubVolatileText = (text: string): string =>
    text
      .replace(/\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:?\d{2})?)?/g, 'DATE_PLACEHOLDER')
      .replace(/\b\d{10,13}\b/g, 'TIMESTAMP');

  const normalized = { ...body };
  const system: unknown = normalized.system;

  if (typeof system === 'string') {
    // Convert string system to array format so we can add cache_control
    normalized.system = [
      { type: 'text', text: scrubVolatileText(system), cache_control: { type: 'ephemeral' } },
    ];
  } else if (Array.isArray(system) && system.length > 0) {
    const lastIndex = system.length - 1;
    normalized.system = system.map((block: any, i: number) => {
      // Guard against null/primitive entries: reading `.type` on them would throw.
      const isTextBlock =
        block !== null &&
        typeof block === 'object' &&
        block.type === 'text' &&
        typeof block.text === 'string';
      if (!isTextBlock) return block;

      return {
        ...block,
        text: scrubVolatileText(block.text),
        // Add cache_control to the last system block (marks end of cacheable prefix)
        ...(i === lastIndex ? { cache_control: { type: 'ephemeral' } } : {}),
      };
    });
  }

  return normalized;
}
319
+
320
/**
 * Collect an entire response body into memory.
 *
 * Resolves with the concatenated chunks decoded via `Buffer#toString()` once
 * the stream emits 'end'; rejects with the stream's error if it emits 'error'.
 */
function bufferResponse(res: IncomingMessage): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const parts: Buffer[] = [];

    res.on('data', (piece) => {
      parts.push(piece);
    });
    res.on('end', () => {
      const whole = Buffer.concat(parts);
      resolve(whole.toString());
    });
    res.on('error', reject);
  });
}
328
+
329
/**
 * Derive a human-readable error message from a provider's error response body.
 *
 * Looks, in order, for a non-empty string at `error.message`, at `error`
 * itself, and at the top-level `message`. Anything else (invalid JSON,
 * non-object JSON, missing or non-string fields) yields the generic fallback
 * `Provider <provider> returned <statusCode>`, so the result is always a string.
 *
 * @param body       Raw response body text from the provider.
 * @param statusCode HTTP status the provider answered with.
 * @param provider   Provider name used in the fallback message.
 */
function extractErrorMessage(body: string, statusCode: number, provider: string): string {
  const fallback = `Provider ${provider} returned ${statusCode}`;
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === 'string' && value.length > 0 ? value : undefined;

  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    return fallback;
  }
  if (parsed === null || typeof parsed !== 'object') return fallback;

  const { error, message } = parsed as { error?: unknown; message?: unknown };
  const nestedMessage =
    error !== null && typeof error === 'object'
      ? (error as { message?: unknown }).message
      : undefined;

  return nonEmptyString(nestedMessage) ?? nonEmptyString(error) ?? nonEmptyString(message) ?? fallback;
}
337
+
338
/**
 * POST /v1/messages/count_tokens
 *
 * Answers with a rough local estimate rather than a real tokenizer count:
 * the JSON-serialized `messages` and `system` fields are measured in
 * characters and divided by four, rounded up.
 */
messagesRouter.post('/messages/count_tokens', apiKeyAuth, (req: Request, res: Response) => {
  const { messages, system } = req.body;

  const serializedMessages = JSON.stringify(messages || []);
  const serializedSystem = JSON.stringify(system || '');
  const combined = serializedMessages + serializedSystem;

  res.json({ input_tokens: Math.ceil(combined.length / 4) });
});
@@ -0,0 +1,37 @@
1
+ import { Router } from 'express';
2
+
3
export const modelsRouter = Router();

/**
 * GET /v1/models
 *
 * Serves a fixed list of three Claude model descriptors in a paginated-list
 * envelope (`data`, `has_more`, `first_id`, `last_id`), so that clients
 * expecting the Anthropic models endpoint receive a familiar response.
 */
modelsRouter.get('/models', (_req, res) => {
  const describeModel = (id: string, displayName: string, createdAt: string) => ({
    id,
    display_name: displayName,
    type: 'model',
    created_at: createdAt,
  });

  const catalogue = [
    describeModel('claude-sonnet-4-6', 'Claude Sonnet 4.6', '2025-05-14T00:00:00Z'),
    describeModel('claude-opus-4-6', 'Claude Opus 4.6', '2025-05-14T00:00:00Z'),
    describeModel('claude-haiku-4-5', 'Claude Haiku 4.5', '2025-02-24T00:00:00Z'),
  ];

  res.json({
    data: catalogue,
    has_more: false,
    first_id: catalogue[0].id,
    last_id: catalogue[catalogue.length - 1].id,
  });
});