krasavacode 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,18 +5,18 @@ import { ensurePreset } from '../src/preset.js';
5
5
  import { launchClaude } from '../src/launch.js';
6
6
  import { runUpgrade } from '../src/upgrade.js';
7
7
  import { runDoctor } from '../src/doctor.js';
8
- import { runSetupGemini } from '../src/setup-gemini.js';
8
+ import { runSetup } from '../src/setup.js';
9
9
 
10
10
  // Hardcoded so it works inside Bun --compile (no FS access to package.json)
11
- const VERSION = '0.3.6';
11
+ const VERSION = '0.4.0';
12
12
 
13
13
  const cmd = process.argv[2];
14
14
 
15
15
  async function main() {
16
16
  if (cmd === 'doctor') return runDoctor();
17
17
  if (cmd === 'upgrade') return runUpgrade();
18
- if (cmd === 'setup-gemini' || cmd === 'gemini') {
19
- const result = await runSetupGemini();
18
+ if (cmd === 'setup' || cmd === 'setup-gemini' || cmd === 'gemini') {
19
+ const result = await runSetup();
20
20
  if (!result?.launchAfter) return;
21
21
  // fall through to normal launch flow below
22
22
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "krasavacode",
3
- "version": "0.3.6",
3
+ "version": "0.4.0",
4
4
  "description": "KRASAVACODE — однокнопочный бесплатный вайбкодинг для учеников. Claude Code на бесплатных провайдерах через локальный gateway. Сам ставит Node при необходимости.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Per-provider cooldown tracking.
3
+ *
4
+ * When a provider returns 429, metrics-proxy stamps a cooldown until X.
5
+ * - per-minute rate limit (RPM): 60 sec cooldown
6
+ * - per-day quota (RPD/TPD): until 11:00 МСК next day (~daily reset)
7
+ *
8
+ * The custom router (~/.krasavacode/router.js) reads this file on every
9
+ * request and skips providers whose cooldown is still in the future.
10
+ */
11
+
12
+ import { readFile, writeFile, mkdir } from 'node:fs/promises';
13
+ import { homedir } from 'node:os';
14
+ import { join } from 'node:path';
15
+
16
+ const ROOT = join(homedir(), '.krasavacode');
17
+ const COOLDOWN_FILE = join(ROOT, 'cooldowns.json');
18
+
19
/**
 * Load the persisted cooldown map from COOLDOWN_FILE.
 *
 * Best-effort by design: a missing, unreadable, or malformed file is treated
 * as "no cooldowns". Valid JSON that is not a plain object (e.g. `null`, an
 * array, a number) is also normalised to `{}` so that callers can safely
 * index the result with `cd[providerId]`.
 *
 * @returns {Promise<Object>} the parsed cooldown map, or `{}` when nothing
 *   usable is stored.
 */
export async function getCooldowns() {
  try {
    const parsed = JSON.parse(await readFile(COOLDOWN_FILE, 'utf8'));
    const isPlainObject =
      parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Deliberate fallback: absence or corruption of the file must not block requests.
    return {};
  }
}
23
+
24
/**
 * Record a cooldown for one provider and persist the whole map.
 *
 * @param {string} providerId - key under which the cooldown is stored.
 * @param {Date} until - moment the cooldown ends; stored via `toISOString()`.
 * @returns {Promise<void>}
 */
export async function setCooldown(providerId, until) {
  await mkdir(ROOT, { recursive: true });
  const cooldowns = await getCooldowns();
  Object.assign(cooldowns, {
    [providerId]: until.toISOString(),
    _lastUpdated: new Date().toISOString(),
  });
  const serialized = JSON.stringify(cooldowns, null, 2);
  await writeFile(COOLDOWN_FILE, serialized);
}
31
+
32
/**
 * Remove the stored cooldown for one provider.
 *
 * When the provider has no stored entry this is a no-op: nothing is written,
 * so the call cannot fail with ENOENT merely because the state directory has
 * not been created yet. When an entry is removed, `_lastUpdated` is refreshed
 * the same way `setCooldown` does.
 *
 * @param {string} providerId - key whose cooldown should be dropped.
 * @returns {Promise<void>}
 */
export async function clearCooldown(providerId) {
  const cd = await getCooldowns();
  if (!Object.prototype.hasOwnProperty.call(cd, providerId)) return;

  delete cd[providerId];
  cd._lastUpdated = new Date().toISOString();
  // The file was just read, but recreate the directory in case it vanished in between.
  await mkdir(ROOT, { recursive: true });
  await writeFile(COOLDOWN_FILE, JSON.stringify(cd, null, 2));
}
37
+
38
/**
 * Tell whether a provider currently has an unexpired cooldown.
 *
 * @param {string} providerId - key to look up in the cooldown map.
 * @returns {Promise<boolean>} `true` only when a stored timestamp exists and
 *   lies in the future.
 */
export async function isOnCooldown(providerId) {
  const { [providerId]: expiresAt } = await getCooldowns();
  return expiresAt ? new Date(expiresAt).getTime() > Date.now() : false;
}
43
+
44
/**
 * Compute the moment a provider's cooldown should be lifted after a 429.
 *
 * @param {string|null|undefined} reason - `'per-minute'` for a short rate
 *   limit; any other value (including `'per-day'`, `null`, `undefined`) is
 *   treated as a daily-quota hit.
 * @param {number} [now=Date.now()] - reference time in epoch milliseconds.
 *   Read once so both branches are computed from the same instant.
 * @returns {Date} `now + 60 s` for `'per-minute'`; otherwise the next
 *   08:00 UTC (11:00 MSK) strictly after `now`.
 */
export function cooldownUntil(reason, now = Date.now()) {
  if (reason === 'per-minute') {
    return new Date(now + 60_000);
  }
  // per-day or unknown — wait for the next 08:00 UTC (11:00 MSK).
  // NOTE(review): 08:00 UTC equals midnight Pacific only in standard time; in
  // daylight time that is 07:00 UTC, so this errs an hour late — confirm.
  const next = new Date(now);
  next.setUTCHours(8, 0, 0, 0);
  // `<=` so that a call at exactly 08:00:00.000 UTC rolls to the next day
  // instead of producing a cooldown that has already expired.
  if (next.getTime() <= now) next.setUTCDate(next.getUTCDate() + 1);
  return next;
}
package/src/doctor.js CHANGED
@@ -3,6 +3,8 @@ import { homedir, platform, arch, totalmem } from 'node:os';
3
3
  import { join } from 'node:path';
4
4
  import { access, readFile } from 'node:fs/promises';
5
5
  import net from 'node:net';
6
+ import { configuredProviders, PROVIDERS } from './providers.js';
7
+ import { getCooldowns } from './cooldowns.js';
6
8
 
7
9
  const ROOT = join(homedir(), '.krasavacode');
8
10
 
@@ -63,6 +65,23 @@ export async function runDoctor() {
63
65
  check('3456 (claude-code-router)', await checkPort(3456), 'свободен или используется ccr');
64
66
  check('20128 (omniroute upgrade)', await checkPort(20128));
65
67
 
68
+ console.log('\nПровайдеры:');
69
+ const cfg = await configuredProviders();
70
+ const cd = await getCooldowns();
71
+ if (cfg.length === 0) {
72
+ console.log(' пусто (используется Pollinations gpt-oss-20b — слабая модель)');
73
+ console.log(' → krasavacode setup для подключения Cerebras / Groq / Gemini');
74
+ } else {
75
+ let i = 1;
76
+ for (const id of cfg) {
77
+ const p = PROVIDERS[id];
78
+ const onCD = cd[id] && new Date(cd[id]).getTime() > Date.now();
79
+ const status = onCD ? `⏳ cooldown до ${new Date(cd[id]).toLocaleString('ru')}` : '✓ готов';
80
+ console.log(` ${i++}. ${p.name} — ${status}`);
81
+ }
82
+ console.log(` ${i}. Pollinations (последний резерв)`);
83
+ }
84
+
66
85
  console.log('\nState:');
67
86
  try {
68
87
  const state = JSON.parse(await readFile(join(ROOT, 'state.json'), 'utf8'));
package/src/hub.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { spawn } from 'node:child_process';
2
2
  import { setTimeout as sleep } from 'node:timers/promises';
3
3
  import { CCR_PORT } from './preset.js';
4
- import { loadGeminiKey } from './setup-gemini.js';
4
+ import { configuredProviders, loadProviderKey, getProviderEnvVarName } from './providers.js';
5
5
  import { startMetricsProxy } from './metrics-proxy.js';
6
6
 
7
7
  const HOST = '127.0.0.1';
@@ -40,11 +40,13 @@ export async function startHub(paths) {
40
40
 
41
41
  process.stdout.write(`🚀 Поднимаю локальный gateway на порту ${PORT}… `);
42
42
 
43
- // Inject GEMINI_API_KEY into ccr env if user has configured Gemini.
44
- // ccr's config.json references it as $GEMINI_API_KEY (env-interpolation).
43
+ // Inject every configured provider's API key as env var so that ccr's
44
+ // config.json can reference them via interpolation ($CEREBRAS_API_KEY etc).
45
45
  const ccrEnv = { ...paths.env };
46
- const geminiKey = await loadGeminiKey();
47
- if (geminiKey) ccrEnv.GEMINI_API_KEY = geminiKey;
46
+ for (const id of await configuredProviders()) {
47
+ const key = await loadProviderKey(id);
48
+ if (key) ccrEnv[getProviderEnvVarName(id)] = key;
49
+ }
48
50
 
49
51
  const child = spawn(paths.ccrBin, ['start'], {
50
52
  stdio: process.env.KRASAVACODE_DEBUG ? 'inherit' : 'pipe',
package/src/launch.js CHANGED
@@ -2,14 +2,15 @@ import { spawn } from 'node:child_process';
2
2
  import { mkdir } from 'node:fs/promises';
3
3
  import { homedir } from 'node:os';
4
4
  import { join } from 'node:path';
5
- import { isGeminiConfigured } from './setup-gemini.js';
6
- import { getQuotaInfo } from './metrics-proxy.js';
5
+ import { configuredProviders, PROVIDERS } from './providers.js';
6
+ import { getCooldowns } from './cooldowns.js';
7
7
 
8
8
  const PLACEHOLDER_TOKEN = 'sk-krasavacode-local';
9
9
  const CLAUDE_CONFIG_DIR = join(homedir(), '.krasavacode', 'claude-config');
10
10
 
11
11
  export async function launchClaude(paths, hub /*, detection */) {
12
- const geminiOn = await isGeminiConfigured();
12
+ const configured = await configuredProviders();
13
+ const cooldowns = await getCooldowns();
13
14
  // Isolate Claude Code's config/credentials from any real Anthropic login
14
15
  // the student may have on this machine (~/.claude/). This is the *only*
15
16
  // way to suppress the "Welcome back, NAME · publerplatforma@gmail.com's
@@ -47,7 +48,7 @@ export async function launchClaude(paths, hub /*, detection */) {
47
48
  // Set KRASAVACODE_BARE=0 to disable for debugging.
48
49
  const useBare = process.env.KRASAVACODE_BARE !== '0';
49
50
  const passthroughArgs = process.argv.slice(2)
50
- .filter(a => !['doctor', 'upgrade', 'setup-gemini', 'gemini'].includes(a));
51
+ .filter(a => !['doctor', 'upgrade', 'setup', 'setup-gemini', 'gemini'].includes(a));
51
52
  if (useBare && !passthroughArgs.includes('--bare')) passthroughArgs.unshift('--bare');
52
53
 
53
54
  const W = 64;
@@ -55,30 +56,26 @@ export async function launchClaude(paths, hub /*, detection */) {
55
56
  const pad = Math.max(0, W - 2 - [...txt].length);
56
57
  return '┃ ' + txt + ' '.repeat(pad) + '┃';
57
58
  };
58
- const quota = await getQuotaInfo();
59
59
  console.log('');
60
60
  console.log('┏' + '━'.repeat(W - 1) + '┓');
61
61
  console.log(line(' K R A S A V A C O D E'));
62
62
  console.log(line(' Бесплатный вайбкодинг через локальный hub'));
63
63
  console.log('┣' + '━'.repeat(W - 1) + '┫');
64
- if (geminiOn) {
65
- console.log(line(' Модель: Google Gemini 2.5 Flash'));
66
- const left = quota.perDay - quota.used;
67
- const warn = Math.floor(quota.perDay / 5); // 20%
68
- if (left > warn) {
69
- console.log(line(` Сегодня осталось: ${left} из ${quota.perDay} запросов`));
70
- } else if (left > 0) {
71
- console.log(line(` ⚠️ Осталось ${left} из ${quota.perDay} — обнулится в ~11:00 МСК`));
72
- } else {
73
- console.log(line(` ❌ Лимит на сегодня исчерпан (${quota.used} из ${quota.perDay})`));
74
- console.log(line(` Обнулится в ~11:00 МСК. krasavacode setup-gemini`));
75
- }
76
- console.log(line(' 1 твой вопрос ≈ 3–10 запросов (Claude использует tools)'));
64
+ if (configured.length === 0) {
65
+ console.log(line(' · Pollinations (gpt-oss-20b, без квот, слабая модель)'));
66
+ console.log(line(' 💡 Лучше модель бесплатно: krasavacode setup'));
77
67
  } else {
78
- console.log(line(' · Модель: gpt-oss-20b через Pollinations'));
79
- console.log(line(' (бесплатно, без логина)'));
80
- console.log(line(` Сегодня сделал ${quota.used} запросов`));
81
- console.log(line(' 💡 Лучше модель: krasavacode setup-gemini'));
68
+ console.log(line(' Активная цепочка фолбэков:'));
69
+ let i = 1;
70
+ for (const id of configured) {
71
+ const p = PROVIDERS[id];
72
+ const cd = cooldowns[id];
73
+ const onCooldown = cd && new Date(cd).getTime() > Date.now();
74
+ const tag = onCooldown ? '⏳ на cooldown' : '✓ готов';
75
+ console.log(line(` ${i++}. ${p.name} — ${tag}`));
76
+ }
77
+ console.log(line(` ${i}. Pollinations (последний резерв)`));
78
+ console.log(line(' При 429 — автоматически прыгает на следующий'));
82
79
  }
83
80
  console.log('┗' + '━'.repeat(W - 1) + '┛');
84
81
  console.log('');
@@ -1,40 +1,33 @@
1
1
  import http from 'node:http';
2
2
  import net from 'node:net';
3
- import { readFile, writeFile, mkdir, access } from 'node:fs/promises';
3
+ import { readFile, writeFile, mkdir } from 'node:fs/promises';
4
4
  import { join } from 'node:path';
5
5
  import { homedir } from 'node:os';
6
- import { isGeminiConfigured } from './setup-gemini.js';
6
+ import { configuredProviders, PROVIDERS, PROVIDER_PRIORITY } from './providers.js';
7
+ import { setCooldown, getCooldowns, cooldownUntil } from './cooldowns.js';
7
8
 
8
9
  const ROOT = join(homedir(), '.krasavacode');
9
10
  const USAGE_FILE = join(ROOT, 'usage.json');
10
11
 
11
- // Google free tier (2026): https://ai.google.dev/gemini-api/docs/rate-limits
12
- // Gemini 2.5 Flash free: 10 RPM, 250k TPM, 250 RPD (request-per-day).
13
- const FREE_QUOTA = {
14
- gemini: { perDay: 250, rpm: 10, label: 'Google Gemini 2.5 Flash (free tier)' },
15
- pollinations: { perDay: null, label: 'Pollinations (free)' },
16
- };
17
-
18
- function todayKey() {
19
- return new Date().toISOString().slice(0, 10);
20
- }
12
/** Return the current UTC calendar date as the first 10 characters of the ISO timestamp (`YYYY-MM-DD`). */
function todayKey() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
21
13
 
22
14
  async function readUsage() {
23
15
  try { return JSON.parse(await readFile(USAGE_FILE, 'utf8')); }
24
16
  catch { return {}; }
25
17
  }
26
-
27
18
  async function writeUsage(u) {
28
19
  await mkdir(ROOT, { recursive: true });
29
20
  await writeFile(USAGE_FILE, JSON.stringify(u, null, 2));
30
21
  }
31
22
 
32
- async function bump() {
23
+ async function bump(providerId) {
33
24
  const u = await readUsage();
34
25
  const day = todayKey();
35
- u[day] = (u[day] || 0) + 1;
26
+ if (!u[day]) u[day] = {};
27
+ if (typeof u[day] === 'number') u[day] = { _total: u[day] };
28
+ u[day][providerId || '_unknown'] = (u[day][providerId || '_unknown'] || 0) + 1;
29
+ u[day]._total = (u[day]._total || 0) + 1;
36
30
  u.lastRequestAt = new Date().toISOString();
37
- // keep only last 30 days
38
31
  for (const k of Object.keys(u)) {
39
32
  if (/^\d{4}-\d{2}-\d{2}$/.test(k)) {
40
33
  const age = (Date.now() - new Date(k).getTime()) / 86400000;
@@ -46,14 +39,10 @@ async function bump() {
46
39
 
47
40
  export async function getTodayUsage() {
48
41
  const u = await readUsage();
49
- return u[todayKey()] || 0;
50
- }
51
-
52
- export async function getQuotaInfo() {
53
- const provider = (await isGeminiConfigured()) ? 'gemini' : 'pollinations';
54
- const used = await getTodayUsage();
55
- const { perDay, label } = FREE_QUOTA[provider];
56
- return { provider, used, perDay, label, remaining: perDay ? Math.max(0, perDay - used) : null };
42
+ const today = u[todayKey()];
43
+ if (!today) return 0;
44
+ if (typeof today === 'number') return today;
45
+ return today._total || 0;
57
46
  }
58
47
 
59
48
  function getFreePort() {
@@ -68,125 +57,178 @@ function getFreePort() {
68
57
  });
69
58
  }
70
59
 
71
- function formatGeminiQuotaReason(upstreamBody) {
72
- // Google's 429 body looks like:
73
- // { "error": { "code": 429, "message": "...",
74
- // "details": [{"@type": ".../QuotaFailure",
75
- // "violations": [{"quotaMetric":"...generate_content_free_tier_requests",
76
- // "quotaId":"...PerDay..."}]}] }}
60
/**
 * Pick the first configured provider that is not on cooldown, in the order
 * returned by `configuredProviders()`.
 *
 * Ids with no entry in `PROVIDERS` are skipped rather than dereferenced, so a
 * stale id cannot throw inside the request handler. When every configured
 * provider is cooling down, Pollinations (model `'openai'`) is used unless it
 * is on cooldown too.
 *
 * @returns {Promise<{id: string, model: string}|null>} the route to use, or
 *   `null` when nothing is available right now.
 */
async function chooseProvider() {
  // The two reads are independent — run them concurrently.
  const [cd, configured] = await Promise.all([getCooldowns(), configuredProviders()]);
  const now = Date.now();
  const onCooldown = (id) => Boolean(cd[id]) && new Date(cd[id]).getTime() > now;

  for (const id of configured) {
    const provider = PROVIDERS[id];
    if (!provider) continue;
    if (!onCooldown(id)) return { id, model: provider.defaultModel };
  }
  // All custom providers exhausted — fall back to Pollinations
  if (!onCooldown('pollinations')) return { id: 'pollinations', model: 'openai' };
  return null;
}
74
+
75
+ function parseQuotaReason(upstreamBody) {
77
76
  try {
78
77
  const parsed = JSON.parse(upstreamBody);
79
- const violations = parsed.error?.details?.find(d => d['@type']?.includes('QuotaFailure'))?.violations || [];
80
- if (violations.length === 0) return null;
81
-
82
- const v = violations[0];
83
- const id = v.quotaId || v.quotaMetric || '';
84
- const isPerMinute = /PerMinute/i.test(id);
85
- const isPerDay = /PerDay/i.test(id);
86
- const isTokens = /Token|input_token|output_token/i.test(id);
87
-
88
- if (isPerMinute) return 'Слишком много запросов в минуту (лимит — 10 запросов/мин). Подожди 30–60 секунд и продолжай.';
89
- if (isPerDay && isTokens) return 'Закончился дневной лимит входных токенов Gemini (≈250k/день).';
90
- if (isPerDay) return 'Закончилась дневная квота запросов к Gemini (≈250 запросов/день для 2.5-flash).';
91
- return `Google Gemini ограничил запрос: ${id}`;
92
- } catch { return null; }
78
+ const violations = parsed.error?.details?.find(d => d['@type']?.includes('QuotaFailure'))?.violations;
79
+ if (violations?.length) {
80
+ const id = violations[0].quotaId || violations[0].quotaMetric || '';
81
+ if (/PerMinute/i.test(id)) return 'per-minute';
82
+ return 'per-day';
83
+ }
84
+ const msg = String(parsed.error?.message || '').toLowerCase();
85
+ if (msg.includes('per minute') || msg.includes('per-minute') || msg.includes('rpm')) return 'per-minute';
86
+ if (msg.includes('per day') || msg.includes('per-day') || msg.includes('rpd') || msg.includes('quota')) return 'per-day';
87
+ } catch {}
88
+ return null;
93
89
  }
94
90
 
95
- const FRIENDLY_429 = (provider, used, upstreamBody) => {
96
- if (provider === 'gemini') {
97
- const reason = formatGeminiQuotaReason(upstreamBody) ||
98
- `Google ограничил запрос (использовано ${used} запросов сегодня через нас).`;
99
- return {
100
- type: 'error',
101
- error: {
102
- type: 'rate_limit_error',
103
- message:
104
- `${reason}\n\n` +
105
- `Квоты обнуляются в полночь по тихоокеанскому времени (≈11:00 МСК).\n` +
106
- `На один твой вопрос Claude Code делает 3–10 запросов (читает файлы, использует инструменты),\n` +
107
- `поэтому реальный счёт у Google быстрее, чем в нашем счётчике.\n\n` +
108
- `Что делать:\n` +
109
- ` Подожди минуту (если упёрлись в RPM) или до завтра (если в дневной)\n` +
110
- ` Подключи второй Google-аккаунт: krasavacode setup-gemini\n` +
111
- ` Временно вернись на Pollinations (без квот): удали ~/.krasavacode/gemini.env`,
112
- },
113
- };
91
/**
 * Build the error payload returned to the client when no provider can serve
 * the request. A fresh object is created on every call.
 *
 * @returns {{type: string, error: {type: string, message: string}}}
 */
const FRIENDLY_429 = () => {
  const messageLines = [
    'Все настроенные AI-провайдеры исчерпаны или временно перегружены.',
    '',
    'Что делать:',
    ' • Подожди 1–2 минуты (если упёрлись в RPM) и попробуй опять',
    ' • Подключи ещё провайдер: krasavacode setup',
    ' Дневные лимиты обновляются в ~11:00 МСК',
  ];
  const error = {
    type: 'rate_limit_error',
    message: messageLines.join('\n'),
  };
  return { type: 'error', error };
};
103
+
104
+ function rewriteBodyWithProvider(originalBody, providerId, modelName) {
105
+ // claude-code-router treats body.model in form "provider,modelName" as a
106
+ // direct route, bypassing Router config. We use that to fully control
107
+ // provider selection from the proxy layer.
108
+ try {
109
+ const parsed = JSON.parse(originalBody);
110
+ parsed.model = `${providerId},${modelName}`;
111
+ return Buffer.from(JSON.stringify(parsed));
112
+ } catch {
113
+ return originalBody;
114
114
  }
115
- return {
116
- type: 'error',
117
- error: {
118
- type: 'rate_limit_error',
119
- message:
120
- `Pollinations на минуту перегружен. Подожди ~30 секунд и попробуй ещё раз.\n` +
121
- `Или подключи Gemini: krasavacode setup-gemini`,
122
- },
123
- };
124
- };
125
-
126
- /**
127
- * Proxy: Claude Code metrics-proxy (this) → ccr → upstream.
128
- *
129
- * - Counts every successful POST /v1/messages as one request, written to ~/.krasavacode/usage.json
130
- * - Replaces 429 responses with a friendly Russian message
131
- * - Streams everything else through unmodified (so SSE works)
132
- */
115
+ }
116
+
117
/**
 * Send one fully buffered HTTP request to the upstream and resolve with the
 * upstream response object as soon as its headers arrive (the response body
 * is left unread for the caller to pipe or buffer).
 *
 * Because the body is sent from a buffer, message framing is re-derived here:
 * any incoming `transfer-encoding` / `content-length` / `host` header is
 * dropped (case-insensitively) and replaced by a `content-length` matching
 * the buffer and a `host` matching the upstream. Forwarding the client's
 * `transfer-encoding: chunked` next to a fixed `content-length` would give the
 * upstream conflicting framing.
 *
 * @param {{hostname: string, port: (string|number)}} upstream - target host/port.
 * @param {string} method - HTTP method.
 * @param {string} path - request path including any query string.
 * @param {Object} headers - headers to forward.
 * @param {Buffer} [bodyBuffer] - request body; empty or missing means no body.
 * @returns {Promise<import('node:http').IncomingMessage>} the upstream
 *   response; rejects on a request-level error.
 */
function forward(upstream, method, path, headers, bodyBuffer) {
  const body = bodyBuffer && bodyBuffer.length ? bodyBuffer : null;
  const recomputed = new Set(['transfer-encoding', 'content-length', 'host']);

  const outgoing = {};
  for (const [name, value] of Object.entries(headers || {})) {
    if (!recomputed.has(name.toLowerCase())) outgoing[name] = value;
  }
  outgoing.host = `${upstream.hostname}:${upstream.port}`;
  outgoing['content-length'] = body ? Buffer.byteLength(body) : 0;

  return new Promise((resolve, reject) => {
    const req = http.request({
      hostname: upstream.hostname,
      port: upstream.port,
      path,
      method,
      headers: outgoing,
    }, resolve);
    req.on('error', reject);
    if (body) req.write(body);
    req.end();
  });
}
135
+
133
136
  export async function startMetricsProxy(upstreamBaseUrl) {
134
137
  const upstream = new URL(upstreamBaseUrl);
135
138
  const port = await getFreePort();
136
-
137
139
  const debug = process.env.KRASAVACODE_DEBUG === '1';
138
- const server = http.createServer((req, res) => {
140
+
141
+ const server = http.createServer(async (req, res) => {
139
142
  const path = (req.url || '').split('?')[0];
140
143
  const isMessages = req.method === 'POST' && path === '/v1/messages';
141
144
  if (debug) console.error(`[metrics] ${req.method} ${req.url}`);
142
145
 
143
- const proxyReq = http.request({
144
- hostname: upstream.hostname,
145
- port: upstream.port,
146
- path: req.url,
147
- method: req.method,
148
- headers: req.headers,
149
- }, async (upRes) => {
150
- if (debug) console.error(`[metrics] ← ${upRes.statusCode} ${req.url}`);
151
- // Treat any 2xx on /v1/messages as one billable request — count immediately.
152
- if (isMessages && upRes.statusCode >= 200 && upRes.statusCode < 300) {
153
- bump().catch(e => debug && console.error('[metrics] bump fail', e));
154
- }
155
-
156
- // 429: replace body with a friendly Russian message that includes
157
- // a parsed reason from Google's QuotaFailure details.
158
- if (upRes.statusCode === 429 && !/text\/event-stream/.test(upRes.headers['content-type'] || '')) {
159
- const used = await getTodayUsage();
160
- const provider = (await isGeminiConfigured()) ? 'gemini' : 'pollinations';
161
- const chunks = [];
162
- upRes.on('data', d => chunks.push(d));
163
- upRes.on('end', () => {
164
- const upstreamBody = Buffer.concat(chunks).toString('utf8');
165
- if (debug) console.error('[metrics] 429 upstream body:', upstreamBody.slice(0, 500));
166
- const friendly = JSON.stringify(FRIENDLY_429(provider, used, upstreamBody));
167
- const headers = { ...upRes.headers, 'content-type': 'application/json' };
168
- delete headers['content-length'];
169
- delete headers['content-encoding'];
170
- res.writeHead(429, headers);
171
- res.end(friendly);
172
- });
146
+ const chunks = [];
147
+ req.on('data', d => chunks.push(d));
148
+ req.on('end', async () => {
149
+ const originalBody = Buffer.concat(chunks);
150
+
151
+ if (!isMessages) {
152
+ try {
153
+ const upRes = await forward(upstream, req.method, req.url, req.headers, originalBody);
154
+ res.writeHead(upRes.statusCode, upRes.headers);
155
+ upRes.pipe(res);
156
+ } catch (e) {
157
+ res.writeHead(502, { 'Content-Type': 'application/json' });
158
+ res.end(JSON.stringify({ error: { type: 'upstream_error', message: e.message } }));
159
+ }
173
160
  return;
174
161
  }
175
162
 
176
- res.writeHead(upRes.statusCode, upRes.headers);
177
- upRes.pipe(res);
178
- });
163
+ // /v1/messages: provider selection with retry-on-429
164
+ for (let attempt = 1; attempt <= 4; attempt++) {
165
+ const choice = await chooseProvider();
166
+ if (!choice) {
167
+ if (debug) console.error('[metrics] all providers on cooldown');
168
+ res.writeHead(429, { 'Content-Type': 'application/json' });
169
+ res.end(JSON.stringify(FRIENDLY_429()));
170
+ return;
171
+ }
172
+
173
+ const rewrittenBody = rewriteBodyWithProvider(originalBody, choice.id, choice.model);
174
+ if (debug) console.error(`[metrics] attempt ${attempt}: routing to ${choice.id},${choice.model}`);
175
+
176
+ let upRes;
177
+ try {
178
+ upRes = await forward(upstream, req.method, req.url, req.headers, rewrittenBody);
179
+ } catch (e) {
180
+ res.writeHead(502, { 'Content-Type': 'application/json' });
181
+ res.end(JSON.stringify({ error: { type: 'upstream_error', message: e.message } }));
182
+ return;
183
+ }
184
+
185
+ if (debug) console.error(`[metrics] attempt ${attempt} → ${upRes.statusCode}`);
186
+
187
+ if (upRes.statusCode !== 429) {
188
+ if (upRes.statusCode >= 200 && upRes.statusCode < 300) {
189
+ bump(choice.id).catch(() => {});
190
+ }
191
+ res.writeHead(upRes.statusCode, upRes.headers);
192
+ upRes.pipe(res);
193
+ return;
194
+ }
195
+
196
+ // 429 — buffer body, set cooldown for THIS provider, retry with next
197
+ const errChunks = [];
198
+ upRes.on('data', d => errChunks.push(d));
199
+ await new Promise(r => upRes.on('end', r));
200
+ const upBody = Buffer.concat(errChunks).toString('utf8');
201
+ if (debug) console.error(`[metrics] 429 from ${choice.id}: ${upBody.slice(0, 200)}`);
202
+
203
+ const reason = parseQuotaReason(upBody);
204
+ // Pollinations has no daily quota — only short burst-throttling.
205
+ // Treat its 429 as a 60s cooldown so we don't block it until tomorrow.
206
+ const effectiveReason = choice.id === 'pollinations' ? 'per-minute' : reason;
207
+ await setCooldown(choice.id, cooldownUntil(effectiveReason));
208
+ // loop continues — next iteration picks a different provider
209
+ }
179
210
 
180
- proxyReq.on('error', (err) => {
181
- res.writeHead(502, { 'Content-Type': 'application/json' });
182
- res.end(JSON.stringify({ error: { type: 'upstream_error', message: err.message } }));
211
+ // Exhausted attempts
212
+ res.writeHead(429, { 'Content-Type': 'application/json' });
213
+ res.end(JSON.stringify(FRIENDLY_429()));
183
214
  });
184
215
 
185
- req.pipe(proxyReq);
216
+ req.on('error', () => {});
186
217
  });
187
218
 
188
219
  await new Promise(r => server.listen(port, '127.0.0.1', r));
220
+ return {
221
+ server,
222
+ port,
223
+ baseUrl: `http://127.0.0.1:${port}`,
224
+ stop: () => new Promise(r => server.close(r)),
225
+ };
226
+ }
189
227
 
190
- const baseUrl = `http://127.0.0.1:${port}`;
191
- return { server, port, baseUrl, stop: () => new Promise(r => server.close(r)) };
228
/**
 * Collect a snapshot of today's usage, the configured provider list, and the
 * current cooldown map.
 *
 * The three lookups are independent, so they are started together instead of
 * being awaited one after another.
 *
 * @returns {Promise<{used: *, configured: *, cooldowns: *}>} values resolved
 *   from `getTodayUsage()`, `configuredProviders()` and `getCooldowns()`.
 */
export async function getQuotaInfo() {
  const [used, configured, cooldowns] = await Promise.all([
    getTodayUsage(),
    configuredProviders(),
    getCooldowns(),
  ]);
  return { used, configured, cooldowns };
}