thebird 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md ADDED
@@ -0,0 +1,116 @@
1
+ # thebird Development Notes
2
+
3
+ ## Architecture Overview
4
+
5
+ **thebird** is an Anthropic SDK adapter that translates Anthropic-format messages and tool calls for multiple LLM providers (Gemini, OpenAI-compatible APIs). It's a drop-in bridge — you write Anthropic-format code, and thebird routes it to any supported provider.
6
+
7
+ ### Message Translation
8
+
9
+ Anthropic format:
10
+ ```js
11
+ [{ role: 'user', content: [
12
+ { type: 'text', text: '...' },
13
+ { type: 'image', source: { type: 'base64', media_type: 'image/png', data: '...' } }
14
+ ] }]
15
+ ```
16
+
17
+ Translates to provider-native format:
18
+ - **Gemini**: `parts: [{ text: '...' }, { inlineData: { mimeType: '...', data: '...' } }]`
19
+ - **OpenAI**: `content: [{ type: 'text', text: '...' }, { type: 'image_url', image_url: { url: '...' } }]`
20
+
21
+ ### Tool Calling
22
+
23
+ Anthropic tool schema → provider native → normalized response back to Anthropic format.
24
+
25
+ Streaming events (all events are Anthropic-compatible):
26
+ - `text-delta`, `tool-use-start`, `tool-use-delta`, `message-start`, `message-stop`
27
+
28
+ ### Routing (Multi-Provider)
29
+
30
+ `createRouter()` picks provider+model per request based on:
31
+ 1. `taskType` (e.g., 'think', 'background', 'longContext')
32
+ 2. Token count vs `longContextThreshold`
33
+
34
+ Routes are defined as `provider,model` strings in config.
35
+
36
+ ### Transformers
37
+
38
+ Some providers need field adjustments:
39
+ - `deepseek`: strips `cache_control`, `repetition_penalty`
40
+ - `groq`: removes `top_k`
41
+ - `reasoning`: moves `reasoning_content` to `_reasoning`
42
+
43
+ Applied automatically during request building.
44
+
45
+ ## gembird — Image Generation via Browser
46
+
47
+ **gembird** generates 4-view product images (front, back, left-side, right-side) using Gemini's web UI.
48
+
49
+ ### Why Browser Automation?
50
+
51
+ The Gemini API free tier has zero quota for image generation, whereas the web UI works without limits. Tradeoff: browser automation is slower than the API and depends on UI stability, but it needs no quota.
52
+
53
+ ### Workflow
54
+
55
+ 1. Playwright CDP connection to Chrome on `localhost:9222`
56
+ 2. Navigate to gemini.google.com
57
+ 3. For each view:
58
+ - Type prompt asking for that view
59
+ - Poll for new `<img alt="AI generated">` (120s timeout)
60
+ - Extract via canvas: `canvas.drawImage(img) → canvas.toDataURL('image/png')`
61
+ - POST base64 to local HTTP save server
62
+ 4. Save 4 PNGs to output dir
63
+
64
+ ### CLI
65
+
66
+ ```bash
67
+ node index.js "prompt"
68
+ node index.js --image ref.png "prompt"
69
+ node index.js --output ./dir "prompt"
70
+ ```
71
+
72
+ Arguments parsed in index.js lines 144-172.
73
+
74
+ ### Observability
75
+
76
+ - Chrome console logs Gemini errors
77
+ - 120s timeout is conservative; real generation ~30-60s
78
+ - If extraction fails, check `img[alt*="AI generated"]` selector
79
+
80
+ ## Development Constraints
81
+
82
+ - Max 200 lines per file (split before hitting limit)
83
+ - No comments
84
+ - No test files
85
+ - No hardcoded values
86
+ - Errors throw with context (no silent failures)
87
+ - Messages must stay Anthropic-compatible (other code depends on this contract)
88
+ - Tool schemas must translate cleanly to all providers
89
+
90
+ ## Testing
91
+
92
+ No test files. Validation via:
93
+ - `examples/basic-chat.js`: Single-turn Anthropic format → Gemini
94
+ - `examples/streaming.js`: Streaming events
95
+ - `examples/tool-use.js`: Tool calling and tool result handling
96
+ - `examples/vision.js`: Image blocks (base64, URL, inline)
97
+ - `examples/multi-turn.js`: Multi-turn chat with context
98
+
99
+ Run examples against real Gemini API to validate message translation.
100
+
101
+ ## Known Issues & Workarounds
102
+
103
+ - Gemini API doesn't support `tool_choice: 'required'` — treated as `'auto'`
104
+ - Some models have different tool naming conventions — check provider docs
105
+ - Streaming response parsing varies by provider — see lib/providers/ for details
106
+ - OAuth tokens expire — gembird uses browser session instead of capturing tokens
107
+
108
+ ## Files
109
+
110
+ - `lib/convert.js`: Message/tool translation logic
111
+ - `lib/client.js`: Provider client factory
112
+ - `lib/errors.js`: Error handling and retry logic
113
+ - `lib/providers/`: Provider-specific streaming implementations
114
+ - `index.js`: Main entry point, streaming and generation wrappers
115
+ - `index.d.ts`: TypeScript type definitions
116
+ - `examples/`: Working examples using Anthropic SDK format
package/index.js CHANGED
@@ -170,4 +170,7 @@ async function generateRouter(params) {
170
170
  return createRouter(config).generate(params);
171
171
  }
172
172
 
173
- module.exports = { streamGemini, generateGemini, streamRouter, generateRouter, createRouter, convertMessages, convertTools, cleanSchema, GeminiError };
173
+ const { cloudGenerate, streamCloud, cloudStream } = require('./lib/cloud-generate');
174
+ const { ensureAuth, login: oauthLogin } = require('./lib/oauth');
175
+
176
+ module.exports = { streamGemini, generateGemini, streamRouter, generateRouter, createRouter, convertMessages, convertTools, cleanSchema, GeminiError, cloudGenerate, streamCloud, cloudStream, ensureAuth, oauthLogin };
package/lib/cloud-generate.js ADDED
@@ -0,0 +1,119 @@
1
+ const { convertMessages, convertTools, cleanSchema, extractModelId, buildConfig } = require('./convert');
2
+ const { ensureAuth, CODE_ASSIST_BASE, CODE_ASSIST_HEADERS } = require('./oauth');
3
+ const crypto = require('crypto');
4
+
5
/**
 * Builds the User-Agent header value sent with Code Assist requests.
 *
 * @param {string} [model] - Model identifier; any falsy value is reported as 'unknown'.
 * @returns {string} User-Agent string embedding the current platform and the model.
 */
function buildUserAgent(model) {
  const modelLabel = model || 'unknown';
  const platform = process.platform;
  return `gemini-cli/0.30.0 (node; ${platform}) model/${modelLabel}`;
}
8
+
9
/**
 * Sends a single non-streaming `generateContent` request to the Code Assist
 * endpoint and returns the first candidate.
 *
 * NOTE(review): `safetySettings` is forwarded to `buildConfig` but nothing here
 * copies a safety-settings field onto the outgoing request — confirm intent.
 *
 * @param {object} params - Anthropic-style generation parameters; `authPort` is
 *   handed to `ensureAuth`.
 * @returns {Promise<{text: string, parts: object[], response: object}>} Joined
 *   text of all parts not flagged `thought`, every returned part, and the
 *   unwrapped response object.
 * @throws {Error} When the HTTP response is not ok or no candidate is returned.
 */
async function cloudGenerate({ model, system, messages, tools, temperature, maxOutputTokens, topP, topK, safetySettings, responseModalities, authPort }) {
  const tokens = await ensureAuth(authPort);
  const modelId = extractModelId(model);
  const contents = convertMessages(messages);
  const { config } = buildConfig({ system, tools, temperature, maxOutputTokens, topP, topK, safetySettings, responseModalities });

  const generationConfig = {};
  if (config.maxOutputTokens) {
    generationConfig.maxOutputTokens = config.maxOutputTokens;
  }
  // Sampling knobs are copied whenever they are set, including an explicit 0.
  for (const knob of ['temperature', 'topP', 'topK']) {
    if (config[knob] != null) {
      generationConfig[knob] = config[knob];
    }
  }
  if (config.responseModalities) {
    generationConfig.responseModalities = config.responseModalities;
  }

  const request = { contents };
  if (config.systemInstruction) {
    request.systemInstruction = { parts: [{ text: config.systemInstruction }] };
  }
  if (config.tools) {
    request.tools = config.tools;
  }
  if (Object.keys(generationConfig).length) {
    request.generationConfig = generationConfig;
  }

  const envelope = {
    project: tokens.projectId,
    model: modelId,
    user_prompt_id: crypto.randomUUID(),
    request
  };
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${tokens.accessToken}`,
    'User-Agent': buildUserAgent(modelId),
    'x-activity-request-id': crypto.randomUUID(),
    ...CODE_ASSIST_HEADERS
  };

  const res = await fetch(`${CODE_ASSIST_BASE}:generateContent`, {
    method: 'POST',
    headers,
    body: JSON.stringify(envelope)
  });
  if (!res.ok) {
    throw new Error(`Cloud generate failed (${res.status}): ${await res.text()}`);
  }

  const payload = await res.json();
  // The payload may be wrapped in a `response` envelope or arrive bare.
  const inner = payload.response || payload;
  const firstCandidate = inner.candidates?.[0];
  if (!firstCandidate) {
    throw new Error('No candidates returned');
  }

  const allParts = firstCandidate.content?.parts || [];
  let text = '';
  for (const part of allParts) {
    if (part.text && !part.thought) {
      text += part.text;
    }
  }
  return { text, parts: allParts, response: inner };
}
49
+
50
/**
 * Converts one line of the SSE body into the stream events it carries.
 *
 * Lines that are not `data:` lines, are empty, or carry `[DONE]` yield nothing.
 * A payload that is not valid JSON is skipped; only the parse itself is guarded,
 * so no other failure is hidden.
 *
 * @param {string} line - One raw line of the SSE response.
 * @returns {object[]} `text-delta` / `image-data` events found in the line.
 */
function parseSseDataLine(line) {
  const trimmed = line.trim();
  if (!trimmed.startsWith('data:')) return [];
  const json = trimmed.slice(5).trim();
  if (!json || json === '[DONE]') return [];

  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch {
    return [];
  }

  const inner = parsed?.response || parsed;
  const parts = inner?.candidates?.[0]?.content?.parts || [];
  const events = [];
  for (const part of parts) {
    if (part.text && !part.thought) events.push({ type: 'text-delta', textDelta: part.text });
    if (part.inlineData) events.push({ type: 'image-data', inlineData: part.inlineData });
  }
  return events;
}

/**
 * Streams a generation from the Code Assist `streamGenerateContent` SSE endpoint.
 *
 * Yields `start-step`, then a `text-delta` for every text part not flagged
 * `thought` and an `image-data` for every part with `inlineData`, then
 * `finish-step`. `onStepFinish`, when given, is awaited after `finish-step`.
 *
 * @param {object} params - Anthropic-style generation parameters; `authPort` is
 *   handed to `ensureAuth`.
 * @yields {object} Stream events as described above.
 * @throws {Error} When the HTTP response is not ok.
 */
async function* cloudStream({ model, system, messages, tools, onStepFinish, temperature, maxOutputTokens, topP, topK, safetySettings, responseModalities, authPort }) {
  const tokens = await ensureAuth(authPort);
  const modelId = extractModelId(model);
  const contents = convertMessages(messages);
  const { config } = buildConfig({ system, tools, temperature, maxOutputTokens, topP, topK, safetySettings, responseModalities });

  const request = { contents };
  if (config.systemInstruction) request.systemInstruction = { parts: [{ text: config.systemInstruction }] };
  if (config.tools) request.tools = config.tools;
  const genConfig = {};
  if (config.maxOutputTokens) genConfig.maxOutputTokens = config.maxOutputTokens;
  if (config.temperature != null) genConfig.temperature = config.temperature;
  if (config.topP != null) genConfig.topP = config.topP;
  if (config.topK != null) genConfig.topK = config.topK;
  if (config.responseModalities) genConfig.responseModalities = config.responseModalities;
  if (Object.keys(genConfig).length) request.generationConfig = genConfig;

  const envelope = { project: tokens.projectId, model: modelId, user_prompt_id: crypto.randomUUID(), request };

  const res = await fetch(`${CODE_ASSIST_BASE}:streamGenerateContent?alt=sse`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${tokens.accessToken}`,
      'User-Agent': buildUserAgent(modelId),
      'x-activity-request-id': crypto.randomUUID(),
      Accept: 'text/event-stream',
      ...CODE_ASSIST_HEADERS
    },
    body: JSON.stringify(envelope)
  });

  if (!res.ok) throw new Error(`Cloud stream failed (${res.status}): ${await res.text()}`);

  yield { type: 'start-step' };
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let completed = false;

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line; keep it for the next chunk.
      buffer = lines.pop() || '';
      for (const line of lines) yield* parseSseDataLine(line);
    }
    // Flush the decoder and emit a final line that had no trailing newline.
    buffer += decoder.decode();
    if (buffer) yield* parseSseDataLine(buffer);
    completed = true;
  } finally {
    // On early exit (consumer stopped iterating, or an error) stop reading the
    // body. Cancellation is best-effort cleanup, so its own failure is ignored.
    if (!completed) await reader.cancel().catch(() => {});
  }

  yield { type: 'finish-step', finishReason: 'stop' };
  if (onStepFinish) await onStepFinish();
}
114
+
115
/**
 * Wraps `cloudStream` in a result object with a `fullStream` iterator and an
 * always-empty `warnings` promise.
 *
 * @param {object} params - Passed through unchanged to `cloudStream`.
 * @returns {{fullStream: AsyncGenerator, warnings: Promise<Array>}}
 */
function streamCloud(params) {
  const fullStream = cloudStream(params);
  const warnings = Promise.resolve([]);
  return { fullStream, warnings };
}
118
+
119
+ module.exports = { cloudGenerate, cloudStream, streamCloud };
package/lib/oauth.js ADDED
@@ -0,0 +1,133 @@
1
const http = require('http');
const crypto = require('crypto');
const fs = require('fs');
const os = require('os');
const path = require('path');
5
+
6
// OAuth client credentials come from the environment; each is '' when unset.
const CLIENT_ID = process.env.GOOGLE_OAUTH_CLIENT_ID || '';
const CLIENT_SECRET = process.env.GOOGLE_OAUTH_CLIENT_SECRET || '';
const SCOPES = 'https://www.googleapis.com/auth/cloud-platform https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/userinfo.profile';
const AUTH_URL = 'https://accounts.google.com/o/oauth2/v2/auth';
const TOKEN_URL = 'https://oauth2.googleapis.com/token';
const CODE_ASSIST_BASE = 'https://cloudcode-pa.googleapis.com/v1internal';
// Extra headers spread into every Code Assist request.
const CODE_ASSIST_HEADERS = { 'X-Goog-Api-Client': 'gl-node/22.17.0', 'Client-Metadata': 'ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI' };
// HOME and USERPROFILE keep their original precedence. os.homedir() replaces the
// previous '.' fallback so tokens are never written relative to the current
// working directory when neither variable is set.
const TOKEN_PATH = path.join(process.env.HOME || process.env.USERPROFILE || os.homedir(), '.thebird', 'oauth-tokens.json');
14
+
15
/**
 * Encodes a Buffer as unpadded base64url (`-` and `_` instead of `+` and `/`).
 *
 * Uses the built-in 'base64url' Buffer encoding rather than rewriting the
 * characters of a plain base64 string by hand.
 *
 * @param {Buffer} buf - Bytes to encode.
 * @returns {string} base64url text without `=` padding.
 */
function base64url(buf) {
  return buf.toString('base64url');
}
18
+
19
/**
 * Creates a PKCE verifier/challenge pair.
 *
 * The verifier is 32 random bytes, base64url-encoded; the challenge is the
 * base64url-encoded SHA-256 digest of the verifier string.
 *
 * @returns {{verifier: string, challenge: string}}
 */
function generatePkce() {
  const randomSeed = crypto.randomBytes(32);
  const verifier = base64url(randomSeed);
  const hasher = crypto.createHash('sha256');
  hasher.update(verifier);
  const challenge = base64url(hasher.digest());
  return { verifier, challenge };
}
24
+
25
/**
 * Loads the token file at `TOKEN_PATH`.
 *
 * @returns {object|null} Parsed JSON content, or null when the file cannot be
 *   read or does not contain valid JSON.
 */
function readTokens() {
  let parsed = null;
  try {
    const raw = fs.readFileSync(TOKEN_PATH, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  return parsed;
}
28
+
29
/**
 * Writes the tokens to `TOKEN_PATH` as pretty-printed JSON, creating the
 * parent directory when needed.
 *
 * The file holds a long-lived refresh token, so the directory is created
 * owner-only (0700) and the file is restricted to owner read/write (0600).
 *
 * @param {object} tokens - Token record to store.
 * @returns {void}
 */
function writeTokens(tokens) {
  const dir = path.dirname(TOKEN_PATH);
  fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
  fs.writeFileSync(TOKEN_PATH, JSON.stringify(tokens, null, 2), { mode: 0o600 });
  // `mode` only applies when the file is created; tighten a file that already existed.
  fs.chmodSync(TOKEN_PATH, 0o600);
}
33
+
34
/**
 * Exchanges a refresh token for a new access token at `TOKEN_URL`.
 *
 * @param {string} refreshToken - Refresh token to redeem.
 * @returns {Promise<{accessToken: string, refreshToken: string, expiresAt: number}>}
 *   The new access token, the refresh token from the response (or the one
 *   passed in when the response has none), and the expiry as epoch milliseconds.
 * @throws {Error} When the token endpoint responds with a non-ok status.
 */
async function refreshAccessToken(refreshToken) {
  const form = new URLSearchParams({
    grant_type: 'refresh_token',
    refresh_token: refreshToken,
    client_id: CLIENT_ID,
    client_secret: CLIENT_SECRET
  });
  const response = await fetch(TOKEN_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: form
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error('Token refresh failed: ' + detail);
  }
  const payload = await response.json();
  const lifetimeMs = payload.expires_in * 1000;
  return {
    accessToken: payload.access_token,
    refreshToken: payload.refresh_token || refreshToken,
    expiresAt: Date.now() + lifetimeMs
  };
}
44
+
45
/**
 * Returns stored tokens that are still usable, refreshing them when needed.
 *
 * Stored tokens are returned as-is while their expiry is more than 60 seconds
 * away. Otherwise the refresh token is redeemed, the result is merged over the
 * stored record, saved, and returned.
 *
 * @returns {Promise<object|null>} Token record, or null when nothing is stored
 *   or the stored record has no refresh token.
 */
async function getValidToken() {
  const stored = readTokens();
  if (!stored?.refreshToken) {
    return null;
  }

  const stillFresh = Boolean(stored.expiresAt) && stored.expiresAt > Date.now() + 60000;
  if (stillFresh) {
    return stored;
  }

  const renewed = await refreshAccessToken(stored.refreshToken);
  const merged = Object.assign({}, stored, renewed);
  writeTokens(merged);
  return merged;
}
54
+
55
/**
 * Resolves the Code Assist project id for the authenticated user.
 *
 * Calls `loadCodeAssist`. If that reports a project, its id is returned.
 * Otherwise the user is onboarded onto the 'free-tier' tier (or the first
 * allowed tier) and the onboarding operation is polled until it is done.
 *
 * The function now throws when no project id can be determined, instead of
 * returning `undefined`. `ensureAuth` treats a token record without a project
 * id as unusable, so a silently missing id forced a new login on every call.
 *
 * @param {string} accessToken - OAuth access token used as the Bearer credential.
 * @returns {Promise<string>} The project id.
 * @throws {Error} When loading or onboarding fails, no tier is eligible, or no
 *   project id is obtained.
 */
async function resolveProject(accessToken) {
  const pollAttempts = 10;
  const pollDelayMs = 5000;
  const metadata = { ideType: 'IDE_UNSPECIFIED', platform: 'PLATFORM_UNSPECIFIED', pluginType: 'GEMINI' };
  const authHeaders = { Authorization: `Bearer ${accessToken}`, ...CODE_ASSIST_HEADERS };
  const jsonHeaders = { 'Content-Type': 'application/json', ...authHeaders };

  const loadRes = await fetch(`${CODE_ASSIST_BASE}:loadCodeAssist`, {
    method: 'POST',
    headers: jsonHeaders,
    body: JSON.stringify({ metadata })
  });
  if (!loadRes.ok) throw new Error(`Failed to load Code Assist project (${loadRes.status}): ${await loadRes.text()}`);
  const data = await loadRes.json();

  const existing = data.cloudaicompanionProject;
  if (existing) {
    // The field may be a plain id string or an object carrying the id.
    const existingId = typeof existing === 'string' ? existing : existing.id;
    if (!existingId) throw new Error('Code Assist project returned without an id');
    return existingId;
  }

  const tier = data.allowedTiers?.find(t => t.id === 'free-tier') || data.allowedTiers?.[0];
  if (!tier) throw new Error('No eligible tier: ' + (data.ineligibleTiers?.[0]?.reasonMessage || 'unknown'));

  const onboardRes = await fetch(`${CODE_ASSIST_BASE}:onboardUser`, {
    method: 'POST',
    headers: jsonHeaders,
    body: JSON.stringify({ tierId: tier.id || 'legacy-tier', metadata })
  });
  if (!onboardRes.ok) throw new Error(`Onboarding failed (${onboardRes.status}): ${await onboardRes.text()}`);

  let op = await onboardRes.json();
  for (let attempt = 0; attempt < pollAttempts && !op.done && op.name; attempt++) {
    await new Promise(r => setTimeout(r, pollDelayMs));
    const pollRes = await fetch(`${CODE_ASSIST_BASE}/${op.name}`, { headers: authHeaders });
    // A failed poll keeps the previous operation state and is retried on the next attempt.
    if (pollRes.ok) op = await pollRes.json();
  }

  const projectId = op.response?.cloudaicompanionProject?.id;
  if (!projectId) {
    throw new Error(op.done ? 'Onboarding finished without a project id' : 'Onboarding did not complete; no project id available');
  }
  return projectId;
}
81
+
82
/**
 * Runs the interactive OAuth authorization-code flow with PKCE.
 *
 * Starts a local HTTP server on `port`, prints the authorization URL and tries
 * to open it in the default browser. When the `/callback` request arrives, the
 * code is exchanged for tokens, the Code Assist project is resolved, and the
 * token record is saved and returned.
 *
 * Changes over the previous version:
 * - an unset client id is rejected up front instead of producing an unusable URL;
 * - server errors such as EADDRINUSE reject the promise instead of going unhandled;
 * - a callback carrying an OAuth `error`, or no `code`, is rejected with that
 *   context rather than being sent to the token endpoint;
 * - the browser is opened with the command matching the current platform.
 *
 * @param {number} port - Local port for the OAuth redirect listener.
 * @returns {Promise<{accessToken: string, refreshToken: string, expiresAt: number, projectId: string}>}
 * @throws {Error} Rejects when configuration is missing, the server cannot
 *   listen, the callback is invalid, or the token exchange fails.
 */
function login(port) {
  return new Promise((resolve, reject) => {
    if (!CLIENT_ID) {
      reject(new Error('GOOGLE_OAUTH_CLIENT_ID is not set; cannot start OAuth login'));
      return;
    }

    const { verifier, challenge } = generatePkce();
    const state = crypto.randomBytes(32).toString('hex');
    const callbackUrl = `http://localhost:${port}/callback`;
    const url = new URL(AUTH_URL);
    url.searchParams.set('client_id', CLIENT_ID);
    url.searchParams.set('response_type', 'code');
    url.searchParams.set('redirect_uri', callbackUrl);
    url.searchParams.set('scope', SCOPES);
    url.searchParams.set('code_challenge', challenge);
    url.searchParams.set('code_challenge_method', 'S256');
    url.searchParams.set('state', state);
    url.searchParams.set('access_type', 'offline');
    url.searchParams.set('prompt', 'consent');

    const server = http.createServer(async (req, res) => {
      const u = new URL(req.url, `http://localhost:${port}`);
      if (!u.pathname.startsWith('/callback')) { res.end('waiting...'); return; }

      // Reports the failure to the browser, stops listening and rejects the login.
      const fail = (err) => { res.end('Error: ' + err.message); server.close(); reject(err); };

      if (u.searchParams.get('state') !== state) { res.end('Invalid state'); server.close(); reject(new Error('Invalid state')); return; }
      const oauthError = u.searchParams.get('error');
      if (oauthError) { fail(new Error('Authorization failed: ' + oauthError)); return; }
      const code = u.searchParams.get('code');
      if (!code) { fail(new Error('Authorization callback did not include a code')); return; }

      try {
        const tokRes = await fetch(TOKEN_URL, {
          method: 'POST',
          headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
          body: new URLSearchParams({ client_id: CLIENT_ID, client_secret: CLIENT_SECRET, code, grant_type: 'authorization_code', redirect_uri: callbackUrl, code_verifier: verifier })
        });
        if (!tokRes.ok) throw new Error('Token exchange failed: ' + await tokRes.text());
        const payload = await tokRes.json();
        if (!payload.refresh_token) throw new Error('No refresh token — ensure prompt=consent');
        const projectId = await resolveProject(payload.access_token);
        const tokens = { accessToken: payload.access_token, refreshToken: payload.refresh_token, expiresAt: Date.now() + payload.expires_in * 1000, projectId };
        writeTokens(tokens);
        res.end('Authenticated! You can close this tab.');
        server.close();
        resolve(tokens);
      } catch (e) {
        fail(e);
      }
    });

    // Without this listener a listen failure would be an unhandled 'error' event.
    server.on('error', reject);

    server.listen(port, () => {
      console.log(`Open this URL to authenticate:\n${url.toString()}\n`);
      // Opening the browser is a convenience only; the URL is already printed,
      // so a failure to launch it is deliberately ignored.
      try {
        const { exec } = require('child_process');
        let opener = 'xdg-open';
        if (process.platform === 'win32') opener = 'start ""';
        else if (process.platform === 'darwin') opener = 'open';
        exec(`${opener} "${url.toString()}"`, () => {});
      } catch {}
    });
  });
}
126
+
127
/**
 * Returns usable credentials, starting an interactive login when none exist.
 *
 * Stored tokens are used only when they carry both an access token and a
 * project id; otherwise `login` runs on the given port, or on 8585 when the
 * port is falsy.
 *
 * @param {number} [port] - Port for the OAuth redirect listener.
 * @returns {Promise<object>} Token record.
 */
async function ensureAuth(port) {
  const cached = await getValidToken();
  const usable = Boolean(cached?.accessToken && cached?.projectId);
  if (usable) {
    return cached;
  }
  const listenPort = port || 8585;
  return login(listenPort);
}
132
+
133
+ module.exports = { login, ensureAuth, getValidToken, readTokens, writeTokens, resolveProject, CODE_ASSIST_BASE, CODE_ASSIST_HEADERS };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thebird",
3
- "version": "1.2.3",
3
+ "version": "1.2.5",
4
4
  "description": "Anthropic SDK to Gemini streaming bridge — drop-in proxy that translates Anthropic message format and tool calls to Google Gemini",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",