thebird 1.2.5 → 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ name: Deploy GitHub Pages
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths: [docs/**]
7
+ workflow_dispatch:
8
+
9
+ permissions:
10
+ contents: read
11
+ pages: write
12
+ id-token: write
13
+
14
+ concurrency:
15
+ group: pages
16
+ cancel-in-progress: true
17
+
18
+ jobs:
19
+ deploy:
20
+ runs-on: ubuntu-latest
21
+ environment:
22
+ name: github-pages
23
+ url: ${{ steps.deploy.outputs.page_url }}
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: actions/configure-pages@v4
27
+ - uses: actions/upload-pages-artifact@v3
28
+ with:
29
+ path: docs
30
+ - id: deploy
31
+ uses: actions/deploy-pages@v4
package/CLAUDE.md CHANGED
@@ -42,24 +42,28 @@ Some providers need field adjustments:
42
42
 
43
43
  Applied automatically during request building.
44
44
 
45
- ## gembird — Image Generation via Browser
45
+ ## gembird — Image Generation via Hybrid Approach
46
46
 
47
- **gembird** generates 4-view product images (front, back, left-side, right-side) using Gemini's web UI.
47
+ **gembird** generates 4-view product images (front, back, left-side, right-side) using a hybrid approach: Gemini's web UI for authentication and an HTTP API for generation.
48
48
 
49
- ### Why Browser Automation?
49
+ ### Workflow
50
50
 
51
- Gemini API free tier has 0 quota for image generation. Web UI works without limits. Tradeoff: slower than API, depends on UI stability, but no quota needed.
51
+ 1. **Auth via Browser**: Playwright CDP connects to Chrome on `localhost:9222` and navigates to gemini.google.com. User is already logged in.
52
+ 2. **Session Capture**: Intercept network requests to extract session: `{ cookies, xsrf, fsid, template }`. One-time capture, cached in `.gemini-session.json`.
53
+ 3. **HTTP Generation**: For each view, POST prompt to Cloud Code Assist API with captured session. Stream response.
54
+ 4. **Parse Response**: Extract image URLs from streaming response via regex.
55
+ 5. **Download**: Download PNG from `lh3.googleusercontent.com` with cookies. Save to disk.
52
56
 
53
- ### Workflow
57
+ ### Why Hybrid?
58
+
59
+ Gemini API free tier has 0 quota for image generation. Browser provides free auth. HTTP API (Cloud Code Assist) provides faster generation than DOM polling + canvas extraction. Hybrid = free auth + fast generation + no quota limits.
60
+
61
+ ### Performance
54
62
 
55
- 1. Playwright CDP connection to Chrome on `localhost:9222`
56
- 2. Navigate to gemini.google.com
57
- 3. For each view:
58
- - Type prompt asking for that view
59
- - Poll for new `<img alt="AI generated">` (120s timeout)
60
- - Extract via canvas: `canvas.drawImage(img) → canvas.toDataURL('image/png')`
61
- - POST base64 to local HTTP save server
62
- 4. Save 4 PNGs to output dir
63
+ - Browser connection: one-time, 30s
64
+ - HTTP generation per image: ~30-60s (Gemini's generation time, not polling overhead)
65
+ - Download per image: ~5s
66
+ - Total: 4 images in ~2-3 minutes (vs ~8 minutes with browser polling + canvas extraction)
63
67
 
64
68
  ### CLI
65
69
 
@@ -69,13 +73,14 @@ node index.js --image ref.png "prompt"
69
73
  node index.js --output ./dir "prompt"
70
74
  ```
71
75
 
72
- Arguments parsed in index.js lines 144-172.
76
+ Arguments parsed in index.js lines 88-115.
73
77
 
74
78
  ### Observability
75
79
 
76
- - Chrome console logs Gemini errors
77
- - 120s timeout is conservative; real generation ~30-60s
78
- - If extraction fails, check `img[alt*="AI generated"]` selector
80
+ - Session cached in `.gemini-session.json` (expires after 1 hour)
81
+ - HTTP response streamed and parsed for image URLs
82
+ - Download errors logged with context
83
+ - Progress logged per view: `[1/4] front view...`
79
84
 
80
85
  ## Development Constraints
81
86
 
package/docs/app.js ADDED
@@ -0,0 +1,127 @@
1
+ import { GoogleGenAI } from 'https://esm.sh/@google/genai@1';
2
+
3
/**
 * Convert Anthropic-style chat messages into the Gemini `contents` array.
 *
 * - `assistant` maps to the Gemini role `model`; every other role maps to `user`.
 * - String content becomes a single text part; empty strings are skipped.
 * - Array content is mapped block by block (`text`, `tool_use`, `tool_result`);
 *   unrecognised blocks and empty text blocks are dropped, and a message left
 *   with no parts is omitted entirely.
 *
 * @param {Array<{role: string, content: string | Array<object>}>} messages
 *   Conversation history in Anthropic message format.
 * @returns {Array<{role: 'user' | 'model', parts: Array<object>}>}
 *   Gemini `contents` entries in the same order as the input.
 */
function convertMessages(messages) {
  // A tool_result block normally identifies its call through `tool_use_id`,
  // while functionResponse needs the function name, so names are remembered
  // by call id as tool_use blocks are converted.
  const toolNamesById = new Map();

  // functionResponse.response is sent as an object: parsed JSON objects pass
  // through unchanged, everything else is wrapped as `{ result: value }`.
  const toResponseObject = (content) => {
    let value = content;
    if (typeof content === 'string') {
      try {
        value = JSON.parse(content);
      } catch {
        // Not JSON: keep the raw text.
        return { result: content };
      }
    }
    if (value == null) return {};
    const isPlainObject = typeof value === 'object' && !Array.isArray(value);
    return isPlainObject ? value : { result: value };
  };

  const toPart = (block) => {
    if (!block) return null;
    if (block.type === 'text' && block.text) return { text: block.text };
    if (block.type === 'tool_use') {
      if (block.id) toolNamesById.set(block.id, block.name);
      return { functionCall: { name: block.name, args: block.input || {} } };
    }
    if (block.type === 'tool_result') {
      // An explicit name wins; otherwise fall back to the matching tool_use.
      const name = block.name || toolNamesById.get(block.tool_use_id) || 'unknown';
      return { functionResponse: { name, response: toResponseObject(block.content) } };
    }
    return null;
  };

  const contents = [];
  for (const m of messages ?? []) {
    const role = m.role === 'assistant' ? 'model' : 'user';
    if (typeof m.content === 'string') {
      if (m.content) contents.push({ role, parts: [{ text: m.content }] });
      continue;
    }
    if (Array.isArray(m.content)) {
      const parts = m.content.map(toPart).filter(Boolean);
      if (parts.length) contents.push({ role, parts });
    }
  }
  return contents;
}
28
+
29
/**
 * Build the Gemini generation config for a request.
 *
 * @param {{system?: string, temperature?: number, maxOutputTokens?: number}} [options]
 *   `temperature` defaults to 0.7 and `maxOutputTokens` to 8192 when they are
 *   null or undefined; `systemInstruction` is only set for a truthy `system`.
 * @returns {{maxOutputTokens: number, temperature: number, systemInstruction?: string}}
 */
function buildConfig(options = {}) {
  const { system, temperature, maxOutputTokens } = options;
  const generationConfig = {
    maxOutputTokens: maxOutputTokens ?? 8192,
    temperature: temperature ?? 0.7,
  };
  if (system) {
    generationConfig.systemInstruction = system;
  }
  return generationConfig;
}
34
+
35
// Conversation state shared by every handler in this module.
const state = {
  messages: [],
  streaming: false,
  model: 'gemini-2.0-flash',
};

// Read-only view of the state for inspection from the browser console.
window.__debug = {
  get state() {
    return state;
  },
  get messages() {
    return state.messages;
  },
};

// Page elements, looked up once at module load.
const byId = (id) => document.getElementById(id);
const $messages = byId('messages');
const $input = byId('input');
const $form = byId('chat-form');
const $send = byId('send-btn');
const $apiKey = byId('api-key');
const $model = byId('model-select');
const $status = byId('status');
const $clear = byId('clear-btn');

// Restore a previously stored API key and keep the stored copy up to date.
const savedKey = localStorage.getItem('gemini_api_key') || '';
if (savedKey) {
  $apiKey.value = savedKey;
}
$apiKey.addEventListener('change', () => {
  localStorage.setItem('gemini_api_key', $apiKey.value.trim());
});
$model.addEventListener('change', () => {
  state.model = $model.value;
});
55
+
56
/**
 * Append a message bubble to the transcript and scroll it into view.
 *
 * @param {string} role - Added as a CSS class next to `msg`.
 * @param {string} text - Assigned through `textContent`, so it is never parsed as HTML.
 * @returns {HTMLElement} The element that was appended.
 */
function addMsg(role, text) {
  const bubble = document.createElement('div');
  bubble.className = `msg ${role}`;
  bubble.textContent = text;
  $messages.appendChild(bubble);
  // Keep the newest message visible.
  $messages.scrollTop = $messages.scrollHeight;
  return bubble;
}
64
+
65
/**
 * Show `t` in the status line (pass an empty string to clear it).
 *
 * @param {string} t - Status text.
 */
function setStatus(t) {
  $status.textContent = t;
}
66
+
67
/**
 * Send one user turn to Gemini and stream the reply into the transcript.
 *
 * The user message is recorded in `state.messages` before the request. On
 * success the assistant reply is appended to the history; on failure the
 * pending reply bubble is removed, an error bubble is shown and the user
 * message is popped from the history again.
 *
 * @param {string} userText - Text of the user's message.
 * @returns {Promise<void>}
 */
async function sendMessage(userText) {
  const apiKey = $apiKey.value.trim();
  if (!apiKey) {
    addMsg('error', 'Enter a Gemini API key above.');
    return;
  }

  state.messages.push({ role: 'user', content: userText });
  addMsg('user', userText);
  state.streaming = true;
  $send.disabled = true;
  setStatus('Streaming…');

  const replyEl = addMsg('model', '');
  let reply = '';
  try {
    const client = new GoogleGenAI({ apiKey });
    const request = {
      model: state.model,
      contents: convertMessages(state.messages),
      config: buildConfig({ temperature: 0.7 }),
    };
    const stream = await client.models.generateContentStream(request);

    for await (const chunk of stream) {
      for (const candidate of chunk.candidates || []) {
        for (const part of candidate.content?.parts || []) {
          // Skip empty parts and parts flagged as `thought`.
          if (!part.text || part.thought) continue;
          reply += part.text;
          replyEl.textContent = reply;
          $messages.scrollTop = $messages.scrollHeight;
        }
      }
    }

    if (!reply) {
      reply = '(empty response)';
    }
    replyEl.textContent = reply;
    state.messages.push({ role: 'assistant', content: reply });
    setStatus('');
  } catch (err) {
    replyEl.remove();
    addMsg('error', `Error: ${err?.message || String(err)}`);
    // Drop the user turn that was pushed before the request.
    state.messages.pop();
    setStatus('');
  } finally {
    state.streaming = false;
    $send.disabled = false;
    $input.focus();
  }
}
106
+
107
// Grow the textarea with its content, capped at 160px.
$input.addEventListener('input', () => {
  $input.style.height = 'auto';
  $input.style.height = `${Math.min($input.scrollHeight, 160)}px`;
});

// Submit handler: send the trimmed text unless it is empty or a reply is
// still streaming, then reset the textarea.
$form.addEventListener('submit', (event) => {
  event.preventDefault();
  const text = $input.value.trim();
  const canSend = text !== '' && !state.streaming;
  if (!canSend) {
    return;
  }
  $input.value = '';
  $input.style.height = 'auto';
  sendMessage(text);
});

// Enter submits the form; Shift+Enter keeps the default newline behaviour.
$input.addEventListener('keydown', (event) => {
  if (event.key !== 'Enter' || event.shiftKey) {
    return;
  }
  event.preventDefault();
  $form.requestSubmit();
});
121
+
122
// Clear button: reset the conversation history and the transcript.
$clear.addEventListener('click', () => {
  // While a reply is streaming, sendMessage still pushes to (or pops from)
  // state.messages when it finishes. Clearing now would leave the new history
  // starting with an assistant turn, so the click is ignored until the
  // stream ends.
  if (state.streaming) {
    return;
  }
  state.messages = [];
  $messages.innerHTML = '';
  setStatus('Cleared.');
  // Remove the confirmation after a short delay.
  setTimeout(() => setStatus(''), 1500);
});
@@ -0,0 +1,59 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>thebird — Anthropic SDK → Gemini demo</title>
7
+ <style>
8
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
9
+ :root {
10
+ --bg: #0f1117; --surface: #1a1d27; --border: #2a2d3e;
11
+ --text: #e2e8f0; --muted: #64748b; --accent: #6366f1;
12
+ --user: #1e293b; --model: #0f2027;
13
+ }
14
+ body { background: var(--bg); color: var(--text); font-family: system-ui, sans-serif; height: 100dvh; display: flex; flex-direction: column; }
15
+ header { padding: 1rem 1.5rem; border-bottom: 1px solid var(--border); display: flex; align-items: center; gap: 1rem; flex-wrap: wrap; }
16
+ header h1 { font-size: 1.1rem; font-weight: 700; color: var(--accent); }
17
+ header span { font-size: 0.8rem; color: var(--muted); }
18
+ #key-row { display: flex; gap: 0.5rem; flex: 1; min-width: 240px; }
19
+ #api-key { flex: 1; background: var(--surface); border: 1px solid var(--border); color: var(--text); padding: 0.4rem 0.75rem; border-radius: 6px; font-size: 0.85rem; outline: none; }
20
+ #api-key:focus { border-color: var(--accent); }
21
+ #model-select { background: var(--surface); border: 1px solid var(--border); color: var(--text); padding: 0.4rem 0.5rem; border-radius: 6px; font-size: 0.85rem; outline: none; }
22
+ #messages { flex: 1; overflow-y: auto; padding: 1rem 1.5rem; display: flex; flex-direction: column; gap: 0.75rem; }
23
+ .msg { padding: 0.75rem 1rem; border-radius: 8px; font-size: 0.9rem; line-height: 1.6; white-space: pre-wrap; word-break: break-word; max-width: 800px; }
24
+ .msg.user { background: var(--user); align-self: flex-end; border: 1px solid var(--border); }
25
+ .msg.model { background: var(--model); align-self: flex-start; border: 1px solid #1e3a4a; }
26
+ .msg.error { background: #2d1515; border: 1px solid #5a2020; color: #fca5a5; align-self: center; font-size: 0.8rem; }
27
+ form { padding: 1rem 1.5rem; border-top: 1px solid var(--border); display: flex; gap: 0.5rem; }
28
+ textarea { flex: 1; background: var(--surface); border: 1px solid var(--border); color: var(--text); padding: 0.6rem 0.75rem; border-radius: 8px; font-size: 0.9rem; resize: none; outline: none; font-family: inherit; min-height: 42px; max-height: 160px; line-height: 1.5; }
29
+ textarea:focus { border-color: var(--accent); }
30
+ button { background: var(--accent); color: #fff; border: none; padding: 0.6rem 1.25rem; border-radius: 8px; font-size: 0.9rem; cursor: pointer; white-space: nowrap; }
31
+ button:disabled { opacity: 0.4; cursor: not-allowed; }
32
+ button#clear-btn { background: transparent; border: 1px solid var(--border); color: var(--muted); padding: 0.4rem 0.75rem; font-size: 0.8rem; }
33
+ #status { font-size: 0.75rem; color: var(--muted); padding: 0 1.5rem 0.5rem; }
34
+ </style>
35
+ </head>
36
+ <body>
37
+ <header>
38
+ <h1>thebird</h1>
39
+ <span>Anthropic SDK format → Gemini API</span>
40
+ <div id="key-row">
41
+ <input id="api-key" type="password" placeholder="GEMINI_API_KEY" autocomplete="off" />
42
+ <select id="model-select">
43
+ <option value="gemini-2.0-flash">gemini-2.0-flash</option>
44
+ <option value="gemini-2.0-flash-thinking-exp">gemini-2.0-flash-thinking</option>
45
+ <option value="gemini-1.5-pro">gemini-1.5-pro</option>
46
+ <option value="gemini-1.5-flash">gemini-1.5-flash</option>
47
+ </select>
48
+ <button id="clear-btn" type="button">Clear</button>
49
+ </div>
50
+ </header>
51
+ <div id="messages"></div>
52
+ <div id="status"></div>
53
+ <form id="chat-form">
54
+ <textarea id="input" placeholder="Message… (Shift+Enter for newline)" rows="1"></textarea>
55
+ <button type="submit" id="send-btn">Send</button>
56
+ </form>
57
+ <script type="module" src="app.js"></script>
58
+ </body>
59
+ </html>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thebird",
3
- "version": "1.2.5",
3
+ "version": "1.2.7",
4
4
  "description": "Anthropic SDK to Gemini streaming bridge — drop-in proxy that translates Anthropic message format and tool calls to Google Gemini",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -0,0 +1,70 @@
1
+ import { GoogleGenAI } from '@google/genai';
2
+
3
/**
 * Return a deep copy of a JSON-schema-like value with every
 * `additionalProperties` and `$schema` key removed.
 *
 * Arrays are copied element by element; primitives, null and undefined are
 * returned unchanged.
 *
 * @param {*} s - Schema fragment.
 * @returns {*} The cleaned copy (or `s` itself when it is not an object).
 */
function cleanSchema(s) {
  const isObjectLike = Boolean(s) && typeof s === 'object';
  if (!isObjectLike) {
    return s;
  }
  if (Array.isArray(s)) {
    return s.map(cleanSchema);
  }

  const cleaned = {};
  for (const key of Object.keys(s)) {
    const keep = key !== 'additionalProperties' && key !== '$schema';
    if (keep) {
      cleaned[key] = cleanSchema(s[key]);
    }
  }
  return cleaned;
}
13
+
14
/**
 * Convert one content part into a Gemini part.
 *
 * @param {{kind: string, text?: string, toolName?: string, toolInput?: string}} b
 *   `kind` selects the conversion: `text`, `tool_use` or `tool_result`.
 *   `toolInput` is a JSON-encoded string of the call arguments.
 * @returns {object | null} A `{ text }`, `{ functionCall }` or
 *   `{ functionResponse }` part, or null for empty text and unknown kinds.
 * @throws {SyntaxError} When a `tool_use` part carries malformed `toolInput`
 *   JSON; the arguments cannot be recovered, so the error is left to the caller.
 */
function partToGemini(b) {
  if (b.kind === 'text' && b.text) return { text: b.text };

  if (b.kind === 'tool_use') {
    const args = b.toolInput ? JSON.parse(b.toolInput) : {};
    return { functionCall: { name: b.toolName || '', args } };
  }

  if (b.kind === 'tool_result') {
    let parsed;
    try {
      parsed = JSON.parse(b.text || '{}');
    } catch {
      // Not JSON: treat the text itself as the result value.
      parsed = b.text;
    }
    // functionResponse.response is sent as an object, so scalars, arrays and
    // raw text are wrapped instead of being passed through bare.
    let response;
    if (parsed == null) {
      response = {};
    } else if (typeof parsed === 'object' && !Array.isArray(parsed)) {
      response = parsed;
    } else {
      response = { result: parsed };
    }
    return { functionResponse: { name: b.toolName || 'unknown', response } };
  }

  return null;
}
24
+
25
/**
 * Convert a list of messages into the Gemini `contents` array.
 *
 * `assistant` maps to the role `model`, every other role to `user`. Each
 * content part goes through `partToGemini`; parts it rejects are dropped and
 * a message left with no parts is omitted.
 *
 * @param {Array<{role: string, content: Array<object>}>} messages
 * @returns {Array<{role: 'user' | 'model', parts: Array<object>}>}
 */
function convertMessages(messages) {
  const contents = [];
  for (const { role: sourceRole, content } of messages) {
    const parts = [];
    content.forEach((part) => {
      const converted = partToGemini(part);
      if (converted) {
        parts.push(converted);
      }
    });
    if (parts.length === 0) {
      continue;
    }
    const role = sourceRole === 'assistant' ? 'model' : 'user';
    contents.push({ role, parts });
  }
  return contents;
}
34
+
35
/**
 * Build the Gemini generation config from optional settings.
 *
 * @param {{system?: string, temperature?: number, maxOutputTokens?: number}} [options]
 *   Null or undefined `maxOutputTokens` falls back to 8192 and `temperature`
 *   to 0.7; a truthy `system` becomes `systemInstruction`.
 * @returns {{maxOutputTokens: number, temperature: number, systemInstruction?: string}}
 */
function buildConfig(options = {}) {
  const { system, temperature, maxOutputTokens } = options;
  const base = { maxOutputTokens: maxOutputTokens ?? 8192, temperature: temperature ?? 0.7 };
  return system ? { ...base, systemInstruction: system } : base;
}
43
+
44
/**
 * Run one non-streaming Gemini generation and return its text.
 *
 * Failures are reported through the `error` field instead of being thrown:
 * a missing API key, an empty candidate list, or any exception raised while
 * converting the messages or calling the API.
 *
 * @param {Array<object>} messages - Conversation passed to `convertMessages`.
 * @param {{apiKey: string, model?: string, system?: string, temperature?: number, maxOutputTokens?: number}} config
 *   `model` falls back to `gemini-2.0-flash` when empty.
 * @returns {Promise<{text: string, error: string | null}>}
 */
export async function generate(messages, config) {
  const { apiKey, model, system, temperature, maxOutputTokens } = config;
  if (!apiKey) {
    return { text: '', error: 'apiKey required' };
  }

  try {
    const client = new GoogleGenAI({ apiKey });
    const request = {
      model: model || 'gemini-2.0-flash',
      contents: convertMessages(messages),
      config: buildConfig({ system, temperature, maxOutputTokens }),
    };
    const response = await client.models.generateContent(request);

    const [firstCandidate] = response.candidates ?? [];
    if (!firstCandidate) {
      return { text: '', error: 'no candidates returned' };
    }

    // Concatenate the text parts, leaving out parts flagged as `thought`.
    let text = '';
    for (const part of firstCandidate.content?.parts || []) {
      if (part.text && !part.thought) {
        text += part.text;
      }
    }
    return { text, error: null };
  } catch (err) {
    return { text: '', error: err?.message || String(err) };
  }
}
67
+
68
/**
 * Convert `messages` to Gemini `contents` and return them as a JSON string.
 *
 * @param {Array<object>} messages - Conversation passed to `convertMessages`.
 * @returns {string} JSON encoding of the converted `contents` array.
 */
export function convertMessagesExport(messages) {
  return JSON.stringify(convertMessages(messages));
}

// The WIT world declares `export convert-messages`, and jco/ComponentizeJS
// resolve WIT exports by their lowerCamelCase name, so the function is also
// published as `convertMessages`. The original export name is kept for
// existing importers. Only the exported name is aliased; the module-local
// `convertMessages` helper is a separate binding and is not affected.
// NOTE(review): confirm the expected export name against the jco version in use.
export { convertMessagesExport as convertMessages };
package/wasi/demo.js ADDED
@@ -0,0 +1,23 @@
1
+ import { generate } from './dist/js/component.js';
2
+
3
// Demo: send a single prompt through the transpiled component and print the reply.
const { GEMINI_API_KEY: apiKey } = process.env;
if (!apiKey) {
  throw new Error('GEMINI_API_KEY env var required');
}

// One user message holding a single text part.
const prompt = { kind: 'text', text: 'Say hello in exactly 5 words.' };
const messages = [{ role: 'user', content: [prompt] }];

const config = {
  model: 'gemini-2.0-flash',
  apiKey,
  system: null,
  temperature: 0.7,
  maxOutputTokens: 256,
};

const result = await generate(messages, config);
if (result.error) {
  // generate reports failures through the result's error field.
  throw new Error(`generate failed: ${result.error}`);
}
console.log(result.text);
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "thebird-wasi",
3
+ "version": "0.1.0",
4
+ "description": "WASI component wrapper for thebird — runs thebird in any WASI runtime or Node.js via jco transpile",
5
+ "type": "module",
6
+ "scripts": {
7
+ "build": "jco componentize component.js --wit wit/world.wit --world-name thebird --out dist/component.wasm",
8
+ "transpile": "jco transpile dist/component.wasm --out-dir dist/js",
9
+ "run": "jco run dist/component.wasm",
10
+ "demo": "node demo.js"
11
+ },
12
+ "devDependencies": {
13
+ "@bytecodealliance/jco": "^1.0.0"
14
+ },
15
+ "dependencies": {
16
+ "@google/genai": "^1.0.0"
17
+ }
18
+ }
@@ -0,0 +1,45 @@
1
package thebird:core@0.1.0;

/// Records exchanged with the functions exported by the `thebird` world.
interface types {
  /// One part of a message's content.
  record content-part {
    /// Discriminator for the part. The JavaScript implementation in this
    /// package handles `text`, `tool_use` and `tool_result`.
    kind: string,
    /// Text of a `text` part, or the result payload of a `tool_result` part.
    text: option<string>,
    /// Tool name for `tool_use` and `tool_result` parts.
    tool-name: option<string>,
    /// Tool arguments as a JSON-encoded string (the implementation parses it
    /// with `JSON.parse`).
    tool-input: option<string>,
    /// NOTE(review): presumably the id of the tool call; the visible
    /// implementation does not read it.
    tool-id: option<string>,
  }

  /// One conversation turn.
  record message {
    /// The implementation maps `assistant` to the Gemini role `model` and
    /// every other value to `user`.
    role: string,
    content: list<content-part>,
  }

  /// Settings for a single `generate` call.
  record generate-config {
    /// Model name; the implementation falls back to `gemini-2.0-flash` when
    /// it is empty.
    model: string,
    /// API key; `generate` returns an error result when it is empty.
    api-key: string,
    /// Optional system instruction.
    system: option<string>,
    /// The implementation defaults this to 0.7 when absent.
    temperature: option<f32>,
    /// The implementation defaults this to 8192 when absent.
    max-output-tokens: option<u32>,
  }

  /// Outcome of a `generate` call.
  record generate-result {
    /// Generated text; empty when `error` is set.
    text: string,
    /// Error message, absent on success.
    error: option<string>,
  }

  /// NOTE(review): looks like a chunk of a streamed response; no function
  /// exported by the world below uses it yet.
  record stream-chunk {
    chunk-type: string,
    text-delta: option<string>,
    tool-call-id: option<string>,
    tool-name: option<string>,
    finish-reason: option<string>,
    error: option<string>,
  }
}

/// Component world exposing the two entry points.
world thebird {
  use types.{message, generate-config, generate-result, stream-chunk};

  /// Run one generation and return its text or an error.
  export generate: func(messages: list<message>, config: generate-config) -> generate-result;
  /// Convert the messages and return the result as a string (the
  /// implementation returns JSON).
  export convert-messages: func(messages: list<message>) -> string;
}