@tryhamster/gerbil 1.0.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/LICENSE +23 -0
  2. package/README.md +253 -0
  3. package/bin/cli.js +2 -0
  4. package/dist/auto-update-BbNHbSU1.mjs +3 -0
  5. package/dist/browser/index.d.mts +262 -0
  6. package/dist/browser/index.d.mts.map +1 -0
  7. package/dist/browser/index.mjs +755 -0
  8. package/dist/browser/index.mjs.map +1 -0
  9. package/dist/chrome-backend-C5Un08O4.mjs +771 -0
  10. package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
  11. package/dist/chrome-backend-CtwPENIW.mjs +3 -0
  12. package/dist/chunk-Ct1HF2bE.mjs +7 -0
  13. package/dist/cli.d.mts +1 -0
  14. package/dist/cli.mjs +7078 -0
  15. package/dist/cli.mjs.map +1 -0
  16. package/dist/frameworks/express.d.mts +22 -0
  17. package/dist/frameworks/express.d.mts.map +1 -0
  18. package/dist/frameworks/express.mjs +123 -0
  19. package/dist/frameworks/express.mjs.map +1 -0
  20. package/dist/frameworks/fastify.d.mts +11 -0
  21. package/dist/frameworks/fastify.d.mts.map +1 -0
  22. package/dist/frameworks/fastify.mjs +73 -0
  23. package/dist/frameworks/fastify.mjs.map +1 -0
  24. package/dist/frameworks/hono.d.mts +14 -0
  25. package/dist/frameworks/hono.d.mts.map +1 -0
  26. package/dist/frameworks/hono.mjs +82 -0
  27. package/dist/frameworks/hono.mjs.map +1 -0
  28. package/dist/frameworks/next.d.mts +31 -0
  29. package/dist/frameworks/next.d.mts.map +1 -0
  30. package/dist/frameworks/next.mjs +116 -0
  31. package/dist/frameworks/next.mjs.map +1 -0
  32. package/dist/frameworks/react.d.mts +56 -0
  33. package/dist/frameworks/react.d.mts.map +1 -0
  34. package/dist/frameworks/react.mjs +172 -0
  35. package/dist/frameworks/react.mjs.map +1 -0
  36. package/dist/frameworks/trpc.d.mts +12 -0
  37. package/dist/frameworks/trpc.d.mts.map +1 -0
  38. package/dist/frameworks/trpc.mjs +80 -0
  39. package/dist/frameworks/trpc.mjs.map +1 -0
  40. package/dist/gerbil-BfnsFWRE.mjs +644 -0
  41. package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
  42. package/dist/gerbil-BjW-z7Fq.mjs +5 -0
  43. package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
  44. package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
  45. package/dist/index.d.mts +223 -0
  46. package/dist/index.d.mts.map +1 -0
  47. package/dist/index.mjs +13 -0
  48. package/dist/index.mjs.map +1 -0
  49. package/dist/integrations/ai-sdk.d.mts +78 -0
  50. package/dist/integrations/ai-sdk.d.mts.map +1 -0
  51. package/dist/integrations/ai-sdk.mjs +199 -0
  52. package/dist/integrations/ai-sdk.mjs.map +1 -0
  53. package/dist/integrations/langchain.d.mts +41 -0
  54. package/dist/integrations/langchain.d.mts.map +1 -0
  55. package/dist/integrations/langchain.mjs +93 -0
  56. package/dist/integrations/langchain.mjs.map +1 -0
  57. package/dist/integrations/llamaindex.d.mts +45 -0
  58. package/dist/integrations/llamaindex.d.mts.map +1 -0
  59. package/dist/integrations/llamaindex.mjs +86 -0
  60. package/dist/integrations/llamaindex.mjs.map +1 -0
  61. package/dist/integrations/mcp-client.d.mts +206 -0
  62. package/dist/integrations/mcp-client.d.mts.map +1 -0
  63. package/dist/integrations/mcp-client.mjs +507 -0
  64. package/dist/integrations/mcp-client.mjs.map +1 -0
  65. package/dist/integrations/mcp.d.mts +177 -0
  66. package/dist/integrations/mcp.d.mts.map +1 -0
  67. package/dist/integrations/mcp.mjs +8 -0
  68. package/dist/mcp-R8kRLIKb.mjs +348 -0
  69. package/dist/mcp-R8kRLIKb.mjs.map +1 -0
  70. package/dist/models-DKULvhOr.mjs +136 -0
  71. package/dist/models-DKULvhOr.mjs.map +1 -0
  72. package/dist/models-De2-_GmQ.d.mts +22 -0
  73. package/dist/models-De2-_GmQ.d.mts.map +1 -0
  74. package/dist/one-liner-BUQR0nqq.mjs +98 -0
  75. package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
  76. package/dist/skills/index.d.mts +390 -0
  77. package/dist/skills/index.d.mts.map +1 -0
  78. package/dist/skills/index.mjs +7 -0
  79. package/dist/skills-D3CEpgDc.mjs +630 -0
  80. package/dist/skills-D3CEpgDc.mjs.map +1 -0
  81. package/dist/tools-BsiEE6f2.mjs +567 -0
  82. package/dist/tools-BsiEE6f2.mjs.map +1 -0
  83. package/dist/types-BS1N92Jt.d.mts +183 -0
  84. package/dist/types-BS1N92Jt.d.mts.map +1 -0
  85. package/dist/utils-7vXqtq2Q.mjs +63 -0
  86. package/dist/utils-7vXqtq2Q.mjs.map +1 -0
  87. package/docs/ai-sdk.md +80 -0
  88. package/docs/architecture/README.md +84 -0
  89. package/docs/architecture/caching.md +227 -0
  90. package/docs/architecture/inference.md +176 -0
  91. package/docs/architecture/overview.md +179 -0
  92. package/docs/architecture/streaming.md +261 -0
  93. package/docs/architecture/webgpu.md +213 -0
  94. package/docs/browser.md +328 -0
  95. package/docs/cli.md +155 -0
  96. package/docs/frameworks.md +90 -0
  97. package/docs/mcp-client.md +224 -0
  98. package/docs/mcp.md +109 -0
  99. package/docs/memory.md +229 -0
  100. package/docs/repl.md +473 -0
  101. package/docs/skills.md +261 -0
  102. package/docs/tools.md +304 -0
  103. package/package.json +207 -0
@@ -0,0 +1,227 @@
+ # Model Caching
+
+ How Gerbil caches models for fast subsequent loads.
+
+ ## Overview
+
+ Model files are large (100–500 MB). Gerbil caches them locally to avoid re-downloading:
+
+ | Environment | Cache Location | Mechanism |
+ |-------------|----------------|-----------|
+ | Browser | IndexedDB | transformers.js built-in |
+ | Node.js (CPU) | `~/.cache/huggingface/hub` | transformers.js built-in |
+ | Node.js (WebGPU) | Chrome's IndexedDB | Via ChromeGPUBackend |
+
+ ## Browser Caching
+
+ ### IndexedDB
+
+ transformers.js automatically caches model files in IndexedDB:
+
+ ```
+ IndexedDB
+ └── transformers-cache
+     └── onnx-community/Qwen3-0.6B-ONNX
+         ├── tokenizer.json
+         ├── config.json
+         ├── model_q4f16.onnx
+         └── ...
+ ```
+
+ ### Cache Behavior
+
+ 1. **First load**: Downloads from Hugging Face Hub (~15-30s)
+ 2. **Subsequent loads**: Reads from IndexedDB (~1-2s)
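+
+ You can observe both cases by timing the load — a minimal sketch using `createGerbilWorker` and the `ready` progress status, both shown in the preloading example later in this doc:
+
+ ```typescript
+ import { createGerbilWorker } from "@tryhamster/gerbil/browser";
+
+ const t0 = performance.now();
+ const gerbil = await createGerbilWorker({
+   modelId: "qwen3-0.6b",
+   onProgress: (p) => {
+     if (p.status === "ready") {
+       // First visit: dominated by the download (~15-30s).
+       // Later visits: served from IndexedDB (~1-2s).
+       console.log(`Ready in ${((performance.now() - t0) / 1000).toFixed(1)}s`);
+     }
+   },
+ });
+ ```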
+
+ ### Checking Cache
+
+ ```typescript
+ import { getWebGPUInfo } from "@tryhamster/gerbil/browser";
+
+ // Models are cached per-origin
+ // Same origin = same cache
+ ```
+
+ ### Clearing Cache
+
+ ```javascript
+ // In browser DevTools:
+ indexedDB.deleteDatabase("transformers-cache");
+ ```
+
+ ## Node.js CPU Caching
+
+ ### Hugging Face Hub Cache
+
+ transformers.js uses the standard HF cache directory:
+
+ ```
+ ~/.cache/huggingface/hub/
+ └── models--onnx-community--Qwen3-0.6B-ONNX/
+     ├── blobs/
+     │   └── [sha256 hashes]
+     ├── refs/
+     │   └── main
+     └── snapshots/
+         └── [commit hash]/
+             ├── tokenizer.json
+             ├── config.json
+             └── model_q4.onnx
+ ```
+
+ ### Environment Variables
+
+ ```bash
+ # Custom cache directory
+ export HF_HOME=/path/to/cache
+ export TRANSFORMERS_CACHE=/path/to/cache
+
+ # Offline mode (use cache only)
+ export TRANSFORMERS_OFFLINE=1
+ ```
+
+ ### CLI Cache Management
+
+ ```bash
+ # View cache
+ npx @tryhamster/gerbil cache
+
+ # Clear cache
+ npx @tryhamster/gerbil cache --clean
+
+ # Clear old models
+ npx @tryhamster/gerbil cache --older-than 30
+ ```
+
+ ## Node.js WebGPU Caching
+
+ ### ChromeGPUBackend Cache
+
+ When using WebGPU in Node.js, models are cached in Chrome's IndexedDB:
+
+ ```
+ ~/.gerbil/chrome-cache/
+ └── Default/
+     └── IndexedDB/
+         └── http_127.0.0.1_43724.indexeddb.leveldb/
+             └── [model cache]
+ ```
+
+ ### Why a Fixed Port?
+
+ The ChromeGPUBackend uses port 43724 ("GERBI") for a critical reason:
+
+ IndexedDB caches are **origin-specific**. The origin includes:
+ - Protocol: `http://`
+ - Host: `127.0.0.1`
+ - Port: `43724`
+
+ A fixed port ensures the same origin every time → same cache.
+
+ ```typescript
+ const GERBIL_LOCAL_PORT = 43724; // "GERBI" on phone keypad
+
+ // Always same origin:
+ // http://127.0.0.1:43724
+ ```
+
+ ### Cache Persistence
+
+ The Chrome user data directory persists between runs:
+
+ ```typescript
+ this.userDataDir = join(homedir(), ".gerbil", "chrome-cache");
+ ```
+
+ This means:
+ - Model downloads are cached
+ - Shader compilations are cached
+ - ~1.5s startup when cached vs ~20s first run
+
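+ The effect is the same as launching Chrome by hand with a persistent profile. A hypothetical sketch (illustrative binary name and flags, not ChromeGPUBackend's actual launch code):
+
+ ```typescript
+ import { spawn } from "node:child_process";
+ import { homedir } from "node:os";
+ import { join } from "node:path";
+
+ // The persistent --user-data-dir is what keeps IndexedDB (models + shaders) across runs.
+ const userDataDir = join(homedir(), ".gerbil", "chrome-cache");
+ spawn("google-chrome", [
+   "--headless=new",
+   `--user-data-dir=${userDataDir}`, // same profile every run
+   "http://127.0.0.1:43724",         // same origin every run → same cache
+ ]);
+ ```
+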
+ ## Cache Sizes
+
+ | Model | Download Size | Cache Size |
+ |-------|--------------|------------|
+ | qwen3-0.6b | ~400MB | ~400MB |
+ | smollm2-360m | ~250MB | ~250MB |
+ | smollm2-135m | ~100MB | ~100MB |
+
+ ## Preloading Models
+
+ ### Browser
+
+ ```typescript
+ // Preload during idle time
+ const gerbil = await createGerbilWorker({
+   modelId: "qwen3-0.6b",
+   onProgress: (p) => {
+     if (p.status === "ready") {
+       console.log("Model cached and ready");
+     }
+   },
+ });
+
+ // Model is now in IndexedDB for instant loads
+ ```
+
+ ### Node.js
+
+ ```typescript
+ // Preload in background
+ const g = new Gerbil();
+ await g.loadModel("qwen3-0.6b");
+ // Model is now in HF cache
+ ```
+
+ ### CLI
+
+ ```bash
+ # Download without running
+ npx @tryhamster/gerbil info -m qwen3-0.6b
+ # Model is now cached
+ ```
+
+ ## Offline Usage
+
+ Once cached, models work offline:
+
+ ```typescript
+ // Browser: Works if model is in IndexedDB
+ const gerbil = await createGerbilWorker({ modelId: "qwen3-0.6b" });
+
+ // Node.js: Set offline mode
+ process.env.TRANSFORMERS_OFFLINE = "1";
+ const g = new Gerbil();
+ await g.loadModel("qwen3-0.6b"); // Uses cache only
+ ```
+
+ ## Troubleshooting
+
+ ### "Model not found" after cache clear
+
+ Re-download by loading the model:
+
+ ```typescript
+ await g.loadModel("qwen3-0.6b"); // Will re-download
+ ```
+
+ ### Cache taking too much space
+
+ ```bash
+ # View cache size
+ npx @tryhamster/gerbil cache
+
+ # Clear old models
+ npx @tryhamster/gerbil cache --older-than 7
+
+ # Clear everything
+ npx @tryhamster/gerbil cache --clean
+ ```
+
+ ### Browser cache not persisting
+
+ Check browser settings:
+ - Cookies/site data must be allowed
+ - IndexedDB must not be blocked
+ - Storage quota must not be exceeded
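+
+ You can check usage against quota, and request persistence, with the standard Storage API (a browser API, not part of Gerbil):
+
+ ```typescript
+ // Run in the page or the DevTools console.
+ const { usage = 0, quota = 0 } = await navigator.storage.estimate();
+ console.log(`Using ${(usage / 1e6).toFixed(0)} MB of ${(quota / 1e9).toFixed(1)} GB`);
+
+ // Ask the browser not to evict this origin's data (IndexedDB included).
+ const persisted = await navigator.storage.persist();
+ console.log(persisted ? "Storage is persistent" : "Storage may be evicted under pressure");
+ ```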
+
@@ -0,0 +1,176 @@
+ # Inference Pipeline
+
+ How Gerbil runs LLM inference using ONNX Runtime and transformers.js.
+
+ ## The Stack
+
+ ```
+ ┌─────────────────────────────────────┐
+ │           transformers.js           │ ← Tokenization, model loading, generation
+ ├─────────────────────────────────────┤
+ │             ONNX Runtime            │ ← Neural network execution
+ ├─────────────────────────────────────┤
+ │   WebGPU   │    CPU    │    WASM    │ ← Execution backends
+ └─────────────────────────────────────┘
+ ```
+
+ ## transformers.js
+
+ [transformers.js](https://huggingface.co/docs/transformers.js) is a JavaScript port of Hugging Face Transformers that runs ONNX models in the browser and Node.js.
+
+ ### Model Loading
+
+ ```typescript
+ import { AutoModelForCausalLM, AutoTokenizer } from "@huggingface/transformers";
+
+ const tokenizer = await AutoTokenizer.from_pretrained(modelId);
+ const model = await AutoModelForCausalLM.from_pretrained(modelId, {
+   dtype: "q4f16", // Quantization
+   device: "webgpu", // Backend
+ });
+ ```
+
+ ### Generation
+
+ ```typescript
+ const inputs = tokenizer.apply_chat_template(messages, {
+   add_generation_prompt: true,
+   return_dict: true,
+ });
+
+ const output = await model.generate({
+   ...inputs,
+   max_new_tokens: 256,
+   temperature: 0.7,
+   do_sample: true,
+ });
+
+ const text = tokenizer.decode(output[0], { skip_special_tokens: true });
+ ```
+
+ ### Streaming with TextStreamer
+
+ ```typescript
+ import { TextStreamer } from "@huggingface/transformers";
+
+ const streamer = new TextStreamer(tokenizer, {
+   skip_prompt: true,
+   skip_special_tokens: true,
+   callback_function: (text) => {
+     console.log(text); // Called for each token
+   },
+ });
+
+ await model.generate({ ...inputs, streamer });
+ ```
+
+ ## ONNX Runtime
+
+ ONNX Runtime is the execution engine that runs the neural network operations.
+
+ ### Backends
+
+ | Backend | Environment | Speed | Notes |
+ |---------|-------------|-------|-------|
+ | **WebGPU** | Browser, Chrome | ~70-100 tok/s | Fastest, requires GPU |
+ | **CPU** | Node.js | ~10-30 tok/s | Uses SIMD, good on Apple Silicon |
+ | **WASM** | Browser fallback | ~5-10 tok/s | Works everywhere |
+
+ ### Execution Providers
+
+ ONNX Runtime selects execution providers based on availability:
+
+ ```
+ WebGPU EP → WASM EP → CPU EP
+ ```
+
+ Gerbil explicitly requests the desired backend:
+
+ ```typescript
+ // For WebGPU
+ await AutoModelForCausalLM.from_pretrained(modelId, { device: "webgpu" });
+
+ // For CPU
+ await pipeline("text-generation", modelId, { device: "cpu" });
+ ```
+
+ ## Quantization
+
+ Quantization reduces model size and improves inference speed by using lower-precision numbers.
+
+ ### Quantization Types
+
+ | Type | Weights | Compute | Size Reduction | Use Case |
+ |------|---------|---------|----------------|----------|
+ | **fp32** | 32-bit float | 32-bit | 1x (baseline) | Training |
+ | **fp16** | 16-bit float | 16-bit | 2x | GPU inference |
+ | **q4f16** | 4-bit int | 16-bit | ~4x | WebGPU inference |
+ | **q4** | 4-bit int | 32-bit | ~4x | CPU inference |
+
+ ### Why q4f16 for WebGPU?
+
+ WebGPU shaders work best with fp16 compute. The `q4f16` format:
+ - Stores weights as 4-bit integers (small download)
+ - Dequantizes to fp16 during inference (fast on GPU)
+ - Maintains good quality for small models
+
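+ For CPU inference, plain `q4` plays the same role without the fp16 step — a sketch mirroring the Model Loading example above:
+
+ ```typescript
+ // CPU path: 4-bit weights, fp32 compute (no GPU fp16 requirement).
+ const cpuModel = await AutoModelForCausalLM.from_pretrained(modelId, {
+   dtype: "q4",
+   device: "cpu",
+ });
+ ```
+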
+ ### Model Sizes
+
+ | Model | Original | q4f16 | Download |
+ |-------|----------|-------|----------|
+ | Qwen3-0.6B | ~2.4GB | ~400MB | ~400MB |
+ | SmolLM2-360M | ~1.4GB | ~250MB | ~250MB |
+ | SmolLM2-135M | ~540MB | ~100MB | ~100MB |
+
+ ## Tokenization
+
+ ### Chat Templates
+
+ Gerbil uses model-specific chat templates to format conversations:
+
+ ```typescript
+ const messages = [
+   { role: "system", content: "You are helpful." },
+   { role: "user", content: "Hello!" },
+ ];
+
+ const inputs = tokenizer.apply_chat_template(messages, {
+   add_generation_prompt: true, // Add assistant turn start
+   return_dict: true, // Return input_ids + attention_mask
+   enable_thinking: true, // Qwen3 thinking mode
+ });
+ ```
+
+ ### Thinking Mode (Qwen3)
+
+ Qwen3 models support a "thinking" mode where the model shows reasoning:
+
+ ```
+ <think>
+ Let me work through this step by step...
+ 127 × 43 = 127 × 40 + 127 × 3 = 5080 + 381 = 5461
+ </think>
+ The answer is 5461.
+ ```
+
+ Enabled via `enable_thinking: true` in the chat template.
+
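+ If you want only the final answer, the `<think>` block can be stripped after generation (a generic post-processing sketch, not a Gerbil API):
+
+ ```typescript
+ // Drop the reasoning block, keep the visible answer.
+ function stripThinking(text: string): string {
+   return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
+ }
+
+ stripThinking("<think>5080 + 381 = 5461</think>\nThe answer is 5461.");
+ // => "The answer is 5461."
+ ```
+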
+ ## KV Cache
+
+ The Key-Value cache stores intermediate attention states to speed up autoregressive generation:
+
+ ```typescript
+ const { past_key_values, sequences } = await model.generate({
+   ...inputs,
+   past_key_values: previousCache, // Reuse from last turn
+   return_dict_in_generate: true,
+ });
+
+ // Save for next turn
+ cache = past_key_values;
+ ```
+
+ Benefits:
+ - Faster multi-turn conversations
+ - Reduced compute for long contexts
+
+ Gerbil manages the KV cache automatically for multi-turn chat.
+
@@ -0,0 +1,179 @@
+ # Architecture Overview
+
+ Gerbil is a local LLM inference library that runs entirely on-device, with no API calls or cloud dependencies.
+
+ ## Core Components
+
+ ### 1. Gerbil Class (`src/core/gerbil.ts`)
+
+ The main entry point for Node.js applications:
+
+ ```typescript
+ const g = new Gerbil();
+ await g.loadModel("qwen3-0.6b");
+ const result = await g.generate("Hello");
+ ```
+
+ Responsibilities:
+ - Model loading and lifecycle management
+ - Device selection (WebGPU vs CPU)
+ - Generation orchestration
+ - Streaming coordination
+ - Session statistics
+
+ ### 2. Model Registry (`src/core/models.ts`)
+
+ Maps friendly model IDs to Hugging Face paths:
+
+ ```typescript
+ const BUILTIN_MODELS = {
+   "qwen3-0.6b": {
+     id: "qwen3-0.6b",
+     path: "onnx-community/Qwen3-0.6B-ONNX",
+     family: "qwen",
+     size: "0.6B",
+     contextLength: 32768,
+     supportsThinking: true,
+   },
+   // ...
+ };
+ ```
+
+ ### 3. Chrome GPU Backend (`src/core/chrome-backend.ts`)
+
+ Enables WebGPU in Node.js by using headless Chrome as a GPU accelerator:
+
+ ```
+ ┌─────────────┐    HTTP     ┌──────────────────┐
+ │   Node.js   │◄───────────►│  Headless Chrome │
+ │   (Gerbil)  │   :43724    │  (WebGPU worker) │
+ └─────────────┘             └──────────────────┘
+        │                            │
+        │ CDP (DevTools)             │ WebGPU
+        └─────────────┬──────────────┘
+                      │
+                ┌─────▼─────┐
+                │    GPU    │
+                └───────────┘
+ ```
+
+ ### 4. Browser Worker (`src/browser/index.ts`)
+
+ Provides `createGerbilWorker()` for browser applications:
+
+ ```typescript
+ const gerbil = await createGerbilWorker({
+   modelId: "qwen3-0.6b",
+   onToken: (token) => console.log(token.text),
+ });
+ ```
+
+ Uses an inline Web Worker to:
+ - Load models without blocking the UI
+ - Stream tokens in real-time
+ - Manage GPU memory separately from main thread
+
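+ The "inline" part means there is no separate worker file to host — the worker script ships inside the bundle and is booted from a blob URL. A simplified sketch of the technique (not Gerbil's exact code):
+
+ ```typescript
+ // Build a Worker from an in-memory script instead of a hosted file.
+ const workerSource = `
+   self.onmessage = (e) => {
+     // ...load the model with transformers.js, stream tokens back...
+     self.postMessage({ type: "token", text: "hi" });
+   };
+ `;
+ const blobUrl = URL.createObjectURL(new Blob([workerSource], { type: "text/javascript" }));
+ const worker = new Worker(blobUrl);
+ worker.onmessage = (e) => console.log(e.data);
+ ```
+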
+ ## Execution Paths
+
+ ### Browser Path
+
+ ```
+ User Code
+     ↓
+ createGerbilWorker()
+     ↓
+ Web Worker (inline blob)
+     ↓
+ transformers.js
+     ↓
+ ONNX Runtime (WebGPU)
+     ↓
+ GPU
+ ```
+
+ ### Node.js CPU Path
+
+ ```
+ User Code
+     ↓
+ Gerbil.generate()
+     ↓
+ transformers.js pipeline
+     ↓
+ ONNX Runtime (CPU)
+     ↓
+ CPU (with SIMD)
+ ```
+
+ ### Node.js WebGPU Path
+
+ ```
+ User Code
+     ↓
+ Gerbil.generate()
+     ↓
+ ChromeGPUBackend
+     ↓
+ Headless Chrome (via CDP)
+     ↓
+ transformers.js (in Chrome)
+     ↓
+ ONNX Runtime (WebGPU)
+     ↓
+ GPU
+ ```
+
+ ## Device Selection
+
+ Gerbil automatically selects the best available backend:
+
+ ```typescript
+ // Explicit selection
+ await g.loadModel("qwen3-0.6b", { device: "webgpu" }); // or "cpu"
+
+ // Check current device
+ g.getDeviceMode(); // "webgpu" | "cpu" | "wasm"
+ ```
+
+ Priority:
+ 1. **Browser**: WebGPU → WASM fallback
+ 2. **Node.js with --gpu**: ChromeGPUBackend (headless Chrome)
+ 3. **Node.js default**: CPU via ONNX Runtime
+
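+ A minimal sketch of that priority order (hypothetical helper; Gerbil's real selection logic lives in the Gerbil class and ChromeGPUBackend):
+
+ ```typescript
+ // Mirrors the priority list above. navigator.gpu needs WebGPU type definitions.
+ async function pickDevice(gpuFlag: boolean): Promise<"webgpu" | "cpu" | "wasm"> {
+   if (typeof navigator !== "undefined") {
+     // Browser: WebGPU when an adapter is available, else WASM fallback.
+     const adapter = await (navigator as any).gpu?.requestAdapter?.();
+     return adapter ? "webgpu" : "wasm";
+   }
+   // Node.js: --gpu routes through ChromeGPUBackend; default is CPU.
+   return gpuFlag ? "webgpu" : "cpu";
+ }
+ ```
+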
+ ## File Structure
+
+ ```
+ src/
+ ├── core/
+ │   ├── gerbil.ts           # Main Gerbil class
+ │   ├── models.ts           # Model registry
+ │   ├── types.ts            # TypeScript types
+ │   ├── tools.ts            # Tool calling system
+ │   └── chrome-backend.ts   # Node.js WebGPU via Chrome
+ ├── browser/
+ │   └── index.ts            # createGerbilWorker + utilities
+ ├── skills/
+ │   └── ...                 # Built-in skills (commit, summarize, etc.)
+ ├── integrations/
+ │   └── ...                 # AI SDK, LangChain, MCP
+ ├── frameworks/
+ │   └── ...                 # Next.js, Express, React, etc.
+ └── cli/
+     └── repl/               # Interactive terminal UI
+ ```
+