@tryhamster/gerbil 1.0.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +23 -0
- package/README.md +253 -0
- package/bin/cli.js +2 -0
- package/dist/auto-update-BbNHbSU1.mjs +3 -0
- package/dist/browser/index.d.mts +262 -0
- package/dist/browser/index.d.mts.map +1 -0
- package/dist/browser/index.mjs +755 -0
- package/dist/browser/index.mjs.map +1 -0
- package/dist/chrome-backend-C5Un08O4.mjs +771 -0
- package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
- package/dist/chrome-backend-CtwPENIW.mjs +3 -0
- package/dist/chunk-Ct1HF2bE.mjs +7 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +7078 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/frameworks/express.d.mts +22 -0
- package/dist/frameworks/express.d.mts.map +1 -0
- package/dist/frameworks/express.mjs +123 -0
- package/dist/frameworks/express.mjs.map +1 -0
- package/dist/frameworks/fastify.d.mts +11 -0
- package/dist/frameworks/fastify.d.mts.map +1 -0
- package/dist/frameworks/fastify.mjs +73 -0
- package/dist/frameworks/fastify.mjs.map +1 -0
- package/dist/frameworks/hono.d.mts +14 -0
- package/dist/frameworks/hono.d.mts.map +1 -0
- package/dist/frameworks/hono.mjs +82 -0
- package/dist/frameworks/hono.mjs.map +1 -0
- package/dist/frameworks/next.d.mts +31 -0
- package/dist/frameworks/next.d.mts.map +1 -0
- package/dist/frameworks/next.mjs +116 -0
- package/dist/frameworks/next.mjs.map +1 -0
- package/dist/frameworks/react.d.mts +56 -0
- package/dist/frameworks/react.d.mts.map +1 -0
- package/dist/frameworks/react.mjs +172 -0
- package/dist/frameworks/react.mjs.map +1 -0
- package/dist/frameworks/trpc.d.mts +12 -0
- package/dist/frameworks/trpc.d.mts.map +1 -0
- package/dist/frameworks/trpc.mjs +80 -0
- package/dist/frameworks/trpc.mjs.map +1 -0
- package/dist/gerbil-BfnsFWRE.mjs +644 -0
- package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
- package/dist/gerbil-BjW-z7Fq.mjs +5 -0
- package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
- package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
- package/dist/index.d.mts +223 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +13 -0
- package/dist/index.mjs.map +1 -0
- package/dist/integrations/ai-sdk.d.mts +78 -0
- package/dist/integrations/ai-sdk.d.mts.map +1 -0
- package/dist/integrations/ai-sdk.mjs +199 -0
- package/dist/integrations/ai-sdk.mjs.map +1 -0
- package/dist/integrations/langchain.d.mts +41 -0
- package/dist/integrations/langchain.d.mts.map +1 -0
- package/dist/integrations/langchain.mjs +93 -0
- package/dist/integrations/langchain.mjs.map +1 -0
- package/dist/integrations/llamaindex.d.mts +45 -0
- package/dist/integrations/llamaindex.d.mts.map +1 -0
- package/dist/integrations/llamaindex.mjs +86 -0
- package/dist/integrations/llamaindex.mjs.map +1 -0
- package/dist/integrations/mcp-client.d.mts +206 -0
- package/dist/integrations/mcp-client.d.mts.map +1 -0
- package/dist/integrations/mcp-client.mjs +507 -0
- package/dist/integrations/mcp-client.mjs.map +1 -0
- package/dist/integrations/mcp.d.mts +177 -0
- package/dist/integrations/mcp.d.mts.map +1 -0
- package/dist/integrations/mcp.mjs +8 -0
- package/dist/mcp-R8kRLIKb.mjs +348 -0
- package/dist/mcp-R8kRLIKb.mjs.map +1 -0
- package/dist/models-DKULvhOr.mjs +136 -0
- package/dist/models-DKULvhOr.mjs.map +1 -0
- package/dist/models-De2-_GmQ.d.mts +22 -0
- package/dist/models-De2-_GmQ.d.mts.map +1 -0
- package/dist/one-liner-BUQR0nqq.mjs +98 -0
- package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
- package/dist/skills/index.d.mts +390 -0
- package/dist/skills/index.d.mts.map +1 -0
- package/dist/skills/index.mjs +7 -0
- package/dist/skills-D3CEpgDc.mjs +630 -0
- package/dist/skills-D3CEpgDc.mjs.map +1 -0
- package/dist/tools-BsiEE6f2.mjs +567 -0
- package/dist/tools-BsiEE6f2.mjs.map +1 -0
- package/dist/types-BS1N92Jt.d.mts +183 -0
- package/dist/types-BS1N92Jt.d.mts.map +1 -0
- package/dist/utils-7vXqtq2Q.mjs +63 -0
- package/dist/utils-7vXqtq2Q.mjs.map +1 -0
- package/docs/ai-sdk.md +80 -0
- package/docs/architecture/README.md +84 -0
- package/docs/architecture/caching.md +227 -0
- package/docs/architecture/inference.md +176 -0
- package/docs/architecture/overview.md +179 -0
- package/docs/architecture/streaming.md +261 -0
- package/docs/architecture/webgpu.md +213 -0
- package/docs/browser.md +328 -0
- package/docs/cli.md +155 -0
- package/docs/frameworks.md +90 -0
- package/docs/mcp-client.md +224 -0
- package/docs/mcp.md +109 -0
- package/docs/memory.md +229 -0
- package/docs/repl.md +473 -0
- package/docs/skills.md +261 -0
- package/docs/tools.md +304 -0
- package/package.json +207 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# MCP Client
|
|
2
|
+
|
|
3
|
+
Connect Gerbil to external MCP (Model Context Protocol) servers and use their tools in chat or skills.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import { connectMCP, callMCPTool, getMCPHub } from "@tryhamster/gerbil/mcp-client";
|
|
9
|
+
|
|
10
|
+
// Connect to an MCP server
|
|
11
|
+
await connectMCP("filesystem", {
|
|
12
|
+
command: "uvx",
|
|
13
|
+
args: ["mcp-server-filesystem", "/home/user"],
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
// Call a tool
|
|
17
|
+
const files = await callMCPTool("filesystem:list_directory", { path: "/" });
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## MCPClient
|
|
21
|
+
|
|
22
|
+
Direct client for a single MCP server:
|
|
23
|
+
|
|
24
|
+
```typescript
|
|
25
|
+
import { MCPClient } from "@tryhamster/gerbil/mcp-client";
|
|
26
|
+
|
|
27
|
+
const client = new MCPClient("browser", {
|
|
28
|
+
command: "npx",
|
|
29
|
+
args: ["-y", "@anthropic/mcp-server-puppeteer"],
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
// Connect
|
|
33
|
+
await client.connect();
|
|
34
|
+
|
|
35
|
+
// List available tools
|
|
36
|
+
const tools = client.getTools();
|
|
37
|
+
console.log(tools);
|
|
38
|
+
// [{ name: "navigate", description: "Navigate to URL", parameters: {...} }, ...]
|
|
39
|
+
|
|
40
|
+
// Call a tool
|
|
41
|
+
const result = await client.callTool("navigate", { url: "https://example.com" });
|
|
42
|
+
|
|
43
|
+
// Check connection status
|
|
44
|
+
console.log(client.isConnected()); // true
|
|
45
|
+
|
|
46
|
+
// Disconnect when done
|
|
47
|
+
await client.disconnect();
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## MCPHub
|
|
51
|
+
|
|
52
|
+
Manage multiple MCP servers:
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
import { MCPHub } from "@tryhamster/gerbil/mcp-client";
|
|
56
|
+
|
|
57
|
+
const hub = new MCPHub();
|
|
58
|
+
|
|
59
|
+
// Add multiple servers
|
|
60
|
+
await hub.addServer("filesystem", {
|
|
61
|
+
command: "uvx",
|
|
62
|
+
args: ["mcp-server-filesystem", "/tmp"],
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
await hub.addServer("browser", {
|
|
66
|
+
command: "npx",
|
|
67
|
+
args: ["-y", "@anthropic/mcp-server-puppeteer"],
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
// List all connected servers
|
|
71
|
+
console.log(hub.listServers()); // ["filesystem", "browser"]
|
|
72
|
+
|
|
73
|
+
// Get all tools from all servers
|
|
74
|
+
const allTools = hub.getAllTools();
|
|
75
|
+
// Tools are prefixed: "filesystem:list_directory", "browser:navigate"
|
|
76
|
+
|
|
77
|
+
// Call tool using server:tool format
|
|
78
|
+
await hub.callTool("filesystem:list_directory", { path: "/" });
|
|
79
|
+
|
|
80
|
+
// Remove a server
|
|
81
|
+
await hub.removeServer("browser");
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Singleton Hub
|
|
85
|
+
|
|
86
|
+
Use the global hub for app-wide MCP connections:
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
import { getMCPHub, connectMCP, disconnectMCP, callMCPTool } from "@tryhamster/gerbil/mcp-client";
|
|
90
|
+
|
|
91
|
+
// These all use the same global hub
|
|
92
|
+
await connectMCP("myserver", { command: "...", args: [...] });
|
|
93
|
+
const result = await callMCPTool("myserver:sometool", { arg: "value" });
|
|
94
|
+
await disconnectMCP("myserver");
|
|
95
|
+
|
|
96
|
+
// Or access the hub directly
|
|
97
|
+
const hub = getMCPHub();
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Using MCP Tools in Chat
|
|
101
|
+
|
|
102
|
+
Once an MCP server is connected, its tools are automatically registered in Gerbil's tool registry. In **Agent mode**, the AI can use them:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
User: Read the file /tmp/notes.txt
|
|
106
|
+
|
|
107
|
+
Gerbil: {"tool": "mcp_call", "params": {"tool_name": "filesystem:read_file", "params": {"path": "/tmp/notes.txt"}}}
|
|
108
|
+
|
|
109
|
+
[Tool Result: Contents of notes.txt...]
|
|
110
|
+
|
|
111
|
+
Gerbil: The file contains your meeting notes from yesterday...
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Built-in MCP Tools
|
|
115
|
+
|
|
116
|
+
When MCP servers are connected, these tools become available to the AI:
|
|
117
|
+
|
|
118
|
+
| Tool | Description |
|
|
119
|
+
|------|-------------|
|
|
120
|
+
| `mcp_call` | Call any tool from a connected MCP server |
|
|
121
|
+
| `mcp_list` | List all available tools from connected servers |
|
|
122
|
+
|
|
123
|
+
## Using MCP Tools in Skills
|
|
124
|
+
|
|
125
|
+
Create skills that leverage MCP tools:
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
import { defineSkill } from "@tryhamster/gerbil/skills";
|
|
129
|
+
import { getMCPHub } from "@tryhamster/gerbil/mcp-client";
|
|
130
|
+
import { z } from "zod";
|
|
131
|
+
|
|
132
|
+
export const webScraper = defineSkill({
|
|
133
|
+
name: "web-scraper",
|
|
134
|
+
description: "Scrape and summarize a webpage",
|
|
135
|
+
input: z.object({
|
|
136
|
+
url: z.string().url(),
|
|
137
|
+
}),
|
|
138
|
+
async run(input, gerbil) {
|
|
139
|
+
const hub = getMCPHub();
|
|
140
|
+
|
|
141
|
+
// Navigate to page
|
|
142
|
+
await hub.callTool("browser:navigate", { url: input.url });
|
|
143
|
+
|
|
144
|
+
// Get page content
|
|
145
|
+
const content = await hub.callTool("browser:get_content", {});
|
|
146
|
+
|
|
147
|
+
// Summarize with Gerbil
|
|
148
|
+
const summary = await gerbil.generate(
|
|
149
|
+
`Summarize this webpage:\n\n${content}`,
|
|
150
|
+
{ maxTokens: 300 }
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
return summary.text;
|
|
154
|
+
},
|
|
155
|
+
});
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Popular MCP Servers
|
|
159
|
+
|
|
160
|
+
| Server | Install | Description |
|
|
161
|
+
|--------|---------|-------------|
|
|
162
|
+
| Filesystem | `uvx mcp-server-filesystem /path` | Read/write files |
|
|
163
|
+
| Puppeteer | `npx -y @modelcontextprotocol/server-puppeteer` | Browser automation |
|
|
164
|
+
| SQLite | `uvx mcp-server-sqlite --db-path db.sqlite` | Database access |
|
|
165
|
+
| GitHub | `npx -y @modelcontextprotocol/server-github` | GitHub API |
|
|
166
|
+
| Slack | `npx -y @modelcontextprotocol/server-slack` | Slack integration |
|
|
167
|
+
|
|
168
|
+
See [modelcontextprotocol.io](https://modelcontextprotocol.io) for more servers.
|
|
169
|
+
|
|
170
|
+
## Server Configuration
|
|
171
|
+
|
|
172
|
+
```typescript
|
|
173
|
+
interface MCPServerConfig {
|
|
174
|
+
/** Command to run (e.g., "npx", "uvx", "node") */
|
|
175
|
+
command: string;
|
|
176
|
+
|
|
177
|
+
/** Arguments for the command */
|
|
178
|
+
args?: string[];
|
|
179
|
+
|
|
180
|
+
/** Environment variables */
|
|
181
|
+
env?: Record<string, string>;
|
|
182
|
+
|
|
183
|
+
/** Working directory */
|
|
184
|
+
cwd?: string;
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Error Handling
|
|
189
|
+
|
|
190
|
+
```typescript
|
|
191
|
+
import { connectMCP, callMCPTool } from "@tryhamster/gerbil/mcp-client";
|
|
192
|
+
|
|
193
|
+
try {
|
|
194
|
+
await connectMCP("myserver", { command: "bad-command" });
|
|
195
|
+
} catch (e) {
|
|
196
|
+
console.error("Failed to connect:", e.message);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
try {
|
|
200
|
+
const result = await callMCPTool("myserver:unknown_tool", {});
|
|
201
|
+
} catch (e) {
|
|
202
|
+
console.error("Tool call failed:", e.message);
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## REPL Integration
|
|
207
|
+
|
|
208
|
+
In the Gerbil REPL, you can use MCP tools in **Chat** with Agent mode enabled.
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
[gerbil / Chat]
|
|
212
|
+
Mode: [@] Agent
|
|
213
|
+
|
|
214
|
+
You: Connect to the filesystem server at /tmp and list the files
|
|
215
|
+
|
|
216
|
+
Gerbil: I'll use the mcp_list tool to see available tools, then list files.
|
|
217
|
+
{"tool": "mcp_list", "params": {}}
|
|
218
|
+
|
|
219
|
+
[Available MCP tools: filesystem:list_directory, filesystem:read_file...]
|
|
220
|
+
|
|
221
|
+
{"tool": "mcp_call", "params": {"tool_name": "filesystem:list_directory", "params": {"path": "/tmp"}}}
|
|
222
|
+
|
|
223
|
+
[files listed...]
|
|
224
|
+
```
|
package/docs/mcp.md
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# MCP Server
|
|
2
|
+
|
|
3
|
+
Gerbil can run as a Model Context Protocol (MCP) server for Claude Desktop, Cursor, and other MCP clients.
|
|
4
|
+
|
|
5
|
+
## Start Server
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
gerbil serve --mcp
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Claude Desktop Config
|
|
12
|
+
|
|
13
|
+
Add to your Claude Desktop config:
|
|
14
|
+
|
|
15
|
+
- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
16
|
+
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
17
|
+
- **Linux**: `~/.config/claude/claude_desktop_config.json`
|
|
18
|
+
|
|
19
|
+
```json
|
|
20
|
+
{
|
|
21
|
+
"mcpServers": {
|
|
22
|
+
"gerbil": {
|
|
23
|
+
"command": "npx",
|
|
24
|
+
"args": ["-y", "@tryhamster/gerbil", "serve", "--mcp"]
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Cursor Config
|
|
31
|
+
|
|
32
|
+
Add to `.cursor/mcp.json` in your project:
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"mcpServers": {
|
|
37
|
+
"gerbil": {
|
|
38
|
+
"command": "npx",
|
|
39
|
+
"args": ["-y", "@tryhamster/gerbil", "serve", "--mcp"]
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Available Tools
|
|
46
|
+
|
|
47
|
+
Built-in skills are exposed as MCP tools:
|
|
48
|
+
|
|
49
|
+
| Tool | Description |
|
|
50
|
+
|------|-------------|
|
|
51
|
+
| `gerbil_generate` | Generate text with local LLM |
|
|
52
|
+
| `gerbil_summarize` | Summarize content |
|
|
53
|
+
| `gerbil_explain` | Explain code or concepts |
|
|
54
|
+
| `gerbil_review` | Code review |
|
|
55
|
+
| `gerbil_commit` | Generate commit message |
|
|
56
|
+
| `gerbil_translate` | Translate text |
|
|
57
|
+
| `gerbil_embed` | Generate embeddings |
|
|
58
|
+
|
|
59
|
+
## Custom Skills as Tools
|
|
60
|
+
|
|
61
|
+
Custom skills loaded via `loadSkills()` are automatically exposed as MCP tools:
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
// In your MCP server setup
|
|
65
|
+
import { loadSkills } from "@tryhamster/gerbil/skills";
|
|
66
|
+
|
|
67
|
+
await loadSkills("./skills");
|
|
68
|
+
// Now your custom skills are available as MCP tools
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Programmatic Usage
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import { createMCPServer, startMCPServer } from "@tryhamster/gerbil/mcp";
|
|
75
|
+
|
|
76
|
+
// Create server instance
|
|
77
|
+
const server = await createMCPServer({ model: "qwen3-0.6b" });
|
|
78
|
+
|
|
79
|
+
// List tools
|
|
80
|
+
const tools = server.listTools();
|
|
81
|
+
console.log(tools.map(t => t.name));
|
|
82
|
+
|
|
83
|
+
// Call a tool
|
|
84
|
+
const result = await server.callTool("gerbil_summarize", {
|
|
85
|
+
content: "Long text here...",
|
|
86
|
+
length: "short",
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
// Or start stdio server for MCP clients
|
|
90
|
+
await startMCPServer({ model: "qwen3-0.6b" });
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Options
|
|
94
|
+
|
|
95
|
+
```typescript
|
|
96
|
+
interface MCPServerOptions {
|
|
97
|
+
/** Model to load (default: "qwen3-0.6b") */
|
|
98
|
+
model?: string;
|
|
99
|
+
|
|
100
|
+
/** Device to use */
|
|
101
|
+
device?: "auto" | "gpu" | "cpu";
|
|
102
|
+
|
|
103
|
+
/** Quantization */
|
|
104
|
+
dtype?: "q4" | "q8" | "fp16" | "fp32";
|
|
105
|
+
|
|
106
|
+
/** Specific tools to expose (default: all) */
|
|
107
|
+
tools?: string[];
|
|
108
|
+
}
|
|
109
|
+
```
|
package/docs/memory.md
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# Memory Management
|
|
2
|
+
|
|
3
|
+
Gerbil automatically manages memory to prevent leaks while maintaining performance. For WebGPU inference (Node.js), memory is bounded and monitored.
|
|
4
|
+
|
|
5
|
+
## Automatic Management
|
|
6
|
+
|
|
7
|
+
### Context-Aware Auto-Reset
|
|
8
|
+
|
|
9
|
+
The KV cache automatically resets when it exceeds the model's context length (2048 tokens for Qwen3). This prevents unbounded memory growth:
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
// Memory automatically resets after ~2048 tokens
|
|
13
|
+
// No action needed - happens transparently
|
|
14
|
+
const gerbil = new Gerbil();
|
|
15
|
+
await gerbil.loadModel("qwen3-0.6b");
|
|
16
|
+
|
|
17
|
+
// Long conversations work fine - auto-reset preserves context window
|
|
18
|
+
for (let i = 0; i < 100; i++) {
|
|
19
|
+
await gerbil.generate("Tell me something interesting");
|
|
20
|
+
}
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Memory Bounds
|
|
24
|
+
|
|
25
|
+
- **Per-page maximum**: ~4GB (context length × token size)
|
|
26
|
+
- **Concurrent page limit**: 5 pages maximum
|
|
27
|
+
- **Total worst-case**: ~20GB (5 pages × 4GB each)
|
|
28
|
+
- **Typical usage**: < 2GB (most conversations are < 500 tokens)
|
|
29
|
+
|
|
30
|
+
## Manual Control
|
|
31
|
+
|
|
32
|
+
### Check Memory Usage
|
|
33
|
+
|
|
34
|
+
```typescript
|
|
35
|
+
const mem = await gerbil.getMemoryUsage();
|
|
36
|
+
if (mem) {
|
|
37
|
+
console.log(`Using ${mem.usedGB.toFixed(1)}GB / ${mem.totalGB.toFixed(1)}GB`);
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Clear Cache Manually
|
|
42
|
+
|
|
43
|
+
```typescript
|
|
44
|
+
// Clear KV cache to free memory (resets conversation context)
|
|
45
|
+
await gerbil.clearCache();
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Auto-Cleanup with Threshold
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
// Check and cleanup if memory exceeds threshold
|
|
52
|
+
const didCleanup = await gerbil.checkMemoryAndCleanup(8); // 8GB threshold
|
|
53
|
+
if (didCleanup) {
|
|
54
|
+
console.log("Memory cleaned up");
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## REPL Commands
|
|
59
|
+
|
|
60
|
+
### `/memory` - Show Memory Usage
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
> /memory
|
|
64
|
+
💾 Memory: 2.3GB / 8.5GB (27.1%)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### `/reset-cache` - Clear Cache
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
> /reset-cache
|
|
71
|
+
🧹 Cache cleared (was 2.3GB). Conversation context reset.
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Long-Running Sessions
|
|
75
|
+
|
|
76
|
+
For background services or long-running processes:
|
|
77
|
+
|
|
78
|
+
```typescript
|
|
79
|
+
import { Gerbil } from '@tryhamster/gerbil';
|
|
80
|
+
|
|
81
|
+
const gerbil = new Gerbil();
|
|
82
|
+
await gerbil.loadModel("qwen3-0.6b");
|
|
83
|
+
|
|
84
|
+
// Periodic memory monitoring
|
|
85
|
+
setInterval(async () => {
|
|
86
|
+
const mem = await gerbil.getMemoryUsage();
|
|
87
|
+
|
|
88
|
+
if (mem && mem.usedGB > 10) {
|
|
89
|
+
console.warn(`High memory: ${mem.usedGB.toFixed(1)}GB`);
|
|
90
|
+
await gerbil.clearCache(); // Clear if threshold exceeded
|
|
91
|
+
}
|
|
92
|
+
}, 60000); // Check every minute
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Development Best Practices
|
|
96
|
+
|
|
97
|
+
### Multiple Instances
|
|
98
|
+
|
|
99
|
+
When creating multiple Gerbil instances during development, dispose of each instance once you are done with it:
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
// ✅ Good: Dispose when done
|
|
103
|
+
const gerbil = new Gerbil();
|
|
104
|
+
await gerbil.loadModel("qwen3-0.6b");
|
|
105
|
+
// ... use gerbil ...
|
|
106
|
+
await gerbil.dispose(); // Frees resources
|
|
107
|
+
|
|
108
|
+
// ❌ Bad: Creating many instances without cleanup
|
|
109
|
+
for (let i = 0; i < 10; i++) {
|
|
110
|
+
const g = new Gerbil();
|
|
111
|
+
await g.loadModel("qwen3-0.6b");
|
|
112
|
+
// Forgot to dispose - pages accumulate!
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Page Limit
|
|
117
|
+
|
|
118
|
+
Gerbil limits concurrent Chrome pages to 5. If you hit this limit:
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
// Error: Maximum concurrent pages (5) reached.
|
|
122
|
+
// Call dispose() on old Gerbil instances to free resources.
|
|
123
|
+
|
|
124
|
+
// Solution: Dispose unused instances
|
|
125
|
+
await oldGerbil.dispose();
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Architecture
|
|
129
|
+
|
|
130
|
+
### WebGPU (Node.js)
|
|
131
|
+
- Uses headless Chrome for GPU acceleration
|
|
132
|
+
- Shared browser process, separate pages per instance
|
|
133
|
+
- KV cache stored in-memory per page
|
|
134
|
+
- Auto-reset prevents unbounded growth
|
|
135
|
+
|
|
136
|
+
### WebGPU (Browser)
|
|
137
|
+
- Native WebGPU API
|
|
138
|
+
- KV cache in browser memory
|
|
139
|
+
- Auto-reset applies the same way as in Node.js
|
|
140
|
+
|
|
141
|
+
### CPU/WASM
|
|
142
|
+
- Memory monitoring not available
|
|
143
|
+
- Smaller models, less memory pressure
|
|
144
|
+
- Manual cleanup still works
|
|
145
|
+
|
|
146
|
+
## Console Events
|
|
147
|
+
|
|
148
|
+
When using WebGPU via Chrome, the backend emits events:
|
|
149
|
+
|
|
150
|
+
```javascript
|
|
151
|
+
// Auto-reset event (cache exceeded context)
|
|
152
|
+
{
|
|
153
|
+
type: "cache_reset",
|
|
154
|
+
reason: "context_exceeded",
|
|
155
|
+
tokensInCache: 2100,
|
|
156
|
+
contextLength: 2048
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Manual reset event
|
|
160
|
+
{ type: "cache_reset", reason: "manual" }
|
|
161
|
+
|
|
162
|
+
// Generation complete (includes cache info)
|
|
163
|
+
{
|
|
164
|
+
type: "complete",
|
|
165
|
+
text: "...",
|
|
166
|
+
numTokens: 50,
|
|
167
|
+
tokensInCache: 520
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Performance Impact
|
|
172
|
+
|
|
173
|
+
- ✅ **Zero impact on short conversations** (< 2048 tokens)
|
|
174
|
+
- ✅ **Graceful degradation on long conversations** (auto-reset keeps the KV cache within the model's context window)
|
|
175
|
+
- ✅ **Minimal overhead** (memory check is async, doesn't block generation)
|
|
176
|
+
|
|
177
|
+
## API Reference
|
|
178
|
+
|
|
179
|
+
### `getMemoryUsage()`
|
|
180
|
+
|
|
181
|
+
Returns memory statistics, with sizes in GB and usage as a percentage (WebGPU only). The result may be `null`, so check it before use:
|
|
182
|
+
|
|
183
|
+
```typescript
|
|
184
|
+
const mem = await gerbil.getMemoryUsage();
|
|
185
|
+
// Returns: { usedGB: number; totalGB: number; usedPercent: number } | null
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### `clearCache()`
|
|
189
|
+
|
|
190
|
+
Manually clear KV cache:
|
|
191
|
+
|
|
192
|
+
```typescript
|
|
193
|
+
await gerbil.clearCache();
|
|
194
|
+
// Resets conversation context, frees memory
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### `checkMemoryAndCleanup(thresholdGB?)`
|
|
198
|
+
|
|
199
|
+
Check memory and auto-cleanup if threshold exceeded:
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
const didCleanup = await gerbil.checkMemoryAndCleanup(8); // 8GB threshold
|
|
203
|
+
// Returns: boolean (true if cleanup was performed)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### `dispose()`
|
|
207
|
+
|
|
208
|
+
Clean up all resources:
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
await gerbil.dispose();
|
|
212
|
+
// Closes Chrome page, releases memory
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## Testing
|
|
216
|
+
|
|
217
|
+
Run the memory management test suite:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
node test-memory-management.js
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
This verifies:
|
|
224
|
+
- Memory monitoring works
|
|
225
|
+
- Cache clearing reduces memory
|
|
226
|
+
- Auto-reset triggers correctly
|
|
227
|
+
- Threshold-based cleanup works
|
|
228
|
+
- Proper cleanup on dispose
|
|
229
|
+
|