@askjo/camofox-browser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Dockerfile CHANGED
@@ -31,8 +31,14 @@ RUN apt-get update && apt-get install -y \
31
31
  ca-certificates \
32
32
  curl \
33
33
  unzip \
34
+ # yt-dlp runtime dependency
35
+ python3-minimal \
34
36
  && rm -rf /var/lib/apt/lists/*
35
37
 
38
+ # Install yt-dlp for YouTube transcript extraction (no browser needed)
39
+ RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp \
40
+ && chmod +x /usr/local/bin/yt-dlp
41
+
36
42
  # Pre-bake Camoufox browser binary into image
37
43
  # This avoids downloading at runtime and pins the version
38
44
  # Note: unzip returns exit code 1 for warnings (Unicode filenames), so we use || true and verify
@@ -57,4 +63,4 @@ ENV CAMOFOX_PORT=3000
57
63
 
58
64
  EXPOSE 3000
59
65
 
60
- CMD ["node", "server.js"]
66
+ CMD ["sh", "-c", "node --max-old-space-size=${MAX_OLD_SPACE_SIZE:-128} server.js"]
package/README.md CHANGED
@@ -32,13 +32,25 @@ This project wraps that engine in a REST API built for agents: accessibility sna
32
32
  - **C++ Anti-Detection** - bypasses Google, Cloudflare, and most bot detection
33
33
  - **Element Refs** - stable `e1`, `e2`, `e3` identifiers for reliable interaction
34
34
  - **Token-Efficient** - accessibility snapshots are ~90% smaller than raw HTML
35
+ - **Runs on Anything** - lazy browser launch + idle shutdown keeps memory at ~40MB when idle. Designed to share a box with the rest of your stack — Raspberry Pi, $5 VPS, shared Railway infra.
35
36
  - **Session Isolation** - separate cookies/storage per user
36
37
  - **Cookie Import** - inject Netscape-format cookie files for authenticated browsing
37
38
  - **Proxy + GeoIP** - route traffic through residential proxies with automatic locale/timezone
38
39
  - **Structured Logging** - JSON log lines with request IDs for production observability
40
+ - **YouTube Transcripts** - extract captions from any YouTube video via yt-dlp, no API key needed
39
41
  - **Search Macros** - `@google_search`, `@youtube_search`, `@amazon_search`, `@reddit_subreddit`, and 10 more
42
+ - **Snapshot Screenshots** - include a base64 PNG screenshot alongside the accessibility snapshot
43
+ - **Large Page Handling** - automatic snapshot truncation with offset-based pagination
40
44
  - **Deploy Anywhere** - Docker, Fly.io, Railway
41
45
 
46
+ ## Optional Dependencies
47
+
48
+ | Dependency | Purpose | Install |
49
+ |-----------|---------|---------|
50
+ | [yt-dlp](https://github.com/yt-dlp/yt-dlp) | YouTube transcript extraction (fast path) | `pip install yt-dlp` or `brew install yt-dlp` |
51
+
52
+ The Docker image includes yt-dlp. For local development, install it to enable the fast path of the `/youtube/transcript` endpoint; without it, the endpoint falls back to a slower browser-based method.
53
+
42
54
  ## Quick Start
43
55
 
44
56
  ### OpenClaw Plugin
@@ -251,7 +263,7 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
251
263
 
252
264
  | Method | Endpoint | Description |
253
265
  |--------|----------|-------------|
254
- | `GET` | `/tabs/:id/snapshot` | Accessibility snapshot with element refs |
266
+ | `GET` | `/tabs/:id/snapshot` | Accessibility snapshot with element refs. Query params: `includeScreenshot=true` (add base64 PNG), `offset=N` (paginate large snapshots) |
255
267
  | `POST` | `/tabs/:id/click` | Click element by ref or CSS selector |
256
268
  | `POST` | `/tabs/:id/type` | Type text into element |
257
269
  | `POST` | `/tabs/:id/press` | Press a keyboard key |
@@ -264,6 +276,21 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
264
276
  | `POST` | `/tabs/:id/forward` | Go forward |
265
277
  | `POST` | `/tabs/:id/refresh` | Refresh page |
266
278
 
279
+ ### YouTube Transcript
280
+
281
+ | Method | Endpoint | Description |
282
+ |--------|----------|-------------|
283
+ | `POST` | `/youtube/transcript` | Extract captions from a YouTube video |
284
+
285
+ ```bash
286
+ curl -X POST http://localhost:9377/youtube/transcript \
287
+ -H 'Content-Type: application/json' \
288
+ -d '{"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "languages": ["en"]}'
289
+ # → { "status": "ok", "transcript": "[00:18] ♪ We're no strangers to love ♪\n...", "video_title": "...", "total_words": 548 }
290
+ ```
291
+
292
+ Uses [yt-dlp](https://github.com/yt-dlp/yt-dlp) when available (fast, no browser needed). Falls back to a browser-based intercept method if yt-dlp is not installed — this is slower and less reliable due to YouTube ad pre-rolls.
293
+
267
294
  ### Server
268
295
 
269
296
  | Method | Endpoint | Description |
@@ -294,6 +321,13 @@ Reddit macros return JSON directly (no HTML parsing needed):
294
321
  | `CAMOFOX_API_KEY` | Enable cookie import endpoint (disabled if unset) | - |
295
322
  | `CAMOFOX_ADMIN_KEY` | Required for `POST /stop` | - |
296
323
  | `CAMOFOX_COOKIES_DIR` | Directory for cookie files | `~/.camofox/cookies` |
324
+ | `MAX_SESSIONS` | Max concurrent browser sessions | `50` |
325
+ | `MAX_TABS_PER_SESSION` | Max tabs per session | `10` |
326
+ | `SESSION_TIMEOUT_MS` | Session inactivity timeout | `1800000` (30min) |
327
+ | `BROWSER_IDLE_TIMEOUT_MS` | Kill browser when idle (0 = never) | `300000` (5min) |
328
+ | `HANDLER_TIMEOUT_MS` | Max time for any handler | `30000` (30s) |
329
+ | `MAX_CONCURRENT_PER_USER` | Concurrent request cap per user | `3` |
330
+ | `MAX_OLD_SPACE_SIZE` | Node.js V8 heap limit (MB) | `128` |
297
331
  | `PROXY_HOST` | Proxy hostname or IP | - |
298
332
  | `PROXY_PORT` | Proxy port | - |
299
333
  | `PROXY_USERNAME` | Proxy auth username | - |
@@ -311,7 +345,7 @@ Browser Instance (Camoufox)
311
345
  └── Tab (amazon.com)
312
346
  ```
313
347
 
314
- Sessions auto-expire after 30 minutes of inactivity.
348
+ Sessions auto-expire after 30 minutes of inactivity. The browser itself shuts down after 5 minutes with no active sessions, and relaunches on the next request.
315
349
 
316
350
  ## Testing
317
351
 
package/lib/config.js CHANGED
@@ -15,6 +15,8 @@ function loadConfig() {
15
15
  adminKey: process.env.CAMOFOX_ADMIN_KEY || '',
16
16
  apiKey: process.env.CAMOFOX_API_KEY || '',
17
17
  cookiesDir: process.env.CAMOFOX_COOKIES_DIR || join(os.homedir(), '.camofox', 'cookies'),
18
+ handlerTimeoutMs: parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000,
19
+ maxConcurrentPerUser: parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3,
18
20
  proxy: {
19
21
  host: process.env.PROXY_HOST || '',
20
22
  port: process.env.PROXY_PORT || '',
package/lib/launcher.js CHANGED
@@ -14,12 +14,14 @@ const startProcess = cp.spawn;
14
14
  * @param {string} opts.pluginDir - Directory containing server.js
15
15
  * @param {number} opts.port - Port number for the server
16
16
  * @param {object} opts.env - Environment variables to pass to the subprocess
17
+ * @param {string[]} [opts.nodeArgs] - Extra Node.js CLI flags (e.g. --max-old-space-size=128)
17
18
  * @param {{ info: (msg: string) => void, error: (msg: string) => void }} opts.log - Logger
18
19
  * @returns {import('child_process').ChildProcess}
19
20
  */
20
- function launchServer({ pluginDir, port, env, log }) {
21
+ function launchServer({ pluginDir, port, env, nodeArgs, log }) {
21
22
  const serverPath = join(pluginDir, 'server.js');
22
- const proc = startProcess('node', [serverPath], {
23
+ const args = [...(nodeArgs || []), serverPath];
24
+ const proc = startProcess('node', args, {
23
25
  cwd: pluginDir,
24
26
  env: {
25
27
  ...env,
@@ -0,0 +1,41 @@
1
/**
 * Snapshot windowing — truncate large accessibility snapshots while
 * preserving pagination/navigation links at the tail.
 */

const MAX_SNAPSHOT_CHARS = 80000; // ~20K tokens
const SNAPSHOT_TAIL_CHARS = 5000; // keep last ~5K for pagination/nav links

/**
 * Return a window of the snapshot YAML.
 *
 * Snapshots of at most MAX_SNAPSHOT_CHARS are returned unchanged. Larger
 * snapshots are split into a content chunk followed by the final
 * SNAPSHOT_TAIL_CHARS characters, so the tail (pagination/nav refs) is
 * present in every window.
 *
 * - offset=0 (default): head chunk + tail.
 * - offset=N: chars N..N+budget of the body (everything before the tail) + tail.
 *
 * The chunk never extends into the tail region, so the tail is emitted
 * exactly once per window.
 *
 * @param {string} yaml - Full snapshot text. Falsy input yields an empty result.
 * @param {number} [offset=0] - Character offset of the chunk start. Negative
 *   values are clamped to 0, values past the body are clamped to the start of
 *   the tail, and NaN is treated as 0.
 * @returns {{ text: string, truncated: boolean, totalChars: number, offset: number, hasMore?: boolean, nextOffset?: (number|null) }}
 *   `hasMore` and `nextOffset` are only present when `truncated` is true.
 */
function windowSnapshot(yaml, offset = 0) {
  if (!yaml) return { text: '', truncated: false, totalChars: 0, offset: 0 };
  const total = yaml.length;
  if (total <= MAX_SNAPSHOT_CHARS) return { text: yaml, truncated: false, totalChars: total, offset: 0 };

  const contentBudget = MAX_SNAPSHOT_CHARS - SNAPSHOT_TAIL_CHARS - 200; // room for marker
  const tailStart = total - SNAPSHOT_TAIL_CHARS;
  const tail = yaml.slice(tailStart);

  // A NaN offset would poison the Math.min/Math.max clamp below and yield an
  // empty chunk; fall back to the first page.
  const requested = Number(offset);
  const safeOffset = Number.isNaN(requested) ? 0 : Math.trunc(requested);
  const clampedOffset = Math.min(Math.max(0, safeOffset), tailStart);

  // Stop the chunk at the tail boundary so the last page does not repeat the
  // tail that is appended below.
  const chunkEnd = Math.min(clampedOffset + contentBudget, tailStart);
  const chunk = yaml.slice(clampedOffset, chunkEnd);
  const hasMore = chunkEnd < tailStart;

  const marker = hasMore
    ? `\n[... truncated at char ${chunkEnd} of ${total}. Call snapshot with offset=${chunkEnd} to see more. Pagination links below. ...]\n`
    : '\n';

  return {
    text: chunk + marker + tail,
    truncated: true,
    totalChars: total,
    offset: clampedOffset,
    hasMore,
    nextOffset: hasMore ? chunkEnd : null
  };
}
40
+
41
+ module.exports = { windowSnapshot, MAX_SNAPSHOT_CHARS, SNAPSHOT_TAIL_CHARS };
@@ -2,7 +2,7 @@
2
2
  "id": "camofox-browser",
3
3
  "name": "Camofox Browser",
4
4
  "description": "Anti-detection browser automation for AI agents using Camoufox (Firefox-based)",
5
- "version": "1.0.11",
5
+ "version": "1.0.12",
6
6
  "configSchema": {
7
7
  "type": "object",
8
8
  "properties": {
@@ -19,6 +19,31 @@
19
19
  "type": "boolean",
20
20
  "description": "Auto-start the camofox-browser server with the Gateway",
21
21
  "default": true
22
+ },
23
+ "maxSessions": {
24
+ "type": "number",
25
+ "description": "Maximum concurrent browser sessions (server default: 50)",
26
+ "default": 5
27
+ },
28
+ "maxTabsPerSession": {
29
+ "type": "number",
30
+ "description": "Maximum tabs per session (server default: 10)",
31
+ "default": 3
32
+ },
33
+ "sessionTimeoutMs": {
34
+ "type": "number",
35
+ "description": "Session inactivity timeout in milliseconds (server default: 1800000)",
36
+ "default": 600000
37
+ },
38
+ "browserIdleTimeoutMs": {
39
+ "type": "number",
40
+ "description": "Kill browser after this many ms with no sessions (0 = never)",
41
+ "default": 300000
42
+ },
43
+ "maxOldSpaceSize": {
44
+ "type": "number",
45
+ "description": "Node.js V8 heap limit in MB",
46
+ "default": 128
22
47
  }
23
48
  },
24
49
  "additionalProperties": false
@@ -34,6 +59,26 @@
34
59
  },
35
60
  "autoStart": {
36
61
  "label": "Auto-start server with Gateway"
62
+ },
63
+ "maxSessions": {
64
+ "label": "Max Sessions",
65
+ "placeholder": "5"
66
+ },
67
+ "maxTabsPerSession": {
68
+ "label": "Max Tabs per Session",
69
+ "placeholder": "3"
70
+ },
71
+ "sessionTimeoutMs": {
72
+ "label": "Session Timeout (ms)",
73
+ "placeholder": "600000"
74
+ },
75
+ "browserIdleTimeoutMs": {
76
+ "label": "Browser Idle Timeout (ms)",
77
+ "placeholder": "300000"
78
+ },
79
+ "maxOldSpaceSize": {
80
+ "label": "Node Heap Limit (MB)",
81
+ "placeholder": "128"
37
82
  }
38
83
  }
39
84
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askjo/camofox-browser",
3
- "version": "1.1.2",
3
+ "version": "1.3.0",
4
4
  "description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
5
5
  "main": "server.js",
6
6
  "license": "MIT",
@@ -26,7 +26,9 @@
26
26
  "clawdbot",
27
27
  "moltbot",
28
28
  "playwright",
29
- "firefox"
29
+ "firefox",
30
+ "youtube",
31
+ "transcript"
30
32
  ],
31
33
  "engines": {
32
34
  "node": ">=18"
@@ -63,6 +65,7 @@
63
65
  "puppeteer-extra-plugin-stealth": "^2.11.2"
64
66
  },
65
67
  "devDependencies": {
66
- "jest": "^29.7.0"
68
+ "jest": "^29.7.0",
69
+ "pngjs": "^7.0.0"
67
70
  }
68
71
  }
package/plugin.ts CHANGED
@@ -29,10 +29,15 @@ interface PluginConfig {
29
29
  url?: string;
30
30
  autoStart?: boolean;
31
31
  port?: number;
32
+ maxSessions?: number;
33
+ maxTabsPerSession?: number;
34
+ sessionTimeoutMs?: number;
35
+ browserIdleTimeoutMs?: number;
36
+ maxOldSpaceSize?: number;
32
37
  }
33
38
 
34
39
  interface ToolResult {
35
- content: Array<{ type: string; text: string }>;
40
+ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>;
36
41
  }
37
42
 
38
43
  interface HealthCheckResult {
@@ -109,10 +114,16 @@ let serverProcess: ChildProcess | null = null;
109
114
  async function startServer(
110
115
  pluginDir: string,
111
116
  port: number,
112
- log: PluginApi["log"]
117
+ log: PluginApi["log"],
118
+ pluginCfg?: PluginConfig
113
119
  ): Promise<ChildProcess> {
114
120
  const cfg = loadConfig();
115
- const proc = launchServer({ pluginDir, port, env: cfg.serverEnv, log });
121
+ const env: Record<string, string> = { ...cfg.serverEnv };
122
+ if (pluginCfg?.maxSessions != null) env.MAX_SESSIONS = String(pluginCfg.maxSessions);
123
+ if (pluginCfg?.maxTabsPerSession != null) env.MAX_TABS_PER_SESSION = String(pluginCfg.maxTabsPerSession);
124
+ if (pluginCfg?.sessionTimeoutMs != null) env.SESSION_TIMEOUT_MS = String(pluginCfg.sessionTimeoutMs);
125
+ if (pluginCfg?.browserIdleTimeoutMs != null) env.BROWSER_IDLE_TIMEOUT_MS = String(pluginCfg.browserIdleTimeoutMs);
126
+ const proc = launchServer({ pluginDir, port, env, log, nodeArgs: pluginCfg?.maxOldSpaceSize != null ? [`--max-old-space-size=${pluginCfg.maxOldSpaceSize}`] : undefined });
116
127
 
117
128
  proc.on("error", (err: Error) => {
118
129
  log?.error?.(`Server process error: ${err.message}`);
@@ -194,7 +205,7 @@ export default function register(api: PluginApi) {
194
205
  api.log?.info?.(`Camoufox server already running at ${baseUrl}`);
195
206
  } else {
196
207
  try {
197
- serverProcess = await startServer(pluginDir, port, api.log);
208
+ serverProcess = await startServer(pluginDir, port, api.log, cfg);
198
209
  } catch (err) {
199
210
  api.log?.error?.(`Failed to auto-start server: ${(err as Error).message}`);
200
211
  }
@@ -227,19 +238,30 @@ export default function register(api: PluginApi) {
227
238
  api.registerTool((ctx: ToolContext) => ({
228
239
  name: "camofox_snapshot",
229
240
  description:
230
- "Get accessibility snapshot of a Camoufox page with element refs (e1, e2, etc.) for interaction. Use with camofox_create_tab.",
241
+ "Get accessibility snapshot of a Camoufox page with element refs (e1, e2, etc.) for interaction, plus a visual screenshot. " +
242
+ "Large pages are truncated with pagination links preserved at the bottom. " +
243
+ "If the response includes hasMore=true and nextOffset, call again with that offset to see more content.",
231
244
  parameters: {
232
245
  type: "object",
233
246
  properties: {
234
247
  tabId: { type: "string", description: "Tab identifier" },
248
+ offset: { type: "number", description: "Character offset for paginated snapshots. Use nextOffset from a previous truncated response." },
235
249
  },
236
250
  required: ["tabId"],
237
251
  },
238
252
  async execute(_id, params) {
239
- const { tabId } = params as { tabId: string };
253
+ const { tabId, offset } = params as { tabId: string; offset?: number };
240
254
  const userId = ctx.agentId || fallbackUserId;
241
- const result = await fetchApi(baseUrl, `/tabs/${tabId}/snapshot?userId=${userId}`);
242
- return toToolResult(result);
255
+ const qs = offset ? `&offset=${offset}` : '';
256
+ const result = await fetchApi(baseUrl, `/tabs/${tabId}/snapshot?userId=${userId}&includeScreenshot=true${qs}`) as Record<string, unknown>;
257
+ const content: ToolResult["content"] = [
258
+ { type: "text", text: JSON.stringify({ url: result.url, refsCount: result.refsCount, snapshot: result.snapshot, truncated: result.truncated, totalChars: result.totalChars, hasMore: result.hasMore, nextOffset: result.nextOffset }, null, 2) },
259
+ ];
260
+ const screenshot = result.screenshot as { data?: string; mimeType?: string } | undefined;
261
+ if (screenshot?.data) {
262
+ content.push({ type: "image", data: screenshot.data, mimeType: screenshot.mimeType || "image/png" });
263
+ }
264
+ return { content };
243
265
  },
244
266
  }));
245
267
 
@@ -499,7 +521,7 @@ export default function register(api: PluginApi) {
499
521
  return;
500
522
  }
501
523
  try {
502
- serverProcess = await startServer(pluginDir, port, api.log);
524
+ serverProcess = await startServer(pluginDir, port, api.log, cfg);
503
525
  } catch (err) {
504
526
  api.log?.error?.(`Failed to start server: ${(err as Error).message}`);
505
527
  }
@@ -622,7 +644,7 @@ export default function register(api: PluginApi) {
622
644
  }
623
645
  try {
624
646
  console.log(`Starting camofox server on port ${port}...`);
625
- serverProcess = await startServer(pluginDir, port, api.log);
647
+ serverProcess = await startServer(pluginDir, port, api.log, cfg);
626
648
  console.log(`Camoufox server started at ${baseUrl}`);
627
649
  } catch (err) {
628
650
  console.error(`Failed to start server: ${(err as Error).message}`);
package/run.sh CHANGED
@@ -32,6 +32,6 @@ fi
32
32
 
33
33
  echo "Starting camofox-browser on http://localhost:$CAMOFOX_PORT (with auto-reload)"
34
34
  echo "Logs: /tmp/camofox-browser.log"
35
- nodemon --watch server.js --exec "node server.js" 2>&1 | while IFS= read -r line; do
35
+ nodemon --watch server.js --exec "node --max-old-space-size=128 server.js" 2>&1 | while IFS= read -r line; do
36
36
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $line"
37
37
  done | tee -a /tmp/camofox-browser.log