@askjo/camofox-browser 1.4.1 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +17 -13
- package/README.md +54 -6
- package/lib/config.js +53 -1
- package/lib/downloads.js +0 -80
- package/lib/fly.js +54 -0
- package/lib/images.js +88 -0
- package/lib/metrics.js +137 -81
- package/lib/proxy.js +260 -2
- package/lib/request-utils.js +56 -0
- package/lib/youtube.js +19 -4
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/plugin.ts +1 -0
- package/server.js +623 -139
package/Dockerfile
CHANGED
|
@@ -4,7 +4,7 @@ FROM node:20-slim
|
|
|
4
4
|
# Update these when upgrading Camoufox
|
|
5
5
|
ARG CAMOUFOX_VERSION=135.0.1
|
|
6
6
|
ARG CAMOUFOX_RELEASE=beta.24
|
|
7
|
-
ARG
|
|
7
|
+
ARG ARCH=x86_64
|
|
8
8
|
|
|
9
9
|
# Install dependencies for Camoufox (Firefox-based)
|
|
10
10
|
RUN apt-get update && apt-get install -y \
|
|
@@ -23,33 +23,37 @@ RUN apt-get update && apt-get install -y \
|
|
|
23
23
|
libxrender1 \
|
|
24
24
|
libxss1 \
|
|
25
25
|
libxtst6 \
|
|
26
|
+
# Mesa OpenGL/EGL for WebGL support (software rendering via llvmpipe)
|
|
27
|
+
# Without these, Firefox cannot create WebGL contexts — a major bot detection signal
|
|
28
|
+
libegl1-mesa \
|
|
29
|
+
libgl1-mesa-dri \
|
|
30
|
+
libgbm1 \
|
|
31
|
+
# Xvfb virtual display — runs Camoufox as if on a real desktop (better anti-detection)
|
|
32
|
+
xvfb \
|
|
26
33
|
# Fonts
|
|
27
34
|
fonts-liberation \
|
|
28
35
|
fonts-noto-color-emoji \
|
|
29
36
|
fontconfig \
|
|
30
37
|
# Utils
|
|
31
38
|
ca-certificates \
|
|
32
|
-
curl \
|
|
33
39
|
unzip \
|
|
34
40
|
# yt-dlp runtime dependency
|
|
35
41
|
python3-minimal \
|
|
36
42
|
&& rm -rf /var/lib/apt/lists/*
|
|
37
43
|
|
|
38
|
-
#
|
|
39
|
-
RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp \
|
|
40
|
-
&& chmod +x /usr/local/bin/yt-dlp
|
|
41
|
-
|
|
42
|
-
# Pre-bake Camoufox browser binary into image
|
|
43
|
-
# This avoids downloading at runtime and pins the version
|
|
44
|
+
# Pre-bake Camoufox browser binary into image via bind mount (downloaded by Makefile)
|
|
44
45
|
# Note: unzip returns exit code 1 for warnings (Unicode filenames), so we use || true and verify
|
|
45
|
-
RUN
|
|
46
|
-
|
|
47
|
-
&& (unzip -q /
|
|
48
|
-
&& rm /tmp/camoufox.zip \
|
|
46
|
+
RUN --mount=type=bind,source=dist,target=/dist \
|
|
47
|
+
mkdir -p /root/.cache/camoufox \
|
|
48
|
+
&& (unzip -q /dist/camoufox-${ARCH}.zip -d /root/.cache/camoufox || true) \
|
|
49
49
|
&& chmod -R 755 /root/.cache/camoufox \
|
|
50
50
|
&& echo "{\"version\":\"${CAMOUFOX_VERSION}\",\"release\":\"${CAMOUFOX_RELEASE}\"}" > /root/.cache/camoufox/version.json \
|
|
51
51
|
&& test -f /root/.cache/camoufox/camoufox-bin && echo "Camoufox installed successfully"
|
|
52
52
|
|
|
53
|
+
# Install yt-dlp for YouTube transcript extraction (no browser needed)
|
|
54
|
+
RUN --mount=type=bind,source=dist,target=/dist \
|
|
55
|
+
install -m 755 /dist/yt-dlp-${ARCH} /usr/local/bin/yt-dlp
|
|
56
|
+
|
|
53
57
|
WORKDIR /app
|
|
54
58
|
|
|
55
59
|
COPY package.json ./
|
|
@@ -61,6 +65,6 @@ COPY lib/ ./lib/
|
|
|
61
65
|
ENV NODE_ENV=production
|
|
62
66
|
ENV CAMOFOX_PORT=3000
|
|
63
67
|
|
|
64
|
-
EXPOSE
|
|
68
|
+
EXPOSE 9377
|
|
65
69
|
|
|
66
70
|
CMD ["sh", "-c", "node --max-old-space-size=${MAX_OLD_SPACE_SIZE:-128} server.js"]
|
package/README.md
CHANGED
|
@@ -11,12 +11,18 @@
|
|
|
11
11
|
<p>
|
|
12
12
|
Standing on the mighty shoulders of <a href="https://camoufox.com">Camoufox</a> - a Firefox fork with fingerprint spoofing at the C++ level.
|
|
13
13
|
<br/><br/>
|
|
14
|
-
The same engine behind <a href="https://askjo.ai">
|
|
14
|
+
The same engine behind <a href="https://askjo.ai?ref=camofox">Jo</a> — an AI assistant that doesn't need you to babysit it. Runs half on your Mac, half on a dedicated cloud machine that only you use. Available on macOS, Telegram, and WhatsApp. <a href="https://askjo.ai?ref=camofox">Try the beta free →</a>
|
|
15
15
|
</p>
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
18
|
<br/>
|
|
19
19
|
|
|
20
|
+
```bash
|
|
21
|
+
git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
|
|
22
|
+
npm install && npm start
|
|
23
|
+
# → http://localhost:9377
|
|
24
|
+
```
|
|
25
|
+
|
|
20
26
|
---
|
|
21
27
|
|
|
22
28
|
## Why
|
|
@@ -76,11 +82,28 @@ Default port is `9377`. See [Environment Variables](#environment-variables) for
|
|
|
76
82
|
|
|
77
83
|
### Docker
|
|
78
84
|
|
|
85
|
+
The included `Makefile` auto-detects your CPU architecture and pre-downloads Camoufox + yt-dlp binaries outside the Docker build, so rebuilds are fast (~30s vs ~3min).
|
|
86
|
+
|
|
79
87
|
```bash
|
|
80
|
-
|
|
81
|
-
|
|
88
|
+
# Build and start (auto-detects arch: aarch64 on M1/M2, x86_64 on Intel)
|
|
89
|
+
make up
|
|
90
|
+
|
|
91
|
+
# Stop and remove the container
|
|
92
|
+
make down
|
|
93
|
+
|
|
94
|
+
# Force a clean rebuild (e.g. after upgrading VERSION/RELEASE)
|
|
95
|
+
make reset
|
|
96
|
+
|
|
97
|
+
# Just download binaries (without building)
|
|
98
|
+
make fetch
|
|
99
|
+
|
|
100
|
+
# Override arch or version explicitly
|
|
101
|
+
make up ARCH=x86_64
|
|
102
|
+
make up VERSION=135.0.1 RELEASE=beta.24
|
|
82
103
|
```
|
|
83
104
|
|
|
105
|
+
Note: `make fetch` (or `make build`) must be run first — the Dockerfile expects pre-downloaded binaries in `dist/`.
|
|
106
|
+
|
|
84
107
|
### Fly.io / Railway
|
|
85
108
|
|
|
86
109
|
`fly.toml` and `railway.toml` are included. Deploy with `fly deploy` or connect the repo to Railway.
|
|
@@ -182,7 +205,7 @@ fly secrets set CAMOFOX_API_KEY="your-generated-key"
|
|
|
182
205
|
|
|
183
206
|
Route all browser traffic through a proxy with automatic locale, timezone, and geolocation derived from the proxy's IP address via Camoufox's built-in GeoIP.
|
|
184
207
|
|
|
185
|
-
|
|
208
|
+
**Simple proxy (single endpoint):**
|
|
186
209
|
|
|
187
210
|
```bash
|
|
188
211
|
export PROXY_HOST=166.88.179.132
|
|
@@ -192,6 +215,21 @@ export PROXY_PASSWORD=mypass
|
|
|
192
215
|
npm start
|
|
193
216
|
```
|
|
194
217
|
|
|
218
|
+
**Backconnect proxy (rotating sticky sessions):**
|
|
219
|
+
|
|
220
|
+
For providers like Decodo, Bright Data, or Oxylabs that offer a single gateway endpoint with session-based sticky IPs:
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
export PROXY_STRATEGY=backconnect
|
|
224
|
+
export PROXY_BACKCONNECT_HOST=gate.provider.com
|
|
225
|
+
export PROXY_BACKCONNECT_PORT=7000
|
|
226
|
+
export PROXY_USERNAME=myuser
|
|
227
|
+
export PROXY_PASSWORD=mypass
|
|
228
|
+
npm start
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Each browser context gets a unique sticky session, so different users get different IP addresses. Sessions rotate automatically on proxy errors or Google blocks.
|
|
232
|
+
|
|
195
233
|
Or in Docker:
|
|
196
234
|
|
|
197
235
|
```bash
|
|
@@ -322,6 +360,7 @@ Reddit macros return JSON directly (no HTML parsing needed):
|
|
|
322
360
|
| Variable | Description | Default |
|
|
323
361
|
|----------|-------------|---------|
|
|
324
362
|
| `CAMOFOX_PORT` | Server port | `9377` |
|
|
363
|
+
| `PORT` | Server port (fallback, for platforms like Fly.io) | `9377` |
|
|
325
364
|
| `CAMOFOX_API_KEY` | Enable cookie import endpoint (disabled if unset) | - |
|
|
326
365
|
| `CAMOFOX_ADMIN_KEY` | Required for `POST /stop` | - |
|
|
327
366
|
| `CAMOFOX_COOKIES_DIR` | Directory for cookie files | `~/.camofox/cookies` |
|
|
@@ -332,10 +371,17 @@ Reddit macros return JSON directly (no HTML parsing needed):
|
|
|
332
371
|
| `HANDLER_TIMEOUT_MS` | Max time for any handler | `30000` (30s) |
|
|
333
372
|
| `MAX_CONCURRENT_PER_USER` | Concurrent request cap per user | `3` |
|
|
334
373
|
| `MAX_OLD_SPACE_SIZE` | Node.js V8 heap limit (MB) | `128` |
|
|
335
|
-
| `
|
|
336
|
-
| `
|
|
374
|
+
| `PROXY_STRATEGY` | Proxy mode: `backconnect` (rotating sticky sessions) or blank (single endpoint) | - |
|
|
375
|
+
| `PROXY_PROVIDER` | Provider name for session format (e.g. `decodo`) | `decodo` |
|
|
376
|
+
| `PROXY_HOST` | Proxy hostname or IP (simple mode) | - |
|
|
377
|
+
| `PROXY_PORT` | Proxy port (simple mode) | - |
|
|
337
378
|
| `PROXY_USERNAME` | Proxy auth username | - |
|
|
338
379
|
| `PROXY_PASSWORD` | Proxy auth password | - |
|
|
380
|
+
| `PROXY_BACKCONNECT_HOST` | Backconnect gateway hostname | - |
|
|
381
|
+
| `PROXY_BACKCONNECT_PORT` | Backconnect gateway port | `7000` |
|
|
382
|
+
| `PROXY_COUNTRY` | Target country for proxy geo-targeting | - |
|
|
383
|
+
| `PROXY_STATE` | Target state/region for proxy geo-targeting | - |
|
|
384
|
+
| `TAB_INACTIVITY_MS` | Close tabs idle longer than this | `300000` (5min) |
|
|
339
385
|
|
|
340
386
|
## Architecture
|
|
341
387
|
|
|
@@ -351,6 +397,8 @@ Browser Instance (Camoufox)
|
|
|
351
397
|
|
|
352
398
|
Sessions auto-expire after 30 minutes of inactivity. The browser itself shuts down after 5 minutes with no active sessions, and relaunches on the next request.
|
|
353
399
|
|
|
400
|
+
When a session's tab limit is reached, the oldest/least-used tab is automatically recycled instead of returning an error — so long-running agent sessions don't hit dead ends.
|
|
401
|
+
|
|
354
402
|
## Testing
|
|
355
403
|
|
|
356
404
|
```bash
|
package/lib/config.js
CHANGED
|
@@ -8,10 +8,41 @@
|
|
|
8
8
|
import { join } from 'path';
|
|
9
9
|
import os from 'os';
|
|
10
10
|
|
|
11
|
+
/**
 * Parse the PROXY_PORTS env var into an array of port numbers.
 *
 * Accepts a comma-separated list in which every item is either a single port
 * ("10001") or an inclusive range ("10001-10010"); the two forms may be mixed
 * ("10001-10005,10010"). Items that are malformed, reversed ranges, or outside
 * the valid TCP port range (1-65535) are ignored.
 *
 * Falls back to the single PROXY_PORT value when PROXY_PORTS is unset or
 * yields no valid port.
 *
 * @param {string|undefined} portsEnv - Raw PROXY_PORTS value.
 * @param {string|number|undefined} singlePort - Raw PROXY_PORT value.
 * @returns {number[]} Parsed ports in input order; empty when nothing is valid.
 */
function parseProxyPorts(portsEnv, singlePort) {
  const MIN_PORT = 1;
  const MAX_PORT = 65535;

  // Returns the port number for an all-digit, in-range string; otherwise null.
  const toPort = (text) => {
    if (!/^\d+$/.test(text)) return null;
    const value = Number.parseInt(text, 10);
    return value >= MIN_PORT && value <= MAX_PORT ? value : null;
  };

  if (portsEnv) {
    const ports = [];
    for (const rawItem of String(portsEnv).split(',')) {
      const item = rawItem.trim();
      if (!item) continue;

      const dash = item.indexOf('-');
      if (dash === -1) {
        const port = toPort(item);
        if (port !== null) ports.push(port);
        continue;
      }

      // Range item: both bounds must be valid ports and correctly ordered.
      // Bounding both ends also caps the expansion at 65535 entries.
      const start = toPort(item.slice(0, dash).trim());
      const end = toPort(item.slice(dash + 1).trim());
      if (start === null || end === null || end < start) continue;
      for (let port = start; port <= end; port += 1) ports.push(port);
    }
    if (ports.length > 0) return ports;
  }

  if (singlePort !== undefined && singlePort !== null) {
    const port = toPort(String(singlePort).trim());
    if (port !== null) return [port];
  }

  return [];
}
|
|
33
|
+
|
|
34
|
+
/**
 * Resolve the proxy strategy name.
 *
 * Returns the explicitly configured strategy with surrounding whitespace
 * removed, or 'round_robin' when none is configured. A blank or
 * whitespace-only value counts as "not configured" so it cannot leak through
 * as a bogus strategy name. Letter case is left untouched.
 *
 * @param {string|undefined} explicitStrategy - Raw PROXY_STRATEGY value.
 * @returns {string} The strategy to use.
 */
function inferProxyStrategy(explicitStrategy) {
  const strategy = typeof explicitStrategy === 'string' ? explicitStrategy.trim() : '';
  return strategy || 'round_robin';
}
|
|
38
|
+
|
|
11
39
|
function loadConfig() {
|
|
12
40
|
return {
|
|
13
41
|
port: parseInt(process.env.CAMOFOX_PORT || process.env.PORT || '9377', 10),
|
|
14
42
|
nodeEnv: process.env.NODE_ENV || 'development',
|
|
43
|
+
flyMachineId: process.env.FLY_MACHINE_ID || '',
|
|
44
|
+
flyAppName: process.env.FLY_APP_NAME || '',
|
|
45
|
+
flyApiToken: process.env.FLY_API_TOKEN || '',
|
|
15
46
|
adminKey: process.env.CAMOFOX_ADMIN_KEY || '',
|
|
16
47
|
apiKey: process.env.CAMOFOX_API_KEY || '',
|
|
17
48
|
cookiesDir: process.env.CAMOFOX_COOKIES_DIR || join(os.homedir(), '.camofox', 'cookies'),
|
|
@@ -21,15 +52,26 @@ function loadConfig() {
|
|
|
21
52
|
tabInactivityMs: parseInt(process.env.TAB_INACTIVITY_MS) || 300000,
|
|
22
53
|
maxSessions: parseInt(process.env.MAX_SESSIONS) || 50,
|
|
23
54
|
maxTabsPerSession: parseInt(process.env.MAX_TABS_PER_SESSION) || 10,
|
|
24
|
-
maxTabsGlobal: parseInt(process.env.MAX_TABS_GLOBAL) ||
|
|
55
|
+
maxTabsGlobal: parseInt(process.env.MAX_TABS_GLOBAL) || 50,
|
|
25
56
|
navigateTimeoutMs: parseInt(process.env.NAVIGATE_TIMEOUT_MS) || 25000,
|
|
26
57
|
buildrefsTimeoutMs: parseInt(process.env.BUILDREFS_TIMEOUT_MS) || 12000,
|
|
27
58
|
browserIdleTimeoutMs: parseInt(process.env.BROWSER_IDLE_TIMEOUT_MS) || 300000,
|
|
59
|
+
prometheusEnabled: process.env.PROMETHEUS_ENABLED === '1' || process.env.PROMETHEUS_ENABLED === 'true',
|
|
28
60
|
proxy: {
|
|
61
|
+
strategy: inferProxyStrategy(process.env.PROXY_STRATEGY || ''),
|
|
62
|
+
providerName: process.env.PROXY_PROVIDER || 'decodo',
|
|
29
63
|
host: process.env.PROXY_HOST || '',
|
|
30
64
|
port: process.env.PROXY_PORT || '',
|
|
65
|
+
ports: parseProxyPorts(process.env.PROXY_PORTS, process.env.PROXY_PORT),
|
|
31
66
|
username: process.env.PROXY_USERNAME || '',
|
|
32
67
|
password: process.env.PROXY_PASSWORD || '',
|
|
68
|
+
backconnectHost: process.env.PROXY_BACKCONNECT_HOST || '',
|
|
69
|
+
backconnectPort: parseInt(process.env.PROXY_BACKCONNECT_PORT || '7000', 10),
|
|
70
|
+
country: process.env.PROXY_COUNTRY || '',
|
|
71
|
+
state: process.env.PROXY_STATE || '',
|
|
72
|
+
city: process.env.PROXY_CITY || '',
|
|
73
|
+
zip: process.env.PROXY_ZIP || '',
|
|
74
|
+
sessionDurationMinutes: parseInt(process.env.PROXY_SESSION_DURATION_MINUTES || '10', 10),
|
|
33
75
|
},
|
|
34
76
|
// Env vars forwarded to the server subprocess
|
|
35
77
|
serverEnv: {
|
|
@@ -39,10 +81,20 @@ function loadConfig() {
|
|
|
39
81
|
CAMOFOX_ADMIN_KEY: process.env.CAMOFOX_ADMIN_KEY,
|
|
40
82
|
CAMOFOX_API_KEY: process.env.CAMOFOX_API_KEY,
|
|
41
83
|
CAMOFOX_COOKIES_DIR: process.env.CAMOFOX_COOKIES_DIR,
|
|
84
|
+
PROXY_STRATEGY: process.env.PROXY_STRATEGY,
|
|
85
|
+
PROXY_PROVIDER: process.env.PROXY_PROVIDER,
|
|
42
86
|
PROXY_HOST: process.env.PROXY_HOST,
|
|
43
87
|
PROXY_PORT: process.env.PROXY_PORT,
|
|
88
|
+
PROXY_PORTS: process.env.PROXY_PORTS,
|
|
44
89
|
PROXY_USERNAME: process.env.PROXY_USERNAME,
|
|
45
90
|
PROXY_PASSWORD: process.env.PROXY_PASSWORD,
|
|
91
|
+
PROXY_BACKCONNECT_HOST: process.env.PROXY_BACKCONNECT_HOST,
|
|
92
|
+
PROXY_BACKCONNECT_PORT: process.env.PROXY_BACKCONNECT_PORT,
|
|
93
|
+
PROXY_COUNTRY: process.env.PROXY_COUNTRY,
|
|
94
|
+
PROXY_STATE: process.env.PROXY_STATE,
|
|
95
|
+
PROXY_CITY: process.env.PROXY_CITY,
|
|
96
|
+
PROXY_ZIP: process.env.PROXY_ZIP,
|
|
97
|
+
PROXY_SESSION_DURATION_MINUTES: process.env.PROXY_SESSION_DURATION_MINUTES,
|
|
46
98
|
},
|
|
47
99
|
};
|
|
48
100
|
}
|
package/lib/downloads.js
CHANGED
|
@@ -149,85 +149,6 @@ async function getDownloadsList(tabState, { includeData = false, maxBytes = MAX_
|
|
|
149
149
|
return downloads;
|
|
150
150
|
}
|
|
151
151
|
|
|
152
|
-
/**
|
|
153
|
-
* In-page image extraction script for page.evaluate().
|
|
154
|
-
* Returns image metadata and optionally inline data URLs.
|
|
155
|
-
*/
|
|
156
|
-
async function extractPageImages(page, { includeData = false, maxBytes = MAX_DOWNLOAD_INLINE_BYTES, limit = 8 } = {}) {
|
|
157
|
-
return page.evaluate(
|
|
158
|
-
async ({ includeData, maxBytes, limit }) => {
|
|
159
|
-
const toDataUrl = (blob) =>
|
|
160
|
-
new Promise((resolve, reject) => {
|
|
161
|
-
const reader = new FileReader();
|
|
162
|
-
reader.onload = () => resolve(typeof reader.result === 'string' ? reader.result : '');
|
|
163
|
-
reader.onerror = () => reject(new Error('file_reader_failed'));
|
|
164
|
-
reader.readAsDataURL(blob);
|
|
165
|
-
});
|
|
166
|
-
|
|
167
|
-
const nodes = Array.from(document.querySelectorAll('img'));
|
|
168
|
-
const seen = new Set();
|
|
169
|
-
const candidates = [];
|
|
170
|
-
|
|
171
|
-
for (const node of nodes) {
|
|
172
|
-
const src = String(node.currentSrc || node.src || node.getAttribute('src') || '').trim();
|
|
173
|
-
if (!src || seen.has(src)) continue;
|
|
174
|
-
seen.add(src);
|
|
175
|
-
candidates.push({
|
|
176
|
-
src,
|
|
177
|
-
alt: String(node.alt || '').trim(),
|
|
178
|
-
width: Number(node.naturalWidth || node.width || 0) || undefined,
|
|
179
|
-
height: Number(node.naturalHeight || node.height || 0) || undefined,
|
|
180
|
-
});
|
|
181
|
-
if (candidates.length >= limit) break;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
const results = [];
|
|
185
|
-
for (const image of candidates) {
|
|
186
|
-
const entry = { src: image.src, alt: image.alt, width: image.width, height: image.height };
|
|
187
|
-
|
|
188
|
-
if (includeData) {
|
|
189
|
-
try {
|
|
190
|
-
if (image.src.startsWith('data:')) {
|
|
191
|
-
const mimeMatch = image.src.match(/^data:([^;,]+)[;,]/i);
|
|
192
|
-
const isBase64 = /;base64,/i.test(image.src);
|
|
193
|
-
const payload = image.src.slice(image.src.indexOf(',') + 1);
|
|
194
|
-
const estimatedBytes = isBase64 ? Math.floor((payload.length * 3) / 4) : payload.length;
|
|
195
|
-
entry.mimeType = mimeMatch ? mimeMatch[1] : 'application/octet-stream';
|
|
196
|
-
entry.bytes = estimatedBytes;
|
|
197
|
-
if (estimatedBytes <= maxBytes) {
|
|
198
|
-
entry.dataUrl = image.src;
|
|
199
|
-
} else {
|
|
200
|
-
entry.dataSkipped = 'max_bytes_exceeded';
|
|
201
|
-
}
|
|
202
|
-
} else {
|
|
203
|
-
const response = await fetch(image.src, { credentials: 'include' });
|
|
204
|
-
if (response.ok) {
|
|
205
|
-
const blob = await response.blob();
|
|
206
|
-
entry.mimeType = blob.type || 'application/octet-stream';
|
|
207
|
-
entry.bytes = blob.size;
|
|
208
|
-
if (blob.size <= maxBytes) {
|
|
209
|
-
entry.dataUrl = await toDataUrl(blob);
|
|
210
|
-
} else {
|
|
211
|
-
entry.dataSkipped = 'max_bytes_exceeded';
|
|
212
|
-
}
|
|
213
|
-
} else {
|
|
214
|
-
entry.fetchError = `http_${response.status}`;
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
} catch (err) {
|
|
218
|
-
entry.fetchError = String(err?.message || err || 'image_fetch_failed');
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
results.push(entry);
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
return results;
|
|
226
|
-
},
|
|
227
|
-
{ includeData, maxBytes, limit },
|
|
228
|
-
);
|
|
229
|
-
}
|
|
230
|
-
|
|
231
152
|
export {
|
|
232
153
|
MAX_DOWNLOAD_INLINE_BYTES,
|
|
233
154
|
sanitizeFilename,
|
|
@@ -236,5 +157,4 @@ export {
|
|
|
236
157
|
clearSessionDownloads,
|
|
237
158
|
attachDownloadListener,
|
|
238
159
|
getDownloadsList,
|
|
239
|
-
extractPageImages,
|
|
240
160
|
};
|
package/lib/fly.js
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fly.io horizontal scaling helpers.
|
|
3
|
+
*
|
|
4
|
+
* Tab IDs encode the owning machine: "{machineId}_{uuid}"
|
|
5
|
+
* Requests for tabs on other machines get replayed via fly-replay header.
|
|
6
|
+
*
|
|
7
|
+
* When not running on Fly (no FLY_MACHINE_ID), all helpers are no-ops:
|
|
8
|
+
* makeTabId() returns a plain UUID and isLocalTab() always returns true.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import crypto from 'crypto';
|
|
12
|
+
|
|
13
|
+
/**
 * Build the Fly.io routing helpers for this process.
 *
 * @param {{ flyMachineId?: string }} config - Loaded configuration; only
 *   `flyMachineId` is read. An empty/missing ID means "not running on Fly".
 * @returns {{
 *   machineId: string,
 *   makeTabId: () => string,
 *   parseTabOwner: (tabId: string) => (string|null),
 *   isLocalTab: (tabId: string) => boolean,
 *   replayMiddleware: (log: Function) => Function,
 * }}
 */
export function createFlyHelpers(config) {
  const machineId = config.flyMachineId || '';

  // The owner prefix is copied from a caller-supplied tab ID into the
  // fly-replay response header. Only plain alphanumeric prefixes are accepted
  // so a crafted ID (e.g. "x;region=sin_...") cannot smuggle extra fields into
  // that header.
  const MACHINE_ID_PATTERN = /^[a-z0-9]+$/i;

  /** Create a new tab ID, prefixed with this machine's ID when on Fly. */
  function makeTabId() {
    const uuid = crypto.randomUUID();
    return machineId ? `${machineId}_${uuid}` : uuid;
  }

  /**
   * Extract the owning machine ID from a tab ID.
   *
   * @param {string} tabId
   * @returns {string|null} The machine ID prefix, or null when not on Fly,
   *   when the ID has no prefix (legacy/plain UUID), or when the prefix is not
   *   a plausible machine ID.
   */
  function parseTabOwner(tabId) {
    if (!machineId || typeof tabId !== 'string' || !tabId) return null;
    const idx = tabId.indexOf('_');
    if (idx === -1) return null; // legacy tab ID (no machine prefix)
    const candidate = tabId.slice(0, idx);
    // Fly machine IDs are hex strings (14 chars). UUIDs start with 8 hex chars
    // then '-', so anything containing '-' (or any other non-alphanumeric
    // character) is not a machine ID.
    if (!MACHINE_ID_PATTERN.test(candidate)) return null;
    return candidate;
  }

  /** True when the tab has no recognizable owner or is owned by this machine. */
  function isLocalTab(tabId) {
    const owner = parseTabOwner(tabId);
    return owner === null || owner === machineId;
  }

  /**
   * Express middleware: replay requests for tabs owned by other machines.
   * No-op when not running on Fly.
   *
   * @param {Function} log - Called as log(level, message, fields).
   */
  function replayMiddleware(log) {
    return (req, res, next) => {
      if (!machineId) return next();
      const tabId = req.params?.tabId;
      // Resolve the owner once; null means "treat as local".
      const owner = parseTabOwner(tabId);
      if (owner === null || owner === machineId) return next();
      log('info', 'fly-replay', { reqId: req.reqId, tabId, owner, self: machineId });
      res.set('fly-replay', `instance=${owner}`);
      res.status(307).send();
    };
  }

  return { machineId, makeTabId, parseTabOwner, isLocalTab, replayMiddleware };
}
|
package/lib/images.js
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-page image extraction via Playwright page.evaluate().
|
|
3
|
+
*
|
|
4
|
+
* Separated from downloads.js to avoid OpenClaw scanner false positives
|
|
5
|
+
* (browser-side fetch inside page.evaluate + Node fs reads in same file).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { MAX_DOWNLOAD_INLINE_BYTES } from './downloads.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Extract image metadata (and optionally inline data) from the page's <img> elements.
 *
 * Every <img> in the document is considered, in document order, de-duplicated
 * by resolved source URL; no visibility filtering is applied.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)` (a Playwright page).
 * @param {object} [options]
 * @param {boolean} [options.includeData=false] - Also return each image as a data URL.
 * @param {number} [options.maxBytes=MAX_DOWNLOAD_INLINE_BYTES] - Largest image
 *   (in bytes) that may be inlined; larger ones get `dataSkipped`.
 * @param {number} [options.limit=8] - Maximum number of images returned; a
 *   value of 0 or less returns no images.
 * @returns {Promise<Array<object>>} One entry per image with `src`, `alt`,
 *   `width`, `height`, and when `includeData` is set `mimeType`, `bytes` and
 *   one of `dataUrl`, `dataSkipped` or `fetchError`.
 */
async function extractPageImages(page, { includeData = false, maxBytes = MAX_DOWNLOAD_INLINE_BYTES, limit = 8 } = {}) {
  return page.evaluate(
    // NOTE: this callback is handed to page.evaluate(), so it must stay
    // self-contained and use only its argument and page globals.
    async ({ includeData, maxBytes, limit }) => {
      const toDataUrl = (blob) =>
        new Promise((resolve, reject) => {
          const reader = new FileReader();
          reader.onload = () => resolve(typeof reader.result === 'string' ? reader.result : '');
          reader.onerror = () => reject(new Error('file_reader_failed'));
          reader.readAsDataURL(blob);
        });

      const nodes = Array.from(document.querySelectorAll('img'));
      const seen = new Set();
      const candidates = [];

      for (const node of nodes) {
        // Check the cap before collecting so limit <= 0 yields no images.
        if (candidates.length >= limit) break;
        const src = String(node.currentSrc || node.src || node.getAttribute('src') || '').trim();
        if (!src || seen.has(src)) continue;
        seen.add(src);
        candidates.push({
          src,
          alt: String(node.alt || '').trim(),
          width: Number(node.naturalWidth || node.width || 0) || undefined,
          height: Number(node.naturalHeight || node.height || 0) || undefined,
        });
      }

      const results = [];
      for (const image of candidates) {
        const entry = { src: image.src, alt: image.alt, width: image.width, height: image.height };

        if (includeData) {
          try {
            if (image.src.startsWith('data:')) {
              // Already inline: size it from the payload instead of fetching.
              const mimeMatch = image.src.match(/^data:([^;,]+)[;,]/i);
              const isBase64 = /;base64,/i.test(image.src);
              const payload = image.src.slice(image.src.indexOf(',') + 1);
              // Trailing '=' padding characters encode no data bytes.
              const padding = payload.endsWith('==') ? 2 : payload.endsWith('=') ? 1 : 0;
              const estimatedBytes = isBase64
                ? Math.max(0, Math.floor((payload.length * 3) / 4) - padding)
                : payload.length;
              entry.mimeType = mimeMatch ? mimeMatch[1] : 'application/octet-stream';
              entry.bytes = estimatedBytes;
              if (estimatedBytes <= maxBytes) {
                entry.dataUrl = image.src;
              } else {
                entry.dataSkipped = 'max_bytes_exceeded';
              }
            } else {
              const response = await fetch(image.src, { credentials: 'include' });
              if (response.ok) {
                const blob = await response.blob();
                entry.mimeType = blob.type || 'application/octet-stream';
                entry.bytes = blob.size;
                if (blob.size <= maxBytes) {
                  entry.dataUrl = await toDataUrl(blob);
                } else {
                  entry.dataSkipped = 'max_bytes_exceeded';
                }
              } else {
                entry.fetchError = `http_${response.status}`;
              }
            }
          } catch (err) {
            // Best effort: record the failure on this entry and keep going.
            entry.fetchError = String(err?.message || err || 'image_fetch_failed');
          }
        }

        results.push(entry);
      }

      return results;
    },
    { includeData, maxBytes, limit },
  );
}
|
|
87
|
+
|
|
88
|
+
export { extractPageImages };
|