omniwire 3.3.1 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.omc/state/hud-stdin-cache.json +1 -1
- package/.omniwire-state/update-state.json +1 -1
- package/README.md +8 -6
- package/assets/banner-dark.svg +15 -13
- package/assets/banner-light.svg +15 -13
- package/dist/mcp/server.js +165 -45
- package/dist/mcp/server.js.map +1 -1
- package/dist/update.d.ts +9 -0
- package/dist/update.js +26 -0
- package/dist/update.js.map +1 -1
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"session_id":"8ef02123-7368-447d-82e3-ee14a27328b0","transcript_path":"C:\\Users\\Admin\\.claude\\projects\\C--Users-Admin\\8ef02123-7368-447d-82e3-ee14a27328b0.jsonl","cwd":"C:\\Users\\Admin\\omniwire","model":{"id":"claude-opus-4-6[1m]","display_name":"Opus 4.6 (1M context)"},"workspace":{"current_dir":"C:\\Users\\Admin\\omniwire","project_dir":"C:\\Users\\Admin","added_dirs":["C:/Users/Admin"]},"version":"2.1.87","output_style":{"name":"default"},"cost":{"total_cost_usd":
|
|
1
|
+
{"session_id":"8ef02123-7368-447d-82e3-ee14a27328b0","transcript_path":"C:\\Users\\Admin\\.claude\\projects\\C--Users-Admin\\8ef02123-7368-447d-82e3-ee14a27328b0.jsonl","cwd":"C:\\Users\\Admin\\omniwire","model":{"id":"claude-opus-4-6[1m]","display_name":"Opus 4.6 (1M context)"},"workspace":{"current_dir":"C:\\Users\\Admin\\omniwire","project_dir":"C:\\Users\\Admin","added_dirs":["C:/Users/Admin"]},"version":"2.1.87","output_style":{"name":"default"},"cost":{"total_cost_usd":20.34991630000001,"total_duration_ms":3367671,"total_api_duration_ms":2589826,"total_lines_added":563,"total_lines_removed":143},"context_window":{"total_input_tokens":180627,"total_output_tokens":109433,"context_window_size":1000000,"current_usage":{"input_tokens":1,"output_tokens":277,"cache_creation_input_tokens":374,"cache_read_input_tokens":197273},"used_percentage":20,"remaining_percentage":80},"exceeds_200k_tokens":false,"rate_limits":{"five_hour":{"used_percentage":3,"resets_at":1774846800},"seven_day":{"used_percentage":41,"resets_at":1775206800}}}
|
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
<p align="center">
|
|
10
10
|
<a href="https://www.npmjs.com/package/omniwire"><img src="https://img.shields.io/npm/v/omniwire?style=for-the-badge&logo=npm&color=CB3837&labelColor=0D1117" alt="npm" /></a>
|
|
11
|
-
<img src="https://img.shields.io/badge/MCP_Tools-
|
|
11
|
+
<img src="https://img.shields.io/badge/MCP_Tools-88-59C2FF?style=for-the-badge&labelColor=0D1117" alt="tools" />
|
|
12
12
|
<img src="https://img.shields.io/badge/A2A-Protocol-00C853?style=for-the-badge&labelColor=0D1117" alt="A2A" />
|
|
13
13
|
<img src="https://img.shields.io/badge/Latency-~80ms-FF6D00?style=for-the-badge&labelColor=0D1117" alt="latency" />
|
|
14
14
|
<img src="https://img.shields.io/badge/CyberBase-Sync-CC93E6?style=for-the-badge&labelColor=0D1117" alt="cyberbase" />
|
|
@@ -214,7 +214,7 @@ graph TB
|
|
|
214
214
|
direction TB
|
|
215
215
|
MCP["MCP Protocol Layer<br/>stdio | SSE | REST"]
|
|
216
216
|
|
|
217
|
-
subgraph tools["
|
|
217
|
+
subgraph tools["88 Tools"]
|
|
218
218
|
direction LR
|
|
219
219
|
EXEC["Execution<br/>exec run batch<br/>broadcast pipeline bg"]
|
|
220
220
|
AGENT["Agentic<br/>store watch task<br/>a2a events locks"]
|
|
@@ -348,7 +348,7 @@ watch(assert="ready") poll until
|
|
|
348
348
|
|
|
349
349
|
---
|
|
350
350
|
|
|
351
|
-
## All
|
|
351
|
+
## All 88 Tools
|
|
352
352
|
|
|
353
353
|
> **Every tool** supports `background: true` — returns a task ID immediately. Poll with `omniwire_bg`.
|
|
354
354
|
|
|
@@ -632,7 +632,7 @@ Create `~/.omniwire/mesh.json`:
|
|
|
632
632
|
<details>
|
|
633
633
|
<summary><b>v2.5.1 -- Universal Background Dispatch</b></summary>
|
|
634
634
|
|
|
635
|
-
**`background: true`** auto-injected into all
|
|
635
|
+
**`background: true`** auto-injected into all 88 tools via server-level wrapper. Returns task ID, poll with `omniwire_bg`. New `omniwire_bg` tool for list/poll/result.
|
|
636
636
|
|
|
637
637
|
</details>
|
|
638
638
|
|
|
@@ -671,7 +671,7 @@ Security fixes, multi-path SSH failover, CyberBase integration, VaultBridge Obsi
|
|
|
671
671
|
```
|
|
672
672
|
omniwire/
|
|
673
673
|
src/
|
|
674
|
-
mcp/ MCP server (
|
|
674
|
+
mcp/ MCP server (88 tools, 3 transports)
|
|
675
675
|
nodes/ SSH2 pool, transfer engine, PTY, tunnels
|
|
676
676
|
sync/ CyberSync + CyberBase (PostgreSQL, Obsidian, encryption)
|
|
677
677
|
protocol/ Mesh config, types, path parsing
|
|
@@ -687,7 +687,9 @@ omniwire/
|
|
|
687
687
|
|
|
688
688
|
| Version | Date | Changes |
|
|
689
689
|
|---------|------|---------|
|
|
690
|
-
| **v3.
|
|
690
|
+
| **v3.4.1** | 2026-03-30 | Cross-OS: `omniwire_scrape` install works on Linux (systemd), macOS (launchd), Windows, Docker (nohup). Auto-upgrades deps + browsers. Python/pip path detection. |
|
|
691
|
+
| **v3.4.0** | 2026-03-30 | Rewrite: `omniwire_scrape` — OmniMesh-routed Scrapling with auto-install, VPN routing, adaptive selectors, XPath, bulk sessions. install/status actions. Full README audit (88 tools). |
|
|
692
|
+
| **v3.3.1** | 2026-03-30 | New: `omniwire_scrape` tool — Scrapling-powered web scraping (static/browser/stealth modes, Cloudflare bypass, TLS spoofing). |
|
|
691
693
|
| **v3.3.0** | 2026-03-30 | New: `omniwire_coc` tool — unified CyberBase + Obsidian + Canvas sync. Auto-creates vault + canvas. `mirror-db` exports entire DB as .md. Configurable vault via `OMNIWIRE_VAULT_ROOT` env. |
|
|
692
694
|
| **v3.2.2** | 2026-03-30 | Fix: sync GitHub/npm metadata — badge, description, mermaid diagram all reflect 86 tools |
|
|
693
695
|
| **v3.2.1** | 2026-03-30 | New: 5 bi-directional sync tools (`omniwire_sync`, `omniwire_sync_rules`, `omniwire_sync_hooks`, `omniwire_sync_memory`, `omniwire_sync_agents`) — 86 tools total |
|
package/assets/banner-dark.svg
CHANGED
|
@@ -61,23 +61,25 @@
|
|
|
61
61
|
|
|
62
62
|
<!-- Stats bar -->
|
|
63
63
|
<g font-family="'Segoe UI Mono', 'SF Mono', monospace" font-size="12" fill="#59C2FF" opacity="0.7">
|
|
64
|
-
<text x="
|
|
65
|
-
<text x="
|
|
66
|
-
<text x="
|
|
67
|
-
<text x="
|
|
68
|
-
<text x="
|
|
69
|
-
<text x="
|
|
70
|
-
<text x="
|
|
64
|
+
<text x="100" y="175" text-anchor="middle">88 MCP Tools</text>
|
|
65
|
+
<text x="225" y="175" text-anchor="middle">A2A Protocol</text>
|
|
66
|
+
<text x="338" y="175" text-anchor="middle">OmniMesh</text>
|
|
67
|
+
<text x="440" y="175" text-anchor="middle">COC Sync</text>
|
|
68
|
+
<text x="545" y="175" text-anchor="middle">Scrapling</text>
|
|
69
|
+
<text x="650" y="175" text-anchor="middle">CyberBase</text>
|
|
70
|
+
<text x="750" y="175" text-anchor="middle">~80ms</text>
|
|
71
|
+
<text x="830" y="175" text-anchor="middle">v3.3</text>
|
|
71
72
|
</g>
|
|
72
73
|
|
|
73
74
|
<!-- Separator dots between stats -->
|
|
74
75
|
<g fill="#59C2FF" opacity="0.3">
|
|
75
|
-
<circle cx="
|
|
76
|
-
<circle cx="
|
|
77
|
-
<circle cx="
|
|
78
|
-
<circle cx="
|
|
79
|
-
<circle cx="
|
|
80
|
-
<circle cx="
|
|
76
|
+
<circle cx="163" cy="172" r="1.5"/>
|
|
77
|
+
<circle cx="282" cy="172" r="1.5"/>
|
|
78
|
+
<circle cx="389" cy="172" r="1.5"/>
|
|
79
|
+
<circle cx="493" cy="172" r="1.5"/>
|
|
80
|
+
<circle cx="598" cy="172" r="1.5"/>
|
|
81
|
+
<circle cx="700" cy="172" r="1.5"/>
|
|
82
|
+
<circle cx="790" cy="172" r="1.5"/>
|
|
81
83
|
</g>
|
|
82
84
|
|
|
83
85
|
<!-- Bottom wave -->
|
package/assets/banner-light.svg
CHANGED
|
@@ -54,23 +54,25 @@
|
|
|
54
54
|
|
|
55
55
|
<!-- Stats bar -->
|
|
56
56
|
<g font-family="'Segoe UI Mono', 'SF Mono', monospace" font-size="12" fill="#1A3A5C" opacity="0.6">
|
|
57
|
-
<text x="
|
|
58
|
-
<text x="
|
|
59
|
-
<text x="
|
|
60
|
-
<text x="
|
|
61
|
-
<text x="
|
|
62
|
-
<text x="
|
|
63
|
-
<text x="
|
|
57
|
+
<text x="100" y="175" text-anchor="middle">88 MCP Tools</text>
|
|
58
|
+
<text x="225" y="175" text-anchor="middle">A2A Protocol</text>
|
|
59
|
+
<text x="338" y="175" text-anchor="middle">OmniMesh</text>
|
|
60
|
+
<text x="440" y="175" text-anchor="middle">COC Sync</text>
|
|
61
|
+
<text x="545" y="175" text-anchor="middle">Scrapling</text>
|
|
62
|
+
<text x="650" y="175" text-anchor="middle">CyberBase</text>
|
|
63
|
+
<text x="750" y="175" text-anchor="middle">~80ms</text>
|
|
64
|
+
<text x="830" y="175" text-anchor="middle">v3.3</text>
|
|
64
65
|
</g>
|
|
65
66
|
|
|
66
67
|
<!-- Separator dots between stats -->
|
|
67
68
|
<g fill="#1A3A5C" opacity="0.25">
|
|
68
|
-
<circle cx="
|
|
69
|
-
<circle cx="
|
|
70
|
-
<circle cx="
|
|
71
|
-
<circle cx="
|
|
72
|
-
<circle cx="
|
|
73
|
-
<circle cx="
|
|
69
|
+
<circle cx="163" cy="172" r="1.5"/>
|
|
70
|
+
<circle cx="282" cy="172" r="1.5"/>
|
|
71
|
+
<circle cx="389" cy="172" r="1.5"/>
|
|
72
|
+
<circle cx="493" cy="172" r="1.5"/>
|
|
73
|
+
<circle cx="598" cy="172" r="1.5"/>
|
|
74
|
+
<circle cx="700" cy="172" r="1.5"/>
|
|
75
|
+
<circle cx="790" cy="172" r="1.5"/>
|
|
74
76
|
</g>
|
|
75
77
|
|
|
76
78
|
<!-- Bottom wave -->
|
package/dist/mcp/server.js
CHANGED
|
@@ -4146,29 +4146,116 @@ echo "port-knock configured: ${ports.join(' -> ')} -> port ${target}"`;
|
|
|
4146
4146
|
return fail('invalid action');
|
|
4147
4147
|
});
|
|
4148
4148
|
// --- Tool: omniwire_scrape ---
|
|
4149
|
-
// Scrapling-powered web scraping
|
|
4150
|
-
//
|
|
4151
|
-
|
|
4152
|
-
|
|
4149
|
+
// Scrapling-powered web scraping routed through the OmniMesh WireGuard/Tailscale network.
|
|
4150
|
+
// Auto-installs Scrapling on target node if missing. Supports VPN routing for anonymity.
|
|
4151
|
+
// MCP server runs on Contabo:8931 (systemd), Python CLI fallback on any node.
|
|
4152
|
+
server.tool('omniwire_scrape', 'Scrape web pages using Scrapling via OmniMesh. Modes: http (TLS-spoofed, ~200ms), browser (Playwright JS rendering), stealth (Camoufox + Cloudflare Turnstile bypass). Auto-installs on target node if missing. Routes through WireGuard mesh. Supports VPN routing (via_vpn), bulk URLs with session pooling, CSS/XPath selectors, adaptive self-healing selectors. Actions: scrape (default), install, status.', {
|
|
4153
|
+
action: z.enum(['scrape', 'install', 'status']).default('scrape').describe('scrape=fetch pages, install=setup Scrapling on node, status=check Scrapling health on node'),
|
|
4154
|
+
url: z.string().optional().describe('Target URL to scrape'),
|
|
4153
4155
|
urls: z.array(z.string()).optional().describe('Multiple URLs for bulk scraping (uses session pooling)'),
|
|
4154
|
-
mode: z.enum(['http', 'browser', 'stealth']).default('http').describe('http=fast
|
|
4156
|
+
mode: z.enum(['http', 'browser', 'stealth']).default('http').describe('http=fast TLS-spoofed, browser=Playwright JS, stealth=Camoufox+CF bypass'),
|
|
4155
4157
|
extraction_type: z.enum(['markdown', 'html', 'text']).default('markdown').describe('Output format'),
|
|
4156
|
-
css_selector: z.string().optional().describe('CSS selector to extract specific elements
|
|
4157
|
-
|
|
4158
|
-
|
|
4158
|
+
css_selector: z.string().optional().describe('CSS selector to extract specific elements'),
|
|
4159
|
+
xpath: z.string().optional().describe('XPath selector (alternative to css_selector)'),
|
|
4160
|
+
solve_cloudflare: z.boolean().optional().describe('Solve Cloudflare Turnstile (stealth mode)'),
|
|
4161
|
+
wait_selector: z.string().optional().describe('Wait for CSS selector before extracting (browser/stealth)'),
|
|
4159
4162
|
network_idle: z.boolean().optional().describe('Wait for network idle before extracting'),
|
|
4160
4163
|
proxy: z.string().optional().describe('Proxy URL (http://user:pass@host:port)'),
|
|
4164
|
+
via_vpn: z.string().optional().describe('Route through VPN: "mullvad", "mullvad:se", "wg:wg-vpn"'),
|
|
4161
4165
|
timeout: z.number().default(30).describe('Timeout in seconds'),
|
|
4162
|
-
impersonate: z.string().
|
|
4163
|
-
|
|
4166
|
+
impersonate: z.string().default('chrome').describe('TLS fingerprint: chrome, safari, firefox (http mode)'),
|
|
4167
|
+
adaptive: z.boolean().optional().describe('Enable adaptive self-healing selectors (stores element signatures)'),
|
|
4168
|
+
disable_resources: z.array(z.string()).optional().describe('Block resource types: image, font, stylesheet, script'),
|
|
4169
|
+
node: z.string().optional().describe('Node to run on (default: auto-selects best available)'),
|
|
4164
4170
|
label: z.string().optional().describe('Short label for task tracking'),
|
|
4165
|
-
}, async ({ url, urls, mode, extraction_type, css_selector, solve_cloudflare, wait_selector, network_idle, proxy, timeout, impersonate, node: targetNode, label }) => {
|
|
4171
|
+
}, async ({ action, url, urls, mode, extraction_type, css_selector, xpath, solve_cloudflare, wait_selector, network_idle, proxy, via_vpn, timeout, impersonate, adaptive, disable_resources, node: targetNode, label }) => {
|
|
4166
4172
|
if (!manager)
|
|
4167
4173
|
return fail('NodeManager not initialized');
|
|
4174
|
+
// Auto-select best node: prefer contabo (has Scrapling + browsers installed)
|
|
4168
4175
|
const target = targetNode ?? 'contabo';
|
|
4169
|
-
//
|
|
4176
|
+
// --- Action: install ---
|
|
4177
|
+
if (action === 'install') {
|
|
4178
|
+
// Detect target OS for cross-platform install
|
|
4179
|
+
const targetOs = remoteNodes().find(n => n.id === target)?.os ?? 'linux';
|
|
4180
|
+
const pyCmd = targetOs === 'windows' ? 'python' : 'python3';
|
|
4181
|
+
const pipCmd = targetOs === 'windows' ? 'pip' : 'pip3';
|
|
4182
|
+
const installScript = targetOs === 'windows'
|
|
4183
|
+
? `${pipCmd} install --upgrade "scrapling[all]" 2>&1 | Select-Object -Last 3; scrapling install 2>&1 | Select-Object -Last 3; ${pyCmd} -c "import scrapling; print('scrapling', scrapling.__version__)" 2>&1`
|
|
4184
|
+
: `
|
|
4185
|
+
# Ensure Python 3.10+ and pip are available
|
|
4186
|
+
command -v ${pyCmd} &>/dev/null || { echo "ERROR: ${pyCmd} not found — install Python 3.10+"; exit 1; }
|
|
4187
|
+
command -v ${pipCmd} &>/dev/null || { ${pyCmd} -m ensurepip --upgrade 2>&1 | tail -1; }
|
|
4188
|
+
# Install/upgrade Scrapling and all dependencies
|
|
4189
|
+
${pipCmd} install --upgrade "scrapling[all]" 2>&1 | tail -3
|
|
4190
|
+
# Download/update Playwright + Camoufox browsers
|
|
4191
|
+
scrapling install 2>&1 | tail -3
|
|
4192
|
+
${pyCmd} -c "import scrapling; print('scrapling', scrapling.__version__)" 2>&1
|
|
4193
|
+
# Set up systemd service if available
|
|
4194
|
+
if command -v systemctl &>/dev/null; then
|
|
4195
|
+
if [ ! -f /etc/systemd/system/scrapling-mcp.service ]; then
|
|
4196
|
+
cat > /etc/systemd/system/scrapling-mcp.service << 'UNIT'
|
|
4197
|
+
[Unit]
|
|
4198
|
+
Description=Scrapling MCP HTTP Server
|
|
4199
|
+
After=network.target
|
|
4200
|
+
[Service]
|
|
4201
|
+
Type=simple
|
|
4202
|
+
ExecStart=/usr/local/bin/scrapling mcp --http --port 8931
|
|
4203
|
+
Restart=always
|
|
4204
|
+
RestartSec=5
|
|
4205
|
+
Environment=HOME=/root
|
|
4206
|
+
[Install]
|
|
4207
|
+
WantedBy=multi-user.target
|
|
4208
|
+
UNIT
|
|
4209
|
+
systemctl daemon-reload
|
|
4210
|
+
systemctl enable scrapling-mcp
|
|
4211
|
+
systemctl start scrapling-mcp
|
|
4212
|
+
echo "systemd service created and started"
|
|
4213
|
+
else
|
|
4214
|
+
systemctl restart scrapling-mcp
|
|
4215
|
+
echo "systemd service restarted"
|
|
4216
|
+
fi
|
|
4217
|
+
elif command -v launchctl &>/dev/null; then
|
|
4218
|
+
# macOS: create launchd plist
|
|
4219
|
+
PLIST="/Library/LaunchDaemons/com.scrapling.mcp.plist"
|
|
4220
|
+
if [ ! -f "$PLIST" ]; then
|
|
4221
|
+
cat > "$PLIST" << 'PLIST'
|
|
4222
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
4223
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
4224
|
+
<plist version="1.0"><dict>
|
|
4225
|
+
<key>Label</key><string>com.scrapling.mcp</string>
|
|
4226
|
+
<key>ProgramArguments</key><array><string>/usr/local/bin/scrapling</string><string>mcp</string><string>--http</string><string>--port</string><string>8931</string></array>
|
|
4227
|
+
<key>RunAtLoad</key><true/>
|
|
4228
|
+
<key>KeepAlive</key><true/>
|
|
4229
|
+
</dict></plist>
|
|
4230
|
+
PLIST
|
|
4231
|
+
launchctl load "$PLIST"
|
|
4232
|
+
echo "launchd service created and loaded"
|
|
4233
|
+
else
|
|
4234
|
+
launchctl unload "$PLIST" 2>/dev/null; launchctl load "$PLIST"
|
|
4235
|
+
echo "launchd service reloaded"
|
|
4236
|
+
fi
|
|
4237
|
+
else
|
|
4238
|
+
# Fallback: run in background with nohup
|
|
4239
|
+
nohup scrapling mcp --http --port 8931 &>/tmp/scrapling-mcp.log &
|
|
4240
|
+
echo "started in background (no service manager)"
|
|
4241
|
+
fi`.trim();
|
|
4242
|
+
const r = await manager.exec(target, installScript);
|
|
4243
|
+
return okBrief(`Scrapling install on ${target}:\n${r.stdout.trim()}`);
|
|
4244
|
+
}
|
|
4245
|
+
// --- Action: status ---
|
|
4246
|
+
if (action === 'status') {
|
|
4247
|
+
const targetOs = remoteNodes().find(n => n.id === target)?.os ?? 'linux';
|
|
4248
|
+
const pyCmd = targetOs === 'windows' ? 'python' : 'python3';
|
|
4249
|
+
const statusScript = targetOs === 'windows'
|
|
4250
|
+
? `${pyCmd} -c "import scrapling; print('version:', scrapling.__version__)" 2>&1; curl -s --connect-timeout 2 http://localhost:8931/ 2>&1 | head -1`
|
|
4251
|
+
: `${pyCmd} -c "import scrapling; print('version:', scrapling.__version__)" 2>&1; systemctl is-active scrapling-mcp 2>/dev/null || (launchctl list com.scrapling.mcp 2>/dev/null && echo "launchd") || echo "no service manager"; curl -s --connect-timeout 2 http://localhost:8931/ 2>&1 | head -1 || echo "MCP server not reachable"`;
|
|
4252
|
+
const r = await manager.exec(target, statusScript);
|
|
4253
|
+
return okBrief(`Scrapling on ${target}:\n${r.stdout.trim()}`);
|
|
4254
|
+
}
|
|
4255
|
+
// --- Action: scrape ---
|
|
4256
|
+
if (!url && !urls?.length)
|
|
4257
|
+
return fail('url or urls required for scrape action');
|
|
4170
4258
|
const allUrls = urls?.length ? urls : [url];
|
|
4171
|
-
const urlList = allUrls.map(u => `'${u.replace(/'/g, "'\\''")}'`).join(' ');
|
|
4172
4259
|
// Map mode to Scrapling fetcher
|
|
4173
4260
|
const fetcherMap = {
|
|
4174
4261
|
http: 'Fetcher',
|
|
@@ -4176,45 +4263,78 @@ echo "port-knock configured: ${ports.join(' -> ')} -> port ${target}"`;
|
|
|
4176
4263
|
stealth: 'StealthyFetcher',
|
|
4177
4264
|
};
|
|
4178
4265
|
const fetcher = fetcherMap[mode] ?? 'Fetcher';
|
|
4179
|
-
|
|
4180
|
-
const
|
|
4181
|
-
|
|
4182
|
-
const
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
|
|
4187
|
-
|
|
4188
|
-
|
|
4189
|
-
|
|
4190
|
-
|
|
4191
|
-
|
|
4192
|
-
|
|
4193
|
-
|
|
4266
|
+
const isSession = allUrls.length > 1;
|
|
4267
|
+
const sessionClass = isSession ? { http: 'FetcherSession', browser: 'AsyncDynamicFetcher', stealth: 'AsyncStealthyFetcher' }[mode] ?? 'FetcherSession' : '';
|
|
4268
|
+
// Build Python kwargs
|
|
4269
|
+
const kwargs = [];
|
|
4270
|
+
if (proxy)
|
|
4271
|
+
kwargs.push(`proxy='${proxy.replace(/'/g, "'\\''")}'`);
|
|
4272
|
+
if (impersonate && mode === 'http')
|
|
4273
|
+
kwargs.push(`impersonate='${impersonate}'`);
|
|
4274
|
+
if (timeout)
|
|
4275
|
+
kwargs.push(`timeout=${timeout}`);
|
|
4276
|
+
if (solve_cloudflare)
|
|
4277
|
+
kwargs.push('solve_cloudflare=True');
|
|
4278
|
+
if (wait_selector)
|
|
4279
|
+
kwargs.push(`wait_selector='${wait_selector.replace(/'/g, "'\\''")}'`);
|
|
4280
|
+
if (network_idle)
|
|
4281
|
+
kwargs.push('network_idle=True');
|
|
4282
|
+
if (disable_resources?.length)
|
|
4283
|
+
kwargs.push(`disable_resources=${JSON.stringify(disable_resources)}`);
|
|
4284
|
+
const kwargsStr = kwargs.length ? ', ' + kwargs.join(', ') : '';
|
|
4285
|
+
// Build selector chain
|
|
4286
|
+
let selectorChain = '';
|
|
4287
|
+
if (css_selector) {
|
|
4288
|
+
selectorChain = adaptive
|
|
4289
|
+
? `.css('${css_selector.replace(/'/g, "\\'")}', adaptive=True, auto_save=True)`
|
|
4290
|
+
: `.css('${css_selector.replace(/'/g, "\\'")}')`;
|
|
4291
|
+
}
|
|
4292
|
+
else if (xpath) {
|
|
4293
|
+
selectorChain = `.xpath('${xpath.replace(/'/g, "\\'")}')`;
|
|
4294
|
+
}
|
|
4295
|
+
// Build extraction
|
|
4296
|
+
const extractExpr = selectorChain
|
|
4297
|
+
? `${selectorChain}.getall()`
|
|
4298
|
+
: extraction_type === 'html'
|
|
4299
|
+
? '.body.decode("utf-8", errors="replace") if hasattr(page, "body") else str(page)'
|
|
4300
|
+
: '.get_all_text()';
|
|
4301
|
+
// Auto-install check: try import, install if missing
|
|
4302
|
+
const autoInstall = `
|
|
4303
|
+
try:
|
|
4304
|
+
from scrapling import ${fetcher}${isSession && sessionClass ? ', ' + sessionClass : ''}
|
|
4305
|
+
except ImportError:
|
|
4306
|
+
import subprocess, sys
|
|
4307
|
+
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'scrapling[all]', '-q'])
|
|
4308
|
+
subprocess.check_call(['scrapling', 'install'])
|
|
4309
|
+
from scrapling import ${fetcher}${isSession && sessionClass ? ', ' + sessionClass : ''}`;
|
|
4194
4310
|
const script = `
|
|
4195
4311
|
import json, sys
|
|
4312
|
+
${autoInstall}
|
|
4313
|
+
results = []
|
|
4314
|
+
urls = ${JSON.stringify(allUrls)}
|
|
4196
4315
|
try:
|
|
4197
|
-
|
|
4198
|
-
results = []
|
|
4199
|
-
urls = ${JSON.stringify(allUrls)}
|
|
4316
|
+
fetcher = ${fetcher}()
|
|
4200
4317
|
for u in urls:
|
|
4201
4318
|
try:
|
|
4202
|
-
page =
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
|
|
4207
|
-
results.append({"url": u, "status": page.status, "content": str(content)[:50000]})
|
|
4208
|
-
else:
|
|
4209
|
-
results.append({"url": u, "status": page.status, "content": f"HTTP {page.status}"})
|
|
4319
|
+
page = fetcher.get(u${kwargsStr})
|
|
4320
|
+
content = page${extractExpr}
|
|
4321
|
+
if isinstance(content, list):
|
|
4322
|
+
content = '\\n'.join(str(c) for c in content[:200])
|
|
4323
|
+
results.append({"url": u, "status": getattr(page, 'status', 200), "content": str(content)[:50000], "size": len(str(content))})
|
|
4210
4324
|
except Exception as e:
|
|
4211
4325
|
results.append({"url": u, "status": 0, "error": str(e)[:500]})
|
|
4212
|
-
print(json.dumps(results))
|
|
4213
4326
|
except Exception as e:
|
|
4214
|
-
|
|
4327
|
+
results.append({"error": f"init failed: {e}"})
|
|
4328
|
+
print(json.dumps(results))
|
|
4215
4329
|
`.trim();
|
|
4216
4330
|
try {
|
|
4217
|
-
|
|
4331
|
+
// Route through VPN if requested, otherwise direct exec via WireGuard mesh
|
|
4332
|
+
const targetOs = remoteNodes().find(n => n.id === target)?.os ?? 'linux';
|
|
4333
|
+
const pyCmd = targetOs === 'windows' ? 'python' : 'python3';
|
|
4334
|
+
let execCmd = `${pyCmd} -c ${JSON.stringify(script)}`;
|
|
4335
|
+
if (via_vpn)
|
|
4336
|
+
execCmd = buildVpnWrappedCmd(via_vpn, execCmd);
|
|
4337
|
+
const r = await manager.exec(target, execCmd);
|
|
4218
4338
|
const output = r.stdout.trim();
|
|
4219
4339
|
try {
|
|
4220
4340
|
const results = JSON.parse(output);
|
|
@@ -4222,10 +4342,10 @@ except Exception as e:
|
|
|
4222
4342
|
const res = results[0];
|
|
4223
4343
|
if (res.error)
|
|
4224
4344
|
return fail(`scrape error: ${res.error}`);
|
|
4225
|
-
return okBrief(`[${res.status}] ${res.url}\n\n${res.content}`);
|
|
4345
|
+
return okBrief(`[${res.status}] ${res.url} (${res.size ?? 0} chars, ${mode})\n\n${res.content}`);
|
|
4226
4346
|
}
|
|
4227
|
-
const summary = results.map(
|
|
4228
|
-
return okBrief(`Scraped ${results.length} URLs:\n${summary}\n\n${results.map(
|
|
4347
|
+
const summary = results.map(r => `[${r.status ?? 'ERR'}] ${r.url ?? '?'}: ${r.error ?? `${r.size ?? 0} chars`}`).join('\n');
|
|
4348
|
+
return okBrief(`Scraped ${results.length} URLs (${mode}):\n${summary}\n\n${results.map(r => r.content ?? '').join('\n---\n').slice(0, 50000)}`);
|
|
4229
4349
|
}
|
|
4230
4350
|
catch {
|
|
4231
4351
|
return okBrief(output.slice(0, 10000));
|