stealth-cli 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +295 -0
- package/bin/stealth.js +50 -0
- package/package.json +65 -0
- package/skills/SKILL.md +244 -0
- package/src/browser.js +341 -0
- package/src/client.js +115 -0
- package/src/commands/batch.js +180 -0
- package/src/commands/browse.js +101 -0
- package/src/commands/config.js +85 -0
- package/src/commands/crawl.js +169 -0
- package/src/commands/daemon.js +143 -0
- package/src/commands/extract.js +153 -0
- package/src/commands/fingerprint.js +306 -0
- package/src/commands/interactive.js +284 -0
- package/src/commands/mcp.js +68 -0
- package/src/commands/monitor.js +160 -0
- package/src/commands/pdf.js +109 -0
- package/src/commands/profile.js +112 -0
- package/src/commands/proxy.js +116 -0
- package/src/commands/screenshot.js +96 -0
- package/src/commands/search.js +162 -0
- package/src/commands/serve.js +240 -0
- package/src/config.js +123 -0
- package/src/cookies.js +67 -0
- package/src/daemon-entry.js +19 -0
- package/src/daemon.js +294 -0
- package/src/errors.js +136 -0
- package/src/extractors/base.js +59 -0
- package/src/extractors/bing.js +47 -0
- package/src/extractors/duckduckgo.js +91 -0
- package/src/extractors/github.js +103 -0
- package/src/extractors/google.js +173 -0
- package/src/extractors/index.js +55 -0
- package/src/extractors/youtube.js +87 -0
- package/src/humanize.js +210 -0
- package/src/index.js +32 -0
- package/src/macros.js +36 -0
- package/src/mcp-server.js +341 -0
- package/src/output.js +65 -0
- package/src/profiles.js +308 -0
- package/src/proxy-pool.js +256 -0
- package/src/retry.js +112 -0
- package/src/session.js +159 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stealth browse <url> - Visit a URL and print page content
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import ora from 'ora';
|
|
6
|
+
import {
|
|
7
|
+
launchBrowser, closeBrowser, navigate, getSnapshot,
|
|
8
|
+
getTextContent, getTitle, getUrl, evaluate, waitForReady,
|
|
9
|
+
} from '../browser.js';
|
|
10
|
+
import { formatOutput, log } from '../output.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Register the `browse` sub-command: visit a URL and print the page content.
 *
 * The action launches a browser through `launchBrowser`, optionally loads
 * cookies (skipped when the handle reports daemon mode), navigates to the URL
 * and prints the page. `--format snapshot` prints `getSnapshot()`, `--format
 * json` prints a JSON document built with `formatOutput`, and every other
 * format value prints the output of `getTextContent()`.
 *
 * On failure the error is logged, the browser is closed, and only then does
 * the process exit with status 1.
 *
 * @param {object} program - Commander-style program exposing `.command()`.
 */
export function registerBrowse(program) {
  program
    .command('browse')
    .description('Visit a URL and print page content')
    .argument('<url>', 'URL to visit')
    .option('-f, --format <format>', 'Output format: text, json, markdown, snapshot', 'text')
    .option('-w, --wait <ms>', 'Wait time after page load (ms)', '2000')
    .option('--proxy <proxy>', 'Proxy server (http://user:pass@host:port)')
    .option('--cookies <file>', 'Load cookies from Netscape-format file')
    .option('--no-headless', 'Show browser window')
    .option('--locale <locale>', 'Browser locale', 'en-US')
    .option('--user-agent', 'Print the browser user-agent')
    .option('--humanize', 'Enable human behavior simulation')
    .option('--retries <n>', 'Max retries on failure', '2')
    .option('--profile <name>', 'Use a browser profile')
    .option('--session <name>', 'Use/restore a named session')
    .option('--proxy-rotate', 'Rotate proxy from pool')
    .action(async (url, opts) => {
      const spinner = ora('Launching stealth browser...').start();
      let handle;
      // Recorded instead of exiting inside `catch`: process.exit() terminates
      // immediately, which would skip the `finally` cleanup below.
      let failed = false;

      try {
        handle = await launchBrowser({
          headless: opts.headless,
          proxy: opts.proxy,
          proxyRotate: opts.proxyRotate,
          profile: opts.profile,
          session: opts.session,
          locale: opts.locale,
        });

        // Load cookies if provided (direct mode only)
        if (opts.cookies && !handle.isDaemon) {
          const { loadCookies } = await import('../cookies.js');
          const result = await loadCookies(handle.context, opts.cookies);
          spinner.text = `Loaded ${result.count} cookies`;
        }

        spinner.text = `Navigating to ${url}...`;
        await navigate(handle, url, {
          humanize: opts.humanize,
          retries: Number.parseInt(opts.retries, 10),
        });

        if (!handle.isDaemon) {
          await waitForReady(handle.page, { timeout: Number.parseInt(opts.wait, 10) });
        }

        spinner.stop();

        // Print user-agent if requested
        if (opts.userAgent) {
          const ua = await evaluate(handle, 'navigator.userAgent');
          log.info(`User-Agent: ${ua}`);
        }

        // Get page content based on format
        let output;

        if (opts.format === 'snapshot') {
          output = await getSnapshot(handle);
        } else if (opts.format === 'json') {
          const title = await getTitle(handle);
          const pageUrl = await getUrl(handle);
          const ua = await evaluate(handle, 'navigator.userAgent');
          const text = await getTextContent(handle);
          output = formatOutput({
            url: pageUrl,
            title,
            userAgent: ua,
            // Content is capped at 10000 characters in JSON output.
            content: text.slice(0, 10000),
            timestamp: new Date().toISOString(),
          }, 'json');
        } else {
          output = await getTextContent(handle);
        }

        console.log(output);

        const currentUrl = await getUrl(handle);
        log.success(`Done: ${currentUrl}`);
      } catch (err) {
        spinner.stop();
        log.error(`Browse failed: ${err.message}`);
        failed = true;
      } finally {
        if (handle) await closeBrowser(handle);
      }

      if (failed) process.exit(1);
    });
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stealth config - Manage global configuration
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import {
|
|
7
|
+
getConfigValue, setConfigValue, deleteConfigValue,
|
|
8
|
+
listConfig, resetConfig, CONFIG_FILE,
|
|
9
|
+
} from '../config.js';
|
|
10
|
+
import { log } from '../output.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Register the `config` command group (`list`, `get`, `set`, `delete`,
 * `reset`) for managing the global configuration file.
 *
 * Every sub-command that calls into the config module reports a thrown error
 * through `log.error` and exits with status 1.
 *
 * @param {object} program - Commander-style program exposing `.command()`.
 */
export function registerConfig(program) {
  const config = program
    .command('config')
    .description('Manage global configuration (~/.stealth/config.json)');

  // stealth config list
  config
    .command('list')
    .description('Show all config values')
    .action(() => {
      const items = listConfig();
      console.log(chalk.bold('\n Configuration:\n'));
      console.log(chalk.dim(` ${'Key'.padEnd(20)} ${'Value'.padEnd(25)} Source`));
      console.log(chalk.dim(' ' + '─'.repeat(55)));

      for (const item of items) {
        const isNull = item.value === null;
        // Pad the plain text first and colour afterwards: ANSI escape codes
        // count toward String.length, so padding a coloured string would
        // leave `null` rows narrower than the others.
        const padded = (isNull ? 'null' : String(item.value)).padEnd(25);
        const val = isNull ? chalk.dim(padded) : padded;
        const src = item.source === 'user' ? chalk.cyan('user') : chalk.dim('default');
        console.log(` ${item.key.padEnd(20)} ${val} ${src}`);
      }

      console.log(chalk.dim(`\n File: ${CONFIG_FILE}\n`));
    });

  // stealth config get <key>
  config
    .command('get')
    .description('Get a config value')
    .argument('<key>', 'Config key')
    .action((key) => {
      try {
        const value = getConfigValue(key);
        console.log(value);
      } catch (err) {
        log.error(err.message);
        process.exit(1);
      }
    });

  // stealth config set <key> <value>
  config
    .command('set')
    .description('Set a config value')
    .argument('<key>', 'Config key')
    .argument('<value>', 'Config value')
    .action((key, value) => {
      try {
        const result = setConfigValue(key, value);
        log.success(`${key} = ${result}`);
      } catch (err) {
        log.error(err.message);
        process.exit(1);
      }
    });

  // stealth config delete <key>
  config
    .command('delete')
    .description('Reset a config value to default')
    .argument('<key>', 'Config key')
    .action((key) => {
      // Same error handling as get/set so a failure is reported cleanly
      // instead of surfacing as an uncaught exception.
      try {
        deleteConfigValue(key);
        log.success(`${key} reset to default`);
      } catch (err) {
        log.error(err.message);
        process.exit(1);
      }
    });

  // stealth config reset
  config
    .command('reset')
    .description('Reset all config to defaults')
    .action(() => {
      try {
        resetConfig();
        log.success('All config reset to defaults');
      } catch (err) {
        log.error(err.message);
        process.exit(1);
      }
    });
}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stealth crawl <url> - Crawl pages recursively with anti-detection
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import ora from 'ora';
|
|
6
|
+
import { launchBrowser, closeBrowser, getTextContent, evaluate, waitForReady } from '../browser.js';
|
|
7
|
+
import { navigateWithRetry } from '../retry.js';
|
|
8
|
+
import { randomDelay, humanScroll } from '../humanize.js';
|
|
9
|
+
import { formatOutput, log } from '../output.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Parse a CLI option that must be a non-negative base-10 integer.
 *
 * @param {string} value - Raw option value as given on the command line.
 * @param {string} flag - Flag name used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} If the value is not a number or is negative.
 */
function parseNonNegativeInt(value, flag) {
  const parsed = Number.parseInt(value, 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    throw new Error(`Invalid value for ${flag}: ${value}`);
  }
  return parsed;
}

/**
 * Register the `crawl` sub-command: breadth-first crawl starting at a URL.
 *
 * Pages are taken from a FIFO queue until it is empty or `--limit` results
 * were written. Each crawled page is emitted as one record (URL, title, the
 * first 5000 characters of text, depth, timestamp) to stdout or `--output`.
 * Links are followed while the page depth is below `--depth`. A failure on a
 * single page is logged as a warning and the crawl continues.
 *
 * Option values and the start URL are validated before the browser is
 * launched. On a fatal error the browser is closed before the process exits
 * with status 1.
 *
 * @param {object} program - Commander-style program exposing `.command()`.
 */
export function registerCrawl(program) {
  program
    .command('crawl')
    .description('Crawl pages recursively with anti-detection')
    .argument('<url>', 'Starting URL')
    .option('-d, --depth <n>', 'Maximum crawl depth', '1')
    .option('-l, --limit <n>', 'Maximum pages to crawl', '10')
    .option('--same-origin', 'Only follow same-origin links (default: true)', true)
    // Negated form so the default-true flag above can actually be turned off.
    .option('--no-same-origin', 'Also follow links to other origins')
    .option('--delay <ms>', 'Delay between requests (ms)', '1000')
    .option('-f, --format <format>', 'Output format: json, jsonl, text', 'jsonl')
    .option('-o, --output <file>', 'Output file (default: stdout)')
    .option('--proxy <proxy>', 'Proxy server')
    .option('--cookies <file>', 'Load cookies from Netscape-format file')
    .option('--no-headless', 'Show browser window')
    .option('--include <pattern>', 'Only crawl URLs matching this pattern (regex)')
    .option('--exclude <pattern>', 'Skip URLs matching this pattern (regex)')
    .option('--humanize', 'Enable human behavior simulation')
    .option('--retries <n>', 'Max retries per page', '2')
    .option('--profile <name>', 'Use a browser profile')
    .option('--proxy-rotate', 'Rotate proxy from pool')
    .action(async (startUrl, opts) => {
      const spinner = ora('Launching stealth browser...').start();
      let handle;
      // Recorded instead of exiting inside `catch`: process.exit() terminates
      // immediately, which would skip the `finally` cleanup below.
      let failed = false;

      try {
        // Validate all user input inside the try block and before launching
        // the browser, so bad input yields a clean error message.
        const maxDepth = parseNonNegativeInt(opts.depth, '--depth');
        const maxPages = parseNonNegativeInt(opts.limit, '--limit');
        const delay = parseNonNegativeInt(opts.delay, '--delay');
        const maxRetries = parseNonNegativeInt(opts.retries, '--retries');
        const includeRegex = opts.include ? new RegExp(opts.include) : null;
        const excludeRegex = opts.exclude ? new RegExp(opts.exclude) : null;
        const startOrigin = new URL(startUrl).origin;

        handle = await launchBrowser({
          headless: opts.headless,
          proxy: opts.proxy,
          proxyRotate: opts.proxyRotate,
          profile: opts.profile,
        });

        // Crawl requires direct mode (page access)
        // NOTE(review): only a warning is emitted here; the loop below still
        // uses handle.page. Confirm whether a daemon handle exposes `page`.
        if (handle.isDaemon) {
          log.warn('Crawl uses direct mode (daemon does not support multi-page crawling)');
        }

        if (opts.cookies && handle.context) {
          const { loadCookies } = await import('../cookies.js');
          await loadCookies(handle.context, opts.cookies);
        }

        const visited = new Set();
        const queue = [{ url: startUrl, depth: 0 }];
        const results = [];
        let outputStream;

        if (opts.output) {
          const { createWriteStream } = await import('fs');
          outputStream = createWriteStream(opts.output);
        }

        // Emit one record to the chosen sink and remember it for the summary.
        const writeResult = (result) => {
          const line = opts.format === 'jsonl'
            ? JSON.stringify(result)
            : formatOutput(result, opts.format);

          if (outputStream) {
            outputStream.write(line + '\n');
          } else {
            console.log(line);
          }
          results.push(result);
        };

        while (queue.length > 0 && results.length < maxPages) {
          const { url, depth } = queue.shift();

          if (visited.has(url)) continue;
          visited.add(url);

          if (includeRegex && !includeRegex.test(url)) continue;
          if (excludeRegex && excludeRegex.test(url)) continue;

          spinner.text = `[${results.length + 1}/${maxPages}] Crawling: ${url.slice(0, 60)}...`;

          try {
            // Navigate with retry
            await navigateWithRetry(handle.page, url, {
              timeout: 30000,
              maxRetries,
            });
            await waitForReady(handle.page, { timeout: 3000 });

            const title = await handle.page.title().catch(() => '');
            const text = await getTextContent(handle);

            const result = {
              url: handle.page.url(),
              title,
              content: text.slice(0, 5000),
              depth,
              timestamp: new Date().toISOString(),
            };

            writeResult(result);

            // Extract links for next depth
            if (depth < maxDepth) {
              const links = await handle.page.evaluate(() => {
                return Array.from(document.querySelectorAll('a[href]'))
                  .map((a) => a.href)
                  .filter((href) => href && href.startsWith('http'));
              });

              for (const link of links) {
                if (visited.has(link)) continue;

                if (opts.sameOrigin) {
                  try {
                    if (new URL(link).origin !== startOrigin) continue;
                  } catch {
                    // Unparseable link: skip it.
                    continue;
                  }
                }

                queue.push({ url: link, depth: depth + 1 });
              }
            }

            // Human-like delay between pages
            if (delay > 0) {
              if (opts.humanize) {
                // Human mode: scroll + random delay
                await humanScroll(handle.page, { scrolls: 1 });
                await randomDelay(delay * 0.8, delay * 1.5);
              } else {
                // Standard: fixed delay + small jitter
                const jitter = delay + Math.random() * delay * 0.3;
                await handle.page.waitForTimeout(jitter);
              }
            }
          } catch (err) {
            log.warn(`Failed to crawl ${url}: ${err.message}`);
          }
        }

        if (outputStream) outputStream.end();

        spinner.stop();
        log.success(`Crawl complete: ${results.length} pages crawled`);
        log.dim(` Start: ${startUrl}`);
        log.dim(` Depth: ${maxDepth}, Visited: ${visited.size}`);
        if (opts.output) log.dim(` Output: ${opts.output}`);
      } catch (err) {
        spinner.stop();
        log.error(`Crawl failed: ${err.message}`);
        failed = true;
      } finally {
        if (handle) await closeBrowser(handle);
      }

      if (failed) process.exit(1);
    });
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stealth daemon - Manage background browser daemon
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { fork } from 'child_process';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import fs from 'fs';
|
|
9
|
+
import { isDaemonRunning, PID_PATH, SOCKET_PATH, STEALTH_DIR } from '../daemon.js';
|
|
10
|
+
import { daemonStatus, daemonShutdown } from '../client.js';
|
|
11
|
+
import { log } from '../output.js';
|
|
12
|
+
|
|
13
|
+
/**
 * Register the `daemon` command group (`start`, `stop`, `status`) that
 * manages the background browser daemon.
 *
 * - `start` runs the daemon in the foreground with `--verbose`; otherwise it
 *   forks `daemon-entry.js` as a detached child and waits up to 15 seconds
 *   for it to send the `'ready'` IPC message.
 * - `stop` asks the daemon to shut down and falls back to sending SIGTERM to
 *   the PID stored in `PID_PATH`.
 * - `status` prints the fields returned by `daemonStatus()`.
 *
 * @param {object} program - Commander-style program exposing `.command()`.
 */
export function registerDaemon(program) {
  const daemon = program
    .command('daemon')
    .description('Manage background browser daemon for instant startup');

  // stealth daemon start
  daemon
    .command('start')
    .description('Start background browser daemon')
    .option('--idle-timeout <minutes>', 'Auto-shutdown after idle (minutes)', '5')
    .option('--verbose', 'Show daemon logs in terminal')
    .action(async (opts) => {
      if (isDaemonRunning()) {
        const status = await daemonStatus();
        if (status?.ok) {
          log.info(`Daemon already running (pid: ${status.pid}, uptime: ${status.uptime}s, memory: ${status.memoryMB}MB)`);
          return;
        }
      }

      // Reject a non-numeric timeout up front; otherwise NaN would be
      // forwarded to the daemon as the string "NaN".
      const idleMinutes = Number.parseInt(opts.idleTimeout, 10);
      if (Number.isNaN(idleMinutes) || idleMinutes < 0) {
        log.error(`Invalid --idle-timeout value: ${opts.idleTimeout}`);
        process.exit(1);
        return;
      }
      const idleTimeout = idleMinutes * 60 * 1000;

      if (opts.verbose) {
        // Run in foreground
        log.info('Starting daemon in foreground (Ctrl+C to stop)...');
        const { startDaemon } = await import('../daemon.js');
        await startDaemon({ idleTimeout, verbose: true });
      } else {
        // Fork as background process
        fs.mkdirSync(STEALTH_DIR, { recursive: true });

        const daemonScript = path.join(
          path.dirname(fileURLToPath(import.meta.url)),
          '..',
          'daemon-entry.js',
        );

        const child = fork(daemonScript, [], {
          detached: true,
          stdio: ['ignore', 'ignore', 'ignore', 'ipc'],
          env: {
            ...process.env,
            STEALTH_IDLE_TIMEOUT: String(idleTimeout),
          },
        });

        try {
          // Wait for daemon to report ready
          await new Promise((resolve, reject) => {
            const timer = setTimeout(() => {
              reject(new Error('Daemon startup timeout (15s)'));
            }, 15000);

            child.on('message', (msg) => {
              if (msg === 'ready') {
                clearTimeout(timer);
                resolve();
              }
            });

            child.on('error', (err) => {
              clearTimeout(timer);
              reject(err);
            });

            // Any exit before 'ready' is a startup failure, including a clean
            // exit (code 0) or termination by a signal (code null). After the
            // promise has settled this reject() is a no-op.
            child.on('exit', (code, signal) => {
              clearTimeout(timer);
              reject(new Error(`Daemon exited with code ${code ?? signal}`));
            });
          });
        } catch (err) {
          // Do not leave a half-started detached child behind.
          if (child.exitCode === null && child.signalCode === null) child.kill();
          log.error(`Failed to start daemon: ${err.message}`);
          process.exit(1);
          return;
        }

        // Detach so this CLI process can exit while the daemon keeps running.
        child.unref();
        child.disconnect();

        log.success(`Daemon started (pid: ${child.pid})`);
        log.dim(` Socket: ${SOCKET_PATH}`);
        log.dim(` Idle timeout: ${opts.idleTimeout} minutes`);
        log.dim(` Stop with: stealth daemon stop`);
      }
    });

  // stealth daemon stop
  daemon
    .command('stop')
    .description('Stop the background daemon')
    .action(async () => {
      if (!isDaemonRunning()) {
        log.info('Daemon is not running');
        return;
      }

      const result = await daemonShutdown();
      if (result?.ok) {
        log.success('Daemon stopped');
      } else {
        // Force kill via PID
        try {
          const pid = Number.parseInt(fs.readFileSync(PID_PATH, 'utf-8').trim(), 10);
          // process.kill() treats 0 and negative PIDs as process groups, so
          // never pass anything but a positive integer.
          if (!Number.isInteger(pid) || pid <= 0) {
            throw new Error(`invalid PID in ${PID_PATH}`);
          }
          process.kill(pid, 'SIGTERM');
          log.success(`Daemon killed (pid: ${pid})`);
        } catch (err) {
          log.error(`Failed to stop daemon: ${err.message}`);
        }
      }
    });

  // stealth daemon status
  daemon
    .command('status')
    .description('Show daemon status')
    .action(async () => {
      if (!isDaemonRunning()) {
        log.info('Daemon is not running');
        log.dim(' Start with: stealth daemon start');
        return;
      }

      const status = await daemonStatus();
      if (status?.ok) {
        log.success('Daemon is running');
        log.dim(` PID: ${status.pid}`);
        log.dim(` Uptime: ${status.uptime}s`);
        log.dim(` Contexts: ${status.contexts}`);
        log.dim(` Memory: ${status.memoryMB}MB`);
        log.dim(` Browser: ${status.browserConnected ? 'connected' : 'disconnected'}`);
      } else {
        log.warn('Daemon is running but not responding');
      }
    });
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stealth extract <url> - Extract structured data from a page
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import ora from 'ora';
|
|
6
|
+
import {
|
|
7
|
+
launchBrowser, closeBrowser, navigate, getTitle,
|
|
8
|
+
getUrl, evaluate, waitForReady,
|
|
9
|
+
} from '../browser.js';
|
|
10
|
+
import { formatOutput, log } from '../output.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Build the in-page script used by the default (selector) extraction mode.
 *
 * The selector and attribute name are embedded as JSON string literals, so
 * quotes or other special characters in user input cannot break out of the
 * string and change the script.
 *
 * @param {string} selector - CSS selector to query.
 * @param {string|undefined} attr - Attribute to read; falsy means text content.
 * @param {boolean|undefined} all - Match every element instead of the first.
 * @returns {string} A self-invoking expression that evaluates to an array.
 */
function buildSelectorScript(selector, attr, all) {
  const selectorLiteral = JSON.stringify(String(selector));
  const attrLiteral = JSON.stringify(attr ? String(attr) : '');

  return `(() => {
    const selector = ${selectorLiteral};
    const attrName = ${attrLiteral};
    const elements = ${Boolean(all)}
      ? Array.from(document.querySelectorAll(selector))
      : [document.querySelector(selector)].filter(Boolean);
    return elements.map(el => {
      if (attrName) return el.getAttribute(attrName);
      return el.textContent?.trim() || '';
    });
  })()`;
}

/**
 * Register the `extract` sub-command: pull structured data out of a page.
 *
 * Exactly one extraction mode runs, checked in this order: `--links`,
 * `--images`, `--meta`, `--headers`, otherwise the `--selector` mode
 * (optionally with `--attr` and `--all`). The result is wrapped together with
 * the page URL, title, item count and a timestamp and printed through
 * `formatOutput`.
 *
 * On failure the error is logged, the browser is closed, and only then does
 * the process exit with status 1.
 *
 * @param {object} program - Commander-style program exposing `.command()`.
 */
export function registerExtract(program) {
  program
    .command('extract')
    .description('Extract structured data from a page')
    .argument('<url>', 'URL to extract from')
    .option('-s, --selector <selector>', 'CSS selector to extract', 'body')
    .option('-a, --attr <attribute>', 'Extract attribute instead of text (e.g. href, src)')
    .option('--all', 'Extract all matching elements (not just the first)')
    .option('--links', 'Extract all links from the page')
    .option('--images', 'Extract all image URLs from the page')
    .option('--meta', 'Extract meta tags (title, description, og tags)')
    .option('--headers', 'Extract all headings (h1-h6)')
    .option('-f, --format <format>', 'Output format: json, text, markdown', 'json')
    .option('--wait <ms>', 'Wait time after page load (ms)', '2000')
    .option('--proxy <proxy>', 'Proxy server')
    .option('--cookies <file>', 'Load cookies from Netscape-format file')
    .option('--no-headless', 'Show browser window')
    .option('--humanize', 'Enable human behavior simulation')
    .option('--retries <n>', 'Max retries on failure', '2')
    .option('--profile <name>', 'Use a browser profile')
    .option('--session <name>', 'Use/restore a named session')
    .option('--proxy-rotate', 'Rotate proxy from pool')
    .action(async (url, opts) => {
      const spinner = ora('Launching stealth browser...').start();
      let handle;
      // Recorded instead of exiting inside `catch`: process.exit() terminates
      // immediately, which would skip the `finally` cleanup below.
      let failed = false;

      try {
        handle = await launchBrowser({
          headless: opts.headless,
          proxy: opts.proxy,
          proxyRotate: opts.proxyRotate,
          profile: opts.profile,
          session: opts.session,
        });

        if (opts.cookies && !handle.isDaemon) {
          const { loadCookies } = await import('../cookies.js');
          await loadCookies(handle.context, opts.cookies);
        }

        spinner.text = `Navigating to ${url}...`;
        await navigate(handle, url, {
          humanize: opts.humanize,
          retries: Number.parseInt(opts.retries, 10),
        });

        if (!handle.isDaemon) {
          await waitForReady(handle.page, { timeout: Number.parseInt(opts.wait, 10) });
        }

        spinner.text = 'Extracting data...';

        let result;
        const evalFn = (expr) => evaluate(handle, expr);

        if (opts.links) {
          result = await evalFn(`(() => {
            const links = [];
            document.querySelectorAll('a[href]').forEach(a => {
              const href = a.href;
              const text = a.textContent?.trim().slice(0, 200) || '';
              if (href && href.startsWith('http')) links.push({ url: href, text });
            });
            return links;
          })()`);
        } else if (opts.images) {
          result = await evalFn(`(() => {
            const images = [];
            document.querySelectorAll('img[src]').forEach(img => {
              images.push({
                src: img.src,
                alt: img.alt || '',
                width: img.naturalWidth || img.width || 0,
                height: img.naturalHeight || img.height || 0,
              });
            });
            return images;
          })()`);
        } else if (opts.meta) {
          result = await evalFn(`(() => {
            const getMeta = (name) => {
              const el = document.querySelector('meta[name="' + name + '"]')
                || document.querySelector('meta[property="' + name + '"]');
              return el?.getAttribute('content') || '';
            };
            return {
              title: document.title || '',
              description: getMeta('description'),
              keywords: getMeta('keywords'),
              ogTitle: getMeta('og:title'),
              ogDescription: getMeta('og:description'),
              ogImage: getMeta('og:image'),
              ogUrl: getMeta('og:url'),
              canonical: document.querySelector('link[rel="canonical"]')?.href || '',
            };
          })()`);
        } else if (opts.headers) {
          result = await evalFn(`(() => {
            const headings = [];
            document.querySelectorAll('h1, h2, h3, h4, h5, h6').forEach(h => {
              headings.push({ level: parseInt(h.tagName[1]), text: h.textContent?.trim() || '' });
            });
            return headings;
          })()`);
        } else {
          result = await evalFn(buildSelectorScript(opts.selector, opts.attr, opts.all));
        }

        const title = await getTitle(handle);
        const currentUrl = await getUrl(handle);

        // Stop the spinner only now, so the "Extracting data..." text is
        // visible while the page is being queried.
        spinner.stop();

        const output = formatOutput({
          url: currentUrl,
          title,
          data: result,
          count: Array.isArray(result) ? result.length : 1,
          timestamp: new Date().toISOString(),
        }, opts.format);

        console.log(output);
        log.success(`Extracted from: ${currentUrl}`);
      } catch (err) {
        spinner.stop();
        log.error(`Extract failed: ${err.message}`);
        failed = true;
      } finally {
        if (handle) await closeBrowser(handle);
      }

      if (failed) process.exit(1);
    });
}
|