stealth-cli 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44):
  1. package/LICENSE +21 -0
  2. package/README.md +295 -0
  3. package/bin/stealth.js +50 -0
  4. package/package.json +65 -0
  5. package/skills/SKILL.md +244 -0
  6. package/src/browser.js +341 -0
  7. package/src/client.js +115 -0
  8. package/src/commands/batch.js +180 -0
  9. package/src/commands/browse.js +101 -0
  10. package/src/commands/config.js +85 -0
  11. package/src/commands/crawl.js +169 -0
  12. package/src/commands/daemon.js +143 -0
  13. package/src/commands/extract.js +153 -0
  14. package/src/commands/fingerprint.js +306 -0
  15. package/src/commands/interactive.js +284 -0
  16. package/src/commands/mcp.js +68 -0
  17. package/src/commands/monitor.js +160 -0
  18. package/src/commands/pdf.js +109 -0
  19. package/src/commands/profile.js +112 -0
  20. package/src/commands/proxy.js +116 -0
  21. package/src/commands/screenshot.js +96 -0
  22. package/src/commands/search.js +162 -0
  23. package/src/commands/serve.js +240 -0
  24. package/src/config.js +123 -0
  25. package/src/cookies.js +67 -0
  26. package/src/daemon-entry.js +19 -0
  27. package/src/daemon.js +294 -0
  28. package/src/errors.js +136 -0
  29. package/src/extractors/base.js +59 -0
  30. package/src/extractors/bing.js +47 -0
  31. package/src/extractors/duckduckgo.js +91 -0
  32. package/src/extractors/github.js +103 -0
  33. package/src/extractors/google.js +173 -0
  34. package/src/extractors/index.js +55 -0
  35. package/src/extractors/youtube.js +87 -0
  36. package/src/humanize.js +210 -0
  37. package/src/index.js +32 -0
  38. package/src/macros.js +36 -0
  39. package/src/mcp-server.js +341 -0
  40. package/src/output.js +65 -0
  41. package/src/profiles.js +308 -0
  42. package/src/proxy-pool.js +256 -0
  43. package/src/retry.js +112 -0
  44. package/src/session.js +159 -0
@@ -0,0 +1,101 @@
1
/**
 * stealth browse <url> - Visit a URL and print page content.
 */

import ora from 'ora';
import {
  launchBrowser, closeBrowser, navigate, getSnapshot,
  getTextContent, getTitle, getUrl, evaluate, waitForReady,
} from '../browser.js';
import { formatOutput, log } from '../output.js';

/**
 * Register the `browse` command on the given commander program.
 *
 * Launches a stealth browser (direct or daemon-backed), navigates to the
 * URL, and prints the page in the requested format (text/json/markdown/
 * snapshot).
 *
 * @param {import('commander').Command} program - Root CLI program.
 */
export function registerBrowse(program) {
  program
    .command('browse')
    .description('Visit a URL and print page content')
    .argument('<url>', 'URL to visit')
    .option('-f, --format <format>', 'Output format: text, json, markdown, snapshot', 'text')
    .option('-w, --wait <ms>', 'Wait time after page load (ms)', '2000')
    .option('--proxy <proxy>', 'Proxy server (http://user:pass@host:port)')
    .option('--cookies <file>', 'Load cookies from Netscape-format file')
    .option('--no-headless', 'Show browser window')
    .option('--locale <locale>', 'Browser locale', 'en-US')
    .option('--user-agent', 'Print the browser user-agent')
    .option('--humanize', 'Enable human behavior simulation')
    .option('--retries <n>', 'Max retries on failure', '2')
    .option('--profile <name>', 'Use a browser profile')
    .option('--session <name>', 'Use/restore a named session')
    .option('--proxy-rotate', 'Rotate proxy from pool')
    .action(async (url, opts) => {
      const spinner = ora('Launching stealth browser...').start();
      let handle;

      try {
        handle = await launchBrowser({
          headless: opts.headless,
          proxy: opts.proxy,
          proxyRotate: opts.proxyRotate,
          profile: opts.profile,
          session: opts.session,
          locale: opts.locale,
        });

        // Load cookies if provided (direct mode only — daemon handles have
        // no local context object to attach cookies to).
        if (opts.cookies && !handle.isDaemon) {
          const { loadCookies } = await import('../cookies.js');
          const result = await loadCookies(handle.context, opts.cookies);
          spinner.text = `Loaded ${result.count} cookies`;
        }

        spinner.text = `Navigating to ${url}...`;
        await navigate(handle, url, {
          humanize: opts.humanize,
          retries: Number.parseInt(opts.retries, 10),
        });

        if (!handle.isDaemon) {
          await waitForReady(handle.page, { timeout: Number.parseInt(opts.wait, 10) });
        }

        spinner.stop();

        // Print user-agent if requested
        if (opts.userAgent) {
          const ua = await evaluate(handle, 'navigator.userAgent');
          log.info(`User-Agent: ${ua}`);
        }

        // Get page content based on format
        let output;

        if (opts.format === 'snapshot') {
          output = await getSnapshot(handle);
        } else if (opts.format === 'json') {
          const title = await getTitle(handle);
          const currentUrl = await getUrl(handle);
          const ua = await evaluate(handle, 'navigator.userAgent');
          const text = await getTextContent(handle);
          output = formatOutput({
            url: currentUrl,
            title,
            userAgent: ua,
            content: text.slice(0, 10000),
            timestamp: new Date().toISOString(),
          }, 'json');
        } else {
          output = await getTextContent(handle);
        }

        console.log(output);

        const currentUrl = await getUrl(handle);
        log.success(`Done: ${currentUrl}`);
      } catch (err) {
        spinner.stop();
        log.error(`Browse failed: ${err.message}`);
        // BUGFIX: process.exit(1) here would terminate immediately and skip
        // the `finally` block, leaking the browser. Setting exitCode lets
        // cleanup run and the process exit with status 1 afterwards.
        process.exitCode = 1;
      } finally {
        if (handle) await closeBrowser(handle);
      }
    });
}
@@ -0,0 +1,85 @@
1
/**
 * stealth config - Manage global configuration.
 */

import chalk from 'chalk';
import {
  getConfigValue, setConfigValue, deleteConfigValue,
  listConfig, resetConfig, CONFIG_FILE,
} from '../config.js';
import { log } from '../output.js';

/**
 * Register the `config` command group (list/get/set/delete/reset) on the
 * given commander program. Backed by ~/.stealth/config.json.
 *
 * @param {import('commander').Command} program - Root CLI program.
 */
export function registerConfig(program) {
  const config = program
    .command('config')
    .description('Manage global configuration (~/.stealth/config.json)');

  // stealth config list
  config
    .command('list')
    .description('Show all config values')
    .action(() => {
      const items = listConfig();
      console.log(chalk.bold('\n Configuration:\n'));
      console.log(chalk.dim(` ${'Key'.padEnd(20)} ${'Value'.padEnd(25)} Source`));
      console.log(chalk.dim(' ' + '─'.repeat(55)));

      for (const item of items) {
        // BUGFIX: pad the plain text BEFORE applying color. ANSI escape
        // codes count toward String.length, so padding an already-colored
        // string under-pads it and breaks column alignment.
        const rawVal = item.value === null ? 'null' : String(item.value);
        const val = item.value === null
          ? chalk.dim(rawVal.padEnd(25))
          : rawVal.padEnd(25);
        const src = item.source === 'user' ? chalk.cyan('user') : chalk.dim('default');
        console.log(` ${item.key.padEnd(20)} ${val} ${src}`);
      }

      console.log(chalk.dim(`\n File: ${CONFIG_FILE}\n`));
    });

  // stealth config get <key>
  config
    .command('get')
    .description('Get a config value')
    .argument('<key>', 'Config key')
    .action((key) => {
      try {
        const value = getConfigValue(key);
        console.log(value);
      } catch (err) {
        log.error(err.message);
        process.exit(1);
      }
    });

  // stealth config set <key> <value>
  config
    .command('set')
    .description('Set a config value')
    .argument('<key>', 'Config key')
    .argument('<value>', 'Config value')
    .action((key, value) => {
      try {
        const result = setConfigValue(key, value);
        log.success(`${key} = ${result}`);
      } catch (err) {
        log.error(err.message);
        process.exit(1);
      }
    });

  // stealth config delete <key>
  config
    .command('delete')
    .description('Reset a config value to default')
    .argument('<key>', 'Config key')
    .action((key) => {
      deleteConfigValue(key);
      log.success(`${key} reset to default`);
    });

  // stealth config reset
  config
    .command('reset')
    .description('Reset all config to defaults')
    .action(() => {
      resetConfig();
      log.success('All config reset to defaults');
    });
}
@@ -0,0 +1,169 @@
1
/**
 * stealth crawl <url> - Crawl pages recursively with anti-detection.
 */

import ora from 'ora';
import { launchBrowser, closeBrowser, getTextContent, evaluate, waitForReady } from '../browser.js';
import { navigateWithRetry } from '../retry.js';
import { randomDelay, humanScroll } from '../humanize.js';
import { formatOutput, log } from '../output.js';

/**
 * Register the `crawl` command on the given commander program.
 *
 * Performs a breadth-first crawl from the starting URL up to --depth and
 * --limit, writing one result per page (jsonl/json/text) to stdout or the
 * --output file.
 *
 * @param {import('commander').Command} program - Root CLI program.
 */
export function registerCrawl(program) {
  program
    .command('crawl')
    .description('Crawl pages recursively with anti-detection')
    .argument('<url>', 'Starting URL')
    .option('-d, --depth <n>', 'Maximum crawl depth', '1')
    .option('-l, --limit <n>', 'Maximum pages to crawl', '10')
    .option('--same-origin', 'Only follow same-origin links (default: true)', true)
    .option('--delay <ms>', 'Delay between requests (ms)', '1000')
    .option('-f, --format <format>', 'Output format: json, jsonl, text', 'jsonl')
    .option('-o, --output <file>', 'Output file (default: stdout)')
    .option('--proxy <proxy>', 'Proxy server')
    .option('--cookies <file>', 'Load cookies from Netscape-format file')
    .option('--no-headless', 'Show browser window')
    .option('--include <pattern>', 'Only crawl URLs matching this pattern (regex)')
    .option('--exclude <pattern>', 'Skip URLs matching this pattern (regex)')
    .option('--humanize', 'Enable human behavior simulation')
    .option('--retries <n>', 'Max retries per page', '2')
    .option('--profile <name>', 'Use a browser profile')
    .option('--proxy-rotate', 'Rotate proxy from pool')
    .action(async (startUrl, opts) => {
      const spinner = ora('Launching stealth browser...').start();
      let handle;
      // Hoisted so `finally` can flush/close it when the crawl aborts.
      let outputStream;

      const maxDepth = Number.parseInt(opts.depth, 10);
      const maxPages = Number.parseInt(opts.limit, 10);
      const delay = Number.parseInt(opts.delay, 10);
      const maxRetries = Number.parseInt(opts.retries, 10);
      const includeRegex = opts.include ? new RegExp(opts.include) : null;
      const excludeRegex = opts.exclude ? new RegExp(opts.exclude) : null;

      try {
        handle = await launchBrowser({
          headless: opts.headless,
          proxy: opts.proxy,
          proxyRotate: opts.proxyRotate,
          profile: opts.profile,
        });

        // Crawl requires direct mode (page access); a daemon-backed handle
        // cannot drive the multi-page loop below.
        if (handle.isDaemon) {
          log.warn('Crawl uses direct mode (daemon does not support multi-page crawling)');
        }

        if (opts.cookies && handle.context) {
          const { loadCookies } = await import('../cookies.js');
          await loadCookies(handle.context, opts.cookies);
        }

        const startOrigin = new URL(startUrl).origin;
        const visited = new Set();
        const queue = [{ url: startUrl, depth: 0 }];
        const results = [];

        if (opts.output) {
          const { createWriteStream } = await import('fs');
          outputStream = createWriteStream(opts.output);
        }

        // Serialize one page result to the stream/stdout and record it.
        const writeResult = (result) => {
          const line = opts.format === 'jsonl'
            ? JSON.stringify(result)
            : formatOutput(result, opts.format);

          if (outputStream) {
            outputStream.write(line + '\n');
          } else {
            console.log(line);
          }
          results.push(result);
        };

        // Breadth-first traversal bounded by maxPages and maxDepth.
        while (queue.length > 0 && results.length < maxPages) {
          const { url, depth } = queue.shift();

          if (visited.has(url)) continue;
          visited.add(url);

          if (includeRegex && !includeRegex.test(url)) continue;
          if (excludeRegex && excludeRegex.test(url)) continue;

          spinner.text = `[${results.length + 1}/${maxPages}] Crawling: ${url.slice(0, 60)}...`;

          try {
            // Navigate with retry
            await navigateWithRetry(handle.page, url, {
              timeout: 30000,
              maxRetries,
            });
            await waitForReady(handle.page, { timeout: 3000 });

            const title = await handle.page.title().catch(() => '');
            const text = await getTextContent(handle);

            const result = {
              url: handle.page.url(),
              title,
              content: text.slice(0, 5000),
              depth,
              timestamp: new Date().toISOString(),
            };

            writeResult(result);

            // Extract links for next depth
            if (depth < maxDepth) {
              const links = await handle.page.evaluate(() => {
                return Array.from(document.querySelectorAll('a[href]'))
                  .map((a) => a.href)
                  .filter((href) => href && href.startsWith('http'));
              });

              for (const link of links) {
                if (visited.has(link)) continue;

                if (opts.sameOrigin) {
                  try {
                    if (new URL(link).origin !== startOrigin) continue;
                  } catch { continue; } // unparseable href — skip it
                }

                queue.push({ url: link, depth: depth + 1 });
              }
            }

            // Human-like delay between pages
            if (delay > 0) {
              if (opts.humanize) {
                // Human mode: scroll + random delay
                await humanScroll(handle.page, { scrolls: 1 });
                await randomDelay(delay * 0.8, delay * 1.5);
              } else {
                // Standard: fixed delay + small jitter
                const jitter = delay + Math.random() * delay * 0.3;
                await handle.page.waitForTimeout(jitter);
              }
            }
          } catch (err) {
            // Per-page failure: log and keep crawling the rest of the queue.
            log.warn(`Failed to crawl ${url}: ${err.message}`);
          }
        }

        spinner.stop();
        log.success(`Crawl complete: ${results.length} pages crawled`);
        log.dim(` Start: ${startUrl}`);
        log.dim(` Depth: ${maxDepth}, Visited: ${visited.size}`);
        if (opts.output) log.dim(` Output: ${opts.output}`);
      } catch (err) {
        spinner.stop();
        log.error(`Crawl failed: ${err.message}`);
        // BUGFIX: process.exit(1) would skip `finally`, leaking the browser
        // and leaving the output file unflushed. exitCode preserves cleanup.
        process.exitCode = 1;
      } finally {
        // BUGFIX: end the stream here (not only on the happy path) so a
        // partial crawl still flushes everything written so far.
        if (outputStream) outputStream.end();
        if (handle) await closeBrowser(handle);
      }
    });
}
@@ -0,0 +1,143 @@
1
+ /**
2
+ * stealth daemon - Manage background browser daemon
3
+ */
4
+
5
+ import { fork } from 'child_process';
6
+ import { fileURLToPath } from 'url';
7
+ import path from 'path';
8
+ import fs from 'fs';
9
+ import { isDaemonRunning, PID_PATH, SOCKET_PATH, STEALTH_DIR } from '../daemon.js';
10
+ import { daemonStatus, daemonShutdown } from '../client.js';
11
+ import { log } from '../output.js';
12
+
13
+ export function registerDaemon(program) {
14
+ const daemon = program
15
+ .command('daemon')
16
+ .description('Manage background browser daemon for instant startup');
17
+
18
+ // stealth daemon start
19
+ daemon
20
+ .command('start')
21
+ .description('Start background browser daemon')
22
+ .option('--idle-timeout <minutes>', 'Auto-shutdown after idle (minutes)', '5')
23
+ .option('--verbose', 'Show daemon logs in terminal')
24
+ .action(async (opts) => {
25
+ if (isDaemonRunning()) {
26
+ const status = await daemonStatus();
27
+ if (status?.ok) {
28
+ log.info(`Daemon already running (pid: ${status.pid}, uptime: ${status.uptime}s, memory: ${status.memoryMB}MB)`);
29
+ return;
30
+ }
31
+ }
32
+
33
+ const idleTimeout = parseInt(opts.idleTimeout) * 60 * 1000;
34
+
35
+ if (opts.verbose) {
36
+ // Run in foreground
37
+ log.info('Starting daemon in foreground (Ctrl+C to stop)...');
38
+ const { startDaemon } = await import('../daemon.js');
39
+ await startDaemon({ idleTimeout, verbose: true });
40
+ } else {
41
+ // Fork as background process
42
+ fs.mkdirSync(STEALTH_DIR, { recursive: true });
43
+
44
+ const daemonScript = path.join(
45
+ path.dirname(fileURLToPath(import.meta.url)),
46
+ '..',
47
+ 'daemon-entry.js',
48
+ );
49
+
50
+ const child = fork(daemonScript, [], {
51
+ detached: true,
52
+ stdio: ['ignore', 'ignore', 'ignore', 'ipc'],
53
+ env: {
54
+ ...process.env,
55
+ STEALTH_IDLE_TIMEOUT: String(idleTimeout),
56
+ },
57
+ });
58
+
59
+ // Wait for daemon to report ready
60
+ await new Promise((resolve, reject) => {
61
+ const timer = setTimeout(() => {
62
+ reject(new Error('Daemon startup timeout (15s)'));
63
+ }, 15000);
64
+
65
+ child.on('message', (msg) => {
66
+ if (msg === 'ready') {
67
+ clearTimeout(timer);
68
+ resolve();
69
+ }
70
+ });
71
+
72
+ child.on('error', (err) => {
73
+ clearTimeout(timer);
74
+ reject(err);
75
+ });
76
+
77
+ child.on('exit', (code) => {
78
+ if (code !== 0) {
79
+ clearTimeout(timer);
80
+ reject(new Error(`Daemon exited with code ${code}`));
81
+ }
82
+ });
83
+ });
84
+
85
+ child.unref();
86
+ child.disconnect();
87
+
88
+ log.success(`Daemon started (pid: ${child.pid})`);
89
+ log.dim(` Socket: ${SOCKET_PATH}`);
90
+ log.dim(` Idle timeout: ${opts.idleTimeout} minutes`);
91
+ log.dim(` Stop with: stealth daemon stop`);
92
+ }
93
+ });
94
+
95
+ // stealth daemon stop
96
+ daemon
97
+ .command('stop')
98
+ .description('Stop the background daemon')
99
+ .action(async () => {
100
+ if (!isDaemonRunning()) {
101
+ log.info('Daemon is not running');
102
+ return;
103
+ }
104
+
105
+ const result = await daemonShutdown();
106
+ if (result?.ok) {
107
+ log.success('Daemon stopped');
108
+ } else {
109
+ // Force kill via PID
110
+ try {
111
+ const pid = parseInt(fs.readFileSync(PID_PATH, 'utf-8').trim());
112
+ process.kill(pid, 'SIGTERM');
113
+ log.success(`Daemon killed (pid: ${pid})`);
114
+ } catch {
115
+ log.error('Failed to stop daemon');
116
+ }
117
+ }
118
+ });
119
+
120
+ // stealth daemon status
121
+ daemon
122
+ .command('status')
123
+ .description('Show daemon status')
124
+ .action(async () => {
125
+ if (!isDaemonRunning()) {
126
+ log.info('Daemon is not running');
127
+ log.dim(' Start with: stealth daemon start');
128
+ return;
129
+ }
130
+
131
+ const status = await daemonStatus();
132
+ if (status?.ok) {
133
+ log.success('Daemon is running');
134
+ log.dim(` PID: ${status.pid}`);
135
+ log.dim(` Uptime: ${status.uptime}s`);
136
+ log.dim(` Contexts: ${status.contexts}`);
137
+ log.dim(` Memory: ${status.memoryMB}MB`);
138
+ log.dim(` Browser: ${status.browserConnected ? 'connected' : 'disconnected'}`);
139
+ } else {
140
+ log.warn('Daemon is running but not responding');
141
+ }
142
+ });
143
+ }
@@ -0,0 +1,153 @@
1
/**
 * stealth extract <url> - Extract structured data from a page.
 */

import ora from 'ora';
import {
  launchBrowser, closeBrowser, navigate, getTitle,
  getUrl, evaluate, waitForReady,
} from '../browser.js';
import { formatOutput, log } from '../output.js';

/**
 * Register the `extract` command on the given commander program.
 *
 * Extracts links, images, meta tags, headings, or arbitrary CSS-selector
 * matches from a page and prints them as json/text/markdown.
 *
 * @param {import('commander').Command} program - Root CLI program.
 */
export function registerExtract(program) {
  program
    .command('extract')
    .description('Extract structured data from a page')
    .argument('<url>', 'URL to extract from')
    .option('-s, --selector <selector>', 'CSS selector to extract', 'body')
    .option('-a, --attr <attribute>', 'Extract attribute instead of text (e.g. href, src)')
    .option('--all', 'Extract all matching elements (not just the first)')
    .option('--links', 'Extract all links from the page')
    .option('--images', 'Extract all image URLs from the page')
    .option('--meta', 'Extract meta tags (title, description, og tags)')
    .option('--headers', 'Extract all headings (h1-h6)')
    .option('-f, --format <format>', 'Output format: json, text, markdown', 'json')
    .option('--wait <ms>', 'Wait time after page load (ms)', '2000')
    .option('--proxy <proxy>', 'Proxy server')
    .option('--cookies <file>', 'Load cookies from Netscape-format file')
    .option('--no-headless', 'Show browser window')
    .option('--humanize', 'Enable human behavior simulation')
    .option('--retries <n>', 'Max retries on failure', '2')
    .option('--profile <name>', 'Use a browser profile')
    .option('--session <name>', 'Use/restore a named session')
    .option('--proxy-rotate', 'Rotate proxy from pool')
    .action(async (url, opts) => {
      const spinner = ora('Launching stealth browser...').start();
      let handle;

      try {
        handle = await launchBrowser({
          headless: opts.headless,
          proxy: opts.proxy,
          proxyRotate: opts.proxyRotate,
          profile: opts.profile,
          session: opts.session,
        });

        if (opts.cookies && !handle.isDaemon) {
          const { loadCookies } = await import('../cookies.js');
          await loadCookies(handle.context, opts.cookies);
        }

        spinner.text = `Navigating to ${url}...`;
        await navigate(handle, url, {
          humanize: opts.humanize,
          retries: Number.parseInt(opts.retries, 10),
        });

        if (!handle.isDaemon) {
          await waitForReady(handle.page, { timeout: Number.parseInt(opts.wait, 10) });
        }

        spinner.text = 'Extracting data...';
        spinner.stop();

        let result;
        const evalFn = (expr) => evaluate(handle, expr);

        if (opts.links) {
          result = await evalFn(`(() => {
            const links = [];
            document.querySelectorAll('a[href]').forEach(a => {
              const href = a.href;
              const text = a.textContent?.trim().slice(0, 200) || '';
              if (href && href.startsWith('http')) links.push({ url: href, text });
            });
            return links;
          })()`);
        } else if (opts.images) {
          result = await evalFn(`(() => {
            const images = [];
            document.querySelectorAll('img[src]').forEach(img => {
              images.push({
                src: img.src,
                alt: img.alt || '',
                width: img.naturalWidth || img.width || 0,
                height: img.naturalHeight || img.height || 0,
              });
            });
            return images;
          })()`);
        } else if (opts.meta) {
          result = await evalFn(`(() => {
            const getMeta = (name) => {
              const el = document.querySelector('meta[name="' + name + '"]')
                || document.querySelector('meta[property="' + name + '"]');
              return el?.getAttribute('content') || '';
            };
            return {
              title: document.title || '',
              description: getMeta('description'),
              keywords: getMeta('keywords'),
              ogTitle: getMeta('og:title'),
              ogDescription: getMeta('og:description'),
              ogImage: getMeta('og:image'),
              ogUrl: getMeta('og:url'),
              canonical: document.querySelector('link[rel="canonical"]')?.href || '',
            };
          })()`);
        } else if (opts.headers) {
          result = await evalFn(`(() => {
            const headings = [];
            document.querySelectorAll('h1, h2, h3, h4, h5, h6').forEach(h => {
              headings.push({ level: parseInt(h.tagName[1]), text: h.textContent?.trim() || '' });
            });
            return headings;
          })()`);
        } else {
          // BUGFIX: embed user-supplied selector/attr via JSON.stringify so
          // quotes/backslashes in them cannot break (or inject into) the
          // generated page-side expression. The old '${selector}' splicing
          // failed on any selector containing a quote, e.g. a[title='x'].
          const selectorLit = JSON.stringify(opts.selector);
          const attrLit = JSON.stringify(opts.attr || '');
          const allLit = opts.all ? 'true' : 'false';
          result = await evalFn(`(() => {
            const selector = ${selectorLit};
            const attr = ${attrLit};
            const elements = ${allLit}
              ? Array.from(document.querySelectorAll(selector))
              : [document.querySelector(selector)].filter(Boolean);
            return elements.map(el => {
              if (attr) return el.getAttribute(attr);
              return el.textContent?.trim() || '';
            });
          })()`);
        }

        const title = await getTitle(handle);
        const currentUrl = await getUrl(handle);

        const output = formatOutput({
          url: currentUrl,
          title,
          data: result,
          count: Array.isArray(result) ? result.length : 1,
          timestamp: new Date().toISOString(),
        }, opts.format);

        console.log(output);
        log.success(`Extracted from: ${currentUrl}`);
      } catch (err) {
        spinner.stop();
        log.error(`Extract failed: ${err.message}`);
        // BUGFIX: process.exit(1) would skip `finally` and leak the browser;
        // exitCode lets cleanup complete before the process exits non-zero.
        process.exitCode = 1;
      } finally {
        if (handle) await closeBrowser(handle);
      }
    });
}