crawlforge-mcp-server 3.4.0 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59):
  1. package/README.md +28 -2
  2. package/package.json +6 -4
  3. package/server.js +166 -32
  4. package/src/cli/commands/actions.js +36 -0
  5. package/src/cli/commands/analyze.js +19 -0
  6. package/src/cli/commands/batch.js +45 -0
  7. package/src/cli/commands/crawl.js +30 -0
  8. package/src/cli/commands/extract.js +45 -0
  9. package/src/cli/commands/install-skills.js +46 -0
  10. package/src/cli/commands/llmstxt.js +24 -0
  11. package/src/cli/commands/localize.js +29 -0
  12. package/src/cli/commands/map.js +26 -0
  13. package/src/cli/commands/monitor.js +29 -0
  14. package/src/cli/commands/research.js +26 -0
  15. package/src/cli/commands/scrape.js +37 -0
  16. package/src/cli/commands/search.js +28 -0
  17. package/src/cli/commands/stealth.js +29 -0
  18. package/src/cli/commands/template.js +26 -0
  19. package/src/cli/commands/track.js +24 -0
  20. package/src/cli/commands/uninstall-skills.js +35 -0
  21. package/src/cli/formatter.js +57 -0
  22. package/src/cli/index.js +94 -0
  23. package/src/cli/lib/runTool.js +40 -0
  24. package/src/core/ActionExecutor.js +8 -6
  25. package/src/core/AuthManager.js +103 -3
  26. package/src/core/ChangeTracker.js +34 -0
  27. package/src/core/ElicitationHelper.js +112 -0
  28. package/src/core/JobManager.js +36 -2
  29. package/src/core/LocalizationManager.js +19 -5
  30. package/src/core/PerformanceManager.js +53 -17
  31. package/src/core/ResearchOrchestrator.js +40 -5
  32. package/src/core/SamplingClient.js +191 -0
  33. package/src/core/StealthBrowserManager.js +248 -2
  34. package/src/core/WebhookDispatcher.js +18 -10
  35. package/src/prompts/PromptRegistry.js +199 -0
  36. package/src/resources/ResourceRegistry.js +273 -0
  37. package/src/server/transports/streamableHttp.js +6 -6
  38. package/src/server/withAuth.js +25 -0
  39. package/src/skills/crawlforge-cli.md +157 -0
  40. package/src/skills/crawlforge-mcp.md +80 -0
  41. package/src/skills/crawlforge-research.md +104 -0
  42. package/src/skills/crawlforge-stealth.md +98 -0
  43. package/src/skills/installer.js +141 -0
  44. package/src/tools/advanced/batchScrape/index.js +30 -0
  45. package/src/tools/advanced/batchScrape/schema.js +1 -1
  46. package/src/tools/basic/extractText.js +19 -8
  47. package/src/tools/crawl/crawlDeep.js +27 -0
  48. package/src/tools/extract/extractContent.js +5 -17
  49. package/src/tools/extract/extractStructured.js +8 -0
  50. package/src/tools/extract/extractWithLlm.js +35 -25
  51. package/src/tools/extract/listOllamaModels.js +66 -0
  52. package/src/tools/extract/processDocument.js +7 -1
  53. package/src/tools/extract/summarizeContent.js +17 -0
  54. package/src/tools/research/deepResearch.js +34 -0
  55. package/src/tools/templates/ScrapeTemplateTool.js +68 -0
  56. package/src/tools/templates/TemplateRegistry.js +311 -0
  57. package/src/utils/Logger.js +15 -0
  58. package/src/utils/htmlToMarkdown.js +54 -0
  59. package/src/utils/secretMask.js +86 -0
@@ -0,0 +1,45 @@
1
+ /**
2
+ * batch command — scrape multiple URLs from a file.
3
+ * Reads newline-delimited URLs from the specified file.
4
+ */
5
+ import { BatchScrapeTool } from '../../tools/advanced/BatchScrapeTool.js';
6
+ import { getToolConfig } from '../../constants/config.js';
7
+ import { runTool } from '../lib/runTool.js';
8
+ import { readFileSync } from 'node:fs';
9
+
10
/**
 * Parse a numeric CLI option value as a base-10 integer and enforce a lower bound.
 *
 * @param {string} raw - Option value as received from the command line.
 * @param {string} flag - Flag name used in the error message (e.g. `--concurrency`).
 * @param {number} min - Smallest accepted value (inclusive).
 * @returns {number} The parsed integer.
 * @throws {RangeError} When `raw` does not parse to an integer or is below `min`.
 */
function parseIntegerOption(raw, flag, min) {
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value) || value < min) {
    throw new RangeError(`${flag} must be an integer >= ${min} (received "${raw}")`);
  }
  return value;
}

/**
 * Register the `batch <urls-file>` subcommand on the given commander program.
 *
 * The action validates the numeric options, reads the file as UTF-8, keeps
 * every non-empty line that does not start with `#`, and hands the resulting
 * URL list to BatchScrapeTool through runTool. Any validation or read failure
 * is reported on stderr and ends the process with exit code 1.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  program
    .command('batch <urls-file>')
    .description('Scrape multiple URLs from a newline-delimited file')
    .option('--format <fmt>', 'Output format: text, markdown, html', 'markdown')
    .option('--concurrency <n>', 'Concurrent requests', '5')
    .option('--max-retries <n>', 'Maximum retries per URL', '2')
    .action(async (urlsFile, opts, cmd) => {
      const globals = cmd.parent.opts();
      const cliFlags = { json: globals.json, pretty: globals.pretty, quiet: globals.quiet };

      // Reject non-numeric values up front so NaN is never forwarded to the tool.
      let maxConcurrency;
      let maxRetries;
      try {
        maxConcurrency = parseIntegerOption(opts.concurrency, '--concurrency', 1);
        maxRetries = parseIntegerOption(opts.maxRetries, '--max-retries', 0);
      } catch (e) {
        process.stderr.write(`Error: ${e.message}\n`);
        process.exit(1);
        return; // only reached when process.exit is stubbed
      }

      let urls;
      try {
        urls = readFileSync(urlsFile, 'utf8')
          .split('\n')
          .map((line) => line.trim())
          .filter((line) => line && !line.startsWith('#'));
      } catch (e) {
        process.stderr.write(`Error reading URLs file: ${e.message}\n`);
        process.exit(1);
        return; // only reached when process.exit is stubbed
      }

      if (urls.length === 0) {
        process.stderr.write('Error: No URLs found in file\n');
        process.exit(1);
        return; // only reached when process.exit is stubbed
      }

      const tool = new BatchScrapeTool(getToolConfig('batch_scrape'));
      await runTool(tool, {
        urls,
        formats: [opts.format],
        maxConcurrency,
        jobOptions: { maxRetries }
      }, cliFlags);
    });
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * crawl command — deep crawl a website using crawl_deep tool.
3
+ */
4
+ import { CrawlDeepTool } from '../../tools/crawl/crawlDeep.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `crawl <url>` subcommand to the commander program.
 *
 * The action converts the string-valued options to integers, derives the two
 * boolean switches, and runs CrawlDeepTool through runTool with the global
 * output flags (json / pretty / quiet) taken from the parent command.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const crawlCommand = program
    .command('crawl <url>')
    .description('Deep crawl a website and extract its content')
    .option('--depth <n>', 'Maximum crawl depth (1-5)', '3')
    .option('--max-pages <n>', 'Maximum pages to crawl', '100')
    .option('--no-robots', 'Ignore robots.txt')
    .option('--follow-external', 'Follow external links')
    .option('--concurrency <n>', 'Concurrent requests (1-20)', '10');

  crawlCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const crawler = new CrawlDeepTool(getToolConfig('crawl_deep'));

    const params = {
      url,
      max_depth: parseInt(opts.depth, 10),
      max_pages: parseInt(opts.maxPages, 10),
      // robots.txt is respected unless the flag value is exactly false
      respect_robots: opts.robots !== false,
      follow_external: Boolean(opts.followExternal),
      concurrency: parseInt(opts.concurrency, 10)
    };

    await runTool(crawler, params, outputFlags);
  });
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * extract command — extract structured data or LLM-guided extraction.
3
+ * With --schema: uses extract_structured (JSON schema-based).
4
+ * With --prompt: uses extract_with_llm (natural language).
5
+ */
6
+ import { ExtractStructuredTool } from '../../tools/extract/extractStructured.js';
7
+ import { ExtractWithLlm } from '../../tools/extract/extractWithLlm.js';
8
+ import { getToolConfig } from '../../constants/config.js';
9
+ import { runTool } from '../lib/runTool.js';
10
+ import { readFileSync } from 'node:fs';
11
+
12
/**
 * Attach the `extract <url>` subcommand to the commander program.
 *
 * Dispatch order inside the action:
 *   1. `--schema <file>`: the file is read and parsed as JSON, then
 *      ExtractStructuredTool is run with `{ url, schema }`.
 *   2. `--prompt <text>`: ExtractWithLlm is run with the prompt and the
 *      optional `--model` value.
 *   3. neither: an error is written to stderr and the process exits with 1.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const extractCommand = program
    .command('extract <url>')
    .description('Extract structured data from a URL')
    .option('--schema <file>', 'JSON schema file for structured extraction')
    .option('--prompt <text>', 'Natural language prompt for LLM-guided extraction')
    .option('--model <model>', 'LLM model to use (ollama model name or openai/anthropic)');

  extractCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };

    if (opts.schema) {
      let schema;
      try {
        const rawSchema = readFileSync(opts.schema, 'utf8');
        schema = JSON.parse(rawSchema);
      } catch (e) {
        process.stderr.write(`Error reading schema file: ${e.message}\n`);
        process.exit(1);
      }
      const structuredTool = new ExtractStructuredTool(getToolConfig('extract_structured'));
      await runTool(structuredTool, { url, schema }, outputFlags);
      return;
    }

    if (opts.prompt) {
      const llmTool = new ExtractWithLlm(getToolConfig('extract_with_llm'));
      const llmParams = { url, prompt: opts.prompt, model: opts.model };
      await runTool(llmTool, llmParams, outputFlags);
      return;
    }

    process.stderr.write('Error: extract requires --schema <file> or --prompt <text>\n');
    process.exit(1);
  });
}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * install-skills command -- install CrawlForge skill files into AI coding tools.
3
+ */
4
+ import { install } from '../../skills/installer.js';
5
+
6
/**
 * Attach the `install-skills` subcommand to the commander program.
 *
 * The action calls `install()` with the selected target, the force and
 * dry-run switches, and the current working directory. In dry-run mode it
 * prints `results.paths`; otherwise it prints the `installed` and `skipped`
 * lists. The process exits with 0 on success and 1 when `install()` throws.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  // Writes one indented line per entry to stdout.
  const printEntries = (entries) => {
    for (const entry of entries) {
      process.stdout.write(` ${entry}\n`);
    }
  };

  program
    .command('install-skills')
    .description('Install CrawlForge skill files into Claude Code, Cursor, or VS Code')
    .option('--target <target>', 'Target: claude-code, cursor, vscode, or all', 'all')
    .option('--force', 'Overwrite existing skill files')
    .option('--dry-run', 'Show what would be installed without writing files')
    .action(async (opts) => {
      try {
        const results = await install({
          target: opts.target,
          force: Boolean(opts.force),
          dryRun: Boolean(opts.dryRun),
          cwd: process.cwd()
        });

        if (opts.dryRun) {
          process.stdout.write('Dry run -- would install to:\n');
          printEntries(results.paths);
          process.exit(0);
          return;
        }

        const installedCount = results.installed.length;
        const skippedCount = results.skipped.length;

        if (installedCount > 0) {
          process.stdout.write('Installed:\n');
          printEntries(results.installed);
        }
        if (skippedCount > 0) {
          process.stdout.write('Skipped (already installed; use --force to overwrite):\n');
          printEntries(results.skipped);
        }
        if (installedCount === 0 && skippedCount === 0) {
          process.stdout.write('Nothing to install.\n');
        }
        process.exit(0);
      } catch (err) {
        process.stderr.write(`Error: ${err.message}\n`);
        process.exit(1);
      }
    });
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * llmstxt command — generate llms.txt for a website.
3
+ */
4
+ import { GenerateLLMsTxtTool } from '../../tools/llmstxt/generateLLMsTxt.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `llmstxt <url>` subcommand to the commander program.
 *
 * The action runs GenerateLLMsTxtTool through runTool, forwarding the URL,
 * the `--include-full` switch as a boolean, and `--max-pages` as an integer.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const llmsTxtCommand = program
    .command('llmstxt <url>')
    .description('Generate llms.txt for a website (AI compliance file)')
    .option('--include-full', 'Also generate llms-full.txt')
    .option('--max-pages <n>', 'Maximum pages to analyze', '50');

  llmsTxtCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const generator = new GenerateLLMsTxtTool(getToolConfig('generate_llms_txt'));

    const params = {
      url,
      include_full_txt: Boolean(opts.includeFull),
      max_pages: parseInt(opts.maxPages, 10)
    };

    await runTool(generator, params, outputFlags);
  });
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * localize command — fetch content with locale/geo awareness.
3
+ */
4
+ import { LocalizationManager } from '../../core/LocalizationManager.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `localize <url>` subcommand to the commander program.
 *
 * LocalizationManager is not handed to runTool directly; the action wraps it
 * in an object whose `execute` forwards to `fetchWithLocalization`, and
 * passes that wrapper to runTool together with the locale options.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const localizeCommand = program
    .command('localize <url>')
    .description('Fetch URL with locale/geo-aware settings')
    .option('--locale <locale>', 'Locale code (e.g. en-US, fr-FR)', 'en-US')
    .option('--country <code>', 'Country code for geo-targeting (e.g. US, FR)')
    .option('--currency <code>', 'Currency code (e.g. USD, EUR)');

  localizeCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const manager = new LocalizationManager(getToolConfig('localization'));

    // Adapter exposing the execute() entry point that runTool is given.
    const adapter = {
      execute(params) {
        return manager.fetchWithLocalization(params);
      }
    };

    const { locale, country, currency } = opts;
    await runTool(adapter, { url, locale, country, currency }, outputFlags);
  });
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * map command — generate a sitemap using map_site tool.
3
+ */
4
+ import { MapSiteTool } from '../../tools/crawl/mapSite.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `map <url>` subcommand to the commander program.
 *
 * The action runs MapSiteTool through runTool with the depth and page limits
 * converted to integers and the requested output format passed through as-is.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const mapCommand = program
    .command('map <url>')
    .description('Generate a sitemap for a website')
    .option('--depth <n>', 'Maximum crawl depth', '3')
    .option('--max-pages <n>', 'Maximum pages to include', '500')
    .option('--format <fmt>', 'Output format: json or xml', 'json');

  mapCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const mapper = new MapSiteTool(getToolConfig('map_site'));

    const params = {
      url,
      max_depth: parseInt(opts.depth, 10),
      max_pages: parseInt(opts.maxPages, 10),
      output_format: opts.format
    };

    await runTool(mapper, params, outputFlags);
  });
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * monitor command — continuously monitor a URL for changes (scheduled mode).
3
+ */
4
+ import { TrackChangesTool } from '../../tools/tracking/trackChanges/index.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `monitor <url>` subcommand to the commander program.
 *
 * The action runs TrackChangesTool through runTool with `scheduled: true`,
 * the interval parsed as an integer, the threshold parsed as a float, and
 * the optional selector and webhook URL passed through unchanged.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const monitorCommand = program
    .command('monitor <url>')
    .description('Continuously monitor a URL for content changes')
    .option('--interval <seconds>', 'Check interval in seconds', '300')
    .option('--selector <css>', 'CSS selector to scope monitoring')
    .option('--webhook <url>', 'Webhook URL to notify on changes')
    .option('--threshold <pct>', 'Change threshold percentage (0-100)', '5');

  monitorCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const tracker = new TrackChangesTool(getToolConfig('track_changes'));

    const params = {
      url,
      scheduled: true,
      interval_seconds: parseInt(opts.interval, 10),
      selector: opts.selector,
      webhook_url: opts.webhook,
      change_threshold: parseFloat(opts.threshold)
    };

    await runTool(tracker, params, outputFlags);
  });
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * research command — deep research on a topic.
3
+ */
4
+ import { DeepResearchTool } from '../../tools/research/deepResearch.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `research <topic>` subcommand to the commander program.
 *
 * The action runs DeepResearchTool through runTool; the positional topic is
 * sent as `query`, `--max-urls` is parsed as an integer, and the depth and
 * output format strings are forwarded unchanged.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const researchCommand = program
    .command('research <topic>')
    .description('Conduct deep research on a topic')
    .option('--depth <level>', 'Research depth: basic, standard, or deep', 'standard')
    .option('--max-urls <n>', 'Maximum URLs to analyze', '20')
    .option('--output-format <fmt>', 'Output format: summary or detailed', 'summary');

  researchCommand.action(async (topic, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const researcher = new DeepResearchTool(getToolConfig('deep_research'));

    const params = {
      query: topic,
      depth: opts.depth,
      max_urls: parseInt(opts.maxUrls, 10),
      output_format: opts.outputFormat
    };

    await runTool(researcher, params, outputFlags);
  });
}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * scrape command — fetches a URL and returns its content.
3
+ * Without --extract: uses fetch_url (raw HTML + headers).
4
+ * With --extract: uses extract_content (cleaned text/markdown).
5
+ */
6
+ import { fetchUrlHandler } from '../../tools/basic/fetchUrl.js';
7
+ import { ExtractContentTool } from '../../tools/extract/extractContent.js';
8
+ import { getToolConfig } from '../../constants/config.js';
9
+ import { runTool } from '../lib/runTool.js';
10
+
11
/**
 * Register the `scrape <url>` subcommand on the given commander program.
 *
 * Without `--extract` the action calls `fetchUrlHandler` with `{ url, timeout }`;
 * with `--extract` it calls `ExtractContentTool.execute` and additionally
 * forwards `--format` as `output_format`. Both paths go through runTool with
 * the global output flags (json / pretty / quiet) of the parent command.
 *
 * The API key is intentionally not read here: the CLI entry point's preAction
 * hook copies `--api-key` into CRAWLFORGE_API_KEY before any action runs, so
 * the previously computed (and never used) local `apiKey` was dead code.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  program
    .command('scrape <url>')
    .description('Fetch a URL and return its content')
    .option('--extract', 'Use extract_content for cleaned text/markdown output')
    .option('--format <format>', 'Output format: text, markdown, html (default: text)', 'text')
    .option('--timeout <ms>', 'Request timeout in milliseconds', '10000')
    .action(async (url, opts, cmd) => {
      const globals = cmd.parent.opts();
      const cliFlags = { json: globals.json, pretty: globals.pretty, quiet: globals.quiet };
      const timeout = parseInt(opts.timeout, 10);

      if (opts.extract) {
        const tool = new ExtractContentTool(getToolConfig('extract_content'));
        // Wrapper kept so runTool receives the same minimal { execute } shape as before.
        const wrapperTool = {
          execute: (p) => tool.execute(p)
        };
        await runTool(wrapperTool, { url, output_format: opts.format, timeout }, cliFlags);
      } else {
        const wrapperTool = {
          execute: (p) => fetchUrlHandler(p)
        };
        await runTool(wrapperTool, { url, timeout }, cliFlags);
      }
    });
}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * search command — searches the web using search_web tool.
3
+ */
4
+ import { SearchWebTool } from '../../tools/search/searchWeb.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `search <query>` subcommand to the commander program.
 *
 * The action runs SearchWebTool through runTool with the result limit parsed
 * as an integer, the language and provider strings forwarded unchanged, and
 * `safe_search` enabled unless the flag value is exactly false.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const searchCommand = program
    .command('search <query>')
    .description('Search the web')
    .option('--limit <n>', 'Number of results', '10')
    .option('--lang <lang>', 'Language code (e.g. en, fr)', 'en')
    .option('--provider <p>', 'Search provider: crawlforge or searxng', 'crawlforge')
    .option('--no-safe-search', 'Disable safe search');

  searchCommand.action(async (query, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const searcher = new SearchWebTool(getToolConfig('search_web'));

    const params = {
      query,
      limit: parseInt(opts.limit, 10),
      lang: opts.lang,
      provider: opts.provider,
      safe_search: opts.safeSearch !== false
    };

    await runTool(searcher, params, outputFlags);
  });
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * stealth command — scrape a URL using stealth mode.
3
+ */
4
+ import { StealthBrowserManager } from '../../core/StealthBrowserManager.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `stealth <url>` subcommand to the commander program.
 *
 * StealthBrowserManager is wrapped in an object whose `execute` forwards to
 * `scrapeWithStealth`; that wrapper is what runTool receives, along with the
 * engine name, the wait time parsed as an integer, and the screenshot switch
 * coerced to a boolean.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const stealthCommand = program
    .command('stealth <url>')
    .description('Scrape a URL using stealth/anti-bot browser mode')
    .option('--engine <engine>', 'Browser engine: playwright or camoufox', 'playwright')
    .option('--wait <ms>', 'Wait time after page load in milliseconds', '2000')
    .option('--screenshot', 'Capture a screenshot');

  stealthCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const manager = new StealthBrowserManager(getToolConfig('stealth_mode'));

    // Adapter exposing the execute() entry point that runTool is given.
    const adapter = {
      execute(params) {
        return manager.scrapeWithStealth(params);
      }
    };

    const params = {
      url,
      engine: opts.engine,
      wait_for: parseInt(opts.wait, 10),
      screenshot: Boolean(opts.screenshot)
    };

    await runTool(adapter, params, outputFlags);
  });
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * template command — scrape a target URL using a pre-built site template.
3
+ */
4
+ import { ScrapeTemplateTool } from '../../tools/templates/ScrapeTemplateTool.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Register the `template [id] [target]` subcommand on the given commander program.
 *
 * Two modes:
 *   - `--list`: runs `tool.listTemplates()` through runTool; no positional
 *     arguments are needed.
 *   - otherwise: runs ScrapeTemplateTool with `{ template_id, url }`; both
 *     positionals are required and their absence is reported on stderr with
 *     exit code 1.
 *
 * The positionals are declared optional because commander rejects the command
 * line before the action runs when a required (`<...>`) argument is missing,
 * which previously made `template --list` unusable without dummy arguments.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  program
    .command('template [id] [target]')
    .description('Scrape using a pre-built site template (e.g. amazon-product, github-repo)')
    .option('--list', 'List all available templates')
    .action(async (id, target, opts, cmd) => {
      const globals = cmd.parent.opts();
      const cliFlags = { json: globals.json, pretty: globals.pretty, quiet: globals.quiet };

      // Enforce the positionals ourselves now that commander treats them as optional.
      if (!opts.list && (!id || !target)) {
        process.stderr.write('Error: template requires <id> and <target> (or use --list)\n');
        process.exit(1);
        return; // only reached when process.exit is stubbed
      }

      const tool = new ScrapeTemplateTool(getToolConfig('scrape_template'));

      if (opts.list) {
        const wrapperTool = { execute: () => tool.listTemplates() };
        await runTool(wrapperTool, {}, cliFlags);
        return;
      }

      await runTool(tool, { template_id: id, url: target }, cliFlags);
    });
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * track command — track content changes on a URL.
3
+ */
4
+ import { TrackChangesTool } from '../../tools/tracking/trackChanges/index.js';
5
+ import { getToolConfig } from '../../constants/config.js';
6
+ import { runTool } from '../lib/runTool.js';
7
+
8
/**
 * Attach the `track <url>` subcommand to the commander program.
 *
 * The action runs TrackChangesTool through runTool with the optional CSS
 * selector passed through and the threshold parsed as a float.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  const trackCommand = program
    .command('track <url>')
    .description('Track content changes on a URL')
    .option('--selector <css>', 'CSS selector to scope tracking')
    .option('--threshold <pct>', 'Change threshold percentage (0-100)', '5');

  trackCommand.action(async (url, opts, cmd) => {
    const { json, pretty, quiet } = cmd.parent.opts();
    const outputFlags = { json, pretty, quiet };
    const tracker = new TrackChangesTool(getToolConfig('track_changes'));

    const params = {
      url,
      selector: opts.selector,
      change_threshold: parseFloat(opts.threshold)
    };

    await runTool(tracker, params, outputFlags);
  });
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * uninstall-skills command -- remove CrawlForge skill files.
3
+ */
4
+ import { uninstall } from '../../skills/installer.js';
5
+
6
/**
 * Attach the `uninstall-skills` subcommand to the commander program.
 *
 * The action calls `uninstall()` with the selected target and the current
 * working directory, prints the `removed` and `notFound` lists, and adds a
 * closing notice when nothing was removed. The process exits with 0 on
 * success and 1 when `uninstall()` throws.
 *
 * @param {object} program - Commander program the subcommand is attached to.
 * @returns {void}
 */
export function register(program) {
  // Writes one indented line per entry to stdout.
  const printEntries = (entries) => {
    for (const entry of entries) {
      process.stdout.write(` ${entry}\n`);
    }
  };

  program
    .command('uninstall-skills')
    .description('Remove CrawlForge skill files from Claude Code, Cursor, or VS Code')
    .option('--target <target>', 'Target: claude-code, cursor, vscode, or all', 'all')
    .action(async (opts) => {
      try {
        const results = await uninstall({
          target: opts.target,
          cwd: process.cwd()
        });

        const removedCount = results.removed.length;

        if (removedCount > 0) {
          process.stdout.write('Removed:\n');
          printEntries(results.removed);
        }
        if (results.notFound.length > 0) {
          process.stdout.write('Not found (already removed):\n');
          printEntries(results.notFound);
        }
        if (removedCount === 0) {
          process.stdout.write('No skill files found to remove.\n');
        }
        process.exit(0);
      } catch (err) {
        process.stderr.write(`Error: ${err.message}\n`);
        process.exit(1);
      }
    });
}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * formatter.js — CLI output formatter shared across all CLI commands.
3
+ * Respects global flags: --json, --pretty, --quiet.
4
+ * No logic duplication with MCP tools — formats the same tool execute() output.
5
+ */
6
+
7
/**
 * Format a tool result for CLI output.
 *
 * Rules, in order:
 *   - `quiet` always yields the empty string.
 *   - When `json` or `pretty` is set the result is serialized as JSON;
 *     `pretty` (2-space indent) takes precedence when both flags are given,
 *     for MCP content results and plain values alike.
 *   - A result with an MCP `content` array contributes only its `text`
 *     entries; in JSON mode each text is parsed as JSON when possible and a
 *     single entry is unwrapped from its array.
 *   - Without flags, strings are returned as-is and anything else is
 *     pretty-printed JSON.
 *
 * Values that JSON cannot represent (e.g. `undefined`) yield the empty
 * string so the function always returns a string.
 *
 * @param {object} result — raw object from tool.execute() or MCP handler
 * @param {{ json?: boolean, pretty?: boolean, quiet?: boolean }} flags
 * @returns {string}
 */
export function formatResult(result, flags = {}) {
  const { json = false, pretty = false, quiet = false } = flags;

  if (quiet) return '';

  // Single serialization path so both branches agree on flag precedence.
  const serialize = (value) => {
    const text = pretty ? JSON.stringify(value, null, 2) : JSON.stringify(value);
    // JSON.stringify returns undefined for undefined/functions/symbols.
    return typeof text === 'string' ? text : '';
  };

  // If result has MCP content array, extract the text
  if (result && Array.isArray(result.content)) {
    const texts = result.content
      .filter((entry) => entry !== null && typeof entry === 'object' && entry.type === 'text')
      .map((entry) => entry.text);

    if (json || pretty) {
      // Try to parse each text as JSON and re-serialize
      const parsed = texts.map((text) => {
        try { return JSON.parse(text); } catch { return text; }
      });
      return serialize(parsed.length === 1 ? parsed[0] : parsed);
    }

    // Plain text: return text blocks joined
    return texts.join('\n');
  }

  // Plain value
  if (json || pretty) return serialize(result);
  if (typeof result === 'string') return result;

  const fallback = JSON.stringify(result, null, 2);
  return typeof fallback === 'string' ? fallback : '';
}
44
+
45
/**
 * Format an error for CLI output.
 *
 * The message is taken from `error.message` for Error instances and for any
 * other object carrying a string `message` (e.g. a thrown plain object);
 * every other value is converted with `String()`.
 *
 * @param {Error|string} error
 * @param {{ json?: boolean }} flags
 * @returns {string} `{"error": "<message>"}` when `flags.json` is set,
 *   otherwise `Error: <message>`.
 */
export function formatError(error, flags = {}) {
  const isErrorLike =
    error !== null && typeof error === 'object' && typeof error.message === 'string';
  const message = isErrorLike ? error.message : String(error);

  if (flags.json) {
    return JSON.stringify({ error: message });
  }
  return `Error: ${message}`;
}
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CrawlForge CLI — src/cli/index.js
4
+ * Entry point for the `crawlforge` command.
5
+ *
6
+ * Global flags:
7
+ * --json Output raw JSON (compact)
8
+ * --pretty Output pretty-printed JSON
9
+ * --quiet Suppress all output (exit code only)
10
+ * --api-key CrawlForge API key (overrides CRAWLFORGE_API_KEY env)
11
+ * --timeout Global request timeout in ms (default: 30000)
12
+ */
13
+
14
+ import { Command } from 'commander';
15
+ import { createRequire } from 'node:module';
16
+ import { fileURLToPath } from 'node:url';
17
+ import { dirname, join } from 'node:path';
18
+ import { readFileSync } from 'node:fs';
19
+
20
// Load the CLI version from package.json (two directories above this file).
// Best effort: if the file is missing, unreadable, not valid JSON, or has no
// usable `version` string, the hard-coded fallback below is kept.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const pkgPath = join(__dirname, '../../package.json');
let version = '4.1.0';
try {
  const pkg = JSON.parse(readFileSync(pkgPath, 'utf8'));
  // Only accept a non-empty string so the fallback is never replaced by undefined.
  if (typeof pkg.version === 'string' && pkg.version !== '') {
    version = pkg.version;
  }
} catch { /* use fallback */ }
29
+
30
+ // Import all command registrars
31
+ import { register as registerScrape } from './commands/scrape.js';
32
+ import { register as registerSearch } from './commands/search.js';
33
+ import { register as registerCrawl } from './commands/crawl.js';
34
+ import { register as registerMap } from './commands/map.js';
35
+ import { register as registerExtract } from './commands/extract.js';
36
+ import { register as registerTrack } from './commands/track.js';
37
+ import { register as registerAnalyze } from './commands/analyze.js';
38
+ import { register as registerResearch } from './commands/research.js';
39
+ import { register as registerStealth } from './commands/stealth.js';
40
+ import { register as registerBatch } from './commands/batch.js';
41
+ import { register as registerActions } from './commands/actions.js';
42
+ import { register as registerLocalize } from './commands/localize.js';
43
+ import { register as registerLlmstxt } from './commands/llmstxt.js';
44
+ import { register as registerTemplate } from './commands/template.js';
45
+ import { register as registerMonitor } from './commands/monitor.js';
46
+ import { register as registerInstallSkills } from './commands/install-skills.js';
47
+ import { register as registerUninstallSkills } from './commands/uninstall-skills.js';
48
+
49
// Root command: name, description, version and the global flags shared by
// every subcommand.
const program = new Command();

program.name('crawlforge');
program.description('CrawlForge CLI — web scraping, crawling, and content processing');
program.version(version);
program
  .option('--json', 'Output compact JSON')
  .option('--pretty', 'Output pretty-printed JSON')
  .option('--quiet', 'Suppress all stdout output (exit code only)')
  .option('--api-key <key>', 'CrawlForge API key (overrides CRAWLFORGE_API_KEY env var)')
  .option('--timeout <ms>', 'Global request timeout in milliseconds', '30000');

// Before any subcommand action runs, mirror the global --api-key and
// --timeout values into environment variables.
program.hook('preAction', (thisCommand) => {
  const { apiKey, timeout } = program.opts();
  if (apiKey) {
    process.env.CRAWLFORGE_API_KEY = apiKey;
  }
  if (timeout) {
    process.env.CRAWLFORGE_CLI_TIMEOUT = timeout;
  }
});

// Register all 15 tool commands + 2 skills commands, in declaration order.
const registrars = [
  registerScrape,
  registerSearch,
  registerCrawl,
  registerMap,
  registerExtract,
  registerTrack,
  registerAnalyze,
  registerResearch,
  registerStealth,
  registerBatch,
  registerActions,
  registerLocalize,
  registerLlmstxt,
  registerTemplate,
  registerMonitor,
  registerInstallSkills,
  registerUninstallSkills
];
for (const registerCommand of registrars) {
  registerCommand(program);
}

// Parse argv; any rejection that escapes a command is reported and exits with 1.
program.parseAsync(process.argv).catch((failure) => {
  process.stderr.write(`Fatal error: ${failure.message}\n`);
  process.exit(1);
});