crawlforge-mcp-server 4.7.2 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CLAUDE.md +2 -2
  2. package/package.json +2 -1
  3. package/server.js +42 -9
  4. package/src/cli/commands/init.js +13 -2
  5. package/src/cli/commands/install-skills.js +10 -1
  6. package/src/cli/commands/monitor.js +81 -0
  7. package/src/cli/commands/uninstall-skills.js +10 -1
  8. package/src/core/ActionExecutor.js +51 -9
  9. package/src/core/ElicitationHelper.js +18 -5
  10. package/src/core/LLMsTxtAnalyzer.js +2 -1
  11. package/src/core/MonitorScheduler.js +281 -0
  12. package/src/core/MonitorStore.js +79 -0
  13. package/src/core/ResearchOrchestrator.js +2 -1
  14. package/src/core/crawlers/BFSCrawler.js +2 -1
  15. package/src/skills/agent-skills/crawlforge-batch-automation/SKILL.md +126 -0
  16. package/src/skills/agent-skills/crawlforge-batch-automation/references/actions.md +127 -0
  17. package/src/skills/agent-skills/crawlforge-change-tracking/SKILL.md +116 -0
  18. package/src/skills/agent-skills/crawlforge-deep-research/SKILL.md +108 -0
  19. package/src/skills/agent-skills/crawlforge-deep-research/references/workflows.md +76 -0
  20. package/src/skills/agent-skills/crawlforge-getting-started/SKILL.md +89 -0
  21. package/src/skills/agent-skills/crawlforge-getting-started/references/cli.md +71 -0
  22. package/src/skills/agent-skills/crawlforge-getting-started/references/credits.md +75 -0
  23. package/src/skills/agent-skills/crawlforge-stealth-browsing/SKILL.md +106 -0
  24. package/src/skills/agent-skills/crawlforge-stealth-browsing/references/engine-selection.md +63 -0
  25. package/src/skills/agent-skills/crawlforge-structured-extraction/SKILL.md +121 -0
  26. package/src/skills/agent-skills/crawlforge-structured-extraction/references/templates.md +39 -0
  27. package/src/skills/agent-skills/crawlforge-web-scraping/SKILL.md +141 -0
  28. package/src/skills/agent-skills/crawlforge-web-scraping/references/tool-reference.md +95 -0
  29. package/src/skills/installer.js +186 -34
  30. package/src/tools/advanced/batchScrape/worker.js +8 -2
  31. package/src/tools/basic/_fetch.js +14 -1
  32. package/src/tools/crawl/_sessionContext.js +3 -1
  33. package/src/tools/extract/_fetchAndParse.js +2 -1
  34. package/src/tools/extract/extractContent.js +2 -1
  35. package/src/tools/extract/processDocument.js +2 -1
  36. package/src/tools/scrape/_brandingExtractor.js +378 -0
  37. package/src/tools/scrape/unifiedScrape.js +66 -6
  38. package/src/tools/templates/ScrapeTemplateTool.js +2 -1
  39. package/src/tools/tracking/trackChanges/differ.js +3 -1
  40. package/src/tools/tracking/trackChanges/index.js +74 -21
  41. package/src/tools/tracking/trackChanges/schema.js +7 -2
  42. package/src/utils/hostRateLimiter.js +46 -0
  43. package/src/utils/robotsChecker.js +2 -1
  44. package/src/utils/sitemapParser.js +2 -1
  45. package/src/utils/ssrfGuard.js +161 -0
  46. package/src/utils/ssrfProtection.js +6 -9
  47. package/src/skills/crawlforge-cli.md +0 -157
  48. package/src/skills/crawlforge-mcp.md +0 -80
  49. package/src/skills/crawlforge-research.md +0 -104
  50. package/src/skills/crawlforge-stealth.md +0 -98
package/CLAUDE.md CHANGED
@@ -62,7 +62,7 @@ These guidelines are working if: fewer unnecessary changes in diffs, fewer rewri
62
62
 
63
63
  CrawlForge MCP Server - A professional MCP (Model Context Protocol) server providing 26 web scraping, crawling, and content processing tools (5 inline + 21 advanced).
64
64
 
65
- **Current Version:** 4.6.0
65
+ **Current Version:** 4.8.0
66
66
 
67
67
  ## Development Commands
68
68
 
@@ -241,7 +241,7 @@ When adding a new tool to server.js:
241
241
 
242
242
  Key mechanisms for security-conscious future sessions:
243
243
 
244
- - **SSRF** (`src/utils/ssrfProtection.js`): Every scraped URL validated — http/https only; blocks loopback, RFC1918, IPv6 ULA/link-local, cloud metadata endpoints; blocks dangerous ports (22, 25, 53, 445, 3306, 5432, 6379, 27017, etc.); redirects re-validated per hop, capped at 5; pre-parse path-traversal rejection. Blocklist-based — no per-deployment outbound allowlist.
244
+ - **SSRF** (`src/utils/ssrfGuard.js` enforcing `src/utils/ssrfProtection.js`): As of v4.8.0 SSRF is actually enforced on the live scraping fetch path (previously `ssrfProtection.js` was unwired). `ssrfGuard` injects an undici dispatcher whose connect-time `lookup` validates every connection — initial request and each redirect hop — and pins the validated IP (closing the DNS-rebinding TOCTOU window). Stage 1 (default) blocks loopback, link-local/cloud-metadata (169.254.169.254), and 0.0.0.0; `SSRF_STRICT=true` adds full RFC1918/ULA/etc. enforcement. Kill switch `SSRF_PROTECTION_ENABLED=false`; `ALLOWED_DOMAINS` allowlist bypass. Wired into every read-scrape site (`_fetch.js`, `_fetchAndParse.js`, `batchScrape/worker.js`, `mapSite.js`, `BFSCrawler.js`, extract/template/session/research/llms-txt/robots/sitemap/track-changes fetches) via `ssrfGuard()`/`safeFetch()`. http/https only; dangerous ports + path-traversal still rejected by `ssrfProtection.js`.
245
245
  - **endpointGuard** (`src/core/endpointGuard.js`): Hard allow-list of `{crawlforge.dev, www.crawlforge.dev, api.crawlforge.dev}` for the server's own backend calls; HTTPS required; fail-closed. Localhost only in creator mode (v3.0.18).
246
246
  - **Action allowlist** (`src/core/ActionExecutor.js`): `scrape_with_actions` accepts only 7 action types: `wait`, `click`, `type`, `press`, `scroll`, `screenshot`, `executeJavaScript`. `executeJavaScript` throws unless `ALLOW_JAVASCRIPT_EXECUTION=true` is set at deploy time (off by default).
247
247
  - **Elicitation** (`src/core/ElicitationHelper.js`): User confirmation requested for `deep_research` (>50 URLs), `batch_scrape` (sync, >25 URLs), `crawl_deep` (projected >500 pages), `extract_structured` (schema has >3 required fields, no LLM configured), and credit-low situations. Fail-open if client does not support elicitation.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlforge-mcp-server",
3
- "version": "4.7.2",
3
+ "version": "4.8.0",
4
4
  "mcpName": "io.github.mysleekdesigns/crawlforge-mcp-server",
5
5
  "description": "CrawlForge MCP Server - Professional Model Context Protocol server with 26 web scraping, crawling, deep-research, and autonomous-extraction tools. Returns clean Markdown and structured JSON for Claude, Cursor, and any MCP client. Defaults to local Ollama for LLM extraction (no API key needed); OpenAI/Anthropic available as opt-in. Includes a unified multi-format scrape tool, an autonomous agent, pre-built site templates, and Camoufox stealth browsing.",
6
6
  "main": "server.js",
@@ -22,6 +22,7 @@
22
22
  "test:tools": "node test-tools.js",
23
23
  "test:real-world": "node test-real-world.js",
24
24
  "test:all": "bash run-all-tests.sh",
25
+ "skills:gen": "node scripts/generate-skill-md.mjs",
25
26
  "postinstall": "echo '\nCrawlForge MCP Server installed!\n\nQuick start: run \"npx crawlforge init\" to configure your API key, install skills, and register the MCP server with your AI clients.\nOr run \"npx crawlforge-setup\" to configure your API key only.\n'",
26
27
  "docker:build": "docker build -t crawlforge .",
27
28
  "docker:dev": "docker-compose up crawlforge-dev",
package/server.js CHANGED
@@ -154,7 +154,7 @@ const deepResearchTool = new DeepResearchTool();
154
154
  const trackChangesTool = new TrackChangesTool();
155
155
  const generateLLMsTxtTool = new GenerateLLMsTxtTool();
156
156
  const scrapeTemplateTool = new ScrapeTemplateTool(); // D3.3
157
- const unifiedScrapeTool = new UnifiedScrapeTool(); // D4 D1
157
+ const unifiedScrapeTool = new UnifiedScrapeTool({ actionExecutor: scrapeWithActionsTool.actionExecutor }); // D4 D1 (+v4.8 screenshot reuses the shared browser pool)
158
158
  const agentTool = new AgentTool(); // D4 D2
159
159
  const stealthBrowserManager = new StealthBrowserManager();
160
160
  const localizationManager = new LocalizationManager();
@@ -177,6 +177,7 @@ batchScrapeTool.setMcpServer(server);
177
177
  crawlDeepTool.setMcpServer(server);
178
178
  extractStructuredTool.setMcpServer(server);
179
179
  agentTool.setMcpServer(server); // D4 D2: SamplingClient + Elicitation
180
+ trackChangesTool.setMcpServer(server); // v4.8: SamplingClient for scheduled-monitor goal judging
180
181
  AuthManager.setElicitation(elicitation);
181
182
 
182
183
  // ─── D1.1 Resource Templates (MCP Resources) ─────────────────────────────────
@@ -813,12 +814,12 @@ server.registerTool("deep_research", {
813
814
 
814
815
  // Tool: scrape (D4 D1 — unified multi-format single-fetch)
815
816
  server.registerTool("scrape", {
816
- description: "Use this when you need multiple content formats from a single URL in one call — e.g. markdown + links + metadata together. One fetch, no N-request fan-out. Formats: \"markdown\", \"html\", \"rawHtml\", \"text\", \"links\", \"metadata\", or {type:\"json\",schema,prompt} for LLM-structured extraction. onlyMainContent:true (default) strips boilerplate via Readability. Partial success: per-format warnings never fail the whole call. Example: scrape({url:\"https://example.com\", formats:[\"markdown\",\"links\",\"metadata\"]})",
817
+ description: "Use this when you need multiple content formats from a single URL in one call — e.g. markdown + links + metadata together. One fetch, no N-request fan-out. Formats: \"markdown\", \"html\", \"rawHtml\", \"text\", \"links\", \"metadata\", \"branding\" (static design tokens: colors, fonts, logo), \"screenshot\" (renders in a browser, returns crawlforge://screenshot/{id} resources), or {type:\"json\",schema,prompt} for LLM-structured extraction. onlyMainContent:true (default) strips boilerplate via Readability. Partial success: per-format warnings never fail the whole call. Example: scrape({url:\"https://example.com\", formats:[\"markdown\",\"links\",\"branding\"]})",
817
818
  annotations: { title: "Scrape (Multi-Format)", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
818
819
  inputSchema: {
819
820
  url: z.string().url().describe("The URL to scrape"),
820
821
  formats: z.array(z.union([
821
- z.enum(["markdown", "html", "rawHtml", "text", "links", "metadata", "screenshot"]),
822
+ z.enum(["markdown", "html", "rawHtml", "text", "links", "metadata", "screenshot", "branding"]),
822
823
  z.object({
823
824
  type: z.literal("json"),
824
825
  schema: z.record(z.any()).optional().describe("JSON schema for extraction"),
@@ -826,11 +827,31 @@ server.registerTool("scrape", {
826
827
  })
827
828
  ])).min(1).optional().default(["markdown"]).describe("Formats to return (default: [\"markdown\"])"),
828
829
  onlyMainContent: z.boolean().optional().default(true).describe("Strip boilerplate via Readability (default: true)"),
829
- timeoutMs: z.number().min(1000).max(60000).optional().default(15000).describe("Fetch timeout in ms")
830
+ timeoutMs: z.number().min(1000).max(60000).optional().default(15000).describe("Fetch timeout in ms"),
831
+ brandingOptions: z.object({
832
+ fetchLinkedCss: z.boolean().optional().default(true).describe("Fetch linked stylesheets for richer color/font extraction"),
833
+ maxStylesheets: z.number().min(0).max(20).optional().default(10).describe("Max linked stylesheets to fetch")
834
+ }).optional().describe("Options for the \"branding\" format"),
835
+ screenshotOptions: z.object({
836
+ fullPage: z.boolean().optional().default(false).describe("Capture the full scrollable page"),
837
+ format: z.enum(["png", "jpeg"]).optional().default("png"),
838
+ quality: z.number().min(0).max(100).optional().describe("JPEG quality (jpeg only)")
839
+ }).optional().describe("Options for the \"screenshot\" format")
830
840
  }
831
841
  }, withAuth("scrape", async (params) => {
832
842
  try {
833
843
  const result = await unifiedScrapeTool.execute(params);
844
+ // Publish any captured screenshots as crawlforge://screenshot/{actionId}
845
+ // resources and annotate each with its URI (mirrors scrape_with_actions).
846
+ if (Array.isArray(result?.content?.screenshots)) {
847
+ result.content.screenshots = result.content.screenshots.map((shot) => {
848
+ if (shot?.actionId && shot?.data) {
849
+ resourceRegistry.storeScreenshot(shot.actionId, shot.data);
850
+ return { ...shot, resourceUri: `crawlforge://screenshot/${shot.actionId}` };
851
+ }
852
+ return shot;
853
+ });
854
+ }
834
855
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
835
856
  } catch (error) {
836
857
  return { content: [{ type: "text", text: `Scrape failed: ${error.message}` }], isError: true };
@@ -863,10 +884,10 @@ server.registerTool("track_changes", {
863
884
  description: "Use this when you need to monitor a URL for content changes over time — e.g. competitor pricing, regulation updates, product availability. Start with operation:\"create_baseline\", then periodically use operation:\"compare\" to diff. Supports webhooks and scheduled monitoring. Example: track_changes({url: \"https://example.com/pricing\", operation: \"create_baseline\"})",
864
885
  annotations: { title: "Track Changes", readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
865
886
  inputSchema: {
866
- url: z.string().url().describe("The URL to track changes for"),
887
+ url: z.string().url().optional().describe("The URL to track changes for (optional for list_scheduled_monitors)"),
867
888
  operation: z.enum([
868
889
  'create_baseline', 'compare', 'monitor', 'get_history', 'get_stats',
869
- 'create_scheduled_monitor', 'stop_scheduled_monitor', 'get_dashboard',
890
+ 'create_scheduled_monitor', 'stop_scheduled_monitor', 'list_scheduled_monitors', 'get_dashboard',
870
891
  'export_history', 'create_alert_rule', 'generate_trend_report', 'get_monitoring_templates'
871
892
  ]).default('compare').describe("Tracking operation to perform"),
872
893
  content: z.string().optional().describe("Content to compare against baseline"),
@@ -930,10 +951,14 @@ server.registerTool("track_changes", {
930
951
  }).optional()
931
952
  }).optional().describe("Notification configuration for webhooks and Slack"),
932
953
  scheduledMonitorOptions: z.object({
933
- schedule: z.string().optional(),
954
+ schedule: z.string().optional().describe("Optional cron expression (power users)"),
934
955
  templateId: z.string().optional(),
935
- enabled: z.boolean().default(true)
936
- }).optional().describe("Scheduled monitoring options with cron expressions"),
956
+ enabled: z.boolean().default(true),
957
+ interval: z.number().min(60000).optional().describe("Polling interval in ms (default 1h)"),
958
+ goal: z.string().optional().describe("Plain-English alert goal; an LLM judges whether a change matches (degrades to threshold if no LLM)"),
959
+ monitorId: z.string().optional().describe("Monitor id for stop_scheduled_monitor"),
960
+ notificationThreshold: z.enum(['minor', 'moderate', 'major', 'critical']).optional()
961
+ }).optional().describe("Scheduled monitoring: recurring compare + notify, optional plain-English goal"),
937
962
  alertRuleOptions: z.object({
938
963
  ruleId: z.string().optional(),
939
964
  condition: z.string().optional(),
@@ -1271,6 +1296,14 @@ async function runServer() {
1271
1296
  await connectStdio(server);
1272
1297
  }
1273
1298
 
1299
+ // v4.8: start the scheduled-monitor engine (loads persisted monitors, catches
1300
+ // up any due runs). Best-effort — a scheduler failure must not block startup.
1301
+ try {
1302
+ await trackChangesTool.startScheduler();
1303
+ } catch (err) {
1304
+ console.error('Scheduled-monitor engine failed to start:', err.message);
1305
+ }
1306
+
1274
1307
  console.error(`Environment: ${config.server.nodeEnv}`);
1275
1308
  console.error("Search enabled: true (via CrawlForge proxy)");
1276
1309
 
@@ -3,7 +3,7 @@
3
3
  */
4
4
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
5
5
  import { join } from 'node:path';
6
- import { install } from '../../skills/installer.js';
6
+ import { install, installHook } from '../../skills/installer.js';
7
7
 
8
8
  const HOME = process.env.HOME || process.env.USERPROFILE || '';
9
9
 
@@ -61,6 +61,7 @@ export function register(program) {
61
61
  .description('Set up CrawlForge: verify API key, install skills, and register the MCP server with your AI clients')
62
62
  .option('--all', 'Install skills to all targets and register all detected client configs')
63
63
  .option('--client <name>', 'Target client to register: claude-code, claude-desktop, or cursor')
64
+ .option('--with-hook', 'Add an opt-in UserPromptSubmit reminder to boost skill auto-activation')
64
65
  .option('--yes', 'Non-interactive — assume yes to all prompts')
65
66
  .action(async (opts) => {
66
67
  const out = (msg) => process.stderr.write(msg + '\n');
@@ -80,7 +81,7 @@ export function register(program) {
80
81
  try {
81
82
  const results = await install({ target: skillTarget, force: false, cwd: process.cwd() });
82
83
  if (results.installed.length > 0) {
83
- out('Skills installed: ' + results.installed.length + ' file(s)');
84
+ out('Skills installed: ' + results.installed.length + ' skill(s)');
84
85
  } else {
85
86
  out('Skills: already up to date (use crawlforge install-skills --force to overwrite)');
86
87
  }
@@ -88,6 +89,16 @@ export function register(program) {
88
89
  out('Warning: skill install failed — ' + err.message);
89
90
  }
90
91
 
92
+ // 2b. Optional forced-eval hook (opt-in)
93
+ if (opts.withHook) {
94
+ try {
95
+ const hook = installHook();
96
+ out(hook.added ? 'Forced-eval hook added: ' + hook.path : 'Forced-eval hook already present');
97
+ } catch (err) {
98
+ out('Warning: could not add forced-eval hook — ' + err.message);
99
+ }
100
+ }
101
+
91
102
  // 3. MCP stanza merge
92
103
  const clientFilter = opts.client || (opts.all ? undefined : 'claude-code');
93
104
  const targets = resolveClientPaths(clientFilter);
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * install-skills command -- install CrawlForge skill files into AI coding tools.
3
3
  */
4
- import { install } from '../../skills/installer.js';
4
+ import { install, installHook } from '../../skills/installer.js';
5
5
 
6
6
  export function register(program) {
7
7
  program
@@ -10,6 +10,7 @@ export function register(program) {
10
10
  .option('--target <target>', 'Target: claude-code, cursor, vscode, or all', 'all')
11
11
  .option('--force', 'Overwrite existing skill files')
12
12
  .option('--dry-run', 'Show what would be installed without writing files')
13
+ .option('--with-hook', 'Also add an opt-in UserPromptSubmit reminder to boost skill auto-activation')
13
14
  .action(async (opts) => {
14
15
  try {
15
16
  const results = await install({
@@ -19,6 +20,14 @@ export function register(program) {
19
20
  cwd: process.cwd()
20
21
  });
21
22
 
23
+ if (opts.withHook && !opts.dryRun) {
24
+ const hook = installHook();
25
+ process.stdout.write(
26
+ (hook.added ? 'Added forced-eval hook: ' : 'Forced-eval hook already present: ') +
27
+ hook.path + '\n'
28
+ );
29
+ }
30
+
22
31
  if (opts.dryRun) {
23
32
  process.stdout.write('Dry run -- would install to:\n');
24
33
  results.paths.forEach(p => process.stdout.write(' ' + p + '\n'));
@@ -46,4 +46,85 @@ export function register(program) {
46
46
  // monitor runs continuously — do not auto-exit after the first result.
47
47
  await runTool(wrapperTool, params, cliFlags, { exitOnSuccess: false });
48
48
  });
49
+
50
+ // ── Scheduled monitors (persisted; fire in-process while the server runs, or
51
+ // via `monitor:run-due` from system cron for guaranteed firing) ──────────
52
+
53
+ const emit = (obj) => process.stdout.write(JSON.stringify(obj, null, 2) + '\n');
54
+
55
+ program
56
+ .command('monitor:create <url>')
57
+ .description('Create a persisted scheduled monitor (optionally with a plain-English alert goal)')
58
+ .option('--every <seconds>', 'Polling interval in seconds', '3600')
59
+ .option('--goal <text>', 'Plain-English alert goal (LLM-judged; degrades to threshold if no LLM)')
60
+ .option('--webhook <url>', 'Webhook URL to notify on meaningful changes')
61
+ .option('--threshold <level>', 'Notification threshold: minor|moderate|major|critical', 'moderate')
62
+ .option('--cron <expr>', 'Optional cron expression (advanced)')
63
+ .option('--selector <css>', 'CSS selector to scope monitoring')
64
+ .action(async (url, opts) => {
65
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
66
+ try {
67
+ const res = await tool.execute({
68
+ url,
69
+ operation: 'create_scheduled_monitor',
70
+ ...(opts.selector ? { trackingOptions: { customSelectors: [opts.selector] } } : {}),
71
+ ...(opts.webhook ? { notificationOptions: { webhook: { enabled: true, url: opts.webhook } } } : {}),
72
+ scheduledMonitorOptions: {
73
+ interval: Math.max(parseInt(opts.every, 10), 60) * 1000,
74
+ ...(opts.goal ? { goal: opts.goal } : {}),
75
+ ...(opts.cron ? { schedule: opts.cron } : {}),
76
+ notificationThreshold: opts.threshold
77
+ }
78
+ });
79
+ emit(res);
80
+ process.exit(res.success ? 0 : 1);
81
+ } catch (err) {
82
+ process.stderr.write('Error: ' + err.message + '\n');
83
+ process.exit(1);
84
+ }
85
+ });
86
+
87
+ program
88
+ .command('monitor:list')
89
+ .description('List persisted scheduled monitors')
90
+ .action(async () => {
91
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
92
+ try {
93
+ emit(await tool.execute({ operation: 'list_scheduled_monitors' }));
94
+ process.exit(0);
95
+ } catch (err) {
96
+ process.stderr.write('Error: ' + err.message + '\n');
97
+ process.exit(1);
98
+ }
99
+ });
100
+
101
+ program
102
+ .command('monitor:stop <id>')
103
+ .description('Stop and remove a scheduled monitor by id')
104
+ .action(async (id) => {
105
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
106
+ try {
107
+ const res = await tool.execute({ operation: 'stop_scheduled_monitor', scheduledMonitorOptions: { monitorId: id } });
108
+ emit(res);
109
+ process.exit(res.success ? 0 : 1);
110
+ } catch (err) {
111
+ process.stderr.write('Error: ' + err.message + '\n');
112
+ process.exit(1);
113
+ }
114
+ });
115
+
116
+ program
117
+ .command('monitor:run-due')
118
+ .description('Fire every due scheduled monitor once and exit (wire into system cron for guaranteed firing)')
119
+ .action(async () => {
120
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
121
+ try {
122
+ const res = await tool.runDueOnce();
123
+ emit({ success: true, ...res });
124
+ process.exit(0);
125
+ } catch (err) {
126
+ process.stderr.write('Error: ' + err.message + '\n');
127
+ process.exit(1);
128
+ }
129
+ });
49
130
  }
@@ -1,13 +1,14 @@
1
1
  /**
2
2
  * uninstall-skills command -- remove CrawlForge skill files.
3
3
  */
4
- import { uninstall } from '../../skills/installer.js';
4
+ import { uninstall, uninstallHook } from '../../skills/installer.js';
5
5
 
6
6
  export function register(program) {
7
7
  program
8
8
  .command('uninstall-skills')
9
9
  .description('Remove CrawlForge skill files from Claude Code, Cursor, or VS Code')
10
10
  .option('--target <target>', 'Target: claude-code, cursor, vscode, or all', 'all')
11
+ .option('--remove-hook', 'Also remove the opt-in UserPromptSubmit reminder hook')
11
12
  .action(async (opts) => {
12
13
  try {
13
14
  const results = await uninstall({
@@ -15,6 +16,14 @@ export function register(program) {
15
16
  cwd: process.cwd()
16
17
  });
17
18
 
19
+ if (opts.removeHook) {
20
+ const hook = uninstallHook();
21
+ process.stdout.write(
22
+ (hook.removed ? 'Removed forced-eval hook: ' : 'No forced-eval hook found: ') +
23
+ hook.path + '\n'
24
+ );
25
+ }
26
+
18
27
  if (results.removed.length > 0) {
19
28
  process.stdout.write('Removed:\n');
20
29
  results.removed.forEach(p => process.stdout.write(' ' + p + '\n'));
@@ -6,6 +6,12 @@
6
6
  import { z } from 'zod';
7
7
  import BrowserProcessor from './processing/BrowserProcessor.js';
8
8
  import { EventEmitter } from 'events';
9
+ import { createHash } from 'node:crypto';
10
+
11
+ // executeJavaScript hardening limits (only relevant when the deploy-time flag
12
+ // ALLOW_JAVASCRIPT_EXECUTION=true is set; JS execution stays off by default).
13
+ const JS_MAX_SCRIPT_LENGTH = parseInt(process.env.JS_MAX_SCRIPT_LENGTH || '10000', 10);
14
+ const JS_EXECUTION_TIMEOUT_MS = parseInt(process.env.JS_EXECUTION_TIMEOUT_MS || '5000', 10);
9
15
 
10
16
  // Action schemas
11
17
  const BaseActionSchema = z.object({
@@ -715,17 +721,53 @@ export class ActionExecutor extends EventEmitter {
715
721
  );
716
722
  }
717
723
 
718
- // Log security warning when JS execution is enabled
719
- console.warn('⚠️ SECURITY WARNING: JavaScript execution is enabled. This allows arbitrary code execution!');
720
-
721
- const result = await page.evaluate(
722
- new Function('...args', action.script),
723
- ...action.args
724
+ const script = typeof action.script === 'string' ? action.script : '';
725
+ const args = Array.isArray(action.args) ? action.args : [];
726
+
727
+ // Defense-in-depth: bound script size before evaluating.
728
+ if (script.length > JS_MAX_SCRIPT_LENGTH) {
729
+ throw new Error(
730
+ `JavaScript execution rejected: script length ${script.length} exceeds limit of ${JS_MAX_SCRIPT_LENGTH} ` +
731
+ `(set JS_MAX_SCRIPT_LENGTH to raise it).`
732
+ );
733
+ }
734
+
735
+ // Structured audit log to stderr (stdout is reserved for the MCP JSON-RPC stream).
736
+ const scriptHash = createHash('sha256').update(script).digest('hex').slice(0, 16);
737
+ let targetUrl = 'unknown';
738
+ try { targetUrl = page.url(); } catch { /* page may be closed */ }
739
+ console.warn(
740
+ '[security] executeJavaScript ' + JSON.stringify({
741
+ ts: new Date().toISOString(),
742
+ url: targetUrl,
743
+ scriptSha256: scriptHash,
744
+ scriptLength: script.length,
745
+ argCount: args.length
746
+ })
724
747
  );
725
-
748
+
749
+ // Bound execution time independent of the generic per-action timeout.
750
+ let timer;
751
+ const timeout = new Promise((_, reject) => {
752
+ timer = setTimeout(
753
+ () => reject(new Error(`JavaScript execution timed out after ${JS_EXECUTION_TIMEOUT_MS}ms`)),
754
+ JS_EXECUTION_TIMEOUT_MS
755
+ );
756
+ });
757
+
758
+ let result;
759
+ try {
760
+ result = await Promise.race([
761
+ page.evaluate(new Function('...args', script), ...args),
762
+ timeout
763
+ ]);
764
+ } finally {
765
+ clearTimeout(timer);
766
+ }
767
+
726
768
  return {
727
- script: action.script,
728
- args: action.args,
769
+ script,
770
+ args,
729
771
  result: action.returnResult ? result : undefined
730
772
  };
731
773
  }
@@ -24,7 +24,16 @@ export class ElicitationHelper {
24
24
  * @returns {boolean}
25
25
  */
26
26
  get supported() {
27
- return !!(this._mcpServer?.server?.elicit);
27
+ const server = this._mcpServer?.server;
28
+ // The MCP SDK exposes elicitation via Server.elicitInput(); it is only
29
+ // usable when the connected CLIENT advertised the `elicitation` capability.
30
+ if (typeof server?.elicitInput !== 'function') return false;
31
+ try {
32
+ const caps = server.getClientCapabilities?.();
33
+ return !!caps?.elicitation;
34
+ } catch {
35
+ return false;
36
+ }
28
37
  }
29
38
 
30
39
  /**
@@ -48,7 +57,7 @@ export class ElicitationHelper {
48
57
  .join('\n');
49
58
  const fullMessage = detailLines ? `${message}\n\n${detailLines}` : message;
50
59
 
51
- const result = await this._mcpServer.server.elicit({
60
+ const result = await this._mcpServer.server.elicitInput({
52
61
  message: fullMessage,
53
62
  requestedSchema: {
54
63
  type: 'object',
@@ -63,7 +72,8 @@ export class ElicitationHelper {
63
72
  },
64
73
  });
65
74
 
66
- return result?.content?.confirmed === true;
75
+ // Only an explicit accept + confirmed=true proceeds; decline/cancel = stop.
76
+ return result?.action === 'accept' && result?.content?.confirmed === true;
67
77
  } catch (err) {
68
78
  this._logger.warn('Elicitation request failed — proceeding without confirmation', { error: err.message });
69
79
  return true; // fail-open
@@ -87,7 +97,7 @@ export class ElicitationHelper {
87
97
  }
88
98
 
89
99
  try {
90
- const result = await this._mcpServer.server.elicit({
100
+ const result = await this._mcpServer.server.elicitInput({
91
101
  message,
92
102
  requestedSchema: {
93
103
  type: 'object',
@@ -103,7 +113,10 @@ export class ElicitationHelper {
103
113
  },
104
114
  });
105
115
 
106
- return result?.content?.[fieldName] || defaultValue || null;
116
+ if (result?.action === 'accept' && result?.content?.[fieldName] != null) {
117
+ return result.content[fieldName];
118
+ }
119
+ return defaultValue || null;
107
120
  } catch (err) {
108
121
  this._logger.warn('Elicitation request failed', { error: err.message });
109
122
  return defaultValue || null;
@@ -4,6 +4,7 @@ import { MapSiteTool } from '../tools/crawl/mapSite.js';
4
4
  import { CrawlDeepTool } from '../tools/crawl/crawlDeep.js';
5
5
  import { normalizeUrl, getBaseUrl } from '../utils/urlNormalizer.js';
6
6
  import { Logger } from '../utils/Logger.js';
7
+ import { safeFetch } from '../utils/ssrfGuard.js';
7
8
 
8
9
  const logger = new Logger('LLMsTxtAnalyzer');
9
10
 
@@ -442,7 +443,7 @@ export class LLMsTxtAnalyzer {
442
443
  const timeoutId = setTimeout(() => controller.abort(), timeout);
443
444
 
444
445
  try {
445
- const response = await fetch(url, {
446
+ const response = await safeFetch(url, {
446
447
  signal: controller.signal,
447
448
  headers: {
448
449
  'User-Agent': this.options.userAgent