crawlforge-mcp-server 4.7.1 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CLAUDE.md +2 -2
  2. package/package.json +2 -1
  3. package/server.js +56 -10
  4. package/src/cli/commands/init.js +13 -2
  5. package/src/cli/commands/install-skills.js +10 -1
  6. package/src/cli/commands/monitor.js +81 -0
  7. package/src/cli/commands/uninstall-skills.js +10 -1
  8. package/src/core/ActionExecutor.js +81 -15
  9. package/src/core/ElicitationHelper.js +18 -5
  10. package/src/core/LLMsTxtAnalyzer.js +2 -1
  11. package/src/core/MonitorScheduler.js +281 -0
  12. package/src/core/MonitorStore.js +79 -0
  13. package/src/core/ResearchOrchestrator.js +2 -1
  14. package/src/core/crawlers/BFSCrawler.js +2 -1
  15. package/src/resources/ResourceRegistry.js +3 -0
  16. package/src/skills/agent-skills/crawlforge-batch-automation/SKILL.md +126 -0
  17. package/src/skills/agent-skills/crawlforge-batch-automation/references/actions.md +127 -0
  18. package/src/skills/agent-skills/crawlforge-change-tracking/SKILL.md +116 -0
  19. package/src/skills/agent-skills/crawlforge-deep-research/SKILL.md +108 -0
  20. package/src/skills/agent-skills/crawlforge-deep-research/references/workflows.md +76 -0
  21. package/src/skills/agent-skills/crawlforge-getting-started/SKILL.md +89 -0
  22. package/src/skills/agent-skills/crawlforge-getting-started/references/cli.md +71 -0
  23. package/src/skills/agent-skills/crawlforge-getting-started/references/credits.md +75 -0
  24. package/src/skills/agent-skills/crawlforge-stealth-browsing/SKILL.md +106 -0
  25. package/src/skills/agent-skills/crawlforge-stealth-browsing/references/engine-selection.md +63 -0
  26. package/src/skills/agent-skills/crawlforge-structured-extraction/SKILL.md +121 -0
  27. package/src/skills/agent-skills/crawlforge-structured-extraction/references/templates.md +39 -0
  28. package/src/skills/agent-skills/crawlforge-web-scraping/SKILL.md +141 -0
  29. package/src/skills/agent-skills/crawlforge-web-scraping/references/tool-reference.md +95 -0
  30. package/src/skills/installer.js +186 -34
  31. package/src/tools/advanced/ScrapeWithActionsTool.js +7 -0
  32. package/src/tools/advanced/batchScrape/worker.js +8 -2
  33. package/src/tools/basic/_fetch.js +14 -1
  34. package/src/tools/crawl/_sessionContext.js +3 -1
  35. package/src/tools/extract/_fetchAndParse.js +2 -1
  36. package/src/tools/extract/extractContent.js +2 -1
  37. package/src/tools/extract/extractStructured.js +43 -0
  38. package/src/tools/extract/processDocument.js +2 -1
  39. package/src/tools/scrape/_brandingExtractor.js +378 -0
  40. package/src/tools/scrape/unifiedScrape.js +66 -6
  41. package/src/tools/templates/ScrapeTemplateTool.js +2 -1
  42. package/src/tools/tracking/trackChanges/differ.js +3 -1
  43. package/src/tools/tracking/trackChanges/index.js +74 -21
  44. package/src/tools/tracking/trackChanges/schema.js +7 -2
  45. package/src/utils/hostRateLimiter.js +46 -0
  46. package/src/utils/robotsChecker.js +2 -1
  47. package/src/utils/sitemapParser.js +2 -1
  48. package/src/utils/ssrfGuard.js +161 -0
  49. package/src/utils/ssrfProtection.js +6 -9
  50. package/src/skills/crawlforge-cli.md +0 -157
  51. package/src/skills/crawlforge-mcp.md +0 -80
  52. package/src/skills/crawlforge-research.md +0 -104
  53. package/src/skills/crawlforge-stealth.md +0 -98
package/CLAUDE.md CHANGED
@@ -62,7 +62,7 @@ These guidelines are working if: fewer unnecessary changes in diffs, fewer rewri
62
62
 
63
63
  CrawlForge MCP Server - A professional MCP (Model Context Protocol) server providing 26 web scraping, crawling, and content processing tools (5 inline + 21 advanced).
64
64
 
65
- **Current Version:** 4.6.0
65
+ **Current Version:** 4.8.0
66
66
 
67
67
  ## Development Commands
68
68
 
@@ -241,7 +241,7 @@ When adding a new tool to server.js:
241
241
 
242
242
  Key mechanisms for security-conscious future sessions:
243
243
 
244
- - **SSRF** (`src/utils/ssrfProtection.js`): Every scraped URL validated — http/https only; blocks loopback, RFC1918, IPv6 ULA/link-local, cloud metadata endpoints; blocks dangerous ports (22, 25, 53, 445, 3306, 5432, 6379, 27017, etc.); redirects re-validated per hop, capped at 5; pre-parse path-traversal rejection. Blocklist-based — no per-deployment outbound allowlist.
244
+ - **SSRF** (`src/utils/ssrfGuard.js` enforcing `src/utils/ssrfProtection.js`): As of v4.8.0 SSRF is actually enforced on the live scraping fetch path (previously `ssrfProtection.js` was unwired). `ssrfGuard` injects an undici dispatcher whose connect-time `lookup` validates every connection — initial request and each redirect hop — and pins the validated IP (closing the DNS-rebinding TOCTOU window). Stage 1 (default) blocks loopback, link-local/cloud-metadata (169.254.169.254), and 0.0.0.0; `SSRF_STRICT=true` adds full RFC1918/ULA/etc. enforcement. Kill switch `SSRF_PROTECTION_ENABLED=false`; `ALLOWED_DOMAINS` allowlist bypass. Wired into every read-scrape site (`_fetch.js`, `_fetchAndParse.js`, `batchScrape/worker.js`, `mapSite.js`, `BFSCrawler.js`, extract/template/session/research/llms-txt/robots/sitemap/track-changes fetches) via `ssrfGuard()`/`safeFetch()`. http/https only; dangerous ports + path-traversal still rejected by `ssrfProtection.js`.
245
245
  - **endpointGuard** (`src/core/endpointGuard.js`): Hard allow-list of `{crawlforge.dev, www.crawlforge.dev, api.crawlforge.dev}` for the server's own backend calls; HTTPS required; fail-closed. Localhost only in creator mode (v3.0.18).
246
246
  - **Action allowlist** (`src/core/ActionExecutor.js`): `scrape_with_actions` accepts only 7 action types: `wait`, `click`, `type`, `press`, `scroll`, `screenshot`, `executeJavaScript`. `executeJavaScript` throws unless `ALLOW_JAVASCRIPT_EXECUTION=true` is set at deploy time (off by default).
247
247
  - **Elicitation** (`src/core/ElicitationHelper.js`): User confirmation requested for `deep_research` (>50 URLs), `batch_scrape` (sync, >25 URLs), `crawl_deep` (projected >500 pages), `extract_structured` (schema has >3 required fields, no LLM configured), and credit-low situations. Fail-open if client does not support elicitation.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlforge-mcp-server",
3
- "version": "4.7.1",
3
+ "version": "4.8.0",
4
4
  "mcpName": "io.github.mysleekdesigns/crawlforge-mcp-server",
5
5
  "description": "CrawlForge MCP Server - Professional Model Context Protocol server with 26 web scraping, crawling, deep-research, and autonomous-extraction tools. Returns clean Markdown and structured JSON for Claude, Cursor, and any MCP client. Defaults to local Ollama for LLM extraction (no API key needed); OpenAI/Anthropic available as opt-in. Includes a unified multi-format scrape tool, an autonomous agent, pre-built site templates, and Camoufox stealth browsing.",
6
6
  "main": "server.js",
@@ -22,6 +22,7 @@
22
22
  "test:tools": "node test-tools.js",
23
23
  "test:real-world": "node test-real-world.js",
24
24
  "test:all": "bash run-all-tests.sh",
25
+ "skills:gen": "node scripts/generate-skill-md.mjs",
25
26
  "postinstall": "echo '\nCrawlForge MCP Server installed!\n\nQuick start: run \"npx crawlforge init\" to configure your API key, install skills, and register the MCP server with your AI clients.\nOr run \"npx crawlforge-setup\" to configure your API key only.\n'",
26
27
  "docker:build": "docker build -t crawlforge .",
27
28
  "docker:dev": "docker-compose up crawlforge-dev",
package/server.js CHANGED
@@ -89,7 +89,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
89
89
  // Create the server
90
90
  const server = new McpServer({
91
91
  name: "crawlforge",
92
- version: "4.7.1",
92
+ version: "4.7.2",
93
93
  description: "Production-ready MCP server with 26 web scraping, crawling, and content processing tools. Features MCP Resources (crawlforge://), Prompts, Sampling fallback, Elicitation, stealth browsing, deep research, structured extraction, change tracking, local-LLM extraction via Ollama, unified multi-format scrape, and autonomous agent tool.",
94
94
  homepage: "https://www.crawlforge.dev",
95
95
  icon: "https://www.crawlforge.dev/icon.png"
@@ -154,7 +154,7 @@ const deepResearchTool = new DeepResearchTool();
154
154
  const trackChangesTool = new TrackChangesTool();
155
155
  const generateLLMsTxtTool = new GenerateLLMsTxtTool();
156
156
  const scrapeTemplateTool = new ScrapeTemplateTool(); // D3.3
157
- const unifiedScrapeTool = new UnifiedScrapeTool(); // D4 D1
157
+ const unifiedScrapeTool = new UnifiedScrapeTool({ actionExecutor: scrapeWithActionsTool.actionExecutor }); // D4 D1 (+v4.8 screenshot reuses the shared browser pool)
158
158
  const agentTool = new AgentTool(); // D4 D2
159
159
  const stealthBrowserManager = new StealthBrowserManager();
160
160
  const localizationManager = new LocalizationManager();
@@ -177,6 +177,7 @@ batchScrapeTool.setMcpServer(server);
177
177
  crawlDeepTool.setMcpServer(server);
178
178
  extractStructuredTool.setMcpServer(server);
179
179
  agentTool.setMcpServer(server); // D4 D2: SamplingClient + Elicitation
180
+ trackChangesTool.setMcpServer(server); // v4.8: SamplingClient for scheduled-monitor goal judging
180
181
  AuthManager.setElicitation(elicitation);
181
182
 
182
183
  // ─── D1.1 Resource Templates (MCP Resources) ─────────────────────────────────
@@ -736,6 +737,19 @@ server.registerTool("scrape_with_actions", {
736
737
  }, withAuth("scrape_with_actions", async (params) => {
737
738
  try {
738
739
  const result = await scrapeWithActionsTool.execute(params);
740
+
741
+ // Publish captured screenshots as crawlforge://screenshot/{actionId}
742
+ // resources (the documented contract) and annotate each with its URI.
743
+ if (Array.isArray(result.screenshots)) {
744
+ result.screenshots = result.screenshots.map((shot) => {
745
+ if (shot?.actionId && shot?.data) {
746
+ resourceRegistry.storeScreenshot(shot.actionId, shot.data);
747
+ return { ...shot, resourceUri: `crawlforge://screenshot/${shot.actionId}` };
748
+ }
749
+ return shot;
750
+ });
751
+ }
752
+
739
753
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
740
754
  } catch (error) {
741
755
  return { content: [{ type: "text", text: `Scrape with actions failed: ${error.message}` }], isError: true };
@@ -800,12 +814,12 @@ server.registerTool("deep_research", {
800
814
 
801
815
  // Tool: scrape (D4 D1 — unified multi-format single-fetch)
802
816
  server.registerTool("scrape", {
803
- description: "Use this when you need multiple content formats from a single URL in one call — e.g. markdown + links + metadata together. One fetch, no N-request fan-out. Formats: \"markdown\", \"html\", \"rawHtml\", \"text\", \"links\", \"metadata\", or {type:\"json\",schema,prompt} for LLM-structured extraction. onlyMainContent:true (default) strips boilerplate via Readability. Partial success: per-format warnings never fail the whole call. Example: scrape({url:\"https://example.com\", formats:[\"markdown\",\"links\",\"metadata\"]})",
817
+ description: "Use this when you need multiple content formats from a single URL in one call — e.g. markdown + links + metadata together. One fetch, no N-request fan-out. Formats: \"markdown\", \"html\", \"rawHtml\", \"text\", \"links\", \"metadata\", \"branding\" (static design tokens: colors, fonts, logo), \"screenshot\" (renders in a browser, returns crawlforge://screenshot/{id} resources), or {type:\"json\",schema,prompt} for LLM-structured extraction. onlyMainContent:true (default) strips boilerplate via Readability. Partial success: per-format warnings never fail the whole call. Example: scrape({url:\"https://example.com\", formats:[\"markdown\",\"links\",\"branding\"]})",
804
818
  annotations: { title: "Scrape (Multi-Format)", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
805
819
  inputSchema: {
806
820
  url: z.string().url().describe("The URL to scrape"),
807
821
  formats: z.array(z.union([
808
- z.enum(["markdown", "html", "rawHtml", "text", "links", "metadata", "screenshot"]),
822
+ z.enum(["markdown", "html", "rawHtml", "text", "links", "metadata", "screenshot", "branding"]),
809
823
  z.object({
810
824
  type: z.literal("json"),
811
825
  schema: z.record(z.any()).optional().describe("JSON schema for extraction"),
@@ -813,11 +827,31 @@ server.registerTool("scrape", {
813
827
  })
814
828
  ])).min(1).optional().default(["markdown"]).describe("Formats to return (default: [\"markdown\"])"),
815
829
  onlyMainContent: z.boolean().optional().default(true).describe("Strip boilerplate via Readability (default: true)"),
816
- timeoutMs: z.number().min(1000).max(60000).optional().default(15000).describe("Fetch timeout in ms")
830
+ timeoutMs: z.number().min(1000).max(60000).optional().default(15000).describe("Fetch timeout in ms"),
831
+ brandingOptions: z.object({
832
+ fetchLinkedCss: z.boolean().optional().default(true).describe("Fetch linked stylesheets for richer color/font extraction"),
833
+ maxStylesheets: z.number().min(0).max(20).optional().default(10).describe("Max linked stylesheets to fetch")
834
+ }).optional().describe("Options for the \"branding\" format"),
835
+ screenshotOptions: z.object({
836
+ fullPage: z.boolean().optional().default(false).describe("Capture the full scrollable page"),
837
+ format: z.enum(["png", "jpeg"]).optional().default("png"),
838
+ quality: z.number().min(0).max(100).optional().describe("JPEG quality (jpeg only)")
839
+ }).optional().describe("Options for the \"screenshot\" format")
817
840
  }
818
841
  }, withAuth("scrape", async (params) => {
819
842
  try {
820
843
  const result = await unifiedScrapeTool.execute(params);
844
+ // Publish any captured screenshots as crawlforge://screenshot/{actionId}
845
+ // resources and annotate each with its URI (mirrors scrape_with_actions).
846
+ if (Array.isArray(result?.content?.screenshots)) {
847
+ result.content.screenshots = result.content.screenshots.map((shot) => {
848
+ if (shot?.actionId && shot?.data) {
849
+ resourceRegistry.storeScreenshot(shot.actionId, shot.data);
850
+ return { ...shot, resourceUri: `crawlforge://screenshot/${shot.actionId}` };
851
+ }
852
+ return shot;
853
+ });
854
+ }
821
855
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
822
856
  } catch (error) {
823
857
  return { content: [{ type: "text", text: `Scrape failed: ${error.message}` }], isError: true };
@@ -850,10 +884,10 @@ server.registerTool("track_changes", {
850
884
  description: "Use this when you need to monitor a URL for content changes over time — e.g. competitor pricing, regulation updates, product availability. Start with operation:\"create_baseline\", then periodically use operation:\"compare\" to diff. Supports webhooks and scheduled monitoring. Example: track_changes({url: \"https://example.com/pricing\", operation: \"create_baseline\"})",
851
885
  annotations: { title: "Track Changes", readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
852
886
  inputSchema: {
853
- url: z.string().url().describe("The URL to track changes for"),
887
+ url: z.string().url().optional().describe("The URL to track changes for (optional for list_scheduled_monitors)"),
854
888
  operation: z.enum([
855
889
  'create_baseline', 'compare', 'monitor', 'get_history', 'get_stats',
856
- 'create_scheduled_monitor', 'stop_scheduled_monitor', 'get_dashboard',
890
+ 'create_scheduled_monitor', 'stop_scheduled_monitor', 'list_scheduled_monitors', 'get_dashboard',
857
891
  'export_history', 'create_alert_rule', 'generate_trend_report', 'get_monitoring_templates'
858
892
  ]).default('compare').describe("Tracking operation to perform"),
859
893
  content: z.string().optional().describe("Content to compare against baseline"),
@@ -917,10 +951,14 @@ server.registerTool("track_changes", {
917
951
  }).optional()
918
952
  }).optional().describe("Notification configuration for webhooks and Slack"),
919
953
  scheduledMonitorOptions: z.object({
920
- schedule: z.string().optional(),
954
+ schedule: z.string().optional().describe("Optional cron expression (power users)"),
921
955
  templateId: z.string().optional(),
922
- enabled: z.boolean().default(true)
923
- }).optional().describe("Scheduled monitoring options with cron expressions"),
956
+ enabled: z.boolean().default(true),
957
+ interval: z.number().min(60000).optional().describe("Polling interval in ms (default 1h)"),
958
+ goal: z.string().optional().describe("Plain-English alert goal; an LLM judges whether a change matches (degrades to threshold if no LLM)"),
959
+ monitorId: z.string().optional().describe("Monitor id for stop_scheduled_monitor"),
960
+ notificationThreshold: z.enum(['minor', 'moderate', 'major', 'critical']).optional()
961
+ }).optional().describe("Scheduled monitoring: recurring compare + notify, optional plain-English goal"),
924
962
  alertRuleOptions: z.object({
925
963
  ruleId: z.string().optional(),
926
964
  condition: z.string().optional(),
@@ -1258,6 +1296,14 @@ async function runServer() {
1258
1296
  await connectStdio(server);
1259
1297
  }
1260
1298
 
1299
+ // v4.8: start the scheduled-monitor engine (loads persisted monitors, catches
1300
+ // up any due runs). Best-effort — a scheduler failure must not block startup.
1301
+ try {
1302
+ await trackChangesTool.startScheduler();
1303
+ } catch (err) {
1304
+ console.error('Scheduled-monitor engine failed to start:', err.message);
1305
+ }
1306
+
1261
1307
  console.error(`Environment: ${config.server.nodeEnv}`);
1262
1308
  console.error("Search enabled: true (via CrawlForge proxy)");
1263
1309
 
@@ -3,7 +3,7 @@
3
3
  */
4
4
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
5
5
  import { join } from 'node:path';
6
- import { install } from '../../skills/installer.js';
6
+ import { install, installHook } from '../../skills/installer.js';
7
7
 
8
8
  const HOME = process.env.HOME || process.env.USERPROFILE || '';
9
9
 
@@ -61,6 +61,7 @@ export function register(program) {
61
61
  .description('Set up CrawlForge: verify API key, install skills, and register the MCP server with your AI clients')
62
62
  .option('--all', 'Install skills to all targets and register all detected client configs')
63
63
  .option('--client <name>', 'Target client to register: claude-code, claude-desktop, or cursor')
64
+ .option('--with-hook', 'Add an opt-in UserPromptSubmit reminder to boost skill auto-activation')
64
65
  .option('--yes', 'Non-interactive — assume yes to all prompts')
65
66
  .action(async (opts) => {
66
67
  const out = (msg) => process.stderr.write(msg + '\n');
@@ -80,7 +81,7 @@ export function register(program) {
80
81
  try {
81
82
  const results = await install({ target: skillTarget, force: false, cwd: process.cwd() });
82
83
  if (results.installed.length > 0) {
83
- out('Skills installed: ' + results.installed.length + ' file(s)');
84
+ out('Skills installed: ' + results.installed.length + ' skill(s)');
84
85
  } else {
85
86
  out('Skills: already up to date (use crawlforge install-skills --force to overwrite)');
86
87
  }
@@ -88,6 +89,16 @@ export function register(program) {
88
89
  out('Warning: skill install failed — ' + err.message);
89
90
  }
90
91
 
92
+ // 2b. Optional forced-eval hook (opt-in)
93
+ if (opts.withHook) {
94
+ try {
95
+ const hook = installHook();
96
+ out(hook.added ? 'Forced-eval hook added: ' + hook.path : 'Forced-eval hook already present');
97
+ } catch (err) {
98
+ out('Warning: could not add forced-eval hook — ' + err.message);
99
+ }
100
+ }
101
+
91
102
  // 3. MCP stanza merge
92
103
  const clientFilter = opts.client || (opts.all ? undefined : 'claude-code');
93
104
  const targets = resolveClientPaths(clientFilter);
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * install-skills command -- install CrawlForge skill files into AI coding tools.
3
3
  */
4
- import { install } from '../../skills/installer.js';
4
+ import { install, installHook } from '../../skills/installer.js';
5
5
 
6
6
  export function register(program) {
7
7
  program
@@ -10,6 +10,7 @@ export function register(program) {
10
10
  .option('--target <target>', 'Target: claude-code, cursor, vscode, or all', 'all')
11
11
  .option('--force', 'Overwrite existing skill files')
12
12
  .option('--dry-run', 'Show what would be installed without writing files')
13
+ .option('--with-hook', 'Also add an opt-in UserPromptSubmit reminder to boost skill auto-activation')
13
14
  .action(async (opts) => {
14
15
  try {
15
16
  const results = await install({
@@ -19,6 +20,14 @@ export function register(program) {
19
20
  cwd: process.cwd()
20
21
  });
21
22
 
23
+ if (opts.withHook && !opts.dryRun) {
24
+ const hook = installHook();
25
+ process.stdout.write(
26
+ (hook.added ? 'Added forced-eval hook: ' : 'Forced-eval hook already present: ') +
27
+ hook.path + '\n'
28
+ );
29
+ }
30
+
22
31
  if (opts.dryRun) {
23
32
  process.stdout.write('Dry run -- would install to:\n');
24
33
  results.paths.forEach(p => process.stdout.write(' ' + p + '\n'));
@@ -46,4 +46,85 @@ export function register(program) {
46
46
  // monitor runs continuously — do not auto-exit after the first result.
47
47
  await runTool(wrapperTool, params, cliFlags, { exitOnSuccess: false });
48
48
  });
49
+
50
+ // ── Scheduled monitors (persisted; fire in-process while the server runs, or
51
+ // via `monitor:run-due` from system cron for guaranteed firing) ──────────
52
+
53
+ const emit = (obj) => process.stdout.write(JSON.stringify(obj, null, 2) + '\n');
54
+
55
+ program
56
+ .command('monitor:create <url>')
57
+ .description('Create a persisted scheduled monitor (optionally with a plain-English alert goal)')
58
+ .option('--every <seconds>', 'Polling interval in seconds', '3600')
59
+ .option('--goal <text>', 'Plain-English alert goal (LLM-judged; degrades to threshold if no LLM)')
60
+ .option('--webhook <url>', 'Webhook URL to notify on meaningful changes')
61
+ .option('--threshold <level>', 'Notification threshold: minor|moderate|major|critical', 'moderate')
62
+ .option('--cron <expr>', 'Optional cron expression (advanced)')
63
+ .option('--selector <css>', 'CSS selector to scope monitoring')
64
+ .action(async (url, opts) => {
65
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
66
+ try {
67
+ const res = await tool.execute({
68
+ url,
69
+ operation: 'create_scheduled_monitor',
70
+ ...(opts.selector ? { trackingOptions: { customSelectors: [opts.selector] } } : {}),
71
+ ...(opts.webhook ? { notificationOptions: { webhook: { enabled: true, url: opts.webhook } } } : {}),
72
+ scheduledMonitorOptions: {
73
+ interval: Math.max(parseInt(opts.every, 10), 60) * 1000,
74
+ ...(opts.goal ? { goal: opts.goal } : {}),
75
+ ...(opts.cron ? { schedule: opts.cron } : {}),
76
+ notificationThreshold: opts.threshold
77
+ }
78
+ });
79
+ emit(res);
80
+ process.exit(res.success ? 0 : 1);
81
+ } catch (err) {
82
+ process.stderr.write('Error: ' + err.message + '\n');
83
+ process.exit(1);
84
+ }
85
+ });
86
+
87
+ program
88
+ .command('monitor:list')
89
+ .description('List persisted scheduled monitors')
90
+ .action(async () => {
91
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
92
+ try {
93
+ emit(await tool.execute({ operation: 'list_scheduled_monitors' }));
94
+ process.exit(0);
95
+ } catch (err) {
96
+ process.stderr.write('Error: ' + err.message + '\n');
97
+ process.exit(1);
98
+ }
99
+ });
100
+
101
+ program
102
+ .command('monitor:stop <id>')
103
+ .description('Stop and remove a scheduled monitor by id')
104
+ .action(async (id) => {
105
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
106
+ try {
107
+ const res = await tool.execute({ operation: 'stop_scheduled_monitor', scheduledMonitorOptions: { monitorId: id } });
108
+ emit(res);
109
+ process.exit(res.success ? 0 : 1);
110
+ } catch (err) {
111
+ process.stderr.write('Error: ' + err.message + '\n');
112
+ process.exit(1);
113
+ }
114
+ });
115
+
116
+ program
117
+ .command('monitor:run-due')
118
+ .description('Fire every due scheduled monitor once and exit (wire into system cron for guaranteed firing)')
119
+ .action(async () => {
120
+ const tool = new TrackChangesTool(getToolConfig('track_changes'));
121
+ try {
122
+ const res = await tool.runDueOnce();
123
+ emit({ success: true, ...res });
124
+ process.exit(0);
125
+ } catch (err) {
126
+ process.stderr.write('Error: ' + err.message + '\n');
127
+ process.exit(1);
128
+ }
129
+ });
49
130
  }
@@ -1,13 +1,14 @@
1
1
  /**
2
2
  * uninstall-skills command -- remove CrawlForge skill files.
3
3
  */
4
- import { uninstall } from '../../skills/installer.js';
4
+ import { uninstall, uninstallHook } from '../../skills/installer.js';
5
5
 
6
6
  export function register(program) {
7
7
  program
8
8
  .command('uninstall-skills')
9
9
  .description('Remove CrawlForge skill files from Claude Code, Cursor, or VS Code')
10
10
  .option('--target <target>', 'Target: claude-code, cursor, vscode, or all', 'all')
11
+ .option('--remove-hook', 'Also remove the opt-in UserPromptSubmit reminder hook')
11
12
  .action(async (opts) => {
12
13
  try {
13
14
  const results = await uninstall({
@@ -15,6 +16,14 @@ export function register(program) {
15
16
  cwd: process.cwd()
16
17
  });
17
18
 
19
+ if (opts.removeHook) {
20
+ const hook = uninstallHook();
21
+ process.stdout.write(
22
+ (hook.removed ? 'Removed forced-eval hook: ' : 'No forced-eval hook found: ') +
23
+ hook.path + '\n'
24
+ );
25
+ }
26
+
18
27
  if (results.removed.length > 0) {
19
28
  process.stdout.write('Removed:\n');
20
29
  results.removed.forEach(p => process.stdout.write(' ' + p + '\n'));
@@ -6,6 +6,12 @@
6
6
  import { z } from 'zod';
7
7
  import BrowserProcessor from './processing/BrowserProcessor.js';
8
8
  import { EventEmitter } from 'events';
9
+ import { createHash } from 'node:crypto';
10
+
11
+ // executeJavaScript hardening limits (only relevant when the deploy-time flag
12
+ // ALLOW_JAVASCRIPT_EXECUTION=true is set; JS execution stays off by default).
13
+ const JS_MAX_SCRIPT_LENGTH = parseInt(process.env.JS_MAX_SCRIPT_LENGTH || '10000', 10);
14
+ const JS_EXECUTION_TIMEOUT_MS = parseInt(process.env.JS_EXECUTION_TIMEOUT_MS || '5000', 10);
9
15
 
10
16
  // Action schemas
11
17
  const BaseActionSchema = z.object({
@@ -23,8 +29,8 @@ const WaitActionSchema = BaseActionSchema.extend({
23
29
  selector: z.string().optional(),
24
30
  condition: z.enum(['visible', 'hidden', 'enabled', 'disabled', 'stable']).optional(),
25
31
  text: z.string().optional()
26
- }).refine(data => data.duration || data.milliseconds || data.selector || data.text, {
27
- message: 'Wait action requires duration/milliseconds, selector, or text'
32
+ }).refine(data => data.duration || data.milliseconds || data.timeout || data.selector || data.text, {
33
+ message: 'Wait action requires duration/milliseconds/timeout, selector, or text'
28
34
  });
29
35
 
30
36
  const ClickActionSchema = BaseActionSchema.extend({
@@ -329,6 +335,18 @@ export class ActionExecutor extends EventEmitter {
329
335
  executionContext.results.push(actionResult);
330
336
  this.stats.totalActions++;
331
337
 
338
+ // Collect screenshots produced by successful screenshot actions so
339
+ // they surface in the tool result (not just error screenshots).
340
+ if (actionResult.success && action.type === 'screenshot' && actionResult.result?.data) {
341
+ executionContext.screenshots.push({
342
+ actionId: actionResult.id,
343
+ data: actionResult.result.data,
344
+ format: actionResult.result.format,
345
+ fullPage: actionResult.result.fullPage,
346
+ timestamp: actionResult.timestamp
347
+ });
348
+ }
349
+
332
350
  if (actionResult.success) {
333
351
  this.stats.successfulActions++;
334
352
  } else {
@@ -382,7 +400,16 @@ export class ActionExecutor extends EventEmitter {
382
400
  this.emit('actionStarted', { actionId, action, chainId: executionContext.id });
383
401
 
384
402
  let result;
385
- const timeout = action.timeout || this.defaultTimeout;
403
+ let timeout = action.timeout || this.defaultTimeout;
404
+
405
+ // A `wait` action that uses `timeout` as its pause duration (no
406
+ // duration/milliseconds/selector/text) must not also use that same value
407
+ // as its abort deadline, or the abort would race the wait. Give headroom.
408
+ if (action.type === 'wait' &&
409
+ !action.duration && !action.milliseconds && !action.selector && !action.text &&
410
+ action.timeout) {
411
+ timeout = Math.max(this.defaultTimeout, action.timeout + 5000);
412
+ }
386
413
 
387
414
  // Execute based on action type with timeout
388
415
  const executionPromise = this.executeActionByType(page, action);
@@ -467,8 +494,11 @@ export class ActionExecutor extends EventEmitter {
467
494
  * @returns {Promise<Object>} Wait result
468
495
  */
469
496
  async executeWaitAction(page, action) {
470
- // Handle both 'duration' and 'milliseconds' for backwards compatibility
471
- const waitTime = action.duration || action.milliseconds;
497
+ // Handle 'duration'/'milliseconds' (and 'timeout' as a pause duration only
498
+ // when no selector/text is given — selector/text waits use 'timeout' as
499
+ // their abort deadline instead).
500
+ const waitTime = action.duration || action.milliseconds ||
501
+ (!action.selector && !action.text ? action.timeout : undefined);
472
502
  if (waitTime) {
473
503
  await this.delay(waitTime);
474
504
  return { waited: waitTime };
@@ -492,7 +522,7 @@ export class ActionExecutor extends EventEmitter {
492
522
  return { text: action.text };
493
523
  }
494
524
 
495
- throw new Error('Wait action requires duration, selector, or text');
525
+ throw new Error('Wait action requires duration/milliseconds/timeout, selector, or text');
496
526
  }
497
527
 
498
528
  /**
@@ -691,17 +721,53 @@ export class ActionExecutor extends EventEmitter {
691
721
  );
692
722
  }
693
723
 
694
- // Log security warning when JS execution is enabled
695
- console.warn('⚠️ SECURITY WARNING: JavaScript execution is enabled. This allows arbitrary code execution!');
696
-
697
- const result = await page.evaluate(
698
- new Function('...args', action.script),
699
- ...action.args
724
+ const script = typeof action.script === 'string' ? action.script : '';
725
+ const args = Array.isArray(action.args) ? action.args : [];
726
+
727
+ // Defense-in-depth: bound script size before evaluating.
728
+ if (script.length > JS_MAX_SCRIPT_LENGTH) {
729
+ throw new Error(
730
+ `JavaScript execution rejected: script length ${script.length} exceeds limit of ${JS_MAX_SCRIPT_LENGTH} ` +
731
+ `(set JS_MAX_SCRIPT_LENGTH to raise it).`
732
+ );
733
+ }
734
+
735
+ // Structured audit log to stderr (stdout is reserved for the MCP JSON-RPC stream).
736
+ const scriptHash = createHash('sha256').update(script).digest('hex').slice(0, 16);
737
+ let targetUrl = 'unknown';
738
+ try { targetUrl = page.url(); } catch { /* page may be closed */ }
739
+ console.warn(
740
+ '[security] executeJavaScript ' + JSON.stringify({
741
+ ts: new Date().toISOString(),
742
+ url: targetUrl,
743
+ scriptSha256: scriptHash,
744
+ scriptLength: script.length,
745
+ argCount: args.length
746
+ })
700
747
  );
701
-
748
+
749
+ // Bound execution time independent of the generic per-action timeout.
750
+ let timer;
751
+ const timeout = new Promise((_, reject) => {
752
+ timer = setTimeout(
753
+ () => reject(new Error(`JavaScript execution timed out after ${JS_EXECUTION_TIMEOUT_MS}ms`)),
754
+ JS_EXECUTION_TIMEOUT_MS
755
+ );
756
+ });
757
+
758
+ let result;
759
+ try {
760
+ result = await Promise.race([
761
+ page.evaluate(new Function('...args', script), ...args),
762
+ timeout
763
+ ]);
764
+ } finally {
765
+ clearTimeout(timer);
766
+ }
767
+
702
768
  return {
703
- script: action.script,
704
- args: action.args,
769
+ script,
770
+ args,
705
771
  result: action.returnResult ? result : undefined
706
772
  };
707
773
  }
@@ -24,7 +24,16 @@ export class ElicitationHelper {
24
24
  * @returns {boolean}
25
25
  */
26
26
  get supported() {
27
- return !!(this._mcpServer?.server?.elicit);
27
+ const server = this._mcpServer?.server;
28
+ // The MCP SDK exposes elicitation via Server.elicitInput(); it is only
29
+ // usable when the connected CLIENT advertised the `elicitation` capability.
30
+ if (typeof server?.elicitInput !== 'function') return false;
31
+ try {
32
+ const caps = server.getClientCapabilities?.();
33
+ return !!caps?.elicitation;
34
+ } catch {
35
+ return false;
36
+ }
28
37
  }
29
38
 
30
39
  /**
@@ -48,7 +57,7 @@ export class ElicitationHelper {
48
57
  .join('\n');
49
58
  const fullMessage = detailLines ? `${message}\n\n${detailLines}` : message;
50
59
 
51
- const result = await this._mcpServer.server.elicit({
60
+ const result = await this._mcpServer.server.elicitInput({
52
61
  message: fullMessage,
53
62
  requestedSchema: {
54
63
  type: 'object',
@@ -63,7 +72,8 @@ export class ElicitationHelper {
63
72
  },
64
73
  });
65
74
 
66
- return result?.content?.confirmed === true;
75
+ // Only an explicit accept + confirmed=true proceeds; decline/cancel = stop.
76
+ return result?.action === 'accept' && result?.content?.confirmed === true;
67
77
  } catch (err) {
68
78
  this._logger.warn('Elicitation request failed — proceeding without confirmation', { error: err.message });
69
79
  return true; // fail-open
@@ -87,7 +97,7 @@ export class ElicitationHelper {
87
97
  }
88
98
 
89
99
  try {
90
- const result = await this._mcpServer.server.elicit({
100
+ const result = await this._mcpServer.server.elicitInput({
91
101
  message,
92
102
  requestedSchema: {
93
103
  type: 'object',
@@ -103,7 +113,10 @@ export class ElicitationHelper {
103
113
  },
104
114
  });
105
115
 
106
- return result?.content?.[fieldName] || defaultValue || null;
116
+ if (result?.action === 'accept' && result?.content?.[fieldName] != null) {
117
+ return result.content[fieldName];
118
+ }
119
+ return defaultValue || null;
107
120
  } catch (err) {
108
121
  this._logger.warn('Elicitation request failed', { error: err.message });
109
122
  return defaultValue || null;
@@ -4,6 +4,7 @@ import { MapSiteTool } from '../tools/crawl/mapSite.js';
4
4
  import { CrawlDeepTool } from '../tools/crawl/crawlDeep.js';
5
5
  import { normalizeUrl, getBaseUrl } from '../utils/urlNormalizer.js';
6
6
  import { Logger } from '../utils/Logger.js';
7
+ import { safeFetch } from '../utils/ssrfGuard.js';
7
8
 
8
9
  const logger = new Logger('LLMsTxtAnalyzer');
9
10
 
@@ -442,7 +443,7 @@ export class LLMsTxtAnalyzer {
442
443
  const timeoutId = setTimeout(() => controller.abort(), timeout);
443
444
 
444
445
  try {
445
- const response = await fetch(url, {
446
+ const response = await safeFetch(url, {
446
447
  signal: controller.signal,
447
448
  headers: {
448
449
  'User-Agent': this.options.userAgent