npm - crawlforge-mcp-server - Versions diffs - 3.0.17 → 3.3.1 - Mend

crawlforge-mcp-server 3.0.17 → 3.3.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (56)

package/CLAUDE.md +2 -0
package/README.md +1 -0
package/package.json +6 -2
package/server.js +192 -1277
package/src/constants/config.js +2 -1
package/src/core/ActionExecutor.js +2 -43
package/src/core/AuthManager.js +230 -32
package/src/core/BrowserContextPool.js +187 -0
package/src/core/JobManager.js +7 -5
package/src/core/LocalizationManager.js +14 -125
package/src/core/ResearchOrchestrator.js +86 -5
package/src/core/StealthBrowserManager.js +26 -18
package/src/core/cache/CacheManager.js +4 -1
package/src/core/crawlers/BFSCrawler.js +19 -5
package/src/core/endpointGuard.js +37 -0
package/src/observability/metrics.js +137 -0
package/src/observability/tracing.js +74 -0
package/src/server/auth/oauth.js +388 -0
package/src/server/registerTool.js +41 -0
package/src/server/schemas/common.js +29 -0
package/src/server/transports/http.js +22 -0
package/src/server/transports/stdio.js +16 -0
package/src/server/transports/streamableHttp.js +226 -0
package/src/server/withAuth.js +121 -0
package/src/tools/advanced/BatchScrapeTool.js +12 -1086
package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
package/src/tools/advanced/batchScrape/index.js +328 -0
package/src/tools/advanced/batchScrape/queue.js +91 -0
package/src/tools/advanced/batchScrape/reporter.js +26 -0
package/src/tools/advanced/batchScrape/schema.js +37 -0
package/src/tools/advanced/batchScrape/worker.js +179 -0
package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
package/src/tools/basic/_fetch.js +35 -0
package/src/tools/basic/extractLinks.js +74 -0
package/src/tools/basic/extractMetadata.js +74 -0
package/src/tools/basic/extractText.js +46 -0
package/src/tools/basic/fetchUrl.js +44 -0
package/src/tools/basic/scrapeStructured.js +58 -0
package/src/tools/crawl/_sessionContext.js +234 -0
package/src/tools/crawl/crawlDeep.js +55 -5
package/src/tools/crawl/mapSite.js +23 -2
package/src/tools/extract/_fetchAndParse.js +57 -0
package/src/tools/extract/extractStructured.js +3 -19
package/src/tools/extract/extractWithLlm.js +295 -0
package/src/tools/research/deepResearch.js +33 -8
package/src/tools/search/providers/searxng.js +126 -0
package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
package/src/tools/search/ranking/ResultRanker.js +17 -10
package/src/tools/search/ranking/SearchResultCache.js +52 -0
package/src/tools/search/searchWeb.js +112 -6
package/src/tools/tracking/trackChanges/differ.js +98 -0
package/src/tools/tracking/trackChanges/index.js +432 -0
package/src/tools/tracking/trackChanges/monitor.js +93 -0
package/src/tools/tracking/trackChanges/notifier.js +105 -0
package/src/tools/tracking/trackChanges/schema.js +127 -0
package/src/tools/tracking/trackChanges.js +12 -1374

package/server.js CHANGED Viewed

@@ -6,12 +6,8 @@ export { isCreatorModeVerified } from './src/core/creatorMode.js';
 // Import everything else
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
-import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
-import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
-import { createServer } from "node:http";
-import { randomUUID } from "node:crypto";
 import { z } from "zod";
-import { load } from "cheerio";
+import { logger } from "./src/utils/Logger.js";
 import { SearchWebTool } from "./src/tools/search/searchWeb.js";
 import { CrawlDeepTool } from "./src/tools/crawl/crawlDeep.js";
 import { MapSiteTool } from "./src/tools/crawl/mapSite.js";
@@ -19,24 +15,33 @@ import { ExtractContentTool } from "./src/tools/extract/extractContent.js";
 import { ProcessDocumentTool } from "./src/tools/extract/processDocument.js";
 import { SummarizeContentTool } from "./src/tools/extract/summarizeContent.js";
 import { AnalyzeContentTool } from "./src/tools/extract/analyzeContent.js";
-// Phase 1: LLM-Powered Structured Extraction
 import { ExtractStructuredTool } from "./src/tools/extract/extractStructured.js";
-// Wave 2 Advanced Tools
+import { ExtractWithLlm } from "./src/tools/extract/extractWithLlm.js";
 import { BatchScrapeTool } from "./src/tools/advanced/BatchScrapeTool.js";
 import { ScrapeWithActionsTool } from "./src/tools/advanced/ScrapeWithActionsTool.js";
-// Deep Research Tool
 import { DeepResearchTool } from "./src/tools/research/deepResearch.js";
-// Change Tracking Tool
-import { TrackChangesTool } from "./src/tools/tracking/trackChanges.js";
-// LLMs.txt Generator Tool (Phase 2.5)
+import { TrackChangesTool } from "./src/tools/tracking/trackChanges/index.js";
 import { GenerateLLMsTxtTool } from "./src/tools/llmstxt/generateLLMsTxt.js";
-// Wave 3-4 Core Managers
 import { StealthBrowserManager } from "./src/core/StealthBrowserManager.js";
 import { LocalizationManager } from "./src/core/LocalizationManager.js";
 import { memoryMonitor } from "./src/utils/MemoryMonitor.js";
 import { config, validateConfig, getToolConfig } from "./src/constants/config.js";
-// Authentication Manager
 import AuthManager from "./src/core/AuthManager.js";
+import { makeWithAuth } from "./src/server/withAuth.js";
+// Transport helpers
+import { connectStdio } from "./src/server/transports/stdio.js";
+import { connectHttp } from "./src/server/transports/http.js";
+import { connectStreamableHttp } from "./src/server/transports/streamableHttp.js";
+// OAuth 2.1 (HTTP transport only — opt-in via CRAWLFORGE_OAUTH_ENABLED=true)
+import { createOAuthProvider } from "./src/server/auth/oauth.js";
+// Observability (no-op by default — enable via CRAWLFORGE_METRICS / OTEL_SDK_DISABLED)
+import { createMetricsRegistry } from "./src/observability/metrics.js";
+// Basic tool handlers (extracted from server.js)
+import { fetchUrlHandler } from "./src/tools/basic/fetchUrl.js";
+import { extractTextHandler } from "./src/tools/basic/extractText.js";
+import { extractLinksHandler } from "./src/tools/basic/extractLinks.js";
+import { extractMetadataHandler } from "./src/tools/basic/extractMetadata.js";
+import { scrapeStructuredHandler } from "./src/tools/basic/scrapeStructured.js";
 // Initialize Authentication Manager
 await AuthManager.initialize();
@@ -84,7 +89,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
 // Create the server
 const server = new McpServer({
   name: "crawlforge",
-  version: "3.0.12",
+  version: "3.2.0",
   description: "Production-ready MCP server with 20 web scraping, crawling, and content processing tools. Features stealth browsing, deep research, structured extraction, and change tracking.",
   homepage: "https://www.crawlforge.dev",
   icon: "https://www.crawlforge.dev/icon.png"
@@ -99,7 +104,7 @@ server.prompt("getting-started", {
       role: "user",
       content: {
         type: "text",
-        text: "You have access to CrawlForge MCP with 20 web scraping tools. Key tools:\n\n" +
+        text: "You have access to CrawlForge MCP with 21 web scraping tools. Key tools:\n\n" +
           "- fetch_url: Fetch raw HTML/content from any URL\n" +
           "- extract_text: Extract clean text from a webpage\n" +
           "- extract_content: Smart content extraction with readability\n" +
@@ -111,6 +116,7 @@ server.prompt("getting-started", {
           "- deep_research: Multi-source research on any topic\n" +
           "- stealth_mode: Anti-detection browsing for protected sites\n" +
           "- extract_structured: LLM-powered structured data extraction\n" +
+          "- extract_with_llm: Natural-language extraction via OpenAI/Anthropic\n" +
           "- track_changes: Monitor website changes over time\n" +
           "- generate_llms_txt: Generate llms.txt for any website\n\n" +
           "Workflow: search_web -> fetch_url -> extract_content -> analyze_content\n\n" +
@@ -120,528 +126,37 @@ server.prompt("getting-started", {
   };
 });
-// Helper function to wrap tool handlers with authentication and credit tracking
-function withAuth(toolName, handler) {
-  return async (params) => {
-    const startTime = Date.now();
-    try {
-      // Skip credit checks in creator mode
-      if (!AuthManager.isCreatorMode()) {
-        // Check credits before executing
-        const creditCost = AuthManager.getToolCost(toolName);
-        const hasCredits = await AuthManager.checkCredits(creditCost);
-        if (!hasCredits) {
-          return {
-            content: [{
-              type: "text",
-              text: JSON.stringify({
-                error: "Insufficient credits",
-                message: `This operation requires ${creditCost} credits. Please upgrade your plan at https://www.crawlforge.dev/pricing`,
-                creditsRequired: creditCost
-              }, null, 2)
-            }]
-          };
-        }
-      }
-      // Execute the tool
-      const result = await handler(params);
-      // Report usage for successful execution (skip in creator mode)
-      const processingTime = Date.now() - startTime;
-      if (!AuthManager.isCreatorMode()) {
-        const creditCost = AuthManager.getToolCost(toolName);
-        await AuthManager.reportUsage(
-          toolName,
-          creditCost,
-          params,
-          200,
-          processingTime
-        );
-      }
-      return result;
-    } catch (error) {
-      // Report usage even for errors (reduced credit cost) - skip in creator mode
-      const processingTime = Date.now() - startTime;
-      if (!AuthManager.isCreatorMode()) {
-        await AuthManager.reportUsage(
-          toolName,
-          Math.max(1, Math.floor(AuthManager.getToolCost(toolName) * 0.5)), // Half credits for errors
-          params,
-          500,
-          processingTime
-        );
-      }
-      throw error;
-    }
-  };
-}
+// Observability registry — only emit metrics in HTTP mode when explicitly enabled.
+// Stdio mode stays silent to match MCP host expectations.
+const metricsEnabled =
+  (process.argv.includes('--http') || process.env.MCP_HTTP === 'true') &&
+  process.env.CRAWLFORGE_METRICS === 'true';
+const metrics = metricsEnabled ? createMetricsRegistry() : null;
-// Initialize Search Web Tool - always available with CrawlForge API key
+// Tool-handler wrapper: auth + credit tracking + structured invocation logging + observability.
+const withAuth = makeWithAuth({ authManager: AuthManager, logger, metrics });
+// Initialize tools
 const searchWebTool = new SearchWebTool(getToolConfig("search_web"));
 const crawlDeepTool = new CrawlDeepTool(getToolConfig('crawl_deep'));
 const mapSiteTool = new MapSiteTool(getToolConfig('map_site'));
-// Initialize Phase 3 tools
 const extractContentTool = new ExtractContentTool();
 const processDocumentTool = new ProcessDocumentTool();
 const summarizeContentTool = new SummarizeContentTool();
 const analyzeContentTool = new AnalyzeContentTool();
-// Phase 1: LLM-Powered Structured Extraction Tool
 const extractStructuredTool = new ExtractStructuredTool();
-// Initialize Wave 2 Advanced Tools
+const extractWithLlmTool = new ExtractWithLlm();
 const batchScrapeTool = new BatchScrapeTool();
 const scrapeWithActionsTool = new ScrapeWithActionsTool();
-// Initialize Deep Research Tool
 const deepResearchTool = new DeepResearchTool();
-// Initialize Change Tracking Tool
 const trackChangesTool = new TrackChangesTool();
-// Initialize LLMs.txt Generator Tool (Phase 2.5)
 const generateLLMsTxtTool = new GenerateLLMsTxtTool();
-// Initialize Wave 3-4 Core Managers
 const stealthBrowserManager = new StealthBrowserManager();
 const localizationManager = new LocalizationManager();
-// Zod schemas for tool parameters and responses
-const FetchUrlSchema = z.object({
-  url: z.string().url(),
-  headers: z.record(z.string()).optional(),
-  timeout: z.number().min(1000).max(30000).optional().default(10000)
-});
-const ExtractTextSchema = z.object({
-  url: z.string().url(),
-  remove_scripts: z.boolean().optional().default(true),
-  remove_styles: z.boolean().optional().default(true)
-});
-const ExtractLinksSchema = z.object({
-  url: z.string().url(),
-  filter_external: z.boolean().optional().default(false),
-  base_url: z.string().url().optional()
-});
-const ExtractMetadataSchema = z.object({
-  url: z.string().url()
-});
-const ScrapeStructuredSchema = z.object({
-  url: z.string().url(),
-  selectors: z.record(z.string())
-});
-const SearchWebSchema = z.object({
-  query: z.string(),
-  limit: z.number().min(1).max(100).optional(),
-  offset: z.number().min(0).optional(),
-  lang: z.string().optional(),
-  safe_search: z.boolean().optional(),
-  time_range: z.enum(['day', 'week', 'month', 'year', 'all']).optional(),
-  site: z.string().optional(),
-  file_type: z.string().optional()
-});
-const CrawlDeepSchema = z.object({
-  url: z.string().url(),
-  max_depth: z.number().min(1).max(5).optional(),
-  max_pages: z.number().min(1).max(1000).optional(),
-  include_patterns: z.array(z.string()).optional(),
-  exclude_patterns: z.array(z.string()).optional(),
-  follow_external: z.boolean().optional(),
-  respect_robots: z.boolean().optional(),
-  extract_content: z.boolean().optional(),
-  concurrency: z.number().min(1).max(20).optional()
-});
-const MapSiteSchema = z.object({
-  url: z.string().url(),
-  include_sitemap: z.boolean().optional(),
-  max_urls: z.number().min(1).max(10000).optional(),
-  group_by_path: z.boolean().optional(),
-  include_metadata: z.boolean().optional()
-});
-const ExtractContentSchema = z.object({
-  url: z.string().url(),
-  options: z.object({}).optional()
-});
-const ProcessDocumentSchema = z.object({
-  source: z.string(),
-  sourceType: z.enum(['url', 'pdf_url', 'file', 'pdf_file']).optional(),
-  options: z.object({}).optional()
-});
-const SummarizeContentSchema = z.object({
-  text: z.string(),
-  options: z.object({}).optional()
-});
-const AnalyzeContentSchema = z.object({
-  text: z.string(),
-  options: z.object({}).optional()
-});
-// Wave 2 Advanced Tools Schemas
-const BatchScrapeSchema = z.object({
-  urls: z.array(z.union([
-    z.string().url(),
-    z.object({
-      url: z.string().url(),
-      selectors: z.record(z.string()).optional(),
-      headers: z.record(z.string()).optional(),
-      timeout: z.number().min(1000).max(30000).optional(),
-      metadata: z.record(z.any()).optional()
-    })
-  ])).min(1).max(50),
-  formats: z.array(z.enum(['markdown', 'html', 'json', 'text'])).default(['json']),
-  mode: z.enum(['sync', 'async']).default('sync'),
-  webhook: z.object({
-    url: z.string().url(),
-    events: z.array(z.string()).optional().default(['batch_completed', 'batch_failed']),
-    headers: z.record(z.string()).optional(),
-    signingSecret: z.string().optional()
-  }).optional(),
-  extractionSchema: z.record(z.string()).optional(),
-  maxConcurrency: z.number().min(1).max(20).default(10),
-  delayBetweenRequests: z.number().min(0).max(10000).default(100),
-  includeMetadata: z.boolean().default(true),
-  includeFailed: z.boolean().default(true),
-  pageSize: z.number().min(1).max(100).default(25),
-  jobOptions: z.object({
-    priority: z.number().default(0),
-    ttl: z.number().min(60000).default(24 * 60 * 60 * 1000),
-    maxRetries: z.number().min(0).max(5).default(1),
-    tags: z.array(z.string()).default([])
-  }).optional()
-});
-const ScrapeWithActionsSchema = z.object({
-  url: z.string().url(),
-  actions: z.array(z.object({
-    type: z.enum(['wait', 'click', 'type', 'press', 'scroll', 'screenshot', 'executeJavaScript']),
-    selector: z.string().optional(),
-    text: z.string().optional(),
-    key: z.string().optional(),
-    script: z.string().optional(),
-    timeout: z.number().optional(),
-    description: z.string().optional(),
-    continueOnError: z.boolean().default(false),
-    retries: z.number().min(0).max(5).default(0)
-  })).min(1).max(20),
-  formats: z.array(z.enum(['markdown', 'html', 'json', 'text', 'screenshots'])).default(['json']),
-  captureIntermediateStates: z.boolean().default(false),
-  captureScreenshots: z.boolean().default(true),
-  formAutoFill: z.object({
-    fields: z.array(z.object({
-      selector: z.string(),
-      value: z.string(),
-      type: z.enum(['text', 'select', 'checkbox', 'radio', 'file']).default('text'),
-      waitAfter: z.number().min(0).max(5000).default(100)
-    })),
-    submitSelector: z.string().optional(),
-    waitAfterSubmit: z.number().min(0).max(30000).default(2000)
-  }).optional(),
-  browserOptions: z.object({
-    headless: z.boolean().default(true),
-    userAgent: z.string().optional(),
-    viewportWidth: z.number().min(800).max(1920).default(1280),
-    viewportHeight: z.number().min(600).max(1080).default(720),
-    timeout: z.number().min(10000).max(120000).default(30000)
-  }).optional(),
-  extractionOptions: z.object({
-    selectors: z.record(z.string()).optional(),
-    includeMetadata: z.boolean().default(true),
-    includeLinks: z.boolean().default(true),
-    includeImages: z.boolean().default(true)
-  }).optional(),
-  continueOnActionError: z.boolean().default(false),
-  maxRetries: z.number().min(0).max(3).default(1),
-  screenshotOnError: z.boolean().default(true)
-});
-// Deep Research Tool Schema
-const DeepResearchSchema = z.object({
-  topic: z.string().min(3).max(500),
-  maxDepth: z.number().min(1).max(10).optional().default(5),
-  maxUrls: z.number().min(1).max(1000).optional().default(50),
-  timeLimit: z.number().min(30000).max(300000).optional().default(120000),
-  researchApproach: z.enum(['broad', 'focused', 'academic', 'current_events', 'comparative']).optional().default('broad'),
-  sourceTypes: z.array(z.enum(['academic', 'news', 'government', 'commercial', 'blog', 'wiki', 'any'])).optional().default(['any']),
-  credibilityThreshold: z.number().min(0).max(1).optional().default(0.3),
-  includeRecentOnly: z.boolean().optional().default(false),
-  enableConflictDetection: z.boolean().optional().default(true),
-  enableSourceVerification: z.boolean().optional().default(true),
-  enableSynthesis: z.boolean().optional().default(true),
-  outputFormat: z.enum(['comprehensive', 'summary', 'citations_only', 'conflicts_focus']).optional().default('comprehensive'),
-  includeRawData: z.boolean().optional().default(false),
-  includeActivityLog: z.boolean().optional().default(false),
-  queryExpansion: z.object({
-    enableSynonyms: z.boolean().optional().default(true),
-    enableSpellCheck: z.boolean().optional().default(true),
-    enableContextual: z.boolean().optional().default(true),
-    maxVariations: z.number().min(1).max(20).optional().default(8)
-  }).optional(),
-  llmConfig: z.object({
-    provider: z.enum(['auto', 'openai', 'anthropic']).optional().default('auto'),
-    openai: z.object({
-      apiKey: z.string().optional(),
-      model: z.string().optional().default('gpt-3.5-turbo'),
-      embeddingModel: z.string().optional().default('text-embedding-ada-002')
-    }).optional(),
-    anthropic: z.object({
-      apiKey: z.string().optional(),
-      model: z.string().optional().default('claude-3-haiku-20240307')
-    }).optional(),
-    enableSemanticAnalysis: z.boolean().optional().default(true),
-    enableIntelligentSynthesis: z.boolean().optional().default(true)
-  }).optional(),
-  concurrency: z.number().min(1).max(20).optional().default(5),
-  cacheResults: z.boolean().optional().default(true),
-  webhook: z.object({
-    url: z.string().url(),
-    events: z.array(z.enum(['started', 'progress', 'completed', 'failed'])).optional().default(['completed']),
-    headers: z.record(z.string()).optional()
-  }).optional()
-});
-// Change Tracking Tool Schema
-const TrackChangesSchema = z.object({
-  url: z.string().url(),
-  operation: z.enum(['create_baseline', 'compare', 'monitor', 'get_history', 'get_stats']).default('compare'),
-  content: z.string().optional(),
-  html: z.string().optional(),
-  trackingOptions: z.object({
-    granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),
-    trackText: z.boolean().default(true),
-    trackStructure: z.boolean().default(true),
-    trackAttributes: z.boolean().default(false),
-    trackImages: z.boolean().default(false),
-    trackLinks: z.boolean().default(true),
-    ignoreWhitespace: z.boolean().default(true),
-    ignoreCase: z.boolean().default(false),
-    customSelectors: z.array(z.string()).optional(),
-    excludeSelectors: z.array(z.string()).optional(),
-    significanceThresholds: z.object({
-      minor: z.number().min(0).max(1).default(0.1),
-      moderate: z.number().min(0).max(1).default(0.3),
-      major: z.number().min(0).max(1).default(0.7)
-    }).optional()
-  }).optional(),
-  monitoringOptions: z.object({
-    enabled: z.boolean().default(false),
-    interval: z.number().min(60000).max(24 * 60 * 60 * 1000).default(300000),
-    maxRetries: z.number().min(0).max(5).default(3),
-    retryDelay: z.number().min(1000).max(60000).default(5000),
-    notificationThreshold: z.enum(['minor', 'moderate', 'major', 'critical']).default('moderate'),
-    enableWebhook: z.boolean().default(false),
-    webhookUrl: z.string().url().optional(),
-    webhookSecret: z.string().optional()
-  }).optional(),
-  storageOptions: z.object({
-    enableSnapshots: z.boolean().default(true),
-    retainHistory: z.boolean().default(true),
-    maxHistoryEntries: z.number().min(1).max(1000).default(100),
-    compressionEnabled: z.boolean().default(true),
-    deltaStorageEnabled: z.boolean().default(true)
-  }).optional(),
-  queryOptions: z.object({
-    limit: z.number().min(1).max(500).default(50),
-    offset: z.number().min(0).default(0),
-    startTime: z.number().optional(),
-    endTime: z.number().optional(),
-    includeContent: z.boolean().default(false),
-    significanceFilter: z.enum(['all', 'minor', 'moderate', 'major', 'critical']).optional()
-  }).optional(),
-  notificationOptions: z.object({
-    webhook: z.object({
-      enabled: z.boolean().default(false),
-      url: z.string().url().optional(),
-      method: z.enum(['POST', 'PUT']).default('POST'),
-      headers: z.record(z.string()).optional(),
-      signingSecret: z.string().optional(),
-      includeContent: z.boolean().default(false)
-    }).optional(),
-    slack: z.object({
-      enabled: z.boolean().default(false),
-      webhookUrl: z.string().url().optional(),
-      channel: z.string().optional(),
-      username: z.string().optional()
-    }).optional()
-  }).optional()
-});
-// LLMs.txt Generator Tool Schema (Phase 2.5)
-const GenerateLLMsTxtSchema = z.object({
-  url: z.string().url(),
-  analysisOptions: z.object({
-    maxDepth: z.number().min(1).max(5).optional().default(3),
-    maxPages: z.number().min(10).max(500).optional().default(100),
-    detectAPIs: z.boolean().optional().default(true),
-    analyzeContent: z.boolean().optional().default(true),
-    checkSecurity: z.boolean().optional().default(true),
-    respectRobots: z.boolean().optional().default(true)
-  }).optional(),
-  outputOptions: z.object({
-    includeDetailed: z.boolean().optional().default(true),
-    includeAnalysis: z.boolean().optional().default(false),
-    contactEmail: z.string().email().optional(),
-    organizationName: z.string().optional(),
-    customGuidelines: z.array(z.string()).optional(),
-    customRestrictions: z.array(z.string()).optional()
-  }).optional(),
-  complianceLevel: z.enum(['basic', 'standard', 'strict']).optional().default('standard'),
-  format: z.enum(['both', 'llms-txt', 'llms-full-txt']).optional().default('both')
-});
-// Stealth Mode Tool Schema (Wave 3)
-const StealthModeSchema = z.object({
-  operation: z.enum(['configure', 'enable', 'disable', 'create_context', 'create_page', 'get_stats', 'cleanup']).default('configure'),
-  stealthConfig: z.object({
-    level: z.enum(['basic', 'medium', 'advanced']).default('medium'),
-    randomizeFingerprint: z.boolean().default(true),
-    hideWebDriver: z.boolean().default(true),
-    blockWebRTC: z.boolean().default(true),
-    spoofTimezone: z.boolean().default(true),
-    randomizeHeaders: z.boolean().default(true),
-    useRandomUserAgent: z.boolean().default(true),
-    simulateHumanBehavior: z.boolean().default(true),
-    customUserAgent: z.string().optional(),
-    customViewport: z.object({
-      width: z.number().min(800).max(1920),
-      height: z.number().min(600).max(1080)
-    }).optional(),
-    locale: z.string().default('en-US'),
-    timezone: z.string().optional(),
-    webRTCPublicIP: z.string().optional(),
-    webRTCLocalIPs: z.array(z.string()).optional(),
-    proxyRotation: z.object({
-      enabled: z.boolean().default(false),
-      proxies: z.array(z.string()).optional(),
-      rotationInterval: z.number().default(300000)
-    }).optional(),
-    antiDetection: z.object({
-      cloudflareBypass: z.boolean().default(true),
-      recaptchaHandling: z.boolean().default(true),
-      hideAutomation: z.boolean().default(true),
-      spoofMediaDevices: z.boolean().default(true),
-      spoofBatteryAPI: z.boolean().default(true)
-    }).optional(),
-    fingerprinting: z.object({
-      canvasNoise: z.boolean().default(true),
-      webglSpoofing: z.boolean().default(true),
-      audioContextSpoofing: z.boolean().default(true),
-      fontSpoofing: z.boolean().default(true),
-      hardwareSpoofing: z.boolean().default(true)
-    }).optional()
-  }).optional(),
-  contextId: z.string().optional(),
-  urlToTest: z.string().url().optional()
-});
-// Localization Tool Schema (Wave 3)
-const LocalizationSchema = z.object({
-  operation: z.enum(['configure_country', 'localize_search', 'localize_browser', 'generate_timezone_spoof', 'handle_geo_blocking', 'auto_detect', 'get_stats', 'get_supported_countries']).default('configure_country'),
-  countryCode: z.string().length(2).optional(),
-  language: z.string().optional(),
-  timezone: z.string().optional(),
-  currency: z.string().length(3).optional(),
-  customHeaders: z.record(z.string()).optional(),
-  userAgent: z.string().optional(),
-  acceptLanguage: z.string().optional(),
-  geoLocation: z.object({
-    latitude: z.number().min(-90).max(90),
-    longitude: z.number().min(-180).max(180),
-    accuracy: z.number().min(1).max(100).optional()
-  }).optional(),
-  proxySettings: z.object({
-    enabled: z.boolean().default(false),
-    region: z.string().optional(),
-    type: z.enum(['http', 'https', 'socks4', 'socks5']).default('https'),
-    server: z.string().optional(),
-    port: z.number().optional(),
-    username: z.string().optional(),
-    password: z.string().optional(),
-    rotation: z.object({
-      enabled: z.boolean().default(false),
-      interval: z.number().default(300000),
-      strategy: z.enum(['round-robin', 'random', 'failover']).default('round-robin')
-    }).optional(),
-    fallback: z.object({
-      enabled: z.boolean().default(true),
-      maxRetries: z.number().default(3),
-      timeout: z.number().default(10000)
-    }).optional()
-  }).optional(),
-  searchParams: z.object({
-    query: z.string().optional(),
-    limit: z.number().optional(),
-    offset: z.number().optional(),
-    headers: z.record(z.string()).optional()
-  }).optional(),
-  browserOptions: z.object({
-    locale: z.string().optional(),
-    timezoneId: z.string().optional(),
-    extraHTTPHeaders: z.record(z.string()).optional(),
-    userAgent: z.string().optional()
-  }).optional(),
-  content: z.string().optional(),
-  url: z.string().url().optional(),
-  response: z.object({
-    status: z.number(),
-    body: z.string().optional(),
-    statusText: z.string().optional()
-  }).optional()
-});
-// Utility function to fetch URL with error handling
-async function fetchWithTimeout(url, options = {}) {
-  const { timeout = 10000, headers = {} } = options;
-  const controller = new AbortController();
-  const timeoutId = setTimeout(() => controller.abort(), timeout);
-  try {
-    const response = await fetch(url, {
-      signal: controller.signal,
-      headers: {
-        'User-Agent': 'CrawlForge/1.0.0',
-        ...headers
-      }
-    });
-    clearTimeout(timeoutId);
-    return response;
-  } catch (error) {
-    clearTimeout(timeoutId);
-    if (error.name === 'AbortError') {
-      throw new Error(`Request timeout after ${timeout}ms`);
-    }
-    throw error;
-  }
-}
+// ─── Tool registrations ────────────────────────────────────────────────────────
-// Tool: fetch_url - Basic URL fetching with headers and response handling
+// Tool: fetch_url
 server.registerTool("fetch_url", {
   description: "Fetch content from a URL with optional headers and timeout",
   annotations: { title: "Fetch URL", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -650,45 +165,9 @@ server.registerTool("fetch_url", {
     headers: z.record(z.string()).optional().describe("Custom HTTP headers to include in the request"),
     timeout: z.number().min(1000).max(30000).optional().default(10000).describe("Request timeout in milliseconds (1000-30000)")
   }
-}, withAuth("fetch_url", async ({ url, headers, timeout }) => {
-  try {
-    const response = await fetchWithTimeout(url, {
-      timeout: timeout || 10000,
-      headers: headers || {}
-    });
-    const body = await response.text();
-    const responseHeaders = {};
-    response.headers.forEach((value, key) => {
-      responseHeaders[key] = value;
-    });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify({
-          status: response.status,
-          statusText: response.statusText,
-          headers: responseHeaders,
-          body: body,
-          contentType: response.headers.get('content-type') || 'unknown',
-          size: body.length,
-          url: response.url
-        }, null, 2)
-      }]
-    };
-  } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Failed to fetch URL: ${error.message}`
-      }],
-      isError: true
-    };
-  }
-}));
+}, withAuth("fetch_url", fetchUrlHandler));
-// Tool: extract_text - Extract clean text content from HTML
+// Tool: extract_text
 server.registerTool("extract_text", {
   description: "Extract clean text content from a webpage",
   annotations: { title: "Extract Text", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -697,53 +176,9 @@ server.registerTool("extract_text", {
     remove_scripts: z.boolean().optional().default(true).describe("Remove script tags before extraction"),
     remove_styles: z.boolean().optional().default(true).describe("Remove style tags before extraction")
   }
-}, withAuth("extract_text", async ({ url, remove_scripts, remove_styles }) => {
-  try {
-    const response = await fetchWithTimeout(url);
-    if (!response.ok) {
-      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-    }
-    const html = await response.text();
-    const $ = load(html);
-    // Remove unwanted elements
-    if (remove_scripts !== false) {
-      $('script').remove();
-    }
-    if (remove_styles !== false) {
-      $('style').remove();
-    }
-    // Remove common non-content elements
-    $('nav, header, footer, aside, .advertisement, .ad, .sidebar').remove();
-    // Extract text content
-    const text = $('body').text().replace(/\s+/g, ' ').trim();
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify({
-          text: text,
-          word_count: text.split(/\s+/).filter(word => word.length > 0).length,
-          char_count: text.length,
-          url: response.url
-        }, null, 2)
-      }]
-    };
-  } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Failed to extract text: ${error.message}`
-      }],
-      isError: true
-    };
-  }
-}));
+}, withAuth("extract_text", extractTextHandler));
-// Tool: extract_links - Extract all links from a webpage with optional filtering
+// Tool: extract_links
 server.registerTool("extract_links", {
   description: "Extract all links from a webpage with optional filtering",
   annotations: { title: "Extract Links", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -752,163 +187,18 @@ server.registerTool("extract_links", {
     filter_external: z.boolean().optional().default(false).describe("Only return external links"),
     base_url: z.string().url().optional().describe("Base URL for resolving relative links")
   }
-}, withAuth("extract_links", async ({ url, filter_external, base_url }) => {
-  try {
-    const response = await fetchWithTimeout(url);
-    if (!response.ok) {
-      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-    }
-    const html = await response.text();
-    const $ = load(html);
-    const baseUrl = base_url || new URL(url).origin;
-    const pageUrl = new URL(url);
-    const links = [];
-    $('a[href]').each((_, element) => {
-      const href = $(element).attr('href');
-      const text = $(element).text().trim();
-      if (!href) return;
-      let absoluteUrl;
-      let isExternal = false;
-      try {
-        if (href.startsWith('http://') || href.startsWith('https://')) {
-          absoluteUrl = href;
-          isExternal = new URL(href).origin !== pageUrl.origin;
-        } else {
-          absoluteUrl = new URL(href, baseUrl).toString();
-          isExternal = false;
-        }
-        // Apply filtering
-        if (filter_external && isExternal) {
-          return;
-        }
-        links.push({
-          href: absoluteUrl,
-          text: text,
-          is_external: isExternal,
-          original_href: href
-        });
-      } catch (urlError) {
-        // Skip invalid URLs
-      }
-    });
-    // Remove duplicates
-    const uniqueLinks = links.filter((link, index, arr) =>
-      arr.findIndex(l => l.href === link.href) === index
-    );
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify({
-          links: uniqueLinks,
-          total_count: uniqueLinks.length,
-          internal_count: uniqueLinks.filter(l => !l.is_external).length,
-          external_count: uniqueLinks.filter(l => l.is_external).length,
-          base_url: baseUrl
-        }, null, 2)
-      }]
-    };
-  } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Failed to extract links: ${error.message}`
-      }],
-      isError: true
-    };
-  }
-}));
+}, withAuth("extract_links", extractLinksHandler));
-// Tool: extract_metadata - Extract page metadata
+// Tool: extract_metadata
 server.registerTool("extract_metadata", {
   description: "Extract metadata from a webpage (title, description, keywords, etc.)",
   annotations: { title: "Extract Metadata", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
   inputSchema: {
     url: z.string().url().describe("The URL to extract metadata from")
   }
-}, withAuth("extract_metadata", async ({ url }) => {
-  try {
-    const response = await fetchWithTimeout(url);
-    if (!response.ok) {
-      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-    }
-    const html = await response.text();
-    const $ = load(html);
-    // Extract basic metadata
-    const title = $('title').text().trim() || $('h1').first().text().trim();
-    const description = $('meta[name="description"]').attr('content') ||
-                      $('meta[property="og:description"]').attr('content') || '';
-    const keywords = $('meta[name="keywords"]').attr('content') || '';
-    const canonical = $('link[rel="canonical"]').attr('href') || '';
-    // Extract Open Graph tags
-    const ogTags = {};
-    $('meta[property^="og:"]').each((_, element) => {
-      const property = $(element).attr('property');
-      const content = $(element).attr('content');
-      if (property && content) {
-        ogTags[property.replace('og:', '')] = content;
-      }
-    });
-    // Extract Twitter Card tags
-    const twitterTags = {};
-    $('meta[name^="twitter:"]').each((_, element) => {
-      const name = $(element).attr('name');
-      const content = $(element).attr('content');
-      if (name && content) {
-        twitterTags[name.replace('twitter:', '')] = content;
-      }
-    });
-    // Extract additional metadata
-    const author = $('meta[name="author"]').attr('content') || '';
-    const robots = $('meta[name="robots"]').attr('content') || '';
-    const viewport = $('meta[name="viewport"]').attr('content') || '';
-    const charset = $('meta[charset]').attr('charset') ||
-                   $('meta[http-equiv="Content-Type"]').attr('content') || '';
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify({
-          title: title,
-          description: description,
-          keywords: keywords.split(',').map(k => k.trim()).filter(k => k),
-          canonical_url: canonical,
-          author: author,
-          robots: robots,
-          viewport: viewport,
-          charset: charset,
-          og_tags: ogTags,
-          twitter_tags: twitterTags,
-          url: response.url
-        }, null, 2)
-      }]
-    };
-  } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Failed to extract metadata: ${error.message}`
-      }],
-      isError: true
-    };
-  }
-}));
+}, withAuth("extract_metadata", extractMetadataHandler));
-// Tool: scrape_structured - Extract structured data using CSS selectors
+// Tool: scrape_structured
 server.registerTool("scrape_structured", {
   description: "Extract structured data from a webpage using CSS selectors",
   annotations: { title: "Scrape Structured Data", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -916,63 +206,9 @@ server.registerTool("scrape_structured", {
     url: z.string().url().describe("The URL to scrape"),
     selectors: z.record(z.string()).describe("CSS selectors mapping field names to selectors")
   }
-}, withAuth("scrape_structured", async ({ url, selectors }) => {
-  try {
-    const response = await fetchWithTimeout(url);
-    if (!response.ok) {
-      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-    }
-    const html = await response.text();
-    const $ = load(html);
-    const results = {};
-    for (const [fieldName, selector] of Object.entries(selectors)) {
-      try {
-        const elements = $(selector);
-        if (elements.length === 0) {
-          results[fieldName] = null;
-        } else if (elements.length === 1) {
-          // Single element - return text content
-          results[fieldName] = elements.text().trim();
-        } else {
-          // Multiple elements - return array of text content
-          results[fieldName] = elements.map((_, el) => $(el).text().trim()).get();
-        }
-      } catch (selectorError) {
-        results[fieldName] = {
-          error: `Invalid selector: ${selector}`,
-          message: selectorError.message
-        };
-      }
-    }
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify({
-          data: results,
-          selectors_used: selectors,
-          elements_found: Object.keys(results).length,
-          url: response.url
-        }, null, 2)
-      }]
-    };
-  } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Failed to scrape structured data: ${error.message}`
-      }],
-      isError: true
-    };
-  }
-}));
+}, withAuth("scrape_structured", scrapeStructuredHandler));
-// Tool: search_web - Web search with configurable providers
-// Tool: search_web - Search the web using Google Search via CrawlForge proxy
+// Tool: search_web
 server.registerTool("search_web", {
   description: "Search the web using Google Search API (proxied through CrawlForge)",
   annotations: { title: "Search the Web", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -989,34 +225,16 @@ server.registerTool("search_web", {
 }, withAuth("search_web", async ({ query, limit, offset, lang, safe_search, time_range, site, file_type }) => {
   try {
     if (!query) {
-      return {
-        content: [{
-          type: "text",
-          text: "Query parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "Query parameter is required" }], isError: true };
     }
     const result = await searchWebTool.execute({ query, limit, offset, lang, safe_search, time_range, site, file_type });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Search failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Search failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: crawl_deep - Deep crawl websites with BFS algorithm
+// Tool: crawl_deep
 server.registerTool("crawl_deep", {
   description: "Crawl websites deeply using breadth-first search",
   annotations: { title: "Deep Crawl", readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
@@ -1034,34 +252,16 @@ server.registerTool("crawl_deep", {
 }, withAuth("crawl_deep", async ({ url, max_depth, max_pages, include_patterns, exclude_patterns, follow_external, respect_robots, extract_content, concurrency }) => {
   try {
     if (!url) {
-      return {
-        content: [{
-          type: "text",
-          text: "URL parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "URL parameter is required" }], isError: true };
     }
     const result = await crawlDeepTool.execute({ url, max_depth, max_pages, include_patterns, exclude_patterns, follow_external, respect_robots, extract_content, concurrency });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Crawl failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Crawl failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: map_site - Discover and map website structure
+// Tool: map_site
 server.registerTool("map_site", {
   description: "Discover and map website structure",
   annotations: { title: "Map Website", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -1075,36 +275,16 @@ server.registerTool("map_site", {
 }, withAuth("map_site", async ({ url, include_sitemap, max_urls, group_by_path, include_metadata }) => {
   try {
     if (!url) {
-      return {
-        content: [{
-          type: "text",
-          text: "URL parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "URL parameter is required" }], isError: true };
     }
     const result = await mapSiteTool.execute({ url, include_sitemap, max_urls, group_by_path, include_metadata });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Site mapping failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Site mapping failed: ${error.message}` }], isError: true };
   }
 }));
-// Phase 3 Tools: Enhanced Content Processing
-// Tool: extract_content - Enhanced content extraction with readability detection
+// Tool: extract_content
 server.registerTool("extract_content", {
   description: "Extract and analyze main content from web pages with enhanced readability detection",
   annotations: { title: "Extract Content", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -1115,34 +295,16 @@ server.registerTool("extract_content", {
 }, withAuth("extract_content", async ({ url, options }) => {
   try {
     if (!url) {
-      return {
-        content: [{
-          type: "text",
-          text: "URL parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "URL parameter is required" }], isError: true };
     }
     const result = await extractContentTool.execute({ url, options });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Content extraction failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Content extraction failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: process_document - Multi-format document processing
+// Tool: process_document
 server.registerTool("process_document", {
   description: "Process documents from multiple sources and formats including PDFs and web pages",
   annotations: { title: "Process Document", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -1154,34 +316,16 @@ server.registerTool("process_document", {
 }, withAuth("process_document", async ({ source, sourceType, options }) => {
   try {
     if (!source) {
-      return {
-        content: [{
-          type: "text",
-          text: "Source parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "Source parameter is required" }], isError: true };
     }
     const result = await processDocumentTool.execute({ source, sourceType, options });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Document processing failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Document processing failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: summarize_content - Intelligent content summarization
+// Tool: summarize_content
 server.registerTool("summarize_content", {
   description: "Generate intelligent summaries of text content with configurable options",
   annotations: { title: "Summarize Content", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
@@ -1192,34 +336,16 @@ server.registerTool("summarize_content", {
 }, withAuth("summarize_content", async ({ text, options }) => {
   try {
     if (!text) {
-      return {
-        content: [{
-          type: "text",
-          text: "Text parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "Text parameter is required" }], isError: true };
     }
     const result = await summarizeContentTool.execute({ text, options });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Content summarization failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Content summarization failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: analyze_content - Comprehensive content analysis
+// Tool: analyze_content
 server.registerTool("analyze_content", {
   description: "Perform comprehensive content analysis including language detection and topic extraction",
   annotations: { title: "Analyze Content", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
@@ -1230,38 +356,16 @@ server.registerTool("analyze_content", {
 }, withAuth("analyze_content", async ({ text, options }) => {
   try {
     if (!text) {
-      return {
-        content: [{
-          type: "text",
-          text: "Text parameter is required"
-        }],
-        isError: true
-      };
+      return { content: [{ type: "text", text: "Text parameter is required" }], isError: true };
     }
     const result = await analyzeContentTool.execute({ text, options });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Content analysis failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Content analysis failed: ${error.message}` }], isError: true };
   }
 }));
-// Phase 1: LLM-Powered Structured Extraction
-// Tool: extract_structured - Extract structured data from a URL using LLM and JSON Schema
+// Tool: extract_structured
 server.registerTool("extract_structured", {
   description: "Extract structured data from a webpage using LLM-powered analysis and a JSON Schema. Falls back to CSS selector extraction when no LLM provider is configured.",
   annotations: { title: "Extract Structured Data", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -1282,35 +386,36 @@ server.registerTool("extract_structured", {
   }
 }, withAuth("extract_structured", async ({ url, schema, prompt, llmConfig, fallbackToSelectors, selectorHints }) => {
   try {
-    const result = await extractStructuredTool.execute({
-      url,
-      schema,
-      prompt,
-      llmConfig,
-      fallbackToSelectors,
-      selectorHints
-    });
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    const result = await extractStructuredTool.execute({ url, schema, prompt, llmConfig, fallbackToSelectors, selectorHints });
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Structured extraction failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Structured extraction failed: ${error.message}` }], isError: true };
   }
 }));
+// Tool: extract_with_llm
+server.registerTool("extract_with_llm", {
+  description: "Extract structured data from a URL or text using a natural-language prompt, powered by OpenAI or Anthropic. Requires OPENAI_API_KEY or ANTHROPIC_API_KEY in the environment.",
+  annotations: { title: "Extract With LLM", readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
+  inputSchema: {
+    url: z.string().url().optional().describe("URL to fetch and extract from (one of url/content required)"),
+    content: z.string().optional().describe("Pre-fetched text to extract from (one of url/content required)"),
+    prompt: z.string().describe("Natural-language extraction instruction"),
+    schema: z.record(z.unknown()).optional().describe("Optional JSON-schema-like hint for output shape"),
+    provider: z.enum(["openai", "anthropic", "auto"]).optional().default("auto").describe("LLM provider"),
+    model: z.string().optional().describe("Override default model"),
+    maxTokens: z.number().optional().default(4096).describe("Maximum output tokens")
+  }
+}, withAuth("extract_with_llm", async (params) => {
+  try {
+    const result = await extractWithLlmTool.execute(params);
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
+  } catch (error) {
+    return { content: [{ type: "text", text: `LLM extraction failed: ${error.message}` }], isError: true };
+  }
+}));
-// Wave 2 Advanced Tools
-// Tool: batch_scrape - Process multiple URLs simultaneously with job management
+// Tool: batch_scrape
 server.registerTool("batch_scrape", {
   description: "Process multiple URLs simultaneously with support for async job management and webhook notifications",
   annotations: { title: "Batch Scrape", readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
@@ -1349,24 +454,13 @@ server.registerTool("batch_scrape", {
 }, withAuth("batch_scrape", async (params) => {
   try {
     const result = await batchScrapeTool.execute(params);
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Batch scrape failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Batch scrape failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: scrape_with_actions - Execute action chains before scraping
+// Tool: scrape_with_actions
 server.registerTool("scrape_with_actions", {
   description: "Execute browser action chains before scraping content, with form auto-fill and intermediate state capture",
   annotations: { title: "Scrape with Browser Actions", readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
@@ -1416,24 +510,13 @@ server.registerTool("scrape_with_actions", {
 }, withAuth("scrape_with_actions", async (params) => {
   try {
     const result = await scrapeWithActionsTool.execute(params);
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Scrape with actions failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Scrape with actions failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: deep_research - Comprehensive multi-stage research with source verification
+// Tool: deep_research
 server.registerTool("deep_research", {
   description: "Conduct comprehensive multi-stage research with intelligent query expansion, source verification, and conflict detection",
   annotations: { title: "Deep Research", readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
@@ -1483,42 +566,22 @@ server.registerTool("deep_research", {
 }, withAuth("deep_research", async (params) => {
   try {
     const result = await deepResearchTool.execute(params);
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Deep research failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Deep research failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: track_changes - Enhanced Content change tracking with baseline capture and monitoring (Phase 2.4)
+// Tool: track_changes
 server.registerTool("track_changes", {
   description: "Enhanced content change tracking with baseline capture, comparison, scheduled monitoring, advanced comparison engine, alert system, and historical analysis",
   annotations: { title: "Track Changes", readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
   inputSchema: {
     url: z.string().url().describe("The URL to track changes for"),
     operation: z.enum([
-      'create_baseline',
-      'compare',
-      'monitor',
-      'get_history',
-      'get_stats',
-      'create_scheduled_monitor',
-      'stop_scheduled_monitor',
-      'get_dashboard',
-      'export_history',
-      'create_alert_rule',
-      'generate_trend_report',
-      'get_monitoring_templates'
+      'create_baseline', 'compare', 'monitor', 'get_history', 'get_stats',
+      'create_scheduled_monitor', 'stop_scheduled_monitor', 'get_dashboard',
+      'export_history', 'create_alert_rule', 'generate_trend_report', 'get_monitoring_templates'
     ]).default('compare').describe("Tracking operation to perform"),
     content: z.string().optional().describe("Content to compare against baseline"),
     html: z.string().optional().describe("HTML content to compare against baseline"),
@@ -1580,15 +643,14 @@ server.registerTool("track_changes", {
         username: z.string().optional()
       }).optional()
     }).optional().describe("Notification configuration for webhooks and Slack"),
-    // Enhanced Phase 2.4 options
     scheduledMonitorOptions: z.object({
-      schedule: z.string().optional(), // Cron expression
-      templateId: z.string().optional(), // Monitoring template ID
+      schedule: z.string().optional(),
+      templateId: z.string().optional(),
       enabled: z.boolean().default(true)
     }).optional().describe("Scheduled monitoring options with cron expressions"),
     alertRuleOptions: z.object({
       ruleId: z.string().optional(),
-      condition: z.string().optional(), // Condition description
+      condition: z.string().optional(),
       actions: z.array(z.enum(['webhook', 'email', 'slack'])).optional(),
       throttle: z.number().min(0).optional(),
       priority: z.enum(['low', 'medium', 'high']).optional()
@@ -1609,24 +671,13 @@ server.registerTool("track_changes", {
 }, withAuth("track_changes", async (params) => {
   try {
     const result = await trackChangesTool.execute(params);
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Change tracking failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Change tracking failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: generate_llms_txt - Generate LLMs.txt and LLMs-full.txt files (Phase 2.5)
+// Tool: generate_llms_txt
 server.registerTool("generate_llms_txt", {
   description: "Analyze websites and generate standard-compliant LLMs.txt and LLMs-full.txt files defining AI model interaction guidelines",
   annotations: { title: "Generate llms.txt", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
@@ -1654,24 +705,13 @@ server.registerTool("generate_llms_txt", {
 }, withAuth("generate_llms_txt", async (params) => {
   try {
     const result = await generateLLMsTxtTool.execute(params);
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `LLMs.txt generation failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `LLMs.txt generation failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: stealth_mode - Advanced anti-detection browser management (Wave 3)
+// Tool: stealth_mode
 server.registerTool("stealth_mode", {
   description: "Advanced anti-detection browser management with stealth features, fingerprint randomization, and human behavior simulation",
   annotations: { title: "Stealth Mode", readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
@@ -1721,7 +761,6 @@ server.registerTool("stealth_mode", {
 }, withAuth("stealth_mode", async ({ operation, stealthConfig, contextId, urlToTest }) => {
   try {
     let result;
     switch (operation) {
       case 'configure':
         if (stealthConfig) {
@@ -1731,69 +770,42 @@ server.registerTool("stealth_mode", {
           result = { error: 'stealthConfig is required for configure operation' };
         }
         break;
       case 'enable':
         stealthBrowserManager.enableStealthMode(stealthConfig?.level || 'medium');
         result = { enabled: true, level: stealthConfig?.level || 'medium' };
         break;
       case 'disable':
         stealthBrowserManager.disableStealthMode();
         result = { disabled: true };
         break;
-      case 'create_context':
+      case 'create_context': {
         const contextData = await stealthBrowserManager.createStealthContext(stealthConfig);
-        result = {
-          contextId: contextData.contextId,
-          fingerprint: contextData.fingerprint,
-          created: true
-        };
+        result = { contextId: contextData.contextId, fingerprint: contextData.fingerprint, created: true };
         break;
-      case 'create_page':
-        if (!contextId) {
-          throw new Error('contextId is required for create_page operation');
-        }
+      }
+      case 'create_page': {
+        if (!contextId) throw new Error('contextId is required for create_page operation');
         const page = await stealthBrowserManager.createStealthPage(contextId);
-        result = {
-          pageCreated: true,
-          contextId: contextId,
-          url: urlToTest ? await page.goto(urlToTest) : null
-        };
+        result = { pageCreated: true, contextId, url: urlToTest ? await page.goto(urlToTest) : null };
         break;
+      }
       case 'get_stats':
         result = stealthBrowserManager.getStats();
         break;
       case 'cleanup':
         await stealthBrowserManager.cleanup();
         result = { cleaned: true };
         break;
       default:
         result = { error: `Unknown operation: ${operation}` };
     }
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Stealth mode operation failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Stealth mode operation failed: ${error.message}` }], isError: true };
   }
 }));
-// Tool: localization - Multi-language and geo-location management (Wave 3)
+// Tool: localization
 server.registerTool("localization", {
   description: "Multi-language and geo-location management with country-specific settings, browser locale emulation, timezone spoofing, and geo-blocked content handling",
   annotations: { title: "Localization", readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
@@ -1854,186 +866,110 @@ server.registerTool("localization", {
   try {
     const { operation } = params;
     let result;
     switch (operation) {
       case 'configure_country':
-        if (!params.countryCode) {
-          throw new Error('countryCode is required for configure_country operation');
-        }
+        if (!params.countryCode) throw new Error('countryCode is required for configure_country operation');
         result = await localizationManager.configureCountry(params.countryCode, params);
         break;
       case 'localize_search':
-        if (!params.searchParams) {
-          throw new Error('searchParams is required for localize_search operation');
-        }
+        if (!params.searchParams) throw new Error('searchParams is required for localize_search operation');
         result = await localizationManager.localizeSearchQuery(params.searchParams, params.countryCode);
         break;
       case 'localize_browser':
-        if (!params.browserOptions) {
-          throw new Error('browserOptions is required for localize_browser operation');
-        }
+        if (!params.browserOptions) throw new Error('browserOptions is required for localize_browser operation');
         result = await localizationManager.localizeBrowserContext(params.browserOptions, params.countryCode);
         break;
       case 'generate_timezone_spoof':
         result = {
           timezoneScript: await localizationManager.generateTimezoneSpoof(params.countryCode),
           countryCode: params.countryCode || localizationManager.getCurrentSettings().countryCode
         };
         break;
       case 'handle_geo_blocking':
-        if (!params.url || !params.response) {
-          throw new Error('url and response are required for handle_geo_blocking operation');
-        }
+        if (!params.url || !params.response) throw new Error('url and response are required for handle_geo_blocking operation');
         result = await localizationManager.handleGeoBlocking(params.url, params.response);
         break;
       case 'auto_detect':
-        if (!params.content || !params.url) {
-          throw new Error('content and url are required for auto_detect operation');
-        }
+        if (!params.content || !params.url) throw new Error('content and url are required for auto_detect operation');
         result = await localizationManager.autoDetectLocalization(params.content, params.url);
         break;
       case 'get_stats':
         result = localizationManager.getStats();
         break;
       case 'get_supported_countries':
         result = {
           supportedCountries: localizationManager.getSupportedCountries(),
           totalCount: localizationManager.getSupportedCountries().length
         };
         break;
       default:
         result = { error: `Unknown operation: ${operation}` };
     }
-    return {
-      content: [{
-        type: "text",
-        text: JSON.stringify(result, null, 2)
-      }]
-    };
+    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
   } catch (error) {
-    return {
-      content: [{
-        type: "text",
-        text: `Localization operation failed: ${error.message}`
-      }],
-      isError: true
-    };
+    return { content: [{ type: "text", text: `Localization operation failed: ${error.message}` }], isError: true };
   }
 }));
-// Determine transport mode: HTTP if --http flag or MCP_HTTP env var is set
+// ─── Transport + startup ───────────────────────────────────────────────────────
 const useHttp = process.argv.includes('--http') || process.env.MCP_HTTP === 'true';
+const useLegacyHttp = process.argv.includes('--legacy-http') || process.env.CRAWLFORGE_LEGACY_HTTP === 'true';
-// Set up transport and start the server
 async function runServer() {
   if (useHttp) {
     const port = parseInt(process.env.PORT || '3000', 10);
-    // Stateless transport — no session tracking, each request is independent
-    // This avoids the bug where server.connect(newTransport) kills previous sessions
-    const transport = new StreamableHTTPServerTransport({
-      sessionIdGenerator: undefined,
-    });
-    await server.connect(transport);
-    const httpServer = createServer(async (req, res) => {
-      // CORS headers for Smithery gateway
-      res.setHeader('Access-Control-Allow-Origin', '*');
-      res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
-      res.setHeader('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id');
-      res.setHeader('Access-Control-Expose-Headers', 'mcp-session-id');
-      if (req.method === 'OPTIONS') {
-        res.writeHead(204);
-        res.end();
-        return;
-      }
-      // Health check endpoint
-      if (req.url === '/health') {
-        res.writeHead(200, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({ status: 'ok', version: '3.0' }));
-        return;
-      }
-      // MCP server card for Smithery discovery
-      if (req.url === '/.well-known/mcp/server-card.json') {
-        res.writeHead(200, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({
-          serverInfo: {
-            name: "crawlforge",
-            version: "3.0.12",
-            description: "Production-ready MCP server with 20 web scraping, crawling, and content processing tools. Features stealth browsing, deep research, structured extraction, and change tracking.",
-            homepage: "https://www.crawlforge.dev",
-            icon: "https://www.crawlforge.dev/icon.png"
-          },
-          transport: {
-            type: "streamable-http",
-            url: "/mcp"
-          },
-          configSchema: {
-            type: "object",
-            properties: {
-              apiKey: {
-                type: "string",
-                title: "CrawlForge API Key",
-                description: "Your CrawlForge API key. Get one free at https://www.crawlforge.dev/signup (includes 1,000 credits)",
-                "x-from": { header: "x-api-key" }
-              }
-            },
-            required: ["apiKey"]
-          }
-        }));
-        return;
-      }
-      // Route /mcp to the transport handler
-      if (req.url === '/mcp' || req.url === '/') {
-        await transport.handleRequest(req, res);
-        return;
+    if (useLegacyHttp) {
+      // One-release deprecation window for stateless legacy transport.
+      console.error('WARNING: --legacy-http is deprecated and will be removed in v3.3.0. Use the default Streamable HTTP transport.');
+      await connectHttp(server, AuthManager, logger, port);
+    } else {
+      // OAuth (opt-in)
+      let oauthProvider = null;
+      if (process.env.CRAWLFORGE_OAUTH_ENABLED === 'true') {
+        const issuer = process.env.CRAWLFORGE_OAUTH_ISSUER || `http://localhost:${port}`;
+        const apiKey = AuthManager.getConfig()?.apiKey;
+        if (!apiKey) {
+          console.error('OAuth enabled but no CrawlForge API key is configured — falling back to static-key auth.');
+        } else {
+          oauthProvider = createOAuthProvider({ issuer, apiKey, logger });
+          console.error(`OAuth 2.1 enabled — discovery at ${issuer}/.well-known/oauth-authorization-server`);
+        }
       }
-      res.writeHead(404);
-      res.end('Not Found');
-    });
-    httpServer.listen(port, () => {
-      console.error(`CrawlForge MCP Server v3.0 running on HTTP port ${port}`);
-      console.error(`MCP endpoint: http://localhost:${port}/mcp`);
-      console.error(`Health check: http://localhost:${port}/health`);
-    });
+      await connectStreamableHttp(server, AuthManager, logger, {
+        port,
+        legacy: false,
+        oauth: oauthProvider,
+        metrics
+      });
+    }
   } else {
-    const transport = new StdioServerTransport();
-    await server.connect(transport);
-    console.error("CrawlForge MCP Server v3.0 running on stdio");
+    await connectStdio(server);
   }
   console.error(`Environment: ${config.server.nodeEnv}`);
   console.error("Search enabled: true (via CrawlForge proxy)");
-  const baseTools = "fetch_url, extract_text, extract_links, extract_metadata, scrape_structured, crawl_deep, map_site";
-  const searchTool = ", search_web";
-  const phase3Tools = ", extract_content, process_document, summarize_content, analyze_content";
-  const wave2Tools = ", batch_scrape, scrape_with_actions";
-  const researchTools = ", deep_research";
-  const trackingTools = ", track_changes";
-  const llmsTxtTools = ", generate_llms_txt";
-  const wave3Tools = ", stealth_mode, localization";
-  const phase1Tools = ", extract_structured";
-  console.error(`Tools available: ${baseTools}${searchTool}${phase3Tools}${wave2Tools}${researchTools}${trackingTools}${llmsTxtTools}${wave3Tools}${phase1Tools}`);
+  const allTools = [
+    "fetch_url", "extract_text", "extract_links", "extract_metadata", "scrape_structured",
+    "search_web", "crawl_deep", "map_site",
+    "extract_content", "process_document", "summarize_content", "analyze_content",
+    "batch_scrape", "scrape_with_actions",
+    "deep_research", "track_changes", "generate_llms_txt",
+    "stealth_mode", "localization", "extract_structured", "extract_with_llm"
+  ];
+  console.error(`Tools available: ${allTools.join(', ')}`);
-// === MEMORY LEAK PREVENTION ===
-// Add graceful shutdown handling to prevent memory leaks
+  // Start memory monitoring in development
+  if (config.server.nodeEnv === "development") {
+    memoryMonitor.start();
+    console.error("Memory monitoring started");
+  }
+}
+// ─── Graceful shutdown ─────────────────────────────────────────────────────────
 let isShuttingDown = false;
@@ -2042,26 +978,19 @@ async function gracefulShutdown(signal) {
     console.error("Force shutdown...");
     process.exit(1);
   }
   isShuttingDown = true;
   console.error(`Received ${signal}. Starting graceful shutdown...`);
   try {
-    // Cleanup tools that have destroy methods
     const toolsToCleanup = [
-      batchScrapeTool,
-      scrapeWithActionsTool,
-      deepResearchTool,
-      trackChangesTool,
-      generateLLMsTxtTool,
-      stealthBrowserManager,
-      localizationManager,
-      extractStructuredTool
+      batchScrapeTool, scrapeWithActionsTool, deepResearchTool,
+      trackChangesTool, generateLLMsTxtTool, stealthBrowserManager,
+      localizationManager, extractStructuredTool
     ].filter(tool => tool && (typeof tool.destroy === 'function' || typeof tool.cleanup === 'function'));
     console.error(`Cleaning up ${toolsToCleanup.length} tools...`);
-    // Cleanup tools with timeout
     await Promise.race([
       Promise.all(toolsToCleanup.map(async (tool) => {
         try {
@@ -2075,40 +1004,33 @@ async function gracefulShutdown(signal) {
           console.error(`Error cleaning up ${tool.constructor.name}:`, error.message);
         }
       })),
-      new Promise(resolve => setTimeout(resolve, 5000)) // 5 second timeout
+      new Promise(resolve => setTimeout(resolve, 5000))
     ]);
-    // Stop memory monitoring
     if (memoryMonitor.isMonitoring) {
       memoryMonitor.stop();
       console.error("Memory monitoring stopped");
     }
-    // Force garbage collection if available
     if (global.gc) {
       console.error("Running final garbage collection...");
       global.gc();
     }
     console.error("Graceful shutdown completed");
     process.exit(0);
   } catch (error) {
     console.error("Error during graceful shutdown:", error);
     process.exit(1);
   }
 }
-// Register signal handlers
 process.on('SIGINT', () => gracefulShutdown('SIGINT'));
 process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
-// Handle uncaught exceptions and unhandled rejections
 process.on('uncaughtException', (error) => {
   console.error('Uncaught Exception:', error);
   gracefulShutdown('uncaughtException');
 });
 process.on('unhandledRejection', (reason, promise) => {
   console.error('Unhandled Rejection at:', promise, 'reason:', reason);
   gracefulShutdown('unhandledRejection');
@@ -2119,17 +1041,10 @@ if (config.server.nodeEnv === 'development') {
   setInterval(() => {
     const usage = process.memoryUsage();
     const memoryMB = (usage.heapUsed / 1024 / 1024).toFixed(2);
-    if (memoryMB > 200) { // Alert if over 200MB
+    if (memoryMB > 200) {
       console.error(`Memory usage: ${memoryMB}MB (high usage detected)`);
     }
-  }, 60000); // Check every minute
-}
-  // Start memory monitoring in development
-  if (config.server.nodeEnv === "development") {
-    memoryMonitor.start();
-    console.error("Memory monitoring started");
-  }
+  }, 60000);
 }
 runServer().catch((error) => {