npm - crawlforge-mcp-server - Versions diffs - 4.2.11 → 4.5.0 - Mend

crawlforge-mcp-server 4.2.11 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

package/package.json +2 -1
package/server.js +152 -21
package/src/constants/config.js +5 -0
package/src/core/ActionExecutor.js +13 -1
package/src/core/ChangeTracker.js +8 -5
package/src/core/LLMsTxtAnalyzer.js +71 -47
package/src/core/LocalizationManager.js +7 -4
package/src/core/ResearchOrchestrator.js +10 -6
package/src/core/StealthBrowserManager.js +111 -40
package/src/core/analysis/ContentAnalyzer.js +2 -2
package/src/core/crawlers/BFSCrawler.js +23 -12
package/src/core/processing/ContentProcessor.js +19 -3
package/src/core/processing/PDFProcessor.js +72 -23
package/src/tools/advanced/ScrapeWithActionsTool.js +63 -25
package/src/tools/advanced/batchScrape/index.js +3 -1
package/src/tools/advanced/batchScrape/reporter.js +5 -1
package/src/tools/advanced/batchScrape/worker.js +6 -1
package/src/tools/basic/_fetch.js +78 -5
package/src/tools/basic/extractLinks.js +1 -1
package/src/tools/basic/extractMetadata.js +65 -1
package/src/tools/basic/extractText.js +61 -5
package/src/tools/basic/scrapeStructured.js +48 -10
package/src/tools/crawl/crawlDeep.js +13 -5
package/src/tools/crawl/mapSite.js +24 -51
package/src/tools/extract/analyzeContent.js +11 -6
package/src/tools/extract/extractContent.js +23 -5
package/src/tools/extract/extractStructured.js +65 -16
package/src/tools/extract/extractWithLlm.js +192 -11
package/src/tools/extract/listOllamaModels.js +19 -8
package/src/tools/extract/processDocument.js +10 -4
package/src/tools/extract/summarizeContent.js +58 -1
package/src/tools/llmstxt/generateLLMsTxt.js +124 -3
package/src/tools/research/deepResearch.js +43 -4
package/src/tools/search/providers/searxng.js +2 -2
package/src/tools/search/ranking/ResultDeduplicator.js +32 -9
package/src/tools/search/ranking/ResultRanker.js +13 -4
package/src/tools/search/searchWeb.js +5 -5
package/src/tools/templates/TemplateRegistry.js +3 -2
package/src/tools/tracking/trackChanges/differ.js +33 -1
package/src/utils/htmlToMarkdown.js +5 -1

package/src/tools/extract/extractContent.js CHANGED Viewed

@@ -124,7 +124,7 @@ export class ExtractContentTool {
     try {
       const validated = ExtractContentSchema.parse(params);
-      const { url, options } = validated;
+      const { url, html: providedHtml, options } = validated;
       const result = {
         url,
@@ -133,10 +133,16 @@ export class ExtractContentTool {
         processingTime: 0
       };
-      // Step 1: Fetch content (with or without JavaScript rendering)
+      // Step 1: Fetch content (with or without JavaScript rendering).
+      // If pre-rendered HTML is supplied (e.g. post-action page from
+      // scrape_with_actions), use it directly and skip the network fetch.
       let html, pageTitle;
+      if (providedHtml) {
+        html = providedHtml;
+        pageTitle = this.extractTitleFromHTML(html);
+      } else {
       const shouldUseJavaScript = options.requiresJavaScript || await this.shouldUseJavaScript(url);
       if (shouldUseJavaScript) {
         console.error('Using browser rendering for JavaScript content...');
         const browserResult = await this.browserProcessor.processURL({
@@ -162,7 +168,7 @@ export class ExtractContentTool {
           headers: {
             'User-Agent': 'Mozilla/5.0 (compatible; MCP-WebScraper/3.0; Enhanced-Content-Extractor)'
           },
-          timeout: 15000
+          signal: AbortSignal.timeout(15000)
         });
         if (!response.ok) {
@@ -172,6 +178,7 @@ export class ExtractContentTool {
         html = await response.text();
         pageTitle = this.extractTitleFromHTML(html);
       }
+      }
       result.title = pageTitle;
@@ -194,6 +201,9 @@ export class ExtractContentTool {
         result.content = {
           text: processingResult.readability.textContent || processingResult.readability.content,
         };
+        result.extractionMethod = 'readability';
+        result.confidence = 0.9;
+        result.finalUrl = url;
         // Convert to markdown if requested
         if (options.outputFormat === 'markdown') {
@@ -203,6 +213,10 @@ export class ExtractContentTool {
         result.content = {
           text: processingResult.fallback_content.content
         };
+        result.extractionMethod = 'fallback_boilerplate_removal';
+        result.fallback_reason = 'Readability did not detect an article; used boilerplate-removal fallback';
+        result.confidence = 0.5;
+        result.finalUrl = url;
       } else {
         // Last resort: extract text from HTML
         result.content = {
@@ -213,6 +227,10 @@ export class ExtractContentTool {
             includeImageAlt: true
           })
         };
+        result.extractionMethod = 'raw_body_text';
+        result.fallback_reason = 'Neither Readability nor boilerplate-removal yielded content; extracted raw body text';
+        result.confidence = 0.2;
+        result.finalUrl = url;
       }
       // Include HTML if requested
@@ -314,4 +332,4 @@ export class ExtractContentTool {
   }
 }
-export default ExtractContentTool;
+export default ExtractContentTool;

package/src/tools/extract/extractStructured.js CHANGED Viewed

@@ -8,6 +8,11 @@ import { z } from 'zod';
 import { ElicitationHelper } from '../../core/ElicitationHelper.js'; // D1.4
 import { load } from 'cheerio';
 import { LLMManager } from '../../core/llm/LLMManager.js';
+import { createRequire } from 'module';
+const _require = createRequire(import.meta.url);
+const _pkg = _require('../../../package.json');
+const CRAWLFORGE_UA = `CrawlForge/${_pkg.version} (+https://crawlforge.dev)`;
 import { fetchAndParse } from './_fetchAndParse.js';
 const ExtractStructuredSchema = z.object({
@@ -30,7 +35,7 @@ export class ExtractStructuredTool {
   constructor(options = {}) {
     this.llmManager = null;
     this.llmConfig = options.llmConfig || {};
-    this.userAgent = 'Mozilla/5.0 (compatible; CrawlForge-MCP/3.0; ExtractStructured)';
+    this.userAgent = CRAWLFORGE_UA;
     // D1.4: Elicitation helper
     this._elicitation = new ElicitationHelper({});
   }
@@ -129,7 +134,8 @@ export class ExtractStructuredTool {
         validation: {
           valid: extractionResult.valid || false,
           errors: extractionResult.validationErrors || []
-        }
+        },
+        extractionNotes: extractionResult.extractionNotes || []
       };
     } catch (error) {
@@ -156,20 +162,53 @@ export class ExtractStructuredTool {
     let fieldsFound = 0;
     for (const [key, fieldSchema] of Object.entries(properties)) {
+      const isArrayField = fieldSchema.type === 'array';
       // Use explicit selector hint if provided
       const selector = selectorHints[key];
       if (selector) {
-        const el = $(selector);
-        if (el.length > 0) {
-          const rawValue = el.first().text().trim();
-          if (rawValue) {
-            extracted[key] = this._coerceValue(rawValue, fieldSchema);
-            fieldsFound++;
-            continue;
+        const els = $(selector);
+        if (els.length > 0) {
+          if (isArrayField || els.length > 1) {
+            const values = els.map((_, el) => $(el).text().trim()).get().filter(Boolean);
+            if (values.length > 0) {
+              extracted[key] = values;
+              fieldsFound++;
+              continue;
+            }
+          } else {
+            const rawValue = els.first().text().trim();
+            if (rawValue) {
+              extracted[key] = this._coerceValue(rawValue, fieldSchema);
+              fieldsFound++;
+              continue;
+            }
           }
         }
       }
+      // For array fields: detect ul/ol > li patterns before meta/common selectors
+      if (isArrayField) {
+        const listSelectors = [
+          `ul.${key} > li`, `ol.${key} > li`,
+          `#${key} > li`, `[data-${key}] > li`,
+          `ul[class*="${key}"] > li`, `ol[class*="${key}"] > li`
+        ];
+        let listValues = null;
+        for (const lsel of listSelectors) {
+          const items = $(lsel);
+          if (items.length > 0) {
+            listValues = items.map((_, el) => $(el).text().trim()).get().filter(Boolean);
+            break;
+          }
+        }
+        if (listValues && listValues.length > 0) {
+          extracted[key] = listValues;
+          fieldsFound++;
+          continue;
+        }
+      }
       // Try common patterns: meta tags, headings, semantic elements
       const metaContent = $(`meta[name="${key}"], meta[property="${key}"], meta[property="og:${key}"]`).attr('content');
       if (metaContent) {
@@ -189,11 +228,20 @@ export class ExtractStructuredTool {
       for (const sel of commonSelectors) {
         const el = $(sel);
         if (el.length > 0) {
-          const rawValue = el.first().text().trim();
-          if (rawValue) {
-            extracted[key] = this._coerceValue(rawValue, fieldSchema);
-            fieldsFound++;
-            break;
+          if (isArrayField && el.length > 1) {
+            const values = el.map((_, item) => $(item).text().trim()).get().filter(Boolean);
+            if (values.length > 0) {
+              extracted[key] = values;
+              fieldsFound++;
+              break;
+            }
+          } else {
+            const rawValue = el.first().text().trim();
+            if (rawValue) {
+              extracted[key] = this._coerceValue(rawValue, fieldSchema);
+              fieldsFound++;
+              break;
+            }
           }
         }
       }
@@ -215,7 +263,8 @@ export class ExtractStructuredTool {
     return {
       data: extracted,
       valid: errors.length === 0,
-      validationErrors: errors.length > 0 ? errors : ['Used CSS selector fallback extraction']
+      validationErrors: errors,
+      extractionNotes: ['Used CSS selector fallback extraction']
     };
   }
@@ -254,7 +303,7 @@ export class ExtractStructuredTool {
       base = result.valid ? 0.6 : 0.4;
     }
-    // Penalize for validation errors
+    // Penalize only for actual validation errors (not extractionNotes)
     const errorCount = (result.validationErrors || []).length;
     const penalty = Math.min(0.3, errorCount * 0.1);

package/src/tools/extract/extractWithLlm.js CHANGED Viewed

@@ -7,6 +7,7 @@
  * Pass provider: "openai" | "anthropic" with the matching API key to use a cloud model.
  */
+import { z } from 'zod';
 import { fetchAndParse } from './_fetchAndParse.js';
 // D1.3: SamplingClient for MCP sampling fallback (lazy — only imported if needed)
 let _SamplingClient = null;
@@ -68,20 +69,26 @@ function resolveProvider(provider) {
 /**
  * Build the user message text that goes to the LLM.
+ * C3: also returns truncation metadata so the caller can surface it.
+ * @returns {{ userMessage: string, truncated: boolean, original_length: number }}
  */
 function buildUserMessage(userPrompt, text, schema) {
-  const truncated = text.length > MAX_INPUT_CHARS ? text.slice(0, MAX_INPUT_CHARS) + '\n[...truncated]' : text;
+  const original_length = text.length;
+  const truncated = original_length > MAX_INPUT_CHARS;
+  const body = truncated ? text.slice(0, MAX_INPUT_CHARS) + '\n[...truncated]' : text;
   let msg = `Extraction instruction: ${userPrompt}\n\n`;
   if (schema && Object.keys(schema).length > 0) {
     msg += `Output schema hint:\n${JSON.stringify(schema, null, 2)}\n\n`;
   }
-  msg += `Web page content:\n${truncated}\n\nReturn only valid JSON.`;
-  return msg;
+  msg += `Web page content:\n${body}\n\nReturn only valid JSON.`;
+  return { userMessage: msg, truncated, original_length };
 }
 /**
  * Parse JSON from an LLM response string defensively.
  * Strips markdown code fences if present.
+ * C3: if the stripped string is not a full JSON document, locate the first
+ * embedded JSON object or array and try to parse that substring.
  * Returns parsed object or throws.
  */
 function parseJson(raw) {
@@ -90,7 +97,137 @@ function parseJson(raw) {
     .replace(/^```(?:json)?\s*/i, '')
     .replace(/\s*```\s*$/, '')
     .trim();
-  return JSON.parse(stripped);
+  // Fast path: well-formed JSON
+  try {
+    return JSON.parse(stripped);
+  } catch (_) {
+    // Fall through to substring recovery
+  }
+  // C3: locate the first *balanced* JSON object or array embedded in the
+  // string — tolerant of prose both before and after the JSON.
+  const balanced = extractBalancedJson(stripped);
+  if (balanced !== null) {
+    return JSON.parse(balanced);
+  }
+  // No balanced JSON found: throw a new SyntaxError with a 200-character preview of the response
+  throw new SyntaxError(`No JSON found in LLM response: ${stripped.slice(0, 200)}`);
+}
+/**
+ * Scan a string for the first balanced JSON object or array, respecting string
+ * literals and escapes so braces inside strings don't unbalance the scan.
+ * @returns {string|null} the JSON substring, or null if none is found
+ */
+function extractBalancedJson(str) {
+  const objStart = str.indexOf('{');
+  const arrStart = str.indexOf('[');
+  const start = objStart === -1 ? arrStart :
+                arrStart === -1 ? objStart :
+                Math.min(objStart, arrStart);
+  if (start === -1) return null;
+  const open = str[start];
+  const close = open === '{' ? '}' : ']';
+  let depth = 0;
+  let inString = false;
+  let escaped = false;
+  for (let i = start; i < str.length; i++) {
+    const ch = str[i];
+    if (escaped) { escaped = false; continue; }
+    if (ch === '\\') { escaped = true; continue; }
+    if (ch === '"') { inString = !inString; continue; }
+    if (inString) continue;
+    if (ch === open) depth++;
+    else if (ch === close) {
+      depth--;
+      if (depth === 0) return str.slice(start, i + 1);
+    }
+  }
+  return null;
+}
+// ── Schema handling (C3) ───────────────────────────────────────────────────────
+/**
+ * Normalize a caller-supplied schema hint into a valid top-level JSON Schema
+ * object suitable for Anthropic tool `input_schema`.
+ *
+ * Accepts either a full JSON Schema (`{ type, properties, ... }`) or a flat
+ * field→type-hint map (`{ name: "string", tags: "array" }`), which is wrapped
+ * as an object schema.
+ */
+function buildInputSchema(schema) {
+  if (schema && (schema.type === 'object' || schema.properties)) {
+    return { additionalProperties: true, ...schema, type: 'object' };
+  }
+  // Flat hint map → object schema with string-typed properties for any
+  // non-object hint values (Anthropic requires a valid JSON Schema).
+  const properties = {};
+  for (const [key, val] of Object.entries(schema || {})) {
+    properties[key] = (val && typeof val === 'object') ? val : { type: 'string' };
+  }
+  return { type: 'object', properties, additionalProperties: true };
+}
+/**
+ * Build a zod validator from a JSON-Schema-like hint. Best-effort: unknown
+ * shapes fall back to `z.any()` so validation never rejects on constructs the
+ * converter does not understand.
+ */
+function jsonSchemaToZod(schema) {
+  if (!schema || typeof schema !== 'object') return z.any();
+  // Flat hint map (no `type`/`properties`/`items`) → treat values as field hints.
+  const isJsonSchema = schema.type || schema.properties || schema.items;
+  if (!isJsonSchema) {
+    const shape = {};
+    for (const [key, val] of Object.entries(schema)) {
+      shape[key] = jsonSchemaToZod(typeof val === 'string' ? { type: val } : val).optional();
+    }
+    return z.object(shape).passthrough();
+  }
+  switch (schema.type) {
+    case 'string': return z.string();
+    case 'number':
+    case 'integer': return z.number();
+    case 'boolean': return z.boolean();
+    case 'null': return z.null();
+    case 'array': return z.array(schema.items ? jsonSchemaToZod(schema.items) : z.any());
+    case 'object': {
+      const shape = {};
+      const required = Array.isArray(schema.required) ? schema.required : [];
+      for (const [key, val] of Object.entries(schema.properties || {})) {
+        const field = jsonSchemaToZod(val);
+        shape[key] = required.includes(key) ? field : field.optional();
+      }
+      return z.object(shape).passthrough();
+    }
+    default: return z.any();
+  }
+}
+/**
+ * Validate parsed output against the schema hint.
+ * @returns {{ valid: boolean, errors: string[] }}
+ */
+function validateAgainstSchema(parsed, schema) {
+  try {
+    const validator = jsonSchemaToZod(schema);
+    const result = validator.safeParse(parsed);
+    if (result.success) return { valid: true, errors: [] };
+    return {
+      valid: false,
+      errors: result.error.issues.map((i) => `${i.path.join('.') || '(root)'}: ${i.message}`)
+    };
+  } catch {
+    // Converter failure should not block extraction — treat as unvalidated.
+    return { valid: true, errors: [] };
+  }
 }
 // ── OpenAI call ───────────────────────────────────────────────────────────────
@@ -133,8 +270,10 @@ async function callOpenAI({ apiKey, model, systemMessage, userMessage, maxTokens
 // ── Anthropic call ────────────────────────────────────────────────────────────
-async function callAnthropic({ apiKey, model, systemMessage, userMessage, maxTokens }) {
+async function callAnthropic({ apiKey, model, systemMessage, userMessage, maxTokens, schema }) {
   const url = `${anthropicBaseUrl()}/v1/messages`;
+  const useToolUse = schema && Object.keys(schema).length > 0;
   const body = {
     model,
     system: systemMessage,
@@ -142,6 +281,18 @@ async function callAnthropic({ apiKey, model, systemMessage, userMessage, maxTok
     max_tokens: maxTokens
   };
+  // C3: when a schema is provided, force structured output via tool-use. The
+  // tool's input_schema constrains the model and the tool_use input block is
+  // returned as already-valid JSON (no fence-stripping/parsing guesswork).
+  if (useToolUse) {
+    body.tools = [{
+      name: 'extract_data',
+      description: 'Return the extracted data conforming to the provided schema.',
+      input_schema: buildInputSchema(schema)
+    }];
+    body.tool_choice = { type: 'tool', name: 'extract_data' };
+  }
   const response = await fetch(url, {
     method: 'POST',
     headers: {
@@ -159,11 +310,21 @@ async function callAnthropic({ apiKey, model, systemMessage, userMessage, maxTok
   }
   const json = await response.json();
-  const content = json.content?.[0]?.text ?? '';
   const usage = {
     input_tokens: json.usage?.input_tokens ?? 0,
     output_tokens: json.usage?.output_tokens ?? 0
   };
+  if (useToolUse) {
+    // Read the structured input from the tool_use block.
+    const toolBlock = (json.content || []).find((b) => b.type === 'tool_use');
+    if (toolBlock && toolBlock.input !== undefined) {
+      return { rawText: JSON.stringify(toolBlock.input), usage, model: json.model || model };
+    }
+    // Fall through to text if the model declined to call the tool.
+  }
+  const content = (json.content || []).find((b) => b.type === 'text')?.text ?? '';
   return { rawText: content, usage, model: json.model || model };
 }
@@ -229,7 +390,7 @@ async function callLLM({ provider, apiKey, model, systemMessage, userMessage, ma
   if (provider === 'ollama') {
     return callOllama({ model, systemMessage, userMessage, maxTokens, schema });
   }
-  return callAnthropic({ apiKey, model, systemMessage, userMessage, maxTokens });
+  return callAnthropic({ apiKey, model, systemMessage, userMessage, maxTokens, schema });
 }
 // ── Tool class ────────────────────────────────────────────────────────────────
@@ -304,7 +465,7 @@ export class ExtractWithLlm {
     const systemMessage =
       'You extract structured data from web content per the user\'s instructions. Return JSON only.';
-    const userMessage = buildUserMessage(prompt, text, schema);
+    const { userMessage, truncated: inputTruncated, original_length } = buildUserMessage(prompt, text, schema);
     // Step 2: First LLM call — with sampling fallback for 'auto' provider
     // Fallback chain: Ollama → API key (handled by resolveProvider) → sampling → error
@@ -314,10 +475,17 @@ export class ExtractWithLlm {
         provider, apiKey, model, systemMessage, userMessage, maxTokens, schema
       }));
     } catch (llmErr) {
-      // D1.3: If provider is 'auto'/'ollama' and it failed, try sampling as final fallback
+      // D1.3: If provider is 'auto'/'ollama' and it failed, try MCP sampling as final fallback
       if (providerParam === 'auto' || providerParam === 'ollama') {
         try {
-          ({ rawText, usage } = await callViaSampling({ systemMessage, userMessage, maxTokens }));
+          const SamplingClient = await getSamplingClient();
+          const samplingClient = new SamplingClient();
+          const { text: sampledText } = await samplingClient.complete(
+            `${systemMessage}\n\n${userMessage}`,
+            { maxTokens }
+          );
+          rawText = sampledText;
+          usage = { input_tokens: 0, output_tokens: 0 };
           resolvedModel = 'sampling';
         } catch (samplingErr) {
           return { success: false, error: `LLM call failed: ${llmErr.message}. Sampling fallback also failed: ${samplingErr.message}` };
@@ -362,13 +530,26 @@ export class ExtractWithLlm {
       }
     }
-    return {
+    // C3: surface truncation metadata so callers know the input was clipped
+    const result = {
       success: true,
       data: parsed,
       provider: resolvedModel === 'sampling' ? 'sampling' : provider,
       model: resolvedModel || model,
       usage
     };
+    if (inputTruncated) {
+      result.truncated = true;
+      result.original_length = original_length;
+    }
+    // C3: validate output against the schema hint (zod). Non-fatal — the data
+    // is still returned; callers can inspect `valid`/`validationErrors`.
+    if (schema && Object.keys(schema).length > 0) {
+      const { valid, errors } = validateAgainstSchema(parsed, schema);
+      result.valid = valid;
+      if (!valid) result.validationErrors = errors;
+    }
+    return result;
   }
 }

package/src/tools/extract/listOllamaModels.js CHANGED Viewed

@@ -41,14 +41,25 @@ export class ListOllamaModelsTool {
       return { success: false, baseUrl, error: `Invalid JSON from Ollama: ${err.message}` };
     }
-    const models = (data.models || []).map((m) => ({
-      name: m.name,
-      size_bytes: m.size,
-      modified_at: m.modified_at,
-      family: m.details?.family,
-      parameter_size: m.details?.parameter_size,
-      quantization: m.details?.quantization_level
-    }));
+    // C3: harden against non-array response; normalize modified_at to ISO 8601.
+    const rawModels = Array.isArray(data.models) ? data.models :
+                      Array.isArray(data) ? data : [];
+    const models = rawModels.map((m) => {
+      let modified_at = m.modified_at ?? null;
+      if (modified_at !== null) {
+        const d = new Date(modified_at);
+        modified_at = isNaN(d.getTime()) ? modified_at : d.toISOString();
+      }
+      return {
+        name: m.name,
+        size_bytes: m.size,
+        modified_at,
+        family: m.details?.family,
+        parameter_size: m.details?.parameter_size,
+        quantization: m.details?.quantization_level
+      };
+    });
     return {
       success: true,

package/src/tools/extract/processDocument.js CHANGED Viewed

@@ -19,7 +19,12 @@ const ProcessDocumentSchema = z.object({
     extractMetadata: z.boolean().default(true),
     password: z.string().optional(),
     maxPages: z.number().min(1).max(500).default(100),
+    // C3: extract a specific 1-based, inclusive page range from a PDF
+    pageRange: z.object({
+      start: z.number().min(1).default(1),
+      end: z.number().min(1).optional()
+    }).optional(),
     // Web content options
     useReadability: z.boolean().default(true),
     extractStructuredData: z.boolean().default(true),
@@ -195,7 +200,8 @@ export class ProcessDocumentTool {
         extractText: options.extractText,
         extractMetadata: options.extractMetadata,
         password: options.password,
-        maxPages: options.maxPages
+        maxPages: options.maxPages,
+        ...(options.pageRange ? { pageRange: options.pageRange } : {})
       }
     });
@@ -273,7 +279,7 @@ export class ProcessDocumentTool {
         headers: {
           'User-Agent': 'Mozilla/5.0 (compatible; MCP-WebScraper/3.0; Document-Processor)'
         },
-        timeout: 15000
+        signal: AbortSignal.timeout(15000)
       });
       if (!response.ok) {
@@ -506,4 +512,4 @@ export class ProcessDocumentTool {
   }
 }
-export default ProcessDocumentTool;
+export default ProcessDocumentTool;

package/src/tools/extract/summarizeContent.js CHANGED Viewed

@@ -68,6 +68,8 @@ const SummarizeContentResult = z.object({
   summarizedAt: z.string(),
   processingTime: z.number(),
   success: z.boolean(),
+  degraded: z.boolean().optional(),
+  degradedReason: z.string().optional(),
   error: z.string().optional()
 });
@@ -131,11 +133,17 @@ export class SummarizeContentTool {
       // Step 2: Set summary result
       result.summary = analysisResult.summary;
-      // D1.3: If abstractive mode requested, attempt sampling-based enhancement
+      // D1.3: If abstractive mode requested, attempt sampling-based enhancement.
+      // If it can't run (no LLM/sampling available), fall back to the extractive
+      // result but flag it explicitly rather than silently masking.
       if (options.summaryType === 'abstractive') {
         const abstractive = await this._abstractiveSummaryViaSampling(text, analysisResult.summary, options.summaryLength);
         if (abstractive) {
           result.summary = abstractive;
+        } else {
+          result.summary = { ...result.summary, type: 'extractive' };
+          result.degraded = true;
+          result.degradedReason = 'Abstractive summarization unavailable (no LLM/sampling backend); returned extractive summary instead.';
         }
       }
@@ -191,6 +199,55 @@ export class SummarizeContentTool {
     }
   }
+  /**
+   * Generate an abstractive summary via the MCP SamplingClient fallback chain
+   * (Ollama → OpenAI → Anthropic → MCP sampling). Returns a summary object in the
+   * same shape as the extractive result, or null if no backend is available.
+   * @param {string} text - Full original text
+   * @param {Object} extractiveSummary - The extractive summary (for shape/fallback)
+   * @param {string} summaryLength - 'short' | 'medium' | 'long'
+   * @returns {Promise<Object|null>}
+   */
+  async _abstractiveSummaryViaSampling(text, extractiveSummary, summaryLength) {
+    try {
+      const SamplingClient = await getSamplingClient();
+      const client = new SamplingClient();
+      const lengthGuide = {
+        short: '1-2 sentences',
+        medium: '3-5 sentences',
+        long: '6-10 sentences'
+      }[summaryLength] || '3-5 sentences';
+      const prompt =
+        `Write a concise, fluent abstractive summary (${lengthGuide}) of the following text. ` +
+        `Capture the main ideas in your own words. Respond with only the summary text.\n\n` +
+        `${text.slice(0, 12000)}`;
+      const { text: summaryText } = await client.complete(prompt, { maxTokens: 600 });
+      if (!summaryText || !summaryText.trim()) {
+        return null;
+      }
+      const cleaned = summaryText.trim();
+      const sentences = splitSentences(cleaned);
+      const compressionRatio = text.length > 0
+        ? Math.round((cleaned.length / text.length) * 1000) / 1000
+        : 0;
+      return {
+        text: cleaned,
+        sentences,
+        type: 'abstractive',
+        length: summaryLength,
+        compressionRatio
+      };
+    } catch {
+      // No sampling/LLM backend available — caller falls back to extractive.
+      return null;
+    }
+  }
   /**
    * Extract key points from original text and summary
    * @param {string} originalText - Original text