crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Summarize Content MCP Tool
|
|
3
|
+
* Content summarization with configurable length and type options
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { ContentAnalyzer } from '../../core/analysis/ContentAnalyzer.js';
|
|
8
|
+
|
|
9
|
+
// Optional tuning knobs for a summarization request. Every field except
// `language` carries a default, so an empty object is a valid value.
const SummarizeOptionsSchema = z.object({
  // Desired summary size bucket.
  summaryLength: z.enum(['short', 'medium', 'long']).default('medium'),
  // Summarization strategy requested from the analyzer.
  summaryType: z.enum(['extractive', 'abstractive']).default('extractive'),
  // Toggles for the optional sections of the result.
  includeKeypoints: z.boolean().default(true),
  includeKeywords: z.boolean().default(true),
  includeStatistics: z.boolean().default(true),
  // Upper bound on returned keywords; must be within 1..20.
  maxKeywords: z.number().min(1).max(20).default(10),
  preserveStructure: z.boolean().default(false),
  // Caller-supplied language hint; no default.
  language: z.string().optional()
});

// Input contract for the summarize_content tool: the text to summarize
// (at least 10 characters) plus an optional options object.
const SummarizeContentSchema = z.object({
  text: z.string().min(10),
  options: SummarizeOptionsSchema.optional().default({})
});
|
|
22
|
+
|
|
23
|
+
// The generated summary: full text, its sentences, and descriptive labels.
const SummaryBodySchema = z.object({
  text: z.string(),
  sentences: z.array(z.string()),
  type: z.string(),
  length: z.string(),
  compressionRatio: z.number()
});

// One extracted keyword with its relevance and frequency figures.
const KeywordEntrySchema = z.object({
  keyword: z.string(),
  relevance: z.number(),
  frequency: z.number()
});

// Size figures for the input text (includes a paragraph count).
const OriginalTextStatsSchema = z.object({
  characters: z.number(),
  words: z.number(),
  sentences: z.number(),
  paragraphs: z.number(),
  readingTime: z.number()
});

// Size figures for the summary text (no paragraph count in this shape).
const SummaryTextStatsSchema = z.object({
  characters: z.number(),
  words: z.number(),
  sentences: z.number(),
  readingTime: z.number()
});

// Bookkeeping about how the summary was produced.
const ResultMetadataSchema = z.object({
  language: z.string().optional(),
  processingMethod: z.string(),
  confidenceScore: z.number()
});

// Output contract describing a summarization result. The keypoints,
// keywords, statistics and error sections are optional.
const SummarizeContentResult = z.object({
  originalText: z.string(),
  summary: SummaryBodySchema,
  keypoints: z.array(z.string()).optional(),
  keywords: z.array(KeywordEntrySchema).optional(),
  statistics: z.object({
    original: OriginalTextStatsSchema,
    summary: SummaryTextStatsSchema
  }).optional(),
  metadata: ResultMetadataSchema,
  summarizedAt: z.string(),
  processingTime: z.number(),
  success: z.boolean(),
  error: z.string().optional()
});
|
|
63
|
+
|
|
64
|
+
/**
 * MCP tool that summarizes text by delegating to ContentAnalyzer and then
 * enriching the result with key points, keywords, statistics and a
 * heuristic confidence score.
 */
export class SummarizeContentTool {
  constructor() {
    this.contentAnalyzer = new ContentAnalyzer();
  }

  /**
   * Get tool definition for MCP server
   * @returns {Object} Tool definition (name, description, inputSchema)
   */
  getDefinition() {
    return {
      name: 'summarize_content',
      description: 'Generate intelligent summaries of text content with configurable length, type, and additional analysis including key points and keywords.',
      inputSchema: SummarizeContentSchema
    };
  }

  /**
   * Execute content summarization.
   *
   * Never rejects: any failure (validation, analyzer error, malformed
   * params) is reported as a result object with `success: false` and an
   * `error` message.
   *
   * @param {Object} params - Summarization parameters ({ text, options })
   * @returns {Promise<Object>} Summarization result
   */
  async execute(params) {
    const startTime = Date.now();

    try {
      const validated = SummarizeContentSchema.parse(params);
      const { text, options } = validated;

      const result = {
        // Only a 500-character preview of the input is echoed back.
        originalText: text.substring(0, 500) + (text.length > 500 ? '...' : ''),
        summarizedAt: new Date().toISOString(),
        success: false,
        processingTime: 0
      };

      // Step 1: Generate summary using ContentAnalyzer
      const analysisResult = await this.contentAnalyzer.analyzeContent({
        text,
        options: {
          summarize: true,
          extractKeywords: options.includeKeywords,
          detectLanguage: true,
          summaryLength: options.summaryLength,
          summaryType: options.summaryType,
          maxKeywords: options.maxKeywords,
          extractTopics: false,
          extractEntities: false,
          includeReadabilityMetrics: false,
          includeSentiment: false
        }
      });

      if (!analysisResult.summary) {
        throw new Error('Summary generation failed');
      }

      // Step 2: Set summary result
      result.summary = analysisResult.summary;

      // Step 3: Extract key points if requested
      if (options.includeKeypoints) {
        result.keypoints = await this.extractKeyPoints(text, analysisResult.summary);
      }

      // Step 4: Add keywords if requested
      if (options.includeKeywords && analysisResult.keywords) {
        result.keywords = analysisResult.keywords;
      }

      // Step 5: Calculate statistics if requested
      if (options.includeStatistics) {
        result.statistics = {
          original: this.calculateTextStatistics(text),
          summary: this.calculateTextStatistics(result.summary.text)
        };
      }

      // Step 6: Set metadata
      result.metadata = {
        language: analysisResult.language?.code || options.language || 'unknown',
        processingMethod: options.summaryType,
        confidenceScore: this.calculateConfidenceScore(text, result.summary.text)
      };

      result.processingTime = Date.now() - startTime;
      result.success = true;

      return result;

    } catch (error) {
      // `params` may be null/undefined or carry a non-string `text` (that is
      // often exactly why validation failed), so build the preview defensively
      // rather than throwing again from inside the error handler.
      const rawText = typeof params?.text === 'string' ? params.text : '';

      return {
        originalText: rawText.substring(0, 100) || 'unknown',
        summarizedAt: new Date().toISOString(),
        success: false,
        error: `Content summarization failed: ${error?.message ?? String(error)}`,
        processingTime: Date.now() - startTime,
        summary: {
          text: '',
          sentences: [],
          type: 'failed',
          length: 'none',
          compressionRatio: 0
        },
        metadata: {
          processingMethod: 'failed',
          confidenceScore: 0
        }
      };
    }
  }

  /**
   * Extract key points from original text and summary.
   *
   * Each sentence of the original text is scored on length, word overlap
   * with the summary, position in the text, and presence of digits or
   * emphasis words. Sentences scoring above 1 are kept (at most 5) and
   * returned in their original order.
   *
   * @param {string} originalText - Original text
   * @param {Object} summary - Summary object (its `text` property is read)
   * @returns {Promise<Array>} - Array of key points; empty on failure
   */
  async extractKeyPoints(originalText, summary) {
    try {
      // Simple key point extraction based on important sentences
      const sentences = originalText.split(/[.!?]+/).filter((s) => s.trim().length > 0);
      const totalSentences = sentences.length;

      // Built once so each overlap lookup is O(1) instead of rescanning
      // the summary for every word of every sentence.
      const summaryWords = new Set(summary.text.toLowerCase().split(/\s+/));

      // Score sentences based on various factors. The map index is the
      // sentence position; looking it up with indexOf would report the
      // first occurrence for repeated sentences.
      const scoredSentences = sentences.map((sentence, position) => {
        const trimmed = sentence.trim();
        // Split the trimmed sentence so leading whitespace does not add an
        // empty token and inflate the word count.
        const words = trimmed.toLowerCase().split(/\s+/);

        let score = 0;

        // Length factor (medium-length sentences preferred)
        const wordCount = words.length;
        if (wordCount >= 10 && wordCount <= 25) {
          score += 2;
        } else if (wordCount >= 6 && wordCount <= 30) {
          score += 1;
        }

        // Keyword density (words that appear in summary)
        const sharedWordCount = words.filter((word) => summaryWords.has(word)).length;
        score += sharedWordCount * 0.5;

        // Position factor (sentences at beginning and end are often important)
        if (position < totalSentences * 0.2 || position > totalSentences * 0.8) {
          score += 1;
        }

        // Numeric data or specific terms
        if (/\d+/.test(trimmed)) score += 0.5;
        if (/\b(important|significant|key|main|primary|essential|critical)\b/i.test(trimmed)) {
          score += 1;
        }

        return {
          sentence: trimmed,
          score,
          position
        };
      });

      // Select top key points
      return scoredSentences
        .filter((item) => item.score > 1) // Minimum threshold
        .sort((a, b) => b.score - a.score)
        .slice(0, 5) // Top 5 key points
        .sort((a, b) => a.position - b.position) // Restore original order
        .map((item) => item.sentence);

    } catch (error) {
      console.warn('Key point extraction failed:', error.message);
      return [];
    }
  }

  /**
   * Calculate text statistics.
   *
   * @param {string} text - Text to analyze; non-string input is treated as empty
   * @returns {Object} - Counts of characters, words, sentences, paragraphs
   *   and an estimated reading time in whole minutes
   */
  calculateTextStatistics(text) {
    const source = typeof text === 'string' ? text : '';

    const characters = source.length;
    const words = source.split(/\s+/).filter((w) => w.length > 0);
    const sentences = source.split(/[.!?]+/).filter((s) => s.trim().length > 0);
    // Paragraphs are separated by at least one blank line.
    const paragraphs = source.split(/\n\s*\n/).filter((p) => p.trim().length > 0);

    // Estimate reading time (average 200 words per minute)
    const readingTime = Math.ceil(words.length / 200);

    return {
      characters,
      words: words.length,
      sentences: sentences.length,
      paragraphs: paragraphs.length,
      readingTime
    };
  }

  /**
   * Calculate confidence score for summary quality.
   *
   * Weighted blend of vocabulary overlap (0.5), compression ratio (0.3)
   * and summary length (0.2), rounded to two decimals.
   *
   * @param {string} originalText - Original text
   * @param {string} summaryText - Summary text
   * @returns {number} - Confidence score (0-1); 0.5 if the calculation throws
   */
  calculateConfidenceScore(originalText, summaryText) {
    try {
      if (!summaryText || summaryText.length === 0) {
        return 0;
      }

      // Words of 1-2 characters are ignored for the overlap measure.
      const originalWords = originalText.toLowerCase().split(/\s+/).filter((w) => w.length > 2);
      const summaryWords = summaryText.toLowerCase().split(/\s+/).filter((w) => w.length > 2);

      if (originalWords.length === 0 || summaryWords.length === 0) {
        return 0;
      }

      // Calculate word overlap
      const uniqueOriginalWords = new Set(originalWords);
      const uniqueSummaryWords = new Set(summaryWords);
      let sharedCount = 0;
      for (const word of uniqueOriginalWords) {
        if (uniqueSummaryWords.has(word)) sharedCount += 1;
      }

      const overlapRatio = sharedCount / Math.min(uniqueOriginalWords.size, uniqueSummaryWords.size);

      // Calculate compression ratio factor
      const compressionRatio = summaryText.length / originalText.length;
      const compressionScore = compressionRatio > 0.1 && compressionRatio < 0.8 ? 1 : 0.5;

      // Calculate length appropriateness
      const summaryWordCount = summaryWords.length;
      const lengthScore = summaryWordCount >= 10 && summaryWordCount <= 200 ? 1 : 0.7;

      // Combine factors
      const confidence = (overlapRatio * 0.5 + compressionScore * 0.3 + lengthScore * 0.2);

      return Math.round(Math.min(1, Math.max(0, confidence)) * 100) / 100;

    } catch (error) {
      console.warn('Confidence score calculation failed:', error.message);
      return 0.5; // Default neutral confidence
    }
  }

  /**
   * Summarize multiple texts in batches.
   *
   * Entries may be plain strings or `{ text, options }` objects; per-entry
   * options override the shared `options`. Results are returned in input
   * order, and a failing entry yields a failure result rather than
   * aborting the batch.
   *
   * @param {Array} texts - Array of texts to summarize
   * @param {Object} options - Summarization options; `concurrency` sets the
   *   batch size (default 3, also used when the value is not a number >= 1)
   * @returns {Promise<Array>} - Array of summarization results
   */
  async summarizeMultiple(texts, options = {}) {
    // A zero, negative, NaN or string step would stall or corrupt the batch
    // loop below, so normalize to a positive integer.
    const requested = Number(options?.concurrency);
    const concurrency = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 3;
    const results = [];

    for (let i = 0; i < texts.length; i += concurrency) {
      const batch = texts.slice(i, i + concurrency);
      const batchPromises = batch.map((text) => {
        // `text?.options` keeps a null/primitive entry from throwing here;
        // such an entry is passed on to execute without a usable `text`.
        const params = typeof text === 'string'
          ? { text, options }
          : { ...text, options: { ...options, ...text?.options } };

        return this.execute(params).catch((error) => ({
          originalText: (typeof params.text === 'string' ? params.text.substring(0, 100) : '') || 'unknown',
          success: false,
          error: error?.message ?? String(error),
          summarizedAt: new Date().toISOString(),
          processingTime: 0,
          summary: {
            text: '',
            sentences: [],
            type: 'failed',
            length: 'none',
            compressionRatio: 0
          }
        }));
      });

      const batchResults = await Promise.all(batchPromises);
      results.push(...batchResults);
    }

    return results;
  }

  /**
   * Generate summary with custom length.
   *
   * Maps the word target onto a length bucket: up to 50 words is 'short',
   * up to 150 is 'medium', anything larger is 'long'.
   *
   * @param {string} text - Text to summarize
   * @param {number} targetWords - Target word count for summary
   * @param {Object} options - Additional options
   * @returns {Promise<Object>} - Custom summary result
   */
  async generateCustomLengthSummary(text, targetWords, options = {}) {
    // Determine length category based on target words
    let summaryLength;
    if (targetWords <= 50) summaryLength = 'short';
    else if (targetWords <= 150) summaryLength = 'medium';
    else summaryLength = 'long';

    // NOTE(review): `targetWords` is forwarded, but the input schema in this
    // file declares no such option and execute never reads it, so only the
    // bucket appears to take effect - confirm against the schema's handling
    // of unknown keys.
    return await this.execute({
      text,
      options: {
        ...options,
        summaryLength,
        targetWords
      }
    });
  }
}
|
|
375
|
+
|
|
376
|
+
export default SummarizeContentTool;
|