crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Summarize Content MCP Tool
3
+ * Content summarization with configurable length and type options
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { ContentAnalyzer } from '../../core/analysis/ContentAnalyzer.js';
8
+
9
/**
 * Input contract for the `summarize_content` tool.
 *
 * - `text`    : the content to summarize; must be at least 10 characters long.
 * - `options` : optional tuning knobs. When omitted it defaults to `{}`; each
 *               individual option carries its own default below.
 */
const SummarizeContentSchema = (() => {
  const lengthChoices = ['short', 'medium', 'long'];
  const typeChoices = ['extractive', 'abstractive'];

  // Every option is individually defaulted.
  const optionsSchema = z.object({
    summaryLength: z.enum(lengthChoices).default('medium'),
    summaryType: z.enum(typeChoices).default('extractive'),
    includeKeypoints: z.boolean().default(true),
    includeKeywords: z.boolean().default(true),
    includeStatistics: z.boolean().default(true),
    maxKeywords: z.number().min(1).max(20).default(10),
    preserveStructure: z.boolean().default(false),
    language: z.string().optional()
  });

  return z.object({
    text: z.string().min(10),
    options: optionsSchema.optional().default({})
  });
})();
22
+
23
/**
 * Shape of the object produced by a summarization run.
 *
 * The schema is declared for reference; no code in this file calls it.
 * `keypoints`, `keywords`, `statistics` and `error` are optional; everything
 * else is required.
 */
const SummarizeContentResult = (() => {
  // The generated summary itself.
  const summarySchema = z.object({
    text: z.string(),
    sentences: z.array(z.string()),
    type: z.string(),
    length: z.string(),
    compressionRatio: z.number()
  });

  // One extracted keyword with its scores.
  const keywordSchema = z.object({
    keyword: z.string(),
    relevance: z.number(),
    frequency: z.number()
  });

  // Counts for the source text (includes a paragraph count).
  const originalStatsSchema = z.object({
    characters: z.number(),
    words: z.number(),
    sentences: z.number(),
    paragraphs: z.number(),
    readingTime: z.number()
  });

  // Counts for the summary text (no paragraph count declared).
  const summaryStatsSchema = z.object({
    characters: z.number(),
    words: z.number(),
    sentences: z.number(),
    readingTime: z.number()
  });

  const statisticsSchema = z.object({
    original: originalStatsSchema,
    summary: summaryStatsSchema
  });

  const metadataSchema = z.object({
    language: z.string().optional(),
    processingMethod: z.string(),
    confidenceScore: z.number()
  });

  return z.object({
    originalText: z.string(),
    summary: summarySchema,
    keypoints: z.array(z.string()).optional(),
    keywords: z.array(keywordSchema).optional(),
    statistics: statisticsSchema.optional(),
    metadata: metadataSchema,
    summarizedAt: z.string(),
    processingTime: z.number(),
    success: z.boolean(),
    error: z.string().optional()
  });
})();
63
+
64
/**
 * MCP tool that summarizes text through `ContentAnalyzer` and decorates the
 * summary with key points, keywords, text statistics and a heuristic
 * confidence score.
 */
export class SummarizeContentTool {
  constructor() {
    this.contentAnalyzer = new ContentAnalyzer();
  }

  /**
   * Build the uniform "summarization failed" result object.
   *
   * @param {*} sourceText - Text that was being summarized; anything that is
   *   not a non-empty string is reported as `'unknown'`.
   * @param {string} message - Value for the `error` field.
   * @param {number} [processingTime=0] - Elapsed milliseconds to report.
   * @returns {Object} Failure result with an empty summary and zero confidence.
   */
  static createFailureResult(sourceText, message, processingTime = 0) {
    const preview =
      typeof sourceText === 'string' && sourceText.length > 0
        ? sourceText.substring(0, 100)
        : 'unknown';

    return {
      originalText: preview,
      summarizedAt: new Date().toISOString(),
      success: false,
      error: message,
      processingTime,
      summary: {
        text: '',
        sentences: [],
        type: 'failed',
        length: 'none',
        compressionRatio: 0
      },
      metadata: {
        processingMethod: 'failed',
        confidenceScore: 0
      }
    };
  }

  /**
   * Split text into lower-cased word tokens with surrounding punctuation
   * removed, dropping tokens that end up empty.
   *
   * @param {*} text - Text to tokenize; non-strings yield an empty list.
   * @returns {string[]} Normalized tokens in their original order.
   */
  static tokenize(text) {
    if (typeof text !== 'string') {
      return [];
    }
    return text
      .toLowerCase()
      .split(/\s+/)
      .map((token) => token.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
      .filter((token) => token.length > 0);
  }

  /**
   * Get tool definition for MCP server
   * @returns {Object} Tool definition (name, description, input schema)
   */
  getDefinition() {
    return {
      name: 'summarize_content',
      description: 'Generate intelligent summaries of text content with configurable length, type, and additional analysis including key points and keywords.',
      inputSchema: SummarizeContentSchema
    };
  }

  /**
   * Execute content summarization.
   *
   * Never rejects: validation errors, analyzer errors and malformed `params`
   * (including `null`/`undefined`) are all reported as a failure result with
   * `success: false` and an `error` message.
   *
   * @param {Object} params - Summarization parameters (`text`, optional `options`)
   * @returns {Promise<Object>} Summarization result
   */
  async execute(params) {
    const startTime = Date.now();

    try {
      const { text, options } = SummarizeContentSchema.parse(params);

      const result = {
        // Only a 500-character preview of the source is echoed back.
        originalText: text.substring(0, 500) + (text.length > 500 ? '...' : ''),
        summarizedAt: new Date().toISOString(),
        success: false,
        processingTime: 0
      };

      // Step 1: Generate summary using ContentAnalyzer
      const analysisResult = await this.contentAnalyzer.analyzeContent({
        text,
        options: {
          summarize: true,
          extractKeywords: options.includeKeywords,
          detectLanguage: true,
          summaryLength: options.summaryLength,
          summaryType: options.summaryType,
          maxKeywords: options.maxKeywords,
          extractTopics: false,
          extractEntities: false,
          includeReadabilityMetrics: false,
          includeSentiment: false
        }
      });

      if (!analysisResult?.summary) {
        throw new Error('Summary generation failed');
      }

      // Step 2: Set summary result
      result.summary = analysisResult.summary;

      // Step 3: Extract key points if requested
      if (options.includeKeypoints) {
        result.keypoints = await this.extractKeyPoints(text, analysisResult.summary);
      }

      // Step 4: Add keywords if requested
      if (options.includeKeywords && analysisResult.keywords) {
        result.keywords = analysisResult.keywords;
      }

      // Step 5: Calculate statistics if requested
      if (options.includeStatistics) {
        result.statistics = {
          original: this.calculateTextStatistics(text),
          summary: this.calculateTextStatistics(result.summary.text)
        };
      }

      // Step 6: Set metadata (detected language wins over the caller's hint)
      result.metadata = {
        language: analysisResult.language?.code || options.language || 'unknown',
        processingMethod: options.summaryType,
        confidenceScore: this.calculateConfidenceScore(text, result.summary.text)
      };

      result.processingTime = Date.now() - startTime;
      result.success = true;

      return result;
    } catch (error) {
      // `params` may be null or carry a non-string `text`; the failure builder
      // tolerates both so the handler itself cannot throw.
      const reason = error?.message ?? String(error);
      return SummarizeContentTool.createFailureResult(
        params?.text,
        `Content summarization failed: ${reason}`,
        Date.now() - startTime
      );
    }
  }

  /**
   * Extract up to five key points from the original text.
   *
   * Each sentence is scored on length, overlap with the summary vocabulary,
   * position (first/last fifth of the text), presence of digits and presence
   * of emphasis words. Sentences scoring above 1 are kept, the five best are
   * selected and returned in their original order.
   *
   * @param {string} originalText - Original text
   * @param {Object} summary - Summary object; `summary.text` supplies the vocabulary
   * @returns {Promise<Array>} - Array of key points (empty on failure)
   */
  async extractKeyPoints(originalText, summary) {
    try {
      const sentences = originalText.split(/[.!?]+/).filter((s) => s.trim().length > 0);
      const totalSentences = sentences.length;

      // Tokenize the summary once; Set membership keeps scoring linear.
      const summaryVocabulary = new Set(SummarizeContentTool.tokenize(summary?.text));

      const scoredSentences = sentences.map((sentence, position) => {
        const words = SummarizeContentTool.tokenize(sentence);
        const wordCount = words.length;
        let score = 0;

        // Length factor (medium-length sentences preferred)
        if (wordCount >= 10 && wordCount <= 25) {
          score += 2;
        } else if (wordCount >= 6 && wordCount <= 30) {
          score += 1;
        }

        // Keyword density (words that appear in summary)
        score += words.filter((word) => summaryVocabulary.has(word)).length * 0.5;

        // Position factor; the map index is used so repeated sentences keep
        // their own position instead of the first occurrence's.
        if (position < totalSentences * 0.2 || position > totalSentences * 0.8) {
          score += 1;
        }

        // Numeric data or specific terms
        if (/\d+/.test(sentence)) {
          score += 0.5;
        }
        if (/\b(important|significant|key|main|primary|essential|critical)\b/i.test(sentence)) {
          score += 1;
        }

        return { sentence: sentence.trim(), score, position };
      });

      return scoredSentences
        .filter((item) => item.score > 1) // Minimum threshold
        .sort((a, b) => b.score - a.score)
        .slice(0, 5) // Top 5 key points
        .sort((a, b) => a.position - b.position) // Restore original order
        .map((item) => item.sentence);
    } catch (error) {
      // Key points are best-effort: report the problem and carry on without them.
      console.warn('Key point extraction failed:', error.message);
      return [];
    }
  }

  /**
   * Calculate text statistics
   * @param {string} text - Text to analyze; non-strings are treated as empty
   * @returns {Object} - Character, word, sentence and paragraph counts plus
   *   reading time in whole minutes
   */
  calculateTextStatistics(text) {
    const source = typeof text === 'string' ? text : '';

    const wordCount = source.split(/\s+/).filter((w) => w.length > 0).length;
    const sentenceCount = source.split(/[.!?]+/).filter((s) => s.trim().length > 0).length;
    const paragraphCount = source.split(/\n\s*\n/).filter((p) => p.trim().length > 0).length;

    return {
      characters: source.length,
      words: wordCount,
      sentences: sentenceCount,
      paragraphs: paragraphCount,
      // Estimate reading time (average 200 words per minute)
      readingTime: Math.ceil(wordCount / 200)
    };
  }

  /**
   * Calculate confidence score for summary quality.
   *
   * Weighted blend of vocabulary overlap (50%), compression ratio (30%) and
   * summary length (20%), rounded to two decimals.
   *
   * @param {string} originalText - Original text
   * @param {string} summaryText - Summary text
   * @returns {number} - Confidence score (0-1); 0.5 if the calculation throws
   */
  calculateConfidenceScore(originalText, summaryText) {
    try {
      if (!summaryText || summaryText.length === 0) {
        return 0;
      }

      // Words of one or two characters are ignored on both sides.
      const originalWords = originalText.toLowerCase().split(/\s+/).filter((w) => w.length > 2);
      const summaryWords = summaryText.toLowerCase().split(/\s+/).filter((w) => w.length > 2);

      if (originalWords.length === 0 || summaryWords.length === 0) {
        return 0;
      }

      // Calculate word overlap
      const uniqueOriginalWords = new Set(originalWords);
      const uniqueSummaryWords = new Set(summaryWords);
      let sharedCount = 0;
      for (const word of uniqueOriginalWords) {
        if (uniqueSummaryWords.has(word)) {
          sharedCount += 1;
        }
      }
      const overlapRatio = sharedCount / Math.min(uniqueOriginalWords.size, uniqueSummaryWords.size);

      // Calculate compression ratio factor
      const compressionRatio = summaryText.length / originalText.length;
      const compressionScore = compressionRatio > 0.1 && compressionRatio < 0.8 ? 1 : 0.5;

      // Calculate length appropriateness
      const summaryWordCount = summaryWords.length;
      const lengthScore = summaryWordCount >= 10 && summaryWordCount <= 200 ? 1 : 0.7;

      // Combine factors
      const confidence = (overlapRatio * 0.5 + compressionScore * 0.3 + lengthScore * 0.2);

      return Math.round(Math.min(1, Math.max(0, confidence)) * 100) / 100;
    } catch (error) {
      console.warn('Confidence score calculation failed:', error.message);
      return 0.5; // Default neutral confidence
    }
  }

  /**
   * Summarize multiple texts in fixed-size batches.
   *
   * Each entry is either a string or an object of `execute` parameters whose
   * own `options` override the shared ones. Entries inside a batch run in
   * parallel; batches run one after another. Results keep the input order.
   *
   * @param {Array} texts - Array of texts to summarize
   * @param {Object} options - Summarization options; `concurrency` (default 3)
   *   sets the batch size and is not forwarded to `execute`
   * @returns {Promise<Array>} - Array of summarization results
   * @throws {TypeError} If `texts` is not an array
   */
  async summarizeMultiple(texts, options = {}) {
    if (!Array.isArray(texts)) {
      throw new TypeError('texts must be an array');
    }

    const { concurrency, ...sharedOptions } = options ?? {};
    // A zero, negative or non-numeric batch size would stall the loop below.
    const batchSize = Number.isFinite(concurrency) && concurrency >= 1
      ? Math.floor(concurrency)
      : 3;

    const results = [];

    for (let start = 0; start < texts.length; start += batchSize) {
      const batch = texts.slice(start, start + batchSize);

      const batchResults = await Promise.all(batch.map(async (entry) => {
        const isRecord = entry !== null && typeof entry === 'object';
        const params = typeof entry === 'string'
          ? { text: entry, options: sharedOptions }
          : {
            ...(isRecord ? entry : {}),
            options: { ...sharedOptions, ...(isRecord ? entry.options : undefined) }
          };

        try {
          return await this.execute(params);
        } catch (error) {
          // Safety net so one bad entry never rejects the whole batch.
          return SummarizeContentTool.createFailureResult(
            params.text,
            error?.message ?? String(error)
          );
        }
      }));

      results.push(...batchResults);
    }

    return results;
  }

  /**
   * Generate summary with custom length.
   *
   * `targetWords` selects a length bucket: up to 50 words is `'short'`, up to
   * 150 is `'medium'`, anything larger is `'long'`. If `targetWords` is not a
   * finite number, no bucket is forced and `options.summaryLength` (if any)
   * is left as supplied. `targetWords` is also passed along in the options.
   *
   * @param {string} text - Text to summarize
   * @param {number} targetWords - Target word count for summary
   * @param {Object} options - Additional options
   * @returns {Promise<Object>} - Custom summary result
   */
  async generateCustomLengthSummary(text, targetWords, options = {}) {
    const mergedOptions = { ...options, targetWords };

    if (Number.isFinite(targetWords)) {
      if (targetWords <= 50) {
        mergedOptions.summaryLength = 'short';
      } else if (targetWords <= 150) {
        mergedOptions.summaryLength = 'medium';
      } else {
        mergedOptions.summaryLength = 'long';
      }
    }

    return await this.execute({ text, options: mergedOptions });
  }
}

export default SummarizeContentTool;