crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,624 @@
1
+ /**
2
+ * Analyze Content MCP Tool
3
+ * Comprehensive content analysis including language detection, topic analysis, sentiment, and more
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { ContentAnalyzer } from '../../core/analysis/ContentAnalyzer.js';
8
+
9
/**
 * Switches and limits accepted under `options`.
 * Every field declares its own default, so a partial object is acceptable.
 */
const AnalysisOptionsSchema = z.object({
  // Which analyses to run
  detectLanguage: z.boolean().default(true),
  extractTopics: z.boolean().default(true),
  extractEntities: z.boolean().default(true),
  extractKeywords: z.boolean().default(true),
  analyzeSentiment: z.boolean().default(true),
  calculateReadability: z.boolean().default(true),
  includeStatistics: z.boolean().default(true),

  // Analysis depth options
  maxTopics: z.number().min(1).max(20).default(10),
  maxKeywords: z.number().min(1).max(50).default(15),
  minConfidence: z.number().min(0).max(1).default(0.1),

  // Output options
  includeAdvancedMetrics: z.boolean().default(false),
  groupEntitiesByType: z.boolean().default(true),
  rankByRelevance: z.boolean().default(true)
});

/**
 * Input contract of the analyze_content tool: the text to analyse (at least
 * 10 characters) and an optional `options` object that falls back to `{}`.
 */
const AnalyzeContentSchema = z.object({
  text: z.string().min(10),
  options: AnalysisOptionsSchema.optional().default({})
});
31
+
32
// Fields shared by the primary language guess and each alternative guess.
const LanguageGuessShape = {
  code: z.string(),
  name: z.string(),
  confidence: z.number()
};

// Detected language plus the list of alternative candidates.
const LanguageResultSchema = z.object({
  ...LanguageGuessShape,
  alternatives: z.array(z.object(LanguageGuessShape))
});

// One extracted topic; `category` is optional.
const TopicResultSchema = z.object({
  topic: z.string(),
  confidence: z.number(),
  keywords: z.array(z.string()),
  category: z.string().optional()
});

// Entities grouped by type, followed by aggregate counts.
const EntitiesResultSchema = z.object({
  people: z.array(z.string()),
  places: z.array(z.string()),
  organizations: z.array(z.string()),
  dates: z.array(z.string()),
  money: z.array(z.string()),
  other: z.array(z.string()),
  summary: z.object({
    totalEntities: z.number(),
    uniqueEntities: z.number(),
    entityDensity: z.number()
  })
});

// One extracted keyword; `category` is optional.
const KeywordResultSchema = z.object({
  keyword: z.string(),
  frequency: z.number(),
  relevance: z.number(),
  type: z.string(),
  category: z.string().optional()
});

// Sentiment scores with an optional list of emotions.
const SentimentResultSchema = z.object({
  polarity: z.number(),
  subjectivity: z.number(),
  label: z.string(),
  confidence: z.number(),
  emotions: z.array(z.object({
    emotion: z.string(),
    intensity: z.number()
  })).optional()
});

// Readability score, level label and the underlying counts.
const ReadabilityResultSchema = z.object({
  score: z.number(),
  level: z.string(),
  metrics: z.object({
    sentences: z.number(),
    words: z.number(),
    characters: z.number(),
    avgWordsPerSentence: z.number(),
    avgCharsPerWord: z.number(),
    complexWords: z.number(),
    syllables: z.number()
  })
});

// Basic text counts with an optional vocabulary breakdown.
const StatisticsResultSchema = z.object({
  characters: z.number(),
  charactersNoSpaces: z.number(),
  words: z.number(),
  sentences: z.number(),
  paragraphs: z.number(),
  readingTime: z.number(),
  vocabulary: z.object({
    uniqueWords: z.number(),
    vocabularyRichness: z.number(),
    lexicalDiversity: z.number()
  }).optional()
});

// A theme with the topics that support it.
const ThemeResultSchema = z.object({
  theme: z.string(),
  confidence: z.number(),
  supportingTopics: z.array(z.string())
});

/**
 * Schema describing an analysis result. Its fields mirror the object that
 * AnalyzeContentTool.execute() assembles; the schema itself is not referenced
 * anywhere else in this file.
 */
const AnalyzeContentResult = z.object({
  text: z.string(),
  language: LanguageResultSchema.optional(),
  topics: z.array(TopicResultSchema).optional(),
  entities: EntitiesResultSchema.optional(),
  keywords: z.array(KeywordResultSchema).optional(),
  sentiment: SentimentResultSchema.optional(),
  readability: ReadabilityResultSchema.optional(),
  statistics: StatisticsResultSchema.optional(),
  themes: z.array(ThemeResultSchema).optional(),
  analyzedAt: z.string(),
  processingTime: z.number(),
  success: z.boolean(),
  error: z.string().optional()
});
116
+
117
/**
 * Terms that place a topic in a coarse subject category.
 * Key order matters: it is the tie-breaker when two categories score equally.
 */
const TOPIC_CATEGORY_TERMS = Object.freeze({
  technology: ['technology', 'software', 'computer', 'digital', 'internet', 'app', 'system', 'data', 'code', 'development'],
  business: ['business', 'company', 'market', 'sales', 'revenue', 'profit', 'customer', 'service', 'management', 'strategy'],
  science: ['science', 'research', 'study', 'analysis', 'experiment', 'theory', 'discovery', 'scientific', 'academic'],
  health: ['health', 'medical', 'disease', 'treatment', 'patient', 'doctor', 'medicine', 'hospital', 'therapy', 'care'],
  politics: ['politics', 'government', 'policy', 'election', 'politician', 'vote', 'democracy', 'law', 'congress', 'president'],
  sports: ['sports', 'game', 'team', 'player', 'match', 'competition', 'championship', 'athletic', 'training', 'coach'],
  entertainment: ['movie', 'music', 'entertainment', 'film', 'show', 'celebrity', 'actor', 'artist', 'performance', 'media'],
  education: ['education', 'school', 'student', 'teacher', 'university', 'learning', 'course', 'academic', 'knowledge', 'study']
});

/**
 * Exact-match vocabularies for single keywords, checked in this order
 * ("client" is listed twice; the earlier "technical" entry wins).
 */
const KEYWORD_CATEGORY_TERMS = Object.freeze([
  ['technical', new Set(['api', 'sdk', 'framework', 'library', 'database', 'algorithm', 'protocol', 'server', 'client', 'interface'])],
  ['business', new Set(['revenue', 'profit', 'market', 'customer', 'client', 'sales', 'business', 'company', 'organization'])],
  ['academic', new Set(['research', 'study', 'analysis', 'theory', 'method', 'approach', 'findings', 'results', 'conclusion'])],
  ['temporal', new Set(['year', 'month', 'week', 'day', 'time', 'period', 'date', 'today', 'yesterday', 'tomorrow'])],
  ['geographical', new Set(['country', 'city', 'state', 'region', 'area', 'location', 'place', 'world', 'global', 'international'])]
]);

/** Entity groups that are folded into the entity summary. */
const ENTITY_GROUPS = Object.freeze(['people', 'places', 'organizations', 'dates', 'money', 'other']);

/** Cue words (and one phrase) per emotion. */
const EMOTION_LEXICON = Object.freeze({
  joy: ['happy', 'joy', 'excited', 'pleased', 'delighted', 'cheerful', 'glad', 'elated'],
  anger: ['angry', 'mad', 'furious', 'rage', 'annoyed', 'frustrated', 'irritated'],
  sadness: ['sad', 'depressed', 'unhappy', 'grief', 'sorrow', 'melancholy', 'down'],
  fear: ['afraid', 'scared', 'terrified', 'anxious', 'worried', 'nervous', 'fearful'],
  surprise: ['surprised', 'amazed', 'shocked', 'astonished', 'stunned', 'startled'],
  disgust: ['disgusted', 'revolted', 'repulsed', 'sickened', 'appalled'],
  trust: ['trust', 'confident', 'secure', 'certain', 'assured', 'reliable'],
  anticipation: ['excited', 'eager', 'looking forward', 'anticipating', 'expecting']
});

// Common inflections tolerated after a cue word ("trusted", "sadly", "joyful").
const EMOTION_SUFFIXES = '(?:s|d|ed|ly|ness|ful|fully|ous)?';

/**
 * One whole-word pattern per emotion. Word boundaries stop cue words from
 * matching inside unrelated words ("rage" in "average", "mad" in "made",
 * "happy" in "unhappy"); spaces inside a phrase match any whitespace run.
 * The lexicon holds only letters and spaces, so no regex escaping is needed.
 */
const EMOTION_PATTERNS = Object.entries(EMOTION_LEXICON).map(([emotion, terms]) => {
  const alternation = terms.map((term) => term.replace(/\s+/g, '\\s+')).join('|');
  return [emotion, new RegExp(`\\b(?:${alternation})${EMOTION_SUFFIXES}\\b`, 'g')];
});

/** Round to two decimal places. */
function roundToHundredths(value) {
  return Math.round(value * 100) / 100;
}

/**
 * Count, for every lower-cased topic, how many successful results mention it.
 * A topic repeated inside one result is counted once for that result. A Map is
 * used so that topics named like Object.prototype members ("constructor")
 * are counted correctly.
 */
function countTopicDocuments(results) {
  const counts = new Map();
  for (const result of results) {
    if (!result.success || !result.topics) continue;
    const topicsInResult = new Set(result.topics.map((entry) => String(entry.topic).toLowerCase()));
    for (const topic of topicsInResult) {
      counts.set(topic, (counts.get(topic) ?? 0) + 1);
    }
  }
  return counts;
}

export class AnalyzeContentTool {
  constructor() {
    this.contentAnalyzer = new ContentAnalyzer();
  }

  /**
   * Get tool definition for MCP server
   * @returns {Object} Tool definition (name, description, inputSchema)
   */
  getDefinition() {
    return {
      name: 'analyze_content',
      description: 'Perform comprehensive content analysis including language detection, topic extraction, entity recognition, sentiment analysis, keyword extraction, and readability assessment.',
      inputSchema: AnalyzeContentSchema
    };
  }

  /**
   * Execute content analysis.
   *
   * Validates the input, delegates the analysis to the ContentAnalyzer and then
   * decorates each requested section (categories, entity summary, emotions,
   * vocabulary metrics). Never rejects: any failure, including invalid or
   * missing params, is reported as `{ success: false, error }`.
   *
   * @param {Object} params - Analysis parameters (`text`, optional `options`)
   * @returns {Promise<Object>} Analysis result
   */
  async execute(params) {
    const startTime = Date.now();

    try {
      const { text, options } = AnalyzeContentSchema.parse(params);

      const result = {
        // Only a preview of the input is echoed back
        text: text.length > 500 ? `${text.substring(0, 500)}...` : text,
        analyzedAt: new Date().toISOString(),
        success: false,
        processingTime: 0
      };

      // Execute comprehensive analysis using ContentAnalyzer
      const analysisResult = await this.contentAnalyzer.analyzeContent({
        text,
        options: {
          summarize: false, // We don't need summary for analysis
          detectLanguage: options.detectLanguage,
          extractTopics: options.extractTopics,
          extractEntities: options.extractEntities,
          extractKeywords: options.extractKeywords,
          includeSentiment: options.analyzeSentiment,
          includeReadabilityMetrics: options.calculateReadability,
          maxTopics: options.maxTopics,
          maxKeywords: options.maxKeywords,
          minConfidence: options.minConfidence
        }
      });

      // Step 1: Language detection
      if (options.detectLanguage && analysisResult.language) {
        const detected = analysisResult.language;
        result.language = {
          code: detected.code,
          name: detected.name,
          confidence: detected.confidence,
          // NOTE(review): ContentAnalyzer is outside this file. This code used to
          // read only `alternative` (singular); both spellings are accepted now.
          // TODO confirm which one the analyzer emits.
          alternatives: detected.alternatives || detected.alternative || []
        };
      }

      // Step 2: Topic extraction with categorization
      if (options.extractTopics && analysisResult.topics) {
        result.topics = analysisResult.topics.map((topic) => ({
          ...topic,
          category: this.categorizeTopicByKeywords(topic.keywords)
        }));

        // Extract themes from topics if advanced metrics requested
        if (options.includeAdvancedMetrics) {
          result.themes = this.extractThemes(result.topics);
        }
      }

      // Step 3: Entity extraction with enhanced grouping
      if (options.extractEntities && analysisResult.entities) {
        result.entities = {
          ...analysisResult.entities,
          summary: this.calculateEntitySummary(analysisResult.entities, text)
        };
      }

      // Step 4: Keyword extraction with categorization
      if (options.extractKeywords && analysisResult.keywords) {
        result.keywords = analysisResult.keywords.map((keyword) => ({
          ...keyword,
          category: this.categorizeKeyword(keyword.keyword, keyword.type)
        }));

        // Sort by relevance if requested
        if (options.rankByRelevance) {
          result.keywords.sort((a, b) => b.relevance - a.relevance);
        }
      }

      // Step 5: Sentiment analysis with emotion detection
      if (options.analyzeSentiment && analysisResult.sentiment) {
        result.sentiment = {
          ...analysisResult.sentiment,
          emotions: options.includeAdvancedMetrics ? this.detectEmotions(text) : undefined
        };
      }

      // Step 6: Readability metrics
      if (options.calculateReadability && analysisResult.readability) {
        result.readability = analysisResult.readability;
      }

      // Step 7: Text statistics with vocabulary analysis
      if (options.includeStatistics && analysisResult.statistics) {
        result.statistics = {
          ...analysisResult.statistics,
          vocabulary: options.includeAdvancedMetrics ? this.calculateVocabularyMetrics(text) : undefined
        };
      }

      result.processingTime = Date.now() - startTime;
      result.success = true;

      return result;
    } catch (error) {
      // `params` may itself be null/undefined or carry a non-string `text`;
      // the error report must not throw while building the preview.
      const rawText = params?.text;
      const preview = typeof rawText === 'string' ? rawText.substring(0, 100) : '';

      return {
        text: preview || 'unknown',
        analyzedAt: new Date().toISOString(),
        success: false,
        error: `Content analysis failed: ${error?.message ?? String(error)}`,
        processingTime: Date.now() - startTime
      };
    }
  }

  /**
   * Categorize topic based on keywords.
   *
   * Keywords are split into lower-cased words; a category term counts as a hit
   * when some word starts with it ("apps" hits "app", "happy" does not). The
   * category with the most distinct hits wins, ties going to the category
   * declared first.
   *
   * @param {Array} keywords - Topic keywords
   * @returns {string} - Topic category, or 'general' when nothing matches
   */
  categorizeTopicByKeywords(keywords) {
    if (!Array.isArray(keywords) || keywords.length === 0) return 'general';

    const words = keywords
      .flatMap((keyword) => String(keyword).toLowerCase().split(/[^a-z0-9]+/))
      .filter(Boolean);

    let bestCategory = 'general';
    let bestScore = 0;

    for (const [category, terms] of Object.entries(TOPIC_CATEGORY_TERMS)) {
      const score = terms.filter((term) => words.some((word) => word.startsWith(term))).length;
      // Strictly greater keeps the earliest category on a tie
      if (score > bestScore) {
        bestScore = score;
        bestCategory = category;
      }
    }

    return bestCategory;
  }

  /**
   * Categorize individual keyword.
   *
   * The keyword is trimmed and compared case-insensitively against fixed
   * vocabularies; when none matches, the grammatical type is returned.
   *
   * @param {string} keyword - Keyword to categorize
   * @param {string} type - Grammatical type
   * @returns {string} - Keyword category (falls back to `type`, then 'general')
   */
  categorizeKeyword(keyword, type) {
    if (typeof keyword === 'string') {
      const normalized = keyword.trim().toLowerCase();
      for (const [category, terms] of KEYWORD_CATEGORY_TERMS) {
        if (terms.has(normalized)) {
          return category;
        }
      }
    }

    // Default to grammatical type
    return type || 'general';
  }

  /**
   * Extract themes from topics.
   *
   * A theme is a category shared by at least two topics; its confidence is the
   * mean confidence of those topics, rounded to two decimals.
   *
   * @param {Array} topics - Analyzed topics
   * @returns {Array} - Extracted themes, highest confidence first
   */
  extractThemes(topics) {
    if (!Array.isArray(topics) || topics.length === 0) return [];

    // Group topics by category (Map: category names are arbitrary strings)
    const topicsByCategory = new Map();
    for (const topic of topics) {
      const category = topic.category || 'general';
      if (!topicsByCategory.has(category)) {
        topicsByCategory.set(category, []);
      }
      topicsByCategory.get(category).push(topic);
    }

    // Create themes from categories with multiple topics
    const themes = [];
    for (const [category, members] of topicsByCategory) {
      if (members.length < 2) continue;

      const totalConfidence = members.reduce((sum, topic) => sum + topic.confidence, 0);
      themes.push({
        theme: category,
        confidence: roundToHundredths(totalConfidence / members.length),
        supportingTopics: members.map((topic) => topic.topic)
      });
    }

    return themes.sort((a, b) => b.confidence - a.confidence);
  }

  /**
   * Calculate entity summary statistics.
   *
   * Missing entity groups are treated as empty, and the density is 0 when the
   * text contains no words.
   *
   * @param {Object} entities - Extracted entities
   * @param {string} text - Original text
   * @returns {Object} - Entity summary (totalEntities, uniqueEntities, entityDensity)
   */
  calculateEntitySummary(entities, text) {
    const allEntities = ENTITY_GROUPS.flatMap((group) => {
      const members = entities?.[group];
      return Array.isArray(members) ? members : [];
    });

    // Uniqueness is case-insensitive
    const uniqueEntities = new Set(allEntities.map((entity) => String(entity).toLowerCase()));
    const wordCount = typeof text === 'string'
      ? text.split(/\s+/).filter((word) => word.length > 0).length
      : 0;
    const density = wordCount > 0 ? allEntities.length / wordCount : 0;

    return {
      totalEntities: allEntities.length,
      uniqueEntities: uniqueEntities.size,
      entityDensity: roundToHundredths(density)
    };
  }

  /**
   * Detect emotions in text (simplified lexicon approach).
   *
   * Cue words must match whole words (optionally with a common suffix), and the
   * phrase "looking forward" is matched across whitespace. Intensity is the
   * number of hits per 100 words, capped at 1; texts shorter than 100 words
   * are scored as if they had 100.
   *
   * @param {string} text - Text to analyze
   * @returns {Array} - Up to five detected emotions, strongest first
   */
  detectEmotions(text) {
    if (typeof text !== 'string' || text.length === 0) return [];

    const lowered = text.toLowerCase();
    const wordCount = lowered.split(/\s+/).filter(Boolean).length;
    const scale = Math.max(wordCount / 100, 1);
    const emotions = [];

    for (const [emotion, pattern] of EMOTION_PATTERNS) {
      // String.prototype.match resets lastIndex for global patterns, so the
      // shared RegExp objects are safe to reuse.
      const hits = lowered.match(pattern)?.length ?? 0;
      if (hits > 0) {
        emotions.push({
          emotion,
          intensity: roundToHundredths(Math.min(1, hits / scale))
        });
      }
    }

    return emotions.sort((a, b) => b.intensity - a.intensity).slice(0, 5);
  }

  /**
   * Calculate vocabulary richness metrics.
   *
   * Words are lower-cased, stripped of leading/trailing punctuation and kept
   * only when longer than two characters.
   *
   * @param {string} text - Text to analyze
   * @returns {Object} - Vocabulary metrics (uniqueWords, vocabularyRichness, lexicalDiversity)
   */
  calculateVocabularyMetrics(text) {
    const source = typeof text === 'string' ? text : '';
    const words = source
      .toLowerCase()
      .split(/\s+/)
      .map((word) => word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
      .filter((word) => word.length > 2);

    // Map rather than a plain object: words such as "constructor" must count normally
    const frequency = new Map();
    for (const word of words) {
      frequency.set(word, (frequency.get(word) ?? 0) + 1);
    }

    // Type-Token Ratio (vocabulary richness)
    const vocabularyRichness = frequency.size / Math.max(words.length, 1);

    // Share of distinct words that occur exactly once
    let hapaxLegomena = 0;
    for (const count of frequency.values()) {
      if (count === 1) hapaxLegomena += 1;
    }
    const lexicalDiversity = hapaxLegomena / Math.max(frequency.size, 1);

    return {
      uniqueWords: frequency.size,
      vocabularyRichness: roundToHundredths(vocabularyRichness),
      lexicalDiversity: roundToHundredths(lexicalDiversity)
    };
  }

  /**
   * Analyze content for specific domain.
   *
   * Runs execute() with topic and keyword extraction and advanced metrics
   * forced on (maxKeywords 20), then keeps only topics and keywords whose
   * category equals `domain` or 'general'.
   *
   * NOTE(review): topic categories ('technology', 'science', ...) and keyword
   * categories ('technical', 'academic', ...) use different vocabularies, and
   * uncategorized keywords carry their grammatical type, so a single `domain`
   * value rarely matches both lists. Themes are computed before filtering.
   * Behaviour is left as-is; confirm the intended contract.
   *
   * @param {string} text - Text to analyze
   * @param {string} domain - Domain to focus on (e.g., 'academic', 'business', 'technical')
   * @param {Object} options - Analysis options
   * @returns {Promise<Object>} - Domain-specific analysis
   */
  async analyzeDomainSpecific(text, domain, options = {}) {
    const result = await this.execute({
      text,
      options: {
        ...options,
        extractTopics: true,
        extractKeywords: true,
        maxKeywords: 20,
        includeAdvancedMetrics: true
      }
    });

    if (!result.success) return result;

    // Filter results for the specific domain
    const belongsToDomain = (item) => item.category === domain || item.category === 'general';

    if (result.topics) {
      result.topics = result.topics.filter(belongsToDomain);
    }

    if (result.keywords) {
      result.keywords = result.keywords.filter(belongsToDomain);
    }

    return result;
  }

  /**
   * Compare content analysis between multiple texts.
   *
   * Each text is analysed with execute(), which reports failures in-band, so
   * one bad text does not fail the comparison.
   *
   * @param {Array} texts - Array of texts to compare
   * @param {Object} options - Analysis options
   * @returns {Promise<Object>} - Comparative analysis result
   * @throws {TypeError} When `texts` is not an array
   */
  async compareContent(texts, options = {}) {
    if (!Array.isArray(texts)) {
      throw new TypeError('compareContent expects an array of texts');
    }

    const results = await Promise.all(
      texts.map((text) => this.execute({ text, options }))
    );

    return {
      individual: results,
      comparison: {
        languages: this.compareLanguages(results),
        sentiments: this.compareSentiments(results),
        readability: this.compareReadability(results),
        commonTopics: this.findCommonTopics(results),
        uniqueTopics: this.findUniqueTopics(results)
      }
    };
  }

  /**
   * Compare languages across results
   * @param {Array} results - Analysis results
   * @returns {Object} - Language comparison (detected counts, primary code, diversity)
   */
  compareLanguages(results) {
    const counts = new Map();
    for (const result of results) {
      if (!result.success || !result.language) continue;
      const code = String(result.language.code);
      counts.set(code, (counts.get(code) ?? 0) + 1);
    }

    // Stable sort: on equal counts the first language seen stays first
    const ranked = [...counts.entries()].sort((a, b) => b[1] - a[1]);

    return {
      detected: Object.fromEntries(counts),
      primary: ranked[0]?.[0] || 'unknown',
      diversity: counts.size
    };
  }

  /**
   * Compare sentiments across results
   * @param {Array} results - Analysis results
   * @returns {Object|null} - Sentiment comparison, or null when no result has sentiment
   */
  compareSentiments(results) {
    const sentiments = results
      .filter((result) => result.success && result.sentiment)
      .map((result) => result.sentiment);

    if (sentiments.length === 0) return null;

    const polarities = sentiments.map((sentiment) => sentiment.polarity);
    const subjectivities = sentiments.map((sentiment) => sentiment.subjectivity);
    const mean = (values) => values.reduce((sum, value) => sum + value, 0) / values.length;

    return {
      averagePolarity: roundToHundredths(mean(polarities)),
      averageSubjectivity: roundToHundredths(mean(subjectivities)),
      range: {
        polarity: {
          min: Math.min(...polarities),
          max: Math.max(...polarities)
        }
      }
    };
  }

  /**
   * Compare readability across results
   * @param {Array} results - Analysis results
   * @returns {Object|null} - Readability comparison, or null when no result has readability
   */
  compareReadability(results) {
    const scores = results
      .filter((result) => result.success && result.readability)
      .map((result) => result.readability.score);

    if (scores.length === 0) return null;

    const lowest = Math.min(...scores);
    const highest = Math.max(...scores);
    const average = scores.reduce((sum, score) => sum + score, 0) / scores.length;

    return {
      averageScore: roundToHundredths(average),
      range: {
        min: lowest,
        max: highest
      },
      // "Consistent" means the scores span less than 20 points
      consistency: highest - lowest < 20
    };
  }

  /**
   * Find common topics across results.
   *
   * A topic is common when it appears (case-insensitively) in more than one
   * result; `occurrences` is the number of results that mention it.
   *
   * @param {Array} results - Analysis results
   * @returns {Array} - Common topics, most widespread first
   */
  findCommonTopics(results) {
    return [...countTopicDocuments(results).entries()]
      .filter(([, count]) => count > 1)
      .sort((a, b) => b[1] - a[1])
      .map(([topic, count]) => ({ topic, occurrences: count }));
  }

  /**
   * Find unique topics across results.
   *
   * A topic is unique to a text when no other result mentions it
   * (case-insensitively). Repeats within one text are reported once, using the
   * first spelling encountered.
   *
   * @param {Array} results - Analysis results
   * @returns {Array} - Unique topics by text (`textIndex`, `uniqueTopics`)
   */
  findUniqueTopics(results) {
    const documentCounts = countTopicDocuments(results);

    return results.map((result, index) => {
      if (!result.success || !result.topics) return { textIndex: index, uniqueTopics: [] };

      const alreadyListed = new Set();
      const uniqueTopics = [];
      for (const { topic } of result.topics) {
        const key = String(topic).toLowerCase();
        if (documentCounts.get(key) === 1 && !alreadyListed.has(key)) {
          alreadyListed.add(key);
          uniqueTopics.push(topic);
        }
      }

      return { textIndex: index, uniqueTopics };
    });
  }
}

export default AnalyzeContentTool;