crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Analyze Content MCP Tool
|
|
3
|
+
* Comprehensive content analysis including language detection, topic analysis, sentiment, and more
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { ContentAnalyzer } from '../../core/analysis/ContentAnalyzer.js';
|
|
8
|
+
|
|
9
|
+
// Small factories so that each option below reads as "kind + default" on one line.
const booleanOption = (initial) => z.boolean().default(initial);
const boundedNumberOption = (lowest, highest, initial) =>
  z.number().min(lowest).max(highest).default(initial);

/**
 * Input contract for the analyze_content tool.
 *
 * `text` must be a string of at least 10 characters. `options` may be omitted,
 * in which case it defaults to `{}`; every individual option declares its own
 * default value.
 */
const AnalyzeContentSchema = z.object({
  text: z.string().min(10),
  options: z
    .object({
      // Which analyses to run
      detectLanguage: booleanOption(true),
      extractTopics: booleanOption(true),
      extractEntities: booleanOption(true),
      extractKeywords: booleanOption(true),
      analyzeSentiment: booleanOption(true),
      calculateReadability: booleanOption(true),
      includeStatistics: booleanOption(true),

      // Analysis depth options
      maxTopics: boundedNumberOption(1, 20, 10),
      maxKeywords: boundedNumberOption(1, 50, 15),
      minConfidence: boundedNumberOption(0, 1, 0.1),

      // Output options
      includeAdvancedMetrics: booleanOption(false),
      groupEntitiesByType: booleanOption(true),
      rankByRelevance: booleanOption(true)
    })
    .optional()
    .default({})
});
|
|
31
|
+
|
|
32
|
+
// Building blocks of the result contract, one schema per result section.

const LanguageResultSchema = z.object({
  code: z.string(),
  name: z.string(),
  confidence: z.number(),
  alternatives: z.array(
    z.object({
      code: z.string(),
      name: z.string(),
      confidence: z.number()
    })
  )
});

const TopicResultSchema = z.object({
  topic: z.string(),
  confidence: z.number(),
  keywords: z.array(z.string()),
  category: z.string().optional()
});

const EntityResultSchema = z.object({
  people: z.array(z.string()),
  places: z.array(z.string()),
  organizations: z.array(z.string()),
  dates: z.array(z.string()),
  money: z.array(z.string()),
  other: z.array(z.string()),
  summary: z.object({
    totalEntities: z.number(),
    uniqueEntities: z.number(),
    entityDensity: z.number()
  })
});

const KeywordResultSchema = z.object({
  keyword: z.string(),
  frequency: z.number(),
  relevance: z.number(),
  type: z.string(),
  category: z.string().optional()
});

const SentimentResultSchema = z.object({
  polarity: z.number(),
  subjectivity: z.number(),
  label: z.string(),
  confidence: z.number(),
  emotions: z
    .array(
      z.object({
        emotion: z.string(),
        intensity: z.number()
      })
    )
    .optional()
});

const ReadabilityResultSchema = z.object({
  score: z.number(),
  level: z.string(),
  metrics: z.object({
    sentences: z.number(),
    words: z.number(),
    characters: z.number(),
    avgWordsPerSentence: z.number(),
    avgCharsPerWord: z.number(),
    complexWords: z.number(),
    syllables: z.number()
  })
});

const StatisticsResultSchema = z.object({
  characters: z.number(),
  charactersNoSpaces: z.number(),
  words: z.number(),
  sentences: z.number(),
  paragraphs: z.number(),
  readingTime: z.number(),
  vocabulary: z
    .object({
      uniqueWords: z.number(),
      vocabularyRichness: z.number(),
      lexicalDiversity: z.number()
    })
    .optional()
});

const ThemeResultSchema = z.object({
  theme: z.string(),
  confidence: z.number(),
  supportingTopics: z.array(z.string())
});

/**
 * Shape of the object produced by the analyze_content tool.
 *
 * Only `text`, `analyzedAt`, `processingTime` and `success` are mandatory;
 * every analysis section is optional, as is `error`.
 */
const AnalyzeContentResult = z.object({
  text: z.string(),
  language: LanguageResultSchema.optional(),
  topics: z.array(TopicResultSchema).optional(),
  entities: EntityResultSchema.optional(),
  keywords: z.array(KeywordResultSchema).optional(),
  sentiment: SentimentResultSchema.optional(),
  readability: ReadabilityResultSchema.optional(),
  statistics: StatisticsResultSchema.optional(),
  themes: z.array(ThemeResultSchema).optional(),
  analyzedAt: z.string(),
  processingTime: z.number(),
  success: z.boolean(),
  error: z.string().optional()
});
|
|
116
|
+
|
|
117
|
+
// Keyword lists used to assign a coarse category to a topic.
const TOPIC_CATEGORY_KEYWORDS = Object.freeze({
  technology: ['technology', 'software', 'computer', 'digital', 'internet', 'app', 'system', 'data', 'code', 'development'],
  business: ['business', 'company', 'market', 'sales', 'revenue', 'profit', 'customer', 'service', 'management', 'strategy'],
  science: ['science', 'research', 'study', 'analysis', 'experiment', 'theory', 'discovery', 'scientific', 'academic'],
  health: ['health', 'medical', 'disease', 'treatment', 'patient', 'doctor', 'medicine', 'hospital', 'therapy', 'care'],
  politics: ['politics', 'government', 'policy', 'election', 'politician', 'vote', 'democracy', 'law', 'congress', 'president'],
  sports: ['sports', 'game', 'team', 'player', 'match', 'competition', 'championship', 'athletic', 'training', 'coach'],
  entertainment: ['movie', 'music', 'entertainment', 'film', 'show', 'celebrity', 'actor', 'artist', 'performance', 'media'],
  education: ['education', 'school', 'student', 'teacher', 'university', 'learning', 'course', 'academic', 'knowledge', 'study']
});

// Ordered [category, whole-keyword pattern] pairs; the first matching pattern wins.
// 'client' appears in both the technical and business patterns, so it resolves to 'technical'.
const KEYWORD_CATEGORY_PATTERNS = [
  ['technical', /^(api|sdk|framework|library|database|algorithm|protocol|server|client|interface)$/i],
  ['business', /^(revenue|profit|market|customer|client|sales|business|company|organization)$/i],
  ['academic', /^(research|study|analysis|theory|method|approach|findings|results|conclusion)$/i],
  ['temporal', /^(year|month|week|day|time|period|date|today|yesterday|tomorrow)$/i],
  ['geographical', /^(country|city|state|region|area|location|place|world|global|international)$/i]
];

// Entity groups that are summed by calculateEntitySummary.
const ENTITY_GROUPS = ['people', 'places', 'organizations', 'dates', 'money', 'other'];

// Emotion lexicon (simplified approach): words and phrases that signal each emotion.
const EMOTION_LEXICON = Object.freeze({
  joy: ['happy', 'joy', 'excited', 'pleased', 'delighted', 'cheerful', 'glad', 'elated'],
  anger: ['angry', 'mad', 'furious', 'rage', 'annoyed', 'frustrated', 'irritated'],
  sadness: ['sad', 'depressed', 'unhappy', 'grief', 'sorrow', 'melancholy', 'down'],
  fear: ['afraid', 'scared', 'terrified', 'anxious', 'worried', 'nervous', 'fearful'],
  surprise: ['surprised', 'amazed', 'shocked', 'astonished', 'stunned', 'startled'],
  disgust: ['disgusted', 'revolted', 'repulsed', 'sickened', 'appalled'],
  trust: ['trust', 'confident', 'secure', 'certain', 'assured', 'reliable'],
  anticipation: ['excited', 'eager', 'looking forward', 'anticipating', 'expecting']
});

// One precompiled matcher per emotion. Terms are matched as whole words or phrases
// (optionally followed by a simple inflection suffix) so that "average" is not counted
// as "rage" and "unhappy" is not counted as "happy". Spaces inside a term match any
// whitespace run, which lets multi-word entries such as "looking forward" match.
const EMOTION_MATCHERS = Object.entries(EMOTION_LEXICON).map(([emotion, terms]) => {
  const alternation = terms.map((term) => term.replace(/\s+/g, '\\s+')).join('|');
  return [emotion, new RegExp(`\\b(?:${alternation})(?:s|ed|ing|ly)?\\b`, 'g')];
});

// Extra category labels accepted for a requested domain. Topic categories and keyword
// categories use different vocabularies ('technology' vs 'technical',
// 'science'/'education' vs 'academic'); this table bridges them.
const DOMAIN_CATEGORY_ALIASES = new Map([
  ['technical', ['technology']],
  ['technology', ['technical']],
  ['academic', ['science', 'education']]
]);

/**
 * Count, for every lower-cased topic label, the number of successful results
 * (texts) that contain it. A label repeated inside one result counts once.
 * @param {Array} results - Analysis results
 * @returns {Map<string, number>} - Label -> number of results containing it
 */
function countTopicDocumentFrequency(results) {
  const frequency = new Map();
  for (const result of results) {
    if (!result.success || !Array.isArray(result.topics)) continue;

    const labels = new Set(
      result.topics
        .filter((entry) => typeof entry?.topic === 'string')
        .map((entry) => entry.topic.toLowerCase())
    );
    for (const label of labels) {
      frequency.set(label, (frequency.get(label) ?? 0) + 1);
    }
  }
  return frequency;
}

/**
 * MCP tool that runs ContentAnalyzer on a text and enriches the raw analysis with
 * categories, themes, entity summaries, emotions and vocabulary metrics. It also
 * offers domain-filtered analysis and comparison across several texts.
 */
export class AnalyzeContentTool {
  constructor() {
    this.contentAnalyzer = new ContentAnalyzer();
  }

  /**
   * Get tool definition for MCP server
   * @returns {Object} Tool definition (name, description, inputSchema)
   */
  getDefinition() {
    return {
      name: 'analyze_content',
      description: 'Perform comprehensive content analysis including language detection, topic extraction, entity recognition, sentiment analysis, keyword extraction, and readability assessment.',
      inputSchema: AnalyzeContentSchema
    };
  }

  /**
   * Execute content analysis.
   *
   * Never rejects: validation and analysis failures are reported through a result
   * object with `success: false` and an `error` message.
   *
   * @param {Object} params - Analysis parameters (validated against AnalyzeContentSchema)
   * @returns {Promise<Object>} Analysis result
   */
  async execute(params) {
    const startTime = Date.now();

    try {
      const { text, options } = AnalyzeContentSchema.parse(params);

      const result = {
        // Only a 500-character preview of the input is echoed back.
        text: text.substring(0, 500) + (text.length > 500 ? '...' : ''),
        analyzedAt: new Date().toISOString(),
        success: false,
        processingTime: 0
      };

      // Execute comprehensive analysis using ContentAnalyzer.
      // NOTE(review): includeStatistics and groupEntitiesByType are validated but not
      // forwarded here; groupEntitiesByType is not read anywhere in this class.
      const analysisResult = await this.contentAnalyzer.analyzeContent({
        text,
        options: {
          summarize: false, // We don't need summary for analysis
          detectLanguage: options.detectLanguage,
          extractTopics: options.extractTopics,
          extractEntities: options.extractEntities,
          extractKeywords: options.extractKeywords,
          includeSentiment: options.analyzeSentiment,
          includeReadabilityMetrics: options.calculateReadability,
          maxTopics: options.maxTopics,
          maxKeywords: options.maxKeywords,
          minConfidence: options.minConfidence
        }
      });

      // Step 1: Language detection
      if (options.detectLanguage && analysisResult.language) {
        const detected = analysisResult.language;
        result.language = {
          code: detected.code,
          name: detected.name,
          confidence: detected.confidence,
          // NOTE(review): the analyzer's field name is not visible from this file;
          // the original singular spelling is tried first, then the plural one.
          alternatives: detected.alternative || detected.alternatives || []
        };
      }

      // Step 2: Topic extraction with categorization
      if (options.extractTopics && analysisResult.topics) {
        result.topics = analysisResult.topics.map((topic) => ({
          ...topic,
          category: this.categorizeTopicByKeywords(topic.keywords)
        }));

        // Extract themes from topics if advanced metrics requested
        if (options.includeAdvancedMetrics) {
          result.themes = this.extractThemes(result.topics);
        }
      }

      // Step 3: Entity extraction with enhanced grouping
      if (options.extractEntities && analysisResult.entities) {
        result.entities = {
          ...analysisResult.entities,
          summary: this.calculateEntitySummary(analysisResult.entities, text)
        };
      }

      // Step 4: Keyword extraction with categorization
      if (options.extractKeywords && analysisResult.keywords) {
        result.keywords = analysisResult.keywords.map((keyword) => ({
          ...keyword,
          category: this.categorizeKeyword(keyword.keyword, keyword.type)
        }));

        // Sort by relevance if requested
        if (options.rankByRelevance) {
          result.keywords.sort((a, b) => b.relevance - a.relevance);
        }
      }

      // Step 5: Sentiment analysis with emotion detection
      if (options.analyzeSentiment && analysisResult.sentiment) {
        result.sentiment = {
          ...analysisResult.sentiment,
          emotions: options.includeAdvancedMetrics ? this.detectEmotions(text) : undefined
        };
      }

      // Step 6: Readability metrics
      if (options.calculateReadability && analysisResult.readability) {
        result.readability = analysisResult.readability;
      }

      // Step 7: Text statistics with vocabulary analysis
      if (options.includeStatistics && analysisResult.statistics) {
        result.statistics = {
          ...analysisResult.statistics,
          vocabulary: options.includeAdvancedMetrics ? this.calculateVocabularyMetrics(text) : undefined
        };
      }

      result.processingTime = Date.now() - startTime;
      result.success = true;

      return result;
    } catch (error) {
      // The error path must not throw itself: params may be null/undefined and
      // params.text may not be a string (that is often why validation failed).
      const rawText = params?.text;
      const preview = typeof rawText === 'string' ? rawText.substring(0, 100) : '';

      return {
        text: preview || 'unknown',
        analyzedAt: new Date().toISOString(),
        success: false,
        error: `Content analysis failed: ${error?.message ?? String(error)}`,
        processingTime: Date.now() - startTime
      };
    }
  }

  /**
   * Categorize topic based on keywords.
   *
   * The category whose keyword list has the most substring hits in the joined,
   * lower-cased keywords wins; ties go to the category declared first.
   *
   * @param {Array} keywords - Topic keywords
   * @returns {string} - Topic category, or 'general' when nothing matches
   */
  categorizeTopicByKeywords(keywords) {
    if (!Array.isArray(keywords) || keywords.length === 0) return 'general';

    const haystack = keywords.join(' ').toLowerCase();

    let bestCategory = 'general';
    let bestScore = 0;
    for (const [category, terms] of Object.entries(TOPIC_CATEGORY_KEYWORDS)) {
      const score = terms.filter((term) => haystack.includes(term)).length;
      // Strict comparison keeps the earliest category on ties.
      if (score > bestScore) {
        bestScore = score;
        bestCategory = category;
      }
    }

    return bestCategory;
  }

  /**
   * Categorize individual keyword
   * @param {string} keyword - Keyword to categorize
   * @param {string} type - Grammatical type
   * @returns {string} - Keyword category; falls back to `type`, then 'general'
   */
  categorizeKeyword(keyword, type) {
    const candidate = typeof keyword === 'string' ? keyword : '';

    for (const [category, pattern] of KEYWORD_CATEGORY_PATTERNS) {
      if (pattern.test(candidate)) {
        return category;
      }
    }

    // Default to grammatical type
    return type || 'general';
  }

  /**
   * Extract themes from topics.
   *
   * A theme is a category shared by at least two topics; its confidence is the
   * mean topic confidence rounded to two decimals.
   *
   * @param {Array} topics - Analyzed topics
   * @returns {Array} - Extracted themes, highest confidence first
   */
  extractThemes(topics) {
    if (!Array.isArray(topics) || topics.length === 0) return [];

    // Group topics by category (a Map keeps arbitrary category labels safe as keys)
    const topicsByCategory = new Map();
    for (const topic of topics) {
      const category = topic.category || 'general';
      const bucket = topicsByCategory.get(category);
      if (bucket) {
        bucket.push(topic);
      } else {
        topicsByCategory.set(category, [topic]);
      }
    }

    // Create themes from categories with multiple topics
    const themes = [];
    for (const [category, members] of topicsByCategory) {
      if (members.length < 2) continue;

      const total = members.reduce(
        (sum, member) => sum + (Number.isFinite(member.confidence) ? member.confidence : 0),
        0
      );

      themes.push({
        theme: category,
        confidence: Math.round((total / members.length) * 100) / 100,
        supportingTopics: members.map((member) => member.topic)
      });
    }

    return themes.sort((a, b) => b.confidence - a.confidence);
  }

  /**
   * Calculate entity summary statistics.
   *
   * Missing entity groups are treated as empty, and the density is 0 when the
   * text contains no words (instead of NaN/Infinity).
   *
   * @param {Object} entities - Extracted entities
   * @param {string} text - Original text
   * @returns {Object} - { totalEntities, uniqueEntities, entityDensity }
   */
  calculateEntitySummary(entities, text) {
    const allEntities = ENTITY_GROUPS.flatMap((group) => {
      const items = entities?.[group];
      return Array.isArray(items) ? items : [];
    });

    // Uniqueness is case-insensitive.
    const uniqueEntities = new Set(allEntities.map((entity) => String(entity).toLowerCase()));
    const wordCount = String(text ?? '')
      .split(/\s+/)
      .filter((word) => word.length > 0).length;
    const density = wordCount > 0 ? allEntities.length / wordCount : 0;

    return {
      totalEntities: allEntities.length,
      uniqueEntities: uniqueEntities.size,
      entityDensity: Math.round(density * 100) / 100
    };
  }

  /**
   * Detect emotions in text (simplified lexicon approach).
   *
   * Intensity is the number of lexicon hits per 100 words, capped at 1; texts
   * shorter than 100 words are treated as 100 words long.
   *
   * @param {string} text - Text to analyze
   * @returns {Array} - Up to five detected emotions with intensity, strongest first
   */
  detectEmotions(text) {
    if (typeof text !== 'string') return [];

    const normalized = text.toLowerCase();
    const wordCount = normalized.split(/\s+/).filter(Boolean).length;
    if (wordCount === 0) return [];

    const scale = Math.max(wordCount / 100, 1);
    const emotions = [];

    for (const [emotion, matcher] of EMOTION_MATCHERS) {
      // String.prototype.match with a global regex returns every match (or null).
      const hits = normalized.match(matcher)?.length ?? 0;
      if (hits === 0) continue;

      emotions.push({
        emotion,
        intensity: Math.round(Math.min(1, hits / scale) * 100) / 100
      });
    }

    return emotions.sort((a, b) => b.intensity - a.intensity).slice(0, 5);
  }

  /**
   * Calculate vocabulary richness metrics over whitespace-separated, lower-cased
   * words longer than two characters.
   * @param {string} text - Text to analyze
   * @returns {Object} - { uniqueWords, vocabularyRichness, lexicalDiversity }
   */
  calculateVocabularyMetrics(text) {
    const words = String(text ?? '')
      .toLowerCase()
      .split(/\s+/)
      .filter((word) => word.length > 2);

    // A Map (not a plain object) so that words such as "constructor" are counted
    // correctly instead of colliding with Object.prototype members.
    const frequencies = new Map();
    for (const word of words) {
      frequencies.set(word, (frequencies.get(word) ?? 0) + 1);
    }
    const uniqueCount = frequencies.size;

    // Type-Token Ratio (vocabulary richness)
    const vocabularyRichness = uniqueCount / Math.max(words.length, 1);

    // Simple lexical diversity measure: share of distinct words used exactly once
    let hapaxLegomena = 0;
    for (const count of frequencies.values()) {
      if (count === 1) hapaxLegomena += 1;
    }
    const lexicalDiversity = hapaxLegomena / Math.max(uniqueCount, 1);

    return {
      uniqueWords: uniqueCount,
      vocabularyRichness: Math.round(vocabularyRichness * 100) / 100,
      lexicalDiversity: Math.round(lexicalDiversity * 100) / 100
    };
  }

  /**
   * Analyze content for specific domain.
   *
   * Runs execute() with topics, keywords and advanced metrics forced on, then keeps
   * only topics/keywords whose category is the domain, one of its aliases
   * (e.g. 'technical' also accepts 'technology'), or 'general'.
   *
   * @param {string} text - Text to analyze
   * @param {string} domain - Domain to focus on (e.g., 'academic', 'business', 'technical')
   * @param {Object} options - Analysis options
   * @returns {Promise<Object>} - Domain-specific analysis
   */
  async analyzeDomainSpecific(text, domain, options = {}) {
    const domainOptions = {
      ...options,
      extractTopics: true,
      extractKeywords: true,
      maxKeywords: 20,
      includeAdvancedMetrics: true
    };

    const result = await this.execute({ text, options: domainOptions });

    if (!result.success) return result;

    const accepted = new Set([domain, 'general', ...(DOMAIN_CATEGORY_ALIASES.get(domain) ?? [])]);
    const belongsToDomain = (item) => accepted.has(item.category);

    // Filter and enhance results for specific domain
    if (result.topics) {
      result.topics = result.topics.filter(belongsToDomain);
    }

    if (result.keywords) {
      result.keywords = result.keywords.filter(belongsToDomain);
    }

    return result;
  }

  /**
   * Compare content analysis between multiple texts
   * @param {Array} texts - Array of texts to compare
   * @param {Object} options - Analysis options
   * @returns {Promise<Object>} - { individual, comparison }
   * @throws {TypeError} When `texts` is not an array
   */
  async compareContent(texts, options = {}) {
    if (!Array.isArray(texts)) {
      throw new TypeError('compareContent expects an array of texts');
    }

    const results = await Promise.all(
      texts.map((text) => this.execute({ text, options }))
    );

    return {
      individual: results,
      comparison: {
        languages: this.compareLanguages(results),
        sentiments: this.compareSentiments(results),
        readability: this.compareReadability(results),
        commonTopics: this.findCommonTopics(results),
        uniqueTopics: this.findUniqueTopics(results)
      }
    };
  }

  /**
   * Compare languages across results
   * @param {Array} results - Analysis results
   * @returns {Object} - { detected: code -> count, primary, diversity }
   */
  compareLanguages(results) {
    const counts = new Map();
    for (const result of results) {
      if (!result.success || !result.language) continue;
      const { code } = result.language;
      counts.set(code, (counts.get(code) ?? 0) + 1);
    }

    // The most frequent code wins; the first one seen wins ties.
    let primary;
    let highest = 0;
    for (const [code, count] of counts) {
      if (count > highest) {
        highest = count;
        primary = code;
      }
    }

    return {
      detected: Object.fromEntries(counts),
      primary: primary || 'unknown',
      diversity: counts.size
    };
  }

  /**
   * Compare sentiments across results
   * @param {Array} results - Analysis results
   * @returns {Object|null} - Sentiment comparison, or null when no result has sentiment
   */
  compareSentiments(results) {
    const sentiments = results
      .filter((result) => result.success && result.sentiment)
      .map((result) => result.sentiment);

    if (sentiments.length === 0) return null;

    const polarities = sentiments.map((sentiment) => sentiment.polarity);
    const polarityTotal = polarities.reduce((sum, value) => sum + value, 0);
    const subjectivityTotal = sentiments.reduce((sum, sentiment) => sum + sentiment.subjectivity, 0);

    return {
      averagePolarity: Math.round((polarityTotal / sentiments.length) * 100) / 100,
      averageSubjectivity: Math.round((subjectivityTotal / sentiments.length) * 100) / 100,
      range: {
        polarity: {
          min: Math.min(...polarities),
          max: Math.max(...polarities)
        }
      }
    };
  }

  /**
   * Compare readability across results
   * @param {Array} results - Analysis results
   * @returns {Object|null} - Readability comparison, or null when no result has readability
   */
  compareReadability(results) {
    const scores = results
      .filter((result) => result.success && result.readability)
      .map((result) => result.readability.score);

    if (scores.length === 0) return null;

    const total = scores.reduce((sum, score) => sum + score, 0);
    const lowest = Math.min(...scores);
    const highest = Math.max(...scores);

    return {
      averageScore: Math.round((total / scores.length) * 100) / 100,
      range: {
        min: lowest,
        max: highest
      },
      // Scores are considered consistent when they span fewer than 20 points.
      consistency: highest - lowest < 20
    };
  }

  /**
   * Find common topics across results.
   *
   * A topic is common when it appears (case-insensitively) in more than one text;
   * `occurrences` is the number of texts containing it.
   *
   * @param {Array} results - Analysis results
   * @returns {Array} - Common topics, most widespread first
   */
  findCommonTopics(results) {
    return [...countTopicDocumentFrequency(results)]
      .filter(([, count]) => count > 1)
      .sort((a, b) => b[1] - a[1])
      .map(([topic, count]) => ({ topic, occurrences: count }));
  }

  /**
   * Find unique topics across results.
   *
   * A topic is unique to a text when no other text contains it
   * (case-insensitively); each unique topic is listed once per text.
   *
   * @param {Array} results - Analysis results
   * @returns {Array} - Unique topics by text ({ textIndex, uniqueTopics })
   */
  findUniqueTopics(results) {
    const frequency = countTopicDocumentFrequency(results);

    return results.map((result, index) => {
      if (!result.success || !result.topics) return { textIndex: index, uniqueTopics: [] };

      const seen = new Set();
      const uniqueTopics = [];
      for (const entry of result.topics) {
        if (typeof entry?.topic !== 'string') continue;

        const label = entry.topic.toLowerCase();
        if (frequency.get(label) !== 1 || seen.has(label)) continue;

        seen.add(label);
        uniqueTopics.push(entry.topic);
      }

      return { textIndex: index, uniqueTopics };
    });
  }
}
|
|
623
|
+
|
|
624
|
+
// Default export of the same class that is also exported by name above,
// so both `import AnalyzeContentTool` and `import { AnalyzeContentTool }` work.
export default AnalyzeContentTool;
|