crawlforge-mcp-server 3.0.1 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +63 -19
- package/README.md +19 -15
- package/package.json +6 -6
- package/server.js +38 -21
- package/setup.js +5 -5
- package/src/core/ActionExecutor.js +16 -1
- package/src/core/AuthManager.js +5 -4
- package/src/core/ChangeTracker.js +5 -963
- package/src/core/WebhookDispatcher.js +4 -0
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
import crypto from "crypto";
|
|
2
2
|
/**
|
|
3
|
-
* ChangeTracker -
|
|
3
|
+
* ChangeTracker - Content Change Detection and Analysis
|
|
4
4
|
* Implements hierarchical content hashing (page → sections → elements)
|
|
5
|
-
* with differential comparison engine
|
|
6
|
-
* scheduled monitoring, advanced comparison engine, alert system,
|
|
7
|
-
* and historical analysis capabilities
|
|
5
|
+
* with differential comparison engine and change significance scoring
|
|
8
6
|
*/
|
|
9
7
|
|
|
10
8
|
import { createHash } from 'crypto';
|
|
@@ -12,10 +10,6 @@ import { z } from 'zod';
|
|
|
12
10
|
import { EventEmitter } from 'events';
|
|
13
11
|
import { load } from 'cheerio';
|
|
14
12
|
import { diffWords, diffLines, diffChars } from 'diff';
|
|
15
|
-
import * as cron from 'node-cron';
|
|
16
|
-
import fs from 'fs/promises';
|
|
17
|
-
import path from 'path';
|
|
18
|
-
import fetch from 'node-fetch';
|
|
19
13
|
|
|
20
14
|
const ChangeTrackingSchema = z.object({
|
|
21
15
|
url: z.string().url(),
|
|
@@ -83,23 +77,7 @@ export class ChangeTracker extends EventEmitter {
|
|
|
83
77
|
this.baselineContent = new Map();
|
|
84
78
|
this.changeNotifications = new Map();
|
|
85
79
|
this.snapshotManager = new Map();
|
|
86
|
-
|
|
87
|
-
// Phase 2.4 Enhanced Features
|
|
88
|
-
this.scheduledMonitors = new Map(); // Cron-based monitoring
|
|
89
|
-
this.monitoringTemplates = new Map(); // Reusable monitoring configurations
|
|
90
|
-
this.alertRules = new Map(); // Custom alert rules and conditions
|
|
91
|
-
this.alertHistory = new Map(); // Alert notification history
|
|
92
|
-
this.trendAnalysis = new Map(); // Pattern recognition data
|
|
93
|
-
this.visualRegression = new Map(); // Visual diff storage
|
|
94
|
-
this.alertThrottling = new Map(); // Alert rate limiting
|
|
95
|
-
this.semanticDiffCache = new Map(); // Semantic analysis cache
|
|
96
|
-
this.monitoringDashboard = {
|
|
97
|
-
status: 'initialized',
|
|
98
|
-
monitors: new Map(),
|
|
99
|
-
alerts: [],
|
|
100
|
-
trends: {}
|
|
101
|
-
};
|
|
102
|
-
// Enhanced Statistics
|
|
80
|
+
// Statistics
|
|
103
81
|
this.stats = {
|
|
104
82
|
pagesTracked: 0,
|
|
105
83
|
changesDetected: 0,
|
|
@@ -109,16 +87,7 @@ export class ChangeTracker extends EventEmitter {
|
|
|
109
87
|
falsePositives: 0,
|
|
110
88
|
averageChangeScore: 0,
|
|
111
89
|
lastAnalysis: null,
|
|
112
|
-
processingTime: 0
|
|
113
|
-
// Phase 2.4 additions
|
|
114
|
-
scheduledMonitors: 0,
|
|
115
|
-
alertsSent: 0,
|
|
116
|
-
alertsThrottled: 0,
|
|
117
|
-
semanticAnalyses: 0,
|
|
118
|
-
visualRegression: 0,
|
|
119
|
-
trendPatternsDetected: 0,
|
|
120
|
-
averageAlertResponseTime: 0,
|
|
121
|
-
monitoringUptime: 0
|
|
90
|
+
processingTime: 0
|
|
122
91
|
};
|
|
123
92
|
|
|
124
93
|
// Semantic analysis tools (if enabled)
|
|
@@ -133,322 +102,11 @@ export class ChangeTracker extends EventEmitter {
|
|
|
133
102
|
await this.initializeSemanticAnalyzer();
|
|
134
103
|
}
|
|
135
104
|
|
|
136
|
-
// Initialize Phase 2.4 components
|
|
137
|
-
await this.initializeEnhancedFeatures();
|
|
138
|
-
|
|
139
105
|
this.emit('initialized');
|
|
140
106
|
}
|
|
141
107
|
|
|
142
108
|
/**
|
|
143
|
-
*
|
|
144
|
-
*/
|
|
145
|
-
async initializeEnhancedFeatures() {
|
|
146
|
-
try {
|
|
147
|
-
// Initialize monitoring dashboard
|
|
148
|
-
this.monitoringDashboard.status = 'initializing';
|
|
149
|
-
|
|
150
|
-
// Load existing monitoring templates
|
|
151
|
-
await this.loadMonitoringTemplates();
|
|
152
|
-
|
|
153
|
-
// Initialize alert system
|
|
154
|
-
await this.initializeAlertSystem();
|
|
155
|
-
|
|
156
|
-
// Set up historical analysis
|
|
157
|
-
await this.initializeHistoricalAnalysis();
|
|
158
|
-
|
|
159
|
-
// Initialize semantic diff engine if enabled
|
|
160
|
-
if (this.options.enableSemanticAnalysis) {
|
|
161
|
-
await this.initializeSemanticDiffEngine();
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
this.monitoringDashboard.status = 'active';
|
|
165
|
-
this.emit('enhancedFeaturesInitialized');
|
|
166
|
-
|
|
167
|
-
} catch (error) {
|
|
168
|
-
this.monitoringDashboard.status = 'error';
|
|
169
|
-
this.emit('error', { operation: 'initializeEnhancedFeatures', error: error.message });
|
|
170
|
-
throw error;
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
/**
|
|
175
|
-
* Load monitoring templates from storage
|
|
176
|
-
*/
|
|
177
|
-
async loadMonitoringTemplates() {
|
|
178
|
-
const defaultTemplates = {
|
|
179
|
-
'news-site': {
|
|
180
|
-
name: 'News Site Monitoring',
|
|
181
|
-
frequency: '*/15 * * * *', // Every 15 minutes
|
|
182
|
-
options: {
|
|
183
|
-
granularity: 'section',
|
|
184
|
-
trackText: true,
|
|
185
|
-
trackStructure: false,
|
|
186
|
-
significanceThresholds: { minor: 0.05, moderate: 0.2, major: 0.5 }
|
|
187
|
-
},
|
|
188
|
-
alertRules: {
|
|
189
|
-
threshold: 'minor',
|
|
190
|
-
methods: ['webhook', 'email'],
|
|
191
|
-
throttle: 300000 // 5 minutes
|
|
192
|
-
}
|
|
193
|
-
},
|
|
194
|
-
'e-commerce': {
|
|
195
|
-
name: 'E-commerce Site Monitoring',
|
|
196
|
-
frequency: '0 */2 * * *', // Every 2 hours
|
|
197
|
-
options: {
|
|
198
|
-
granularity: 'element',
|
|
199
|
-
trackText: true,
|
|
200
|
-
trackStructure: true,
|
|
201
|
-
trackImages: true,
|
|
202
|
-
customSelectors: ['.price', '.stock-status', '.product-title']
|
|
203
|
-
},
|
|
204
|
-
alertRules: {
|
|
205
|
-
threshold: 'moderate',
|
|
206
|
-
methods: ['webhook', 'slack'],
|
|
207
|
-
throttle: 600000 // 10 minutes
|
|
208
|
-
}
|
|
209
|
-
},
|
|
210
|
-
'documentation': {
|
|
211
|
-
name: 'Documentation Monitoring',
|
|
212
|
-
frequency: '0 9 * * *', // Daily at 9 AM
|
|
213
|
-
options: {
|
|
214
|
-
granularity: 'section',
|
|
215
|
-
trackText: true,
|
|
216
|
-
trackStructure: true,
|
|
217
|
-
excludeSelectors: ['.last-updated', '.edit-link']
|
|
218
|
-
},
|
|
219
|
-
alertRules: {
|
|
220
|
-
threshold: 'major',
|
|
221
|
-
methods: ['email'],
|
|
222
|
-
throttle: 3600000 // 1 hour
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
};
|
|
226
|
-
|
|
227
|
-
for (const [id, template] of Object.entries(defaultTemplates)) {
|
|
228
|
-
this.monitoringTemplates.set(id, template);
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
/**
|
|
233
|
-
* Initialize alert system with default rules
|
|
234
|
-
*/
|
|
235
|
-
async initializeAlertSystem() {
|
|
236
|
-
// Default alert rules
|
|
237
|
-
const defaultAlertRules = {
|
|
238
|
-
'critical-changes': {
|
|
239
|
-
condition: (changeResult) => changeResult.significance === 'critical',
|
|
240
|
-
actions: ['webhook', 'email', 'slack'],
|
|
241
|
-
throttle: 0, // No throttling for critical changes
|
|
242
|
-
priority: 'high'
|
|
243
|
-
},
|
|
244
|
-
'frequent-changes': {
|
|
245
|
-
condition: (url, history) => {
|
|
246
|
-
const recent = history.filter(h => Date.now() - h.timestamp < 3600000); // Last hour
|
|
247
|
-
return recent.length > 5;
|
|
248
|
-
},
|
|
249
|
-
actions: ['webhook'],
|
|
250
|
-
throttle: 1800000, // 30 minutes
|
|
251
|
-
priority: 'medium'
|
|
252
|
-
},
|
|
253
|
-
'structural-changes': {
|
|
254
|
-
condition: (changeResult) => changeResult.changeType === 'structural',
|
|
255
|
-
actions: ['webhook', 'email'],
|
|
256
|
-
throttle: 600000, // 10 minutes
|
|
257
|
-
priority: 'medium'
|
|
258
|
-
}
|
|
259
|
-
};
|
|
260
|
-
|
|
261
|
-
for (const [id, rule] of Object.entries(defaultAlertRules)) {
|
|
262
|
-
this.alertRules.set(id, rule);
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
/**
|
|
267
|
-
* Initialize historical analysis capabilities
|
|
268
|
-
*/
|
|
269
|
-
async initializeHistoricalAnalysis() {
|
|
270
|
-
// Initialize trend analysis patterns
|
|
271
|
-
this.trendAnalysis.set('patterns', {
|
|
272
|
-
dailyChangePatterns: new Map(),
|
|
273
|
-
weeklyTrends: new Map(),
|
|
274
|
-
contentVelocity: new Map(),
|
|
275
|
-
changeFrequency: new Map()
|
|
276
|
-
});
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
/**
|
|
280
|
-
* Initialize semantic diff engine
|
|
281
|
-
*/
|
|
282
|
-
async initializeSemanticDiffEngine() {
|
|
283
|
-
// Initialize semantic analysis components
|
|
284
|
-
this.semanticDiffCache.set('initialized', true);
|
|
285
|
-
this.semanticDiffCache.set('algorithms', {
|
|
286
|
-
textSimilarity: this.calculateTextSimilarity.bind(this),
|
|
287
|
-
structuralSimilarity: this.calculateStructuralSimilarity.bind(this),
|
|
288
|
-
semanticSimilarity: this.calculateSemanticSimilarity.bind(this)
|
|
289
|
-
});
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
/**
|
|
293
|
-
* Create scheduled monitoring with cron-like scheduling
|
|
294
|
-
* @param {string} url - URL to monitor
|
|
295
|
-
* @param {string} schedule - Cron expression
|
|
296
|
-
* @param {Object} options - Monitoring options
|
|
297
|
-
* @returns {Object} - Monitor configuration
|
|
298
|
-
*/
|
|
299
|
-
async createScheduledMonitor(url, schedule, options = {}) {
|
|
300
|
-
try {
|
|
301
|
-
// Validate cron expression
|
|
302
|
-
if (!cron.validate(schedule)) {
|
|
303
|
-
throw new Error(`Invalid cron expression: ${schedule}`);
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
const monitorId = `scheduled_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
307
|
-
|
|
308
|
-
const monitorConfig = {
|
|
309
|
-
id: monitorId,
|
|
310
|
-
url,
|
|
311
|
-
schedule,
|
|
312
|
-
options: {
|
|
313
|
-
granularity: 'section',
|
|
314
|
-
trackText: true,
|
|
315
|
-
trackStructure: true,
|
|
316
|
-
alertRules: {
|
|
317
|
-
threshold: 'moderate',
|
|
318
|
-
methods: ['webhook'],
|
|
319
|
-
throttle: 600000
|
|
320
|
-
},
|
|
321
|
-
...options
|
|
322
|
-
},
|
|
323
|
-
stats: {
|
|
324
|
-
created: Date.now(),
|
|
325
|
-
executions: 0,
|
|
326
|
-
lastExecution: null,
|
|
327
|
-
changesDetected: 0,
|
|
328
|
-
errors: 0,
|
|
329
|
-
averageExecutionTime: 0
|
|
330
|
-
},
|
|
331
|
-
status: 'active'
|
|
332
|
-
};
|
|
333
|
-
|
|
334
|
-
// Create cron job
|
|
335
|
-
const cronJob = cron.schedule(schedule, async () => {
|
|
336
|
-
await this.executeScheduledMonitor(monitorId);
|
|
337
|
-
}, {
|
|
338
|
-
scheduled: true,
|
|
339
|
-
timezone: 'UTC'
|
|
340
|
-
});
|
|
341
|
-
|
|
342
|
-
monitorConfig.cronJob = cronJob;
|
|
343
|
-
|
|
344
|
-
// Store monitor
|
|
345
|
-
this.scheduledMonitors.set(monitorId, monitorConfig);
|
|
346
|
-
this.monitoringDashboard.monitors.set(monitorId, {
|
|
347
|
-
url,
|
|
348
|
-
schedule,
|
|
349
|
-
status: 'active',
|
|
350
|
-
nextExecution: cronJob.nextDates().toString()
|
|
351
|
-
});
|
|
352
|
-
|
|
353
|
-
this.stats.scheduledMonitors++;
|
|
354
|
-
|
|
355
|
-
this.emit('scheduledMonitorCreated', {
|
|
356
|
-
monitorId,
|
|
357
|
-
url,
|
|
358
|
-
schedule,
|
|
359
|
-
nextExecution: cronJob.nextDates().toString()
|
|
360
|
-
});
|
|
361
|
-
|
|
362
|
-
return {
|
|
363
|
-
success: true,
|
|
364
|
-
monitorId,
|
|
365
|
-
url,
|
|
366
|
-
schedule,
|
|
367
|
-
nextExecution: cronJob.nextDates().toString(),
|
|
368
|
-
options: monitorConfig.options
|
|
369
|
-
};
|
|
370
|
-
|
|
371
|
-
} catch (error) {
|
|
372
|
-
this.emit('error', { operation: 'createScheduledMonitor', url, error: error.message });
|
|
373
|
-
throw new Error(`Failed to create scheduled monitor: ${error.message}`);
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
|
-
|
|
377
|
-
/**
|
|
378
|
-
* Execute scheduled monitor check
|
|
379
|
-
* @param {string} monitorId - Monitor ID
|
|
380
|
-
*/
|
|
381
|
-
async executeScheduledMonitor(monitorId) {
|
|
382
|
-
const startTime = Date.now();
|
|
383
|
-
|
|
384
|
-
try {
|
|
385
|
-
const monitor = this.scheduledMonitors.get(monitorId);
|
|
386
|
-
if (!monitor || monitor.status !== 'active') {
|
|
387
|
-
return;
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
monitor.stats.executions++;
|
|
391
|
-
monitor.stats.lastExecution = Date.now();
|
|
392
|
-
|
|
393
|
-
// Fetch current content
|
|
394
|
-
const response = await fetch(monitor.url, {
|
|
395
|
-
headers: {
|
|
396
|
-
'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0-Enhanced',
|
|
397
|
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
|
398
|
-
},
|
|
399
|
-
timeout: 30000
|
|
400
|
-
});
|
|
401
|
-
|
|
402
|
-
if (!response.ok) {
|
|
403
|
-
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
const currentContent = await response.text();
|
|
407
|
-
|
|
408
|
-
// Perform enhanced comparison
|
|
409
|
-
const comparisonResult = await this.performEnhancedComparison(
|
|
410
|
-
monitor.url,
|
|
411
|
-
currentContent,
|
|
412
|
-
monitor.options
|
|
413
|
-
);
|
|
414
|
-
|
|
415
|
-
// Update execution time stats
|
|
416
|
-
const executionTime = Date.now() - startTime;
|
|
417
|
-
monitor.stats.averageExecutionTime =
|
|
418
|
-
(monitor.stats.averageExecutionTime * (monitor.stats.executions - 1) + executionTime) /
|
|
419
|
-
monitor.stats.executions;
|
|
420
|
-
|
|
421
|
-
// Process change result
|
|
422
|
-
if (comparisonResult.hasChanges) {
|
|
423
|
-
monitor.stats.changesDetected++;
|
|
424
|
-
|
|
425
|
-
// Update trend analysis
|
|
426
|
-
await this.updateTrendAnalysis(monitor.url, comparisonResult);
|
|
427
|
-
|
|
428
|
-
// Check alert rules and send notifications
|
|
429
|
-
await this.processAlertRules(monitor.url, comparisonResult, monitor.options.alertRules);
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
this.emit('scheduledMonitorExecuted', {
|
|
433
|
-
monitorId,
|
|
434
|
-
url: monitor.url,
|
|
435
|
-
hasChanges: comparisonResult.hasChanges,
|
|
436
|
-
significance: comparisonResult.significance,
|
|
437
|
-
executionTime
|
|
438
|
-
});
|
|
439
|
-
|
|
440
|
-
} catch (error) {
|
|
441
|
-
const monitor = this.scheduledMonitors.get(monitorId);
|
|
442
|
-
if (monitor) {
|
|
443
|
-
monitor.stats.errors++;
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
this.emit('scheduledMonitorError', {
|
|
447
|
-
monitorId,
|
|
448
|
-
error: error.message,
|
|
449
|
-
timestamp: Date.now()
|
|
450
|
-
});
|
|
451
|
-
}\n }\n \n /**\n * Perform enhanced comparison with semantic analysis\n * @param {string} url - URL being compared\n * @param {string} currentContent - Current content\n * @param {Object} options - Comparison options\n * @returns {Object} - Enhanced comparison results\n */\n async performEnhancedComparison(url, currentContent, options = {}) {\n try {\n // Get standard comparison\n const standardComparison = await this.compareWithBaseline(url, currentContent, options);\n \n if (!standardComparison.hasChanges) {\n return standardComparison;\n }\n \n // Enhance with semantic analysis\n const semanticAnalysis = await this.performSemanticAnalysis(\n url, \n currentContent, \n standardComparison\n );\n \n // Enhance with visual regression detection\n const visualAnalysis = await this.performVisualRegressionAnalysis(\n url,\n currentContent,\n options\n );\n \n // Enhance with structured data analysis\n const structuredAnalysis = await this.performStructuredDataAnalysis(\n url,\n currentContent,\n standardComparison\n );\n \n // Calculate enhanced significance score\n const enhancedSignificance = await this.calculateEnhancedSignificance(\n standardComparison,\n semanticAnalysis,\n visualAnalysis,\n structuredAnalysis\n );\n \n return {\n ...standardComparison,\n enhancedFeatures: {\n semanticAnalysis,\n visualAnalysis,\n structuredAnalysis,\n enhancedSignificance\n },\n significance: enhancedSignificance,\n analysisType: 'enhanced'\n };\n \n } catch (error) {\n this.emit('error', { operation: 'performEnhancedComparison', url, error: error.message });\n // Fall back to standard comparison\n return await this.compareWithBaseline(url, currentContent, options);\n }\n }\n \n /**\n * Perform semantic analysis of changes\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} standardComparison - Standard comparison results\n * @returns {Object} - Semantic analysis results\n */\n async performSemanticAnalysis(url, currentContent, standardComparison) {\n const analysis = {\n textualSimilarity: 0,\n conceptualChanges: [],\n sentimentChanges: [],\n topicShifts: [],\n keywordChanges: [],\n confidenceScore: 0\n };\n \n try {\n // Get baseline content\n const baseline = this.getLatestBaseline(url);\n if (!baseline) {\n return analysis;\n }\n \n // Extract text content from both versions\n const $ = load(currentContent);\n const currentText = $.text().replace(/\\s+/g, ' ').trim();\n \n const $baseline = load(baseline.analysis.originalContent);\n const baselineText = $baseline.text().replace(/\\s+/g, ' ').trim();\n \n // Calculate textual similarity using advanced algorithms\n analysis.textualSimilarity = this.calculateTextSimilarity(baselineText, currentText);\n \n // Detect keyword changes\n analysis.keywordChanges = this.detectKeywordChanges(baselineText, currentText);\n \n // Simple topic shift detection\n analysis.topicShifts = this.detectTopicShifts(baselineText, currentText);\n \n // Calculate confidence score\n analysis.confidenceScore = this.calculateSemanticConfidence(analysis);\n \n this.stats.semanticAnalyses++;\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performSemanticAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Perform visual regression analysis\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} options - Analysis options\n * @returns {Object} - Visual analysis results\n */\n async performVisualRegressionAnalysis(url, currentContent, options = {}) {\n const analysis = {\n layoutChanges: [],\n cssChanges: [],\n imageChanges: [],\n fontChanges: [],\n colorChanges: [],\n hasVisualChanges: false\n };\n \n try {\n const $ = load(currentContent);\n const baseline = this.getLatestBaseline(url);\n \n if (!baseline) {\n return analysis;\n }\n \n const $baseline = load(baseline.analysis.originalContent);\n \n // Detect layout changes\n analysis.layoutChanges = this.detectLayoutChanges($baseline, $);\n \n // Detect CSS changes\n analysis.cssChanges = this.detectCSSChanges($baseline, $);\n \n // Detect image changes\n analysis.imageChanges = this.detectImageChanges($baseline, $);\n \n // Determine if there are visual changes\n analysis.hasVisualChanges = \n analysis.layoutChanges.length > 0 ||\n analysis.cssChanges.length > 0 ||\n analysis.imageChanges.length > 0;\n \n if (analysis.hasVisualChanges) {\n this.stats.visualRegression++;\n }\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performVisualRegressionAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Perform structured data analysis\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} standardComparison - Standard comparison results\n * @returns {Object} - Structured data analysis\n */\n async performStructuredDataAnalysis(url, currentContent, standardComparison) {\n const analysis = {\n schemaChanges: [],\n dataFieldChanges: [],\n validationChanges: [],\n metadataChanges: [],\n hasStructuredChanges: false\n };\n \n try {\n const $ = load(currentContent);\n const baseline = this.getLatestBaseline(url);\n \n if (!baseline) {\n return analysis;\n }\n \n // Extract structured data (JSON-LD, microdata, etc.)\n const currentStructuredData = this.extractStructuredData($);\n const baselineStructuredData = this.extractStructuredData(load(baseline.analysis.originalContent));\n \n // Compare structured data\n analysis.schemaChanges = this.compareStructuredData(baselineStructuredData, currentStructuredData);\n \n // Detect metadata changes\n analysis.metadataChanges = this.compareMetadata(\n baseline.analysis.metadata,\n standardComparison.details.current?.metadata || {}\n );\n \n analysis.hasStructuredChanges = \n analysis.schemaChanges.length > 0 ||\n analysis.metadataChanges.length > 0;\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performStructuredDataAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Update trend analysis with new change data\n * @param {string} url - URL\n * @param {Object} changeResult - Change analysis results\n */\n async updateTrendAnalysis(url, changeResult) {\n try {\n const patterns = this.trendAnalysis.get('patterns');\n const now = new Date();\n const dayKey = now.toISOString().slice(0, 10); // YYYY-MM-DD\n const hourKey = now.toISOString().slice(0, 13); // YYYY-MM-DDTHH\n \n // Update daily patterns\n if (!patterns.dailyChangePatterns.has(url)) {\n patterns.dailyChangePatterns.set(url, new Map());\n }\n \n const urlDailyPatterns = patterns.dailyChangePatterns.get(url);\n if (!urlDailyPatterns.has(dayKey)) {\n urlDailyPatterns.set(dayKey, {\n changes: 0,\n significance: [],\n types: []\n });\n }\n \n const dayData = urlDailyPatterns.get(dayKey);\n dayData.changes++;\n dayData.significance.push(changeResult.significance);\n dayData.types.push(changeResult.changeType);\n \n // Update change frequency\n if (!patterns.changeFrequency.has(url)) {\n patterns.changeFrequency.set(url, []);\n }\n \n patterns.changeFrequency.get(url).push({\n timestamp: Date.now(),\n significance: changeResult.significance,\n type: changeResult.changeType\n });\n \n // Keep only last 1000 entries per URL\n const frequency = patterns.changeFrequency.get(url);\n if (frequency.length > 1000) {\n frequency.splice(0, frequency.length - 1000);\n }\n \n // Detect patterns\n await this.detectChangePatterns(url, patterns);\n \n } catch (error) {\n this.emit('error', { operation: 'updateTrendAnalysis', url, error: error.message });\n }\n }\n \n /**\n * Process alert rules and send notifications\n * @param {string} url - URL\n * @param {Object} changeResult - Change results\n * @param {Object} alertRules - Alert configuration\n */\n async processAlertRules(url, changeResult, alertRules = {}) {\n try {\n const alertsToSend = [];\n \n // Check each alert rule\n for (const [ruleId, rule] of this.alertRules.entries()) {\n let shouldTrigger = false;\n \n if (typeof rule.condition === 'function') {\n try {\n const history = this.getChangeHistory(url, 100);\n shouldTrigger = rule.condition(changeResult, history);\n } catch (error) {\n this.emit('error', { \n operation: 'evaluateAlertRule', \n ruleId, \n url, \n error: error.message \n });\n continue;\n }\n }\n \n if (shouldTrigger) {\n // Check throttling\n const throttleKey = `${url}_${ruleId}`;\n const lastAlert = this.alertThrottling.get(throttleKey);\n \n if (lastAlert && Date.now() - lastAlert < rule.throttle) {\n this.stats.alertsThrottled++;\n continue;\n }\n \n alertsToSend.push({\n ruleId,\n rule,\n url,\n changeResult,\n timestamp: Date.now()\n });\n \n // Update throttling\n this.alertThrottling.set(throttleKey, Date.now());\n }\n }\n \n // Send alerts\n for (const alert of alertsToSend) {\n await this.sendAlert(alert);\n }\n \n } catch (error) {\n this.emit('error', { operation: 'processAlertRules', url, error: error.message });\n }\n }\n \n /**\n * Send alert notification\n * @param {Object} alert - Alert configuration\n */\n async sendAlert(alert) {\n const startTime = Date.now();\n \n try {\n const alertData = {\n id: `alert_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,\n ruleId: alert.ruleId,\n url: alert.url,\n timestamp: alert.timestamp,\n priority: alert.rule.priority,\n changeResult: {\n significance: alert.changeResult.significance,\n changeType: alert.changeResult.changeType,\n summary: alert.changeResult.summary\n }\n };\n \n // Send to each configured method\n const promises = alert.rule.actions.map(async (action) => {\n try {\n await this.sendNotificationByMethod(action, alertData);\n this.emit('alertSent', { action, alertId: alertData.id, url: alert.url });\n } catch (error) {\n this.emit('alertError', { \n action, \n alertId: alertData.id, \n url: alert.url, \n error: error.message \n });\n }\n });\n \n await Promise.allSettled(promises);\n \n // Store alert in history\n if (!this.alertHistory.has(alert.url)) {\n this.alertHistory.set(alert.url, []);\n }\n \n this.alertHistory.get(alert.url).unshift(alertData);\n \n // Keep only last 100 alerts per URL\n const history = this.alertHistory.get(alert.url);\n if (history.length > 100) {\n history.splice(100);\n }\n \n // Update stats\n this.stats.alertsSent++;\n const responseTime = Date.now() - startTime;\n this.stats.averageAlertResponseTime = \n (this.stats.averageAlertResponseTime * (this.stats.alertsSent - 1) + responseTime) / \n this.stats.alertsSent;\n \n this.emit('alertProcessed', {\n alertId: alertData.id,\n url: alert.url,\n responseTime\n });\n \n } catch (error) {\n this.emit('error', { operation: 'sendAlert', url: alert.url, error: error.message });\n }\n }\n \n /**\n * Send notification by specific method\n * @param {string} method - Notification method\n * @param {Object} alertData - Alert data\n */\n async sendNotificationByMethod(method, alertData) {\n switch (method) {\n case 'webhook':\n await this.sendWebhookAlert(alertData);\n break;\n case 'email':\n await this.sendEmailAlert(alertData);\n break;\n case 'slack':\n await this.sendSlackAlert(alertData);\n break;\n default:\n throw new Error(`Unknown notification method: ${method}`);\n }\n }\n \n /**\n * Generate trend analysis report\n * @param {string} url - URL (optional, for specific URL analysis)\n * @returns {Object} - Trend analysis report\n */\n async generateTrendAnalysisReport(url = null) {\n try {\n const report = {\n timestamp: Date.now(),\n scope: url ? 'url-specific' : 'global',\n url,\n patterns: {},\n insights: [],\n recommendations: []\n };\n \n const patterns = this.trendAnalysis.get('patterns');\n \n if (url) {\n // URL-specific analysis\n report.patterns = await this.analyzeUrlPatterns(url, patterns);\n } else {\n // Global analysis\n report.patterns = await this.analyzeGlobalPatterns(patterns);\n }\n \n // Generate insights\n report.insights = this.generateTrendInsights(report.patterns);\n \n // Generate recommendations\n report.recommendations = this.generateTrendRecommendations(report.patterns, report.insights);\n \n return report;\n \n } catch (error) {\n this.emit('error', { operation: 'generateTrendAnalysisReport', url, error: error.message });\n throw error;\n }\n }\n \n /**\n * Export historical data\n * @param {Object} options - Export options\n * @returns {Object} - Exported data\n */\n async exportHistoricalData(options = {}) {\n const {\n format = 'json',\n url = null,\n startTime = null,\n endTime = null,\n includeContent = false,\n includeSnapshots = false\n } = options;\n \n try {\n const exportData = {\n metadata: {\n exportTime: Date.now(),\n format,\n scope: url ? 'url-specific' : 'global',\n url,\n timeRange: { startTime, endTime },\n options\n },\n changeHistory: {},\n snapshots: {},\n alertHistory: {},\n trendAnalysis: {},\n statistics: this.getEnhancedStats()\n };\n \n // Export change history\n const urls = url ? [url] : Array.from(this.changeHistory.keys());\n \n for (const targetUrl of urls) {\n let history = this.getChangeHistory(targetUrl, 10000);\n \n // Apply time filters\n if (startTime || endTime) {\n history = history.filter(entry => {\n if (startTime && entry.timestamp < startTime) return false;\n if (endTime && entry.timestamp > endTime) return false;\n return true;\n });\n }\n \n // Remove content if not requested\n if (!includeContent) {\n history = history.map(entry => {\n const { details, ...rest } = entry;\n return {\n ...rest,\n details: details ? {\n similarity: details.similarity,\n significance: details.significance\n } : undefined\n };\n });\n }\n \n exportData.changeHistory[targetUrl] = history;\n \n // Export alert history\n if (this.alertHistory.has(targetUrl)) {\n exportData.alertHistory[targetUrl] = this.alertHistory.get(targetUrl);\n }\n }\n \n // Export trend analysis\n const patterns = this.trendAnalysis.get('patterns');\n if (patterns) {\n exportData.trendAnalysis = {\n dailyPatterns: Object.fromEntries(patterns.dailyChangePatterns),\n changeFrequency: Object.fromEntries(patterns.changeFrequency)\n };\n }\n \n // Format output\n if (format === 'csv') {\n return this.convertToCSV(exportData);\n }\n \n return exportData;\n \n } catch (error) {\n this.emit('error', { operation: 'exportHistoricalData', error: error.message });\n throw error;\n }\n }\n \n /**\n * Get monitoring dashboard status\n * @returns {Object} - Dashboard data\n */\n getMonitoringDashboard() {\n return {\n status: this.monitoringDashboard.status,\n monitors: Array.from(this.monitoringDashboard.monitors.entries()).map(([id, config]) => ({\n id,\n ...config\n })),\n recentAlerts: this.monitoringDashboard.alerts.slice(-10),\n trends: this.monitoringDashboard.trends,\n statistics: this.getEnhancedStats(),\n timestamp: Date.now()\n };\n }\n \n /**\n * Get enhanced statistics\n * @returns {Object} - Enhanced statistics\n */\n getEnhancedStats() {\n return {\n ...this.stats,\n activeScheduledMonitors: this.scheduledMonitors.size,\n alertRules: this.alertRules.size,\n monitoringTemplates: this.monitoringTemplates.size,\n throttledAlerts: this.alertThrottling.size,\n trendPatterns: this.trendAnalysis.has('patterns') ? \n this.trendAnalysis.get('patterns').dailyChangePatterns.size : 0\n };\n }\n \n /**\n * Create baseline snapshot for change tracking
|
|
109
|
+
* Create baseline snapshot for change tracking
|
|
452
110
|
* @param {string} url - URL to track
|
|
453
111
|
* @param {string} content - Content to establish as baseline
|
|
454
112
|
* @param {Object} options - Tracking options
|
|
@@ -1677,628 +1335,12 @@ export class ChangeTracker extends EventEmitter {
|
|
|
1677
1335
|
// Placeholder for semantic analysis initialization
|
|
1678
1336
|
}
|
|
1679
1337
|
|
|
1680
|
-
// Enhanced Feature Helper Methods
|
|
1681
|
-
|
|
1682
|
-
/**
|
|
1683
|
-
* Get latest baseline for a URL
|
|
1684
|
-
* @param {string} url - URL
|
|
1685
|
-
* @returns {Object} - Latest baseline
|
|
1686
|
-
*/
|
|
1687
|
-
getLatestBaseline(url) {
|
|
1688
|
-
const snapshots = this.snapshots.get(url);
|
|
1689
|
-
return snapshots && snapshots.length > 0 ? snapshots[snapshots.length - 1] : null;
|
|
1690
|
-
}
|
|
1691
|
-
|
|
1692
|
-
/**
|
|
1693
|
-
* Calculate text similarity using advanced algorithms
|
|
1694
|
-
* @param {string} text1 - First text
|
|
1695
|
-
* @param {string} text2 - Second text
|
|
1696
|
-
* @returns {number} - Similarity score (0-1)
|
|
1697
|
-
*/
|
|
1698
|
-
calculateTextSimilarity(text1, text2) {
|
|
1699
|
-
if (!text1 || !text2) return 0;
|
|
1700
|
-
|
|
1701
|
-
// Simple Jaccard similarity for keywords
|
|
1702
|
-
const words1 = new Set(text1.toLowerCase().split(/\W+/).filter(w => w.length > 3));
|
|
1703
|
-
const words2 = new Set(text2.toLowerCase().split(/\W+/).filter(w => w.length > 3));
|
|
1704
|
-
|
|
1705
|
-
const intersection = new Set([...words1].filter(x => words2.has(x)));
|
|
1706
|
-
const union = new Set([...words1, ...words2]);
|
|
1707
|
-
|
|
1708
|
-
return union.size > 0 ? intersection.size / union.size : 0;
|
|
1709
|
-
}
|
|
1710
|
-
|
|
1711
|
-
/**
|
|
1712
|
-
* Calculate semantic similarity
|
|
1713
|
-
* @param {string} text1 - First text
|
|
1714
|
-
* @param {string} text2 - Second text
|
|
1715
|
-
* @returns {number} - Semantic similarity score
|
|
1716
|
-
*/
|
|
1717
|
-
calculateSemanticSimilarity(text1, text2) {
|
|
1718
|
-
// Placeholder for advanced semantic analysis
|
|
1719
|
-
// Could integrate with NLP services or local models
|
|
1720
|
-
return this.calculateTextSimilarity(text1, text2);
|
|
1721
|
-
}
|
|
1722
|
-
|
|
1723
|
-
/**
|
|
1724
|
-
* Detect keyword changes between texts
|
|
1725
|
-
* @param {string} baselineText - Baseline text
|
|
1726
|
-
* @param {string} currentText - Current text
|
|
1727
|
-
* @returns {Array} - Keyword changes
|
|
1728
|
-
*/
|
|
1729
|
-
detectKeywordChanges(baselineText, currentText) {
|
|
1730
|
-
const changes = [];
|
|
1731
|
-
|
|
1732
|
-
try {
|
|
1733
|
-
const baselineWords = baselineText.toLowerCase().split(/\W+/).filter(w => w.length > 3);
|
|
1734
|
-
const currentWords = currentText.toLowerCase().split(/\W+/).filter(w => w.length > 3);
|
|
1735
|
-
|
|
1736
|
-
const baselineFreq = this.calculateWordFrequency(baselineWords);
|
|
1737
|
-
const currentFreq = this.calculateWordFrequency(currentWords);
|
|
1738
|
-
|
|
1739
|
-
// Find significant frequency changes
|
|
1740
|
-
const allWords = new Set([...Object.keys(baselineFreq), ...Object.keys(currentFreq)]);
|
|
1741
|
-
|
|
1742
|
-
for (const word of allWords) {
|
|
1743
|
-
const baseFreq = baselineFreq[word] || 0;
|
|
1744
|
-
const currFreq = currentFreq[word] || 0;
|
|
1745
|
-
const change = Math.abs(currFreq - baseFreq);
|
|
1746
|
-
|
|
1747
|
-
if (change > 2) { // Significant frequency change
|
|
1748
|
-
changes.push({
|
|
1749
|
-
word,
|
|
1750
|
-
baselineFrequency: baseFreq,
|
|
1751
|
-
currentFrequency: currFreq,
|
|
1752
|
-
change: currFreq - baseFreq,
|
|
1753
|
-
type: currFreq > baseFreq ? 'increased' : 'decreased'
|
|
1754
|
-
});
|
|
1755
|
-
}
|
|
1756
|
-
}
|
|
1757
|
-
} catch (error) {
|
|
1758
|
-
this.emit('error', { operation: 'detectKeywordChanges', error: error.message });
|
|
1759
|
-
}
|
|
1760
|
-
|
|
1761
|
-
return changes.slice(0, 20); // Top 20 changes
|
|
1762
|
-
}
|
|
1763
|
-
|
|
1764
|
-
/**
|
|
1765
|
-
* Detect topic shifts between texts
|
|
1766
|
-
* @param {string} baselineText - Baseline text
|
|
1767
|
-
* @param {string} currentText - Current text
|
|
1768
|
-
* @returns {Array} - Topic shifts
|
|
1769
|
-
*/
|
|
1770
|
-
detectTopicShifts(baselineText, currentText) {
|
|
1771
|
-
const shifts = [];
|
|
1772
|
-
|
|
1773
|
-
try {
|
|
1774
|
-
// Simple topic detection based on key phrases
|
|
1775
|
-
const topicKeywords = {
|
|
1776
|
-
technology: ['software', 'computer', 'digital', 'tech', 'system', 'data'],
|
|
1777
|
-
business: ['company', 'market', 'business', 'sales', 'revenue', 'profit'],
|
|
1778
|
-
health: ['health', 'medical', 'doctor', 'treatment', 'disease', 'patient'],
|
|
1779
|
-
politics: ['government', 'policy', 'political', 'election', 'vote', 'congress'],
|
|
1780
|
-
sports: ['game', 'team', 'player', 'score', 'match', 'championship']
|
|
1781
|
-
};
|
|
1782
|
-
|
|
1783
|
-
const baselineTopics = this.detectTopics(baselineText, topicKeywords);
|
|
1784
|
-
const currentTopics = this.detectTopics(currentText, topicKeywords);
|
|
1785
|
-
|
|
1786
|
-
// Compare topic presence
|
|
1787
|
-
for (const topic of Object.keys(topicKeywords)) {
|
|
1788
|
-
const baselineScore = baselineTopics[topic] || 0;
|
|
1789
|
-
const currentScore = currentTopics[topic] || 0;
|
|
1790
|
-
const change = currentScore - baselineScore;
|
|
1791
|
-
|
|
1792
|
-
if (Math.abs(change) > 0.1) {
|
|
1793
|
-
shifts.push({
|
|
1794
|
-
topic,
|
|
1795
|
-
baselineScore,
|
|
1796
|
-
currentScore,
|
|
1797
|
-
change,
|
|
1798
|
-
type: change > 0 ? 'emerged' : 'diminished'
|
|
1799
|
-
});
|
|
1800
|
-
}
|
|
1801
|
-
}
|
|
1802
|
-
} catch (error) {
|
|
1803
|
-
this.emit('error', { operation: 'detectTopicShifts', error: error.message });
|
|
1804
|
-
}
|
|
1805
|
-
|
|
1806
|
-
return shifts;
|
|
1807
|
-
}
|
|
1808
|
-
|
|
1809
|
-
/**
|
|
1810
|
-
* Calculate semantic confidence score
|
|
1811
|
-
* @param {Object} analysis - Semantic analysis
|
|
1812
|
-
* @returns {number} - Confidence score
|
|
1813
|
-
*/
|
|
1814
|
-
calculateSemanticConfidence(analysis) {
|
|
1815
|
-
let confidence = 0;
|
|
1816
|
-
|
|
1817
|
-
// Base confidence on available data
|
|
1818
|
-
if (analysis.textualSimilarity > 0) confidence += 0.3;
|
|
1819
|
-
if (analysis.keywordChanges.length > 0) confidence += 0.3;
|
|
1820
|
-
if (analysis.topicShifts.length > 0) confidence += 0.2;
|
|
1821
|
-
|
|
1822
|
-
// Adjust based on data quality
|
|
1823
|
-
const dataQuality = Math.min(
|
|
1824
|
-
analysis.keywordChanges.length / 10, // Max 10 keyword changes for full score
|
|
1825
|
-
1
|
|
1826
|
-
);
|
|
1827
|
-
|
|
1828
|
-
return Math.min(confidence * dataQuality, 1);
|
|
1829
|
-
}
|
|
1830
|
-
|
|
1831
|
-
/**
|
|
1832
|
-
* Detect layout changes between DOM structures
|
|
1833
|
-
* @param {Object} baseline - Baseline DOM
|
|
1834
|
-
* @param {Object} current - Current DOM
|
|
1835
|
-
* @returns {Array} - Layout changes
|
|
1836
|
-
*/
|
|
1837
|
-
detectLayoutChanges(baseline, current) {
|
|
1838
|
-
const changes = [];
|
|
1839
|
-
|
|
1840
|
-
try {
|
|
1841
|
-
// Compare element counts by type
|
|
1842
|
-
const baselineElements = this.countElements(baseline);
|
|
1843
|
-
const currentElements = this.countElements(current);
|
|
1844
|
-
|
|
1845
|
-
for (const [tag, baseCount] of Object.entries(baselineElements)) {
|
|
1846
|
-
const currCount = currentElements[tag] || 0;
|
|
1847
|
-
if (Math.abs(currCount - baseCount) > 0) {
|
|
1848
|
-
changes.push({
|
|
1849
|
-
type: 'element_count_change',
|
|
1850
|
-
tag,
|
|
1851
|
-
baseline: baseCount,
|
|
1852
|
-
current: currCount,
|
|
1853
|
-
change: currCount - baseCount
|
|
1854
|
-
});
|
|
1855
|
-
}
|
|
1856
|
-
}
|
|
1857
|
-
|
|
1858
|
-
// Check for new element types
|
|
1859
|
-
for (const [tag, currCount] of Object.entries(currentElements)) {
|
|
1860
|
-
if (!baselineElements[tag]) {
|
|
1861
|
-
changes.push({
|
|
1862
|
-
type: 'new_element_type',
|
|
1863
|
-
tag,
|
|
1864
|
-
count: currCount
|
|
1865
|
-
});
|
|
1866
|
-
}
|
|
1867
|
-
}
|
|
1868
|
-
} catch (error) {
|
|
1869
|
-
this.emit('error', { operation: 'detectLayoutChanges', error: error.message });
|
|
1870
|
-
}
|
|
1871
|
-
|
|
1872
|
-
return changes;
|
|
1873
|
-
}
|
|
1874
|
-
|
|
1875
|
-
/**
|
|
1876
|
-
* Detect CSS changes
|
|
1877
|
-
* @param {Object} baseline - Baseline DOM
|
|
1878
|
-
* @param {Object} current - Current DOM
|
|
1879
|
-
* @returns {Array} - CSS changes
|
|
1880
|
-
*/
|
|
1881
|
-
detectCSSChanges(baseline, current) {
|
|
1882
|
-
const changes = [];
|
|
1883
|
-
|
|
1884
|
-
try {
|
|
1885
|
-
// Extract style information
|
|
1886
|
-
const baselineStyles = this.extractStyles(baseline);
|
|
1887
|
-
const currentStyles = this.extractStyles(current);
|
|
1888
|
-
|
|
1889
|
-
// Compare inline styles
|
|
1890
|
-
const styleDiff = this.compareStyles(baselineStyles, currentStyles);
|
|
1891
|
-
changes.push(...styleDiff);
|
|
1892
|
-
|
|
1893
|
-
} catch (error) {
|
|
1894
|
-
this.emit('error', { operation: 'detectCSSChanges', error: error.message });
|
|
1895
|
-
}
|
|
1896
|
-
|
|
1897
|
-
return changes;
|
|
1898
|
-
}
|
|
1899
|
-
|
|
1900
|
-
/**
|
|
1901
|
-
* Extract structured data from DOM
|
|
1902
|
-
* @param {Object} $ - Cheerio DOM
|
|
1903
|
-
* @returns {Object} - Structured data
|
|
1904
|
-
*/
|
|
1905
|
-
extractStructuredData($) {
|
|
1906
|
-
const structuredData = {
|
|
1907
|
-
jsonLd: [],
|
|
1908
|
-
microdata: [],
|
|
1909
|
-
rdfa: [],
|
|
1910
|
-
openGraph: {},
|
|
1911
|
-
twitterCard: {},
|
|
1912
|
-
schema: []
|
|
1913
|
-
};
|
|
1914
|
-
|
|
1915
|
-
try {
|
|
1916
|
-
// Extract JSON-LD
|
|
1917
|
-
$('script[type="application/ld+json"]').each((index, element) => {
|
|
1918
|
-
try {
|
|
1919
|
-
const data = JSON.parse($(element).html());
|
|
1920
|
-
structuredData.jsonLd.push(data);
|
|
1921
|
-
} catch (e) {
|
|
1922
|
-
// Invalid JSON, skip
|
|
1923
|
-
}
|
|
1924
|
-
});
|
|
1925
|
-
|
|
1926
|
-
// Extract Open Graph
|
|
1927
|
-
$('meta[property^="og:"]').each((index, element) => {
|
|
1928
|
-
const property = $(element).attr('property');
|
|
1929
|
-
const content = $(element).attr('content');
|
|
1930
|
-
if (property && content) {
|
|
1931
|
-
structuredData.openGraph[property] = content;
|
|
1932
|
-
}
|
|
1933
|
-
});
|
|
1934
|
-
|
|
1935
|
-
// Extract Twitter Card
|
|
1936
|
-
$('meta[name^="twitter:"]').each((index, element) => {
|
|
1937
|
-
const name = $(element).attr('name');
|
|
1938
|
-
const content = $(element).attr('content');
|
|
1939
|
-
if (name && content) {
|
|
1940
|
-
structuredData.twitterCard[name] = content;
|
|
1941
|
-
}
|
|
1942
|
-
});
|
|
1943
|
-
|
|
1944
|
-
} catch (error) {
|
|
1945
|
-
this.emit('error', { operation: 'extractStructuredData', error: error.message });
|
|
1946
|
-
}
|
|
1947
|
-
|
|
1948
|
-
return structuredData;
|
|
1949
|
-
}
|
|
1950
|
-
|
|
1951
|
-
/**
|
|
1952
|
-
* Compare structured data
|
|
1953
|
-
* @param {Object} baseline - Baseline structured data
|
|
1954
|
-
* @param {Object} current - Current structured data
|
|
1955
|
-
* @returns {Array} - Schema changes
|
|
1956
|
-
*/
|
|
1957
|
-
compareStructuredData(baseline, current) {
|
|
1958
|
-
const changes = [];
|
|
1959
|
-
|
|
1960
|
-
try {
|
|
1961
|
-
// Compare JSON-LD
|
|
1962
|
-
const jsonLdChanges = this.compareArrayData(baseline.jsonLd, current.jsonLd, 'json-ld');
|
|
1963
|
-
changes.push(...jsonLdChanges);
|
|
1964
|
-
|
|
1965
|
-
// Compare Open Graph
|
|
1966
|
-
const ogChanges = this.compareObjectData(baseline.openGraph, current.openGraph, 'open-graph');
|
|
1967
|
-
changes.push(...ogChanges);
|
|
1968
|
-
|
|
1969
|
-
// Compare Twitter Card
|
|
1970
|
-
const twitterChanges = this.compareObjectData(baseline.twitterCard, current.twitterCard, 'twitter-card');
|
|
1971
|
-
changes.push(...twitterChanges);
|
|
1972
|
-
|
|
1973
|
-
} catch (error) {
|
|
1974
|
-
this.emit('error', { operation: 'compareStructuredData', error: error.message });
|
|
1975
|
-
}
|
|
1976
|
-
|
|
1977
|
-
return changes;
|
|
1978
|
-
}
|
|
1979
|
-
|
|
1980
|
-
/**
|
|
1981
|
-
* Compare metadata objects
|
|
1982
|
-
* @param {Object} baseline - Baseline metadata
|
|
1983
|
-
* @param {Object} current - Current metadata
|
|
1984
|
-
* @returns {Array} - Metadata changes
|
|
1985
|
-
*/
|
|
1986
|
-
compareMetadata(baseline, current) {
|
|
1987
|
-
const changes = [];
|
|
1988
|
-
|
|
1989
|
-
try {
|
|
1990
|
-
const baselineKeys = Object.keys(baseline || {});
|
|
1991
|
-
const currentKeys = Object.keys(current || {});
|
|
1992
|
-
const allKeys = new Set([...baselineKeys, ...currentKeys]);
|
|
1993
|
-
|
|
1994
|
-
for (const key of allKeys) {
|
|
1995
|
-
const baseValue = baseline?.[key];
|
|
1996
|
-
const currValue = current?.[key];
|
|
1997
|
-
|
|
1998
|
-
if (JSON.stringify(baseValue) !== JSON.stringify(currValue)) {
|
|
1999
|
-
changes.push({
|
|
2000
|
-
type: 'metadata_change',
|
|
2001
|
-
field: key,
|
|
2002
|
-
baseline: baseValue,
|
|
2003
|
-
current: currValue,
|
|
2004
|
-
changeType: !baseValue ? 'added' : !currValue ? 'removed' : 'modified'
|
|
2005
|
-
});
|
|
2006
|
-
}
|
|
2007
|
-
}
|
|
2008
|
-
} catch (error) {
|
|
2009
|
-
this.emit('error', { operation: 'compareMetadata', error: error.message });
|
|
2010
|
-
}
|
|
2011
|
-
|
|
2012
|
-
return changes;
|
|
2013
|
-
}
|
|
2014
|
-
|
|
2015
|
-
/**
|
|
2016
|
-
* Calculate enhanced significance score
|
|
2017
|
-
* @param {Object} standardComparison - Standard comparison
|
|
2018
|
-
* @param {Object} semanticAnalysis - Semantic analysis
|
|
2019
|
-
* @param {Object} visualAnalysis - Visual analysis
|
|
2020
|
-
* @param {Object} structuredAnalysis - Structured analysis
|
|
2021
|
-
* @returns {string} - Enhanced significance level
|
|
2022
|
-
*/
|
|
2023
|
-
async calculateEnhancedSignificance(standardComparison, semanticAnalysis, visualAnalysis, structuredAnalysis) {
|
|
2024
|
-
try {
|
|
2025
|
-
let enhancedScore = 0;
|
|
2026
|
-
const weights = {
|
|
2027
|
-
standard: 0.4,
|
|
2028
|
-
semantic: 0.2,
|
|
2029
|
-
visual: 0.2,
|
|
2030
|
-
structured: 0.2
|
|
2031
|
-
};
|
|
2032
|
-
|
|
2033
|
-
// Standard comparison score
|
|
2034
|
-
const standardScore = this.getSignificanceScore(standardComparison.significance);
|
|
2035
|
-
enhancedScore += standardScore * weights.standard;
|
|
2036
|
-
|
|
2037
|
-
// Semantic analysis score
|
|
2038
|
-
const semanticScore = semanticAnalysis.confidenceScore *
|
|
2039
|
-
(1 - semanticAnalysis.textualSimilarity);
|
|
2040
|
-
enhancedScore += semanticScore * weights.semantic;
|
|
2041
|
-
|
|
2042
|
-
// Visual analysis score
|
|
2043
|
-
const visualScore = visualAnalysis.hasVisualChanges ? 0.7 : 0;
|
|
2044
|
-
enhancedScore += visualScore * weights.visual;
|
|
2045
|
-
|
|
2046
|
-
// Structured data score
|
|
2047
|
-
const structuredScore = structuredAnalysis.hasStructuredChanges ? 0.8 : 0;
|
|
2048
|
-
enhancedScore += structuredScore * weights.structured;
|
|
2049
|
-
|
|
2050
|
-
// Convert to significance level
|
|
2051
|
-
return this.scoreToSignificance(enhancedScore);
|
|
2052
|
-
|
|
2053
|
-
} catch (error) {
|
|
2054
|
-
this.emit('error', { operation: 'calculateEnhancedSignificance', error: error.message });
|
|
2055
|
-
return standardComparison.significance;
|
|
2056
|
-
}
|
|
2057
|
-
}
|
|
2058
|
-
|
|
2059
|
-
/**
|
|
2060
|
-
* Detect change patterns in historical data
|
|
2061
|
-
* @param {string} url - URL
|
|
2062
|
-
* @param {Object} patterns - Pattern data
|
|
2063
|
-
*/
|
|
2064
|
-
async detectChangePatterns(url, patterns) {
|
|
2065
|
-
try {
|
|
2066
|
-
const frequency = patterns.changeFrequency.get(url);
|
|
2067
|
-
if (!frequency || frequency.length < 10) return;
|
|
2068
|
-
|
|
2069
|
-
// Detect recurring patterns
|
|
2070
|
-
const recurringPatterns = this.detectRecurringPatterns(frequency);
|
|
2071
|
-
|
|
2072
|
-
// Detect time-based patterns
|
|
2073
|
-
const timePatterns = this.detectTimePatterns(frequency);
|
|
2074
|
-
|
|
2075
|
-
// Update trend analysis
|
|
2076
|
-
if (recurringPatterns.length > 0 || timePatterns.length > 0) {
|
|
2077
|
-
this.stats.trendPatternsDetected++;
|
|
2078
|
-
|
|
2079
|
-
this.emit('patternsDetected', {
|
|
2080
|
-
url,
|
|
2081
|
-
recurringPatterns,
|
|
2082
|
-
timePatterns,
|
|
2083
|
-
timestamp: Date.now()
|
|
2084
|
-
});
|
|
2085
|
-
}
|
|
2086
|
-
|
|
2087
|
-
} catch (error) {
|
|
2088
|
-
this.emit('error', { operation: 'detectChangePatterns', url, error: error.message });
|
|
2089
|
-
}
|
|
2090
|
-
}
|
|
2091
|
-
|
|
2092
|
-
/**
|
|
2093
|
-
* Send webhook alert
|
|
2094
|
-
* @param {Object} alertData - Alert data
|
|
2095
|
-
*/
|
|
2096
|
-
async sendWebhookAlert(alertData) {
|
|
2097
|
-
// Placeholder for webhook implementation
|
|
2098
|
-
this.emit('webhookAlert', alertData);
|
|
2099
|
-
}
|
|
2100
|
-
|
|
2101
|
-
/**
|
|
2102
|
-
* Send email alert
|
|
2103
|
-
* @param {Object} alertData - Alert data
|
|
2104
|
-
*/
|
|
2105
|
-
async sendEmailAlert(alertData) {
|
|
2106
|
-
// Placeholder for email implementation
|
|
2107
|
-
this.emit('emailAlert', alertData);
|
|
2108
|
-
}
|
|
2109
|
-
|
|
2110
|
-
/**
|
|
2111
|
-
* Send Slack alert
|
|
2112
|
-
* @param {Object} alertData - Alert data
|
|
2113
|
-
*/
|
|
2114
|
-
async sendSlackAlert(alertData) {
|
|
2115
|
-
// Placeholder for Slack implementation
|
|
2116
|
-
this.emit('slackAlert', alertData);
|
|
2117
|
-
}
|
|
2118
|
-
|
|
2119
|
-
// Utility helper methods
|
|
2120
|
-
|
|
2121
|
-
calculateWordFrequency(words) {
|
|
2122
|
-
const frequency = {};
|
|
2123
|
-
words.forEach(word => {
|
|
2124
|
-
frequency[word] = (frequency[word] || 0) + 1;
|
|
2125
|
-
});
|
|
2126
|
-
return frequency;
|
|
2127
|
-
}
|
|
2128
|
-
|
|
2129
|
-
detectTopics(text, topicKeywords) {
|
|
2130
|
-
const topics = {};
|
|
2131
|
-
const words = text.toLowerCase().split(/\W+/);
|
|
2132
|
-
|
|
2133
|
-
for (const [topic, keywords] of Object.entries(topicKeywords)) {
|
|
2134
|
-
let score = 0;
|
|
2135
|
-
keywords.forEach(keyword => {
|
|
2136
|
-
score += words.filter(word => word.includes(keyword)).length;
|
|
2137
|
-
});
|
|
2138
|
-
topics[topic] = score / words.length;
|
|
2139
|
-
}
|
|
2140
|
-
|
|
2141
|
-
return topics;
|
|
2142
|
-
}
|
|
2143
|
-
|
|
2144
|
-
countElements($) {
|
|
2145
|
-
const counts = {};
|
|
2146
|
-
$('*').each((index, element) => {
|
|
2147
|
-
const tag = element.name;
|
|
2148
|
-
counts[tag] = (counts[tag] || 0) + 1;
|
|
2149
|
-
});
|
|
2150
|
-
return counts;
|
|
2151
|
-
}
|
|
2152
|
-
|
|
2153
|
-
extractStyles($) {
|
|
2154
|
-
const styles = {};
|
|
2155
|
-
$('[style]').each((index, element) => {
|
|
2156
|
-
const style = $(element).attr('style');
|
|
2157
|
-
if (style) {
|
|
2158
|
-
styles[`element_${index}`] = style;
|
|
2159
|
-
}
|
|
2160
|
-
});
|
|
2161
|
-
return styles;
|
|
2162
|
-
}
|
|
2163
|
-
|
|
2164
|
-
compareStyles(baseline, current) {
|
|
2165
|
-
const changes = [];
|
|
2166
|
-
const allKeys = new Set([...Object.keys(baseline), ...Object.keys(current)]);
|
|
2167
|
-
|
|
2168
|
-
for (const key of allKeys) {
|
|
2169
|
-
if (baseline[key] !== current[key]) {
|
|
2170
|
-
changes.push({
|
|
2171
|
-
type: 'style_change',
|
|
2172
|
-
element: key,
|
|
2173
|
-
baseline: baseline[key],
|
|
2174
|
-
current: current[key]
|
|
2175
|
-
});
|
|
2176
|
-
}
|
|
2177
|
-
}
|
|
2178
|
-
|
|
2179
|
-
return changes;
|
|
2180
|
-
}
|
|
2181
|
-
|
|
2182
|
-
compareArrayData(baseline, current, type) {
|
|
2183
|
-
const changes = [];
|
|
2184
|
-
|
|
2185
|
-
if (baseline.length !== current.length) {
|
|
2186
|
-
changes.push({
|
|
2187
|
-
type: `${type}_count_change`,
|
|
2188
|
-
baseline: baseline.length,
|
|
2189
|
-
current: current.length
|
|
2190
|
-
});
|
|
2191
|
-
}
|
|
2192
|
-
|
|
2193
|
-
return changes;
|
|
2194
|
-
}
|
|
2195
|
-
|
|
2196
|
-
compareObjectData(baseline, current, type) {
|
|
2197
|
-
const changes = [];
|
|
2198
|
-
const allKeys = new Set([...Object.keys(baseline), ...Object.keys(current)]);
|
|
2199
|
-
|
|
2200
|
-
for (const key of allKeys) {
|
|
2201
|
-
if (baseline[key] !== current[key]) {
|
|
2202
|
-
changes.push({
|
|
2203
|
-
type: `${type}_change`,
|
|
2204
|
-
field: key,
|
|
2205
|
-
baseline: baseline[key],
|
|
2206
|
-
current: current[key]
|
|
2207
|
-
});
|
|
2208
|
-
}
|
|
2209
|
-
}
|
|
2210
|
-
|
|
2211
|
-
return changes;
|
|
2212
|
-
}
|
|
2213
|
-
|
|
2214
|
-
getSignificanceScore(significance) {
|
|
2215
|
-
const scores = {
|
|
2216
|
-
'none': 0,
|
|
2217
|
-
'minor': 0.2,
|
|
2218
|
-
'moderate': 0.5,
|
|
2219
|
-
'major': 0.8,
|
|
2220
|
-
'critical': 1.0
|
|
2221
|
-
};
|
|
2222
|
-
return scores[significance] || 0;
|
|
2223
|
-
}
|
|
2224
|
-
|
|
2225
|
-
scoreToSignificance(score) {
|
|
2226
|
-
if (score >= 0.9) return 'critical';
|
|
2227
|
-
if (score >= 0.7) return 'major';
|
|
2228
|
-
if (score >= 0.4) return 'moderate';
|
|
2229
|
-
if (score >= 0.1) return 'minor';
|
|
2230
|
-
return 'none';
|
|
2231
|
-
}
|
|
2232
|
-
|
|
2233
|
-
analyzeUrlPatterns(url, patterns) {
|
|
2234
|
-
// Placeholder for URL-specific pattern analysis
|
|
2235
|
-
return {
|
|
2236
|
-
dailyAverage: 0,
|
|
2237
|
-
peakTimes: [],
|
|
2238
|
-
commonTypes: []
|
|
2239
|
-
};
|
|
2240
|
-
}
|
|
2241
|
-
|
|
2242
|
-
analyzeGlobalPatterns(patterns) {
|
|
2243
|
-
// Placeholder for global pattern analysis
|
|
2244
|
-
return {
|
|
2245
|
-
totalUrls: patterns.dailyChangePatterns.size,
|
|
2246
|
-
mostActiveUrls: [],
|
|
2247
|
-
commonPatterns: []
|
|
2248
|
-
};
|
|
2249
|
-
}
|
|
2250
|
-
|
|
2251
|
-
generateTrendInsights(patterns) {
|
|
2252
|
-
return [
|
|
2253
|
-
'Pattern analysis requires more data',
|
|
2254
|
-
'Monitoring is active and collecting data'
|
|
2255
|
-
];
|
|
2256
|
-
}
|
|
2257
|
-
|
|
2258
|
-
generateTrendRecommendations(patterns, insights) {
|
|
2259
|
-
return [
|
|
2260
|
-
'Continue monitoring to build pattern database',
|
|
2261
|
-
'Consider adjusting monitoring frequency based on change patterns'
|
|
2262
|
-
];
|
|
2263
|
-
}
|
|
2264
|
-
|
|
2265
|
-
detectRecurringPatterns(frequency) {
|
|
2266
|
-
// Placeholder for recurring pattern detection
|
|
2267
|
-
return [];
|
|
2268
|
-
}
|
|
2269
|
-
|
|
2270
|
-
detectTimePatterns(frequency) {
|
|
2271
|
-
// Placeholder for time-based pattern detection
|
|
2272
|
-
return [];
|
|
2273
|
-
}
|
|
2274
|
-
|
|
2275
|
-
convertToCSV(data) {
|
|
2276
|
-
// Placeholder for CSV conversion
|
|
2277
|
-
return JSON.stringify(data, null, 2);
|
|
2278
|
-
}
|
|
2279
|
-
|
|
2280
1338
|
cleanup() {
|
|
2281
|
-
// Stop all scheduled monitors
|
|
2282
|
-
for (const [id, monitor] of this.scheduledMonitors.entries()) {
|
|
2283
|
-
if (monitor.cronJob) {
|
|
2284
|
-
monitor.cronJob.destroy();
|
|
2285
|
-
}
|
|
2286
|
-
}
|
|
2287
|
-
|
|
2288
|
-
// Clear all data
|
|
2289
1339
|
this.contentHistory.clear();
|
|
2290
1340
|
this.baselineContent.clear();
|
|
2291
1341
|
this.activeMonitors.clear();
|
|
2292
1342
|
this.changeNotifications.clear();
|
|
2293
1343
|
this.snapshotManager.clear();
|
|
2294
|
-
this.scheduledMonitors.clear();
|
|
2295
|
-
this.monitoringTemplates.clear();
|
|
2296
|
-
this.alertRules.clear();
|
|
2297
|
-
this.alertHistory.clear();
|
|
2298
|
-
this.trendAnalysis.clear();
|
|
2299
|
-
this.visualRegression.clear();
|
|
2300
|
-
this.alertThrottling.clear();
|
|
2301
|
-
this.semanticDiffCache.clear();
|
|
2302
1344
|
}
|
|
2303
1345
|
|
|
2304
1346
|
}
|