crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,2306 @@
|
|
|
1
|
+
import crypto from "crypto";
|
|
2
|
+
/**
|
|
3
|
+
* ChangeTracker - Enhanced Content Change Detection and Analysis (Phase 2.4)
|
|
4
|
+
* Implements hierarchical content hashing (page → sections → elements)
|
|
5
|
+
* with differential comparison engine, change significance scoring,
|
|
6
|
+
* scheduled monitoring, advanced comparison engine, alert system,
|
|
7
|
+
* and historical analysis capabilities
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { createHash } from 'crypto';
|
|
11
|
+
import { z } from 'zod';
|
|
12
|
+
import { EventEmitter } from 'events';
|
|
13
|
+
import { load } from 'cheerio';
|
|
14
|
+
import { diffWords, diffLines, diffChars } from 'diff';
|
|
15
|
+
import * as cron from 'node-cron';
|
|
16
|
+
import fs from 'fs/promises';
|
|
17
|
+
import path from 'path';
|
|
18
|
+
import fetch from 'node-fetch';
|
|
19
|
+
|
|
20
|
+
/**
 * Builds a zod number schema limited to the closed interval [0, 1]
 * that falls back to `fallback` when the value is omitted.
 */
const boundedFraction = (fallback) => z.number().min(0).max(1).default(fallback);

/**
 * Validation schema for a single change-tracking request.
 *
 * `url` and `content` are required; `html` and `options` are optional.
 * When `options` is omitted it defaults to `{}`, and each individual option
 * carries the default shown below.
 */
const ChangeTrackingSchema = z.object({
  url: z.string().url(),
  content: z.string(),
  html: z.string().optional(),
  options: z
    .object({
      // Level at which content is hashed and compared.
      granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),

      // Which aspects of the page are tracked.
      trackText: z.boolean().default(true),
      trackStructure: z.boolean().default(true),
      trackAttributes: z.boolean().default(false),
      trackImages: z.boolean().default(false),
      trackLinks: z.boolean().default(true),

      // Normalisation switches.
      ignoreWhitespace: z.boolean().default(true),
      ignoreCase: z.boolean().default(false),

      // Selector lists; the exclusion list defaults to common non-content elements.
      customSelectors: z.array(z.string()).optional(),
      excludeSelectors: z
        .array(z.string())
        .optional()
        .default(['script', 'style', 'noscript', '.advertisement', '.ad']),

      // Score cut-offs, each within [0, 1].
      significanceThresholds: z
        .object({
          minor: boundedFraction(0.1),
          moderate: boundedFraction(0.3),
          major: boundedFraction(0.7)
        })
        .optional()
    })
    .optional()
    .default({})
});

/**
 * Validation schema for comparing two explicit content versions.
 * All four URL/content fields are required; `options` is an optional object.
 */
const ChangeComparisonSchema = z.object({
  baselineUrl: z.string().url(),
  currentUrl: z.string().url(),
  baselineContent: z.string(),
  currentContent: z.string(),
  options: z.object({}).optional()
});

/** Enumeration of the significance labels a change can be assigned. */
const ChangeSignificance = z.enum(['none', 'minor', 'moderate', 'major', 'critical']);
|
|
54
|
+
|
|
55
|
+
export class ChangeTracker extends EventEmitter {
|
|
56
|
+
constructor(options = {}) {
|
|
57
|
+
super();
|
|
58
|
+
|
|
59
|
+
this.options = {
|
|
60
|
+
hashAlgorithm: 'sha256',
|
|
61
|
+
maxHistoryLength: 100,
|
|
62
|
+
enableRealTimeTracking: true,
|
|
63
|
+
monitoringInterval: 300000, // 5 minutes
|
|
64
|
+
enableChangeSignificanceScoring: true,
|
|
65
|
+
enableStructuralAnalysis: true,
|
|
66
|
+
enableSemanticAnalysis: false,
|
|
67
|
+
contentSimilarityThreshold: 0.8,
|
|
68
|
+
...options
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Content snapshots and hashes
|
|
72
|
+
this.snapshots = new Map();
|
|
73
|
+
this.contentHashes = new Map();
|
|
74
|
+
this.changeHistory = new Map();
|
|
75
|
+
this.structuralHashes = new Map();
|
|
76
|
+
|
|
77
|
+
// Change detection state
|
|
78
|
+
this.activeMonitors = new Map();
|
|
79
|
+
this.lastProcessedTimestamps = new Map();
|
|
80
|
+
|
|
81
|
+
// Content history and snapshots management
|
|
82
|
+
this.contentHistory = new Map();
|
|
83
|
+
this.baselineContent = new Map();
|
|
84
|
+
this.changeNotifications = new Map();
|
|
85
|
+
this.snapshotManager = new Map();
|
|
86
|
+
|
|
87
|
+
// Phase 2.4 Enhanced Features
|
|
88
|
+
this.scheduledMonitors = new Map(); // Cron-based monitoring
|
|
89
|
+
this.monitoringTemplates = new Map(); // Reusable monitoring configurations
|
|
90
|
+
this.alertRules = new Map(); // Custom alert rules and conditions
|
|
91
|
+
this.alertHistory = new Map(); // Alert notification history
|
|
92
|
+
this.trendAnalysis = new Map(); // Pattern recognition data
|
|
93
|
+
this.visualRegression = new Map(); // Visual diff storage
|
|
94
|
+
this.alertThrottling = new Map(); // Alert rate limiting
|
|
95
|
+
this.semanticDiffCache = new Map(); // Semantic analysis cache
|
|
96
|
+
this.monitoringDashboard = {
|
|
97
|
+
status: 'initialized',
|
|
98
|
+
monitors: new Map(),
|
|
99
|
+
alerts: [],
|
|
100
|
+
trends: {}
|
|
101
|
+
};
|
|
102
|
+
// Enhanced Statistics
|
|
103
|
+
this.stats = {
|
|
104
|
+
pagesTracked: 0,
|
|
105
|
+
changesDetected: 0,
|
|
106
|
+
significantChanges: 0,
|
|
107
|
+
structuralChanges: 0,
|
|
108
|
+
contentChanges: 0,
|
|
109
|
+
falsePositives: 0,
|
|
110
|
+
averageChangeScore: 0,
|
|
111
|
+
lastAnalysis: null,
|
|
112
|
+
processingTime: 0,
|
|
113
|
+
// Phase 2.4 additions
|
|
114
|
+
scheduledMonitors: 0,
|
|
115
|
+
alertsSent: 0,
|
|
116
|
+
alertsThrottled: 0,
|
|
117
|
+
semanticAnalyses: 0,
|
|
118
|
+
visualRegression: 0,
|
|
119
|
+
trendPatternsDetected: 0,
|
|
120
|
+
averageAlertResponseTime: 0,
|
|
121
|
+
monitoringUptime: 0
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
// Semantic analysis tools (if enabled)
|
|
125
|
+
this.semanticAnalyzer = null;
|
|
126
|
+
|
|
127
|
+
this.initialize();
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
async initialize() {
|
|
131
|
+
// Initialize semantic analysis if enabled
|
|
132
|
+
if (this.options.enableSemanticAnalysis) {
|
|
133
|
+
await this.initializeSemanticAnalyzer();
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Initialize Phase 2.4 components
|
|
137
|
+
await this.initializeEnhancedFeatures();
|
|
138
|
+
|
|
139
|
+
this.emit('initialized');
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Initialize Enhanced Features for Phase 2.4
|
|
144
|
+
*/
|
|
145
|
+
async initializeEnhancedFeatures() {
|
|
146
|
+
try {
|
|
147
|
+
// Initialize monitoring dashboard
|
|
148
|
+
this.monitoringDashboard.status = 'initializing';
|
|
149
|
+
|
|
150
|
+
// Load existing monitoring templates
|
|
151
|
+
await this.loadMonitoringTemplates();
|
|
152
|
+
|
|
153
|
+
// Initialize alert system
|
|
154
|
+
await this.initializeAlertSystem();
|
|
155
|
+
|
|
156
|
+
// Set up historical analysis
|
|
157
|
+
await this.initializeHistoricalAnalysis();
|
|
158
|
+
|
|
159
|
+
// Initialize semantic diff engine if enabled
|
|
160
|
+
if (this.options.enableSemanticAnalysis) {
|
|
161
|
+
await this.initializeSemanticDiffEngine();
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
this.monitoringDashboard.status = 'active';
|
|
165
|
+
this.emit('enhancedFeaturesInitialized');
|
|
166
|
+
|
|
167
|
+
} catch (error) {
|
|
168
|
+
this.monitoringDashboard.status = 'error';
|
|
169
|
+
this.emit('error', { operation: 'initializeEnhancedFeatures', error: error.message });
|
|
170
|
+
throw error;
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Load monitoring templates from storage
|
|
176
|
+
*/
|
|
177
|
+
async loadMonitoringTemplates() {
|
|
178
|
+
const defaultTemplates = {
|
|
179
|
+
'news-site': {
|
|
180
|
+
name: 'News Site Monitoring',
|
|
181
|
+
frequency: '*/15 * * * *', // Every 15 minutes
|
|
182
|
+
options: {
|
|
183
|
+
granularity: 'section',
|
|
184
|
+
trackText: true,
|
|
185
|
+
trackStructure: false,
|
|
186
|
+
significanceThresholds: { minor: 0.05, moderate: 0.2, major: 0.5 }
|
|
187
|
+
},
|
|
188
|
+
alertRules: {
|
|
189
|
+
threshold: 'minor',
|
|
190
|
+
methods: ['webhook', 'email'],
|
|
191
|
+
throttle: 300000 // 5 minutes
|
|
192
|
+
}
|
|
193
|
+
},
|
|
194
|
+
'e-commerce': {
|
|
195
|
+
name: 'E-commerce Site Monitoring',
|
|
196
|
+
frequency: '0 */2 * * *', // Every 2 hours
|
|
197
|
+
options: {
|
|
198
|
+
granularity: 'element',
|
|
199
|
+
trackText: true,
|
|
200
|
+
trackStructure: true,
|
|
201
|
+
trackImages: true,
|
|
202
|
+
customSelectors: ['.price', '.stock-status', '.product-title']
|
|
203
|
+
},
|
|
204
|
+
alertRules: {
|
|
205
|
+
threshold: 'moderate',
|
|
206
|
+
methods: ['webhook', 'slack'],
|
|
207
|
+
throttle: 600000 // 10 minutes
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
'documentation': {
|
|
211
|
+
name: 'Documentation Monitoring',
|
|
212
|
+
frequency: '0 9 * * *', // Daily at 9 AM
|
|
213
|
+
options: {
|
|
214
|
+
granularity: 'section',
|
|
215
|
+
trackText: true,
|
|
216
|
+
trackStructure: true,
|
|
217
|
+
excludeSelectors: ['.last-updated', '.edit-link']
|
|
218
|
+
},
|
|
219
|
+
alertRules: {
|
|
220
|
+
threshold: 'major',
|
|
221
|
+
methods: ['email'],
|
|
222
|
+
throttle: 3600000 // 1 hour
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
};
|
|
226
|
+
|
|
227
|
+
for (const [id, template] of Object.entries(defaultTemplates)) {
|
|
228
|
+
this.monitoringTemplates.set(id, template);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
/**
|
|
233
|
+
* Initialize alert system with default rules
|
|
234
|
+
*/
|
|
235
|
+
async initializeAlertSystem() {
|
|
236
|
+
// Default alert rules
|
|
237
|
+
const defaultAlertRules = {
|
|
238
|
+
'critical-changes': {
|
|
239
|
+
condition: (changeResult) => changeResult.significance === 'critical',
|
|
240
|
+
actions: ['webhook', 'email', 'slack'],
|
|
241
|
+
throttle: 0, // No throttling for critical changes
|
|
242
|
+
priority: 'high'
|
|
243
|
+
},
|
|
244
|
+
'frequent-changes': {
|
|
245
|
+
condition: (url, history) => {
|
|
246
|
+
const recent = history.filter(h => Date.now() - h.timestamp < 3600000); // Last hour
|
|
247
|
+
return recent.length > 5;
|
|
248
|
+
},
|
|
249
|
+
actions: ['webhook'],
|
|
250
|
+
throttle: 1800000, // 30 minutes
|
|
251
|
+
priority: 'medium'
|
|
252
|
+
},
|
|
253
|
+
'structural-changes': {
|
|
254
|
+
condition: (changeResult) => changeResult.changeType === 'structural',
|
|
255
|
+
actions: ['webhook', 'email'],
|
|
256
|
+
throttle: 600000, // 10 minutes
|
|
257
|
+
priority: 'medium'
|
|
258
|
+
}
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
for (const [id, rule] of Object.entries(defaultAlertRules)) {
|
|
262
|
+
this.alertRules.set(id, rule);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
/**
|
|
267
|
+
* Initialize historical analysis capabilities
|
|
268
|
+
*/
|
|
269
|
+
async initializeHistoricalAnalysis() {
|
|
270
|
+
// Initialize trend analysis patterns
|
|
271
|
+
this.trendAnalysis.set('patterns', {
|
|
272
|
+
dailyChangePatterns: new Map(),
|
|
273
|
+
weeklyTrends: new Map(),
|
|
274
|
+
contentVelocity: new Map(),
|
|
275
|
+
changeFrequency: new Map()
|
|
276
|
+
});
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Initialize semantic diff engine
|
|
281
|
+
*/
|
|
282
|
+
async initializeSemanticDiffEngine() {
|
|
283
|
+
// Initialize semantic analysis components
|
|
284
|
+
this.semanticDiffCache.set('initialized', true);
|
|
285
|
+
this.semanticDiffCache.set('algorithms', {
|
|
286
|
+
textSimilarity: this.calculateTextSimilarity.bind(this),
|
|
287
|
+
structuralSimilarity: this.calculateStructuralSimilarity.bind(this),
|
|
288
|
+
semanticSimilarity: this.calculateSemanticSimilarity.bind(this)
|
|
289
|
+
});
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Create scheduled monitoring with cron-like scheduling
|
|
294
|
+
* @param {string} url - URL to monitor
|
|
295
|
+
* @param {string} schedule - Cron expression
|
|
296
|
+
* @param {Object} options - Monitoring options
|
|
297
|
+
* @returns {Object} - Monitor configuration
|
|
298
|
+
*/
|
|
299
|
+
async createScheduledMonitor(url, schedule, options = {}) {
|
|
300
|
+
try {
|
|
301
|
+
// Validate cron expression
|
|
302
|
+
if (!cron.validate(schedule)) {
|
|
303
|
+
throw new Error(`Invalid cron expression: ${schedule}`);
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
const monitorId = `scheduled_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
307
|
+
|
|
308
|
+
const monitorConfig = {
|
|
309
|
+
id: monitorId,
|
|
310
|
+
url,
|
|
311
|
+
schedule,
|
|
312
|
+
options: {
|
|
313
|
+
granularity: 'section',
|
|
314
|
+
trackText: true,
|
|
315
|
+
trackStructure: true,
|
|
316
|
+
alertRules: {
|
|
317
|
+
threshold: 'moderate',
|
|
318
|
+
methods: ['webhook'],
|
|
319
|
+
throttle: 600000
|
|
320
|
+
},
|
|
321
|
+
...options
|
|
322
|
+
},
|
|
323
|
+
stats: {
|
|
324
|
+
created: Date.now(),
|
|
325
|
+
executions: 0,
|
|
326
|
+
lastExecution: null,
|
|
327
|
+
changesDetected: 0,
|
|
328
|
+
errors: 0,
|
|
329
|
+
averageExecutionTime: 0
|
|
330
|
+
},
|
|
331
|
+
status: 'active'
|
|
332
|
+
};
|
|
333
|
+
|
|
334
|
+
// Create cron job
|
|
335
|
+
const cronJob = cron.schedule(schedule, async () => {
|
|
336
|
+
await this.executeScheduledMonitor(monitorId);
|
|
337
|
+
}, {
|
|
338
|
+
scheduled: true,
|
|
339
|
+
timezone: 'UTC'
|
|
340
|
+
});
|
|
341
|
+
|
|
342
|
+
monitorConfig.cronJob = cronJob;
|
|
343
|
+
|
|
344
|
+
// Store monitor
|
|
345
|
+
this.scheduledMonitors.set(monitorId, monitorConfig);
|
|
346
|
+
this.monitoringDashboard.monitors.set(monitorId, {
|
|
347
|
+
url,
|
|
348
|
+
schedule,
|
|
349
|
+
status: 'active',
|
|
350
|
+
nextExecution: cronJob.nextDates().toString()
|
|
351
|
+
});
|
|
352
|
+
|
|
353
|
+
this.stats.scheduledMonitors++;
|
|
354
|
+
|
|
355
|
+
this.emit('scheduledMonitorCreated', {
|
|
356
|
+
monitorId,
|
|
357
|
+
url,
|
|
358
|
+
schedule,
|
|
359
|
+
nextExecution: cronJob.nextDates().toString()
|
|
360
|
+
});
|
|
361
|
+
|
|
362
|
+
return {
|
|
363
|
+
success: true,
|
|
364
|
+
monitorId,
|
|
365
|
+
url,
|
|
366
|
+
schedule,
|
|
367
|
+
nextExecution: cronJob.nextDates().toString(),
|
|
368
|
+
options: monitorConfig.options
|
|
369
|
+
};
|
|
370
|
+
|
|
371
|
+
} catch (error) {
|
|
372
|
+
this.emit('error', { operation: 'createScheduledMonitor', url, error: error.message });
|
|
373
|
+
throw new Error(`Failed to create scheduled monitor: ${error.message}`);
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Execute scheduled monitor check
|
|
379
|
+
* @param {string} monitorId - Monitor ID
|
|
380
|
+
*/
|
|
381
|
+
async executeScheduledMonitor(monitorId) {
|
|
382
|
+
const startTime = Date.now();
|
|
383
|
+
|
|
384
|
+
try {
|
|
385
|
+
const monitor = this.scheduledMonitors.get(monitorId);
|
|
386
|
+
if (!monitor || monitor.status !== 'active') {
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
monitor.stats.executions++;
|
|
391
|
+
monitor.stats.lastExecution = Date.now();
|
|
392
|
+
|
|
393
|
+
// Fetch current content
|
|
394
|
+
const response = await fetch(monitor.url, {
|
|
395
|
+
headers: {
|
|
396
|
+
'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0-Enhanced',
|
|
397
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
|
398
|
+
},
|
|
399
|
+
timeout: 30000
|
|
400
|
+
});
|
|
401
|
+
|
|
402
|
+
if (!response.ok) {
|
|
403
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
const currentContent = await response.text();
|
|
407
|
+
|
|
408
|
+
// Perform enhanced comparison
|
|
409
|
+
const comparisonResult = await this.performEnhancedComparison(
|
|
410
|
+
monitor.url,
|
|
411
|
+
currentContent,
|
|
412
|
+
monitor.options
|
|
413
|
+
);
|
|
414
|
+
|
|
415
|
+
// Update execution time stats
|
|
416
|
+
const executionTime = Date.now() - startTime;
|
|
417
|
+
monitor.stats.averageExecutionTime =
|
|
418
|
+
(monitor.stats.averageExecutionTime * (monitor.stats.executions - 1) + executionTime) /
|
|
419
|
+
monitor.stats.executions;
|
|
420
|
+
|
|
421
|
+
// Process change result
|
|
422
|
+
if (comparisonResult.hasChanges) {
|
|
423
|
+
monitor.stats.changesDetected++;
|
|
424
|
+
|
|
425
|
+
// Update trend analysis
|
|
426
|
+
await this.updateTrendAnalysis(monitor.url, comparisonResult);
|
|
427
|
+
|
|
428
|
+
// Check alert rules and send notifications
|
|
429
|
+
await this.processAlertRules(monitor.url, comparisonResult, monitor.options.alertRules);
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
this.emit('scheduledMonitorExecuted', {
|
|
433
|
+
monitorId,
|
|
434
|
+
url: monitor.url,
|
|
435
|
+
hasChanges: comparisonResult.hasChanges,
|
|
436
|
+
significance: comparisonResult.significance,
|
|
437
|
+
executionTime
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
} catch (error) {
|
|
441
|
+
const monitor = this.scheduledMonitors.get(monitorId);
|
|
442
|
+
if (monitor) {
|
|
443
|
+
monitor.stats.errors++;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
this.emit('scheduledMonitorError', {
|
|
447
|
+
monitorId,
|
|
448
|
+
error: error.message,
|
|
449
|
+
timestamp: Date.now()
|
|
450
|
+
});
|
|
451
|
+
}\n }\n \n /**\n * Perform enhanced comparison with semantic analysis\n * @param {string} url - URL being compared\n * @param {string} currentContent - Current content\n * @param {Object} options - Comparison options\n * @returns {Object} - Enhanced comparison results\n */\n async performEnhancedComparison(url, currentContent, options = {}) {\n try {\n // Get standard comparison\n const standardComparison = await this.compareWithBaseline(url, currentContent, options);\n \n if (!standardComparison.hasChanges) {\n return standardComparison;\n }\n \n // Enhance with semantic analysis\n const semanticAnalysis = await this.performSemanticAnalysis(\n url, \n currentContent, \n standardComparison\n );\n \n // Enhance with visual regression detection\n const visualAnalysis = await this.performVisualRegressionAnalysis(\n url,\n currentContent,\n options\n );\n \n // Enhance with structured data analysis\n const structuredAnalysis = await this.performStructuredDataAnalysis(\n url,\n currentContent,\n standardComparison\n );\n \n // Calculate enhanced significance score\n const enhancedSignificance = await this.calculateEnhancedSignificance(\n standardComparison,\n semanticAnalysis,\n visualAnalysis,\n structuredAnalysis\n );\n \n return {\n ...standardComparison,\n enhancedFeatures: {\n semanticAnalysis,\n visualAnalysis,\n structuredAnalysis,\n enhancedSignificance\n },\n significance: enhancedSignificance,\n analysisType: 'enhanced'\n };\n \n } catch (error) {\n this.emit('error', { operation: 'performEnhancedComparison', url, error: error.message });\n // Fall back to standard comparison\n return await this.compareWithBaseline(url, currentContent, options);\n }\n }\n \n /**\n * Perform semantic analysis of changes\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} standardComparison - Standard comparison results\n * @returns {Object} - Semantic analysis results\n */\n async performSemanticAnalysis(url, currentContent, 
standardComparison) {\n const analysis = {\n textualSimilarity: 0,\n conceptualChanges: [],\n sentimentChanges: [],\n topicShifts: [],\n keywordChanges: [],\n confidenceScore: 0\n };\n \n try {\n // Get baseline content\n const baseline = this.getLatestBaseline(url);\n if (!baseline) {\n return analysis;\n }\n \n // Extract text content from both versions\n const $ = load(currentContent);\n const currentText = $.text().replace(/\\s+/g, ' ').trim();\n \n const $baseline = load(baseline.analysis.originalContent);\n const baselineText = $baseline.text().replace(/\\s+/g, ' ').trim();\n \n // Calculate textual similarity using advanced algorithms\n analysis.textualSimilarity = this.calculateTextSimilarity(baselineText, currentText);\n \n // Detect keyword changes\n analysis.keywordChanges = this.detectKeywordChanges(baselineText, currentText);\n \n // Simple topic shift detection\n analysis.topicShifts = this.detectTopicShifts(baselineText, currentText);\n \n // Calculate confidence score\n analysis.confidenceScore = this.calculateSemanticConfidence(analysis);\n \n this.stats.semanticAnalyses++;\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performSemanticAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Perform visual regression analysis\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} options - Analysis options\n * @returns {Object} - Visual analysis results\n */\n async performVisualRegressionAnalysis(url, currentContent, options = {}) {\n const analysis = {\n layoutChanges: [],\n cssChanges: [],\n imageChanges: [],\n fontChanges: [],\n colorChanges: [],\n hasVisualChanges: false\n };\n \n try {\n const $ = load(currentContent);\n const baseline = this.getLatestBaseline(url);\n \n if (!baseline) {\n return analysis;\n }\n \n const $baseline = load(baseline.analysis.originalContent);\n \n // Detect layout changes\n analysis.layoutChanges = 
this.detectLayoutChanges($baseline, $);\n \n // Detect CSS changes\n analysis.cssChanges = this.detectCSSChanges($baseline, $);\n \n // Detect image changes\n analysis.imageChanges = this.detectImageChanges($baseline, $);\n \n // Determine if there are visual changes\n analysis.hasVisualChanges = \n analysis.layoutChanges.length > 0 ||\n analysis.cssChanges.length > 0 ||\n analysis.imageChanges.length > 0;\n \n if (analysis.hasVisualChanges) {\n this.stats.visualRegression++;\n }\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performVisualRegressionAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Perform structured data analysis\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} standardComparison - Standard comparison results\n * @returns {Object} - Structured data analysis\n */\n async performStructuredDataAnalysis(url, currentContent, standardComparison) {\n const analysis = {\n schemaChanges: [],\n dataFieldChanges: [],\n validationChanges: [],\n metadataChanges: [],\n hasStructuredChanges: false\n };\n \n try {\n const $ = load(currentContent);\n const baseline = this.getLatestBaseline(url);\n \n if (!baseline) {\n return analysis;\n }\n \n // Extract structured data (JSON-LD, microdata, etc.)\n const currentStructuredData = this.extractStructuredData($);\n const baselineStructuredData = this.extractStructuredData(load(baseline.analysis.originalContent));\n \n // Compare structured data\n analysis.schemaChanges = this.compareStructuredData(baselineStructuredData, currentStructuredData);\n \n // Detect metadata changes\n analysis.metadataChanges = this.compareMetadata(\n baseline.analysis.metadata,\n standardComparison.details.current?.metadata || {}\n );\n \n analysis.hasStructuredChanges = \n analysis.schemaChanges.length > 0 ||\n analysis.metadataChanges.length > 0;\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 
'performStructuredDataAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Update trend analysis with new change data\n * @param {string} url - URL\n * @param {Object} changeResult - Change analysis results\n */\n async updateTrendAnalysis(url, changeResult) {\n try {\n const patterns = this.trendAnalysis.get('patterns');\n const now = new Date();\n const dayKey = now.toISOString().slice(0, 10); // YYYY-MM-DD\n const hourKey = now.toISOString().slice(0, 13); // YYYY-MM-DDTHH\n \n // Update daily patterns\n if (!patterns.dailyChangePatterns.has(url)) {\n patterns.dailyChangePatterns.set(url, new Map());\n }\n \n const urlDailyPatterns = patterns.dailyChangePatterns.get(url);\n if (!urlDailyPatterns.has(dayKey)) {\n urlDailyPatterns.set(dayKey, {\n changes: 0,\n significance: [],\n types: []\n });\n }\n \n const dayData = urlDailyPatterns.get(dayKey);\n dayData.changes++;\n dayData.significance.push(changeResult.significance);\n dayData.types.push(changeResult.changeType);\n \n // Update change frequency\n if (!patterns.changeFrequency.has(url)) {\n patterns.changeFrequency.set(url, []);\n }\n \n patterns.changeFrequency.get(url).push({\n timestamp: Date.now(),\n significance: changeResult.significance,\n type: changeResult.changeType\n });\n \n // Keep only last 1000 entries per URL\n const frequency = patterns.changeFrequency.get(url);\n if (frequency.length > 1000) {\n frequency.splice(0, frequency.length - 1000);\n }\n \n // Detect patterns\n await this.detectChangePatterns(url, patterns);\n \n } catch (error) {\n this.emit('error', { operation: 'updateTrendAnalysis', url, error: error.message });\n }\n }\n \n /**\n * Process alert rules and send notifications\n * @param {string} url - URL\n * @param {Object} changeResult - Change results\n * @param {Object} alertRules - Alert configuration\n */\n async processAlertRules(url, changeResult, alertRules = {}) {\n try {\n const alertsToSend = [];\n \n // Check each alert rule\n for (const 
[ruleId, rule] of this.alertRules.entries()) {\n let shouldTrigger = false;\n \n if (typeof rule.condition === 'function') {\n try {\n const history = this.getChangeHistory(url, 100);\n shouldTrigger = rule.condition(changeResult, history);\n } catch (error) {\n this.emit('error', { \n operation: 'evaluateAlertRule', \n ruleId, \n url, \n error: error.message \n });\n continue;\n }\n }\n \n if (shouldTrigger) {\n // Check throttling\n const throttleKey = `${url}_${ruleId}`;\n const lastAlert = this.alertThrottling.get(throttleKey);\n \n if (lastAlert && Date.now() - lastAlert < rule.throttle) {\n this.stats.alertsThrottled++;\n continue;\n }\n \n alertsToSend.push({\n ruleId,\n rule,\n url,\n changeResult,\n timestamp: Date.now()\n });\n \n // Update throttling\n this.alertThrottling.set(throttleKey, Date.now());\n }\n }\n \n // Send alerts\n for (const alert of alertsToSend) {\n await this.sendAlert(alert);\n }\n \n } catch (error) {\n this.emit('error', { operation: 'processAlertRules', url, error: error.message });\n }\n }\n \n /**\n * Send alert notification\n * @param {Object} alert - Alert configuration\n */\n async sendAlert(alert) {\n const startTime = Date.now();\n \n try {\n const alertData = {\n id: `alert_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,\n ruleId: alert.ruleId,\n url: alert.url,\n timestamp: alert.timestamp,\n priority: alert.rule.priority,\n changeResult: {\n significance: alert.changeResult.significance,\n changeType: alert.changeResult.changeType,\n summary: alert.changeResult.summary\n }\n };\n \n // Send to each configured method\n const promises = alert.rule.actions.map(async (action) => {\n try {\n await this.sendNotificationByMethod(action, alertData);\n this.emit('alertSent', { action, alertId: alertData.id, url: alert.url });\n } catch (error) {\n this.emit('alertError', { \n action, \n alertId: alertData.id, \n url: alert.url, \n error: error.message \n });\n }\n });\n \n await Promise.allSettled(promises);\n \n // 
Store alert in history\n if (!this.alertHistory.has(alert.url)) {\n this.alertHistory.set(alert.url, []);\n }\n \n this.alertHistory.get(alert.url).unshift(alertData);\n \n // Keep only last 100 alerts per URL\n const history = this.alertHistory.get(alert.url);\n if (history.length > 100) {\n history.splice(100);\n }\n \n // Update stats\n this.stats.alertsSent++;\n const responseTime = Date.now() - startTime;\n this.stats.averageAlertResponseTime = \n (this.stats.averageAlertResponseTime * (this.stats.alertsSent - 1) + responseTime) / \n this.stats.alertsSent;\n \n this.emit('alertProcessed', {\n alertId: alertData.id,\n url: alert.url,\n responseTime\n });\n \n } catch (error) {\n this.emit('error', { operation: 'sendAlert', url: alert.url, error: error.message });\n }\n }\n \n /**\n * Send notification by specific method\n * @param {string} method - Notification method\n * @param {Object} alertData - Alert data\n */\n async sendNotificationByMethod(method, alertData) {\n switch (method) {\n case 'webhook':\n await this.sendWebhookAlert(alertData);\n break;\n case 'email':\n await this.sendEmailAlert(alertData);\n break;\n case 'slack':\n await this.sendSlackAlert(alertData);\n break;\n default:\n throw new Error(`Unknown notification method: ${method}`);\n }\n }\n \n /**\n * Generate trend analysis report\n * @param {string} url - URL (optional, for specific URL analysis)\n * @returns {Object} - Trend analysis report\n */\n async generateTrendAnalysisReport(url = null) {\n try {\n const report = {\n timestamp: Date.now(),\n scope: url ? 
'url-specific' : 'global',\n url,\n patterns: {},\n insights: [],\n recommendations: []\n };\n \n const patterns = this.trendAnalysis.get('patterns');\n \n if (url) {\n // URL-specific analysis\n report.patterns = await this.analyzeUrlPatterns(url, patterns);\n } else {\n // Global analysis\n report.patterns = await this.analyzeGlobalPatterns(patterns);\n }\n \n // Generate insights\n report.insights = this.generateTrendInsights(report.patterns);\n \n // Generate recommendations\n report.recommendations = this.generateTrendRecommendations(report.patterns, report.insights);\n \n return report;\n \n } catch (error) {\n this.emit('error', { operation: 'generateTrendAnalysisReport', url, error: error.message });\n throw error;\n }\n }\n \n /**\n * Export historical data\n * @param {Object} options - Export options\n * @returns {Object} - Exported data\n */\n async exportHistoricalData(options = {}) {\n const {\n format = 'json',\n url = null,\n startTime = null,\n endTime = null,\n includeContent = false,\n includeSnapshots = false\n } = options;\n \n try {\n const exportData = {\n metadata: {\n exportTime: Date.now(),\n format,\n scope: url ? 'url-specific' : 'global',\n url,\n timeRange: { startTime, endTime },\n options\n },\n changeHistory: {},\n snapshots: {},\n alertHistory: {},\n trendAnalysis: {},\n statistics: this.getEnhancedStats()\n };\n \n // Export change history\n const urls = url ? [url] : Array.from(this.changeHistory.keys());\n \n for (const targetUrl of urls) {\n let history = this.getChangeHistory(targetUrl, 10000);\n \n // Apply time filters\n if (startTime || endTime) {\n history = history.filter(entry => {\n if (startTime && entry.timestamp < startTime) return false;\n if (endTime && entry.timestamp > endTime) return false;\n return true;\n });\n }\n \n // Remove content if not requested\n if (!includeContent) {\n history = history.map(entry => {\n const { details, ...rest } = entry;\n return {\n ...rest,\n details: details ? 
{\n similarity: details.similarity,\n significance: details.significance\n } : undefined\n };\n });\n }\n \n exportData.changeHistory[targetUrl] = history;\n \n // Export alert history\n if (this.alertHistory.has(targetUrl)) {\n exportData.alertHistory[targetUrl] = this.alertHistory.get(targetUrl);\n }\n }\n \n // Export trend analysis\n const patterns = this.trendAnalysis.get('patterns');\n if (patterns) {\n exportData.trendAnalysis = {\n dailyPatterns: Object.fromEntries(patterns.dailyChangePatterns),\n changeFrequency: Object.fromEntries(patterns.changeFrequency)\n };\n }\n \n // Format output\n if (format === 'csv') {\n return this.convertToCSV(exportData);\n }\n \n return exportData;\n \n } catch (error) {\n this.emit('error', { operation: 'exportHistoricalData', error: error.message });\n throw error;\n }\n }\n \n /**\n * Get monitoring dashboard status\n * @returns {Object} - Dashboard data\n */\n getMonitoringDashboard() {\n return {\n status: this.monitoringDashboard.status,\n monitors: Array.from(this.monitoringDashboard.monitors.entries()).map(([id, config]) => ({\n id,\n ...config\n })),\n recentAlerts: this.monitoringDashboard.alerts.slice(-10),\n trends: this.monitoringDashboard.trends,\n statistics: this.getEnhancedStats(),\n timestamp: Date.now()\n };\n }\n \n /**\n * Get enhanced statistics\n * @returns {Object} - Enhanced statistics\n */\n getEnhancedStats() {\n return {\n ...this.stats,\n activeScheduledMonitors: this.scheduledMonitors.size,\n alertRules: this.alertRules.size,\n monitoringTemplates: this.monitoringTemplates.size,\n throttledAlerts: this.alertThrottling.size,\n trendPatterns: this.trendAnalysis.has('patterns') ? \n this.trendAnalysis.get('patterns').dailyChangePatterns.size : 0\n };\n }\n \n /**\n * Create baseline snapshot for change tracking
|
|
452
|
+
* @param {string} url - URL to track
|
|
453
|
+
* @param {string} content - Content to establish as baseline
|
|
454
|
+
* @param {Object} options - Tracking options
|
|
455
|
+
* @returns {Object} - Baseline snapshot information
|
|
456
|
+
*/
|
|
457
|
+
async createBaseline(url, content, options = {}) {
|
|
458
|
+
const startTime = Date.now();
|
|
459
|
+
|
|
460
|
+
try {
|
|
461
|
+
const validated = ChangeTrackingSchema.parse({ url, content, options });
|
|
462
|
+
const { granularity, trackText, trackStructure } = validated.options;
|
|
463
|
+
|
|
464
|
+
// Generate hierarchical content hashes
|
|
465
|
+
const contentAnalysis = await this.analyzeContent(content, validated.options);
|
|
466
|
+
|
|
467
|
+
const baseline = {
|
|
468
|
+
url,
|
|
469
|
+
timestamp: Date.now(),
|
|
470
|
+
contentLength: content.length,
|
|
471
|
+
granularity,
|
|
472
|
+
analysis: contentAnalysis,
|
|
473
|
+
options: validated.options,
|
|
474
|
+
version: 1
|
|
475
|
+
};
|
|
476
|
+
|
|
477
|
+
// Store baseline
|
|
478
|
+
this.snapshots.set(url, [baseline]);
|
|
479
|
+
this.contentHashes.set(url, contentAnalysis.hashes);
|
|
480
|
+
this.changeHistory.set(url, []);
|
|
481
|
+
this.lastProcessedTimestamps.set(url, Date.now());
|
|
482
|
+
|
|
483
|
+
this.stats.pagesTracked++;
|
|
484
|
+
this.stats.processingTime += Date.now() - startTime;
|
|
485
|
+
|
|
486
|
+
this.emit('baselineCreated', {
|
|
487
|
+
url,
|
|
488
|
+
baseline,
|
|
489
|
+
processingTime: Date.now() - startTime
|
|
490
|
+
});
|
|
491
|
+
|
|
492
|
+
return {
|
|
493
|
+
success: true,
|
|
494
|
+
url,
|
|
495
|
+
version: 1,
|
|
496
|
+
contentHash: contentAnalysis.hashes.page,
|
|
497
|
+
sections: Object.keys(contentAnalysis.hashes.sections).length,
|
|
498
|
+
elements: Object.keys(contentAnalysis.hashes.elements).length,
|
|
499
|
+
createdAt: baseline.timestamp
|
|
500
|
+
};
|
|
501
|
+
|
|
502
|
+
} catch (error) {
|
|
503
|
+
this.emit('error', { operation: 'createBaseline', url, error: error.message });
|
|
504
|
+
throw new Error(`Failed to create baseline for ${url}: ${error.message}`);
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Compare current content against baseline
|
|
510
|
+
* @param {string} url - URL to compare
|
|
511
|
+
* @param {string} currentContent - Current content
|
|
512
|
+
* @param {Object} options - Comparison options
|
|
513
|
+
* @returns {Object} - Change analysis results
|
|
514
|
+
*/
|
|
515
|
+
async compareWithBaseline(url, currentContent, options = {}) {
|
|
516
|
+
const startTime = Date.now();
|
|
517
|
+
|
|
518
|
+
try {
|
|
519
|
+
if (!this.snapshots.has(url)) {
|
|
520
|
+
throw new Error(`No baseline found for URL: ${url}`);
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
const snapshots = this.snapshots.get(url);
|
|
524
|
+
const baseline = snapshots[snapshots.length - 1]; // Get latest baseline
|
|
525
|
+
|
|
526
|
+
const validated = ChangeComparisonSchema.parse({
|
|
527
|
+
baselineUrl: url,
|
|
528
|
+
currentUrl: url,
|
|
529
|
+
baselineContent: baseline.analysis.originalContent || '',
|
|
530
|
+
currentContent,
|
|
531
|
+
options
|
|
532
|
+
});
|
|
533
|
+
|
|
534
|
+
// Analyze current content
|
|
535
|
+
const currentAnalysis = await this.analyzeContent(currentContent, baseline.options);
|
|
536
|
+
|
|
537
|
+
// Perform comprehensive change detection
|
|
538
|
+
const changeAnalysis = await this.detectChanges(
|
|
539
|
+
baseline.analysis,
|
|
540
|
+
currentAnalysis,
|
|
541
|
+
baseline.options
|
|
542
|
+
);
|
|
543
|
+
|
|
544
|
+
// Calculate change significance
|
|
545
|
+
const significance = await this.calculateChangeSignificance(changeAnalysis, baseline.options);
|
|
546
|
+
|
|
547
|
+
// Create change record
|
|
548
|
+
const changeRecord = {
|
|
549
|
+
url,
|
|
550
|
+
timestamp: Date.now(),
|
|
551
|
+
baselineVersion: baseline.version,
|
|
552
|
+
changeType: this.classifyChangeType(changeAnalysis),
|
|
553
|
+
significance,
|
|
554
|
+
details: changeAnalysis,
|
|
555
|
+
metrics: {
|
|
556
|
+
contentSimilarity: changeAnalysis.similarity,
|
|
557
|
+
structuralSimilarity: changeAnalysis.structuralSimilarity,
|
|
558
|
+
addedElements: changeAnalysis.addedElements?.length || 0,
|
|
559
|
+
removedElements: changeAnalysis.removedElements?.length || 0,
|
|
560
|
+
modifiedElements: changeAnalysis.modifiedElements?.length || 0
|
|
561
|
+
},
|
|
562
|
+
processingTime: 0
|
|
563
|
+
};
|
|
564
|
+
|
|
565
|
+
changeRecord.processingTime = Date.now() - startTime;
|
|
566
|
+
|
|
567
|
+
// Store change record
|
|
568
|
+
const changeHistory = this.changeHistory.get(url);
|
|
569
|
+
changeHistory.push(changeRecord);
|
|
570
|
+
|
|
571
|
+
// Update statistics
|
|
572
|
+
this.updateStats(changeRecord);
|
|
573
|
+
|
|
574
|
+
// Update content hashes if significant change
|
|
575
|
+
if (significance !== 'none') {
|
|
576
|
+
this.contentHashes.set(url, currentAnalysis.hashes);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
this.emit('changeDetected', changeRecord);
|
|
580
|
+
|
|
581
|
+
return {
|
|
582
|
+
hasChanges: significance !== 'none',
|
|
583
|
+
significance,
|
|
584
|
+
changeType: changeRecord.changeType,
|
|
585
|
+
summary: this.generateChangeSummary(changeAnalysis),
|
|
586
|
+
details: changeAnalysis,
|
|
587
|
+
metrics: changeRecord.metrics,
|
|
588
|
+
recommendations: this.generateChangeRecommendations(changeRecord)
|
|
589
|
+
};
|
|
590
|
+
|
|
591
|
+
} catch (error) {
|
|
592
|
+
this.emit('error', { operation: 'compareWithBaseline', url, error: error.message });
|
|
593
|
+
throw new Error(`Failed to compare content for ${url}: ${error.message}`);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
/**
|
|
598
|
+
* Analyze content structure and create hierarchical hashes
|
|
599
|
+
* @param {string} content - Content to analyze
|
|
600
|
+
* @param {Object} options - Analysis options
|
|
601
|
+
* @returns {Object} - Content analysis results
|
|
602
|
+
*/
|
|
603
|
+
async analyzeContent(content, options = {}) {
|
|
604
|
+
const analysis = {
|
|
605
|
+
originalContent: content,
|
|
606
|
+
hashes: {
|
|
607
|
+
page: this.hashContent(content),
|
|
608
|
+
sections: {},
|
|
609
|
+
elements: {},
|
|
610
|
+
text: {}
|
|
611
|
+
},
|
|
612
|
+
structure: {},
|
|
613
|
+
metadata: {},
|
|
614
|
+
statistics: {}
|
|
615
|
+
};
|
|
616
|
+
|
|
617
|
+
try {
|
|
618
|
+
// Parse HTML if available
|
|
619
|
+
const $ = load(content);
|
|
620
|
+
|
|
621
|
+
// Remove excluded elements
|
|
622
|
+
options.excludeSelectors?.forEach(selector => {
|
|
623
|
+
$(selector).remove();
|
|
624
|
+
});
|
|
625
|
+
|
|
626
|
+
// Analyze at different granularities
|
|
627
|
+
switch (options.granularity) {
|
|
628
|
+
case 'element':
|
|
629
|
+
await this.analyzeElementLevel($, analysis, options);
|
|
630
|
+
break;
|
|
631
|
+
case 'section':
|
|
632
|
+
await this.analyzeSectionLevel($, analysis, options);
|
|
633
|
+
break;
|
|
634
|
+
case 'text':
|
|
635
|
+
await this.analyzeTextLevel($, analysis, options);
|
|
636
|
+
break;
|
|
637
|
+
default:
|
|
638
|
+
await this.analyzePageLevel($, analysis, options);
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
// Extract structural information
|
|
642
|
+
if (options.trackStructure) {
|
|
643
|
+
analysis.structure = this.extractStructure($, options);
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
// Extract metadata
|
|
647
|
+
analysis.metadata = this.extractMetadata($, options);
|
|
648
|
+
|
|
649
|
+
// Calculate statistics
|
|
650
|
+
analysis.statistics = this.calculateContentStatistics(content, $);
|
|
651
|
+
|
|
652
|
+
} catch (error) {
|
|
653
|
+
// Fallback to plain text analysis
|
|
654
|
+
analysis.hashes.text.plain = this.hashContent(content);
|
|
655
|
+
analysis.statistics = {
|
|
656
|
+
contentLength: content.length,
|
|
657
|
+
wordCount: content.split(/\s+/).length,
|
|
658
|
+
error: error.message
|
|
659
|
+
};
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
return analysis;
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
/**
|
|
666
|
+
* Detect changes between two content analyses
|
|
667
|
+
* @param {Object} baseline - Baseline content analysis
|
|
668
|
+
* @param {Object} current - Current content analysis
|
|
669
|
+
* @param {Object} options - Detection options
|
|
670
|
+
* @returns {Object} - Change detection results
|
|
671
|
+
*/
|
|
672
|
+
async detectChanges(baseline, current, options = {}) {
|
|
673
|
+
const changes = {
|
|
674
|
+
similarity: 0,
|
|
675
|
+
structuralSimilarity: 0,
|
|
676
|
+
addedElements: [],
|
|
677
|
+
removedElements: [],
|
|
678
|
+
modifiedElements: [],
|
|
679
|
+
textChanges: [],
|
|
680
|
+
structuralChanges: [],
|
|
681
|
+
attributeChanges: [],
|
|
682
|
+
imageChanges: [],
|
|
683
|
+
linkChanges: []
|
|
684
|
+
};
|
|
685
|
+
|
|
686
|
+
// Calculate overall content similarity
|
|
687
|
+
changes.similarity = this.calculateSimilarity(
|
|
688
|
+
baseline.hashes.page,
|
|
689
|
+
current.hashes.page
|
|
690
|
+
);
|
|
691
|
+
|
|
692
|
+
// Detect structural changes
|
|
693
|
+
if (options.trackStructure) {
|
|
694
|
+
changes.structuralChanges = await this.detectStructuralChanges(
|
|
695
|
+
baseline.structure,
|
|
696
|
+
current.structure
|
|
697
|
+
);
|
|
698
|
+
|
|
699
|
+
changes.structuralSimilarity = this.calculateStructuralSimilarity(
|
|
700
|
+
baseline.structure,
|
|
701
|
+
current.structure
|
|
702
|
+
);
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
// Detect section-level changes
|
|
706
|
+
const sectionChanges = this.detectHashChanges(
|
|
707
|
+
baseline.hashes.sections,
|
|
708
|
+
current.hashes.sections
|
|
709
|
+
);
|
|
710
|
+
|
|
711
|
+
changes.addedElements.push(...sectionChanges.added);
|
|
712
|
+
changes.removedElements.push(...sectionChanges.removed);
|
|
713
|
+
changes.modifiedElements.push(...sectionChanges.modified);
|
|
714
|
+
|
|
715
|
+
// Detect element-level changes
|
|
716
|
+
if (baseline.hashes.elements && current.hashes.elements) {
|
|
717
|
+
const elementChanges = this.detectHashChanges(
|
|
718
|
+
baseline.hashes.elements,
|
|
719
|
+
current.hashes.elements
|
|
720
|
+
);
|
|
721
|
+
|
|
722
|
+
changes.addedElements.push(...elementChanges.added);
|
|
723
|
+
changes.removedElements.push(...elementChanges.removed);
|
|
724
|
+
changes.modifiedElements.push(...elementChanges.modified);
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
// Detect text changes
|
|
728
|
+
if (options.trackText) {
|
|
729
|
+
changes.textChanges = await this.detectTextChanges(
|
|
730
|
+
baseline.originalContent,
|
|
731
|
+
current.originalContent,
|
|
732
|
+
options
|
|
733
|
+
);
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
// Detect link changes
|
|
737
|
+
if (options.trackLinks) {
|
|
738
|
+
changes.linkChanges = this.detectLinkChanges(
|
|
739
|
+
baseline.metadata.links || [],
|
|
740
|
+
current.metadata.links || []
|
|
741
|
+
);
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
// Detect image changes
|
|
745
|
+
if (options.trackImages) {
|
|
746
|
+
changes.imageChanges = this.detectImageChanges(
|
|
747
|
+
baseline.metadata.images || [],
|
|
748
|
+
current.metadata.images || []
|
|
749
|
+
);
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
return changes;
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
/**
|
|
756
|
+
* Calculate change significance score
|
|
757
|
+
* @param {Object} changeAnalysis - Change analysis results
|
|
758
|
+
* @param {Object} options - Scoring options
|
|
759
|
+
* @returns {string} - Significance level
|
|
760
|
+
*/
|
|
761
|
+
async calculateChangeSignificance(changeAnalysis, options = {}) {
|
|
762
|
+
const thresholds = options.significanceThresholds || {
|
|
763
|
+
minor: 0.1,
|
|
764
|
+
moderate: 0.3,
|
|
765
|
+
major: 0.7
|
|
766
|
+
};
|
|
767
|
+
|
|
768
|
+
let significanceScore = 0;
|
|
769
|
+
const weights = {
|
|
770
|
+
similarity: 0.3,
|
|
771
|
+
structural: 0.2,
|
|
772
|
+
additions: 0.15,
|
|
773
|
+
removals: 0.15,
|
|
774
|
+
modifications: 0.1,
|
|
775
|
+
textChanges: 0.1
|
|
776
|
+
};
|
|
777
|
+
|
|
778
|
+
// Content similarity impact (inverted - less similarity = more significant)
|
|
779
|
+
significanceScore += (1 - changeAnalysis.similarity) * weights.similarity;
|
|
780
|
+
|
|
781
|
+
// Structural changes impact
|
|
782
|
+
if (changeAnalysis.structuralChanges.length > 0) {
|
|
783
|
+
significanceScore += Math.min(changeAnalysis.structuralChanges.length * 0.1, 1) * weights.structural;
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
// Element changes impact
|
|
787
|
+
const totalElements = changeAnalysis.addedElements.length +
|
|
788
|
+
changeAnalysis.removedElements.length +
|
|
789
|
+
changeAnalysis.modifiedElements.length;
|
|
790
|
+
|
|
791
|
+
significanceScore += Math.min(totalElements * 0.05, 1) *
|
|
792
|
+
(weights.additions + weights.removals + weights.modifications);
|
|
793
|
+
|
|
794
|
+
// Text changes impact
|
|
795
|
+
if (changeAnalysis.textChanges.length > 0) {
|
|
796
|
+
const textChangeRatio = changeAnalysis.textChanges.reduce(
|
|
797
|
+
(sum, change) => sum + (change.added?.length || 0) + (change.removed?.length || 0),
|
|
798
|
+
0
|
|
799
|
+
) / 1000; // Normalize by character count
|
|
800
|
+
|
|
801
|
+
significanceScore += Math.min(textChangeRatio, 1) * weights.textChanges;
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
// Determine significance level
|
|
805
|
+
if (significanceScore < thresholds.minor) {
|
|
806
|
+
return 'none';
|
|
807
|
+
} else if (significanceScore < thresholds.moderate) {
|
|
808
|
+
return 'minor';
|
|
809
|
+
} else if (significanceScore < thresholds.major) {
|
|
810
|
+
return 'moderate';
|
|
811
|
+
} else if (significanceScore < 0.9) {
|
|
812
|
+
return 'major';
|
|
813
|
+
} else {
|
|
814
|
+
return 'critical';
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
// Content Analysis Methods
|
|
819
|
+
|
|
820
|
+
async analyzePageLevel($, analysis, options) {
|
|
821
|
+
const pageContent = $.html();
|
|
822
|
+
analysis.hashes.page = this.hashContent(pageContent);
|
|
823
|
+
|
|
824
|
+
if (options.trackText) {
|
|
825
|
+
const textContent = $.text();
|
|
826
|
+
analysis.hashes.text.page = this.hashContent(textContent);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
async analyzeSectionLevel($, analysis, options) {
|
|
831
|
+
const sections = ['header', 'nav', 'main', 'article', 'section', 'aside', 'footer'];
|
|
832
|
+
|
|
833
|
+
sections.forEach(tag => {
|
|
834
|
+
$(tag).each((index, element) => {
|
|
835
|
+
const sectionKey = `${tag}_${index}`;
|
|
836
|
+
const sectionContent = $(element).html() || '';
|
|
837
|
+
analysis.hashes.sections[sectionKey] = this.hashContent(sectionContent);
|
|
838
|
+
|
|
839
|
+
if (options.trackText) {
|
|
840
|
+
const textContent = $(element).text() || '';
|
|
841
|
+
analysis.hashes.text[sectionKey] = this.hashContent(textContent);
|
|
842
|
+
}
|
|
843
|
+
});
|
|
844
|
+
});
|
|
845
|
+
|
|
846
|
+
// Handle custom selectors
|
|
847
|
+
if (options.customSelectors) {
|
|
848
|
+
options.customSelectors.forEach((selector, index) => {
|
|
849
|
+
$(selector).each((elemIndex, element) => {
|
|
850
|
+
const key = `custom_${index}_${elemIndex}`;
|
|
851
|
+
const content = $(element).html() || '';
|
|
852
|
+
analysis.hashes.sections[key] = this.hashContent(content);
|
|
853
|
+
});
|
|
854
|
+
});
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
async analyzeElementLevel($, analysis, options) {
|
|
859
|
+
// Analyze common important elements
|
|
860
|
+
const importantElements = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'a'];
|
|
861
|
+
|
|
862
|
+
importantElements.forEach(tag => {
|
|
863
|
+
$(tag).each((index, element) => {
|
|
864
|
+
const elementKey = `${tag}_${index}`;
|
|
865
|
+
const elementContent = $(element).html() || '';
|
|
866
|
+
analysis.hashes.elements[elementKey] = this.hashContent(elementContent);
|
|
867
|
+
|
|
868
|
+
if (options.trackAttributes) {
|
|
869
|
+
const attributes = element.attribs || {};
|
|
870
|
+
analysis.hashes.elements[`${elementKey}_attr`] = this.hashContent(JSON.stringify(attributes));
|
|
871
|
+
}
|
|
872
|
+
});
|
|
873
|
+
});
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
async analyzeTextLevel($, analysis, options) {
|
|
877
|
+
const textNodes = [];
|
|
878
|
+
|
|
879
|
+
// Extract all text nodes
|
|
880
|
+
$('*').contents().filter(function() {
|
|
881
|
+
return this.type === 'text' && $(this).text().trim();
|
|
882
|
+
}).each((index, node) => {
|
|
883
|
+
const text = $(node).text().trim();
|
|
884
|
+
if (text) {
|
|
885
|
+
textNodes.push(text);
|
|
886
|
+
analysis.hashes.text[`text_${index}`] = this.hashContent(text);
|
|
887
|
+
}
|
|
888
|
+
});
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
extractStructure($, options) {
|
|
892
|
+
const structure = {
|
|
893
|
+
elements: [],
|
|
894
|
+
hierarchy: {},
|
|
895
|
+
semanticStructure: {}
|
|
896
|
+
};
|
|
897
|
+
|
|
898
|
+
// Extract DOM hierarchy
|
|
899
|
+
$('*').each((index, element) => {
|
|
900
|
+
const tagName = element.name;
|
|
901
|
+
const depth = $(element).parents().length;
|
|
902
|
+
const hasChildren = $(element).children().length > 0;
|
|
903
|
+
|
|
904
|
+
structure.elements.push({
|
|
905
|
+
tag: tagName,
|
|
906
|
+
index,
|
|
907
|
+
depth,
|
|
908
|
+
hasChildren,
|
|
909
|
+
classes: element.attribs?.class?.split(' ') || [],
|
|
910
|
+
id: element.attribs?.id
|
|
911
|
+
});
|
|
912
|
+
});
|
|
913
|
+
|
|
914
|
+
// Extract semantic structure
|
|
915
|
+
const semanticTags = ['header', 'nav', 'main', 'article', 'section', 'aside', 'footer'];
|
|
916
|
+
semanticTags.forEach(tag => {
|
|
917
|
+
structure.semanticStructure[tag] = $(tag).length;
|
|
918
|
+
});
|
|
919
|
+
|
|
920
|
+
return structure;
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
extractMetadata($, options) {
|
|
924
|
+
const metadata = {
|
|
925
|
+
title: $('title').text() || '',
|
|
926
|
+
headings: [],
|
|
927
|
+
links: [],
|
|
928
|
+
images: [],
|
|
929
|
+
scripts: [],
|
|
930
|
+
forms: []
|
|
931
|
+
};
|
|
932
|
+
|
|
933
|
+
// Extract headings
|
|
934
|
+
$('h1, h2, h3, h4, h5, h6').each((index, element) => {
|
|
935
|
+
metadata.headings.push({
|
|
936
|
+
tag: element.name,
|
|
937
|
+
text: $(element).text().trim(),
|
|
938
|
+
level: parseInt(element.name.replace('h', ''))
|
|
939
|
+
});
|
|
940
|
+
});
|
|
941
|
+
|
|
942
|
+
// Extract links
|
|
943
|
+
if (options.trackLinks) {
|
|
944
|
+
$('a[href]').each((index, element) => {
|
|
945
|
+
metadata.links.push({
|
|
946
|
+
href: $(element).attr('href'),
|
|
947
|
+
text: $(element).text().trim(),
|
|
948
|
+
external: this.isExternalLink($(element).attr('href'))
|
|
949
|
+
});
|
|
950
|
+
});
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
// Extract images
|
|
954
|
+
if (options.trackImages) {
|
|
955
|
+
$('img[src]').each((index, element) => {
|
|
956
|
+
metadata.images.push({
|
|
957
|
+
src: $(element).attr('src'),
|
|
958
|
+
alt: $(element).attr('alt') || '',
|
|
959
|
+
title: $(element).attr('title') || ''
|
|
960
|
+
});
|
|
961
|
+
});
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
return metadata;
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
calculateContentStatistics(content, $) {
|
|
968
|
+
return {
|
|
969
|
+
contentLength: content.length,
|
|
970
|
+
htmlLength: $.html().length,
|
|
971
|
+
textLength: $.text().length,
|
|
972
|
+
wordCount: $.text().split(/\s+/).filter(word => word.length > 0).length,
|
|
973
|
+
elementCount: $('*').length,
|
|
974
|
+
linkCount: $('a').length,
|
|
975
|
+
imageCount: $('img').length,
|
|
976
|
+
scriptCount: $('script').length
|
|
977
|
+
};
|
|
978
|
+
}
|
|
979
|
+
|
|
980
|
+
// Change Detection Methods
|
|
981
|
+
|
|
982
|
+
detectHashChanges(baselineHashes, currentHashes) {
|
|
983
|
+
const changes = {
|
|
984
|
+
added: [],
|
|
985
|
+
removed: [],
|
|
986
|
+
modified: []
|
|
987
|
+
};
|
|
988
|
+
|
|
989
|
+
const baselineKeys = new Set(Object.keys(baselineHashes));
|
|
990
|
+
const currentKeys = new Set(Object.keys(currentHashes));
|
|
991
|
+
|
|
992
|
+
// Find added elements
|
|
993
|
+
for (const key of currentKeys) {
|
|
994
|
+
if (!baselineKeys.has(key)) {
|
|
995
|
+
changes.added.push(key);
|
|
996
|
+
}
|
|
997
|
+
}
|
|
998
|
+
|
|
999
|
+
// Find removed elements
|
|
1000
|
+
for (const key of baselineKeys) {
|
|
1001
|
+
if (!currentKeys.has(key)) {
|
|
1002
|
+
changes.removed.push(key);
|
|
1003
|
+
}
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
// Find modified elements
|
|
1007
|
+
for (const key of baselineKeys) {
|
|
1008
|
+
if (currentKeys.has(key) && baselineHashes[key] !== currentHashes[key]) {
|
|
1009
|
+
changes.modified.push({
|
|
1010
|
+
key,
|
|
1011
|
+
oldHash: baselineHashes[key],
|
|
1012
|
+
newHash: currentHashes[key]
|
|
1013
|
+
});
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
return changes;
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
async detectStructuralChanges(baselineStructure, currentStructure) {
|
|
1021
|
+
const changes = [];
|
|
1022
|
+
|
|
1023
|
+
// Compare element counts by type
|
|
1024
|
+
const baselineCounts = this.countElementTypes(baselineStructure);
|
|
1025
|
+
const currentCounts = this.countElementTypes(currentStructure);
|
|
1026
|
+
|
|
1027
|
+
for (const [element, baselineCount] of baselineCounts) {
|
|
1028
|
+
const currentCount = currentCounts.get(element) || 0;
|
|
1029
|
+
if (currentCount !== baselineCount) {
|
|
1030
|
+
changes.push({
|
|
1031
|
+
type: 'element_count_change',
|
|
1032
|
+
element,
|
|
1033
|
+
oldCount: baselineCount,
|
|
1034
|
+
newCount: currentCount,
|
|
1035
|
+
difference: currentCount - baselineCount
|
|
1036
|
+
});
|
|
1037
|
+
}
|
|
1038
|
+
}
|
|
1039
|
+
|
|
1040
|
+
// Check for new element types
|
|
1041
|
+
for (const [element, currentCount] of currentCounts) {
|
|
1042
|
+
if (!baselineCounts.has(element)) {
|
|
1043
|
+
changes.push({
|
|
1044
|
+
type: 'new_element_type',
|
|
1045
|
+
element,
|
|
1046
|
+
count: currentCount
|
|
1047
|
+
});
|
|
1048
|
+
}
|
|
1049
|
+
}
|
|
1050
|
+
|
|
1051
|
+
return changes;
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
async detectTextChanges(baselineContent, currentContent, options = {}) {
|
|
1055
|
+
const textChanges = [];
|
|
1056
|
+
|
|
1057
|
+
if (options.ignoreWhitespace) {
|
|
1058
|
+
baselineContent = baselineContent.replace(/\s+/g, ' ').trim();
|
|
1059
|
+
currentContent = currentContent.replace(/\s+/g, ' ').trim();
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
if (options.ignoreCase) {
|
|
1063
|
+
baselineContent = baselineContent.toLowerCase();
|
|
1064
|
+
currentContent = currentContent.toLowerCase();
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
// Word-level diff
|
|
1068
|
+
const wordDiff = diffWords(baselineContent, currentContent);
|
|
1069
|
+
textChanges.push({
|
|
1070
|
+
type: 'word_diff',
|
|
1071
|
+
changes: wordDiff.filter(part => part.added || part.removed)
|
|
1072
|
+
});
|
|
1073
|
+
|
|
1074
|
+
// Line-level diff for structured content
|
|
1075
|
+
const lineDiff = diffLines(baselineContent, currentContent);
|
|
1076
|
+
if (lineDiff.some(part => part.added || part.removed)) {
|
|
1077
|
+
textChanges.push({
|
|
1078
|
+
type: 'line_diff',
|
|
1079
|
+
changes: lineDiff.filter(part => part.added || part.removed)
|
|
1080
|
+
});
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
return textChanges;
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
detectLinkChanges(baselineLinks, currentLinks) {
|
|
1087
|
+
const changes = {
|
|
1088
|
+
added: [],
|
|
1089
|
+
removed: [],
|
|
1090
|
+
modified: []
|
|
1091
|
+
};
|
|
1092
|
+
|
|
1093
|
+
const baselineMap = new Map(baselineLinks.map(link => [link.href, link]));
|
|
1094
|
+
const currentMap = new Map(currentLinks.map(link => [link.href, link]));
|
|
1095
|
+
|
|
1096
|
+
// Find added links
|
|
1097
|
+
for (const [href, link] of currentMap) {
|
|
1098
|
+
if (!baselineMap.has(href)) {
|
|
1099
|
+
changes.added.push(link);
|
|
1100
|
+
}
|
|
1101
|
+
}
|
|
1102
|
+
|
|
1103
|
+
// Find removed links
|
|
1104
|
+
for (const [href, link] of baselineMap) {
|
|
1105
|
+
if (!currentMap.has(href)) {
|
|
1106
|
+
changes.removed.push(link);
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
// Find modified links (text changes)
|
|
1111
|
+
for (const [href, baselineLink] of baselineMap) {
|
|
1112
|
+
const currentLink = currentMap.get(href);
|
|
1113
|
+
if (currentLink && currentLink.text !== baselineLink.text) {
|
|
1114
|
+
changes.modified.push({
|
|
1115
|
+
href,
|
|
1116
|
+
oldText: baselineLink.text,
|
|
1117
|
+
newText: currentLink.text
|
|
1118
|
+
});
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
1121
|
+
|
|
1122
|
+
return changes;
|
|
1123
|
+
}
|
|
1124
|
+
|
|
1125
|
+
detectImageChanges(baselineImages, currentImages) {
|
|
1126
|
+
const changes = {
|
|
1127
|
+
added: [],
|
|
1128
|
+
removed: [],
|
|
1129
|
+
modified: []
|
|
1130
|
+
};
|
|
1131
|
+
|
|
1132
|
+
const baselineMap = new Map(baselineImages.map(img => [img.src, img]));
|
|
1133
|
+
const currentMap = new Map(currentImages.map(img => [img.src, img]));
|
|
1134
|
+
|
|
1135
|
+
// Find added images
|
|
1136
|
+
for (const [src, img] of currentMap) {
|
|
1137
|
+
if (!baselineMap.has(src)) {
|
|
1138
|
+
changes.added.push(img);
|
|
1139
|
+
}
|
|
1140
|
+
}
|
|
1141
|
+
|
|
1142
|
+
// Find removed images
|
|
1143
|
+
for (const [src, img] of baselineMap) {
|
|
1144
|
+
if (!currentMap.has(src)) {
|
|
1145
|
+
changes.removed.push(img);
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
// Find modified images (alt text changes)
|
|
1150
|
+
for (const [src, baselineImg] of baselineMap) {
|
|
1151
|
+
const currentImg = currentMap.get(src);
|
|
1152
|
+
if (currentImg && (currentImg.alt !== baselineImg.alt || currentImg.title !== baselineImg.title)) {
|
|
1153
|
+
changes.modified.push({
|
|
1154
|
+
src,
|
|
1155
|
+
oldAlt: baselineImg.alt,
|
|
1156
|
+
newAlt: currentImg.alt,
|
|
1157
|
+
oldTitle: baselineImg.title,
|
|
1158
|
+
newTitle: currentImg.title
|
|
1159
|
+
});
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
return changes;
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
// Utility Methods
|
|
1167
|
+
|
|
1168
|
+
hashContent(content) {
|
|
1169
|
+
return createHash(this.options.hashAlgorithm)
|
|
1170
|
+
.update(content || '')
|
|
1171
|
+
.digest('hex');
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
calculateSimilarity(hash1, hash2) {
|
|
1175
|
+
if (hash1 === hash2) return 1;
|
|
1176
|
+
|
|
1177
|
+
// Simple similarity based on hash difference
|
|
1178
|
+
// In production, you might want to use more sophisticated algorithms
|
|
1179
|
+
const diff = this.hammingDistance(hash1, hash2);
|
|
1180
|
+
const maxLength = Math.max(hash1.length, hash2.length);
|
|
1181
|
+
return 1 - (diff / maxLength);
|
|
1182
|
+
}
|
|
1183
|
+
|
|
1184
|
+
calculateStructuralSimilarity(baseline, current) {
|
|
1185
|
+
if (!baseline || !current) return 0;
|
|
1186
|
+
|
|
1187
|
+
const baselineElements = baseline.elements || [];
|
|
1188
|
+
const currentElements = current.elements || [];
|
|
1189
|
+
|
|
1190
|
+
if (baselineElements.length === 0 && currentElements.length === 0) return 1;
|
|
1191
|
+
if (baselineElements.length === 0 || currentElements.length === 0) return 0;
|
|
1192
|
+
|
|
1193
|
+
const tagSimilarity = this.calculateTagSimilarity(baselineElements, currentElements);
|
|
1194
|
+
const hierarchySimilarity = this.calculateHierarchySimilarity(baseline.hierarchy, current.hierarchy);
|
|
1195
|
+
|
|
1196
|
+
return (tagSimilarity + hierarchySimilarity) / 2;
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
calculateTagSimilarity(baselineElements, currentElements) {
|
|
1200
|
+
const baselineTags = baselineElements.map(el => el.tag);
|
|
1201
|
+
const currentTags = currentElements.map(el => el.tag);
|
|
1202
|
+
|
|
1203
|
+
const intersection = baselineTags.filter(tag => currentTags.includes(tag));
|
|
1204
|
+
const union = new Set([...baselineTags, ...currentTags]);
|
|
1205
|
+
|
|
1206
|
+
return intersection.length / union.size;
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1209
|
+
calculateHierarchySimilarity(baseline, current) {
|
|
1210
|
+
// Simple structural comparison - can be enhanced
|
|
1211
|
+
if (!baseline || !current) return 0;
|
|
1212
|
+
return Object.keys(baseline).length === Object.keys(current).length ? 1 : 0.5;
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
hammingDistance(str1, str2) {
|
|
1216
|
+
if (str1.length !== str2.length) {
|
|
1217
|
+
return Math.abs(str1.length - str2.length);
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
let distance = 0;
|
|
1221
|
+
for (let i = 0; i < str1.length; i++) {
|
|
1222
|
+
if (str1[i] !== str2[i]) {
|
|
1223
|
+
distance++;
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1226
|
+
return distance;
|
|
1227
|
+
}
|
|
1228
|
+
|
|
1229
|
+
countElementTypes(structure) {
|
|
1230
|
+
const counts = new Map();
|
|
1231
|
+
|
|
1232
|
+
if (structure.elements) {
|
|
1233
|
+
structure.elements.forEach(element => {
|
|
1234
|
+
counts.set(element.tag, (counts.get(element.tag) || 0) + 1);
|
|
1235
|
+
});
|
|
1236
|
+
}
|
|
1237
|
+
|
|
1238
|
+
return counts;
|
|
1239
|
+
}
|
|
1240
|
+
|
|
1241
|
+
isExternalLink(href) {
|
|
1242
|
+
if (!href) return false;
|
|
1243
|
+
return href.startsWith('http://') || href.startsWith('https://');
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1246
|
+
classifyChangeType(changeAnalysis) {
|
|
1247
|
+
const { addedElements, removedElements, modifiedElements, structuralChanges } = changeAnalysis;
|
|
1248
|
+
|
|
1249
|
+
if (structuralChanges.length > 0) {
|
|
1250
|
+
return 'structural';
|
|
1251
|
+
}
|
|
1252
|
+
|
|
1253
|
+
if (addedElements.length > removedElements.length) {
|
|
1254
|
+
return 'content_addition';
|
|
1255
|
+
}
|
|
1256
|
+
|
|
1257
|
+
if (removedElements.length > addedElements.length) {
|
|
1258
|
+
return 'content_removal';
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
if (modifiedElements.length > 0) {
|
|
1262
|
+
return 'content_modification';
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
return 'text_change';
|
|
1266
|
+
}
|
|
1267
|
+
|
|
1268
|
+
generateChangeSummary(changeAnalysis) {
|
|
1269
|
+
const { addedElements, removedElements, modifiedElements, similarity } = changeAnalysis;
|
|
1270
|
+
|
|
1271
|
+
const total = addedElements.length + removedElements.length + modifiedElements.length;
|
|
1272
|
+
|
|
1273
|
+
return {
|
|
1274
|
+
totalChanges: total,
|
|
1275
|
+
contentSimilarity: Math.round(similarity * 100),
|
|
1276
|
+
added: addedElements.length,
|
|
1277
|
+
removed: removedElements.length,
|
|
1278
|
+
modified: modifiedElements.length,
|
|
1279
|
+
changeDescription: this.generateChangeDescription(changeAnalysis)
|
|
1280
|
+
};
|
|
1281
|
+
}
|
|
1282
|
+
|
|
1283
|
+
generateChangeDescription(changeAnalysis) {
|
|
1284
|
+
const { addedElements, removedElements, modifiedElements, textChanges } = changeAnalysis;
|
|
1285
|
+
|
|
1286
|
+
const descriptions = [];
|
|
1287
|
+
|
|
1288
|
+
if (addedElements.length > 0) {
|
|
1289
|
+
descriptions.push(`${addedElements.length} elements added`);
|
|
1290
|
+
}
|
|
1291
|
+
|
|
1292
|
+
if (removedElements.length > 0) {
|
|
1293
|
+
descriptions.push(`${removedElements.length} elements removed`);
|
|
1294
|
+
}
|
|
1295
|
+
|
|
1296
|
+
if (modifiedElements.length > 0) {
|
|
1297
|
+
descriptions.push(`${modifiedElements.length} elements modified`);
|
|
1298
|
+
}
|
|
1299
|
+
|
|
1300
|
+
if (textChanges.length > 0) {
|
|
1301
|
+
descriptions.push('Text content changed');
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1304
|
+
return descriptions.join(', ') || 'No significant changes detected';
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1307
|
+
generateChangeRecommendations(changeRecord) {
|
|
1308
|
+
const recommendations = [];
|
|
1309
|
+
const { significance, details, changeType } = changeRecord;
|
|
1310
|
+
|
|
1311
|
+
if (significance === 'critical') {
|
|
1312
|
+
recommendations.push({
|
|
1313
|
+
type: 'alert',
|
|
1314
|
+
priority: 'high',
|
|
1315
|
+
message: 'Critical changes detected. Manual review recommended.'
|
|
1316
|
+
});
|
|
1317
|
+
}
|
|
1318
|
+
|
|
1319
|
+
if (changeType === 'structural') {
|
|
1320
|
+
recommendations.push({
|
|
1321
|
+
type: 'monitoring',
|
|
1322
|
+
priority: 'medium',
|
|
1323
|
+
message: 'Structural changes may affect scraping selectors.'
|
|
1324
|
+
});
|
|
1325
|
+
}
|
|
1326
|
+
|
|
1327
|
+
if (details.similarity < 0.5) {
|
|
1328
|
+
recommendations.push({
|
|
1329
|
+
type: 'analysis',
|
|
1330
|
+
priority: 'medium',
|
|
1331
|
+
message: 'Low content similarity suggests major content changes.'
|
|
1332
|
+
});
|
|
1333
|
+
}
|
|
1334
|
+
|
|
1335
|
+
return recommendations;
|
|
1336
|
+
}
|
|
1337
|
+
|
|
1338
|
+
updateStats(changeRecord) {
|
|
1339
|
+
this.stats.changesDetected++;
|
|
1340
|
+
|
|
1341
|
+
if (changeRecord.significance !== 'none') {
|
|
1342
|
+
this.stats.significantChanges++;
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
if (changeRecord.changeType === 'structural') {
|
|
1346
|
+
this.stats.structuralChanges++;
|
|
1347
|
+
} else {
|
|
1348
|
+
this.stats.contentChanges++;
|
|
1349
|
+
}
|
|
1350
|
+
|
|
1351
|
+
// Update average change score
|
|
1352
|
+
this.stats.averageChangeScore =
|
|
1353
|
+
(this.stats.averageChangeScore * (this.stats.changesDetected - 1) +
|
|
1354
|
+
changeRecord.details.similarity) / this.stats.changesDetected;
|
|
1355
|
+
|
|
1356
|
+
this.stats.lastAnalysis = changeRecord.timestamp;
|
|
1357
|
+
this.stats.processingTime += changeRecord.processingTime;
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
// Public API Methods
|
|
1361
|
+
|
|
1362
|
+
getStats() {
|
|
1363
|
+
return {
|
|
1364
|
+
...this.stats,
|
|
1365
|
+
monitoredUrls: this.snapshots.size,
|
|
1366
|
+
totalSnapshots: Array.from(this.snapshots.values()).reduce((sum, snapshots) => sum + snapshots.length, 0),
|
|
1367
|
+
averageProcessingTime: this.stats.changesDetected > 0 ?
|
|
1368
|
+
this.stats.processingTime / this.stats.changesDetected : 0
|
|
1369
|
+
};
|
|
1370
|
+
}
|
|
1371
|
+
|
|
1372
|
+
getChangeHistory(url, limit = 50) {
|
|
1373
|
+
const history = this.changeHistory.get(url) || [];
|
|
1374
|
+
return history.slice(-limit).reverse();
|
|
1375
|
+
}
|
|
1376
|
+
|
|
1377
|
+
clearHistory(url) {
|
|
1378
|
+
if (url) {
|
|
1379
|
+
this.changeHistory.set(url, []);
|
|
1380
|
+
this.emit('historyCleared', url);
|
|
1381
|
+
} else {
|
|
1382
|
+
this.changeHistory.clear();
|
|
1383
|
+
this.emit('allHistoryCleared');
|
|
1384
|
+
}
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
resetStats() {
|
|
1388
|
+
this.stats = {
|
|
1389
|
+
pagesTracked: 0,
|
|
1390
|
+
changesDetected: 0,
|
|
1391
|
+
significantChanges: 0,
|
|
1392
|
+
structuralChanges: 0,
|
|
1393
|
+
contentChanges: 0,
|
|
1394
|
+
falsePositives: 0,
|
|
1395
|
+
averageChangeScore: 0,
|
|
1396
|
+
lastAnalysis: null,
|
|
1397
|
+
processingTime: 0
|
|
1398
|
+
};
|
|
1399
|
+
}
|
|
1400
|
+
|
|
1401
|
+
|
|
1402
|
+
/**
|
|
1403
|
+
* Generate content hash
|
|
1404
|
+
*/
|
|
1405
|
+
generateContentHash(content) {
|
|
1406
|
+
|
|
1407
|
+
return crypto.createHash("sha256").update(content).digest("hex");
|
|
1408
|
+
}
|
|
1409
|
+
|
|
1410
|
+
/**
|
|
1411
|
+
* Create snapshot of content
|
|
1412
|
+
*/
|
|
1413
|
+
async createSnapshot(url, content) {
|
|
1414
|
+
const timestamp = Date.now();
|
|
1415
|
+
const hash = this.generateContentHash(content);
|
|
1416
|
+
|
|
1417
|
+
const snapshot = {
|
|
1418
|
+
url,
|
|
1419
|
+
content,
|
|
1420
|
+
contentHash: hash,
|
|
1421
|
+
timestamp,
|
|
1422
|
+
version: 1
|
|
1423
|
+
};
|
|
1424
|
+
|
|
1425
|
+
// Store snapshot in cache
|
|
1426
|
+
if (!this.contentHistory.has(url)) {
|
|
1427
|
+
this.contentHistory.set(url, []);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
this.contentHistory.get(url).unshift(snapshot);
|
|
1431
|
+
|
|
1432
|
+
// Also store in snapshots Map for compatibility
|
|
1433
|
+
if (!this.snapshots.has(url)) {
|
|
1434
|
+
this.snapshots.set(url, []);
|
|
1435
|
+
}
|
|
1436
|
+
this.snapshots.get(url).unshift(snapshot);
|
|
1437
|
+
|
|
1438
|
+
// Keep only last 100 snapshots
|
|
1439
|
+
const history = this.contentHistory.get(url);
|
|
1440
|
+
if (history.length > 100) {
|
|
1441
|
+
history.splice(100);
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1444
|
+
return snapshot;
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1447
|
+
|
|
1448
|
+
/**
|
|
1449
|
+
* Get snapshot history for a URL
|
|
1450
|
+
*/
|
|
1451
|
+
getSnapshotHistory(url) {
|
|
1452
|
+
return this.contentHistory.get(url) || [];
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1455
|
+
/**
|
|
1456
|
+
* Detect changes against the latest snapshot
|
|
1457
|
+
*/
|
|
1458
|
+
async detectChanges(url, currentContent) {
|
|
1459
|
+
// Validate URL format
|
|
1460
|
+
try {
|
|
1461
|
+
new URL(url);
|
|
1462
|
+
} catch (error) {
|
|
1463
|
+
throw new Error(`Invalid URL format: ${url}`);
|
|
1464
|
+
}
|
|
1465
|
+
if (!this.contentHistory.has(url)) {
|
|
1466
|
+
return {
|
|
1467
|
+
hasChanges: false,
|
|
1468
|
+
score: 0,
|
|
1469
|
+
significance: "none"
|
|
1470
|
+
};
|
|
1471
|
+
}
|
|
1472
|
+
|
|
1473
|
+
const history = this.contentHistory.get(url);
|
|
1474
|
+
if (history.length === 0) {
|
|
1475
|
+
return {
|
|
1476
|
+
hasChanges: false,
|
|
1477
|
+
score: 0,
|
|
1478
|
+
significance: "none"
|
|
1479
|
+
};
|
|
1480
|
+
}
|
|
1481
|
+
|
|
1482
|
+
const lastSnapshot = history[0]; // Latest snapshot
|
|
1483
|
+
const currentHash = this.generateContentHash(currentContent);
|
|
1484
|
+
|
|
1485
|
+
if (lastSnapshot.contentHash === currentHash) {
|
|
1486
|
+
return {
|
|
1487
|
+
hasChanges: false,
|
|
1488
|
+
score: 0,
|
|
1489
|
+
significance: "none"
|
|
1490
|
+
};
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
// Calculate change score based on content difference
|
|
1494
|
+
const similarity = this.calculateSimilarity(lastSnapshot.contentHash, currentHash);
|
|
1495
|
+
const score = 1 - similarity;
|
|
1496
|
+
|
|
1497
|
+
// Determine significance
|
|
1498
|
+
let significance = "none";
|
|
1499
|
+
if (score > 0.7) significance = "major";
|
|
1500
|
+
else if (score > 0.3) significance = "moderate";
|
|
1501
|
+
else if (score > 0.1) significance = "minor";
|
|
1502
|
+
|
|
1503
|
+
return {
|
|
1504
|
+
hasChanges: score > 0,
|
|
1505
|
+
score,
|
|
1506
|
+
significance
|
|
1507
|
+
};
|
|
1508
|
+
}
|
|
1509
|
+
|
|
1510
|
+
/**
|
|
1511
|
+
* Calculate significance score for changes
|
|
1512
|
+
*/
|
|
1513
|
+
calculateSignificanceScore(changes) {
|
|
1514
|
+
if (!changes) return 0;
|
|
1515
|
+
|
|
1516
|
+
let score = 0;
|
|
1517
|
+
const weights = {
|
|
1518
|
+
textChanges: 0.4,
|
|
1519
|
+
structuralChanges: 0.6
|
|
1520
|
+
};
|
|
1521
|
+
|
|
1522
|
+
// Handle object format with textChanges and structuralChanges
|
|
1523
|
+
if (typeof changes === "object" && !Array.isArray(changes)) {
|
|
1524
|
+
if (changes.textChanges) {
|
|
1525
|
+
const text = changes.textChanges;
|
|
1526
|
+
const textScore = ((text.additions || 0) + (text.deletions || 0) + (text.modifications || 0)) / (changes.totalLength || 1000);
|
|
1527
|
+
score += textScore * weights.textChanges;
|
|
1528
|
+
}
|
|
1529
|
+
|
|
1530
|
+
if (changes.structuralChanges) {
|
|
1531
|
+
const struct = changes.structuralChanges;
|
|
1532
|
+
const structScore = ((struct.additions || 0) + (struct.deletions || 0)) / 20; // Normalize
|
|
1533
|
+
score += structScore * weights.structuralChanges;
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1536
|
+
return Math.min(score, 1.0); // Cap at 1.0
|
|
1537
|
+
}
|
|
1538
|
+
|
|
1539
|
+
// Handle legacy array format
|
|
1540
|
+
if (Array.isArray(changes)) {
|
|
1541
|
+
const legacyWeights = {
|
|
1542
|
+
added: 0.3,
|
|
1543
|
+
removed: 0.4,
|
|
1544
|
+
modified: 0.2
|
|
1545
|
+
};
|
|
1546
|
+
|
|
1547
|
+
changes.forEach(change => {
|
|
1548
|
+
score += (legacyWeights[change.type] || 0.1) * (change.count || 1);
|
|
1549
|
+
});
|
|
1550
|
+
}
|
|
1551
|
+
|
|
1552
|
+
return Math.min(score, 1.0); // Cap at 1.0
|
|
1553
|
+
}
|
|
1554
|
+
|
|
1555
|
+
/**
|
|
1556
|
+
* Start monitoring URL for changes
|
|
1557
|
+
*/
|
|
1558
|
+
async startMonitoring(url, options = {}) {
|
|
1559
|
+
const monitorId = `monitor_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
1560
|
+
|
|
1561
|
+
const monitor = {
|
|
1562
|
+
id: monitorId,
|
|
1563
|
+
url,
|
|
1564
|
+
interval: options.interval || 300000, // 5 minutes default
|
|
1565
|
+
enabled: true,
|
|
1566
|
+
lastCheck: null,
|
|
1567
|
+
checkCount: 0,
|
|
1568
|
+
changeCount: 0
|
|
1569
|
+
};
|
|
1570
|
+
|
|
1571
|
+
this.activeMonitors.set(url, monitor); // Store by URL for easy access
|
|
1572
|
+
|
|
1573
|
+
return monitor;
|
|
1574
|
+
}
|
|
1575
|
+
|
|
1576
|
+
/**
|
|
1577
|
+
* Get statistics
|
|
1578
|
+
*/
|
|
1579
|
+
getStatistics() {
|
|
1580
|
+
return {
|
|
1581
|
+
totalBaselines: this.baselineContent.size,
|
|
1582
|
+
totalMonitors: this.activeMonitors.size,
|
|
1583
|
+
totalComparisons: this.stats.comparisons,
|
|
1584
|
+
totalChanges: this.stats.changesDetected,
|
|
1585
|
+
averageChangeSignificance: this.stats.averageSignificance,
|
|
1586
|
+
lastActivity: this.stats.lastActivity
|
|
1587
|
+
};
|
|
1588
|
+
}
|
|
1589
|
+
|
|
1590
|
+
/**
|
|
1591
|
+
* Cleanup resources
|
|
1592
|
+
*/
|
|
1593
|
+
async performDifferentialAnalysis(url, currentContent, options = {}) {
|
|
1594
|
+
if (!url || !currentContent) {
|
|
1595
|
+
throw new Error("URL and current content required for differential analysis");
|
|
1596
|
+
}
|
|
1597
|
+
|
|
1598
|
+
if (!this.contentHistory.has(url)) {
|
|
1599
|
+
throw new Error(`No baseline found for URL: ${url}`);
|
|
1600
|
+
}
|
|
1601
|
+
|
|
1602
|
+
try {
|
|
1603
|
+
const history = this.contentHistory.get(url);
|
|
1604
|
+
const baseline = history[0]; // Get latest snapshot
|
|
1605
|
+
|
|
1606
|
+
const analysis = {
|
|
1607
|
+
wordDiff: [],
|
|
1608
|
+
statistics: {
|
|
1609
|
+
contentSimilarity: 0,
|
|
1610
|
+
changeScore: 0
|
|
1611
|
+
},
|
|
1612
|
+
similarity: 0,
|
|
1613
|
+
structuralChanges: [],
|
|
1614
|
+
contentChanges: [],
|
|
1615
|
+
semanticChanges: [],
|
|
1616
|
+
changeScore: 0,
|
|
1617
|
+
changeSignificance: "none",
|
|
1618
|
+
metadata: {
|
|
1619
|
+
comparisonTime: new Date().toISOString(),
|
|
1620
|
+
baselineVersion: baseline.version || "unknown",
|
|
1621
|
+
currentVersion: "current"
|
|
1622
|
+
}
|
|
1623
|
+
};
|
|
1624
|
+
|
|
1625
|
+
// Calculate similarity
|
|
1626
|
+
const currentHash = this.generateContentHash(currentContent);
|
|
1627
|
+
analysis.similarity = this.calculateSimilarity(baseline.contentHash, currentHash);
|
|
1628
|
+
analysis.statistics.contentSimilarity = analysis.similarity;
|
|
1629
|
+
analysis.statistics.changeScore = 1 - analysis.similarity;
|
|
1630
|
+
|
|
1631
|
+
// Simple word diff
|
|
1632
|
+
const baselineWords = baseline.content.split(/\s+/);
|
|
1633
|
+
const currentWords = currentContent.split(/\s+/);
|
|
1634
|
+
|
|
1635
|
+
// Basic diff calculation
|
|
1636
|
+
const added = currentWords.filter(word => !baselineWords.includes(word));
|
|
1637
|
+
const removed = baselineWords.filter(word => !currentWords.includes(word));
|
|
1638
|
+
|
|
1639
|
+
analysis.wordDiff = [
|
|
1640
|
+
...added.map(word => ({ value: word, added: true })),
|
|
1641
|
+
...removed.map(word => ({ value: word, removed: true }))
|
|
1642
|
+
];
|
|
1643
|
+
|
|
1644
|
+
return analysis;
|
|
1645
|
+
} catch (error) {
|
|
1646
|
+
throw new Error(`Differential analysis failed: ${error.message}`);
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
|
|
1650
|
+
/**
|
|
1651
|
+
* Stop monitoring a URL
|
|
1652
|
+
*/
|
|
1653
|
+
stopMonitoring(url) {
|
|
1654
|
+
if (this.activeMonitors.has(url)) {
|
|
1655
|
+
this.activeMonitors.delete(url);
|
|
1656
|
+
return true;
|
|
1657
|
+
}
|
|
1658
|
+
return false;
|
|
1659
|
+
}
|
|
1660
|
+
/**
|
|
1661
|
+
* Get statistics with proper format
|
|
1662
|
+
*/
|
|
1663
|
+
getStatistics() {
|
|
1664
|
+
return {
|
|
1665
|
+
totalBaselines: this.contentHistory.size,
|
|
1666
|
+
totalMonitors: this.activeMonitors.size,
|
|
1667
|
+
totalComparisons: this.stats.changesDetected || 0,
|
|
1668
|
+
totalChanges: this.stats.changesDetected || 0,
|
|
1669
|
+
averageChangeSignificance: this.stats.averageChangeScore || 0,
|
|
1670
|
+
lastActivity: this.stats.lastAnalysis,
|
|
1671
|
+
pagesTracked: this.contentHistory.size,
|
|
1672
|
+
changesDetected: this.stats.changesDetected || 0
|
|
1673
|
+
};
|
|
1674
|
+
}
|
|
1675
|
+
|
|
1676
|
+
async initializeSemanticAnalyzer() {
|
|
1677
|
+
// Placeholder for semantic analysis initialization
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
// Enhanced Feature Helper Methods
|
|
1681
|
+
|
|
1682
|
+
/**
|
|
1683
|
+
* Get latest baseline for a URL
|
|
1684
|
+
* @param {string} url - URL
|
|
1685
|
+
* @returns {Object} - Latest baseline
|
|
1686
|
+
*/
|
|
1687
|
+
getLatestBaseline(url) {
|
|
1688
|
+
const snapshots = this.snapshots.get(url);
|
|
1689
|
+
return snapshots && snapshots.length > 0 ? snapshots[snapshots.length - 1] : null;
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
/**
|
|
1693
|
+
* Calculate text similarity using advanced algorithms
|
|
1694
|
+
* @param {string} text1 - First text
|
|
1695
|
+
* @param {string} text2 - Second text
|
|
1696
|
+
* @returns {number} - Similarity score (0-1)
|
|
1697
|
+
*/
|
|
1698
|
+
calculateTextSimilarity(text1, text2) {
|
|
1699
|
+
if (!text1 || !text2) return 0;
|
|
1700
|
+
|
|
1701
|
+
// Simple Jaccard similarity for keywords
|
|
1702
|
+
const words1 = new Set(text1.toLowerCase().split(/\W+/).filter(w => w.length > 3));
|
|
1703
|
+
const words2 = new Set(text2.toLowerCase().split(/\W+/).filter(w => w.length > 3));
|
|
1704
|
+
|
|
1705
|
+
const intersection = new Set([...words1].filter(x => words2.has(x)));
|
|
1706
|
+
const union = new Set([...words1, ...words2]);
|
|
1707
|
+
|
|
1708
|
+
return union.size > 0 ? intersection.size / union.size : 0;
|
|
1709
|
+
}
|
|
1710
|
+
|
|
1711
|
+
/**
|
|
1712
|
+
* Calculate semantic similarity
|
|
1713
|
+
* @param {string} text1 - First text
|
|
1714
|
+
* @param {string} text2 - Second text
|
|
1715
|
+
* @returns {number} - Semantic similarity score
|
|
1716
|
+
*/
|
|
1717
|
+
calculateSemanticSimilarity(text1, text2) {
|
|
1718
|
+
// Placeholder for advanced semantic analysis
|
|
1719
|
+
// Could integrate with NLP services or local models
|
|
1720
|
+
return this.calculateTextSimilarity(text1, text2);
|
|
1721
|
+
}
|
|
1722
|
+
|
|
1723
|
+
/**
|
|
1724
|
+
* Detect keyword changes between texts
|
|
1725
|
+
* @param {string} baselineText - Baseline text
|
|
1726
|
+
* @param {string} currentText - Current text
|
|
1727
|
+
* @returns {Array} - Keyword changes
|
|
1728
|
+
*/
|
|
1729
|
+
detectKeywordChanges(baselineText, currentText) {
|
|
1730
|
+
const changes = [];
|
|
1731
|
+
|
|
1732
|
+
try {
|
|
1733
|
+
const baselineWords = baselineText.toLowerCase().split(/\W+/).filter(w => w.length > 3);
|
|
1734
|
+
const currentWords = currentText.toLowerCase().split(/\W+/).filter(w => w.length > 3);
|
|
1735
|
+
|
|
1736
|
+
const baselineFreq = this.calculateWordFrequency(baselineWords);
|
|
1737
|
+
const currentFreq = this.calculateWordFrequency(currentWords);
|
|
1738
|
+
|
|
1739
|
+
// Find significant frequency changes
|
|
1740
|
+
const allWords = new Set([...Object.keys(baselineFreq), ...Object.keys(currentFreq)]);
|
|
1741
|
+
|
|
1742
|
+
for (const word of allWords) {
|
|
1743
|
+
const baseFreq = baselineFreq[word] || 0;
|
|
1744
|
+
const currFreq = currentFreq[word] || 0;
|
|
1745
|
+
const change = Math.abs(currFreq - baseFreq);
|
|
1746
|
+
|
|
1747
|
+
if (change > 2) { // Significant frequency change
|
|
1748
|
+
changes.push({
|
|
1749
|
+
word,
|
|
1750
|
+
baselineFrequency: baseFreq,
|
|
1751
|
+
currentFrequency: currFreq,
|
|
1752
|
+
change: currFreq - baseFreq,
|
|
1753
|
+
type: currFreq > baseFreq ? 'increased' : 'decreased'
|
|
1754
|
+
});
|
|
1755
|
+
}
|
|
1756
|
+
}
|
|
1757
|
+
} catch (error) {
|
|
1758
|
+
this.emit('error', { operation: 'detectKeywordChanges', error: error.message });
|
|
1759
|
+
}
|
|
1760
|
+
|
|
1761
|
+
return changes.slice(0, 20); // Top 20 changes
|
|
1762
|
+
}
|
|
1763
|
+
|
|
1764
|
+
/**
|
|
1765
|
+
* Detect topic shifts between texts
|
|
1766
|
+
* @param {string} baselineText - Baseline text
|
|
1767
|
+
* @param {string} currentText - Current text
|
|
1768
|
+
* @returns {Array} - Topic shifts
|
|
1769
|
+
*/
|
|
1770
|
+
detectTopicShifts(baselineText, currentText) {
|
|
1771
|
+
const shifts = [];
|
|
1772
|
+
|
|
1773
|
+
try {
|
|
1774
|
+
// Simple topic detection based on key phrases
|
|
1775
|
+
const topicKeywords = {
|
|
1776
|
+
technology: ['software', 'computer', 'digital', 'tech', 'system', 'data'],
|
|
1777
|
+
business: ['company', 'market', 'business', 'sales', 'revenue', 'profit'],
|
|
1778
|
+
health: ['health', 'medical', 'doctor', 'treatment', 'disease', 'patient'],
|
|
1779
|
+
politics: ['government', 'policy', 'political', 'election', 'vote', 'congress'],
|
|
1780
|
+
sports: ['game', 'team', 'player', 'score', 'match', 'championship']
|
|
1781
|
+
};
|
|
1782
|
+
|
|
1783
|
+
const baselineTopics = this.detectTopics(baselineText, topicKeywords);
|
|
1784
|
+
const currentTopics = this.detectTopics(currentText, topicKeywords);
|
|
1785
|
+
|
|
1786
|
+
// Compare topic presence
|
|
1787
|
+
for (const topic of Object.keys(topicKeywords)) {
|
|
1788
|
+
const baselineScore = baselineTopics[topic] || 0;
|
|
1789
|
+
const currentScore = currentTopics[topic] || 0;
|
|
1790
|
+
const change = currentScore - baselineScore;
|
|
1791
|
+
|
|
1792
|
+
if (Math.abs(change) > 0.1) {
|
|
1793
|
+
shifts.push({
|
|
1794
|
+
topic,
|
|
1795
|
+
baselineScore,
|
|
1796
|
+
currentScore,
|
|
1797
|
+
change,
|
|
1798
|
+
type: change > 0 ? 'emerged' : 'diminished'
|
|
1799
|
+
});
|
|
1800
|
+
}
|
|
1801
|
+
}
|
|
1802
|
+
} catch (error) {
|
|
1803
|
+
this.emit('error', { operation: 'detectTopicShifts', error: error.message });
|
|
1804
|
+
}
|
|
1805
|
+
|
|
1806
|
+
return shifts;
|
|
1807
|
+
}
|
|
1808
|
+
|
|
1809
|
+
/**
|
|
1810
|
+
* Calculate semantic confidence score
|
|
1811
|
+
* @param {Object} analysis - Semantic analysis
|
|
1812
|
+
* @returns {number} - Confidence score
|
|
1813
|
+
*/
|
|
1814
|
+
calculateSemanticConfidence(analysis) {
|
|
1815
|
+
let confidence = 0;
|
|
1816
|
+
|
|
1817
|
+
// Base confidence on available data
|
|
1818
|
+
if (analysis.textualSimilarity > 0) confidence += 0.3;
|
|
1819
|
+
if (analysis.keywordChanges.length > 0) confidence += 0.3;
|
|
1820
|
+
if (analysis.topicShifts.length > 0) confidence += 0.2;
|
|
1821
|
+
|
|
1822
|
+
// Adjust based on data quality
|
|
1823
|
+
const dataQuality = Math.min(
|
|
1824
|
+
analysis.keywordChanges.length / 10, // Max 10 keyword changes for full score
|
|
1825
|
+
1
|
|
1826
|
+
);
|
|
1827
|
+
|
|
1828
|
+
return Math.min(confidence * dataQuality, 1);
|
|
1829
|
+
}
|
|
1830
|
+
|
|
1831
|
+
/**
|
|
1832
|
+
* Detect layout changes between DOM structures
|
|
1833
|
+
* @param {Object} baseline - Baseline DOM
|
|
1834
|
+
* @param {Object} current - Current DOM
|
|
1835
|
+
* @returns {Array} - Layout changes
|
|
1836
|
+
*/
|
|
1837
|
+
detectLayoutChanges(baseline, current) {
|
|
1838
|
+
const changes = [];
|
|
1839
|
+
|
|
1840
|
+
try {
|
|
1841
|
+
// Compare element counts by type
|
|
1842
|
+
const baselineElements = this.countElements(baseline);
|
|
1843
|
+
const currentElements = this.countElements(current);
|
|
1844
|
+
|
|
1845
|
+
for (const [tag, baseCount] of Object.entries(baselineElements)) {
|
|
1846
|
+
const currCount = currentElements[tag] || 0;
|
|
1847
|
+
if (Math.abs(currCount - baseCount) > 0) {
|
|
1848
|
+
changes.push({
|
|
1849
|
+
type: 'element_count_change',
|
|
1850
|
+
tag,
|
|
1851
|
+
baseline: baseCount,
|
|
1852
|
+
current: currCount,
|
|
1853
|
+
change: currCount - baseCount
|
|
1854
|
+
});
|
|
1855
|
+
}
|
|
1856
|
+
}
|
|
1857
|
+
|
|
1858
|
+
// Check for new element types
|
|
1859
|
+
for (const [tag, currCount] of Object.entries(currentElements)) {
|
|
1860
|
+
if (!baselineElements[tag]) {
|
|
1861
|
+
changes.push({
|
|
1862
|
+
type: 'new_element_type',
|
|
1863
|
+
tag,
|
|
1864
|
+
count: currCount
|
|
1865
|
+
});
|
|
1866
|
+
}
|
|
1867
|
+
}
|
|
1868
|
+
} catch (error) {
|
|
1869
|
+
this.emit('error', { operation: 'detectLayoutChanges', error: error.message });
|
|
1870
|
+
}
|
|
1871
|
+
|
|
1872
|
+
return changes;
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
/**
|
|
1876
|
+
* Detect CSS changes
|
|
1877
|
+
* @param {Object} baseline - Baseline DOM
|
|
1878
|
+
* @param {Object} current - Current DOM
|
|
1879
|
+
* @returns {Array} - CSS changes
|
|
1880
|
+
*/
|
|
1881
|
+
detectCSSChanges(baseline, current) {
|
|
1882
|
+
const changes = [];
|
|
1883
|
+
|
|
1884
|
+
try {
|
|
1885
|
+
// Extract style information
|
|
1886
|
+
const baselineStyles = this.extractStyles(baseline);
|
|
1887
|
+
const currentStyles = this.extractStyles(current);
|
|
1888
|
+
|
|
1889
|
+
// Compare inline styles
|
|
1890
|
+
const styleDiff = this.compareStyles(baselineStyles, currentStyles);
|
|
1891
|
+
changes.push(...styleDiff);
|
|
1892
|
+
|
|
1893
|
+
} catch (error) {
|
|
1894
|
+
this.emit('error', { operation: 'detectCSSChanges', error: error.message });
|
|
1895
|
+
}
|
|
1896
|
+
|
|
1897
|
+
return changes;
|
|
1898
|
+
}
|
|
1899
|
+
|
|
1900
|
+
/**
|
|
1901
|
+
* Extract structured data from DOM
|
|
1902
|
+
* @param {Object} $ - Cheerio DOM
|
|
1903
|
+
* @returns {Object} - Structured data
|
|
1904
|
+
*/
|
|
1905
|
+
extractStructuredData($) {
|
|
1906
|
+
const structuredData = {
|
|
1907
|
+
jsonLd: [],
|
|
1908
|
+
microdata: [],
|
|
1909
|
+
rdfa: [],
|
|
1910
|
+
openGraph: {},
|
|
1911
|
+
twitterCard: {},
|
|
1912
|
+
schema: []
|
|
1913
|
+
};
|
|
1914
|
+
|
|
1915
|
+
try {
|
|
1916
|
+
// Extract JSON-LD
|
|
1917
|
+
$('script[type="application/ld+json"]').each((index, element) => {
|
|
1918
|
+
try {
|
|
1919
|
+
const data = JSON.parse($(element).html());
|
|
1920
|
+
structuredData.jsonLd.push(data);
|
|
1921
|
+
} catch (e) {
|
|
1922
|
+
// Invalid JSON, skip
|
|
1923
|
+
}
|
|
1924
|
+
});
|
|
1925
|
+
|
|
1926
|
+
// Extract Open Graph
|
|
1927
|
+
$('meta[property^="og:"]').each((index, element) => {
|
|
1928
|
+
const property = $(element).attr('property');
|
|
1929
|
+
const content = $(element).attr('content');
|
|
1930
|
+
if (property && content) {
|
|
1931
|
+
structuredData.openGraph[property] = content;
|
|
1932
|
+
}
|
|
1933
|
+
});
|
|
1934
|
+
|
|
1935
|
+
// Extract Twitter Card
|
|
1936
|
+
$('meta[name^="twitter:"]').each((index, element) => {
|
|
1937
|
+
const name = $(element).attr('name');
|
|
1938
|
+
const content = $(element).attr('content');
|
|
1939
|
+
if (name && content) {
|
|
1940
|
+
structuredData.twitterCard[name] = content;
|
|
1941
|
+
}
|
|
1942
|
+
});
|
|
1943
|
+
|
|
1944
|
+
} catch (error) {
|
|
1945
|
+
this.emit('error', { operation: 'extractStructuredData', error: error.message });
|
|
1946
|
+
}
|
|
1947
|
+
|
|
1948
|
+
return structuredData;
|
|
1949
|
+
}
|
|
1950
|
+
|
|
1951
|
+
/**
|
|
1952
|
+
* Compare structured data
|
|
1953
|
+
* @param {Object} baseline - Baseline structured data
|
|
1954
|
+
* @param {Object} current - Current structured data
|
|
1955
|
+
* @returns {Array} - Schema changes
|
|
1956
|
+
*/
|
|
1957
|
+
compareStructuredData(baseline, current) {
|
|
1958
|
+
const changes = [];
|
|
1959
|
+
|
|
1960
|
+
try {
|
|
1961
|
+
// Compare JSON-LD
|
|
1962
|
+
const jsonLdChanges = this.compareArrayData(baseline.jsonLd, current.jsonLd, 'json-ld');
|
|
1963
|
+
changes.push(...jsonLdChanges);
|
|
1964
|
+
|
|
1965
|
+
// Compare Open Graph
|
|
1966
|
+
const ogChanges = this.compareObjectData(baseline.openGraph, current.openGraph, 'open-graph');
|
|
1967
|
+
changes.push(...ogChanges);
|
|
1968
|
+
|
|
1969
|
+
// Compare Twitter Card
|
|
1970
|
+
const twitterChanges = this.compareObjectData(baseline.twitterCard, current.twitterCard, 'twitter-card');
|
|
1971
|
+
changes.push(...twitterChanges);
|
|
1972
|
+
|
|
1973
|
+
} catch (error) {
|
|
1974
|
+
this.emit('error', { operation: 'compareStructuredData', error: error.message });
|
|
1975
|
+
}
|
|
1976
|
+
|
|
1977
|
+
return changes;
|
|
1978
|
+
}
|
|
1979
|
+
|
|
1980
|
+
/**
|
|
1981
|
+
* Compare metadata objects
|
|
1982
|
+
* @param {Object} baseline - Baseline metadata
|
|
1983
|
+
* @param {Object} current - Current metadata
|
|
1984
|
+
* @returns {Array} - Metadata changes
|
|
1985
|
+
*/
|
|
1986
|
+
compareMetadata(baseline, current) {
|
|
1987
|
+
const changes = [];
|
|
1988
|
+
|
|
1989
|
+
try {
|
|
1990
|
+
const baselineKeys = Object.keys(baseline || {});
|
|
1991
|
+
const currentKeys = Object.keys(current || {});
|
|
1992
|
+
const allKeys = new Set([...baselineKeys, ...currentKeys]);
|
|
1993
|
+
|
|
1994
|
+
for (const key of allKeys) {
|
|
1995
|
+
const baseValue = baseline?.[key];
|
|
1996
|
+
const currValue = current?.[key];
|
|
1997
|
+
|
|
1998
|
+
if (JSON.stringify(baseValue) !== JSON.stringify(currValue)) {
|
|
1999
|
+
changes.push({
|
|
2000
|
+
type: 'metadata_change',
|
|
2001
|
+
field: key,
|
|
2002
|
+
baseline: baseValue,
|
|
2003
|
+
current: currValue,
|
|
2004
|
+
changeType: !baseValue ? 'added' : !currValue ? 'removed' : 'modified'
|
|
2005
|
+
});
|
|
2006
|
+
}
|
|
2007
|
+
}
|
|
2008
|
+
} catch (error) {
|
|
2009
|
+
this.emit('error', { operation: 'compareMetadata', error: error.message });
|
|
2010
|
+
}
|
|
2011
|
+
|
|
2012
|
+
return changes;
|
|
2013
|
+
}
|
|
2014
|
+
|
|
2015
|
+
/**
|
|
2016
|
+
* Calculate enhanced significance score
|
|
2017
|
+
* @param {Object} standardComparison - Standard comparison
|
|
2018
|
+
* @param {Object} semanticAnalysis - Semantic analysis
|
|
2019
|
+
* @param {Object} visualAnalysis - Visual analysis
|
|
2020
|
+
* @param {Object} structuredAnalysis - Structured analysis
|
|
2021
|
+
* @returns {string} - Enhanced significance level
|
|
2022
|
+
*/
|
|
2023
|
+
async calculateEnhancedSignificance(standardComparison, semanticAnalysis, visualAnalysis, structuredAnalysis) {
|
|
2024
|
+
try {
|
|
2025
|
+
let enhancedScore = 0;
|
|
2026
|
+
const weights = {
|
|
2027
|
+
standard: 0.4,
|
|
2028
|
+
semantic: 0.2,
|
|
2029
|
+
visual: 0.2,
|
|
2030
|
+
structured: 0.2
|
|
2031
|
+
};
|
|
2032
|
+
|
|
2033
|
+
// Standard comparison score
|
|
2034
|
+
const standardScore = this.getSignificanceScore(standardComparison.significance);
|
|
2035
|
+
enhancedScore += standardScore * weights.standard;
|
|
2036
|
+
|
|
2037
|
+
// Semantic analysis score
|
|
2038
|
+
const semanticScore = semanticAnalysis.confidenceScore *
|
|
2039
|
+
(1 - semanticAnalysis.textualSimilarity);
|
|
2040
|
+
enhancedScore += semanticScore * weights.semantic;
|
|
2041
|
+
|
|
2042
|
+
// Visual analysis score
|
|
2043
|
+
const visualScore = visualAnalysis.hasVisualChanges ? 0.7 : 0;
|
|
2044
|
+
enhancedScore += visualScore * weights.visual;
|
|
2045
|
+
|
|
2046
|
+
// Structured data score
|
|
2047
|
+
const structuredScore = structuredAnalysis.hasStructuredChanges ? 0.8 : 0;
|
|
2048
|
+
enhancedScore += structuredScore * weights.structured;
|
|
2049
|
+
|
|
2050
|
+
// Convert to significance level
|
|
2051
|
+
return this.scoreToSignificance(enhancedScore);
|
|
2052
|
+
|
|
2053
|
+
} catch (error) {
|
|
2054
|
+
this.emit('error', { operation: 'calculateEnhancedSignificance', error: error.message });
|
|
2055
|
+
return standardComparison.significance;
|
|
2056
|
+
}
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
/**
|
|
2060
|
+
* Detect change patterns in historical data
|
|
2061
|
+
* @param {string} url - URL
|
|
2062
|
+
* @param {Object} patterns - Pattern data
|
|
2063
|
+
*/
|
|
2064
|
+
async detectChangePatterns(url, patterns) {
|
|
2065
|
+
try {
|
|
2066
|
+
const frequency = patterns.changeFrequency.get(url);
|
|
2067
|
+
if (!frequency || frequency.length < 10) return;
|
|
2068
|
+
|
|
2069
|
+
// Detect recurring patterns
|
|
2070
|
+
const recurringPatterns = this.detectRecurringPatterns(frequency);
|
|
2071
|
+
|
|
2072
|
+
// Detect time-based patterns
|
|
2073
|
+
const timePatterns = this.detectTimePatterns(frequency);
|
|
2074
|
+
|
|
2075
|
+
// Update trend analysis
|
|
2076
|
+
if (recurringPatterns.length > 0 || timePatterns.length > 0) {
|
|
2077
|
+
this.stats.trendPatternsDetected++;
|
|
2078
|
+
|
|
2079
|
+
this.emit('patternsDetected', {
|
|
2080
|
+
url,
|
|
2081
|
+
recurringPatterns,
|
|
2082
|
+
timePatterns,
|
|
2083
|
+
timestamp: Date.now()
|
|
2084
|
+
});
|
|
2085
|
+
}
|
|
2086
|
+
|
|
2087
|
+
} catch (error) {
|
|
2088
|
+
this.emit('error', { operation: 'detectChangePatterns', url, error: error.message });
|
|
2089
|
+
}
|
|
2090
|
+
}
|
|
2091
|
+
|
|
2092
|
+
/**
|
|
2093
|
+
* Send webhook alert (placeholder: currently only emits a 'webhookAlert' event)
|
|
2094
|
+
* @param {Object} alertData - Alert data
|
|
2095
|
+
*/
|
|
2096
|
+
async sendWebhookAlert(alertData) {
|
|
2097
|
+
// Placeholder for webhook implementation
|
|
2098
|
+
this.emit('webhookAlert', alertData);
|
|
2099
|
+
}
|
|
2100
|
+
|
|
2101
|
+
/**
|
|
2102
|
+
* Send email alert (placeholder: currently only emits an 'emailAlert' event)
|
|
2103
|
+
* @param {Object} alertData - Alert data
|
|
2104
|
+
*/
|
|
2105
|
+
async sendEmailAlert(alertData) {
|
|
2106
|
+
// Placeholder for email implementation
|
|
2107
|
+
this.emit('emailAlert', alertData);
|
|
2108
|
+
}
|
|
2109
|
+
|
|
2110
|
+
/**
|
|
2111
|
+
* Send Slack alert (placeholder: currently only emits a 'slackAlert' event)
|
|
2112
|
+
* @param {Object} alertData - Alert data
|
|
2113
|
+
*/
|
|
2114
|
+
async sendSlackAlert(alertData) {
|
|
2115
|
+
// Placeholder for Slack implementation
|
|
2116
|
+
this.emit('slackAlert', alertData);
|
|
2117
|
+
}
|
|
2118
|
+
|
|
2119
|
+
// Utility helper methods
|
|
2120
|
+
|
|
2121
|
+
calculateWordFrequency(words) {
|
|
2122
|
+
const frequency = {};
|
|
2123
|
+
words.forEach(word => {
|
|
2124
|
+
frequency[word] = (frequency[word] || 0) + 1;
|
|
2125
|
+
});
|
|
2126
|
+
return frequency;
|
|
2127
|
+
}
|
|
2128
|
+
|
|
2129
|
+
detectTopics(text, topicKeywords) {
|
|
2130
|
+
const topics = {};
|
|
2131
|
+
const words = text.toLowerCase().split(/\W+/);
|
|
2132
|
+
|
|
2133
|
+
for (const [topic, keywords] of Object.entries(topicKeywords)) {
|
|
2134
|
+
let score = 0;
|
|
2135
|
+
keywords.forEach(keyword => {
|
|
2136
|
+
score += words.filter(word => word.includes(keyword)).length;
|
|
2137
|
+
});
|
|
2138
|
+
topics[topic] = score / words.length;
|
|
2139
|
+
}
|
|
2140
|
+
|
|
2141
|
+
return topics;
|
|
2142
|
+
}
|
|
2143
|
+
|
|
2144
|
+
countElements($) {
|
|
2145
|
+
const counts = {};
|
|
2146
|
+
$('*').each((index, element) => {
|
|
2147
|
+
const tag = element.name;
|
|
2148
|
+
counts[tag] = (counts[tag] || 0) + 1;
|
|
2149
|
+
});
|
|
2150
|
+
return counts;
|
|
2151
|
+
}
|
|
2152
|
+
|
|
2153
|
+
extractStyles($) {
|
|
2154
|
+
const styles = {};
|
|
2155
|
+
$('[style]').each((index, element) => {
|
|
2156
|
+
const style = $(element).attr('style');
|
|
2157
|
+
if (style) {
|
|
2158
|
+
styles[`element_${index}`] = style;
|
|
2159
|
+
}
|
|
2160
|
+
});
|
|
2161
|
+
return styles;
|
|
2162
|
+
}
|
|
2163
|
+
|
|
2164
|
+
compareStyles(baseline, current) {
|
|
2165
|
+
const changes = [];
|
|
2166
|
+
const allKeys = new Set([...Object.keys(baseline), ...Object.keys(current)]);
|
|
2167
|
+
|
|
2168
|
+
for (const key of allKeys) {
|
|
2169
|
+
if (baseline[key] !== current[key]) {
|
|
2170
|
+
changes.push({
|
|
2171
|
+
type: 'style_change',
|
|
2172
|
+
element: key,
|
|
2173
|
+
baseline: baseline[key],
|
|
2174
|
+
current: current[key]
|
|
2175
|
+
});
|
|
2176
|
+
}
|
|
2177
|
+
}
|
|
2178
|
+
|
|
2179
|
+
return changes;
|
|
2180
|
+
}
|
|
2181
|
+
|
|
2182
|
+
compareArrayData(baseline, current, type) {
|
|
2183
|
+
const changes = [];
|
|
2184
|
+
|
|
2185
|
+
if (baseline.length !== current.length) {
|
|
2186
|
+
changes.push({
|
|
2187
|
+
type: `${type}_count_change`,
|
|
2188
|
+
baseline: baseline.length,
|
|
2189
|
+
current: current.length
|
|
2190
|
+
});
|
|
2191
|
+
}
|
|
2192
|
+
|
|
2193
|
+
return changes;
|
|
2194
|
+
}
|
|
2195
|
+
|
|
2196
|
+
compareObjectData(baseline, current, type) {
|
|
2197
|
+
const changes = [];
|
|
2198
|
+
const allKeys = new Set([...Object.keys(baseline), ...Object.keys(current)]);
|
|
2199
|
+
|
|
2200
|
+
for (const key of allKeys) {
|
|
2201
|
+
if (baseline[key] !== current[key]) {
|
|
2202
|
+
changes.push({
|
|
2203
|
+
type: `${type}_change`,
|
|
2204
|
+
field: key,
|
|
2205
|
+
baseline: baseline[key],
|
|
2206
|
+
current: current[key]
|
|
2207
|
+
});
|
|
2208
|
+
}
|
|
2209
|
+
}
|
|
2210
|
+
|
|
2211
|
+
return changes;
|
|
2212
|
+
}
|
|
2213
|
+
|
|
2214
|
+
getSignificanceScore(significance) {
|
|
2215
|
+
const scores = {
|
|
2216
|
+
'none': 0,
|
|
2217
|
+
'minor': 0.2,
|
|
2218
|
+
'moderate': 0.5,
|
|
2219
|
+
'major': 0.8,
|
|
2220
|
+
'critical': 1.0
|
|
2221
|
+
};
|
|
2222
|
+
return scores[significance] || 0;
|
|
2223
|
+
}
|
|
2224
|
+
|
|
2225
|
+
scoreToSignificance(score) {
|
|
2226
|
+
if (score >= 0.9) return 'critical';
|
|
2227
|
+
if (score >= 0.7) return 'major';
|
|
2228
|
+
if (score >= 0.4) return 'moderate';
|
|
2229
|
+
if (score >= 0.1) return 'minor';
|
|
2230
|
+
return 'none';
|
|
2231
|
+
}
|
|
2232
|
+
|
|
2233
|
+
analyzeUrlPatterns(url, patterns) {
|
|
2234
|
+
// Placeholder for URL-specific pattern analysis
|
|
2235
|
+
return {
|
|
2236
|
+
dailyAverage: 0,
|
|
2237
|
+
peakTimes: [],
|
|
2238
|
+
commonTypes: []
|
|
2239
|
+
};
|
|
2240
|
+
}
|
|
2241
|
+
|
|
2242
|
+
analyzeGlobalPatterns(patterns) {
|
|
2243
|
+
// Placeholder for global pattern analysis
|
|
2244
|
+
return {
|
|
2245
|
+
totalUrls: patterns.dailyChangePatterns.size,
|
|
2246
|
+
mostActiveUrls: [],
|
|
2247
|
+
commonPatterns: []
|
|
2248
|
+
};
|
|
2249
|
+
}
|
|
2250
|
+
|
|
2251
|
+
generateTrendInsights(patterns) {
|
|
2252
|
+
return [
|
|
2253
|
+
'Pattern analysis requires more data',
|
|
2254
|
+
'Monitoring is active and collecting data'
|
|
2255
|
+
];
|
|
2256
|
+
}
|
|
2257
|
+
|
|
2258
|
+
generateTrendRecommendations(patterns, insights) {
|
|
2259
|
+
return [
|
|
2260
|
+
'Continue monitoring to build pattern database',
|
|
2261
|
+
'Consider adjusting monitoring frequency based on change patterns'
|
|
2262
|
+
];
|
|
2263
|
+
}
|
|
2264
|
+
|
|
2265
|
+
detectRecurringPatterns(frequency) {
|
|
2266
|
+
// Placeholder for recurring pattern detection
|
|
2267
|
+
return [];
|
|
2268
|
+
}
|
|
2269
|
+
|
|
2270
|
+
detectTimePatterns(frequency) {
|
|
2271
|
+
// Placeholder for time-based pattern detection
|
|
2272
|
+
return [];
|
|
2273
|
+
}
|
|
2274
|
+
|
|
2275
|
+
convertToCSV(data) {
|
|
2276
|
+
// Placeholder for CSV conversion
|
|
2277
|
+
return JSON.stringify(data, null, 2);
|
|
2278
|
+
}
|
|
2279
|
+
|
|
2280
|
+
cleanup() {
|
|
2281
|
+
// Stop all scheduled monitors
|
|
2282
|
+
for (const [id, monitor] of this.scheduledMonitors.entries()) {
|
|
2283
|
+
if (monitor.cronJob) {
|
|
2284
|
+
monitor.cronJob.destroy();
|
|
2285
|
+
}
|
|
2286
|
+
}
|
|
2287
|
+
|
|
2288
|
+
// Clear all data
|
|
2289
|
+
this.contentHistory.clear();
|
|
2290
|
+
this.baselineContent.clear();
|
|
2291
|
+
this.activeMonitors.clear();
|
|
2292
|
+
this.changeNotifications.clear();
|
|
2293
|
+
this.snapshotManager.clear();
|
|
2294
|
+
this.scheduledMonitors.clear();
|
|
2295
|
+
this.monitoringTemplates.clear();
|
|
2296
|
+
this.alertRules.clear();
|
|
2297
|
+
this.alertHistory.clear();
|
|
2298
|
+
this.trendAnalysis.clear();
|
|
2299
|
+
this.visualRegression.clear();
|
|
2300
|
+
this.alertThrottling.clear();
|
|
2301
|
+
this.semanticDiffCache.clear();
|
|
2302
|
+
}
|
|
2303
|
+
|
|
2304
|
+
}
|
|
2305
|
+
|
|
2306
|
+
export default ChangeTracker;
|