crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,2306 @@
1
+ import crypto from "crypto";
2
+ /**
3
+ * ChangeTracker - Enhanced Content Change Detection and Analysis (Phase 2.4)
4
+ * Implements hierarchical content hashing (page → sections → elements)
5
+ * with differential comparison engine, change significance scoring,
6
+ * scheduled monitoring, advanced comparison engine, alert system,
7
+ * and historical analysis capabilities
8
+ */
9
+
10
+ import { createHash } from 'crypto';
11
+ import { z } from 'zod';
12
+ import { EventEmitter } from 'events';
13
+ import { load } from 'cheerio';
14
+ import { diffWords, diffLines, diffChars } from 'diff';
15
+ import * as cron from 'node-cron';
16
+ import fs from 'fs/promises';
17
+ import path from 'path';
18
+ import fetch from 'node-fetch';
19
+
20
// Validation schema for a single tracking request: the URL, its content,
// optional raw HTML, and the tracking options with their defaults.
const ChangeTrackingSchema = (() => {
  // Small local builders keep the option table below readable.
  const flag = (initial) => z.boolean().default(initial);
  const ratio = (initial) => z.number().min(0).max(1).default(initial);
  const selectorList = () => z.array(z.string()).optional();

  const significanceThresholds = z
    .object({
      minor: ratio(0.1),
      moderate: ratio(0.3),
      major: ratio(0.7)
    })
    .optional();

  const trackingOptions = z
    .object({
      granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),
      trackText: flag(true),
      trackStructure: flag(true),
      trackAttributes: flag(false),
      trackImages: flag(false),
      trackLinks: flag(true),
      ignoreWhitespace: flag(true),
      ignoreCase: flag(false),
      customSelectors: selectorList(),
      excludeSelectors: selectorList().default([
        'script', 'style', 'noscript', '.advertisement', '.ad'
      ]),
      significanceThresholds
    })
    .optional()
    .default({});

  return z.object({
    url: z.string().url(),
    content: z.string(),
    html: z.string().optional(),
    options: trackingOptions
  });
})();

// Validation schema for comparing two explicit content versions.
const ChangeComparisonSchema = z.object({
  baselineUrl: z.string().url(),
  currentUrl: z.string().url(),
  baselineContent: z.string(),
  currentContent: z.string(),
  options: z.object({}).optional()
});

// Ordered significance levels a change can be classified as.
const ChangeSignificance = z.enum([
  'none',
  'minor',
  'moderate',
  'major',
  'critical'
]);
54
+
55
+ export class ChangeTracker extends EventEmitter {
56
+ constructor(options = {}) {
57
+ super();
58
+
59
+ this.options = {
60
+ hashAlgorithm: 'sha256',
61
+ maxHistoryLength: 100,
62
+ enableRealTimeTracking: true,
63
+ monitoringInterval: 300000, // 5 minutes
64
+ enableChangeSignificanceScoring: true,
65
+ enableStructuralAnalysis: true,
66
+ enableSemanticAnalysis: false,
67
+ contentSimilarityThreshold: 0.8,
68
+ ...options
69
+ };
70
+
71
+ // Content snapshots and hashes
72
+ this.snapshots = new Map();
73
+ this.contentHashes = new Map();
74
+ this.changeHistory = new Map();
75
+ this.structuralHashes = new Map();
76
+
77
+ // Change detection state
78
+ this.activeMonitors = new Map();
79
+ this.lastProcessedTimestamps = new Map();
80
+
81
+ // Content history and snapshots management
82
+ this.contentHistory = new Map();
83
+ this.baselineContent = new Map();
84
+ this.changeNotifications = new Map();
85
+ this.snapshotManager = new Map();
86
+
87
+ // Phase 2.4 Enhanced Features
88
+ this.scheduledMonitors = new Map(); // Cron-based monitoring
89
+ this.monitoringTemplates = new Map(); // Reusable monitoring configurations
90
+ this.alertRules = new Map(); // Custom alert rules and conditions
91
+ this.alertHistory = new Map(); // Alert notification history
92
+ this.trendAnalysis = new Map(); // Pattern recognition data
93
+ this.visualRegression = new Map(); // Visual diff storage
94
+ this.alertThrottling = new Map(); // Alert rate limiting
95
+ this.semanticDiffCache = new Map(); // Semantic analysis cache
96
+ this.monitoringDashboard = {
97
+ status: 'initialized',
98
+ monitors: new Map(),
99
+ alerts: [],
100
+ trends: {}
101
+ };
102
+ // Enhanced Statistics
103
+ this.stats = {
104
+ pagesTracked: 0,
105
+ changesDetected: 0,
106
+ significantChanges: 0,
107
+ structuralChanges: 0,
108
+ contentChanges: 0,
109
+ falsePositives: 0,
110
+ averageChangeScore: 0,
111
+ lastAnalysis: null,
112
+ processingTime: 0,
113
+ // Phase 2.4 additions
114
+ scheduledMonitors: 0,
115
+ alertsSent: 0,
116
+ alertsThrottled: 0,
117
+ semanticAnalyses: 0,
118
+ visualRegression: 0,
119
+ trendPatternsDetected: 0,
120
+ averageAlertResponseTime: 0,
121
+ monitoringUptime: 0
122
+ };
123
+
124
+ // Semantic analysis tools (if enabled)
125
+ this.semanticAnalyzer = null;
126
+
127
+ this.initialize();
128
+ }
129
+
130
+ async initialize() {
131
+ // Initialize semantic analysis if enabled
132
+ if (this.options.enableSemanticAnalysis) {
133
+ await this.initializeSemanticAnalyzer();
134
+ }
135
+
136
+ // Initialize Phase 2.4 components
137
+ await this.initializeEnhancedFeatures();
138
+
139
+ this.emit('initialized');
140
+ }
141
+
142
+ /**
143
+ * Initialize Enhanced Features for Phase 2.4
144
+ */
145
+ async initializeEnhancedFeatures() {
146
+ try {
147
+ // Initialize monitoring dashboard
148
+ this.monitoringDashboard.status = 'initializing';
149
+
150
+ // Load existing monitoring templates
151
+ await this.loadMonitoringTemplates();
152
+
153
+ // Initialize alert system
154
+ await this.initializeAlertSystem();
155
+
156
+ // Set up historical analysis
157
+ await this.initializeHistoricalAnalysis();
158
+
159
+ // Initialize semantic diff engine if enabled
160
+ if (this.options.enableSemanticAnalysis) {
161
+ await this.initializeSemanticDiffEngine();
162
+ }
163
+
164
+ this.monitoringDashboard.status = 'active';
165
+ this.emit('enhancedFeaturesInitialized');
166
+
167
+ } catch (error) {
168
+ this.monitoringDashboard.status = 'error';
169
+ this.emit('error', { operation: 'initializeEnhancedFeatures', error: error.message });
170
+ throw error;
171
+ }
172
+ }
173
+
174
+ /**
175
+ * Load monitoring templates from storage
176
+ */
177
+ async loadMonitoringTemplates() {
178
+ const defaultTemplates = {
179
+ 'news-site': {
180
+ name: 'News Site Monitoring',
181
+ frequency: '*/15 * * * *', // Every 15 minutes
182
+ options: {
183
+ granularity: 'section',
184
+ trackText: true,
185
+ trackStructure: false,
186
+ significanceThresholds: { minor: 0.05, moderate: 0.2, major: 0.5 }
187
+ },
188
+ alertRules: {
189
+ threshold: 'minor',
190
+ methods: ['webhook', 'email'],
191
+ throttle: 300000 // 5 minutes
192
+ }
193
+ },
194
+ 'e-commerce': {
195
+ name: 'E-commerce Site Monitoring',
196
+ frequency: '0 */2 * * *', // Every 2 hours
197
+ options: {
198
+ granularity: 'element',
199
+ trackText: true,
200
+ trackStructure: true,
201
+ trackImages: true,
202
+ customSelectors: ['.price', '.stock-status', '.product-title']
203
+ },
204
+ alertRules: {
205
+ threshold: 'moderate',
206
+ methods: ['webhook', 'slack'],
207
+ throttle: 600000 // 10 minutes
208
+ }
209
+ },
210
+ 'documentation': {
211
+ name: 'Documentation Monitoring',
212
+ frequency: '0 9 * * *', // Daily at 9 AM
213
+ options: {
214
+ granularity: 'section',
215
+ trackText: true,
216
+ trackStructure: true,
217
+ excludeSelectors: ['.last-updated', '.edit-link']
218
+ },
219
+ alertRules: {
220
+ threshold: 'major',
221
+ methods: ['email'],
222
+ throttle: 3600000 // 1 hour
223
+ }
224
+ }
225
+ };
226
+
227
+ for (const [id, template] of Object.entries(defaultTemplates)) {
228
+ this.monitoringTemplates.set(id, template);
229
+ }
230
+ }
231
+
232
+ /**
233
+ * Initialize alert system with default rules
234
+ */
235
+ async initializeAlertSystem() {
236
+ // Default alert rules
237
+ const defaultAlertRules = {
238
+ 'critical-changes': {
239
+ condition: (changeResult) => changeResult.significance === 'critical',
240
+ actions: ['webhook', 'email', 'slack'],
241
+ throttle: 0, // No throttling for critical changes
242
+ priority: 'high'
243
+ },
244
+ 'frequent-changes': {
245
+ condition: (url, history) => {
246
+ const recent = history.filter(h => Date.now() - h.timestamp < 3600000); // Last hour
247
+ return recent.length > 5;
248
+ },
249
+ actions: ['webhook'],
250
+ throttle: 1800000, // 30 minutes
251
+ priority: 'medium'
252
+ },
253
+ 'structural-changes': {
254
+ condition: (changeResult) => changeResult.changeType === 'structural',
255
+ actions: ['webhook', 'email'],
256
+ throttle: 600000, // 10 minutes
257
+ priority: 'medium'
258
+ }
259
+ };
260
+
261
+ for (const [id, rule] of Object.entries(defaultAlertRules)) {
262
+ this.alertRules.set(id, rule);
263
+ }
264
+ }
265
+
266
+ /**
267
+ * Initialize historical analysis capabilities
268
+ */
269
+ async initializeHistoricalAnalysis() {
270
+ // Initialize trend analysis patterns
271
+ this.trendAnalysis.set('patterns', {
272
+ dailyChangePatterns: new Map(),
273
+ weeklyTrends: new Map(),
274
+ contentVelocity: new Map(),
275
+ changeFrequency: new Map()
276
+ });
277
+ }
278
+
279
+ /**
280
+ * Initialize semantic diff engine
281
+ */
282
+ async initializeSemanticDiffEngine() {
283
+ // Initialize semantic analysis components
284
+ this.semanticDiffCache.set('initialized', true);
285
+ this.semanticDiffCache.set('algorithms', {
286
+ textSimilarity: this.calculateTextSimilarity.bind(this),
287
+ structuralSimilarity: this.calculateStructuralSimilarity.bind(this),
288
+ semanticSimilarity: this.calculateSemanticSimilarity.bind(this)
289
+ });
290
+ }
291
+
292
+ /**
293
+ * Create scheduled monitoring with cron-like scheduling
294
+ * @param {string} url - URL to monitor
295
+ * @param {string} schedule - Cron expression
296
+ * @param {Object} options - Monitoring options
297
+ * @returns {Object} - Monitor configuration
298
+ */
299
+ async createScheduledMonitor(url, schedule, options = {}) {
300
+ try {
301
+ // Validate cron expression
302
+ if (!cron.validate(schedule)) {
303
+ throw new Error(`Invalid cron expression: ${schedule}`);
304
+ }
305
+
306
+ const monitorId = `scheduled_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
307
+
308
+ const monitorConfig = {
309
+ id: monitorId,
310
+ url,
311
+ schedule,
312
+ options: {
313
+ granularity: 'section',
314
+ trackText: true,
315
+ trackStructure: true,
316
+ alertRules: {
317
+ threshold: 'moderate',
318
+ methods: ['webhook'],
319
+ throttle: 600000
320
+ },
321
+ ...options
322
+ },
323
+ stats: {
324
+ created: Date.now(),
325
+ executions: 0,
326
+ lastExecution: null,
327
+ changesDetected: 0,
328
+ errors: 0,
329
+ averageExecutionTime: 0
330
+ },
331
+ status: 'active'
332
+ };
333
+
334
+ // Create cron job
335
+ const cronJob = cron.schedule(schedule, async () => {
336
+ await this.executeScheduledMonitor(monitorId);
337
+ }, {
338
+ scheduled: true,
339
+ timezone: 'UTC'
340
+ });
341
+
342
+ monitorConfig.cronJob = cronJob;
343
+
344
+ // Store monitor
345
+ this.scheduledMonitors.set(monitorId, monitorConfig);
346
+ this.monitoringDashboard.monitors.set(monitorId, {
347
+ url,
348
+ schedule,
349
+ status: 'active',
350
+ nextExecution: cronJob.nextDates().toString()
351
+ });
352
+
353
+ this.stats.scheduledMonitors++;
354
+
355
+ this.emit('scheduledMonitorCreated', {
356
+ monitorId,
357
+ url,
358
+ schedule,
359
+ nextExecution: cronJob.nextDates().toString()
360
+ });
361
+
362
+ return {
363
+ success: true,
364
+ monitorId,
365
+ url,
366
+ schedule,
367
+ nextExecution: cronJob.nextDates().toString(),
368
+ options: monitorConfig.options
369
+ };
370
+
371
+ } catch (error) {
372
+ this.emit('error', { operation: 'createScheduledMonitor', url, error: error.message });
373
+ throw new Error(`Failed to create scheduled monitor: ${error.message}`);
374
+ }
375
+ }
376
+
377
+ /**
378
+ * Execute scheduled monitor check
379
+ * @param {string} monitorId - Monitor ID
380
+ */
381
+ async executeScheduledMonitor(monitorId) {
382
+ const startTime = Date.now();
383
+
384
+ try {
385
+ const monitor = this.scheduledMonitors.get(monitorId);
386
+ if (!monitor || monitor.status !== 'active') {
387
+ return;
388
+ }
389
+
390
+ monitor.stats.executions++;
391
+ monitor.stats.lastExecution = Date.now();
392
+
393
+ // Fetch current content
394
+ const response = await fetch(monitor.url, {
395
+ headers: {
396
+ 'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0-Enhanced',
397
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
398
+ },
399
+ timeout: 30000
400
+ });
401
+
402
+ if (!response.ok) {
403
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
404
+ }
405
+
406
+ const currentContent = await response.text();
407
+
408
+ // Perform enhanced comparison
409
+ const comparisonResult = await this.performEnhancedComparison(
410
+ monitor.url,
411
+ currentContent,
412
+ monitor.options
413
+ );
414
+
415
+ // Update execution time stats
416
+ const executionTime = Date.now() - startTime;
417
+ monitor.stats.averageExecutionTime =
418
+ (monitor.stats.averageExecutionTime * (monitor.stats.executions - 1) + executionTime) /
419
+ monitor.stats.executions;
420
+
421
+ // Process change result
422
+ if (comparisonResult.hasChanges) {
423
+ monitor.stats.changesDetected++;
424
+
425
+ // Update trend analysis
426
+ await this.updateTrendAnalysis(monitor.url, comparisonResult);
427
+
428
+ // Check alert rules and send notifications
429
+ await this.processAlertRules(monitor.url, comparisonResult, monitor.options.alertRules);
430
+ }
431
+
432
+ this.emit('scheduledMonitorExecuted', {
433
+ monitorId,
434
+ url: monitor.url,
435
+ hasChanges: comparisonResult.hasChanges,
436
+ significance: comparisonResult.significance,
437
+ executionTime
438
+ });
439
+
440
+ } catch (error) {
441
+ const monitor = this.scheduledMonitors.get(monitorId);
442
+ if (monitor) {
443
+ monitor.stats.errors++;
444
+ }
445
+
446
+ this.emit('scheduledMonitorError', {
447
+ monitorId,
448
+ error: error.message,
449
+ timestamp: Date.now()
450
+ });
451
+ }\n }\n \n /**\n * Perform enhanced comparison with semantic analysis\n * @param {string} url - URL being compared\n * @param {string} currentContent - Current content\n * @param {Object} options - Comparison options\n * @returns {Object} - Enhanced comparison results\n */\n async performEnhancedComparison(url, currentContent, options = {}) {\n try {\n // Get standard comparison\n const standardComparison = await this.compareWithBaseline(url, currentContent, options);\n \n if (!standardComparison.hasChanges) {\n return standardComparison;\n }\n \n // Enhance with semantic analysis\n const semanticAnalysis = await this.performSemanticAnalysis(\n url, \n currentContent, \n standardComparison\n );\n \n // Enhance with visual regression detection\n const visualAnalysis = await this.performVisualRegressionAnalysis(\n url,\n currentContent,\n options\n );\n \n // Enhance with structured data analysis\n const structuredAnalysis = await this.performStructuredDataAnalysis(\n url,\n currentContent,\n standardComparison\n );\n \n // Calculate enhanced significance score\n const enhancedSignificance = await this.calculateEnhancedSignificance(\n standardComparison,\n semanticAnalysis,\n visualAnalysis,\n structuredAnalysis\n );\n \n return {\n ...standardComparison,\n enhancedFeatures: {\n semanticAnalysis,\n visualAnalysis,\n structuredAnalysis,\n enhancedSignificance\n },\n significance: enhancedSignificance,\n analysisType: 'enhanced'\n };\n \n } catch (error) {\n this.emit('error', { operation: 'performEnhancedComparison', url, error: error.message });\n // Fall back to standard comparison\n return await this.compareWithBaseline(url, currentContent, options);\n }\n }\n \n /**\n * Perform semantic analysis of changes\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} standardComparison - Standard comparison results\n * @returns {Object} - Semantic analysis results\n */\n async performSemanticAnalysis(url, currentContent, 
standardComparison) {\n const analysis = {\n textualSimilarity: 0,\n conceptualChanges: [],\n sentimentChanges: [],\n topicShifts: [],\n keywordChanges: [],\n confidenceScore: 0\n };\n \n try {\n // Get baseline content\n const baseline = this.getLatestBaseline(url);\n if (!baseline) {\n return analysis;\n }\n \n // Extract text content from both versions\n const $ = load(currentContent);\n const currentText = $.text().replace(/\\s+/g, ' ').trim();\n \n const $baseline = load(baseline.analysis.originalContent);\n const baselineText = $baseline.text().replace(/\\s+/g, ' ').trim();\n \n // Calculate textual similarity using advanced algorithms\n analysis.textualSimilarity = this.calculateTextSimilarity(baselineText, currentText);\n \n // Detect keyword changes\n analysis.keywordChanges = this.detectKeywordChanges(baselineText, currentText);\n \n // Simple topic shift detection\n analysis.topicShifts = this.detectTopicShifts(baselineText, currentText);\n \n // Calculate confidence score\n analysis.confidenceScore = this.calculateSemanticConfidence(analysis);\n \n this.stats.semanticAnalyses++;\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performSemanticAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Perform visual regression analysis\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} options - Analysis options\n * @returns {Object} - Visual analysis results\n */\n async performVisualRegressionAnalysis(url, currentContent, options = {}) {\n const analysis = {\n layoutChanges: [],\n cssChanges: [],\n imageChanges: [],\n fontChanges: [],\n colorChanges: [],\n hasVisualChanges: false\n };\n \n try {\n const $ = load(currentContent);\n const baseline = this.getLatestBaseline(url);\n \n if (!baseline) {\n return analysis;\n }\n \n const $baseline = load(baseline.analysis.originalContent);\n \n // Detect layout changes\n analysis.layoutChanges = 
this.detectLayoutChanges($baseline, $);\n \n // Detect CSS changes\n analysis.cssChanges = this.detectCSSChanges($baseline, $);\n \n // Detect image changes\n analysis.imageChanges = this.detectImageChanges($baseline, $);\n \n // Determine if there are visual changes\n analysis.hasVisualChanges = \n analysis.layoutChanges.length > 0 ||\n analysis.cssChanges.length > 0 ||\n analysis.imageChanges.length > 0;\n \n if (analysis.hasVisualChanges) {\n this.stats.visualRegression++;\n }\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 'performVisualRegressionAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Perform structured data analysis\n * @param {string} url - URL\n * @param {string} currentContent - Current content\n * @param {Object} standardComparison - Standard comparison results\n * @returns {Object} - Structured data analysis\n */\n async performStructuredDataAnalysis(url, currentContent, standardComparison) {\n const analysis = {\n schemaChanges: [],\n dataFieldChanges: [],\n validationChanges: [],\n metadataChanges: [],\n hasStructuredChanges: false\n };\n \n try {\n const $ = load(currentContent);\n const baseline = this.getLatestBaseline(url);\n \n if (!baseline) {\n return analysis;\n }\n \n // Extract structured data (JSON-LD, microdata, etc.)\n const currentStructuredData = this.extractStructuredData($);\n const baselineStructuredData = this.extractStructuredData(load(baseline.analysis.originalContent));\n \n // Compare structured data\n analysis.schemaChanges = this.compareStructuredData(baselineStructuredData, currentStructuredData);\n \n // Detect metadata changes\n analysis.metadataChanges = this.compareMetadata(\n baseline.analysis.metadata,\n standardComparison.details.current?.metadata || {}\n );\n \n analysis.hasStructuredChanges = \n analysis.schemaChanges.length > 0 ||\n analysis.metadataChanges.length > 0;\n \n return analysis;\n \n } catch (error) {\n this.emit('error', { operation: 
'performStructuredDataAnalysis', url, error: error.message });\n return analysis;\n }\n }\n \n /**\n * Update trend analysis with new change data\n * @param {string} url - URL\n * @param {Object} changeResult - Change analysis results\n */\n async updateTrendAnalysis(url, changeResult) {\n try {\n const patterns = this.trendAnalysis.get('patterns');\n const now = new Date();\n const dayKey = now.toISOString().slice(0, 10); // YYYY-MM-DD\n const hourKey = now.toISOString().slice(0, 13); // YYYY-MM-DDTHH\n \n // Update daily patterns\n if (!patterns.dailyChangePatterns.has(url)) {\n patterns.dailyChangePatterns.set(url, new Map());\n }\n \n const urlDailyPatterns = patterns.dailyChangePatterns.get(url);\n if (!urlDailyPatterns.has(dayKey)) {\n urlDailyPatterns.set(dayKey, {\n changes: 0,\n significance: [],\n types: []\n });\n }\n \n const dayData = urlDailyPatterns.get(dayKey);\n dayData.changes++;\n dayData.significance.push(changeResult.significance);\n dayData.types.push(changeResult.changeType);\n \n // Update change frequency\n if (!patterns.changeFrequency.has(url)) {\n patterns.changeFrequency.set(url, []);\n }\n \n patterns.changeFrequency.get(url).push({\n timestamp: Date.now(),\n significance: changeResult.significance,\n type: changeResult.changeType\n });\n \n // Keep only last 1000 entries per URL\n const frequency = patterns.changeFrequency.get(url);\n if (frequency.length > 1000) {\n frequency.splice(0, frequency.length - 1000);\n }\n \n // Detect patterns\n await this.detectChangePatterns(url, patterns);\n \n } catch (error) {\n this.emit('error', { operation: 'updateTrendAnalysis', url, error: error.message });\n }\n }\n \n /**\n * Process alert rules and send notifications\n * @param {string} url - URL\n * @param {Object} changeResult - Change results\n * @param {Object} alertRules - Alert configuration\n */\n async processAlertRules(url, changeResult, alertRules = {}) {\n try {\n const alertsToSend = [];\n \n // Check each alert rule\n for (const 
[ruleId, rule] of this.alertRules.entries()) {\n let shouldTrigger = false;\n \n if (typeof rule.condition === 'function') {\n try {\n const history = this.getChangeHistory(url, 100);\n shouldTrigger = rule.condition(changeResult, history);\n } catch (error) {\n this.emit('error', { \n operation: 'evaluateAlertRule', \n ruleId, \n url, \n error: error.message \n });\n continue;\n }\n }\n \n if (shouldTrigger) {\n // Check throttling\n const throttleKey = `${url}_${ruleId}`;\n const lastAlert = this.alertThrottling.get(throttleKey);\n \n if (lastAlert && Date.now() - lastAlert < rule.throttle) {\n this.stats.alertsThrottled++;\n continue;\n }\n \n alertsToSend.push({\n ruleId,\n rule,\n url,\n changeResult,\n timestamp: Date.now()\n });\n \n // Update throttling\n this.alertThrottling.set(throttleKey, Date.now());\n }\n }\n \n // Send alerts\n for (const alert of alertsToSend) {\n await this.sendAlert(alert);\n }\n \n } catch (error) {\n this.emit('error', { operation: 'processAlertRules', url, error: error.message });\n }\n }\n \n /**\n * Send alert notification\n * @param {Object} alert - Alert configuration\n */\n async sendAlert(alert) {\n const startTime = Date.now();\n \n try {\n const alertData = {\n id: `alert_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,\n ruleId: alert.ruleId,\n url: alert.url,\n timestamp: alert.timestamp,\n priority: alert.rule.priority,\n changeResult: {\n significance: alert.changeResult.significance,\n changeType: alert.changeResult.changeType,\n summary: alert.changeResult.summary\n }\n };\n \n // Send to each configured method\n const promises = alert.rule.actions.map(async (action) => {\n try {\n await this.sendNotificationByMethod(action, alertData);\n this.emit('alertSent', { action, alertId: alertData.id, url: alert.url });\n } catch (error) {\n this.emit('alertError', { \n action, \n alertId: alertData.id, \n url: alert.url, \n error: error.message \n });\n }\n });\n \n await Promise.allSettled(promises);\n \n // 
Store alert in history\n if (!this.alertHistory.has(alert.url)) {\n this.alertHistory.set(alert.url, []);\n }\n \n this.alertHistory.get(alert.url).unshift(alertData);\n \n // Keep only last 100 alerts per URL\n const history = this.alertHistory.get(alert.url);\n if (history.length > 100) {\n history.splice(100);\n }\n \n // Update stats\n this.stats.alertsSent++;\n const responseTime = Date.now() - startTime;\n this.stats.averageAlertResponseTime = \n (this.stats.averageAlertResponseTime * (this.stats.alertsSent - 1) + responseTime) / \n this.stats.alertsSent;\n \n this.emit('alertProcessed', {\n alertId: alertData.id,\n url: alert.url,\n responseTime\n });\n \n } catch (error) {\n this.emit('error', { operation: 'sendAlert', url: alert.url, error: error.message });\n }\n }\n \n /**\n * Send notification by specific method\n * @param {string} method - Notification method\n * @param {Object} alertData - Alert data\n */\n async sendNotificationByMethod(method, alertData) {\n switch (method) {\n case 'webhook':\n await this.sendWebhookAlert(alertData);\n break;\n case 'email':\n await this.sendEmailAlert(alertData);\n break;\n case 'slack':\n await this.sendSlackAlert(alertData);\n break;\n default:\n throw new Error(`Unknown notification method: ${method}`);\n }\n }\n \n /**\n * Generate trend analysis report\n * @param {string} url - URL (optional, for specific URL analysis)\n * @returns {Object} - Trend analysis report\n */\n async generateTrendAnalysisReport(url = null) {\n try {\n const report = {\n timestamp: Date.now(),\n scope: url ? 
'url-specific' : 'global',\n url,\n patterns: {},\n insights: [],\n recommendations: []\n };\n \n const patterns = this.trendAnalysis.get('patterns');\n \n if (url) {\n // URL-specific analysis\n report.patterns = await this.analyzeUrlPatterns(url, patterns);\n } else {\n // Global analysis\n report.patterns = await this.analyzeGlobalPatterns(patterns);\n }\n \n // Generate insights\n report.insights = this.generateTrendInsights(report.patterns);\n \n // Generate recommendations\n report.recommendations = this.generateTrendRecommendations(report.patterns, report.insights);\n \n return report;\n \n } catch (error) {\n this.emit('error', { operation: 'generateTrendAnalysisReport', url, error: error.message });\n throw error;\n }\n }\n \n /**\n * Export historical data\n * @param {Object} options - Export options\n * @returns {Object} - Exported data\n */\n async exportHistoricalData(options = {}) {\n const {\n format = 'json',\n url = null,\n startTime = null,\n endTime = null,\n includeContent = false,\n includeSnapshots = false\n } = options;\n \n try {\n const exportData = {\n metadata: {\n exportTime: Date.now(),\n format,\n scope: url ? 'url-specific' : 'global',\n url,\n timeRange: { startTime, endTime },\n options\n },\n changeHistory: {},\n snapshots: {},\n alertHistory: {},\n trendAnalysis: {},\n statistics: this.getEnhancedStats()\n };\n \n // Export change history\n const urls = url ? [url] : Array.from(this.changeHistory.keys());\n \n for (const targetUrl of urls) {\n let history = this.getChangeHistory(targetUrl, 10000);\n \n // Apply time filters\n if (startTime || endTime) {\n history = history.filter(entry => {\n if (startTime && entry.timestamp < startTime) return false;\n if (endTime && entry.timestamp > endTime) return false;\n return true;\n });\n }\n \n // Remove content if not requested\n if (!includeContent) {\n history = history.map(entry => {\n const { details, ...rest } = entry;\n return {\n ...rest,\n details: details ? 
{\n similarity: details.similarity,\n significance: details.significance\n } : undefined\n };\n });\n }\n \n exportData.changeHistory[targetUrl] = history;\n \n // Export alert history\n if (this.alertHistory.has(targetUrl)) {\n exportData.alertHistory[targetUrl] = this.alertHistory.get(targetUrl);\n }\n }\n \n // Export trend analysis\n const patterns = this.trendAnalysis.get('patterns');\n if (patterns) {\n exportData.trendAnalysis = {\n dailyPatterns: Object.fromEntries(patterns.dailyChangePatterns),\n changeFrequency: Object.fromEntries(patterns.changeFrequency)\n };\n }\n \n // Format output\n if (format === 'csv') {\n return this.convertToCSV(exportData);\n }\n \n return exportData;\n \n } catch (error) {\n this.emit('error', { operation: 'exportHistoricalData', error: error.message });\n throw error;\n }\n }\n \n /**\n * Get monitoring dashboard status\n * @returns {Object} - Dashboard data\n */\n getMonitoringDashboard() {\n return {\n status: this.monitoringDashboard.status,\n monitors: Array.from(this.monitoringDashboard.monitors.entries()).map(([id, config]) => ({\n id,\n ...config\n })),\n recentAlerts: this.monitoringDashboard.alerts.slice(-10),\n trends: this.monitoringDashboard.trends,\n statistics: this.getEnhancedStats(),\n timestamp: Date.now()\n };\n }\n \n /**\n * Get enhanced statistics\n * @returns {Object} - Enhanced statistics\n */\n getEnhancedStats() {\n return {\n ...this.stats,\n activeScheduledMonitors: this.scheduledMonitors.size,\n alertRules: this.alertRules.size,\n monitoringTemplates: this.monitoringTemplates.size,\n throttledAlerts: this.alertThrottling.size,\n trendPatterns: this.trendAnalysis.has('patterns') ? \n this.trendAnalysis.get('patterns').dailyChangePatterns.size : 0\n };\n }\n \n /**\n * Create baseline snapshot for change tracking
452
+ * @param {string} url - URL to track
453
+ * @param {string} content - Content to establish as baseline
454
+ * @param {Object} options - Tracking options
455
+ * @returns {Object} - Baseline snapshot information
456
+ */
457
+ async createBaseline(url, content, options = {}) {
458
+ const startTime = Date.now();
459
+
460
+ try {
461
+ const validated = ChangeTrackingSchema.parse({ url, content, options });
462
+ const { granularity, trackText, trackStructure } = validated.options;
463
+
464
+ // Generate hierarchical content hashes
465
+ const contentAnalysis = await this.analyzeContent(content, validated.options);
466
+
467
+ const baseline = {
468
+ url,
469
+ timestamp: Date.now(),
470
+ contentLength: content.length,
471
+ granularity,
472
+ analysis: contentAnalysis,
473
+ options: validated.options,
474
+ version: 1
475
+ };
476
+
477
+ // Store baseline
478
+ this.snapshots.set(url, [baseline]);
479
+ this.contentHashes.set(url, contentAnalysis.hashes);
480
+ this.changeHistory.set(url, []);
481
+ this.lastProcessedTimestamps.set(url, Date.now());
482
+
483
+ this.stats.pagesTracked++;
484
+ this.stats.processingTime += Date.now() - startTime;
485
+
486
+ this.emit('baselineCreated', {
487
+ url,
488
+ baseline,
489
+ processingTime: Date.now() - startTime
490
+ });
491
+
492
+ return {
493
+ success: true,
494
+ url,
495
+ version: 1,
496
+ contentHash: contentAnalysis.hashes.page,
497
+ sections: Object.keys(contentAnalysis.hashes.sections).length,
498
+ elements: Object.keys(contentAnalysis.hashes.elements).length,
499
+ createdAt: baseline.timestamp
500
+ };
501
+
502
+ } catch (error) {
503
+ this.emit('error', { operation: 'createBaseline', url, error: error.message });
504
+ throw new Error(`Failed to create baseline for ${url}: ${error.message}`);
505
+ }
506
+ }
507
+
508
+ /**
509
+ * Compare current content against baseline
510
+ * @param {string} url - URL to compare
511
+ * @param {string} currentContent - Current content
512
+ * @param {Object} options - Comparison options
513
+ * @returns {Object} - Change analysis results
514
+ */
515
+ async compareWithBaseline(url, currentContent, options = {}) {
516
+ const startTime = Date.now();
517
+
518
+ try {
519
+ if (!this.snapshots.has(url)) {
520
+ throw new Error(`No baseline found for URL: ${url}`);
521
+ }
522
+
523
+ const snapshots = this.snapshots.get(url);
524
+ const baseline = snapshots[snapshots.length - 1]; // Get latest baseline
525
+
526
+ const validated = ChangeComparisonSchema.parse({
527
+ baselineUrl: url,
528
+ currentUrl: url,
529
+ baselineContent: baseline.analysis.originalContent || '',
530
+ currentContent,
531
+ options
532
+ });
533
+
534
+ // Analyze current content
535
+ const currentAnalysis = await this.analyzeContent(currentContent, baseline.options);
536
+
537
+ // Perform comprehensive change detection
538
+ const changeAnalysis = await this.detectChanges(
539
+ baseline.analysis,
540
+ currentAnalysis,
541
+ baseline.options
542
+ );
543
+
544
+ // Calculate change significance
545
+ const significance = await this.calculateChangeSignificance(changeAnalysis, baseline.options);
546
+
547
+ // Create change record
548
+ const changeRecord = {
549
+ url,
550
+ timestamp: Date.now(),
551
+ baselineVersion: baseline.version,
552
+ changeType: this.classifyChangeType(changeAnalysis),
553
+ significance,
554
+ details: changeAnalysis,
555
+ metrics: {
556
+ contentSimilarity: changeAnalysis.similarity,
557
+ structuralSimilarity: changeAnalysis.structuralSimilarity,
558
+ addedElements: changeAnalysis.addedElements?.length || 0,
559
+ removedElements: changeAnalysis.removedElements?.length || 0,
560
+ modifiedElements: changeAnalysis.modifiedElements?.length || 0
561
+ },
562
+ processingTime: 0
563
+ };
564
+
565
+ changeRecord.processingTime = Date.now() - startTime;
566
+
567
+ // Store change record
568
+ const changeHistory = this.changeHistory.get(url);
569
+ changeHistory.push(changeRecord);
570
+
571
+ // Update statistics
572
+ this.updateStats(changeRecord);
573
+
574
+ // Update content hashes if significant change
575
+ if (significance !== 'none') {
576
+ this.contentHashes.set(url, currentAnalysis.hashes);
577
+ }
578
+
579
+ this.emit('changeDetected', changeRecord);
580
+
581
+ return {
582
+ hasChanges: significance !== 'none',
583
+ significance,
584
+ changeType: changeRecord.changeType,
585
+ summary: this.generateChangeSummary(changeAnalysis),
586
+ details: changeAnalysis,
587
+ metrics: changeRecord.metrics,
588
+ recommendations: this.generateChangeRecommendations(changeRecord)
589
+ };
590
+
591
+ } catch (error) {
592
+ this.emit('error', { operation: 'compareWithBaseline', url, error: error.message });
593
+ throw new Error(`Failed to compare content for ${url}: ${error.message}`);
594
+ }
595
+ }
596
+
597
+ /**
598
+ * Analyze content structure and create hierarchical hashes
599
+ * @param {string} content - Content to analyze
600
+ * @param {Object} options - Analysis options
601
+ * @returns {Object} - Content analysis results
602
+ */
603
+ async analyzeContent(content, options = {}) {
604
+ const analysis = {
605
+ originalContent: content,
606
+ hashes: {
607
+ page: this.hashContent(content),
608
+ sections: {},
609
+ elements: {},
610
+ text: {}
611
+ },
612
+ structure: {},
613
+ metadata: {},
614
+ statistics: {}
615
+ };
616
+
617
+ try {
618
+ // Parse HTML if available
619
+ const $ = load(content);
620
+
621
+ // Remove excluded elements
622
+ options.excludeSelectors?.forEach(selector => {
623
+ $(selector).remove();
624
+ });
625
+
626
+ // Analyze at different granularities
627
+ switch (options.granularity) {
628
+ case 'element':
629
+ await this.analyzeElementLevel($, analysis, options);
630
+ break;
631
+ case 'section':
632
+ await this.analyzeSectionLevel($, analysis, options);
633
+ break;
634
+ case 'text':
635
+ await this.analyzeTextLevel($, analysis, options);
636
+ break;
637
+ default:
638
+ await this.analyzePageLevel($, analysis, options);
639
+ }
640
+
641
+ // Extract structural information
642
+ if (options.trackStructure) {
643
+ analysis.structure = this.extractStructure($, options);
644
+ }
645
+
646
+ // Extract metadata
647
+ analysis.metadata = this.extractMetadata($, options);
648
+
649
+ // Calculate statistics
650
+ analysis.statistics = this.calculateContentStatistics(content, $);
651
+
652
+ } catch (error) {
653
+ // Fallback to plain text analysis
654
+ analysis.hashes.text.plain = this.hashContent(content);
655
+ analysis.statistics = {
656
+ contentLength: content.length,
657
+ wordCount: content.split(/\s+/).length,
658
+ error: error.message
659
+ };
660
+ }
661
+
662
+ return analysis;
663
+ }
664
+
665
+ /**
666
+ * Detect changes between two content analyses
667
+ * @param {Object} baseline - Baseline content analysis
668
+ * @param {Object} current - Current content analysis
669
+ * @param {Object} options - Detection options
670
+ * @returns {Object} - Change detection results
671
+ */
672
+ async detectChanges(baseline, current, options = {}) {
673
+ const changes = {
674
+ similarity: 0,
675
+ structuralSimilarity: 0,
676
+ addedElements: [],
677
+ removedElements: [],
678
+ modifiedElements: [],
679
+ textChanges: [],
680
+ structuralChanges: [],
681
+ attributeChanges: [],
682
+ imageChanges: [],
683
+ linkChanges: []
684
+ };
685
+
686
+ // Calculate overall content similarity
687
+ changes.similarity = this.calculateSimilarity(
688
+ baseline.hashes.page,
689
+ current.hashes.page
690
+ );
691
+
692
+ // Detect structural changes
693
+ if (options.trackStructure) {
694
+ changes.structuralChanges = await this.detectStructuralChanges(
695
+ baseline.structure,
696
+ current.structure
697
+ );
698
+
699
+ changes.structuralSimilarity = this.calculateStructuralSimilarity(
700
+ baseline.structure,
701
+ current.structure
702
+ );
703
+ }
704
+
705
+ // Detect section-level changes
706
+ const sectionChanges = this.detectHashChanges(
707
+ baseline.hashes.sections,
708
+ current.hashes.sections
709
+ );
710
+
711
+ changes.addedElements.push(...sectionChanges.added);
712
+ changes.removedElements.push(...sectionChanges.removed);
713
+ changes.modifiedElements.push(...sectionChanges.modified);
714
+
715
+ // Detect element-level changes
716
+ if (baseline.hashes.elements && current.hashes.elements) {
717
+ const elementChanges = this.detectHashChanges(
718
+ baseline.hashes.elements,
719
+ current.hashes.elements
720
+ );
721
+
722
+ changes.addedElements.push(...elementChanges.added);
723
+ changes.removedElements.push(...elementChanges.removed);
724
+ changes.modifiedElements.push(...elementChanges.modified);
725
+ }
726
+
727
+ // Detect text changes
728
+ if (options.trackText) {
729
+ changes.textChanges = await this.detectTextChanges(
730
+ baseline.originalContent,
731
+ current.originalContent,
732
+ options
733
+ );
734
+ }
735
+
736
+ // Detect link changes
737
+ if (options.trackLinks) {
738
+ changes.linkChanges = this.detectLinkChanges(
739
+ baseline.metadata.links || [],
740
+ current.metadata.links || []
741
+ );
742
+ }
743
+
744
+ // Detect image changes
745
+ if (options.trackImages) {
746
+ changes.imageChanges = this.detectImageChanges(
747
+ baseline.metadata.images || [],
748
+ current.metadata.images || []
749
+ );
750
+ }
751
+
752
+ return changes;
753
+ }
754
+
755
+ /**
756
+ * Calculate change significance score
757
+ * @param {Object} changeAnalysis - Change analysis results
758
+ * @param {Object} options - Scoring options
759
+ * @returns {string} - Significance level
760
+ */
761
+ async calculateChangeSignificance(changeAnalysis, options = {}) {
762
+ const thresholds = options.significanceThresholds || {
763
+ minor: 0.1,
764
+ moderate: 0.3,
765
+ major: 0.7
766
+ };
767
+
768
+ let significanceScore = 0;
769
+ const weights = {
770
+ similarity: 0.3,
771
+ structural: 0.2,
772
+ additions: 0.15,
773
+ removals: 0.15,
774
+ modifications: 0.1,
775
+ textChanges: 0.1
776
+ };
777
+
778
+ // Content similarity impact (inverted - less similarity = more significant)
779
+ significanceScore += (1 - changeAnalysis.similarity) * weights.similarity;
780
+
781
+ // Structural changes impact
782
+ if (changeAnalysis.structuralChanges.length > 0) {
783
+ significanceScore += Math.min(changeAnalysis.structuralChanges.length * 0.1, 1) * weights.structural;
784
+ }
785
+
786
+ // Element changes impact
787
+ const totalElements = changeAnalysis.addedElements.length +
788
+ changeAnalysis.removedElements.length +
789
+ changeAnalysis.modifiedElements.length;
790
+
791
+ significanceScore += Math.min(totalElements * 0.05, 1) *
792
+ (weights.additions + weights.removals + weights.modifications);
793
+
794
+ // Text changes impact
795
+ if (changeAnalysis.textChanges.length > 0) {
796
+ const textChangeRatio = changeAnalysis.textChanges.reduce(
797
+ (sum, change) => sum + (change.added?.length || 0) + (change.removed?.length || 0),
798
+ 0
799
+ ) / 1000; // Normalize by character count
800
+
801
+ significanceScore += Math.min(textChangeRatio, 1) * weights.textChanges;
802
+ }
803
+
804
+ // Determine significance level
805
+ if (significanceScore < thresholds.minor) {
806
+ return 'none';
807
+ } else if (significanceScore < thresholds.moderate) {
808
+ return 'minor';
809
+ } else if (significanceScore < thresholds.major) {
810
+ return 'moderate';
811
+ } else if (significanceScore < 0.9) {
812
+ return 'major';
813
+ } else {
814
+ return 'critical';
815
+ }
816
+ }
817
+
818
+ // Content Analysis Methods
819
+
820
+ async analyzePageLevel($, analysis, options) {
821
+ const pageContent = $.html();
822
+ analysis.hashes.page = this.hashContent(pageContent);
823
+
824
+ if (options.trackText) {
825
+ const textContent = $.text();
826
+ analysis.hashes.text.page = this.hashContent(textContent);
827
+ }
828
+ }
829
+
830
+ async analyzeSectionLevel($, analysis, options) {
831
+ const sections = ['header', 'nav', 'main', 'article', 'section', 'aside', 'footer'];
832
+
833
+ sections.forEach(tag => {
834
+ $(tag).each((index, element) => {
835
+ const sectionKey = `${tag}_${index}`;
836
+ const sectionContent = $(element).html() || '';
837
+ analysis.hashes.sections[sectionKey] = this.hashContent(sectionContent);
838
+
839
+ if (options.trackText) {
840
+ const textContent = $(element).text() || '';
841
+ analysis.hashes.text[sectionKey] = this.hashContent(textContent);
842
+ }
843
+ });
844
+ });
845
+
846
+ // Handle custom selectors
847
+ if (options.customSelectors) {
848
+ options.customSelectors.forEach((selector, index) => {
849
+ $(selector).each((elemIndex, element) => {
850
+ const key = `custom_${index}_${elemIndex}`;
851
+ const content = $(element).html() || '';
852
+ analysis.hashes.sections[key] = this.hashContent(content);
853
+ });
854
+ });
855
+ }
856
+ }
857
+
858
+ async analyzeElementLevel($, analysis, options) {
859
+ // Analyze common important elements
860
+ const importantElements = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'a'];
861
+
862
+ importantElements.forEach(tag => {
863
+ $(tag).each((index, element) => {
864
+ const elementKey = `${tag}_${index}`;
865
+ const elementContent = $(element).html() || '';
866
+ analysis.hashes.elements[elementKey] = this.hashContent(elementContent);
867
+
868
+ if (options.trackAttributes) {
869
+ const attributes = element.attribs || {};
870
+ analysis.hashes.elements[`${elementKey}_attr`] = this.hashContent(JSON.stringify(attributes));
871
+ }
872
+ });
873
+ });
874
+ }
875
+
876
+ async analyzeTextLevel($, analysis, options) {
877
+ const textNodes = [];
878
+
879
+ // Extract all text nodes
880
+ $('*').contents().filter(function() {
881
+ return this.type === 'text' && $(this).text().trim();
882
+ }).each((index, node) => {
883
+ const text = $(node).text().trim();
884
+ if (text) {
885
+ textNodes.push(text);
886
+ analysis.hashes.text[`text_${index}`] = this.hashContent(text);
887
+ }
888
+ });
889
+ }
890
+
891
+ extractStructure($, options) {
892
+ const structure = {
893
+ elements: [],
894
+ hierarchy: {},
895
+ semanticStructure: {}
896
+ };
897
+
898
+ // Extract DOM hierarchy
899
+ $('*').each((index, element) => {
900
+ const tagName = element.name;
901
+ const depth = $(element).parents().length;
902
+ const hasChildren = $(element).children().length > 0;
903
+
904
+ structure.elements.push({
905
+ tag: tagName,
906
+ index,
907
+ depth,
908
+ hasChildren,
909
+ classes: element.attribs?.class?.split(' ') || [],
910
+ id: element.attribs?.id
911
+ });
912
+ });
913
+
914
+ // Extract semantic structure
915
+ const semanticTags = ['header', 'nav', 'main', 'article', 'section', 'aside', 'footer'];
916
+ semanticTags.forEach(tag => {
917
+ structure.semanticStructure[tag] = $(tag).length;
918
+ });
919
+
920
+ return structure;
921
+ }
922
+
923
+ extractMetadata($, options) {
924
+ const metadata = {
925
+ title: $('title').text() || '',
926
+ headings: [],
927
+ links: [],
928
+ images: [],
929
+ scripts: [],
930
+ forms: []
931
+ };
932
+
933
+ // Extract headings
934
+ $('h1, h2, h3, h4, h5, h6').each((index, element) => {
935
+ metadata.headings.push({
936
+ tag: element.name,
937
+ text: $(element).text().trim(),
938
+ level: parseInt(element.name.replace('h', ''))
939
+ });
940
+ });
941
+
942
+ // Extract links
943
+ if (options.trackLinks) {
944
+ $('a[href]').each((index, element) => {
945
+ metadata.links.push({
946
+ href: $(element).attr('href'),
947
+ text: $(element).text().trim(),
948
+ external: this.isExternalLink($(element).attr('href'))
949
+ });
950
+ });
951
+ }
952
+
953
+ // Extract images
954
+ if (options.trackImages) {
955
+ $('img[src]').each((index, element) => {
956
+ metadata.images.push({
957
+ src: $(element).attr('src'),
958
+ alt: $(element).attr('alt') || '',
959
+ title: $(element).attr('title') || ''
960
+ });
961
+ });
962
+ }
963
+
964
+ return metadata;
965
+ }
966
+
967
+ calculateContentStatistics(content, $) {
968
+ return {
969
+ contentLength: content.length,
970
+ htmlLength: $.html().length,
971
+ textLength: $.text().length,
972
+ wordCount: $.text().split(/\s+/).filter(word => word.length > 0).length,
973
+ elementCount: $('*').length,
974
+ linkCount: $('a').length,
975
+ imageCount: $('img').length,
976
+ scriptCount: $('script').length
977
+ };
978
+ }
979
+
980
+ // Change Detection Methods
981
+
982
+ detectHashChanges(baselineHashes, currentHashes) {
983
+ const changes = {
984
+ added: [],
985
+ removed: [],
986
+ modified: []
987
+ };
988
+
989
+ const baselineKeys = new Set(Object.keys(baselineHashes));
990
+ const currentKeys = new Set(Object.keys(currentHashes));
991
+
992
+ // Find added elements
993
+ for (const key of currentKeys) {
994
+ if (!baselineKeys.has(key)) {
995
+ changes.added.push(key);
996
+ }
997
+ }
998
+
999
+ // Find removed elements
1000
+ for (const key of baselineKeys) {
1001
+ if (!currentKeys.has(key)) {
1002
+ changes.removed.push(key);
1003
+ }
1004
+ }
1005
+
1006
+ // Find modified elements
1007
+ for (const key of baselineKeys) {
1008
+ if (currentKeys.has(key) && baselineHashes[key] !== currentHashes[key]) {
1009
+ changes.modified.push({
1010
+ key,
1011
+ oldHash: baselineHashes[key],
1012
+ newHash: currentHashes[key]
1013
+ });
1014
+ }
1015
+ }
1016
+
1017
+ return changes;
1018
+ }
1019
+
1020
+ async detectStructuralChanges(baselineStructure, currentStructure) {
1021
+ const changes = [];
1022
+
1023
+ // Compare element counts by type
1024
+ const baselineCounts = this.countElementTypes(baselineStructure);
1025
+ const currentCounts = this.countElementTypes(currentStructure);
1026
+
1027
+ for (const [element, baselineCount] of baselineCounts) {
1028
+ const currentCount = currentCounts.get(element) || 0;
1029
+ if (currentCount !== baselineCount) {
1030
+ changes.push({
1031
+ type: 'element_count_change',
1032
+ element,
1033
+ oldCount: baselineCount,
1034
+ newCount: currentCount,
1035
+ difference: currentCount - baselineCount
1036
+ });
1037
+ }
1038
+ }
1039
+
1040
+ // Check for new element types
1041
+ for (const [element, currentCount] of currentCounts) {
1042
+ if (!baselineCounts.has(element)) {
1043
+ changes.push({
1044
+ type: 'new_element_type',
1045
+ element,
1046
+ count: currentCount
1047
+ });
1048
+ }
1049
+ }
1050
+
1051
+ return changes;
1052
+ }
1053
+
1054
+ async detectTextChanges(baselineContent, currentContent, options = {}) {
1055
+ const textChanges = [];
1056
+
1057
+ if (options.ignoreWhitespace) {
1058
+ baselineContent = baselineContent.replace(/\s+/g, ' ').trim();
1059
+ currentContent = currentContent.replace(/\s+/g, ' ').trim();
1060
+ }
1061
+
1062
+ if (options.ignoreCase) {
1063
+ baselineContent = baselineContent.toLowerCase();
1064
+ currentContent = currentContent.toLowerCase();
1065
+ }
1066
+
1067
+ // Word-level diff
1068
+ const wordDiff = diffWords(baselineContent, currentContent);
1069
+ textChanges.push({
1070
+ type: 'word_diff',
1071
+ changes: wordDiff.filter(part => part.added || part.removed)
1072
+ });
1073
+
1074
+ // Line-level diff for structured content
1075
+ const lineDiff = diffLines(baselineContent, currentContent);
1076
+ if (lineDiff.some(part => part.added || part.removed)) {
1077
+ textChanges.push({
1078
+ type: 'line_diff',
1079
+ changes: lineDiff.filter(part => part.added || part.removed)
1080
+ });
1081
+ }
1082
+
1083
+ return textChanges;
1084
+ }
1085
+
1086
+ detectLinkChanges(baselineLinks, currentLinks) {
1087
+ const changes = {
1088
+ added: [],
1089
+ removed: [],
1090
+ modified: []
1091
+ };
1092
+
1093
+ const baselineMap = new Map(baselineLinks.map(link => [link.href, link]));
1094
+ const currentMap = new Map(currentLinks.map(link => [link.href, link]));
1095
+
1096
+ // Find added links
1097
+ for (const [href, link] of currentMap) {
1098
+ if (!baselineMap.has(href)) {
1099
+ changes.added.push(link);
1100
+ }
1101
+ }
1102
+
1103
+ // Find removed links
1104
+ for (const [href, link] of baselineMap) {
1105
+ if (!currentMap.has(href)) {
1106
+ changes.removed.push(link);
1107
+ }
1108
+ }
1109
+
1110
+ // Find modified links (text changes)
1111
+ for (const [href, baselineLink] of baselineMap) {
1112
+ const currentLink = currentMap.get(href);
1113
+ if (currentLink && currentLink.text !== baselineLink.text) {
1114
+ changes.modified.push({
1115
+ href,
1116
+ oldText: baselineLink.text,
1117
+ newText: currentLink.text
1118
+ });
1119
+ }
1120
+ }
1121
+
1122
+ return changes;
1123
+ }
1124
+
1125
+ detectImageChanges(baselineImages, currentImages) {
1126
+ const changes = {
1127
+ added: [],
1128
+ removed: [],
1129
+ modified: []
1130
+ };
1131
+
1132
+ const baselineMap = new Map(baselineImages.map(img => [img.src, img]));
1133
+ const currentMap = new Map(currentImages.map(img => [img.src, img]));
1134
+
1135
+ // Find added images
1136
+ for (const [src, img] of currentMap) {
1137
+ if (!baselineMap.has(src)) {
1138
+ changes.added.push(img);
1139
+ }
1140
+ }
1141
+
1142
+ // Find removed images
1143
+ for (const [src, img] of baselineMap) {
1144
+ if (!currentMap.has(src)) {
1145
+ changes.removed.push(img);
1146
+ }
1147
+ }
1148
+
1149
+ // Find modified images (alt text changes)
1150
+ for (const [src, baselineImg] of baselineMap) {
1151
+ const currentImg = currentMap.get(src);
1152
+ if (currentImg && (currentImg.alt !== baselineImg.alt || currentImg.title !== baselineImg.title)) {
1153
+ changes.modified.push({
1154
+ src,
1155
+ oldAlt: baselineImg.alt,
1156
+ newAlt: currentImg.alt,
1157
+ oldTitle: baselineImg.title,
1158
+ newTitle: currentImg.title
1159
+ });
1160
+ }
1161
+ }
1162
+
1163
+ return changes;
1164
+ }
1165
+
1166
+ // Utility Methods
1167
+
1168
+ hashContent(content) {
1169
+ return createHash(this.options.hashAlgorithm)
1170
+ .update(content || '')
1171
+ .digest('hex');
1172
+ }
1173
+
1174
+ calculateSimilarity(hash1, hash2) {
1175
+ if (hash1 === hash2) return 1;
1176
+
1177
+ // Simple similarity based on hash difference
1178
+ // In production, you might want to use more sophisticated algorithms
1179
+ const diff = this.hammingDistance(hash1, hash2);
1180
+ const maxLength = Math.max(hash1.length, hash2.length);
1181
+ return 1 - (diff / maxLength);
1182
+ }
1183
+
1184
+ calculateStructuralSimilarity(baseline, current) {
1185
+ if (!baseline || !current) return 0;
1186
+
1187
+ const baselineElements = baseline.elements || [];
1188
+ const currentElements = current.elements || [];
1189
+
1190
+ if (baselineElements.length === 0 && currentElements.length === 0) return 1;
1191
+ if (baselineElements.length === 0 || currentElements.length === 0) return 0;
1192
+
1193
+ const tagSimilarity = this.calculateTagSimilarity(baselineElements, currentElements);
1194
+ const hierarchySimilarity = this.calculateHierarchySimilarity(baseline.hierarchy, current.hierarchy);
1195
+
1196
+ return (tagSimilarity + hierarchySimilarity) / 2;
1197
+ }
1198
+
1199
+ calculateTagSimilarity(baselineElements, currentElements) {
1200
+ const baselineTags = baselineElements.map(el => el.tag);
1201
+ const currentTags = currentElements.map(el => el.tag);
1202
+
1203
+ const intersection = baselineTags.filter(tag => currentTags.includes(tag));
1204
+ const union = new Set([...baselineTags, ...currentTags]);
1205
+
1206
+ return intersection.length / union.size;
1207
+ }
1208
+
1209
+ calculateHierarchySimilarity(baseline, current) {
1210
+ // Simple structural comparison - can be enhanced
1211
+ if (!baseline || !current) return 0;
1212
+ return Object.keys(baseline).length === Object.keys(current).length ? 1 : 0.5;
1213
+ }
1214
+
1215
+ hammingDistance(str1, str2) {
1216
+ if (str1.length !== str2.length) {
1217
+ return Math.abs(str1.length - str2.length);
1218
+ }
1219
+
1220
+ let distance = 0;
1221
+ for (let i = 0; i < str1.length; i++) {
1222
+ if (str1[i] !== str2[i]) {
1223
+ distance++;
1224
+ }
1225
+ }
1226
+ return distance;
1227
+ }
1228
+
1229
+ countElementTypes(structure) {
1230
+ const counts = new Map();
1231
+
1232
+ if (structure.elements) {
1233
+ structure.elements.forEach(element => {
1234
+ counts.set(element.tag, (counts.get(element.tag) || 0) + 1);
1235
+ });
1236
+ }
1237
+
1238
+ return counts;
1239
+ }
1240
+
1241
+ isExternalLink(href) {
1242
+ if (!href) return false;
1243
+ return href.startsWith('http://') || href.startsWith('https://');
1244
+ }
1245
+
1246
+ classifyChangeType(changeAnalysis) {
1247
+ const { addedElements, removedElements, modifiedElements, structuralChanges } = changeAnalysis;
1248
+
1249
+ if (structuralChanges.length > 0) {
1250
+ return 'structural';
1251
+ }
1252
+
1253
+ if (addedElements.length > removedElements.length) {
1254
+ return 'content_addition';
1255
+ }
1256
+
1257
+ if (removedElements.length > addedElements.length) {
1258
+ return 'content_removal';
1259
+ }
1260
+
1261
+ if (modifiedElements.length > 0) {
1262
+ return 'content_modification';
1263
+ }
1264
+
1265
+ return 'text_change';
1266
+ }
1267
+
1268
+ generateChangeSummary(changeAnalysis) {
1269
+ const { addedElements, removedElements, modifiedElements, similarity } = changeAnalysis;
1270
+
1271
+ const total = addedElements.length + removedElements.length + modifiedElements.length;
1272
+
1273
+ return {
1274
+ totalChanges: total,
1275
+ contentSimilarity: Math.round(similarity * 100),
1276
+ added: addedElements.length,
1277
+ removed: removedElements.length,
1278
+ modified: modifiedElements.length,
1279
+ changeDescription: this.generateChangeDescription(changeAnalysis)
1280
+ };
1281
+ }
1282
+
1283
+ generateChangeDescription(changeAnalysis) {
1284
+ const { addedElements, removedElements, modifiedElements, textChanges } = changeAnalysis;
1285
+
1286
+ const descriptions = [];
1287
+
1288
+ if (addedElements.length > 0) {
1289
+ descriptions.push(`${addedElements.length} elements added`);
1290
+ }
1291
+
1292
+ if (removedElements.length > 0) {
1293
+ descriptions.push(`${removedElements.length} elements removed`);
1294
+ }
1295
+
1296
+ if (modifiedElements.length > 0) {
1297
+ descriptions.push(`${modifiedElements.length} elements modified`);
1298
+ }
1299
+
1300
+ if (textChanges.length > 0) {
1301
+ descriptions.push('Text content changed');
1302
+ }
1303
+
1304
+ return descriptions.join(', ') || 'No significant changes detected';
1305
+ }
1306
+
1307
+ generateChangeRecommendations(changeRecord) {
1308
+ const recommendations = [];
1309
+ const { significance, details, changeType } = changeRecord;
1310
+
1311
+ if (significance === 'critical') {
1312
+ recommendations.push({
1313
+ type: 'alert',
1314
+ priority: 'high',
1315
+ message: 'Critical changes detected. Manual review recommended.'
1316
+ });
1317
+ }
1318
+
1319
+ if (changeType === 'structural') {
1320
+ recommendations.push({
1321
+ type: 'monitoring',
1322
+ priority: 'medium',
1323
+ message: 'Structural changes may affect scraping selectors.'
1324
+ });
1325
+ }
1326
+
1327
+ if (details.similarity < 0.5) {
1328
+ recommendations.push({
1329
+ type: 'analysis',
1330
+ priority: 'medium',
1331
+ message: 'Low content similarity suggests major content changes.'
1332
+ });
1333
+ }
1334
+
1335
+ return recommendations;
1336
+ }
1337
+
1338
+ updateStats(changeRecord) {
1339
+ this.stats.changesDetected++;
1340
+
1341
+ if (changeRecord.significance !== 'none') {
1342
+ this.stats.significantChanges++;
1343
+ }
1344
+
1345
+ if (changeRecord.changeType === 'structural') {
1346
+ this.stats.structuralChanges++;
1347
+ } else {
1348
+ this.stats.contentChanges++;
1349
+ }
1350
+
1351
+ // Update average change score
1352
+ this.stats.averageChangeScore =
1353
+ (this.stats.averageChangeScore * (this.stats.changesDetected - 1) +
1354
+ changeRecord.details.similarity) / this.stats.changesDetected;
1355
+
1356
+ this.stats.lastAnalysis = changeRecord.timestamp;
1357
+ this.stats.processingTime += changeRecord.processingTime;
1358
+ }
1359
+
1360
+ // Public API Methods
1361
+
1362
+ getStats() {
1363
+ return {
1364
+ ...this.stats,
1365
+ monitoredUrls: this.snapshots.size,
1366
+ totalSnapshots: Array.from(this.snapshots.values()).reduce((sum, snapshots) => sum + snapshots.length, 0),
1367
+ averageProcessingTime: this.stats.changesDetected > 0 ?
1368
+ this.stats.processingTime / this.stats.changesDetected : 0
1369
+ };
1370
+ }
1371
+
1372
+ getChangeHistory(url, limit = 50) {
1373
+ const history = this.changeHistory.get(url) || [];
1374
+ return history.slice(-limit).reverse();
1375
+ }
1376
+
1377
+ clearHistory(url) {
1378
+ if (url) {
1379
+ this.changeHistory.set(url, []);
1380
+ this.emit('historyCleared', url);
1381
+ } else {
1382
+ this.changeHistory.clear();
1383
+ this.emit('allHistoryCleared');
1384
+ }
1385
+ }
1386
+
1387
+ resetStats() {
1388
+ this.stats = {
1389
+ pagesTracked: 0,
1390
+ changesDetected: 0,
1391
+ significantChanges: 0,
1392
+ structuralChanges: 0,
1393
+ contentChanges: 0,
1394
+ falsePositives: 0,
1395
+ averageChangeScore: 0,
1396
+ lastAnalysis: null,
1397
+ processingTime: 0
1398
+ };
1399
+ }
1400
+
1401
+
1402
+ /**
1403
+ * Generate content hash
1404
+ */
1405
+ generateContentHash(content) {
1406
+
1407
+ return crypto.createHash("sha256").update(content).digest("hex");
1408
+ }
1409
+
1410
+ /**
1411
+ * Create snapshot of content
1412
+ */
1413
+ async createSnapshot(url, content) {
1414
+ const timestamp = Date.now();
1415
+ const hash = this.generateContentHash(content);
1416
+
1417
+ const snapshot = {
1418
+ url,
1419
+ content,
1420
+ contentHash: hash,
1421
+ timestamp,
1422
+ version: 1
1423
+ };
1424
+
1425
+ // Store snapshot in cache
1426
+ if (!this.contentHistory.has(url)) {
1427
+ this.contentHistory.set(url, []);
1428
+ }
1429
+
1430
+ this.contentHistory.get(url).unshift(snapshot);
1431
+
1432
+ // Also store in snapshots Map for compatibility
1433
+ if (!this.snapshots.has(url)) {
1434
+ this.snapshots.set(url, []);
1435
+ }
1436
+ this.snapshots.get(url).unshift(snapshot);
1437
+
1438
+ // Keep only last 100 snapshots
1439
+ const history = this.contentHistory.get(url);
1440
+ if (history.length > 100) {
1441
+ history.splice(100);
1442
+ }
1443
+
1444
+ return snapshot;
1445
+ }
1446
+
1447
+
1448
+ /**
1449
+ * Get snapshot history for a URL
1450
+ */
1451
+ getSnapshotHistory(url) {
1452
+ return this.contentHistory.get(url) || [];
1453
+ }
1454
+
1455
+ /**
1456
+ * Detect changes against the latest snapshot
1457
+ */
1458
+ async detectChanges(url, currentContent) {
1459
+ // Validate URL format
1460
+ try {
1461
+ new URL(url);
1462
+ } catch (error) {
1463
+ throw new Error(`Invalid URL format: ${url}`);
1464
+ }
1465
+ if (!this.contentHistory.has(url)) {
1466
+ return {
1467
+ hasChanges: false,
1468
+ score: 0,
1469
+ significance: "none"
1470
+ };
1471
+ }
1472
+
1473
+ const history = this.contentHistory.get(url);
1474
+ if (history.length === 0) {
1475
+ return {
1476
+ hasChanges: false,
1477
+ score: 0,
1478
+ significance: "none"
1479
+ };
1480
+ }
1481
+
1482
+ const lastSnapshot = history[0]; // Latest snapshot
1483
+ const currentHash = this.generateContentHash(currentContent);
1484
+
1485
+ if (lastSnapshot.contentHash === currentHash) {
1486
+ return {
1487
+ hasChanges: false,
1488
+ score: 0,
1489
+ significance: "none"
1490
+ };
1491
+ }
1492
+
1493
+ // Calculate change score based on content difference
1494
+ const similarity = this.calculateSimilarity(lastSnapshot.contentHash, currentHash);
1495
+ const score = 1 - similarity;
1496
+
1497
+ // Determine significance
1498
+ let significance = "none";
1499
+ if (score > 0.7) significance = "major";
1500
+ else if (score > 0.3) significance = "moderate";
1501
+ else if (score > 0.1) significance = "minor";
1502
+
1503
+ return {
1504
+ hasChanges: score > 0,
1505
+ score,
1506
+ significance
1507
+ };
1508
+ }
1509
+
1510
+ /**
1511
+ * Calculate significance score for changes
1512
+ */
1513
+ calculateSignificanceScore(changes) {
1514
+ if (!changes) return 0;
1515
+
1516
+ let score = 0;
1517
+ const weights = {
1518
+ textChanges: 0.4,
1519
+ structuralChanges: 0.6
1520
+ };
1521
+
1522
+ // Handle object format with textChanges and structuralChanges
1523
+ if (typeof changes === "object" && !Array.isArray(changes)) {
1524
+ if (changes.textChanges) {
1525
+ const text = changes.textChanges;
1526
+ const textScore = ((text.additions || 0) + (text.deletions || 0) + (text.modifications || 0)) / (changes.totalLength || 1000);
1527
+ score += textScore * weights.textChanges;
1528
+ }
1529
+
1530
+ if (changes.structuralChanges) {
1531
+ const struct = changes.structuralChanges;
1532
+ const structScore = ((struct.additions || 0) + (struct.deletions || 0)) / 20; // Normalize
1533
+ score += structScore * weights.structuralChanges;
1534
+ }
1535
+
1536
+ return Math.min(score, 1.0); // Cap at 1.0
1537
+ }
1538
+
1539
+ // Handle legacy array format
1540
+ if (Array.isArray(changes)) {
1541
+ const legacyWeights = {
1542
+ added: 0.3,
1543
+ removed: 0.4,
1544
+ modified: 0.2
1545
+ };
1546
+
1547
+ changes.forEach(change => {
1548
+ score += (legacyWeights[change.type] || 0.1) * (change.count || 1);
1549
+ });
1550
+ }
1551
+
1552
+ return Math.min(score, 1.0); // Cap at 1.0
1553
+ }
1554
+
1555
+ /**
1556
+ * Start monitoring URL for changes
1557
+ */
1558
+ async startMonitoring(url, options = {}) {
1559
+ const monitorId = `monitor_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
1560
+
1561
+ const monitor = {
1562
+ id: monitorId,
1563
+ url,
1564
+ interval: options.interval || 300000, // 5 minutes default
1565
+ enabled: true,
1566
+ lastCheck: null,
1567
+ checkCount: 0,
1568
+ changeCount: 0
1569
+ };
1570
+
1571
+ this.activeMonitors.set(url, monitor); // Store by URL for easy access
1572
+
1573
+ return monitor;
1574
+ }
1575
+
1576
+ /**
1577
+ * Get statistics
1578
+ */
1579
+ getStatistics() {
1580
+ return {
1581
+ totalBaselines: this.baselineContent.size,
1582
+ totalMonitors: this.activeMonitors.size,
1583
+ totalComparisons: this.stats.comparisons,
1584
+ totalChanges: this.stats.changesDetected,
1585
+ averageChangeSignificance: this.stats.averageSignificance,
1586
+ lastActivity: this.stats.lastActivity
1587
+ };
1588
+ }
1589
+
1590
+ /**
1591
+ * Cleanup resources
1592
+ */
1593
+ async performDifferentialAnalysis(url, currentContent, options = {}) {
1594
+ if (!url || !currentContent) {
1595
+ throw new Error("URL and current content required for differential analysis");
1596
+ }
1597
+
1598
+ if (!this.contentHistory.has(url)) {
1599
+ throw new Error(`No baseline found for URL: ${url}`);
1600
+ }
1601
+
1602
+ try {
1603
+ const history = this.contentHistory.get(url);
1604
+ const baseline = history[0]; // Get latest snapshot
1605
+
1606
+ const analysis = {
1607
+ wordDiff: [],
1608
+ statistics: {
1609
+ contentSimilarity: 0,
1610
+ changeScore: 0
1611
+ },
1612
+ similarity: 0,
1613
+ structuralChanges: [],
1614
+ contentChanges: [],
1615
+ semanticChanges: [],
1616
+ changeScore: 0,
1617
+ changeSignificance: "none",
1618
+ metadata: {
1619
+ comparisonTime: new Date().toISOString(),
1620
+ baselineVersion: baseline.version || "unknown",
1621
+ currentVersion: "current"
1622
+ }
1623
+ };
1624
+
1625
+ // Calculate similarity
1626
+ const currentHash = this.generateContentHash(currentContent);
1627
+ analysis.similarity = this.calculateSimilarity(baseline.contentHash, currentHash);
1628
+ analysis.statistics.contentSimilarity = analysis.similarity;
1629
+ analysis.statistics.changeScore = 1 - analysis.similarity;
1630
+
1631
+ // Simple word diff
1632
+ const baselineWords = baseline.content.split(/\s+/);
1633
+ const currentWords = currentContent.split(/\s+/);
1634
+
1635
+ // Basic diff calculation
1636
+ const added = currentWords.filter(word => !baselineWords.includes(word));
1637
+ const removed = baselineWords.filter(word => !currentWords.includes(word));
1638
+
1639
+ analysis.wordDiff = [
1640
+ ...added.map(word => ({ value: word, added: true })),
1641
+ ...removed.map(word => ({ value: word, removed: true }))
1642
+ ];
1643
+
1644
+ return analysis;
1645
+ } catch (error) {
1646
+ throw new Error(`Differential analysis failed: ${error.message}`);
1647
+ }
1648
+ }
1649
+
1650
+ /**
1651
+ * Stop monitoring a URL
1652
+ */
1653
+ stopMonitoring(url) {
1654
+ if (this.activeMonitors.has(url)) {
1655
+ this.activeMonitors.delete(url);
1656
+ return true;
1657
+ }
1658
+ return false;
1659
+ }
1660
+ /**
1661
+ * Get statistics with proper format
1662
+ */
1663
+ getStatistics() {
1664
+ return {
1665
+ totalBaselines: this.contentHistory.size,
1666
+ totalMonitors: this.activeMonitors.size,
1667
+ totalComparisons: this.stats.changesDetected || 0,
1668
+ totalChanges: this.stats.changesDetected || 0,
1669
+ averageChangeSignificance: this.stats.averageChangeScore || 0,
1670
+ lastActivity: this.stats.lastAnalysis,
1671
+ pagesTracked: this.contentHistory.size,
1672
+ changesDetected: this.stats.changesDetected || 0
1673
+ };
1674
+ }
1675
+
1676
+ async initializeSemanticAnalyzer() {
1677
+ // Placeholder for semantic analysis initialization
1678
+ }
1679
+
1680
+ // Enhanced Feature Helper Methods
1681
+
1682
+ /**
1683
+ * Get latest baseline for a URL
1684
+ * @param {string} url - URL
1685
+ * @returns {Object} - Latest baseline
1686
+ */
1687
+ getLatestBaseline(url) {
1688
+ const snapshots = this.snapshots.get(url);
1689
+ return snapshots && snapshots.length > 0 ? snapshots[snapshots.length - 1] : null;
1690
+ }
1691
+
1692
+ /**
1693
+ * Calculate text similarity using advanced algorithms
1694
+ * @param {string} text1 - First text
1695
+ * @param {string} text2 - Second text
1696
+ * @returns {number} - Similarity score (0-1)
1697
+ */
1698
+ calculateTextSimilarity(text1, text2) {
1699
+ if (!text1 || !text2) return 0;
1700
+
1701
+ // Simple Jaccard similarity for keywords
1702
+ const words1 = new Set(text1.toLowerCase().split(/\W+/).filter(w => w.length > 3));
1703
+ const words2 = new Set(text2.toLowerCase().split(/\W+/).filter(w => w.length > 3));
1704
+
1705
+ const intersection = new Set([...words1].filter(x => words2.has(x)));
1706
+ const union = new Set([...words1, ...words2]);
1707
+
1708
+ return union.size > 0 ? intersection.size / union.size : 0;
1709
+ }
1710
+
1711
+ /**
1712
+ * Calculate semantic similarity
1713
+ * @param {string} text1 - First text
1714
+ * @param {string} text2 - Second text
1715
+ * @returns {number} - Semantic similarity score
1716
+ */
1717
+ calculateSemanticSimilarity(text1, text2) {
1718
+ // Placeholder for advanced semantic analysis
1719
+ // Could integrate with NLP services or local models
1720
+ return this.calculateTextSimilarity(text1, text2);
1721
+ }
1722
+
1723
+ /**
1724
+ * Detect keyword changes between texts
1725
+ * @param {string} baselineText - Baseline text
1726
+ * @param {string} currentText - Current text
1727
+ * @returns {Array} - Keyword changes
1728
+ */
1729
+ detectKeywordChanges(baselineText, currentText) {
1730
+ const changes = [];
1731
+
1732
+ try {
1733
+ const baselineWords = baselineText.toLowerCase().split(/\W+/).filter(w => w.length > 3);
1734
+ const currentWords = currentText.toLowerCase().split(/\W+/).filter(w => w.length > 3);
1735
+
1736
+ const baselineFreq = this.calculateWordFrequency(baselineWords);
1737
+ const currentFreq = this.calculateWordFrequency(currentWords);
1738
+
1739
+ // Find significant frequency changes
1740
+ const allWords = new Set([...Object.keys(baselineFreq), ...Object.keys(currentFreq)]);
1741
+
1742
+ for (const word of allWords) {
1743
+ const baseFreq = baselineFreq[word] || 0;
1744
+ const currFreq = currentFreq[word] || 0;
1745
+ const change = Math.abs(currFreq - baseFreq);
1746
+
1747
+ if (change > 2) { // Significant frequency change
1748
+ changes.push({
1749
+ word,
1750
+ baselineFrequency: baseFreq,
1751
+ currentFrequency: currFreq,
1752
+ change: currFreq - baseFreq,
1753
+ type: currFreq > baseFreq ? 'increased' : 'decreased'
1754
+ });
1755
+ }
1756
+ }
1757
+ } catch (error) {
1758
+ this.emit('error', { operation: 'detectKeywordChanges', error: error.message });
1759
+ }
1760
+
1761
+ return changes.slice(0, 20); // Top 20 changes
1762
+ }
1763
+
1764
+ /**
1765
+ * Detect topic shifts between texts
1766
+ * @param {string} baselineText - Baseline text
1767
+ * @param {string} currentText - Current text
1768
+ * @returns {Array} - Topic shifts
1769
+ */
1770
+ detectTopicShifts(baselineText, currentText) {
1771
+ const shifts = [];
1772
+
1773
+ try {
1774
+ // Simple topic detection based on key phrases
1775
+ const topicKeywords = {
1776
+ technology: ['software', 'computer', 'digital', 'tech', 'system', 'data'],
1777
+ business: ['company', 'market', 'business', 'sales', 'revenue', 'profit'],
1778
+ health: ['health', 'medical', 'doctor', 'treatment', 'disease', 'patient'],
1779
+ politics: ['government', 'policy', 'political', 'election', 'vote', 'congress'],
1780
+ sports: ['game', 'team', 'player', 'score', 'match', 'championship']
1781
+ };
1782
+
1783
+ const baselineTopics = this.detectTopics(baselineText, topicKeywords);
1784
+ const currentTopics = this.detectTopics(currentText, topicKeywords);
1785
+
1786
+ // Compare topic presence
1787
+ for (const topic of Object.keys(topicKeywords)) {
1788
+ const baselineScore = baselineTopics[topic] || 0;
1789
+ const currentScore = currentTopics[topic] || 0;
1790
+ const change = currentScore - baselineScore;
1791
+
1792
+ if (Math.abs(change) > 0.1) {
1793
+ shifts.push({
1794
+ topic,
1795
+ baselineScore,
1796
+ currentScore,
1797
+ change,
1798
+ type: change > 0 ? 'emerged' : 'diminished'
1799
+ });
1800
+ }
1801
+ }
1802
+ } catch (error) {
1803
+ this.emit('error', { operation: 'detectTopicShifts', error: error.message });
1804
+ }
1805
+
1806
+ return shifts;
1807
+ }
1808
+
1809
+ /**
1810
+ * Calculate semantic confidence score
1811
+ * @param {Object} analysis - Semantic analysis
1812
+ * @returns {number} - Confidence score
1813
+ */
1814
+ calculateSemanticConfidence(analysis) {
1815
+ let confidence = 0;
1816
+
1817
+ // Base confidence on available data
1818
+ if (analysis.textualSimilarity > 0) confidence += 0.3;
1819
+ if (analysis.keywordChanges.length > 0) confidence += 0.3;
1820
+ if (analysis.topicShifts.length > 0) confidence += 0.2;
1821
+
1822
+ // Adjust based on data quality
1823
+ const dataQuality = Math.min(
1824
+ analysis.keywordChanges.length / 10, // Max 10 keyword changes for full score
1825
+ 1
1826
+ );
1827
+
1828
+ return Math.min(confidence * dataQuality, 1);
1829
+ }
1830
+
1831
+ /**
1832
+ * Detect layout changes between DOM structures
1833
+ * @param {Object} baseline - Baseline DOM
1834
+ * @param {Object} current - Current DOM
1835
+ * @returns {Array} - Layout changes
1836
+ */
1837
+ detectLayoutChanges(baseline, current) {
1838
+ const changes = [];
1839
+
1840
+ try {
1841
+ // Compare element counts by type
1842
+ const baselineElements = this.countElements(baseline);
1843
+ const currentElements = this.countElements(current);
1844
+
1845
+ for (const [tag, baseCount] of Object.entries(baselineElements)) {
1846
+ const currCount = currentElements[tag] || 0;
1847
+ if (Math.abs(currCount - baseCount) > 0) {
1848
+ changes.push({
1849
+ type: 'element_count_change',
1850
+ tag,
1851
+ baseline: baseCount,
1852
+ current: currCount,
1853
+ change: currCount - baseCount
1854
+ });
1855
+ }
1856
+ }
1857
+
1858
+ // Check for new element types
1859
+ for (const [tag, currCount] of Object.entries(currentElements)) {
1860
+ if (!baselineElements[tag]) {
1861
+ changes.push({
1862
+ type: 'new_element_type',
1863
+ tag,
1864
+ count: currCount
1865
+ });
1866
+ }
1867
+ }
1868
+ } catch (error) {
1869
+ this.emit('error', { operation: 'detectLayoutChanges', error: error.message });
1870
+ }
1871
+
1872
+ return changes;
1873
+ }
1874
+
1875
+ /**
1876
+ * Detect CSS changes
1877
+ * @param {Object} baseline - Baseline DOM
1878
+ * @param {Object} current - Current DOM
1879
+ * @returns {Array} - CSS changes
1880
+ */
1881
+ detectCSSChanges(baseline, current) {
1882
+ const changes = [];
1883
+
1884
+ try {
1885
+ // Extract style information
1886
+ const baselineStyles = this.extractStyles(baseline);
1887
+ const currentStyles = this.extractStyles(current);
1888
+
1889
+ // Compare inline styles
1890
+ const styleDiff = this.compareStyles(baselineStyles, currentStyles);
1891
+ changes.push(...styleDiff);
1892
+
1893
+ } catch (error) {
1894
+ this.emit('error', { operation: 'detectCSSChanges', error: error.message });
1895
+ }
1896
+
1897
+ return changes;
1898
+ }
1899
+
1900
+ /**
1901
+ * Extract structured data from DOM
1902
+ * @param {Object} $ - Cheerio DOM
1903
+ * @returns {Object} - Structured data
1904
+ */
1905
+ extractStructuredData($) {
1906
+ const structuredData = {
1907
+ jsonLd: [],
1908
+ microdata: [],
1909
+ rdfa: [],
1910
+ openGraph: {},
1911
+ twitterCard: {},
1912
+ schema: []
1913
+ };
1914
+
1915
+ try {
1916
+ // Extract JSON-LD
1917
+ $('script[type="application/ld+json"]').each((index, element) => {
1918
+ try {
1919
+ const data = JSON.parse($(element).html());
1920
+ structuredData.jsonLd.push(data);
1921
+ } catch (e) {
1922
+ // Invalid JSON, skip
1923
+ }
1924
+ });
1925
+
1926
+ // Extract Open Graph
1927
+ $('meta[property^="og:"]').each((index, element) => {
1928
+ const property = $(element).attr('property');
1929
+ const content = $(element).attr('content');
1930
+ if (property && content) {
1931
+ structuredData.openGraph[property] = content;
1932
+ }
1933
+ });
1934
+
1935
+ // Extract Twitter Card
1936
+ $('meta[name^="twitter:"]').each((index, element) => {
1937
+ const name = $(element).attr('name');
1938
+ const content = $(element).attr('content');
1939
+ if (name && content) {
1940
+ structuredData.twitterCard[name] = content;
1941
+ }
1942
+ });
1943
+
1944
+ } catch (error) {
1945
+ this.emit('error', { operation: 'extractStructuredData', error: error.message });
1946
+ }
1947
+
1948
+ return structuredData;
1949
+ }
1950
+
1951
+ /**
1952
+ * Compare structured data
1953
+ * @param {Object} baseline - Baseline structured data
1954
+ * @param {Object} current - Current structured data
1955
+ * @returns {Array} - Schema changes
1956
+ */
1957
+ compareStructuredData(baseline, current) {
1958
+ const changes = [];
1959
+
1960
+ try {
1961
+ // Compare JSON-LD
1962
+ const jsonLdChanges = this.compareArrayData(baseline.jsonLd, current.jsonLd, 'json-ld');
1963
+ changes.push(...jsonLdChanges);
1964
+
1965
+ // Compare Open Graph
1966
+ const ogChanges = this.compareObjectData(baseline.openGraph, current.openGraph, 'open-graph');
1967
+ changes.push(...ogChanges);
1968
+
1969
+ // Compare Twitter Card
1970
+ const twitterChanges = this.compareObjectData(baseline.twitterCard, current.twitterCard, 'twitter-card');
1971
+ changes.push(...twitterChanges);
1972
+
1973
+ } catch (error) {
1974
+ this.emit('error', { operation: 'compareStructuredData', error: error.message });
1975
+ }
1976
+
1977
+ return changes;
1978
+ }
1979
+
1980
+ /**
1981
+ * Compare metadata objects
1982
+ * @param {Object} baseline - Baseline metadata
1983
+ * @param {Object} current - Current metadata
1984
+ * @returns {Array} - Metadata changes
1985
+ */
1986
+ compareMetadata(baseline, current) {
1987
+ const changes = [];
1988
+
1989
+ try {
1990
+ const baselineKeys = Object.keys(baseline || {});
1991
+ const currentKeys = Object.keys(current || {});
1992
+ const allKeys = new Set([...baselineKeys, ...currentKeys]);
1993
+
1994
+ for (const key of allKeys) {
1995
+ const baseValue = baseline?.[key];
1996
+ const currValue = current?.[key];
1997
+
1998
+ if (JSON.stringify(baseValue) !== JSON.stringify(currValue)) {
1999
+ changes.push({
2000
+ type: 'metadata_change',
2001
+ field: key,
2002
+ baseline: baseValue,
2003
+ current: currValue,
2004
+ changeType: !baseValue ? 'added' : !currValue ? 'removed' : 'modified'
2005
+ });
2006
+ }
2007
+ }
2008
+ } catch (error) {
2009
+ this.emit('error', { operation: 'compareMetadata', error: error.message });
2010
+ }
2011
+
2012
+ return changes;
2013
+ }
2014
+
2015
+ /**
2016
+ * Calculate enhanced significance score
2017
+ * @param {Object} standardComparison - Standard comparison
2018
+ * @param {Object} semanticAnalysis - Semantic analysis
2019
+ * @param {Object} visualAnalysis - Visual analysis
2020
+ * @param {Object} structuredAnalysis - Structured analysis
2021
+ * @returns {string} - Enhanced significance level
2022
+ */
2023
+ async calculateEnhancedSignificance(standardComparison, semanticAnalysis, visualAnalysis, structuredAnalysis) {
2024
+ try {
2025
+ let enhancedScore = 0;
2026
+ const weights = {
2027
+ standard: 0.4,
2028
+ semantic: 0.2,
2029
+ visual: 0.2,
2030
+ structured: 0.2
2031
+ };
2032
+
2033
+ // Standard comparison score
2034
+ const standardScore = this.getSignificanceScore(standardComparison.significance);
2035
+ enhancedScore += standardScore * weights.standard;
2036
+
2037
+ // Semantic analysis score
2038
+ const semanticScore = semanticAnalysis.confidenceScore *
2039
+ (1 - semanticAnalysis.textualSimilarity);
2040
+ enhancedScore += semanticScore * weights.semantic;
2041
+
2042
+ // Visual analysis score
2043
+ const visualScore = visualAnalysis.hasVisualChanges ? 0.7 : 0;
2044
+ enhancedScore += visualScore * weights.visual;
2045
+
2046
+ // Structured data score
2047
+ const structuredScore = structuredAnalysis.hasStructuredChanges ? 0.8 : 0;
2048
+ enhancedScore += structuredScore * weights.structured;
2049
+
2050
+ // Convert to significance level
2051
+ return this.scoreToSignificance(enhancedScore);
2052
+
2053
+ } catch (error) {
2054
+ this.emit('error', { operation: 'calculateEnhancedSignificance', error: error.message });
2055
+ return standardComparison.significance;
2056
+ }
2057
+ }
2058
+
2059
+ /**
2060
+ * Detect change patterns in historical data
2061
+ * @param {string} url - URL
2062
+ * @param {Object} patterns - Pattern data
2063
+ */
2064
+ async detectChangePatterns(url, patterns) {
2065
+ try {
2066
+ const frequency = patterns.changeFrequency.get(url);
2067
+ if (!frequency || frequency.length < 10) return;
2068
+
2069
+ // Detect recurring patterns
2070
+ const recurringPatterns = this.detectRecurringPatterns(frequency);
2071
+
2072
+ // Detect time-based patterns
2073
+ const timePatterns = this.detectTimePatterns(frequency);
2074
+
2075
+ // Update trend analysis
2076
+ if (recurringPatterns.length > 0 || timePatterns.length > 0) {
2077
+ this.stats.trendPatternsDetected++;
2078
+
2079
+ this.emit('patternsDetected', {
2080
+ url,
2081
+ recurringPatterns,
2082
+ timePatterns,
2083
+ timestamp: Date.now()
2084
+ });
2085
+ }
2086
+
2087
+ } catch (error) {
2088
+ this.emit('error', { operation: 'detectChangePatterns', url, error: error.message });
2089
+ }
2090
+ }
2091
+
2092
+ /**
2093
+ * Send webhook alert
2094
+ * @param {Object} alertData - Alert data
2095
+ */
2096
+ async sendWebhookAlert(alertData) {
2097
+ // Placeholder for webhook implementation
2098
+ this.emit('webhookAlert', alertData);
2099
+ }
2100
+
2101
+ /**
2102
+ * Send email alert
2103
+ * @param {Object} alertData - Alert data
2104
+ */
2105
+ async sendEmailAlert(alertData) {
2106
+ // Placeholder for email implementation
2107
+ this.emit('emailAlert', alertData);
2108
+ }
2109
+
2110
+ /**
2111
+ * Send Slack alert
2112
+ * @param {Object} alertData - Alert data
2113
+ */
2114
+ async sendSlackAlert(alertData) {
2115
+ // Placeholder for Slack implementation
2116
+ this.emit('slackAlert', alertData);
2117
+ }
2118
+
2119
+ // Utility helper methods
2120
+
2121
+ calculateWordFrequency(words) {
2122
+ const frequency = {};
2123
+ words.forEach(word => {
2124
+ frequency[word] = (frequency[word] || 0) + 1;
2125
+ });
2126
+ return frequency;
2127
+ }
2128
+
2129
+ detectTopics(text, topicKeywords) {
2130
+ const topics = {};
2131
+ const words = text.toLowerCase().split(/\W+/);
2132
+
2133
+ for (const [topic, keywords] of Object.entries(topicKeywords)) {
2134
+ let score = 0;
2135
+ keywords.forEach(keyword => {
2136
+ score += words.filter(word => word.includes(keyword)).length;
2137
+ });
2138
+ topics[topic] = score / words.length;
2139
+ }
2140
+
2141
+ return topics;
2142
+ }
2143
+
2144
+ countElements($) {
2145
+ const counts = {};
2146
+ $('*').each((index, element) => {
2147
+ const tag = element.name;
2148
+ counts[tag] = (counts[tag] || 0) + 1;
2149
+ });
2150
+ return counts;
2151
+ }
2152
+
2153
+ extractStyles($) {
2154
+ const styles = {};
2155
+ $('[style]').each((index, element) => {
2156
+ const style = $(element).attr('style');
2157
+ if (style) {
2158
+ styles[`element_${index}`] = style;
2159
+ }
2160
+ });
2161
+ return styles;
2162
+ }
2163
+
2164
+ compareStyles(baseline, current) {
2165
+ const changes = [];
2166
+ const allKeys = new Set([...Object.keys(baseline), ...Object.keys(current)]);
2167
+
2168
+ for (const key of allKeys) {
2169
+ if (baseline[key] !== current[key]) {
2170
+ changes.push({
2171
+ type: 'style_change',
2172
+ element: key,
2173
+ baseline: baseline[key],
2174
+ current: current[key]
2175
+ });
2176
+ }
2177
+ }
2178
+
2179
+ return changes;
2180
+ }
2181
+
2182
+ compareArrayData(baseline, current, type) {
2183
+ const changes = [];
2184
+
2185
+ if (baseline.length !== current.length) {
2186
+ changes.push({
2187
+ type: `${type}_count_change`,
2188
+ baseline: baseline.length,
2189
+ current: current.length
2190
+ });
2191
+ }
2192
+
2193
+ return changes;
2194
+ }
2195
+
2196
+ compareObjectData(baseline, current, type) {
2197
+ const changes = [];
2198
+ const allKeys = new Set([...Object.keys(baseline), ...Object.keys(current)]);
2199
+
2200
+ for (const key of allKeys) {
2201
+ if (baseline[key] !== current[key]) {
2202
+ changes.push({
2203
+ type: `${type}_change`,
2204
+ field: key,
2205
+ baseline: baseline[key],
2206
+ current: current[key]
2207
+ });
2208
+ }
2209
+ }
2210
+
2211
+ return changes;
2212
+ }
2213
+
2214
+ getSignificanceScore(significance) {
2215
+ const scores = {
2216
+ 'none': 0,
2217
+ 'minor': 0.2,
2218
+ 'moderate': 0.5,
2219
+ 'major': 0.8,
2220
+ 'critical': 1.0
2221
+ };
2222
+ return scores[significance] || 0;
2223
+ }
2224
+
2225
+ scoreToSignificance(score) {
2226
+ if (score >= 0.9) return 'critical';
2227
+ if (score >= 0.7) return 'major';
2228
+ if (score >= 0.4) return 'moderate';
2229
+ if (score >= 0.1) return 'minor';
2230
+ return 'none';
2231
+ }
2232
+
2233
+ analyzeUrlPatterns(url, patterns) {
2234
+ // Placeholder for URL-specific pattern analysis
2235
+ return {
2236
+ dailyAverage: 0,
2237
+ peakTimes: [],
2238
+ commonTypes: []
2239
+ };
2240
+ }
2241
+
2242
+ analyzeGlobalPatterns(patterns) {
2243
+ // Placeholder for global pattern analysis
2244
+ return {
2245
+ totalUrls: patterns.dailyChangePatterns.size,
2246
+ mostActiveUrls: [],
2247
+ commonPatterns: []
2248
+ };
2249
+ }
2250
+
2251
+ generateTrendInsights(patterns) {
2252
+ return [
2253
+ 'Pattern analysis requires more data',
2254
+ 'Monitoring is active and collecting data'
2255
+ ];
2256
+ }
2257
+
2258
+ generateTrendRecommendations(patterns, insights) {
2259
+ return [
2260
+ 'Continue monitoring to build pattern database',
2261
+ 'Consider adjusting monitoring frequency based on change patterns'
2262
+ ];
2263
+ }
2264
+
2265
+ detectRecurringPatterns(frequency) {
2266
+ // Placeholder for recurring pattern detection
2267
+ return [];
2268
+ }
2269
+
2270
+ detectTimePatterns(frequency) {
2271
+ // Placeholder for time-based pattern detection
2272
+ return [];
2273
+ }
2274
+
2275
+ convertToCSV(data) {
2276
+ // Placeholder for CSV conversion
2277
+ return JSON.stringify(data, null, 2);
2278
+ }
2279
+
2280
+ cleanup() {
2281
+ // Stop all scheduled monitors
2282
+ for (const [id, monitor] of this.scheduledMonitors.entries()) {
2283
+ if (monitor.cronJob) {
2284
+ monitor.cronJob.destroy();
2285
+ }
2286
+ }
2287
+
2288
+ // Clear all data
2289
+ this.contentHistory.clear();
2290
+ this.baselineContent.clear();
2291
+ this.activeMonitors.clear();
2292
+ this.changeNotifications.clear();
2293
+ this.snapshotManager.clear();
2294
+ this.scheduledMonitors.clear();
2295
+ this.monitoringTemplates.clear();
2296
+ this.alertRules.clear();
2297
+ this.alertHistory.clear();
2298
+ this.trendAnalysis.clear();
2299
+ this.visualRegression.clear();
2300
+ this.alertThrottling.clear();
2301
+ this.semanticDiffCache.clear();
2302
+ }
2303
+
2304
+ }
2305
+
2306
+ export default ChangeTracker;