@ai-jshook/mcp 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +38 -0
- package/CLAUDE.md +170 -0
- package/README.md +564 -0
- package/bun.lock +1484 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +57 -0
- package/dist/index.js.map +1 -0
- package/dist/modules/analyzer/AISummarizer.d.ts +41 -0
- package/dist/modules/analyzer/AISummarizer.d.ts.map +1 -0
- package/dist/modules/analyzer/AISummarizer.js +186 -0
- package/dist/modules/analyzer/AISummarizer.js.map +1 -0
- package/dist/modules/analyzer/CodeAnalyzer.d.ts +28 -0
- package/dist/modules/analyzer/CodeAnalyzer.d.ts.map +1 -0
- package/dist/modules/analyzer/CodeAnalyzer.js +1287 -0
- package/dist/modules/analyzer/CodeAnalyzer.js.map +1 -0
- package/dist/modules/analyzer/IntelligentAnalyzer.d.ts +114 -0
- package/dist/modules/analyzer/IntelligentAnalyzer.d.ts.map +1 -0
- package/dist/modules/analyzer/IntelligentAnalyzer.js +1176 -0
- package/dist/modules/analyzer/IntelligentAnalyzer.js.map +1 -0
- package/dist/modules/browser/BrowserModeManager.d.ts +31 -0
- package/dist/modules/browser/BrowserModeManager.d.ts.map +1 -0
- package/dist/modules/browser/BrowserModeManager.js +241 -0
- package/dist/modules/browser/BrowserModeManager.js.map +1 -0
- package/dist/modules/captcha/AICaptchaDetector.d.ts +32 -0
- package/dist/modules/captcha/AICaptchaDetector.d.ts.map +1 -0
- package/dist/modules/captcha/AICaptchaDetector.js +387 -0
- package/dist/modules/captcha/AICaptchaDetector.js.map +1 -0
- package/dist/modules/captcha/CaptchaDetector.d.ts +28 -0
- package/dist/modules/captcha/CaptchaDetector.d.ts.map +1 -0
- package/dist/modules/captcha/CaptchaDetector.js +513 -0
- package/dist/modules/captcha/CaptchaDetector.js.map +1 -0
- package/dist/modules/collector/CodeCache.d.ts +37 -0
- package/dist/modules/collector/CodeCache.d.ts.map +1 -0
- package/dist/modules/collector/CodeCache.js +188 -0
- package/dist/modules/collector/CodeCache.js.map +1 -0
- package/dist/modules/collector/CodeCollector.d.ts +107 -0
- package/dist/modules/collector/CodeCollector.d.ts.map +1 -0
- package/dist/modules/collector/CodeCollector.js +796 -0
- package/dist/modules/collector/CodeCollector.js.map +1 -0
- package/dist/modules/collector/CodeCompressor.d.ts +65 -0
- package/dist/modules/collector/CodeCompressor.d.ts.map +1 -0
- package/dist/modules/collector/CodeCompressor.js +245 -0
- package/dist/modules/collector/CodeCompressor.js.map +1 -0
- package/dist/modules/collector/DOMInspector.d.ts +51 -0
- package/dist/modules/collector/DOMInspector.d.ts.map +1 -0
- package/dist/modules/collector/DOMInspector.js +437 -0
- package/dist/modules/collector/DOMInspector.js.map +1 -0
- package/dist/modules/collector/PageController.d.ts +79 -0
- package/dist/modules/collector/PageController.d.ts.map +1 -0
- package/dist/modules/collector/PageController.js +287 -0
- package/dist/modules/collector/PageController.js.map +1 -0
- package/dist/modules/collector/SmartCodeCollector.d.ts +38 -0
- package/dist/modules/collector/SmartCodeCollector.d.ts.map +1 -0
- package/dist/modules/collector/SmartCodeCollector.js +208 -0
- package/dist/modules/collector/SmartCodeCollector.js.map +1 -0
- package/dist/modules/collector/StreamingCollector.d.ts +46 -0
- package/dist/modules/collector/StreamingCollector.d.ts.map +1 -0
- package/dist/modules/collector/StreamingCollector.js +127 -0
- package/dist/modules/collector/StreamingCollector.js.map +1 -0
- package/dist/modules/crypto/CryptoDetector.d.ts +22 -0
- package/dist/modules/crypto/CryptoDetector.d.ts.map +1 -0
- package/dist/modules/crypto/CryptoDetector.js +168 -0
- package/dist/modules/crypto/CryptoDetector.js.map +1 -0
- package/dist/modules/crypto/CryptoDetectorEnhanced.d.ts +31 -0
- package/dist/modules/crypto/CryptoDetectorEnhanced.d.ts.map +1 -0
- package/dist/modules/crypto/CryptoDetectorEnhanced.js +269 -0
- package/dist/modules/crypto/CryptoDetectorEnhanced.js.map +1 -0
- package/dist/modules/crypto/CryptoRules.d.ts +59 -0
- package/dist/modules/crypto/CryptoRules.d.ts.map +1 -0
- package/dist/modules/crypto/CryptoRules.js +234 -0
- package/dist/modules/crypto/CryptoRules.js.map +1 -0
- package/dist/modules/debugger/BlackboxManager.d.ts +14 -0
- package/dist/modules/debugger/BlackboxManager.d.ts.map +1 -0
- package/dist/modules/debugger/BlackboxManager.js +98 -0
- package/dist/modules/debugger/BlackboxManager.js.map +1 -0
- package/dist/modules/debugger/DebuggerManager.d.ts +138 -0
- package/dist/modules/debugger/DebuggerManager.d.ts.map +1 -0
- package/dist/modules/debugger/DebuggerManager.js +777 -0
- package/dist/modules/debugger/DebuggerManager.js.map +1 -0
- package/dist/modules/debugger/EventBreakpointManager.d.ts +30 -0
- package/dist/modules/debugger/EventBreakpointManager.d.ts.map +1 -0
- package/dist/modules/debugger/EventBreakpointManager.js +125 -0
- package/dist/modules/debugger/EventBreakpointManager.js.map +1 -0
- package/dist/modules/debugger/RuntimeInspector.d.ts +54 -0
- package/dist/modules/debugger/RuntimeInspector.d.ts.map +1 -0
- package/dist/modules/debugger/RuntimeInspector.js +277 -0
- package/dist/modules/debugger/RuntimeInspector.js.map +1 -0
- package/dist/modules/debugger/ScriptManager.d.ts +94 -0
- package/dist/modules/debugger/ScriptManager.d.ts.map +1 -0
- package/dist/modules/debugger/ScriptManager.js +433 -0
- package/dist/modules/debugger/ScriptManager.js.map +1 -0
- package/dist/modules/debugger/WatchExpressionManager.d.ts +52 -0
- package/dist/modules/debugger/WatchExpressionManager.d.ts.map +1 -0
- package/dist/modules/debugger/WatchExpressionManager.js +136 -0
- package/dist/modules/debugger/WatchExpressionManager.js.map +1 -0
- package/dist/modules/debugger/XHRBreakpointManager.d.ts +21 -0
- package/dist/modules/debugger/XHRBreakpointManager.d.ts.map +1 -0
- package/dist/modules/debugger/XHRBreakpointManager.js +81 -0
- package/dist/modules/debugger/XHRBreakpointManager.js.map +1 -0
- package/dist/modules/deobfuscator/ASTOptimizer.d.ts +12 -0
- package/dist/modules/deobfuscator/ASTOptimizer.d.ts.map +1 -0
- package/dist/modules/deobfuscator/ASTOptimizer.js +234 -0
- package/dist/modules/deobfuscator/ASTOptimizer.js.map +1 -0
- package/dist/modules/deobfuscator/AdvancedDeobfuscator.d.ts +52 -0
- package/dist/modules/deobfuscator/AdvancedDeobfuscator.d.ts.map +1 -0
- package/dist/modules/deobfuscator/AdvancedDeobfuscator.js +985 -0
- package/dist/modules/deobfuscator/AdvancedDeobfuscator.js.map +1 -0
- package/dist/modules/deobfuscator/Deobfuscator.d.ts +23 -0
- package/dist/modules/deobfuscator/Deobfuscator.d.ts.map +1 -0
- package/dist/modules/deobfuscator/Deobfuscator.js +487 -0
- package/dist/modules/deobfuscator/Deobfuscator.js.map +1 -0
- package/dist/modules/deobfuscator/JSVMPDeobfuscator.d.ts +19 -0
- package/dist/modules/deobfuscator/JSVMPDeobfuscator.d.ts.map +1 -0
- package/dist/modules/deobfuscator/JSVMPDeobfuscator.js +594 -0
- package/dist/modules/deobfuscator/JSVMPDeobfuscator.js.map +1 -0
- package/dist/modules/deobfuscator/JScramberDeobfuscator.d.ts +28 -0
- package/dist/modules/deobfuscator/JScramberDeobfuscator.d.ts.map +1 -0
- package/dist/modules/deobfuscator/JScramberDeobfuscator.js +239 -0
- package/dist/modules/deobfuscator/JScramberDeobfuscator.js.map +1 -0
- package/dist/modules/deobfuscator/PackerDeobfuscator.d.ts +38 -0
- package/dist/modules/deobfuscator/PackerDeobfuscator.d.ts.map +1 -0
- package/dist/modules/deobfuscator/PackerDeobfuscator.js +191 -0
- package/dist/modules/deobfuscator/PackerDeobfuscator.js.map +1 -0
- package/dist/modules/detector/ObfuscationDetector.d.ts +35 -0
- package/dist/modules/detector/ObfuscationDetector.d.ts.map +1 -0
- package/dist/modules/detector/ObfuscationDetector.js +278 -0
- package/dist/modules/detector/ObfuscationDetector.js.map +1 -0
- package/dist/modules/emulator/AIEnvironmentAnalyzer.d.ts +32 -0
- package/dist/modules/emulator/AIEnvironmentAnalyzer.d.ts.map +1 -0
- package/dist/modules/emulator/AIEnvironmentAnalyzer.js +548 -0
- package/dist/modules/emulator/AIEnvironmentAnalyzer.js.map +1 -0
- package/dist/modules/emulator/BrowserAPIDatabase.d.ts +34 -0
- package/dist/modules/emulator/BrowserAPIDatabase.d.ts.map +1 -0
- package/dist/modules/emulator/BrowserAPIDatabase.js +326 -0
- package/dist/modules/emulator/BrowserAPIDatabase.js.map +1 -0
- package/dist/modules/emulator/BrowserEnvironmentRules.d.ts +47 -0
- package/dist/modules/emulator/BrowserEnvironmentRules.d.ts.map +1 -0
- package/dist/modules/emulator/BrowserEnvironmentRules.js +493 -0
- package/dist/modules/emulator/BrowserEnvironmentRules.js.map +1 -0
- package/dist/modules/emulator/EnvironmentEmulator.d.ts +27 -0
- package/dist/modules/emulator/EnvironmentEmulator.d.ts.map +1 -0
- package/dist/modules/emulator/EnvironmentEmulator.js +1113 -0
- package/dist/modules/emulator/EnvironmentEmulator.js.map +1 -0
- package/dist/modules/emulator/EnvironmentEmulatorEnhanced.d.ts +26 -0
- package/dist/modules/emulator/EnvironmentEmulatorEnhanced.d.ts.map +1 -0
- package/dist/modules/emulator/EnvironmentEmulatorEnhanced.js +493 -0
- package/dist/modules/emulator/EnvironmentEmulatorEnhanced.js.map +1 -0
- package/dist/modules/emulator/templates/chrome-env.d.ts +260 -0
- package/dist/modules/emulator/templates/chrome-env.d.ts.map +1 -0
- package/dist/modules/emulator/templates/chrome-env.js +253 -0
- package/dist/modules/emulator/templates/chrome-env.js.map +1 -0
- package/dist/modules/hook/AIHookGenerator.d.ts +53 -0
- package/dist/modules/hook/AIHookGenerator.d.ts.map +1 -0
- package/dist/modules/hook/AIHookGenerator.js +353 -0
- package/dist/modules/hook/AIHookGenerator.js.map +1 -0
- package/dist/modules/hook/HookManager.d.ts +67 -0
- package/dist/modules/hook/HookManager.d.ts.map +1 -0
- package/dist/modules/hook/HookManager.js +1225 -0
- package/dist/modules/hook/HookManager.js.map +1 -0
- package/dist/modules/monitor/ConsoleMonitor.d.ts +140 -0
- package/dist/modules/monitor/ConsoleMonitor.d.ts.map +1 -0
- package/dist/modules/monitor/ConsoleMonitor.js +834 -0
- package/dist/modules/monitor/ConsoleMonitor.js.map +1 -0
- package/dist/modules/monitor/PerformanceMonitor.d.ts +65 -0
- package/dist/modules/monitor/PerformanceMonitor.d.ts.map +1 -0
- package/dist/modules/monitor/PerformanceMonitor.js +175 -0
- package/dist/modules/monitor/PerformanceMonitor.js.map +1 -0
- package/dist/modules/stealth/StealthScripts2025.d.ts +17 -0
- package/dist/modules/stealth/StealthScripts2025.d.ts.map +1 -0
- package/dist/modules/stealth/StealthScripts2025.js +274 -0
- package/dist/modules/stealth/StealthScripts2025.js.map +1 -0
- package/dist/modules/symbolic/JSVMPSymbolicExecutor.d.ts +69 -0
- package/dist/modules/symbolic/JSVMPSymbolicExecutor.d.ts.map +1 -0
- package/dist/modules/symbolic/JSVMPSymbolicExecutor.js +232 -0
- package/dist/modules/symbolic/JSVMPSymbolicExecutor.js.map +1 -0
- package/dist/modules/symbolic/SymbolicExecutor.d.ts +69 -0
- package/dist/modules/symbolic/SymbolicExecutor.d.ts.map +1 -0
- package/dist/modules/symbolic/SymbolicExecutor.js +346 -0
- package/dist/modules/symbolic/SymbolicExecutor.js.map +1 -0
- package/dist/server/AIHookToolDefinitions.d.ts +3 -0
- package/dist/server/AIHookToolDefinitions.d.ts.map +1 -0
- package/dist/server/AIHookToolDefinitions.js +284 -0
- package/dist/server/AIHookToolDefinitions.js.map +1 -0
- package/dist/server/AIHookToolHandlers.d.ts +50 -0
- package/dist/server/AIHookToolHandlers.d.ts.map +1 -0
- package/dist/server/AIHookToolHandlers.js +311 -0
- package/dist/server/AIHookToolHandlers.js.map +1 -0
- package/dist/server/AdvancedToolDefinitions.d.ts +3 -0
- package/dist/server/AdvancedToolDefinitions.d.ts.map +1 -0
- package/dist/server/AdvancedToolDefinitions.js +218 -0
- package/dist/server/AdvancedToolDefinitions.js.map +1 -0
- package/dist/server/AdvancedToolHandlers.d.ts +85 -0
- package/dist/server/AdvancedToolHandlers.d.ts.map +1 -0
- package/dist/server/AdvancedToolHandlers.js +431 -0
- package/dist/server/AdvancedToolHandlers.js.map +1 -0
- package/dist/server/BrowserToolDefinitions.d.ts +3 -0
- package/dist/server/BrowserToolDefinitions.d.ts.map +1 -0
- package/dist/server/BrowserToolDefinitions.js +841 -0
- package/dist/server/BrowserToolDefinitions.js.map +1 -0
- package/dist/server/BrowserToolHandlers.d.ts +290 -0
- package/dist/server/BrowserToolHandlers.d.ts.map +1 -0
- package/dist/server/BrowserToolHandlers.js +784 -0
- package/dist/server/BrowserToolHandlers.js.map +1 -0
- package/dist/server/CacheToolDefinitions.d.ts +3 -0
- package/dist/server/CacheToolDefinitions.d.ts.map +1 -0
- package/dist/server/CacheToolDefinitions.js +166 -0
- package/dist/server/CacheToolDefinitions.js.map +1 -0
- package/dist/server/DebuggerToolDefinitions.d.ts +3 -0
- package/dist/server/DebuggerToolDefinitions.d.ts.map +1 -0
- package/dist/server/DebuggerToolDefinitions.js +600 -0
- package/dist/server/DebuggerToolDefinitions.js.map +1 -0
- package/dist/server/DebuggerToolHandlers.d.ts +230 -0
- package/dist/server/DebuggerToolHandlers.d.ts.map +1 -0
- package/dist/server/DebuggerToolHandlers.js +935 -0
- package/dist/server/DebuggerToolHandlers.js.map +1 -0
- package/dist/server/MCPServer.d.ts +55 -0
- package/dist/server/MCPServer.d.ts.map +1 -0
- package/dist/server/MCPServer.js +1344 -0
- package/dist/server/MCPServer.js.map +1 -0
- package/dist/server/TokenBudgetToolDefinitions.d.ts +3 -0
- package/dist/server/TokenBudgetToolDefinitions.d.ts.map +1 -0
- package/dist/server/TokenBudgetToolDefinitions.js +114 -0
- package/dist/server/TokenBudgetToolDefinitions.js.map +1 -0
- package/dist/services/LLMService.d.ts +41 -0
- package/dist/services/LLMService.d.ts.map +1 -0
- package/dist/services/LLMService.js +792 -0
- package/dist/services/LLMService.js.map +1 -0
- package/dist/types/index.d.ts +527 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/AdaptiveDataSerializer.d.ts +27 -0
- package/dist/utils/AdaptiveDataSerializer.d.ts.map +1 -0
- package/dist/utils/AdaptiveDataSerializer.js +215 -0
- package/dist/utils/AdaptiveDataSerializer.js.map +1 -0
- package/dist/utils/CacheAdapters.d.ts +30 -0
- package/dist/utils/CacheAdapters.d.ts.map +1 -0
- package/dist/utils/CacheAdapters.js +83 -0
- package/dist/utils/CacheAdapters.js.map +1 -0
- package/dist/utils/TokenBudgetManager.d.ts +52 -0
- package/dist/utils/TokenBudgetManager.d.ts.map +1 -0
- package/dist/utils/TokenBudgetManager.js +190 -0
- package/dist/utils/TokenBudgetManager.js.map +1 -0
- package/dist/utils/UnifiedCacheManager.d.ts +55 -0
- package/dist/utils/UnifiedCacheManager.d.ts.map +1 -0
- package/dist/utils/UnifiedCacheManager.js +207 -0
- package/dist/utils/UnifiedCacheManager.js.map +1 -0
- package/dist/utils/cache.d.ts +13 -0
- package/dist/utils/cache.d.ts.map +1 -0
- package/dist/utils/cache.js +92 -0
- package/dist/utils/cache.js.map +1 -0
- package/dist/utils/config.d.ts +7 -0
- package/dist/utils/config.d.ts.map +1 -0
- package/dist/utils/config.js +93 -0
- package/dist/utils/config.js.map +1 -0
- package/dist/utils/detailedDataManager.d.ts +60 -0
- package/dist/utils/detailedDataManager.d.ts.map +1 -0
- package/dist/utils/detailedDataManager.js +204 -0
- package/dist/utils/detailedDataManager.js.map +1 -0
- package/dist/utils/logger.d.ts +16 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +47 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/parallel.d.ts +40 -0
- package/dist/utils/parallel.d.ts.map +1 -0
- package/dist/utils/parallel.js +148 -0
- package/dist/utils/parallel.js.map +1 -0
- package/package.json +94 -0
- package/server.json +39 -0
- package/tsconfig.dev.json +14 -0
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
import puppeteer from 'puppeteer-extra';
|
|
2
|
+
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
3
|
+
import { logger } from '../../utils/logger.js';
|
|
4
|
+
import { CodeCache } from './CodeCache.js';
|
|
5
|
+
import { SmartCodeCollector } from './SmartCodeCollector.js';
|
|
6
|
+
import { CodeCompressor } from './CodeCompressor.js';
|
|
7
|
+
puppeteer.use(StealthPlugin());
|
|
8
|
+
export class CodeCollector {
|
|
9
|
+
config;
|
|
10
|
+
browser = null;
|
|
11
|
+
collectedUrls = new Set();
|
|
12
|
+
MAX_COLLECTED_URLS;
|
|
13
|
+
MAX_FILES_PER_COLLECT;
|
|
14
|
+
MAX_RESPONSE_SIZE;
|
|
15
|
+
MAX_SINGLE_FILE_SIZE;
|
|
16
|
+
viewport;
|
|
17
|
+
userAgent;
|
|
18
|
+
collectedFilesCache = new Map();
|
|
19
|
+
cache;
|
|
20
|
+
cacheEnabled = true;
|
|
21
|
+
smartCollector;
|
|
22
|
+
compressor;
|
|
23
|
+
cdpSession = null;
|
|
24
|
+
cdpListeners = {};
|
|
25
|
+
constructor(config) {
|
|
26
|
+
this.config = config;
|
|
27
|
+
this.MAX_COLLECTED_URLS = config.maxCollectedUrls ?? 10000;
|
|
28
|
+
this.MAX_FILES_PER_COLLECT = config.maxFilesPerCollect ?? 200;
|
|
29
|
+
this.MAX_RESPONSE_SIZE = config.maxTotalContentSize ?? 512 * 1024;
|
|
30
|
+
this.MAX_SINGLE_FILE_SIZE = config.maxSingleFileSize ?? 200 * 1024;
|
|
31
|
+
this.viewport = config.viewport ?? { width: 1920, height: 1080 };
|
|
32
|
+
this.userAgent = config.userAgent ??
|
|
33
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
|
34
|
+
this.cache = new CodeCache();
|
|
35
|
+
this.smartCollector = new SmartCodeCollector();
|
|
36
|
+
this.compressor = new CodeCompressor();
|
|
37
|
+
logger.info(`๐ CodeCollector limits: maxCollect=${this.MAX_FILES_PER_COLLECT} files, maxResponse=${(this.MAX_RESPONSE_SIZE / 1024).toFixed(0)}KB, maxSingle=${(this.MAX_SINGLE_FILE_SIZE / 1024).toFixed(0)}KB`);
|
|
38
|
+
logger.info(`๐ก Strategy: Collect ALL files โ Cache โ Return summary/partial data to fit MCP limits`);
|
|
39
|
+
}
|
|
40
|
+
setCacheEnabled(enabled) {
|
|
41
|
+
this.cacheEnabled = enabled;
|
|
42
|
+
logger.info(`Code cache ${enabled ? 'enabled' : 'disabled'}`);
|
|
43
|
+
}
|
|
44
|
+
async clearFileCache() {
|
|
45
|
+
await this.cache.clear();
|
|
46
|
+
}
|
|
47
|
+
async getFileCacheStats() {
|
|
48
|
+
return await this.cache.getStats();
|
|
49
|
+
}
|
|
50
|
+
async clearAllData() {
|
|
51
|
+
logger.info('๐งน Clearing all collected data...');
|
|
52
|
+
await this.cache.clear();
|
|
53
|
+
this.compressor.clearCache();
|
|
54
|
+
this.compressor.resetStats();
|
|
55
|
+
this.collectedUrls.clear();
|
|
56
|
+
logger.success('โ
All data cleared');
|
|
57
|
+
}
|
|
58
|
+
async getAllStats() {
|
|
59
|
+
const cacheStats = await this.cache.getStats();
|
|
60
|
+
const compressionStats = this.compressor.getStats();
|
|
61
|
+
return {
|
|
62
|
+
cache: cacheStats,
|
|
63
|
+
compression: {
|
|
64
|
+
...compressionStats,
|
|
65
|
+
cacheSize: this.compressor.getCacheSize(),
|
|
66
|
+
},
|
|
67
|
+
collector: {
|
|
68
|
+
collectedUrls: this.collectedUrls.size,
|
|
69
|
+
maxCollectedUrls: this.MAX_COLLECTED_URLS,
|
|
70
|
+
},
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
getCache() {
|
|
74
|
+
return this.cache;
|
|
75
|
+
}
|
|
76
|
+
getCompressor() {
|
|
77
|
+
return this.compressor;
|
|
78
|
+
}
|
|
79
|
+
cleanupCollectedUrls() {
|
|
80
|
+
if (this.collectedUrls.size > this.MAX_COLLECTED_URLS) {
|
|
81
|
+
logger.warn(`Collected URLs exceeded ${this.MAX_COLLECTED_URLS}, clearing...`);
|
|
82
|
+
const urls = Array.from(this.collectedUrls);
|
|
83
|
+
this.collectedUrls.clear();
|
|
84
|
+
urls.slice(-Math.floor(this.MAX_COLLECTED_URLS / 2)).forEach(url => this.collectedUrls.add(url));
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
async init() {
|
|
88
|
+
if (this.browser) {
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
logger.info('Initializing browser with anti-detection...');
|
|
92
|
+
this.browser = await puppeteer.launch({
|
|
93
|
+
headless: this.config.headless,
|
|
94
|
+
args: [
|
|
95
|
+
'--no-sandbox',
|
|
96
|
+
'--disable-setuid-sandbox',
|
|
97
|
+
'--disable-dev-shm-usage',
|
|
98
|
+
'--disable-blink-features=AutomationControlled',
|
|
99
|
+
'--disable-web-security',
|
|
100
|
+
'--disable-features=IsolateOrigins,site-per-process',
|
|
101
|
+
`--window-size=${this.viewport.width},${this.viewport.height}`,
|
|
102
|
+
'--ignore-certificate-errors',
|
|
103
|
+
],
|
|
104
|
+
defaultViewport: this.viewport,
|
|
105
|
+
});
|
|
106
|
+
this.browser.on('disconnected', () => {
|
|
107
|
+
logger.warn('โ ๏ธ Browser disconnected');
|
|
108
|
+
this.browser = null;
|
|
109
|
+
if (this.cdpSession) {
|
|
110
|
+
this.cdpSession = null;
|
|
111
|
+
this.cdpListeners = {};
|
|
112
|
+
}
|
|
113
|
+
});
|
|
114
|
+
logger.success('Browser initialized with enhanced anti-detection');
|
|
115
|
+
}
|
|
116
|
+
async close() {
|
|
117
|
+
await this.clearAllData();
|
|
118
|
+
if (this.browser) {
|
|
119
|
+
await this.browser.close();
|
|
120
|
+
this.browser = null;
|
|
121
|
+
logger.info('Browser closed and all data cleared');
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
async getActivePage() {
|
|
125
|
+
if (!this.browser) {
|
|
126
|
+
await this.init();
|
|
127
|
+
}
|
|
128
|
+
const pages = await this.browser.pages();
|
|
129
|
+
if (pages.length === 0) {
|
|
130
|
+
return await this.browser.newPage();
|
|
131
|
+
}
|
|
132
|
+
const lastPage = pages[pages.length - 1];
|
|
133
|
+
if (!lastPage) {
|
|
134
|
+
throw new Error('Failed to get active page');
|
|
135
|
+
}
|
|
136
|
+
return lastPage;
|
|
137
|
+
}
|
|
138
|
+
async createPage(url) {
|
|
139
|
+
if (!this.browser) {
|
|
140
|
+
await this.init();
|
|
141
|
+
}
|
|
142
|
+
const page = await this.browser.newPage();
|
|
143
|
+
await page.setUserAgent(this.userAgent);
|
|
144
|
+
await this.applyAntiDetection(page);
|
|
145
|
+
if (url) {
|
|
146
|
+
await page.goto(url, {
|
|
147
|
+
waitUntil: 'networkidle2',
|
|
148
|
+
timeout: this.config.timeout,
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
logger.info(`New page created${url ? `: ${url}` : ''}`);
|
|
152
|
+
return page;
|
|
153
|
+
}
|
|
154
|
+
async applyAntiDetection(page) {
|
|
155
|
+
await page.evaluateOnNewDocument(() => {
|
|
156
|
+
Object.defineProperty(navigator, 'webdriver', {
|
|
157
|
+
get: () => false,
|
|
158
|
+
});
|
|
159
|
+
Object.defineProperty(navigator, 'plugins', {
|
|
160
|
+
get: () => [1, 2, 3, 4, 5],
|
|
161
|
+
});
|
|
162
|
+
Object.defineProperty(navigator, 'languages', {
|
|
163
|
+
get: () => ['en-US', 'en'],
|
|
164
|
+
});
|
|
165
|
+
if (!window.chrome) {
|
|
166
|
+
window.chrome = {
|
|
167
|
+
runtime: {},
|
|
168
|
+
loadTimes: function () { },
|
|
169
|
+
csi: function () { },
|
|
170
|
+
app: {},
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
const originalQuery = window.navigator.permissions.query;
|
|
174
|
+
window.navigator.permissions.query = (parameters) => {
|
|
175
|
+
if (parameters.name === 'notifications') {
|
|
176
|
+
return Promise.resolve({ state: 'denied' });
|
|
177
|
+
}
|
|
178
|
+
return originalQuery(parameters);
|
|
179
|
+
};
|
|
180
|
+
});
|
|
181
|
+
}
|
|
182
|
+
async getStatus() {
|
|
183
|
+
if (!this.browser) {
|
|
184
|
+
return {
|
|
185
|
+
running: false,
|
|
186
|
+
pagesCount: 0,
|
|
187
|
+
};
|
|
188
|
+
}
|
|
189
|
+
try {
|
|
190
|
+
const pages = await this.browser.pages();
|
|
191
|
+
const version = await this.browser.version();
|
|
192
|
+
return {
|
|
193
|
+
running: true,
|
|
194
|
+
pagesCount: pages.length,
|
|
195
|
+
version,
|
|
196
|
+
};
|
|
197
|
+
}
|
|
198
|
+
catch (error) {
|
|
199
|
+
logger.debug('Browser not running or disconnected:', error);
|
|
200
|
+
return {
|
|
201
|
+
running: false,
|
|
202
|
+
pagesCount: 0,
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
async collect(options) {
|
|
207
|
+
const startTime = Date.now();
|
|
208
|
+
logger.info(`Collecting code from: ${options.url}`);
|
|
209
|
+
if (this.cacheEnabled) {
|
|
210
|
+
const cached = await this.cache.get(options.url, options);
|
|
211
|
+
if (cached) {
|
|
212
|
+
logger.info(`โ
Cache hit for: ${options.url}`);
|
|
213
|
+
return cached;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
await this.init();
|
|
217
|
+
if (!this.browser) {
|
|
218
|
+
throw new Error('Browser not initialized');
|
|
219
|
+
}
|
|
220
|
+
const page = await this.browser.newPage();
|
|
221
|
+
try {
|
|
222
|
+
page.setDefaultTimeout(options.timeout || this.config.timeout);
|
|
223
|
+
await page.setUserAgent(this.userAgent);
|
|
224
|
+
await this.applyAntiDetection(page);
|
|
225
|
+
const files = [];
|
|
226
|
+
this.cdpSession = await page.createCDPSession();
|
|
227
|
+
await this.cdpSession.send('Network.enable');
|
|
228
|
+
await this.cdpSession.send('Runtime.enable');
|
|
229
|
+
this.cdpListeners.responseReceived = async (params) => {
|
|
230
|
+
const { response, requestId, type } = params;
|
|
231
|
+
const url = response.url;
|
|
232
|
+
if (files.length >= this.MAX_FILES_PER_COLLECT) {
|
|
233
|
+
if (files.length === this.MAX_FILES_PER_COLLECT) {
|
|
234
|
+
logger.warn(`โ ๏ธ Reached max files limit (${this.MAX_FILES_PER_COLLECT}), will skip remaining files`);
|
|
235
|
+
}
|
|
236
|
+
return;
|
|
237
|
+
}
|
|
238
|
+
this.cleanupCollectedUrls();
|
|
239
|
+
if (type === 'Script' ||
|
|
240
|
+
response.mimeType?.includes('javascript') ||
|
|
241
|
+
url.endsWith('.js')) {
|
|
242
|
+
try {
|
|
243
|
+
const { body, base64Encoded } = await this.cdpSession.send('Network.getResponseBody', {
|
|
244
|
+
requestId,
|
|
245
|
+
});
|
|
246
|
+
const content = base64Encoded ? Buffer.from(body, 'base64').toString('utf-8') : body;
|
|
247
|
+
const contentSize = content.length;
|
|
248
|
+
let finalContent = content;
|
|
249
|
+
let truncated = false;
|
|
250
|
+
if (contentSize > this.MAX_SINGLE_FILE_SIZE) {
|
|
251
|
+
finalContent = content.substring(0, this.MAX_SINGLE_FILE_SIZE);
|
|
252
|
+
truncated = true;
|
|
253
|
+
logger.warn(`[CDP] Large file truncated: ${url} (${(contentSize / 1024).toFixed(2)} KB -> ${(this.MAX_SINGLE_FILE_SIZE / 1024).toFixed(2)} KB)`);
|
|
254
|
+
}
|
|
255
|
+
if (!this.collectedUrls.has(url)) {
|
|
256
|
+
this.collectedUrls.add(url);
|
|
257
|
+
const file = {
|
|
258
|
+
url,
|
|
259
|
+
content: finalContent,
|
|
260
|
+
size: finalContent.length,
|
|
261
|
+
type: 'external',
|
|
262
|
+
metadata: truncated ? {
|
|
263
|
+
truncated: true,
|
|
264
|
+
originalSize: contentSize,
|
|
265
|
+
truncatedSize: finalContent.length,
|
|
266
|
+
} : undefined,
|
|
267
|
+
};
|
|
268
|
+
files.push(file);
|
|
269
|
+
this.collectedFilesCache.set(url, file);
|
|
270
|
+
logger.debug(`[CDP] Collected (${files.length}/${this.MAX_FILES_PER_COLLECT}): ${url} (${(finalContent.length / 1024).toFixed(2)} KB)${truncated ? ' [TRUNCATED]' : ''}`);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
catch (error) {
|
|
274
|
+
logger.warn(`[CDP] Failed to get response body for: ${url}`, error);
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
};
|
|
278
|
+
this.cdpSession.on('Network.responseReceived', this.cdpListeners.responseReceived);
|
|
279
|
+
logger.info(`Navigating to: ${options.url}`);
|
|
280
|
+
await page.goto(options.url, {
|
|
281
|
+
waitUntil: 'networkidle2',
|
|
282
|
+
timeout: options.timeout || this.config.timeout,
|
|
283
|
+
});
|
|
284
|
+
if (options.includeInline !== false) {
|
|
285
|
+
logger.info('Collecting inline scripts...');
|
|
286
|
+
const inlineScripts = await this.collectInlineScripts(page);
|
|
287
|
+
files.push(...inlineScripts);
|
|
288
|
+
}
|
|
289
|
+
if (options.includeServiceWorker !== false) {
|
|
290
|
+
logger.info('Collecting Service Workers...');
|
|
291
|
+
const serviceWorkers = await this.collectServiceWorkers(page);
|
|
292
|
+
files.push(...serviceWorkers);
|
|
293
|
+
}
|
|
294
|
+
if (options.includeWebWorker !== false) {
|
|
295
|
+
logger.info('Collecting Web Workers...');
|
|
296
|
+
const webWorkers = await this.collectWebWorkers(page);
|
|
297
|
+
files.push(...webWorkers);
|
|
298
|
+
}
|
|
299
|
+
if (options.includeDynamic) {
|
|
300
|
+
logger.info('Waiting for dynamic scripts...');
|
|
301
|
+
await new Promise((resolve) => setTimeout(resolve, 3000));
|
|
302
|
+
}
|
|
303
|
+
if (this.cdpSession) {
|
|
304
|
+
if (this.cdpListeners.responseReceived) {
|
|
305
|
+
this.cdpSession.off('Network.responseReceived', this.cdpListeners.responseReceived);
|
|
306
|
+
}
|
|
307
|
+
await this.cdpSession.detach();
|
|
308
|
+
this.cdpSession = null;
|
|
309
|
+
this.cdpListeners = {};
|
|
310
|
+
}
|
|
311
|
+
const collectTime = Date.now() - startTime;
|
|
312
|
+
const totalSize = files.reduce((sum, file) => sum + file.size, 0);
|
|
313
|
+
const truncatedFiles = files.filter(f => f.metadata?.truncated);
|
|
314
|
+
if (truncatedFiles.length > 0) {
|
|
315
|
+
logger.warn(`โ ๏ธ ${truncatedFiles.length} files were truncated due to size limits`);
|
|
316
|
+
truncatedFiles.forEach(f => {
|
|
317
|
+
logger.warn(` - ${f.url}: ${(f.metadata?.originalSize / 1024).toFixed(2)} KB -> ${(f.size / 1024).toFixed(2)} KB`);
|
|
318
|
+
});
|
|
319
|
+
}
|
|
320
|
+
let processedFiles = files;
|
|
321
|
+
if (options.smartMode && options.smartMode !== 'full') {
|
|
322
|
+
try {
|
|
323
|
+
logger.info(`๐ง Applying smart collection mode: ${options.smartMode}`);
|
|
324
|
+
const smartOptions = {
|
|
325
|
+
mode: options.smartMode,
|
|
326
|
+
maxTotalSize: options.maxTotalSize,
|
|
327
|
+
maxFileSize: options.maxFileSize,
|
|
328
|
+
priorities: options.priorities,
|
|
329
|
+
};
|
|
330
|
+
const smartResult = await this.smartCollector.smartCollect(page, files, smartOptions);
|
|
331
|
+
if (options.smartMode === 'summary') {
|
|
332
|
+
logger.info(`๐ Returning ${smartResult.length} code summaries`);
|
|
333
|
+
if (Array.isArray(smartResult) && smartResult.length > 0 && smartResult[0] && 'hasEncryption' in smartResult[0]) {
|
|
334
|
+
return {
|
|
335
|
+
files: [],
|
|
336
|
+
summaries: smartResult,
|
|
337
|
+
dependencies: { nodes: [], edges: [] },
|
|
338
|
+
totalSize: 0,
|
|
339
|
+
collectTime: Date.now() - startTime,
|
|
340
|
+
};
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
if (Array.isArray(smartResult) && (smartResult.length === 0 || (smartResult[0] && 'content' in smartResult[0]))) {
|
|
344
|
+
processedFiles = smartResult;
|
|
345
|
+
}
|
|
346
|
+
else {
|
|
347
|
+
logger.warn('Smart collection returned unexpected type, using original files');
|
|
348
|
+
processedFiles = files;
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
catch (error) {
|
|
352
|
+
logger.error('Smart collection failed, using original files:', error);
|
|
353
|
+
processedFiles = files;
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
if (options.compress) {
|
|
357
|
+
try {
|
|
358
|
+
logger.info(`๐๏ธ Compressing ${processedFiles.length} files with enhanced compressor...`);
|
|
359
|
+
const filesToCompress = processedFiles
|
|
360
|
+
.filter(file => this.compressor.shouldCompress(file.content))
|
|
361
|
+
.map(file => ({
|
|
362
|
+
url: file.url,
|
|
363
|
+
content: file.content,
|
|
364
|
+
}));
|
|
365
|
+
if (filesToCompress.length === 0) {
|
|
366
|
+
logger.info('No files need compression (all below threshold)');
|
|
367
|
+
}
|
|
368
|
+
else {
|
|
369
|
+
const compressedResults = await this.compressor.compressBatch(filesToCompress, {
|
|
370
|
+
level: undefined,
|
|
371
|
+
useCache: true,
|
|
372
|
+
maxRetries: 3,
|
|
373
|
+
concurrency: 5,
|
|
374
|
+
onProgress: (progress) => {
|
|
375
|
+
if (progress % 25 === 0) {
|
|
376
|
+
logger.debug(`Compression progress: ${progress.toFixed(0)}%`);
|
|
377
|
+
}
|
|
378
|
+
},
|
|
379
|
+
});
|
|
380
|
+
const compressedMap = new Map(compressedResults.map(r => [r.url, r]));
|
|
381
|
+
for (const file of processedFiles) {
|
|
382
|
+
const compressed = compressedMap.get(file.url);
|
|
383
|
+
if (compressed) {
|
|
384
|
+
file.metadata = {
|
|
385
|
+
...file.metadata,
|
|
386
|
+
compressed: true,
|
|
387
|
+
originalSize: compressed.originalSize,
|
|
388
|
+
compressedSize: compressed.compressedSize,
|
|
389
|
+
compressionRatio: compressed.compressionRatio,
|
|
390
|
+
};
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
const stats = this.compressor.getStats();
|
|
394
|
+
logger.info(`โ
Compressed ${compressedResults.length}/${processedFiles.length} files`);
|
|
395
|
+
logger.info(`๐ Compression stats: ${(stats.totalOriginalSize / 1024).toFixed(2)} KB -> ${(stats.totalCompressedSize / 1024).toFixed(2)} KB (${stats.averageRatio.toFixed(1)}% reduction)`);
|
|
396
|
+
logger.info(`โก Cache: ${stats.cacheHits} hits, ${stats.cacheMisses} misses (${stats.cacheHits > 0 ? ((stats.cacheHits / (stats.cacheHits + stats.cacheMisses)) * 100).toFixed(1) : 0}% hit rate)`);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
catch (error) {
|
|
400
|
+
logger.error('Compression failed:', error);
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
const dependencies = this.analyzeDependencies(processedFiles);
|
|
404
|
+
logger.success(`Collected ${processedFiles.length} files (${(totalSize / 1024).toFixed(2)} KB) in ${collectTime}ms`);
|
|
405
|
+
const result = {
|
|
406
|
+
files: processedFiles,
|
|
407
|
+
dependencies,
|
|
408
|
+
totalSize,
|
|
409
|
+
collectTime,
|
|
410
|
+
};
|
|
411
|
+
if (this.cacheEnabled) {
|
|
412
|
+
await this.cache.set(options.url, result, options);
|
|
413
|
+
logger.debug(`๐พ Saved to cache: ${options.url}`);
|
|
414
|
+
}
|
|
415
|
+
return result;
|
|
416
|
+
}
|
|
417
|
+
catch (error) {
|
|
418
|
+
logger.error('Code collection failed', error);
|
|
419
|
+
throw error;
|
|
420
|
+
}
|
|
421
|
+
finally {
|
|
422
|
+
await page.close();
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
async collectInlineScripts(page) {
|
|
426
|
+
const scripts = await page.evaluate((maxSingleSize) => {
|
|
427
|
+
const scriptElements = Array.from(document.querySelectorAll('script'));
|
|
428
|
+
return scriptElements
|
|
429
|
+
.filter((script) => !script.src && script.textContent)
|
|
430
|
+
.map((script, index) => {
|
|
431
|
+
let content = script.textContent || '';
|
|
432
|
+
const originalSize = content.length;
|
|
433
|
+
let truncated = false;
|
|
434
|
+
if (content.length > maxSingleSize) {
|
|
435
|
+
content = content.substring(0, maxSingleSize);
|
|
436
|
+
truncated = true;
|
|
437
|
+
}
|
|
438
|
+
return {
|
|
439
|
+
url: `inline-script-${index}`,
|
|
440
|
+
content,
|
|
441
|
+
size: content.length,
|
|
442
|
+
type: 'inline',
|
|
443
|
+
metadata: {
|
|
444
|
+
scriptType: script.type || 'text/javascript',
|
|
445
|
+
async: script.async,
|
|
446
|
+
defer: script.defer,
|
|
447
|
+
integrity: script.integrity || undefined,
|
|
448
|
+
truncated,
|
|
449
|
+
originalSize: truncated ? originalSize : undefined,
|
|
450
|
+
},
|
|
451
|
+
};
|
|
452
|
+
});
|
|
453
|
+
}, this.MAX_SINGLE_FILE_SIZE);
|
|
454
|
+
const limitedScripts = scripts.slice(0, this.MAX_FILES_PER_COLLECT);
|
|
455
|
+
if (scripts.length > limitedScripts.length) {
|
|
456
|
+
logger.warn(`โ ๏ธ Found ${scripts.length} inline scripts, limiting to ${this.MAX_FILES_PER_COLLECT}`);
|
|
457
|
+
}
|
|
458
|
+
const truncatedCount = limitedScripts.filter(s => s.metadata?.truncated).length;
|
|
459
|
+
if (truncatedCount > 0) {
|
|
460
|
+
logger.warn(`โ ๏ธ ${truncatedCount} inline scripts were truncated due to size limits`);
|
|
461
|
+
}
|
|
462
|
+
logger.debug(`Collected ${limitedScripts.length} inline scripts`);
|
|
463
|
+
return limitedScripts;
|
|
464
|
+
}
|
|
465
|
+
async collectServiceWorkers(page) {
|
|
466
|
+
try {
|
|
467
|
+
const serviceWorkers = await page.evaluate(async () => {
|
|
468
|
+
if (!('serviceWorker' in navigator)) {
|
|
469
|
+
return [];
|
|
470
|
+
}
|
|
471
|
+
const registrations = await navigator.serviceWorker.getRegistrations();
|
|
472
|
+
const workers = [];
|
|
473
|
+
for (const registration of registrations) {
|
|
474
|
+
const worker = registration.active || registration.installing || registration.waiting;
|
|
475
|
+
if (worker && worker.scriptURL) {
|
|
476
|
+
workers.push({
|
|
477
|
+
url: worker.scriptURL,
|
|
478
|
+
scope: registration.scope,
|
|
479
|
+
state: worker.state,
|
|
480
|
+
});
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
return workers;
|
|
484
|
+
});
|
|
485
|
+
const files = [];
|
|
486
|
+
for (const worker of serviceWorkers) {
|
|
487
|
+
try {
|
|
488
|
+
const content = await page.evaluate(async (url) => {
|
|
489
|
+
const response = await fetch(url);
|
|
490
|
+
return await response.text();
|
|
491
|
+
}, worker.url);
|
|
492
|
+
if (content) {
|
|
493
|
+
files.push({
|
|
494
|
+
url: worker.url,
|
|
495
|
+
content,
|
|
496
|
+
size: content.length,
|
|
497
|
+
type: 'service-worker',
|
|
498
|
+
});
|
|
499
|
+
logger.debug(`Collected Service Worker: ${worker.url}`);
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
catch (error) {
|
|
503
|
+
logger.warn(`Failed to collect Service Worker: ${worker.url}`, error);
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
return files;
|
|
507
|
+
}
|
|
508
|
+
catch (error) {
|
|
509
|
+
logger.warn('Service Worker collection failed', error);
|
|
510
|
+
return [];
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
async collectWebWorkers(page) {
|
|
514
|
+
try {
|
|
515
|
+
await page.evaluateOnNewDocument(() => {
|
|
516
|
+
const originalWorker = window.Worker;
|
|
517
|
+
const workerUrls = [];
|
|
518
|
+
window.Worker = function (scriptURL, options) {
|
|
519
|
+
workerUrls.push(scriptURL);
|
|
520
|
+
window.__workerUrls = workerUrls;
|
|
521
|
+
return new originalWorker(scriptURL, options);
|
|
522
|
+
};
|
|
523
|
+
});
|
|
524
|
+
const workerUrls = (await page.evaluate(() => window.__workerUrls || []));
|
|
525
|
+
const files = [];
|
|
526
|
+
for (const url of workerUrls) {
|
|
527
|
+
try {
|
|
528
|
+
const absoluteUrl = new URL(url, page.url()).href;
|
|
529
|
+
const content = await page.evaluate(async (workerUrl) => {
|
|
530
|
+
const response = await fetch(workerUrl);
|
|
531
|
+
return await response.text();
|
|
532
|
+
}, absoluteUrl);
|
|
533
|
+
if (content) {
|
|
534
|
+
files.push({
|
|
535
|
+
url: absoluteUrl,
|
|
536
|
+
content,
|
|
537
|
+
size: content.length,
|
|
538
|
+
type: 'web-worker',
|
|
539
|
+
});
|
|
540
|
+
logger.debug(`Collected Web Worker: ${absoluteUrl}`);
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
catch (error) {
|
|
544
|
+
logger.warn(`Failed to collect Web Worker: ${url}`, error);
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
return files;
|
|
548
|
+
}
|
|
549
|
+
catch (error) {
|
|
550
|
+
logger.warn('Web Worker collection failed', error);
|
|
551
|
+
return [];
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
analyzeDependencies(files) {
|
|
555
|
+
const nodes = [];
|
|
556
|
+
const edges = [];
|
|
557
|
+
files.forEach((file) => {
|
|
558
|
+
nodes.push({
|
|
559
|
+
id: file.url,
|
|
560
|
+
url: file.url,
|
|
561
|
+
type: file.type,
|
|
562
|
+
});
|
|
563
|
+
});
|
|
564
|
+
files.forEach((file) => {
|
|
565
|
+
const dependencies = this.extractDependencies(file.content);
|
|
566
|
+
dependencies.forEach((dep) => {
|
|
567
|
+
const targetFile = files.find((f) => f.url.includes(dep) || f.url.endsWith(dep) || f.url.endsWith(`${dep}.js`));
|
|
568
|
+
if (targetFile) {
|
|
569
|
+
edges.push({
|
|
570
|
+
from: file.url,
|
|
571
|
+
to: targetFile.url,
|
|
572
|
+
type: 'import',
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
});
|
|
576
|
+
});
|
|
577
|
+
logger.debug(`Dependency graph: ${nodes.length} nodes, ${edges.length} edges`);
|
|
578
|
+
return { nodes, edges };
|
|
579
|
+
}
|
|
580
|
+
extractDependencies(code) {
|
|
581
|
+
const dependencies = [];
|
|
582
|
+
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
583
|
+
let match;
|
|
584
|
+
while ((match = importRegex.exec(code)) !== null) {
|
|
585
|
+
if (match[1])
|
|
586
|
+
dependencies.push(match[1]);
|
|
587
|
+
}
|
|
588
|
+
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
589
|
+
while ((match = requireRegex.exec(code)) !== null) {
|
|
590
|
+
if (match[1])
|
|
591
|
+
dependencies.push(match[1]);
|
|
592
|
+
}
|
|
593
|
+
const dynamicImportRegex = /import\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
594
|
+
while ((match = dynamicImportRegex.exec(code)) !== null) {
|
|
595
|
+
if (match[1])
|
|
596
|
+
dependencies.push(match[1]);
|
|
597
|
+
}
|
|
598
|
+
return [...new Set(dependencies)];
|
|
599
|
+
}
|
|
600
|
+
shouldCollectUrl(url, filterRules) {
|
|
601
|
+
if (!filterRules || filterRules.length === 0) {
|
|
602
|
+
return true;
|
|
603
|
+
}
|
|
604
|
+
for (const rule of filterRules) {
|
|
605
|
+
const regex = new RegExp(rule.replace(/\*/g, '.*'));
|
|
606
|
+
if (regex.test(url)) {
|
|
607
|
+
return true;
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
return false;
|
|
611
|
+
}
|
|
612
|
+
async navigateWithRetry(page, url, options, maxRetries = 3) {
|
|
613
|
+
let lastError = null;
|
|
614
|
+
for (let i = 0; i < maxRetries; i++) {
|
|
615
|
+
try {
|
|
616
|
+
await page.goto(url, options);
|
|
617
|
+
return;
|
|
618
|
+
}
|
|
619
|
+
catch (error) {
|
|
620
|
+
lastError = error;
|
|
621
|
+
logger.warn(`Navigation attempt ${i + 1}/${maxRetries} failed: ${error}`);
|
|
622
|
+
if (i < maxRetries - 1) {
|
|
623
|
+
await new Promise((resolve) => setTimeout(resolve, 1000 * (i + 1)));
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
throw lastError || new Error('Navigation failed after retries');
|
|
628
|
+
}
|
|
629
|
+
async getPerformanceMetrics(page) {
|
|
630
|
+
try {
|
|
631
|
+
const metrics = await page.evaluate(() => {
|
|
632
|
+
const perf = performance.getEntriesByType('navigation')[0];
|
|
633
|
+
return {
|
|
634
|
+
domContentLoaded: perf.domContentLoadedEventEnd - perf.domContentLoadedEventStart,
|
|
635
|
+
loadComplete: perf.loadEventEnd - perf.loadEventStart,
|
|
636
|
+
domInteractive: perf.domInteractive - perf.fetchStart,
|
|
637
|
+
totalTime: perf.loadEventEnd - perf.fetchStart,
|
|
638
|
+
};
|
|
639
|
+
});
|
|
640
|
+
return metrics;
|
|
641
|
+
}
|
|
642
|
+
catch (error) {
|
|
643
|
+
logger.warn('Failed to get performance metrics', error);
|
|
644
|
+
return {};
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
async collectPageMetadata(page) {
|
|
648
|
+
try {
|
|
649
|
+
const metadata = await page.evaluate(() => {
|
|
650
|
+
return {
|
|
651
|
+
title: document.title,
|
|
652
|
+
url: window.location.href,
|
|
653
|
+
userAgent: navigator.userAgent,
|
|
654
|
+
viewport: {
|
|
655
|
+
width: window.innerWidth,
|
|
656
|
+
height: window.innerHeight,
|
|
657
|
+
},
|
|
658
|
+
cookies: document.cookie,
|
|
659
|
+
localStorage: Object.keys(localStorage).length,
|
|
660
|
+
sessionStorage: Object.keys(sessionStorage).length,
|
|
661
|
+
};
|
|
662
|
+
});
|
|
663
|
+
return metadata;
|
|
664
|
+
}
|
|
665
|
+
catch (error) {
|
|
666
|
+
logger.warn('Failed to collect page metadata', error);
|
|
667
|
+
return {};
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
getBrowser() {
|
|
671
|
+
return this.browser;
|
|
672
|
+
}
|
|
673
|
+
getCollectionStats() {
|
|
674
|
+
return {
|
|
675
|
+
totalCollected: this.collectedUrls.size,
|
|
676
|
+
uniqueUrls: this.collectedUrls.size,
|
|
677
|
+
};
|
|
678
|
+
}
|
|
679
|
+
clearCache() {
|
|
680
|
+
this.collectedUrls.clear();
|
|
681
|
+
logger.info('Collection cache cleared');
|
|
682
|
+
}
|
|
683
|
+
getCollectedFilesSummary() {
|
|
684
|
+
const summaries = Array.from(this.collectedFilesCache.values()).map(file => ({
|
|
685
|
+
url: file.url,
|
|
686
|
+
size: file.size,
|
|
687
|
+
type: file.type,
|
|
688
|
+
truncated: typeof file.metadata?.truncated === 'boolean' ? file.metadata.truncated : undefined,
|
|
689
|
+
originalSize: typeof file.metadata?.originalSize === 'number' ? file.metadata.originalSize : undefined,
|
|
690
|
+
}));
|
|
691
|
+
logger.info(`๐ Returning summary of ${summaries.length} collected files`);
|
|
692
|
+
return summaries;
|
|
693
|
+
}
|
|
694
|
+
getFileByUrl(url) {
|
|
695
|
+
const file = this.collectedFilesCache.get(url);
|
|
696
|
+
if (file) {
|
|
697
|
+
logger.info(`๐ Returning file: ${url} (${(file.size / 1024).toFixed(2)} KB)`);
|
|
698
|
+
return file;
|
|
699
|
+
}
|
|
700
|
+
logger.warn(`โ ๏ธ File not found: ${url}`);
|
|
701
|
+
return null;
|
|
702
|
+
}
|
|
703
|
+
getFilesByPattern(pattern, limit = 20, maxTotalSize = this.MAX_RESPONSE_SIZE) {
|
|
704
|
+
const regex = new RegExp(pattern);
|
|
705
|
+
const matched = [];
|
|
706
|
+
for (const file of this.collectedFilesCache.values()) {
|
|
707
|
+
if (regex.test(file.url)) {
|
|
708
|
+
matched.push(file);
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
const returned = [];
|
|
712
|
+
let totalSize = 0;
|
|
713
|
+
let truncated = false;
|
|
714
|
+
for (let i = 0; i < matched.length && i < limit; i++) {
|
|
715
|
+
const file = matched[i];
|
|
716
|
+
if (file && totalSize + file.size <= maxTotalSize) {
|
|
717
|
+
returned.push(file);
|
|
718
|
+
totalSize += file.size;
|
|
719
|
+
}
|
|
720
|
+
else {
|
|
721
|
+
truncated = true;
|
|
722
|
+
break;
|
|
723
|
+
}
|
|
724
|
+
}
|
|
725
|
+
if (truncated || matched.length > limit) {
|
|
726
|
+
logger.warn(`โ ๏ธ Pattern "${pattern}" matched ${matched.length} files, returning ${returned.length} (limited by size/count)`);
|
|
727
|
+
}
|
|
728
|
+
logger.info(`๐ Pattern "${pattern}": matched ${matched.length}, returning ${returned.length} files (${(totalSize / 1024).toFixed(2)} KB)`);
|
|
729
|
+
return {
|
|
730
|
+
files: returned,
|
|
731
|
+
totalSize,
|
|
732
|
+
matched: matched.length,
|
|
733
|
+
returned: returned.length,
|
|
734
|
+
truncated,
|
|
735
|
+
};
|
|
736
|
+
}
|
|
737
|
+
getTopPriorityFiles(topN = 10, maxTotalSize = this.MAX_RESPONSE_SIZE) {
|
|
738
|
+
const allFiles = Array.from(this.collectedFilesCache.values());
|
|
739
|
+
const scoredFiles = allFiles.map(file => ({
|
|
740
|
+
file,
|
|
741
|
+
score: this.calculatePriorityScore(file),
|
|
742
|
+
}));
|
|
743
|
+
scoredFiles.sort((a, b) => b.score - a.score);
|
|
744
|
+
const selected = [];
|
|
745
|
+
let totalSize = 0;
|
|
746
|
+
for (let i = 0; i < Math.min(topN, scoredFiles.length); i++) {
|
|
747
|
+
const item = scoredFiles[i];
|
|
748
|
+
if (item && item.file && totalSize + item.file.size <= maxTotalSize) {
|
|
749
|
+
selected.push(item.file);
|
|
750
|
+
totalSize += item.file.size;
|
|
751
|
+
}
|
|
752
|
+
else {
|
|
753
|
+
break;
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
logger.info(`โญ Returning top ${selected.length}/${allFiles.length} priority files (${(totalSize / 1024).toFixed(2)} KB)`);
|
|
757
|
+
return {
|
|
758
|
+
files: selected,
|
|
759
|
+
totalSize,
|
|
760
|
+
totalFiles: allFiles.length,
|
|
761
|
+
};
|
|
762
|
+
}
|
|
763
|
+
calculatePriorityScore(file) {
|
|
764
|
+
let score = 0;
|
|
765
|
+
if (file.type === 'inline')
|
|
766
|
+
score += 10;
|
|
767
|
+
else if (file.type === 'external')
|
|
768
|
+
score += 5;
|
|
769
|
+
if (file.size < 10 * 1024)
|
|
770
|
+
score += 15;
|
|
771
|
+
else if (file.size < 50 * 1024)
|
|
772
|
+
score += 10;
|
|
773
|
+
else if (file.size > 200 * 1024)
|
|
774
|
+
score -= 10;
|
|
775
|
+
const url = file.url.toLowerCase();
|
|
776
|
+
if (url.includes('main') || url.includes('index') || url.includes('app'))
|
|
777
|
+
score += 20;
|
|
778
|
+
if (url.includes('crypto') || url.includes('encrypt') || url.includes('sign'))
|
|
779
|
+
score += 30;
|
|
780
|
+
if (url.includes('api') || url.includes('request') || url.includes('ajax'))
|
|
781
|
+
score += 25;
|
|
782
|
+
if (url.includes('core') || url.includes('common') || url.includes('util'))
|
|
783
|
+
score += 15;
|
|
784
|
+
if (url.includes('vendor') || url.includes('lib') || url.includes('jquery') || url.includes('react'))
|
|
785
|
+
score -= 20;
|
|
786
|
+
if (url.includes('node_modules') || url.includes('bundle'))
|
|
787
|
+
score -= 30;
|
|
788
|
+
return score;
|
|
789
|
+
}
|
|
790
|
+
clearCollectedFilesCache() {
|
|
791
|
+
const count = this.collectedFilesCache.size;
|
|
792
|
+
this.collectedFilesCache.clear();
|
|
793
|
+
logger.info(`๐งน Cleared collected files cache (${count} files)`);
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
//# sourceMappingURL=CodeCollector.js.map
|