@jshookmcp/jshook 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -5
- package/README.zh.md +36 -5
- package/dist/{AntiCheatDetector-S8VRj-dD.mjs → AntiCheatDetector-CqGDXmfc.mjs} +160 -54
- package/dist/{CodeInjector-4Z3ngPoX.mjs → CodeInjector-BdjRfNx7.mjs} +5 -5
- package/dist/ConsoleMonitor-DykL3IAw.mjs +2269 -0
- package/dist/{DarwinAPI-B8hg_yhz.mjs → DarwinAPI-ETyy0xyo.mjs} +1 -1
- package/dist/DetailedDataManager-HT49OrvF.mjs +217 -0
- package/dist/EventBus-DFKvADm3.mjs +141 -0
- package/dist/EvidenceGraphBridge-318Oi0Lf.mjs +153 -0
- package/dist/{ExtensionManager-D5-bO9D8.mjs → ExtensionManager-BDMsY2Dz.mjs} +27 -13
- package/dist/{FingerprintManager-BVxFJL2-.mjs → FingerprintManager-BN4UQWnX.mjs} +1 -1
- package/dist/{HardwareBreakpoint-DK1yjWkV.mjs → HardwareBreakpoint-Cc2AFq1Y.mjs} +3 -3
- package/dist/{HeapAnalyzer-CEbo10xU.mjs → HeapAnalyzer-DruMgsgj.mjs} +21 -21
- package/dist/HookGeneratorBuilders.core.generators.storage-CTbB4Lcx.mjs +566 -0
- package/dist/InstrumentationSession-DLH0vd-z.mjs +244 -0
- package/dist/{MemoryController-DdtnBdD4.mjs → MemoryController-CMtviNW_.mjs} +3 -3
- package/dist/{MemoryScanSession-RMixN3bX.mjs → MemoryScanSession-ITgb_NMi.mjs} +81 -78
- package/dist/{MemoryScanner-QjK4ld0B.mjs → MemoryScanner-CiL7Z3ey.mjs} +50 -21
- package/dist/{NativeMemoryManager.impl-CB6gJ0NM.mjs → NativeMemoryManager.impl-D9Lkovvn.mjs} +20 -56
- package/dist/{NativeMemoryManager.utils-BML4q1ry.mjs → NativeMemoryManager.utils-BBlAixF5.mjs} +1 -1
- package/dist/{PEAnalyzer-CK0xe0Fs.mjs → PEAnalyzer-DMQ44gen.mjs} +16 -16
- package/dist/PageController-BPJNqqBN.mjs +431 -0
- package/dist/{PointerChainEngine-Cd73qu5b.mjs → PointerChainEngine-K7wN8Z-w.mjs} +10 -7
- package/dist/PrerequisiteError-TuyZIs6n.mjs +20 -0
- package/dist/ProcessRegistry-zGg12QbE.mjs +74 -0
- package/dist/ResponseBuilder-CJXWmWNw.mjs +143 -0
- package/dist/ReverseEvidenceGraph-C02-gXOh.mjs +269 -0
- package/dist/ScriptManager-ZuWD-0Jg.mjs +3003 -0
- package/dist/{Speedhack-CeF0XmEz.mjs → Speedhack-D-z0umeT.mjs} +2 -2
- package/dist/{StructureAnalyzer-D4GkMduU.mjs → StructureAnalyzer-Cav5AVSL.mjs} +9 -6
- package/dist/ToolCatalog-5OJdMiF0.mjs +582 -0
- package/dist/ToolError-jh9whhMd.mjs +15 -0
- package/dist/ToolProbe-DbCFGyrg.mjs +45 -0
- package/dist/ToolRegistry-B9krbTtI.mjs +180 -0
- package/dist/ToolRouter.policy-BGDAGyeH.mjs +344 -0
- package/dist/TraceRecorder-B41Z5XBj.mjs +1286 -0
- package/dist/{Win32API-Bc0QnQsN.mjs → Win32API-C2kjj0ze.mjs} +19 -13
- package/dist/{Win32Debug-DUHt9XUn.mjs → Win32Debug-CKrGOTpo.mjs} +3 -3
- package/dist/WorkflowEngine-DJ6M4opp.mjs +569 -0
- package/dist/analysis-BHeJW2Nb.mjs +1234 -0
- package/dist/antidebug-BRKeyt27.mjs +1081 -0
- package/dist/artifactRetention-CPXkUJXp.mjs +598 -0
- package/dist/artifacts-DkfosXH3.mjs +59 -0
- package/dist/authorization-schema-DRqyJMSk.mjs +31 -0
- package/dist/betterSqlite3-DLSBZodi.mjs +74 -0
- package/dist/binary-instrument--V3MAhJ4.mjs +971 -0
- package/dist/bind-helpers-ClV34xdn.mjs +42 -0
- package/dist/boringssl-inspector-Bo_LOLaS.mjs +180 -0
- package/dist/browser-Dx3_S2cG.mjs +4369 -0
- package/dist/capabilities-CcHlvWgK.mjs +33 -0
- package/dist/concurrency-Drev_Vz9.mjs +41 -0
- package/dist/{constants-CCvsN80K.mjs → constants-CDZLOoVv.mjs} +105 -48
- package/dist/coordination-DgItD9DL.mjs +259 -0
- package/dist/debugger-RS3RSAqs.mjs +1288 -0
- package/dist/definitions-BEoYofW5.mjs +47 -0
- package/dist/definitions-BRaefg3u.mjs +365 -0
- package/dist/definitions-BbkvZkiv.mjs +96 -0
- package/dist/definitions-BtWSHJ3o.mjs +17 -0
- package/dist/definitions-C1gCHO0i.mjs +43 -0
- package/dist/definitions-CDOg_b-l.mjs +138 -0
- package/dist/definitions-CVPD9hzZ.mjs +54 -0
- package/dist/definitions-Cea8Lgl7.mjs +94 -0
- package/dist/definitions-DAgIyjxM.mjs +10 -0
- package/dist/definitions-DJA27nsL.mjs +66 -0
- package/dist/definitions-DKPFU3LW.mjs +25 -0
- package/dist/definitions-DPRpZQ96.mjs +47 -0
- package/dist/definitions-DUE5gmdn.mjs +18 -0
- package/dist/definitions-DYVjOtxa.mjs +26 -0
- package/dist/definitions-DcYLVLCo.mjs +37 -0
- package/dist/definitions-Pp5LI2H4.mjs +27 -0
- package/dist/definitions-j9KdHVNR.mjs +14 -0
- package/dist/definitions-uzkjBwa7.mjs +258 -0
- package/dist/definitions-va-AnLuQ.mjs +28 -0
- package/dist/encoding-DJeqHmpd.mjs +1079 -0
- package/dist/evidence-graph-bridge-DcYizFk2.mjs +136 -0
- package/dist/{factory-CibqTNC8.mjs → factory-C90tBff6.mjs} +41 -56
- package/dist/flat-target-session-Dgax2Cy3.mjs +29 -0
- package/dist/graphql-CoHrhweh.mjs +1197 -0
- package/dist/handlers-4jmR0nMs.mjs +898 -0
- package/dist/handlers-BAHPxcch.mjs +789 -0
- package/dist/handlers-BOs9b907.mjs +2600 -0
- package/dist/handlers-BWXEy6ef.mjs +917 -0
- package/dist/handlers-Bndn6QvE.mjs +111 -0
- package/dist/handlers-BqC4bD4s.mjs +681 -0
- package/dist/handlers-BtYq60bM2.mjs +276 -0
- package/dist/handlers-BzgcB4iv.mjs +799 -0
- package/dist/handlers-CRyRWj2b.mjs +859 -0
- package/dist/handlers-CVv2H1uq.mjs +592 -0
- package/dist/handlers-Dl5a7JS4.mjs +572 -0
- package/dist/handlers-Dx2d7jt7.mjs +2537 -0
- package/dist/handlers-Dz9PYsCa.mjs +2805 -0
- package/dist/handlers-HujRKC3b.mjs +661 -0
- package/dist/handlers.impl-XWXkQfyi.mjs +807 -0
- package/dist/hooks-B1B8NRHL.mjs +898 -0
- package/dist/index.mjs +491 -259
- package/dist/{logger-BmWzC2lM.mjs → logger-Dh_xb7_2.mjs} +14 -6
- package/dist/maintenance-PRMkLVRW.mjs +835 -0
- package/dist/manifest-67Bok-Si.mjs +58 -0
- package/dist/manifest-6lNTMZAB2.mjs +87 -0
- package/dist/manifest-B2duEHiH.mjs +90 -0
- package/dist/manifest-B6EY9Vm8.mjs +57 -0
- package/dist/manifest-B6nKSbyY.mjs +95 -0
- package/dist/manifest-BL8AQNPF.mjs +106 -0
- package/dist/manifest-BSZvJJmV.mjs +47 -0
- package/dist/manifest-BU7qzUyX.mjs +418 -0
- package/dist/manifest-Bl62e8WK.mjs +49 -0
- package/dist/manifest-Bo5cXjdt.mjs +82 -0
- package/dist/manifest-BpS4gtUK.mjs +1347 -0
- package/dist/manifest-Bv65_e2W.mjs +101 -0
- package/dist/manifest-BytNIF4Z.mjs +117 -0
- package/dist/manifest-C-xtsjS3.mjs +81 -0
- package/dist/manifest-CDYl7OhA.mjs +66 -0
- package/dist/manifest-CRZ3xmkD.mjs +61 -0
- package/dist/manifest-CoW6u4Tp.mjs +132 -0
- package/dist/manifest-Cq5zN_8A.mjs +50 -0
- package/dist/manifest-D7YZM_2e.mjs +194 -0
- package/dist/manifest-DE_VrAeQ.mjs +314 -0
- package/dist/manifest-DGsXSCpT.mjs +39 -0
- package/dist/manifest-DJ2vfEuW.mjs +156 -0
- package/dist/manifest-DPXDYhEu.mjs +80 -0
- package/dist/manifest-Dd4fQb0a.mjs +322 -0
- package/dist/manifest-Deq6opGg.mjs +223 -0
- package/dist/manifest-DfJTafJK.mjs +37 -0
- package/dist/manifest-DgOdgN_j.mjs +50 -0
- package/dist/manifest-DlbMW4v4.mjs +47 -0
- package/dist/manifest-DmVfbH0w.mjs +374 -0
- package/dist/manifest-Dog6Ddjr.mjs +109 -0
- package/dist/manifest-DvgU5FWb.mjs +58 -0
- package/dist/manifest-HsfDBs7j.mjs +50 -0
- package/dist/manifest-I8oQHvCG.mjs +186 -0
- package/dist/manifest-NvH_a-av.mjs +786 -0
- package/dist/manifest-cEJU1v0Z.mjs +129 -0
- package/dist/manifest-wOl5XLB12.mjs +112 -0
- package/dist/modules-tZozf0LQ.mjs +10635 -0
- package/dist/mojo-ipc-DXNEXEqb.mjs +640 -0
- package/dist/network-CPVvwvFg.mjs +3852 -0
- package/dist/{artifacts-BbdOMET5.mjs → outputPaths-um7lCRY3.mjs} +219 -216
- package/dist/parse-args-B4cY5Vx5.mjs +39 -0
- package/dist/platform-CYeFoTWp.mjs +2161 -0
- package/dist/process-BTbgcVc6.mjs +1306 -0
- package/dist/proxy-r8YN6nP1.mjs +192 -0
- package/dist/registry-Bl8ZQW61.mjs +34 -0
- package/dist/response-CWhh2aLo.mjs +34 -0
- package/dist/server/plugin-api.mjs +2 -2
- package/dist/shared-state-board-BoZnSoj-.mjs +586 -0
- package/dist/sourcemap-BIDHUVXy.mjs +934 -0
- package/dist/ssrf-policy-Dsqd-DTX.mjs +166 -0
- package/dist/streaming-Dal6utPp.mjs +725 -0
- package/dist/tool-builder-BHJp32mV.mjs +186 -0
- package/dist/transform-DRVgGG90.mjs +1011 -0
- package/dist/types-Bx92KJfT.mjs +4 -0
- package/dist/wasm-BYx5UOeG.mjs +1044 -0
- package/dist/webcrack-Be0_FccV.mjs +747 -0
- package/dist/workflow-BpuKEtvn.mjs +725 -0
- package/package.json +82 -49
- package/dist/ExtensionManager-CPTJhHFg.mjs +0 -2
- package/dist/ToolCatalog-Bq4V2sbJ.mjs +0 -67201
- package/dist/{CacheAdapters-CzFNpD9a.mjs → CacheAdapters-jJFy20G-.mjs} +0 -0
- package/dist/{StealthVerifier-BzBCFiwx.mjs → StealthVerifier-BWmPgQsv.mjs} +0 -0
- package/dist/{VersionDetector-CNXcvD46.mjs → VersionDetector-K3V4vGsw.mjs} +0 -0
- package/dist/{formatAddress-ChCSIRWT.mjs → formatAddress-nnMvEohD.mjs} +0 -0
- package/dist/{types-BBjOqye-.mjs → types-DDBWs9UP.mjs} +1 -1
|
@@ -0,0 +1,3003 @@
|
|
|
1
|
+
import { t as logger } from "./logger-Dh_xb7_2.mjs";
|
|
2
|
+
import { B as DOM_QUERY_DEFAULT_LIMIT, V as DOM_WAIT_ELEMENT_TIMEOUT_MS } from "./constants-CDZLOoVv.mjs";
|
|
3
|
+
import { t as getCodeCacheDir } from "./outputPaths-um7lCRY3.mjs";
|
|
4
|
+
import { t as PrerequisiteError } from "./PrerequisiteError-TuyZIs6n.mjs";
|
|
5
|
+
import { n as detachFromFlatTarget, t as attachToFlatTarget } from "./flat-target-session-Dgax2Cy3.mjs";
|
|
6
|
+
import { createRequire } from "node:module";
|
|
7
|
+
import { existsSync } from "fs";
|
|
8
|
+
import path, { join } from "path";
|
|
9
|
+
import crypto, { createHash } from "crypto";
|
|
10
|
+
import { dirname, join as join$1 } from "node:path";
|
|
11
|
+
import { promisify } from "util";
|
|
12
|
+
import { connect, executablePath, launch } from "rebrowser-puppeteer-core";
|
|
13
|
+
import fs, { readFile } from "fs/promises";
|
|
14
|
+
import { gunzip, gzip } from "zlib";
|
|
15
|
+
import { homedir } from "os";
|
|
16
|
+
import { setImmediate } from "node:timers/promises";
|
|
17
|
+
//#region src/modules/collector/CodeCache.ts
|
|
18
|
+
/**
 * Two-tier cache for code-collection results.
 *
 * Entries live in an in-memory Map (bounded by MAX_MEMORY_CACHE_SIZE, oldest
 * insertion evicted first) and as one pretty-printed `<key>.json` file per
 * entry inside `cacheDir`. Keys are the MD5 hex digest of the JSON-serialised
 * `{ url, options }` pair; MD5 is used only as a fingerprint here, not for
 * security.
 */
var CodeCache = class CodeCache {
  /** Directory holding the `<key>.json` entry files. */
  cacheDir;
  /** Maximum entry age in milliseconds before it counts as expired. */
  maxAge;
  /** Disk budget in bytes; `cleanup()` trims to 80% of it once exceeded. */
  maxSize;
  memoryCache = /* @__PURE__ */ new Map();
  MAX_MEMORY_CACHE_SIZE = 100;
  writesSinceCleanup = 0;
  /** Number of `set()` calls between two disk cleanups. */
  static CLEANUP_INTERVAL = 20;

  /**
   * @param {object} [options]
   * @param {string} [options.cacheDir] Defaults to `getCodeCacheDir()`.
   * @param {number} [options.maxAge] Milliseconds; falsy values fall back to 24 hours.
   * @param {number} [options.maxSize] Bytes; falsy values fall back to 100 MiB.
   */
  constructor(options = {}) {
    this.cacheDir = options.cacheDir || getCodeCacheDir();
    this.maxAge = options.maxAge || 1440 * 60 * 1e3;
    this.maxSize = options.maxSize || 100 * 1024 * 1024;
  }

  /** Creates the cache directory. Failures are logged, never thrown. */
  async init() {
    try {
      await fs.mkdir(this.cacheDir, { recursive: true });
      logger.debug(`Cache directory initialized: ${this.cacheDir}`);
    } catch (error) {
      logger.error("Failed to initialize cache directory:", error);
    }
  }

  /**
   * Derives the cache key for a URL/options pair.
   * @returns {string} 32-character lowercase hex digest.
   */
  generateKey(url, options) {
    const data = JSON.stringify({
      url,
      options
    });
    return crypto.createHash("md5").update(data).digest("hex");
  }

  /** @returns {string} Absolute-or-relative path of the entry file for `key`. */
  getCachePath(key) {
    return path.join(this.cacheDir, `${key}.json`);
  }

  /** Returns `dependencies`, or an empty graph when it is null/undefined. */
  getDependenciesOrEmpty(dependencies) {
    return dependencies ?? {
      nodes: [],
      edges: []
    };
  }

  /** @returns {boolean} True when the entry is older than `maxAge`. */
  isExpired(entry) {
    return Date.now() - entry.timestamp > this.maxAge;
  }

  /** Projects a stored entry onto the result shape returned by `get()`. */
  toResult(entry) {
    return {
      files: entry.files,
      dependencies: this.getDependenciesOrEmpty(entry.dependencies),
      totalSize: entry.totalSize,
      collectTime: entry.collectTime,
      summaries: entry.summaries
    };
  }

  /**
   * Stores an entry in the memory tier and evicts the oldest inserted key
   * when the tier grows past MAX_MEMORY_CACHE_SIZE. Shared by `get()` and
   * `set()` so that both paths respect the same bound.
   */
  rememberInMemory(key, entry) {
    this.memoryCache.set(key, entry);
    if (this.memoryCache.size <= this.MAX_MEMORY_CACHE_SIZE) return;
    const oldestKey = this.memoryCache.keys().next().value;
    if (oldestKey) {
      this.memoryCache.delete(oldestKey);
      logger.debug(`Memory cache evicted: ${oldestKey}`);
    }
  }

  /**
   * Looks an entry up, first in memory and then on disk.
   *
   * Expired entries are removed from the tier they were found in. A disk hit
   * is promoted into the (bounded) memory tier.
   *
   * @returns {Promise<object|null>} The cached result, or null on a miss,
   *   an expired entry, or an unreadable/corrupt entry file.
   */
  async get(url, options) {
    const key = this.generateKey(url, options);
    const inMemory = this.memoryCache.get(key);
    if (inMemory !== undefined) {
      if (!this.isExpired(inMemory)) {
        logger.debug(`Cache hit (memory): ${url}`);
        return this.toResult(inMemory);
      }
      this.memoryCache.delete(key);
    }
    try {
      const cachePath = this.getCachePath(key);
      const entry = JSON.parse(await fs.readFile(cachePath, "utf-8"));
      if (this.isExpired(entry)) {
        logger.debug(`Cache expired: ${url}`);
        await fs.unlink(cachePath);
        return null;
      }
      this.rememberInMemory(key, entry);
      logger.debug(`Cache hit (disk): ${url}`);
      return this.toResult(entry);
    } catch (err) {
      // A missing file is the normal "not cached yet" case, not a failure.
      if (err?.code === "ENOENT") {
        logger.debug(`Cache miss: ${url}`);
        return null;
      }
      logger.warn(`Cache read failed for ${url}: ${err instanceof Error ? err.message : String(err)}`);
      return null;
    }
  }

  /**
   * Stores a collection result in both tiers.
   *
   * Disk write failures are logged and do not reject. Every
   * CLEANUP_INTERVAL-th call starts a disk cleanup in the background.
   */
  async set(url, result, options) {
    const key = this.generateKey(url, options);
    const hash = crypto.createHash("md5").update(JSON.stringify(result.files)).digest("hex");
    const entry = {
      url,
      files: result.files,
      dependencies: result.dependencies,
      totalSize: result.totalSize,
      collectTime: result.collectTime,
      summaries: result.summaries,
      timestamp: Date.now(),
      hash
    };
    this.rememberInMemory(key, entry);
    try {
      const cachePath = this.getCachePath(key);
      await fs.mkdir(this.cacheDir, { recursive: true });
      await fs.writeFile(cachePath, JSON.stringify(entry, null, 2), "utf-8");
      logger.debug(`Cache saved: ${url} (${(result.totalSize / 1024).toFixed(2)} KB)`);
    } catch (error) {
      logger.error("Failed to save cache:", error);
    }
    if (++this.writesSinceCleanup >= CodeCache.CLEANUP_INTERVAL) {
      this.writesSinceCleanup = 0;
      // Deliberately not awaited: cleanup() handles its own errors, and the
      // caller should not wait for a directory scan.
      void this.cleanup();
    }
  }

  /**
   * Trims the disk tier: when the combined size of all `.json` files exceeds
   * `maxSize`, the least recently modified files are deleted until the total
   * is at or below 80% of `maxSize`. Errors are logged, never thrown.
   */
  async cleanup() {
    try {
      const files = await fs.readdir(this.cacheDir);
      let totalSize = 0;
      const entries = [];
      for (const file of files) {
        if (!file.endsWith(".json")) continue;
        const filePath = path.join(this.cacheDir, file);
        const stats = await fs.stat(filePath);
        totalSize += stats.size;
        entries.push({
          file: filePath,
          mtime: stats.mtime,
          size: stats.size
        });
      }
      if (totalSize > this.maxSize) {
        // Oldest modification time first.
        entries.sort((a, b) => a.mtime.getTime() - b.mtime.getTime());
        let removedSize = 0;
        for (const entry of entries) {
          if (totalSize - removedSize <= this.maxSize * .8) break;
          await fs.unlink(entry.file);
          removedSize += entry.size;
          logger.debug(`Removed old cache: ${entry.file}`);
        }
        logger.info(`Cache cleanup: removed ${removedSize} bytes`);
      }
    } catch (error) {
      logger.error("Failed to cleanup cache:", error);
    }
  }

  /**
   * Empties the memory tier and deletes every `.json` file in `cacheDir`.
   * A missing cache directory is treated as already cleared.
   */
  async clear() {
    try {
      this.memoryCache.clear();
      const files = await fs.readdir(this.cacheDir);
      for (const file of files) if (file.endsWith(".json")) await fs.unlink(path.join(this.cacheDir, file));
      logger.info("All cache cleared");
    } catch (error) {
      if (error.code === "ENOENT") return;
      logger.error("Failed to clear cache:", error);
    }
  }

  /**
   * @returns {Promise<{memoryEntries: number, diskEntries: number, totalSize: number}>}
   *   Entry counts for both tiers and the byte size of the disk tier. Disk
   *   figures are zero when the directory cannot be read.
   */
  async getStats() {
    try {
      const files = await fs.readdir(this.cacheDir);
      let totalSize = 0;
      let diskEntries = 0;
      for (const file of files) {
        if (!file.endsWith(".json")) continue;
        const filePath = path.join(this.cacheDir, file);
        const stats = await fs.stat(filePath);
        totalSize += stats.size;
        diskEntries++;
      }
      return {
        memoryEntries: this.memoryCache.size,
        diskEntries,
        totalSize
      };
    } catch (error) {
      // Same policy as clear(): a directory that was never created simply
      // means an empty disk tier.
      if (error?.code !== "ENOENT") logger.error("Failed to get cache stats:", error);
      return {
        memoryEntries: this.memoryCache.size,
        diskEntries: 0,
        totalSize: 0
      };
    }
  }

  /**
   * Calls `get(url)` for each URL in order (with no options), which promotes
   * any valid disk entries into the memory tier.
   */
  async warmup(urls) {
    logger.info(`Warming up cache for ${urls.length} URLs...`);
    for (const url of urls) await this.get(url);
    logger.info("Cache warmup completed");
  }
};
|
|
204
|
+
//#endregion
|
|
205
|
+
//#region src/modules/collector/SmartCodeCollector.ts
|
|
206
|
+
/**
 * Selects and trims collected script files so the result stays within size
 * budgets. Files are plain objects carrying at least `url`, `content`, `size`
 * and `type`; heuristics (encryption / API / obfuscation detection) are
 * simple regex checks over the file content.
 */
var SmartCodeCollector = class {
  /** Default budget for the summed content length of a collection. */
  DEFAULT_MAX_TOTAL_SIZE = 512 * 1024;
  /** Default per-file limit before content is truncated. */
  DEFAULT_MAX_FILE_SIZE = 100 * 1024;
  /** Number of leading lines kept in a summary preview. */
  PREVIEW_LINES = 50;

  /**
   * Dispatches on `options.mode`: "summary", "priority", "incremental", or
   * anything else for a plain size-limited collection.
   *
   * @param {*} _page Unused.
   * @param {Array<object>} files Collected files.
   * @param {object} options Collection options; `mode` selects the strategy.
   * @returns {Promise<Array<object>>}
   */
  async smartCollect(_page, files, options) {
    logger.info(`Smart code collection mode: ${options.mode}`);
    switch (options.mode) {
      case "summary": return this.collectSummaries(files);
      case "priority": return this.collectByPriority(files, options);
      case "incremental": return this.collectIncremental(files, options);
      default: return this.collectWithLimit(files, options);
    }
  }

  /**
   * Builds one summary per file: heuristic flags, up to 20 function names,
   * import specifiers and a preview of the first PREVIEW_LINES lines.
   */
  async collectSummaries(files) {
    logger.info("Generating code summaries...");
    return files.map((file) => {
      const preview = file.content.split("\n").slice(0, this.PREVIEW_LINES).join("\n");
      return {
        url: file.url,
        size: file.size,
        type: file.type,
        hasEncryption: this.detectEncryption(file.content),
        hasAPI: this.detectAPI(file.content),
        hasObfuscation: this.detectObfuscation(file.content),
        functions: this.extractFunctions(file.content),
        imports: this.extractImports(file.content),
        preview
      };
    });
  }

  /**
   * Collects files in descending priority order until the total-size budget
   * would be exceeded. Oversized files are truncated to `maxFileSize`
   * characters. Each result records `truncated`, `originalSize` and
   * `priorityScore` in its metadata.
   */
  collectByPriority(files, options) {
    const maxTotalSize = options.maxTotalSize || this.DEFAULT_MAX_TOTAL_SIZE;
    const maxFileSize = options.maxFileSize || this.DEFAULT_MAX_FILE_SIZE;
    const priorities = options.priorities || [];
    const scoredFiles = files.map((file) => ({
      file,
      score: this.calculatePriority(file, priorities)
    }));
    scoredFiles.sort((a, b) => b.score - a.score);
    const result = [];
    let currentSize = 0;
    for (const { file, score } of scoredFiles) {
      let content = file.content;
      let truncated = false;
      if (file.size > maxFileSize) {
        content = content.substring(0, maxFileSize);
        truncated = true;
      }
      if (currentSize + content.length > maxTotalSize) {
        logger.warn(`Reached max total size limit (${maxTotalSize} bytes), stopping collection`);
        break;
      }
      result.push({
        ...file,
        content,
        size: content.length,
        metadata: {
          ...file.metadata,
          truncated,
          originalSize: file.size,
          // Reuse the score computed for sorting; scoring scans the whole
          // content with several regexes, so it is not worth doing twice.
          priorityScore: score
        }
      });
      currentSize += content.length;
    }
    logger.info(`Collected ${result.length}/${files.length} files by priority (${(currentSize / 1024).toFixed(2)} KB)`);
    return result;
  }

  /**
   * Filters files by URL using regex source strings, then applies the size
   * limits. A file is dropped when any `excludePatterns` entry matches; when
   * `includePatterns` is non-empty, at least one of them must match.
   */
  collectIncremental(files, options) {
    const includePatterns = options.includePatterns || [];
    const excludePatterns = options.excludePatterns || [];
    const filtered = files.filter((file) => {
      if (excludePatterns.some((pattern) => new RegExp(pattern).test(file.url))) return false;
      if (includePatterns.length === 0) return true;
      return includePatterns.some((pattern) => new RegExp(pattern).test(file.url));
    });
    logger.info(`Incremental collection: ${filtered.length}/${files.length} files matched`);
    return this.collectWithLimit(filtered, options);
  }

  /**
   * Collects files in their given order until the total-size budget would be
   * exceeded, truncating oversized files to `maxFileSize` characters.
   */
  collectWithLimit(files, options) {
    const maxTotalSize = options.maxTotalSize || this.DEFAULT_MAX_TOTAL_SIZE;
    const maxFileSize = options.maxFileSize || this.DEFAULT_MAX_FILE_SIZE;
    const result = [];
    let currentSize = 0;
    for (const file of files) {
      let content = file.content;
      let truncated = false;
      if (file.size > maxFileSize) {
        content = content.substring(0, maxFileSize);
        truncated = true;
      }
      if (currentSize + content.length > maxTotalSize) {
        logger.warn(`Reached max total size limit, collected ${result.length}/${files.length} files`);
        break;
      }
      result.push({
        ...file,
        content,
        size: content.length,
        metadata: {
          ...file.metadata,
          truncated,
          originalSize: file.size
        }
      });
      currentSize += content.length;
    }
    return result;
  }

  /**
   * Scores a file; higher means "collect earlier".
   *
   * Contributions: file type (inline 10, external 5), URL matches against
   * `priorities` (earlier patterns weigh more: `(length - index) * 20`),
   * content heuristics (encryption 50, API 30, obfuscation 20) and size
   * (under 10 KiB +10, over 500 KiB -20).
   *
   * @param {object} file
   * @param {string[]} priorities Regex sources, most important first.
   * @returns {number}
   */
  calculatePriority(file, priorities) {
    let score = 0;
    if (file.type === "inline") score += 10;
    if (file.type === "external") score += 5;
    for (let i = 0; i < priorities.length; i++) {
      const pattern = priorities[i];
      if (pattern && new RegExp(pattern).test(file.url)) score += (priorities.length - i) * 20;
    }
    if (this.detectEncryption(file.content)) score += 50;
    if (this.detectAPI(file.content)) score += 30;
    if (this.detectObfuscation(file.content)) score += 20;
    if (file.size < 10 * 1024) score += 10;
    else if (file.size > 500 * 1024) score -= 20;
    return score;
  }

  /** True when the content mentions common crypto keywords, libraries or base64 helpers. */
  detectEncryption(content) {
    return [
      /crypto|encrypt|decrypt|cipher|aes|rsa|md5|sha/i,
      /CryptoJS|forge|sjcl/i,
      /btoa|atob/i
    ].some((pattern) => pattern.test(content));
  }

  /** True when the content looks like it performs HTTP requests. */
  detectAPI(content) {
    return [
      /fetch\s*\(/,
      /XMLHttpRequest/,
      /axios|request|ajax/i,
      /\.get\(|\.post\(/
    ].some((pattern) => pattern.test(content));
  }

  /**
   * True when the average line length exceeds 200 characters, or the content
   * contains `\xNN` / `\uNNNN` escape sequences or an `eval(` call.
   */
  detectObfuscation(content) {
    const lines = content.split("\n");
    if (content.length / lines.length > 200) return true;
    if (/\\x[0-9a-f]{2}/i.test(content)) return true;
    if (/\\u[0-9a-f]{4}/i.test(content)) return true;
    if (/eval\s*\(/i.test(content)) return true;
    return false;
  }

  /**
   * Extracts up to 20 distinct function names from declarations,
   * `const name = function` assignments and `name: function` properties,
   * in that pattern order.
   */
  extractFunctions(content) {
    // A Set keeps first-seen order while making the duplicate check O(1).
    const functions = /* @__PURE__ */ new Set();
    for (const pattern of [
      /function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g,
      /const\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*function/g,
      /([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:\s*function/g
    ]) {
      for (const match of content.matchAll(pattern)) if (match[1]) functions.add(match[1]);
    }
    return [...functions].slice(0, 20);
  }

  /**
   * Extracts the distinct module specifiers of `import ... from "x"`
   * statements followed by those of `require("x")` calls.
   */
  extractImports(content) {
    const imports = /* @__PURE__ */ new Set();
    for (const pattern of [/import\s+.*?from\s+['"]([^'"]+)['"]/g, /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g]) {
      for (const match of content.matchAll(pattern)) if (match[1]) imports.add(match[1]);
    }
    return [...imports];
  }
};
|
|
373
|
+
//#endregion
|
|
374
|
+
//#region src/modules/collector/CodeCompressor.ts
|
|
375
|
+
// Promise-returning wrappers around zlib's callback API.
const gzipAsync = promisify(gzip);
const gunzipAsync = promisify(gunzip);

/**
 * Gzip-based compressor for source text with an in-memory result cache and
 * running statistics.
 *
 * Two payload formats are produced:
 *  - `compress()`: base64 of a single gzip stream.
 *  - `compressStream()` for inputs longer than the chunk size: base64 of a
 *    JSON array whose items are `compress()` payloads of consecutive chunks.
 * `decompress()` accepts both.
 */
var CodeCompressor = class {
  DEFAULT_LEVEL = 6;
  DEFAULT_CHUNK_SIZE = 100 * 1024;
  DEFAULT_CONCURRENCY = 5;
  DEFAULT_MAX_RETRIES = 3;
  /** Maximum number of cached compression results. */
  CACHE_MAX_SIZE = 100;
  /** Cache entry lifetime in milliseconds (one hour). */
  CACHE_TTL = 3600 * 1e3;
  cache = /* @__PURE__ */ new Map();
  stats = {
    totalCompressed: 0,
    totalOriginalSize: 0,
    totalCompressedSize: 0,
    averageRatio: 0,
    cacheHits: 0,
    cacheMisses: 0,
    totalTime: 0
  };

  /**
   * Size reduction in percent. Returns 0 for an empty original so callers
   * never see Infinity/NaN (same convention as the batch summary).
   */
  computeReduction(originalSize, compressedSize) {
    return originalSize > 0 ? (1 - compressedSize / originalSize) * 100 : 0;
  }

  /**
   * Gzips `code` and returns it base64-encoded together with size figures
   * (sizes are UTF-8 byte counts).
   *
   * @param {string} code
   * @param {object} [options]
   * @param {number} [options.level] zlib level; defaults to DEFAULT_LEVEL.
   * @param {boolean} [options.useCache=true] Read/write the result cache.
   * @param {number} [options.maxRetries] Attempts before giving up.
   * @returns {Promise<object>} `{ compressed, originalSize, compressedSize,
   *   compressionRatio, level }`, plus `metadata` when freshly compressed.
   * @throws The last compression error, or `Error("Compression failed")`
   *   when no attempt was made.
   */
  async compress(code, options = {}) {
    const startTime = Date.now();
    const level = options.level ?? this.DEFAULT_LEVEL;
    const useCache = options.useCache ?? true;
    const maxRetries = options.maxRetries ?? this.DEFAULT_MAX_RETRIES;
    const cacheKey = this.generateCacheKey(code, level);
    if (useCache && this.cache.has(cacheKey)) {
      const cached = this.cache.get(cacheKey);
      if (Date.now() - cached.timestamp < this.CACHE_TTL) {
        this.stats.cacheHits++;
        logger.debug(`Cache hit for compression (${code.length} bytes)`);
        return {
          compressed: cached.compressed,
          originalSize: cached.originalSize,
          compressedSize: cached.compressedSize,
          compressionRatio: cached.compressionRatio,
          level
        };
      }
      this.cache.delete(cacheKey);
    }
    this.stats.cacheMisses++;
    let lastError = null;
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        const buffer = Buffer.from(code, "utf-8");
        const compressed = await gzipAsync(buffer, { level });
        const base64 = compressed.toString("base64");
        const originalSize = buffer.length;
        const compressedSize = compressed.length;
        const compressionRatio = this.computeReduction(originalSize, compressedSize);
        const compressionTime = Date.now() - startTime;
        this.stats.totalCompressed++;
        this.stats.totalOriginalSize += originalSize;
        this.stats.totalCompressedSize += compressedSize;
        this.stats.averageRatio = this.computeReduction(this.stats.totalOriginalSize, this.stats.totalCompressedSize);
        this.stats.totalTime += compressionTime;
        const result = {
          compressed: base64,
          originalSize,
          compressedSize,
          compressionRatio,
          level,
          metadata: {
            hash: cacheKey,
            timestamp: Date.now(),
            compressionTime
          }
        };
        if (useCache) this.addToCache(cacheKey, {
          compressed: base64,
          originalSize,
          compressedSize,
          compressionRatio,
          timestamp: Date.now()
        });
        logger.debug(`Compressed code: ${originalSize} -> ${compressedSize} bytes (${compressionRatio.toFixed(1)}% reduction, level ${level}, ${compressionTime}ms)`);
        return result;
      } catch (error) {
        lastError = error;
        logger.warn(`Compression attempt ${attempt + 1}/${maxRetries} failed:`, error);
        // Linear back-off between attempts: 100 ms, 200 ms, ...
        if (attempt < maxRetries - 1) await new Promise((resolve) => setTimeout(resolve, 100 * (attempt + 1)));
      }
    }
    logger.error("Failed to compress code after retries:", lastError);
    throw lastError || /* @__PURE__ */ new Error("Compression failed");
  }

  /**
   * Interprets a decoded payload as the chunk envelope written by
   * `compressStream()`.
   *
   * @param {Buffer} raw Base64-decoded payload.
   * @returns {string[]|null} The chunk payloads, or null when `raw` is not a
   *   JSON array of strings. A gzip stream starts with bytes 0x1f 0x8b, so
   *   it can never be mistaken for an envelope, which starts with "[".
   */
  parseChunkEnvelope(raw) {
    if (raw.length === 0 || raw[0] !== 0x5b) return null;
    try {
      const parsed = JSON.parse(raw.toString("utf-8"));
      const isEnvelope = Array.isArray(parsed) && parsed.every((item) => typeof item === "string");
      return isEnvelope ? parsed : null;
    } catch {
      return null;
    }
  }

  /**
   * Restores the text from a payload produced by `compress()` or by
   * `compressStream()`.
   *
   * @param {string} compressed Base64 payload.
   * @param {number} [maxRetries=3] Attempts before giving up.
   * @returns {Promise<string>}
   * @throws The last decompression error, or `Error("Decompression failed")`
   *   when no attempt was made.
   */
  async decompress(compressed, maxRetries = 3) {
    let lastError = null;
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        const raw = Buffer.from(compressed, "base64");
        const chunkPayloads = this.parseChunkEnvelope(raw);
        if (chunkPayloads === null) return (await gunzipAsync(raw)).toString("utf-8");
        const parts = [];
        for (const payload of chunkPayloads) {
          parts.push((await gunzipAsync(Buffer.from(payload, "base64"))).toString("utf-8"));
        }
        return parts.join("");
      } catch (error) {
        lastError = error;
        logger.warn(`Decompression attempt ${attempt + 1}/${maxRetries} failed:`, error);
        if (attempt < maxRetries - 1) await new Promise((resolve) => setTimeout(resolve, 100 * (attempt + 1)));
      }
    }
    logger.error("Failed to decompress code after retries:", lastError);
    throw lastError || /* @__PURE__ */ new Error("Decompression failed");
  }

  /**
   * Compresses `files` (`{ url, content }`) in groups of `concurrency`.
   *
   * `options` is forwarded to `compress()`. `onFileProgress(url, 100)` fires
   * per successfully compressed file and `onProgress(percent)` after each
   * group.
   *
   * NOTE(review): when a file fails to compress, its entry carries the plain
   * content base64-encoded (not gzipped) with ratio 0; `decompress()` cannot
   * read such an entry. Confirm how callers tell the two apart.
   *
   * @returns {Promise<Array<object>>} One entry per file, in input order.
   */
  async compressBatch(files, options = {}) {
    // Clamp to at least 1: a zero or NaN step would never advance the loop.
    const concurrency = Math.max(1, Math.floor(Number(options.concurrency ?? this.DEFAULT_CONCURRENCY)) || 1);
    const results = [];
    for (let i = 0; i < files.length; i += concurrency) {
      const batch = files.slice(i, i + concurrency);
      const batchResults = await Promise.all(batch.map(async (file) => {
        try {
          const result = await this.compress(file.content, options);
          if (options.onFileProgress) options.onFileProgress(file.url, 100);
          return {
            url: file.url,
            compressed: result.compressed,
            originalSize: result.originalSize,
            compressedSize: result.compressedSize,
            compressionRatio: result.compressionRatio
          };
        } catch (error) {
          logger.error(`Failed to compress ${file.url}:`, error);
          return {
            url: file.url,
            compressed: Buffer.from(file.content).toString("base64"),
            originalSize: file.content.length,
            compressedSize: file.content.length,
            compressionRatio: 0
          };
        }
      }));
      results.push(...batchResults);
      if (options.onProgress) options.onProgress(results.length / files.length * 100);
    }
    const totalOriginal = results.reduce((sum, r) => sum + r.originalSize, 0);
    const totalCompressed = results.reduce((sum, r) => sum + r.compressedSize, 0);
    const totalRatio = totalOriginal > 0 ? (1 - totalCompressed / totalOriginal) * 100 : 0;
    logger.info(`Batch compression: ${results.length} files, ${(totalOriginal / 1024).toFixed(2)} KB -> ${(totalCompressed / 1024).toFixed(2)} KB (${totalRatio.toFixed(1)}% reduction)`);
    return results;
  }

  /** True when `code` is longer than `threshold` characters. */
  shouldCompress(code, threshold = 1024) {
    return code.length > threshold;
  }

  /**
   * Picks a zlib level from the input size: 1 below 10 KiB, 6 below 100 KiB,
   * 9 below 1 MiB, and 6 for anything larger.
   */
  selectCompressionLevel(size) {
    if (size < 10 * 1024) return 1;
    else if (size < 100 * 1024) return 6;
    else if (size < 1024 * 1024) return 9;
    else return 6;
  }

  /**
   * Compresses long input chunk by chunk.
   *
   * Input no longer than `chunkSize` characters is delegated to
   * `compress()`. Otherwise each chunk is compressed separately (bypassing
   * the cache) and the chunk payloads are wrapped as base64(JSON array).
   * `onProgress(percent)` fires after every chunk and ends at 100.
   *
   * @param {string} code
   * @param {object} [options] `compress()` options plus `chunkSize` and
   *   `onProgress`.
   * @returns {Promise<object>} Sizes are byte counts; `chunks` is the number
   *   of chunks written.
   */
  async compressStream(code, options = {}) {
    const requestedChunkSize = options.chunkSize ?? this.DEFAULT_CHUNK_SIZE;
    if (code.length <= requestedChunkSize) return this.compress(code, options);
    // A non-positive or non-numeric size would stall the loop below.
    const chunkSize = Number.isFinite(requestedChunkSize) && requestedChunkSize >= 1 ? Math.floor(requestedChunkSize) : this.DEFAULT_CHUNK_SIZE;
    const startTime = Date.now();
    const chunks = [];
    let offset = 0;
    while (offset < code.length) {
      let end = Math.min(offset + chunkSize, code.length);
      if (end < code.length) {
        // Never cut between the two halves of a surrogate pair: each chunk
        // is UTF-8 encoded on its own, and a lone surrogate would be
        // replaced by U+FFFD. Extend the chunk by one unit instead.
        const last = code.charCodeAt(end - 1);
        const next = code.charCodeAt(end);
        if (last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff) end++;
      }
      const compressed = await this.compress(code.substring(offset, end), {
        ...options,
        useCache: false
      });
      chunks.push(compressed.compressed);
      offset = end;
      if (options.onProgress) options.onProgress(offset / code.length * 100);
    }
    const combined = JSON.stringify(chunks);
    const finalCompressed = Buffer.from(combined).toString("base64");
    // Bytes on both sides of the ratio, matching compress().
    const originalSize = Buffer.byteLength(code, "utf-8");
    const compressedSize = finalCompressed.length;
    const compressionRatio = this.computeReduction(originalSize, compressedSize);
    const compressionTime = Date.now() - startTime;
    logger.info(`Stream compression: ${chunks.length} chunks, ${(originalSize / 1024).toFixed(2)} KB -> ${(compressedSize / 1024).toFixed(2)} KB (${compressionRatio.toFixed(1)}% reduction, ${compressionTime}ms)`);
    return {
      compressed: finalCompressed,
      originalSize,
      compressedSize,
      compressionRatio,
      chunks: chunks.length,
      metadata: {
        hash: this.generateCacheKey(code, options.level ?? this.DEFAULT_LEVEL),
        timestamp: Date.now(),
        compressionTime
      }
    };
  }

  /** @returns {object} A shallow copy of the running statistics. */
  getStats() {
    return { ...this.stats };
  }

  /** Resets all statistics counters to zero. */
  resetStats() {
    this.stats = {
      totalCompressed: 0,
      totalOriginalSize: 0,
      totalCompressedSize: 0,
      averageRatio: 0,
      cacheHits: 0,
      cacheMisses: 0,
      totalTime: 0
    };
  }

  /** Drops every cached compression result. */
  clearCache() {
    this.cache.clear();
    logger.info("Compression cache cleared");
  }

  /** @returns {number} Number of cached compression results. */
  getCacheSize() {
    return this.cache.size;
  }

  /** Cache key: MD5 hex digest of the code, suffixed with the zlib level. */
  generateCacheKey(code, level) {
    return `${createHash("md5").update(code).digest("hex")}-${level}`;
  }

  /** Inserts an entry, evicting the oldest inserted key when the cache is full. */
  addToCache(key, entry) {
    if (this.cache.size >= this.CACHE_MAX_SIZE) {
      const firstKey = this.cache.keys().next().value;
      if (firstKey) this.cache.delete(firstKey);
    }
    this.cache.set(key, entry);
  }
};
|
|
581
|
+
//#endregion
|
|
582
|
+
//#region src/modules/browser/BrowserTargetSessionManager.ts
|
|
583
|
+
/**
 * Owns one browser-level CDP session and at most one attached target session.
 *
 * The browser session is created lazily from the browser returned by the
 * `getBrowser` callback; targets are listed, attached and detached through it.
 */
var BrowserTargetSessionManager = class {
  /** Browser-level CDP session, created on demand by ensureBrowserSession(). */
  browserSession = null;
  /** Session for the currently attached target, or null. */
  attachedTargetSession = null;
  /** Normalized info of the currently attached target, or null. */
  attachedTargetInfo = null;
  /** True once Target.setAutoAttach / setDiscoverTargets succeeded on the browser session. */
  autoAttachEnabled = false;

  /**
   * @param {Function} getBrowser - Returns the connected browser, or a falsy value when none is connected.
   */
  constructor(getBrowser) {
    this.getBrowser = getBrowser;
  }

  /**
   * List the browser's CDP targets, normalized and filtered.
   *
   * @param {object} [filters] - Optional `discoverOOPIF`, `type`, `types`, `targetId`,
   *   `urlPattern`, `titlePattern` and `attachedOnly` constraints.
   * @returns {Promise<object[]>} Normalized target infos that satisfy every given filter.
   */
  async listTargets(filters = {}) {
    const session = await this.ensureBrowserSession();
    if (filters.discoverOOPIF !== false && !this.autoAttachEnabled) {
      try {
        await session.send("Target.setAutoAttach", {
          autoAttach: true,
          waitForDebuggerOnStart: false,
          flatten: true
        });
        await session.send("Target.setDiscoverTargets", { discover: true });
        this.autoAttachEnabled = true;
      } catch {
        // Best effort: listing still works without auto-attach; the flag stays
        // false so the next call tries again.
      }
    }
    const response = await session.send("Target.getTargets");
    const rawTargets = Array.isArray(response.targetInfos) ? response.targetInfos : [];
    return rawTargets
      .map((target) => this.normalizeTargetInfo(target))
      .filter((target) => target !== null)
      .filter((target) => this.matchesFilters(target, filters));
  }

  /**
   * Attach to a target, detaching from the previous one first.
   *
   * @param {string} targetId - Id of the target to attach to.
   * @returns {Promise<object>} Normalized info of the attached target.
   * @throws {Error} When no listed target has this id.
   */
  async attach(targetId) {
    const current = this.attachedTargetInfo;
    if (current?.targetId === targetId && this.attachedTargetSession) return current;
    const target = (await this.listTargets()).find((entry) => entry.targetId === targetId);
    if (!target) throw new Error(`CDP target not found: ${targetId}`);
    await this.detach();
    const session = await this.ensureBrowserSession();
    this.attachedTargetSession = await attachToFlatTarget(session, targetId);
    this.attachedTargetInfo = target;
    return target;
  }

  /**
   * Detach from the currently attached target, if any.
   *
   * The attached-target state is cleared even when the detach call fails, so a
   * dead target session cannot block every later attach().
   *
   * @returns {Promise<boolean>} False when nothing was attached, true after a detach.
   * @throws {Error} When a target is attached but no browser session exists, or when detaching fails.
   */
  async detach() {
    const session = this.attachedTargetSession;
    if (!session) {
      this.attachedTargetInfo = null;
      return false;
    }
    if (!this.browserSession) throw new Error("Browser CDP session unavailable for target detach");
    try {
      await detachFromFlatTarget(this.browserSession, session);
    } finally {
      this.attachedTargetSession = null;
      this.attachedTargetInfo = null;
    }
    return true;
  }

  /** @returns {object|null} The attached target session, or null. */
  getAttachedTargetSession() {
    return this.attachedTargetSession;
  }

  /** @returns {object|null} Normalized info of the attached target, or null. */
  getAttachedTargetInfo() {
    return this.attachedTargetInfo;
  }

  /**
   * Run `Runtime.evaluate` in the attached target.
   *
   * @param {string} expression - Expression to evaluate.
   * @param {object} [options] - `returnByValue` and `awaitPromise`, both defaulting to true.
   * @returns {Promise<*>} The remote object when `returnByValue` is false, otherwise its value; null when absent.
   * @throws {Error} When no target is attached or the evaluation reports exception details.
   */
  async evaluate(expression, options = {}) {
    const response = await this.requireAttachedTargetSession().send("Runtime.evaluate", {
      expression,
      returnByValue: options.returnByValue ?? true,
      awaitPromise: options.awaitPromise ?? true
    });
    if (response.exceptionDetails) {
      const details = response.exceptionDetails;
      throw new Error(details.exception?.description || details.text || "Runtime.evaluate failed in attached target");
    }
    return options.returnByValue === false ? response.result ?? null : response.result?.value ?? null;
  }

  /**
   * Send `Page.addScriptToEvaluateOnNewDocument` to the attached target.
   *
   * @param {string} source - Script source.
   * @returns {Promise<object>} The CDP response.
   */
  async addScriptToEvaluateOnNewDocument(source) {
    return await this.requireAttachedTargetSession().send("Page.addScriptToEvaluateOnNewDocument", { source });
  }

  /**
   * Release the attached target and the browser session.
   *
   * The browser session is always detached and forgotten, even when detaching
   * the target fails; that failure is still propagated to the caller.
   */
  async dispose() {
    try {
      await this.detach();
    } finally {
      this.attachedTargetSession = null;
      this.attachedTargetInfo = null;
      this.autoAttachEnabled = false;
      const session = this.browserSession;
      this.browserSession = null;
      if (session) {
        try {
          await session.detach();
        } catch {
          // The session may already be gone (browser closed); nothing left to release.
        }
      }
    }
  }

  /**
   * @returns {object} The attached target session.
   * @throws {Error} When no target is attached.
   */
  requireAttachedTargetSession() {
    if (!this.attachedTargetSession) throw new Error("No CDP target is currently attached");
    return this.attachedTargetSession;
  }

  /**
   * Return the browser-level CDP session, creating it on first use.
   *
   * @returns {Promise<object>} The browser CDP session.
   * @throws {Error} When `getBrowser()` yields no browser.
   */
  async ensureBrowserSession() {
    if (this.browserSession) return this.browserSession;
    const browser = this.getBrowser();
    if (!browser) throw new Error("Browser not connected");
    this.browserSession = await browser.target().createCDPSession();
    return this.browserSession;
  }

  /**
   * Check a normalized target against the given filters; unset filters always pass.
   * URL and title patterns are plain substring matches.
   *
   * @param {object} target - Normalized target info.
   * @param {object} filters - See listTargets().
   * @returns {boolean} True when every given filter matches.
   */
  matchesFilters(target, filters) {
    if (filters.type && target.type !== filters.type) return false;
    if (filters.types && filters.types.length > 0 && !filters.types.includes(target.type)) return false;
    if (filters.targetId && target.targetId !== filters.targetId) return false;
    if (filters.urlPattern && !target.url.includes(filters.urlPattern)) return false;
    if (filters.titlePattern && !target.title.includes(filters.titlePattern)) return false;
    if (filters.attachedOnly && !target.attached) return false;
    return true;
  }

  /**
   * Convert a raw CDP target info into a predictable shape.
   *
   * @param {*} target - Raw entry from `Target.getTargets`.
   * @returns {object|null} Normalized info, or null when the entry is not an
   *   object or lacks a string `targetId` / `type`.
   */
  normalizeTargetInfo(target) {
    if (typeof target !== "object" || target === null) return null;
    const stringOr = (value, fallback) => typeof value === "string" ? value : fallback;
    const targetId = stringOr(target.targetId, null);
    const type = stringOr(target.type, null);
    if (!targetId || !type) return null;
    return {
      targetId,
      type,
      title: stringOr(target.title, ""),
      url: stringOr(target.url, ""),
      attached: typeof target.attached === "boolean" ? target.attached : false,
      openerId: stringOr(target.openerId, void 0),
      canAccessOpener: typeof target.canAccessOpener === "boolean" ? target.canAccessOpener : void 0,
      openerFrameId: stringOr(target.openerFrameId, void 0),
      browserContextId: stringOr(target.browserContextId, void 0),
      subtype: stringOr(target.subtype, void 0)
    };
  }
};
|
|
699
|
+
//#endregion
|
|
700
|
+
//#region src/utils/browserExecutable.ts
|
|
701
|
+
/**
 * Browser executable resolution policy:
 * - Never scan the host for installed browsers.
 * - Honor explicit overrides from environment variables first.
 * - Otherwise use the executable path reported by Puppeteer.
 */
// Environment variables consulted, in priority order, for an explicit browser path.
const ENV_KEYS = ["CHROME_PATH", "PUPPETEER_EXECUTABLE_PATH", "BROWSER_EXECUTABLE_PATH"];
// Memoized result of findBrowserExecutable(); null means "not resolved yet".
let cachedBrowserPath = null;
|
|
713
|
+
/**
 * Look up an explicit browser path in the environment.
 *
 * Each variable in ENV_KEYS is read in order and trimmed; the first value
 * that is non-empty and points at an existing path wins.
 *
 * @returns {string|undefined} The first usable path, or undefined when none qualifies.
 */
function resolveFromEnvironment() {
  return ENV_KEYS
    .map((key) => process.env[key]?.trim())
    .find((candidate) => Boolean(candidate) && existsSync(candidate));
}
|
|
719
|
+
/**
 * Ask Puppeteer for the path of its "chrome" executable.
 *
 * @returns {string|undefined} The reported path when it exists on disk;
 *   undefined when it is missing or the lookup throws.
 */
function resolveFromPuppeteer() {
  try {
    const managedPath = executablePath("chrome");
    return managedPath && existsSync(managedPath) ? managedPath : undefined;
  } catch {
    // Lookup failures are treated the same as "no browser available".
    return undefined;
  }
}
|
|
725
|
+
/**
 * Resolve the browser executable path, memoizing the answer.
 *
 * Resolution order: environment overrides first, then the path reported by
 * Puppeteer. The outcome is cached, including an undefined "nothing found"
 * outcome. A cached path that no longer exists on disk is discarded and
 * resolution runs again.
 *
 * @returns {string|undefined} The executable path, or undefined when neither
 *   source yields an existing path (callers can then leave the choice to Puppeteer).
 */
function findBrowserExecutable() {
  const hasCachedAnswer = cachedBrowserPath !== null;
  if (hasCachedAnswer) {
    // A falsy cached value is a remembered "not found"; a path must still exist.
    const stillUsable = !cachedBrowserPath || existsSync(cachedBrowserPath);
    if (stillUsable) return cachedBrowserPath;
  }
  cachedBrowserPath = null;
  const fromEnvironment = resolveFromEnvironment();
  cachedBrowserPath = fromEnvironment ?? resolveFromPuppeteer();
  return cachedBrowserPath;
}
|
|
739
|
+
//#endregion
|
|
740
|
+
//#region src/modules/collector/PageScriptCollectors.ts
|
|
741
|
+
/**
 * Register a script on every new document that records the URLs passed to
 * the `Worker` constructor.
 *
 * Inside the page, `window.Worker` is replaced by a Proxy whose `construct`
 * trap creates the worker as usual and appends the script URL (a string, or
 * a `URL` converted to a string) to `window.__workerUrls`.
 *
 * @param {object} page - Object exposing `evaluateOnNewDocument(fn)`.
 * @returns {Promise<void>}
 */
async function setupWebWorkerTracking(page) {
  // Must stay self-contained: this function is handed to the page, not run here.
  const installWorkerTracker = () => {
    const host = window;
    const NativeWorker = host.Worker;
    if (typeof NativeWorker !== "function") return;
    const seenUrls = Array.isArray(host.__workerUrls) ? host.__workerUrls : [];
    const remember = (scriptUrl) => {
      seenUrls.push(scriptUrl);
      host.__workerUrls = seenUrls;
    };
    host.Worker = new Proxy(NativeWorker, {
      construct(target, args, newTarget) {
        const instance = Reflect.construct(target, args, newTarget);
        const scriptURL = args[0];
        if (typeof scriptURL === "string") {
          remember(scriptURL);
        } else if (scriptURL instanceof URL) {
          remember(scriptURL.toString());
        }
        return instance;
      }
    });
  };
  await page.evaluateOnNewDocument(installWorkerTracker);
}
|
|
761
|
+
/**
 * Collect the inline `<script>` elements of a page.
 *
 * Scripts with a `src` or without text are ignored. Content longer than
 * `maxSingleSize` is cut to that length in the page, and at most
 * `maxFilesPerCollect` scripts are returned.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {number} maxSingleSize - Maximum content length kept per script.
 * @param {number} maxFilesPerCollect - Maximum number of scripts returned.
 * @returns {Promise<object[]>} Entries of the form `{ url, content, size, type: "inline", metadata }`.
 */
async function collectInlineScripts(page, maxSingleSize, maxFilesPerCollect) {
  const scripts = await page.evaluate((limit) => {
    const collected = [];
    for (const script of Array.from(document.querySelectorAll("script"))) {
      if (script.src || !script.textContent) continue;
      const fullText = script.textContent || "";
      const truncated = fullText.length > limit;
      const content = truncated ? fullText.substring(0, limit) : fullText;
      collected.push({
        // Index counts inline scripts only, not every <script> element.
        url: `inline-script-${collected.length}`,
        content,
        size: content.length,
        type: "inline",
        metadata: {
          scriptType: script.type || "text/javascript",
          async: script.async,
          defer: script.defer,
          integrity: script.integrity || void 0,
          truncated,
          originalSize: truncated ? fullText.length : void 0
        }
      });
    }
    return collected;
  }, maxSingleSize);
  const limitedScripts = scripts.slice(0, maxFilesPerCollect);
  if (scripts.length > limitedScripts.length) {
    logger.warn(`Found ${scripts.length} inline scripts, limiting to ${maxFilesPerCollect}`);
  }
  const truncatedCount = limitedScripts.reduce((count, entry) => entry.metadata?.truncated ? count + 1 : count, 0);
  if (truncatedCount > 0) {
    logger.warn(`${truncatedCount} inline scripts were truncated due to size limits`);
  }
  logger.debug(`Collected ${limitedScripts.length} inline scripts`);
  return limitedScripts;
}
|
|
794
|
+
/**
 * Collect the source of the service workers registered for a page.
 *
 * Registrations are enumerated inside the page; for each one the script URL
 * of the active, installing or waiting worker (first one present) is fetched
 * from within the page. A worker whose fetch fails or answers with a non-2xx
 * status is skipped with a warning instead of having the error body recorded
 * as its source.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`.
 * @param {(url: string) => boolean} [shouldCollectUrl] - Predicate deciding whether a worker URL is collected; defaults to accepting all.
 * @returns {Promise<object[]>} Entries `{ url, content, size, type: "service-worker" }`;
 *   an empty array when enumeration itself fails.
 */
async function collectServiceWorkers(page, shouldCollectUrl = () => true) {
  try {
    const serviceWorkers = await page.evaluate(async () => {
      if (!("serviceWorker" in navigator)) return [];
      const registrations = await navigator.serviceWorker.getRegistrations();
      const workers = [];
      for (const registration of registrations) {
        const worker = registration.active || registration.installing || registration.waiting;
        if (worker && worker.scriptURL) workers.push({
          url: worker.scriptURL,
          scope: registration.scope,
          state: worker.state
        });
      }
      return workers;
    });
    const files = [];
    for (const worker of serviceWorkers) {
      if (!shouldCollectUrl(worker.url)) continue;
      try {
        const content = await page.evaluate(async (url) => {
          const response = await fetch(url);
          // An error page (404, 500, ...) is not the worker's source code.
          if (!response.ok) throw new Error(`Unexpected HTTP status ${response.status} for ${url}`);
          return await response.text();
        }, worker.url);
        if (content) {
          files.push({
            url: worker.url,
            content,
            size: content.length,
            type: "service-worker"
          });
          logger.debug(`Collected Service Worker: ${worker.url}`);
        }
      } catch (error) {
        logger.warn(`Failed to collect Service Worker: ${worker.url}`, error);
      }
    }
    return files;
  } catch (error) {
    logger.warn("Service Worker collection failed", error);
    return [];
  }
}
|
|
836
|
+
/**
 * Collect the source of web workers whose URLs were recorded in
 * `window.__workerUrls`.
 *
 * Every recorded URL is resolved against the page URL and fetched from within
 * the page. A URL recorded several times (a worker constructed repeatedly) is
 * fetched once. A fetch that fails or answers with a non-2xx status is skipped
 * with a warning instead of having the error body recorded as source.
 *
 * @param {object} page - Object exposing `url()` and `evaluate(fn, ...args)`.
 * @param {(url: string) => boolean} [shouldCollectUrl] - Predicate applied to the absolute URL; defaults to accepting all.
 * @returns {Promise<object[]>} Entries `{ url, content, size, type: "web-worker" }`;
 *   an empty array when reading the recorded URLs fails.
 */
async function collectWebWorkers(page, shouldCollectUrl = () => true) {
  try {
    const workerUrls = await page.evaluate(() => {
      const workerWindow = window;
      return Array.isArray(workerWindow.__workerUrls) ? workerWindow.__workerUrls : [];
    });
    const files = [];
    const visited = new Set();
    for (const url of workerUrls) {
      try {
        const absoluteUrl = new URL(url, page.url()).href;
        // The tracker records one entry per construction; fetch each script once.
        if (visited.has(absoluteUrl)) continue;
        visited.add(absoluteUrl);
        if (!shouldCollectUrl(absoluteUrl)) continue;
        const content = await page.evaluate(async (workerUrl) => {
          const response = await fetch(workerUrl);
          // An error page (404, 500, ...) is not the worker's source code.
          if (!response.ok) throw new Error(`Unexpected HTTP status ${response.status} for ${workerUrl}`);
          return await response.text();
        }, absoluteUrl);
        if (content) {
          files.push({
            url: absoluteUrl,
            content,
            size: content.length,
            type: "web-worker"
          });
          logger.debug(`Collected Web Worker: ${absoluteUrl}`);
        }
      } catch (error) {
        logger.warn(`Failed to collect Web Worker: ${url}`, error);
      }
    }
    return files;
  } catch (error) {
    logger.warn("Web Worker collection failed", error);
    return [];
  }
}
|
|
867
|
+
/**
 * Build a dependency graph over the collected files.
 *
 * Every file becomes a node. For each dependency specifier extracted from a
 * file's content, the first file whose URL contains that specifier becomes
 * the target of an "import" edge.
 *
 * @param {object[]} files - Entries with `url`, `type` and `content`.
 * @returns {{nodes: object[], edges: object[]}} Graph nodes `{ id, url, type }` and edges `{ from, to, type }`.
 */
function analyzeDependencies(files) {
  const nodes = files.map((file) => ({
    id: file.url,
    url: file.url,
    type: file.type
  }));
  const edges = [];
  for (const file of files) {
    for (const dep of extractDependencies(file.content)) {
      // A URL that ends with `dep` or `dep.js` necessarily contains `dep`,
      // so the substring test alone selects the same file.
      const targetFile = files.find((candidate) => candidate.url.includes(dep));
      if (!targetFile) continue;
      edges.push({
        from: file.url,
        to: targetFile.url,
        type: "import"
      });
    }
  }
  logger.debug(`Dependency graph: ${nodes.length} nodes, ${edges.length} edges`);
  return { nodes, edges };
}
|
|
893
|
+
/**
 * Extract the module specifiers a piece of JavaScript refers to.
 *
 * Recognized forms (matched with regular expressions, not a parser, so text
 * inside strings or comments can match too):
 * - `import ... from "x"`, including minified (`import{a}from"x"`) and multi-line lists
 * - side-effect imports: `import "x"`
 * - re-exports: `export ... from "x"`
 * - `require("x")`
 * - dynamic `import("x")`
 *
 * @param {string} code - Source text to scan.
 * @returns {string[]} Unique specifiers in order of first discovery.
 */
function extractDependencies(code) {
  const patterns = [
    // Single-line static import with whitespace around the binding list.
    /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g,
    /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g,
    /import\s*\(\s*['"]([^'"]+)['"]\s*\)/g,
    // Static import whose binding list is minified or spans several lines.
    /\bimport(?=[\s{*])[\w$*\s{},]*?from\s*['"]([^'"]+)['"]/g,
    // Side-effect import without bindings.
    /\bimport\s*['"]([^'"]+)['"]/g,
    // Re-export from another module.
    /\bexport(?=[\s{*])[\w$*\s{},]*?from\s*['"]([^'"]+)['"]/g
  ];
  const dependencies = new Set();
  for (const pattern of patterns) {
    for (const match of code.matchAll(pattern)) {
      if (match[1]) dependencies.add(match[1]);
    }
  }
  return [...dependencies];
}
|
|
904
|
+
/**
 * Score a collected file by how interesting it is likely to be.
 *
 * The score is the sum of a type bonus, a size bonus and keyword bonuses or
 * penalties based on substrings of the lower-cased URL.
 *
 * @param {object} file - Entry with `type`, `size` and `url`.
 * @returns {number} The priority score (may be negative).
 */
function calculatePriorityScore(file) {
  const typeBonus = file.type === "inline" ? 10 : file.type === "external" ? 5 : 0;

  let sizeBonus = 0;
  if (file.size < 10 * 1024) sizeBonus = 15;
  else if (file.size < 50 * 1024) sizeBonus = 10;
  else if (file.size > 200 * 1024) sizeBonus = -10;

  const url = file.url.toLowerCase();
  // Each rule contributes its weight once if any of its keywords occurs in the URL.
  const keywordRules = [
    [["main", "index", "app"], 20],
    [["crypto", "encrypt", "sign"], 30],
    [["api", "request", "ajax"], 25],
    [["core", "common", "util"], 15],
    [["vendor", "lib", "jquery", "react"], -20],
    [["node_modules", "bundle"], -30]
  ];
  const keywordBonus = keywordRules.reduce(
    (total, [keywords, weight]) => keywords.some((keyword) => url.includes(keyword)) ? total + weight : total,
    0
  );

  return typeBonus + sizeBonus + keywordBonus;
}
|
|
920
|
+
//#endregion
|
|
921
|
+
//#region src/modules/collector/CodeCollectorCollectInternal.ts
|
|
922
|
+
/**
 * Check that a value is a non-null object (arrays included).
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for any object other than null.
 */
function isRecord(value) {
  if (value === null) return false;
  return typeof value === "object";
}
|
|
925
|
+
/**
 * Pass a value through when it is a non-null object.
 *
 * @param {*} value - Value to narrow.
 * @returns {object|undefined} The same value, or undefined when it is not a record.
 */
function toRecord(value) {
  if (isRecord(value)) {
    return value;
  }
  return void 0;
}
|
|
928
|
+
/**
 * Check that a value looks like the payload of a `Network.responseReceived` event.
 *
 * @param {*} value - Event payload to test.
 * @returns {boolean} True when `value.response.url` and `value.requestId` are both strings.
 */
function isCDPResponseReceivedParams(value) {
  const response = isRecord(value) ? value.response : void 0;
  if (!isRecord(response)) return false;
  const hasUrl = typeof response.url === "string";
  return hasUrl && typeof value.requestId === "string";
}
|
|
932
|
+
/**
 * Check that a value has the shape of a code summary.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True when `url`, `type`, `preview` are strings, `size` is a
 *   number, `hasEncryption`, `hasAPI`, `hasObfuscation` are booleans and
 *   `functions`, `imports` are arrays.
 */
function isCodeSummary(value) {
  if (!isRecord(value)) return false;
  const expectedTypes = {
    url: "string",
    size: "number",
    type: "string",
    hasEncryption: "boolean",
    hasAPI: "boolean",
    hasObfuscation: "boolean",
    preview: "string"
  };
  const scalarsMatch = Object.entries(expectedTypes).every(([field, kind]) => typeof value[field] === kind);
  return scalarsMatch && Array.isArray(value.functions) && Array.isArray(value.imports);
}
|
|
936
|
+
/**
 * Check that a value has the shape of a collected code file.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True when `url`, `content`, `type` are strings and `size` is a number.
 */
function isCodeFile(value) {
  if (!isRecord(value)) return false;
  const stringFields = ["url", "content", "type"];
  const stringsOk = stringFields.every((field) => typeof value[field] === "string");
  return stringsOk && typeof value.size === "number";
}
|
|
940
|
+
/**
 * Ensure a collector context exposes the methods the collection code relies on.
 *
 * @param {*} value - Candidate collector context.
 * @throws {Error} "Invalid collector context" when the value is not an object
 *   or lacks an `init`, `applyAntiDetection` or `shouldCollectUrl` function.
 */
function assertCollectorInternals(value) {
  const requiredMethods = ["init", "applyAntiDetection", "shouldCollectUrl"];
  const usable = isRecord(value) && requiredMethods.every((method) => typeof value[method] === "function");
  if (!usable) throw new Error("Invalid collector context");
}
|
|
944
|
+
/**
 * Collect the JavaScript of a page and package it as a collection result.
 *
 * Steps, in order:
 * 1. Return a cached result when caching is enabled and the cache has one.
 * 2. Open a page - in the active page's browser context when one can be
 *    resolved, otherwise in a temporary browser context, otherwise directly
 *    on the browser.
 * 3. Record external scripts from `Network.responseReceived` events while
 *    navigating, then gather inline scripts, service workers and web workers.
 * 4. Optionally run smart collection and compression, build the dependency
 *    graph, store the result in the cache and return it.
 *
 * The CDP session, the page / temporary context and the previously active
 * page are released or restored in `finally`, whatever the outcome.
 *
 * @param {object} self - Collector context; must pass assertCollectorInternals().
 * @param {object} options - Collection options (`url`, `timeout`, `include*`
 *   switches, `filterRules`, `smartMode`, `compress`, ...).
 * @returns {Promise<object>} `{ files, dependencies, totalSize, collectTime }`,
 *   plus `summaries` (with empty `files`) in "summary" smart mode.
 * @throws {Error} When the context is invalid, the browser is not initialized,
 *   or collection fails.
 */
async function collectInnerImpl(self, options) {
  assertCollectorInternals(self);
  const startTime = Date.now();
  logger.info(`Collecting code from: ${options.url}`);
  const cacheOptions = toRecord(options);
  if (self.cacheEnabled) {
    const cached = await self.cache.get(options.url, cacheOptions);
    if (cached) {
      logger.info(` Cache hit for: ${options.url}`);
      return cached;
    }
  }
  await self.init();
  if (!self.browser) throw new Error("Browser not initialized");
  let previousActivePageIndex = null;
  const openPages = typeof self.listPages === "function" ? await self.listPages().catch(() => []) : [];
  // Reuse the active page's browser context only when at least one page is open.
  const activePageContext = openPages.length > 0 && typeof self.getActivePage === "function"
    ? await self.getActivePage().then((activePage) => activePage.browserContext()).catch((error) => {
      logger.debug("Failed to resolve active browser context before code collection:", error);
      return null;
    })
    : null;
  const temporaryContext = activePageContext === null && typeof self.browser.createBrowserContext === "function" ? await self.browser.createBrowserContext() : null;
  if ((activePageContext !== null || !temporaryContext) && typeof self.getActivePageIndex === "function") {
    try {
      previousActivePageIndex = await self.getActivePageIndex();
    } catch (error) {
      logger.debug("Failed to capture active page index before code collection:", error);
    }
  }
  let page = null;
  try {
    // Created inside the try so a failing newPage() still releases the temporary context.
    page = activePageContext !== null ? await activePageContext.newPage() : temporaryContext ? await temporaryContext.newPage() : await self.browser.newPage();
    const timeoutMs = options.timeout ?? self.config.timeout ?? 3e4;
    page.setDefaultTimeout(timeoutMs);
    await page.setUserAgent(self.userAgent);
    await self.applyAntiDetection(page);
    if (options.includeWebWorker !== false) await setupWebWorkerTracking(page);
    const files = [];
    // Appends as many incoming files as the per-collection limit still allows.
    const appendFilesWithinLimit = (incoming, label) => {
      const remaining = self.MAX_FILES_PER_COLLECT - files.length;
      if (remaining <= 0) {
        logger.warn(`Reached max files limit (${self.MAX_FILES_PER_COLLECT}), skipping ${label}`);
        return;
      }
      if (incoming.length > remaining) logger.warn(`Collected ${incoming.length} ${label}, limiting to remaining ${remaining} files`);
      files.push(...incoming.slice(0, remaining));
    };
    self.cdpSession = await page.createCDPSession();
    await self.cdpSession.send("Network.enable");
    await self.cdpSession.send("Runtime.enable");
    self.cdpListeners.responseReceived = async (params) => {
      if (!isCDPResponseReceivedParams(params)) return;
      const { response, requestId, type } = params;
      const url = response.url;
      if (files.length >= self.MAX_FILES_PER_COLLECT) {
        if (files.length === self.MAX_FILES_PER_COLLECT) logger.warn(`Reached max files limit (${self.MAX_FILES_PER_COLLECT}), will skip remaining files`);
        return;
      }
      self.cleanupCollectedUrls();
      if (type === "Script" || response.mimeType?.includes("javascript") || url.endsWith(".js")) {
        if (options.includeExternal === false) return;
        if (!self.shouldCollectUrl(url, options.filterRules)) return;
        try {
          const responseBody = await self.cdpSession.send("Network.getResponseBody", { requestId });
          if (typeof responseBody.body !== "string") return;
          const content = responseBody.base64Encoded ? Buffer.from(responseBody.body, "base64").toString("utf-8") : responseBody.body;
          const contentSize = content.length;
          let finalContent = content;
          let truncated = false;
          if (contentSize > self.MAX_SINGLE_FILE_SIZE) {
            finalContent = content.substring(0, self.MAX_SINGLE_FILE_SIZE);
            truncated = true;
            logger.warn(`[CDP] Large file truncated: ${url} (${(contentSize / 1024).toFixed(2)} KB -> ${(self.MAX_SINGLE_FILE_SIZE / 1024).toFixed(2)} KB)`);
          }
          if (!self.collectedUrls.has(url)) {
            self.collectedUrls.add(url);
            const file = {
              url,
              content: finalContent,
              size: finalContent.length,
              type: "external",
              metadata: truncated ? {
                truncated: true,
                originalSize: contentSize,
                truncatedSize: finalContent.length
              } : void 0
            };
            const fileCountBeforeAppend = files.length;
            appendFilesWithinLimit([file], "external scripts");
            // Only cache the file when the limit actually let it through.
            if (files.length > fileCountBeforeAppend) {
              self.collectedFilesCache.set(url, file);
              logger.debug(`[CDP] Collected (${files.length}/${self.MAX_FILES_PER_COLLECT}): ${url} (${(finalContent.length / 1024).toFixed(2)} KB)${truncated ? " [TRUNCATED]" : ""}`);
            }
          }
        } catch (error) {
          logger.warn(`[CDP] Failed to get response body for: ${url}`, error);
        }
      }
    };
    self.cdpSession.on("Network.responseReceived", self.cdpListeners.responseReceived);
    logger.info(`Navigating to: ${options.url}`);
    await page.goto(options.url, {
      waitUntil: "networkidle2",
      // Same value as the page default above, so an explicit `timeout: 0` is honored here too.
      timeout: timeoutMs
    });
    if (options.includeInline !== false) {
      logger.info("Collecting inline scripts...");
      appendFilesWithinLimit(await collectInlineScripts(page, self.MAX_SINGLE_FILE_SIZE, self.MAX_FILES_PER_COLLECT), "inline scripts");
    }
    if (options.includeServiceWorker !== false) {
      logger.info("Collecting Service Workers...");
      appendFilesWithinLimit(await collectServiceWorkers(page, (url) => self.shouldCollectUrl(url, options.filterRules)), "service workers");
    }
    if (options.includeWebWorker !== false) {
      logger.info("Collecting Web Workers...");
      appendFilesWithinLimit(await collectWebWorkers(page, (url) => self.shouldCollectUrl(url, options.filterRules)), "web workers");
    }
    if (options.includeDynamic) {
      // The response listener is still attached, so scripts loaded during this wait are recorded.
      logger.info("Waiting for dynamic scripts...");
      await new Promise((resolve) => setTimeout(resolve, 3e3));
    }
    if (self.cdpSession) {
      if (self.cdpListeners.responseReceived) self.cdpSession.off("Network.responseReceived", self.cdpListeners.responseReceived);
      await self.cdpSession.detach();
      self.cdpSession = null;
      self.cdpListeners = {};
    }
    const collectTime = Date.now() - startTime;
    const truncatedFiles = files.filter((f) => f.metadata?.truncated);
    if (truncatedFiles.length > 0) {
      logger.warn(`${truncatedFiles.length} files were truncated due to size limits`);
      truncatedFiles.forEach((f) => {
        const originalSize = typeof f.metadata?.originalSize === "number" ? f.metadata.originalSize : f.size;
        logger.warn(` - ${f.url}: ${(originalSize / 1024).toFixed(2)} KB -> ${(f.size / 1024).toFixed(2)} KB`);
      });
    }
    let processedFiles = files;
    if (options.smartMode && options.smartMode !== "full") {
      try {
        logger.info(` Applying smart collection mode: ${options.smartMode}`);
        const smartOptions = {
          mode: options.smartMode,
          maxTotalSize: options.maxTotalSize,
          maxFileSize: options.maxFileSize,
          priorities: options.priorities
        };
        const smartResult = await self.smartCollector.smartCollect(page, files, smartOptions);
        if (options.smartMode === "summary") {
          logger.info(` Returning ${smartResult.length} code summaries`);
          if (Array.isArray(smartResult) && smartResult.every((item) => isCodeSummary(item))) return {
            files: [],
            summaries: smartResult,
            dependencies: {
              nodes: [],
              edges: []
            },
            totalSize: 0,
            collectTime: Date.now() - startTime
          };
        }
        if (Array.isArray(smartResult) && smartResult.every((item) => isCodeFile(item))) processedFiles = smartResult;
        else {
          logger.warn("Smart collection returned unexpected type, using original files");
          processedFiles = files;
        }
      } catch (error) {
        logger.error("Smart collection failed, using original files:", error);
        processedFiles = files;
      }
    }
    if (options.compress) {
      try {
        logger.info(`Compressing ${processedFiles.length} files with enhanced compressor...`);
        const filesToCompress = processedFiles.filter((file) => self.compressor.shouldCompress(file.content)).map((file) => ({
          url: file.url,
          content: file.content
        }));
        if (filesToCompress.length === 0) logger.info("No files need compression (all below threshold)");
        else {
          const compressedResults = await self.compressor.compressBatch(filesToCompress, {
            level: void 0,
            useCache: true,
            maxRetries: 3,
            concurrency: 5,
            onProgress: (progress) => {
              if (progress % 25 === 0) logger.debug(`Compression progress: ${progress.toFixed(0)}%`);
            }
          });
          const compressedMap = new Map(compressedResults.map((r) => [r.url, r]));
          // Only size statistics are attached; file content itself is left as collected.
          for (const file of processedFiles) {
            const compressed = compressedMap.get(file.url);
            if (compressed) file.metadata = {
              ...file.metadata,
              compressed: true,
              originalSize: compressed.originalSize,
              compressedSize: compressed.compressedSize,
              compressionRatio: compressed.compressionRatio
            };
          }
          const stats = self.compressor.getStats();
          logger.info(` Compressed ${compressedResults.length}/${processedFiles.length} files`);
          logger.info(` Compression stats: ${(stats.totalOriginalSize / 1024).toFixed(2)} KB -> ${(stats.totalCompressedSize / 1024).toFixed(2)} KB (${stats.averageRatio.toFixed(1)}% reduction)`);
          logger.info(` Cache: ${stats.cacheHits} hits, ${stats.cacheMisses} misses (${stats.cacheHits > 0 ? (stats.cacheHits / (stats.cacheHits + stats.cacheMisses) * 100).toFixed(1) : 0}% hit rate)`);
        }
      } catch (error) {
        logger.error("Compression failed:", error);
      }
    }
    const dependencies = analyzeDependencies(processedFiles);
    const totalSize = processedFiles.reduce((sum, file) => sum + file.size, 0);
    logger.success(`Collected ${processedFiles.length} files (${(totalSize / 1024).toFixed(2)} KB) in ${collectTime}ms`);
    const result = {
      files: processedFiles,
      dependencies,
      totalSize,
      collectTime
    };
    if (self.cacheEnabled) {
      await self.cache.set(options.url, result, cacheOptions);
      logger.debug(` Saved to cache: ${options.url}`);
    }
    return result;
  } catch (error) {
    logger.error("Code collection failed", error);
    throw error;
  } finally {
    if (self.cdpSession) {
      try {
        if (self.cdpListeners.responseReceived) self.cdpSession.off("Network.responseReceived", self.cdpListeners.responseReceived);
        await self.cdpSession.detach();
      } catch {}
      self.cdpSession = null;
      self.cdpListeners = {};
    }
    if (temporaryContext) {
      try {
        await temporaryContext.close();
      } catch (error) {
        logger.debug("Failed to close temporary browser context after code collection:", error);
      }
    } else {
      if (page) {
        try {
          await page.close();
        } catch (error) {
          // A close failure must not mask the real outcome or skip restoring the active page.
          logger.debug("Failed to close collection page after code collection:", error);
        }
      }
      if (previousActivePageIndex !== null && typeof self.selectPage === "function") {
        try {
          await self.selectPage(previousActivePageIndex);
        } catch (error) {
          logger.debug("Failed to restore active page after code collection:", error);
        }
      }
    }
  }
}
|
|
1184
|
+
//#endregion
|
|
1185
|
+
//#region src/modules/collector/CodeCollectorUtilsInternal.ts
|
|
1186
|
+
/**
 * Decide whether a URL passes the collector's include filter.
 *
 * Each rule is a glob-style pattern in which `*` matches any run of characters
 * and every other character is matched literally. Matching is unanchored, so a
 * rule may match anywhere inside the URL.
 *
 * @param {string} url - URL to test.
 * @param {string[] | null | undefined} filterRules - Include patterns; empty or missing means "collect everything".
 * @returns {boolean} `true` when no rules are given or at least one rule matches.
 */
function shouldCollectUrlImpl(url, filterRules) {
  if (!filterRules || filterRules.length === 0) return true;
  for (const rule of filterRules) {
    // Escape regex metacharacters first so URL punctuation such as `.`, `?`, `+`
    // and `(` is matched literally (and cannot produce an invalid pattern),
    // then expand the `*` wildcard.
    const source = rule.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*");
    if (new RegExp(source).test(url)) return true;
  }
  return false;
}
|
|
1191
|
+
/**
 * Navigate `page` to `url`, retrying on failure with a linearly growing delay.
 *
 * After the n-th failed attempt (1-based) the function waits n seconds before
 * trying again; no wait happens after the final attempt.
 *
 * @param {{ goto: Function }} page - Page-like object exposing `goto(url, options)`.
 * @param {string} url - Destination URL.
 * @param {object} options - Options forwarded unchanged to `page.goto`.
 * @param {number} [maxRetries=3] - Maximum number of navigation attempts.
 * @returns {Promise<void>} Resolves once a navigation attempt succeeds.
 * @throws The last navigation error, or a generic Error when no attempt recorded one.
 */
async function navigateWithRetryImpl(page, url, options, maxRetries = 3) {
  let failure = null;
  let attempt = 0;
  while (attempt < maxRetries) {
    try {
      await page.goto(url, options);
      return;
    } catch (error) {
      failure = error;
      logger.warn(`Navigation attempt ${attempt + 1}/${maxRetries} failed: ${error}`);
      const hasAttemptsLeft = attempt < maxRetries - 1;
      if (hasAttemptsLeft) {
        const backoffMs = 1e3 * (attempt + 1);
        await new Promise((resolve) => setTimeout(resolve, backoffMs));
      }
    }
    attempt += 1;
  }
  throw failure || new Error("Navigation failed after retries");
}
|
|
1203
|
+
/**
 * Read navigation-timing figures from the page.
 *
 * @param {{ evaluate: Function }} page - Page-like object exposing `evaluate(fn)`.
 * @returns {Promise<object>} `{ domContentLoaded, loadComplete, domInteractive, totalTime }`
 *   derived from the first "navigation" performance entry, or `{}` when evaluation fails.
 */
async function getPerformanceMetricsImpl(page) {
  // Evaluated in the page context, so it must not capture anything from this module.
  const readNavigationTiming = () => {
    const timing = performance.getEntriesByType("navigation")[0];
    const domContentLoaded = timing.domContentLoadedEventEnd - timing.domContentLoadedEventStart;
    const loadComplete = timing.loadEventEnd - timing.loadEventStart;
    const domInteractive = timing.domInteractive - timing.fetchStart;
    const totalTime = timing.loadEventEnd - timing.fetchStart;
    return { domContentLoaded, loadComplete, domInteractive, totalTime };
  };
  let metrics;
  try {
    metrics = await page.evaluate(readNavigationTiming);
  } catch (error) {
    logger.warn("Failed to get performance metrics", error);
    metrics = {};
  }
  return metrics;
}
|
|
1219
|
+
/**
 * Capture basic metadata about the page's current document.
 *
 * @param {{ evaluate: Function }} page - Page-like object exposing `evaluate(fn)`.
 * @returns {Promise<object>} Title, URL, user agent, viewport size, the raw cookie string
 *   and the number of keys in local/session storage; `{}` when evaluation fails.
 */
async function collectPageMetadataImpl(page) {
  // Evaluated in the page context, so it must not capture anything from this module.
  const snapshotDocument = () => {
    const viewport = {
      width: window.innerWidth,
      height: window.innerHeight
    };
    return {
      title: document.title,
      url: window.location.href,
      userAgent: navigator.userAgent,
      viewport,
      cookies: document.cookie,
      localStorage: Object.keys(localStorage).length,
      sessionStorage: Object.keys(sessionStorage).length
    };
  };
  let metadata;
  try {
    metadata = await page.evaluate(snapshotDocument);
  } catch (error) {
    logger.warn("Failed to collect page metadata", error);
    metadata = {};
  }
  return metadata;
}
|
|
1240
|
+
//#endregion
|
|
1241
|
+
//#region src/modules/collector/playwright-cdp-fallback.ts
|
|
1242
|
+
/** Module-level flag: `true` once the Electron compatibility patch has been applied or detected. */
let electronCompatPatched = false;
/**
 * Resolve absolute paths to two playwright-core server modules, relative to the
 * directory containing the installed `playwright-core/package.json`.
 *
 * @returns {{ playwrightServerPath: string, crBrowserPath: string }}
 * @throws When `playwright-core/package.json` cannot be resolved from this module.
 */
function resolvePlaywrightInternalPaths() {
  const requireFromHere = createRequire(import.meta.url);
  const manifestPath = requireFromHere.resolve("playwright-core/package.json");
  const packageRoot = dirname(manifestPath);
  const playwrightServerPath = join$1(packageRoot, "lib/server/playwright.js");
  const crBrowserPath = join$1(packageRoot, "lib/server/chromium/crBrowser.js");
  return { playwrightServerPath, crBrowserPath };
}
|
|
1250
|
+
/**
 * Monkey-patch `CRBrowserContext.prototype.initialize` (once per process) so that an
 * initialization failure whose message mentions both "Browser.setDownloadBehavior" and
 * "Browser context management is not supported" is logged and swallowed instead of thrown.
 * Any other initialization error is rethrown unchanged.
 *
 * A non-enumerable, read-only marker property on the prototype records that the patch is
 * installed, so a second copy of this module does not wrap the method again.
 */
function ensureElectronDownloadBehaviorCompatPatch() {
  if (electronCompatPatched) return;
  const PATCH_MARKER = "__jshookElectronCompatPatched";
  const nodeRequire = createRequire(import.meta.url);
  const paths = resolvePlaywrightInternalPaths();
  const { createPlaywright } = nodeRequire(paths.playwrightServerPath);
  // NOTE(review): presumably instantiated so the server module graph is initialised
  // before crBrowser.js is loaded — confirm against playwright-core internals.
  createPlaywright({
    sdkLanguage: "javascript",
    isInternalPlaywright: true
  });
  const { CRBrowserContext } = nodeRequire(paths.crBrowserPath);
  const contextPrototype = CRBrowserContext.prototype;
  if (contextPrototype[PATCH_MARKER]) {
    electronCompatPatched = true;
    return;
  }
  const originalInitialize = contextPrototype.initialize;
  contextPrototype.initialize = async function patchedInitialize(...args) {
    try {
      return await originalInitialize.apply(this, args);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const isLegacyElectronFailure = message.includes("Browser.setDownloadBehavior") && message.includes("Browser context management is not supported");
      if (!isLegacyElectronFailure) throw error;
      logger.warn("[playwright-cdp-fallback] Swallowed Browser.setDownloadBehavior for legacy Electron CDP endpoint.");
      return;
    }
  };
  Object.defineProperty(contextPrototype, PATCH_MARKER, {
    value: true,
    configurable: false,
    enumerable: false,
    writable: false
  });
  electronCompatPatched = true;
}
|
|
1285
|
+
/**
 * Normalisation hook for the endpoint handed to Playwright; currently a pass-through.
 *
 * @param {string} endpoint - CDP endpoint.
 * @returns {string} The same endpoint, unmodified.
 */
function normalizePlaywrightConnectEndpoint(endpoint) {
  const normalizedEndpoint = endpoint;
  return normalizedEndpoint;
}
|
|
1288
|
+
/**
 * Translate a `waitUntil` value: "networkidle0" and "networkidle2" both become
 * "networkidle"; every other value (including `undefined`) is returned as-is.
 *
 * @param {string | undefined} waitUntil
 * @returns {string | undefined}
 */
function mapWaitUntil(waitUntil) {
  switch (waitUntil) {
    case "networkidle0":
    case "networkidle2":
      return "networkidle";
    default:
      return waitUntil;
  }
}
|
|
1292
|
+
/**
 * Return the first browser context reported by `browser.contexts()`.
 *
 * @param {{ contexts: Function }} browser - Browser-like object.
 * @returns {object} The first context.
 * @throws {Error} When the browser reports no contexts.
 */
function getDefaultContext(browser) {
  const contexts = browser.contexts();
  const firstContext = contexts[0];
  if (firstContext) return firstContext;
  throw new Error("Connected Playwright CDP browser exposes no default context. Cannot create or resolve pages.");
}
|
|
1297
|
+
/**
 * Wrap a Playwright page in a proxy that adds a handful of differently-named
 * methods (`goto`, `reload`, `waitForNavigation`, `select`, `evaluateOnNewDocument`,
 * `createCDPSession`, `setUserAgent`) and forwards every other property to the
 * underlying page. Functions read through the proxy are bound to their owner.
 *
 * @param {object} playwrightPage - Page to wrap.
 * @param {WeakMap<object, object>} pageCache - Cache keyed by page; guarantees one adapter per page.
 * @returns {object} The cached or newly created adapter.
 */
function createPageAdapter(playwrightPage, pageCache) {
  const existingAdapter = pageCache.get(playwrightPage);
  if (existingAdapter) return existingAdapter;

  // Copies the caller's options and rewrites `waitUntil` via mapWaitUntil.
  const withMappedWaitUntil = (options) => ({
    ...options,
    waitUntil: mapWaitUntil(options?.waitUntil)
  });

  const overrides = {
    async goto(url, options) {
      return await playwrightPage.goto(url, withMappedWaitUntil(options));
    },
    async reload(options) {
      return await playwrightPage.reload(withMappedWaitUntil(options));
    },
    async waitForNavigation(options) {
      const mappedWaitUntil = mapWaitUntil(options?.waitUntil);
      if (typeof playwrightPage.waitForNavigation === "function") {
        return await playwrightPage.waitForNavigation({
          ...options,
          waitUntil: mappedWaitUntil
        });
      }
      // No waitForNavigation on this page object: wait for a load state instead.
      const loadState = mappedWaitUntil ?? "load";
      return await playwrightPage.waitForLoadState(loadState, { timeout: options?.timeout });
    },
    async select(selector, ...values) {
      return await playwrightPage.selectOption(selector, values);
    },
    async evaluateOnNewDocument(pageFunction, ...args) {
      return await playwrightPage.addInitScript(pageFunction, ...args);
    },
    async createCDPSession() {
      const context = playwrightPage.context();
      if (typeof context.newCDPSession !== "function") {
        throw new Error("Playwright BrowserContext does not expose newCDPSession() for the attached page.");
      }
      return await context.newCDPSession(playwrightPage);
    },
    async setUserAgent(userAgent) {
      const context = playwrightPage.context();
      if (typeof context.newCDPSession !== "function") {
        logger.debug("[playwright-cdp-fallback] Cannot apply UA override: newCDPSession unavailable.");
        return;
      }
      const session = await context.newCDPSession(playwrightPage);
      await session.send("Network.setUserAgentOverride", { userAgent });
      logger.debug("[playwright-cdp-fallback] Applied user agent override via CDP session.");
    }
  };

  const handler = {
    get(target, prop, receiver) {
      // Overrides (and anything else reachable on the overrides object) win;
      // everything else is read from the real page.
      const isOverride = Reflect.has(target, prop);
      const owner = isOverride ? target : playwrightPage;
      const value = isOverride ? Reflect.get(target, prop, receiver) : Reflect.get(playwrightPage, prop);
      return typeof value === "function" ? value.bind(owner) : value;
    }
  };

  const adapter = new Proxy(overrides, handler);
  pageCache.set(playwrightPage, adapter);
  return adapter;
}
|
|
1352
|
+
/**
 * Build a minimal target-like facade for a Playwright page.
 *
 * @param {object} playwrightPage - Underlying page.
 * @param {WeakMap<object, object>} pageCache - Adapter cache passed through to createPageAdapter.
 * @returns {{ type: Function, url: Function, page: Function }} `type()` always reports "page",
 *   `url()` reads the page's current URL, `page()` resolves to the page adapter.
 */
function createTargetAdapter(playwrightPage, pageCache) {
  const targetAdapter = {
    type() {
      return "page";
    },
    url() {
      return playwrightPage.url();
    },
    async page() {
      return createPageAdapter(playwrightPage, pageCache);
    }
  };
  return targetAdapter;
}
|
|
1359
|
+
/**
 * Wrap a Playwright browser in a proxy that adds `targets()`, `pages()`, `newPage()`,
 * `disconnect()` and `close()`, forwarding every other property to the underlying
 * browser. Both `disconnect()` and `close()` call the browser's `close()`.
 *
 * @param {object} playwrightBrowser - Browser to wrap.
 * @returns {object} Proxy exposing the methods above.
 */
function createBrowserAdapter(playwrightBrowser) {
  const pageCache = new WeakMap();
  // Every page of every context, in the order the browser reports them.
  const listAllPages = () => playwrightBrowser.contexts().flatMap((context) => context.pages());

  const overrides = {
    targets() {
      return listAllPages().map((page) => createTargetAdapter(page, pageCache));
    },
    async pages() {
      return listAllPages().map((page) => createPageAdapter(page, pageCache));
    },
    async newPage() {
      const defaultContext = getDefaultContext(playwrightBrowser);
      const freshPage = await defaultContext.newPage();
      return createPageAdapter(freshPage, pageCache);
    },
    async disconnect() {
      await playwrightBrowser.close();
    },
    async close() {
      await playwrightBrowser.close();
    }
  };

  const handler = {
    get(target, prop, receiver) {
      const isOverride = Reflect.has(target, prop);
      const owner = isOverride ? target : playwrightBrowser;
      const value = isOverride ? Reflect.get(target, prop, receiver) : Reflect.get(playwrightBrowser, prop);
      return typeof value === "function" ? value.bind(owner) : value;
    }
  };

  return new Proxy(overrides, handler);
}
|
|
1386
|
+
/**
 * Connect to a CDP endpoint through playwright-core and return a browser adapter.
 * The Electron compatibility patch is ensured before playwright-core is imported.
 *
 * @param {string} endpoint - CDP endpoint to connect to.
 * @param {number} timeoutMs - Timeout passed to `chromium.connectOverCDP`.
 * @returns {Promise<object>} Browser adapter wrapping the Playwright browser.
 */
async function connectPlaywrightCdpFallback(endpoint, timeoutMs) {
  ensureElectronDownloadBehaviorCompatPatch();
  const playwright = await import("playwright-core");
  const normalizedEndpoint = normalizePlaywrightConnectEndpoint(endpoint);
  const playwrightBrowser = await playwright.chromium.connectOverCDP(normalizedEndpoint, { timeout: timeoutMs });
  return createBrowserAdapter(playwrightBrowser);
}
|
|
1391
|
+
//#endregion
|
|
1392
|
+
//#region src/modules/collector/CodeCollectorConnectionInternal.ts
|
|
1393
|
+
/**
 * Compute the default Chrome user-data directory for the current platform.
 *
 * - win32: `<LOCALAPPDATA or ~/AppData/Local>/Google/<product>/User Data`
 * - darwin: `~/Library/Application Support/Google/<product>`
 * - otherwise: `<XDG_CONFIG_HOME or ~/.config>/<directory>`
 *
 * @param {string} [channel="stable"] - "beta", "dev" or "canary"; any other value selects the stable location.
 * @returns {string} Directory path.
 */
function resolveDefaultChromeUserDataDir(channel = "stable") {
  const home = homedir();
  // Strict lookup by channel name; unknown channels fall back to the stable entry.
  const pick = (byChannel, stableName) => byChannel.get(channel) ?? stableName;

  if (process.platform === "win32") {
    const localAppData = process.env.LOCALAPPDATA ?? join(home, "AppData", "Local");
    const product = pick(new Map([
      ["beta", "Chrome Beta"],
      ["dev", "Chrome Dev"],
      ["canary", "Chrome SxS"]
    ]), "Chrome");
    return join(localAppData, "Google", product, "User Data");
  }

  if (process.platform === "darwin") {
    const appSupport = join(home, "Library", "Application Support");
    const product = pick(new Map([
      ["beta", "Chrome Beta"],
      ["dev", "Chrome Dev"],
      ["canary", "Chrome Canary"]
    ]), "Chrome");
    return join(appSupport, "Google", product);
  }

  const configHome = process.env.XDG_CONFIG_HOME ?? join(home, ".config");
  const directory = pick(new Map([
    ["beta", "google-chrome-beta"],
    ["dev", "google-chrome-unstable"],
    ["canary", "google-chrome-canary"]
  ]), "google-chrome");
  return join(configHome, directory);
}
|
|
1421
|
+
/**
 * Build the browser WebSocket endpoint from Chrome's `DevToolsActivePort` file.
 *
 * The file is read from `options.userDataDir`, or from the default user-data
 * directory for `options.channel` when no directory is given. Its first non-empty
 * line is taken as the debugging port and its second as the WebSocket path.
 *
 * @param {{ userDataDir?: string, channel?: string }} options
 * @returns {Promise<string>} `ws://127.0.0.1:<port><path>`.
 * @throws {Error} When the file cannot be read (original error attached as `cause`),
 *   has fewer than two non-empty lines, the port is not a decimal integer in 1..65535,
 *   or the path does not start with "/".
 */
async function resolveAutoConnectWsEndpointImpl(options) {
  const channel = options.channel ?? "stable";
  const devToolsActivePortPath = join(options.userDataDir ?? resolveDefaultChromeUserDataDir(channel), "DevToolsActivePort");
  let fileContent;
  try {
    fileContent = await readFile(devToolsActivePortPath, "utf8");
  } catch (error) {
    throw new Error(`Could not read DevToolsActivePort from "${devToolsActivePortPath}". Check if Chrome is running from this profile and remote debugging is enabled at chrome://inspect/#remote-debugging.`, { cause: error });
  }
  const [rawPort, rawPath] = fileContent.split(/\r?\n/u).map((line) => line.trim()).filter(Boolean);
  // A path that does not begin with "/" would be glued onto the port and produce a malformed URL.
  if (!rawPort || !rawPath || !rawPath.startsWith("/")) throw new Error(`Invalid DevToolsActivePort contents found in "${devToolsActivePortPath}".`);
  // Require digits only: Number.parseInt would silently accept values such as "9222abc".
  const port = /^\d+$/u.test(rawPort) ? Number(rawPort) : Number.NaN;
  if (!Number.isInteger(port) || port <= 0 || port > 65535) throw new Error(`Invalid remote debugging port "${rawPort}" in "${devToolsActivePortPath}".`);
  return `ws://127.0.0.1:${port}${rawPath}`;
}
|
|
1436
|
+
/**
 * Turn a connection request into connect options.
 *
 * A string is trimmed and classified by scheme: `ws://` / `wss://` becomes
 * `browserWSEndpoint`, anything else becomes `browserURL`. For an options object the
 * precedence is `wsEndpoint`, then `browserURL`, then auto-connect (triggered by
 * `autoConnect`, `userDataDir` or `channel`).
 *
 * @param {string | object} endpointOrOptions
 * @returns {Promise<{ browserWSEndpoint: string } | { browserURL: string }>}
 * @throws {Error} For an empty string or an options object with none of the recognised fields.
 */
async function resolveConnectOptionsImpl(endpointOrOptions) {
  if (typeof endpointOrOptions === "string") {
    const endpoint = endpointOrOptions.trim();
    if (!endpoint) throw new Error("Connection endpoint cannot be empty.");
    const isWebSocketEndpoint = /^wss?:\/\//.test(endpoint);
    if (isWebSocketEndpoint) return { browserWSEndpoint: endpoint };
    return { browserURL: endpoint };
  }
  if (endpointOrOptions.wsEndpoint) {
    return { browserWSEndpoint: endpointOrOptions.wsEndpoint };
  }
  if (endpointOrOptions.browserURL) {
    return { browserURL: endpointOrOptions.browserURL };
  }
  const wantsAutoConnect = endpointOrOptions.autoConnect || endpointOrOptions.userDataDir || endpointOrOptions.channel;
  if (!wantsAutoConnect) {
    throw new Error("browserURL, wsEndpoint, autoConnect, userDataDir, or channel is required to connect to an existing browser.");
  }
  const browserWSEndpoint = await resolveAutoConnectWsEndpointImpl(endpointOrOptions);
  return { browserWSEndpoint };
}
|
|
1447
|
+
/**
 * Report whether a connection request asks for auto-connect, i.e. it is an options
 * object with a truthy `autoConnect`, `userDataDir` or `channel`.
 *
 * @param {string | object} endpointOrOptions
 * @returns {boolean} Always `false` for string endpoints.
 */
function isAutoConnectRequest(endpointOrOptions) {
  if (typeof endpointOrOptions === "string") return false;
  const autoConnectHint = endpointOrOptions.autoConnect || endpointOrOptions.userDataDir || endpointOrOptions.channel;
  return Boolean(autoConnectHint);
}
|
|
1450
|
+
/**
 * Extract a human-readable message from an arbitrary thrown value. Never throws.
 *
 * Resolution order:
 * 1. `Error` instance: its `message`.
 * 2. Object with a non-blank string `message`: that message, trimmed.
 * 3. Object with an `error` property that is an `Error` with a message, or an
 *    object with a non-blank string `message`: the nested message.
 * 4. Object with a non-empty JSON serialisation: the JSON text.
 * 5. Anything else: `String(value)`.
 *
 * @param {unknown} error - Thrown value of any type.
 * @returns {string} Best-effort message.
 */
function getUnknownErrorMessage(error) {
  if (error instanceof Error) return error.message;
  // Trimmed `message` of an object-like value, or "" when absent or not a string.
  const readMessage = (value) => "message" in value && typeof value.message === "string" ? value.message.trim() : "";
  if (typeof error === "object" && error !== null) {
    const directMessage = readMessage(error);
    if (directMessage) return directMessage;
    const nestedError = "error" in error ? error.error : void 0;
    if (nestedError instanceof Error && nestedError.message) return nestedError.message;
    if (typeof nestedError === "object" && nestedError !== null) {
      const nestedMessage = readMessage(nestedError);
      if (nestedMessage) return nestedMessage;
    }
    let serialized;
    try {
      serialized = JSON.stringify(error);
    } catch {
      // Cyclic structures and BigInt values make JSON.stringify throw; a message
      // formatter must not replace the original failure with its own.
      serialized = void 0;
    }
    if (serialized && serialized !== "{}") return serialized;
  }
  try {
    return String(error);
  } catch {
    // String() throws for values without a usable toString (e.g. null-prototype objects).
    return Object.prototype.toString.call(error);
  }
}
|
|
1466
|
+
/**
 * Convert a connection failure into an `Error` suitable for reporting.
 *
 * For auto-connect requests whose message contains "ECONNREFUSED" (case-insensitive)
 * a new error with recovery guidance mentioning `target` is returned. Otherwise an
 * existing `Error` is returned unchanged and any other value is wrapped.
 *
 * @param {unknown} error - Original failure.
 * @param {string} target - Endpoint description used in the guidance text.
 * @param {string | object} endpointOrOptions - Original connection request.
 * @returns {Error}
 */
function normalizeConnectError(error, target, endpointOrOptions) {
  const message = getUnknownErrorMessage(error);
  const refusedDuringAutoConnect = isAutoConnectRequest(endpointOrOptions) && /ECONNREFUSED/i.test(message);
  if (refusedDuringAutoConnect) {
    return new Error(`Failed to connect to existing browser: ${message}. Chrome is not currently listening at ${target}. DevToolsActivePort may be stale after a browser restart. Re-open Chrome, confirm remote debugging is enabled at chrome://inspect/#remote-debugging, click Allow if prompted, and retry.`);
  }
  if (error instanceof Error) return error;
  return new Error(`Failed to connect to existing browser: ${message}`);
}
|
|
1471
|
+
/**
 * Build the error reported when connecting to an existing browser times out.
 * The trailing hint differs for auto-connect requests.
 *
 * @param {string} target - Endpoint description included in the message.
 * @param {string | object} endpointOrOptions - Original connection request.
 * @param {number} timeoutMs - Timeout that elapsed.
 * @returns {Error}
 */
function buildConnectTimeoutError(target, endpointOrOptions, timeoutMs) {
  const baseMessage = `Timed out after ${timeoutMs}ms while connecting to existing browser: ${target}. The CDP handshake did not complete in time.`;
  const hint = isAutoConnectRequest(endpointOrOptions)
    ? "If Chrome prompted for remote debugging approval, click Allow in Chrome and then retry the tool call."
    : "Verify that the browser debugging endpoint is reachable and retry.";
  return new Error(`${baseMessage} ${hint}`);
}
|
|
1476
|
+
/**
 * Decide whether a failed primary connection is worth retrying through the
 * Playwright CDP fallback.
 *
 * Failures indicating the endpoint is simply unavailable (ECONNREFUSED, ENOTFOUND,
 * a standalone 404, "stale") never trigger the fallback. Handshake/protocol-level
 * failures (timeouts, protocol errors, closed targets, socket resets, WebSocket
 * errors) do.
 *
 * @param {unknown} error - Failure from the primary connection attempt.
 * @returns {boolean} `true` when the fallback should be attempted.
 */
function shouldAttemptPlaywrightFallback(error) {
  const message = getUnknownErrorMessage(error);
  // `404` is matched as a standalone number: messages may embed the endpoint, and a
  // bare /404/ would also hit digit runs inside ports or browser ids (e.g. ":14041",
  // "…/1a2b404c-…"), wrongly suppressing the fallback.
  if (/ECONNREFUSED|ENOTFOUND|\b404\b|stale/i.test(message)) return false;
  return /timed out|handshake|Protocol error|Target closed|ECONNRESET|socket hang up|WebSocket/i.test(message);
}
|
|
1481
|
+
/**
 * Retry a failed connection through the Playwright CDP fallback.
 *
 * @param {{ browserWSEndpoint?: string, browserURL?: string }} connectOptions - Options used by the primary attempt;
 *   `browserWSEndpoint` is preferred over `browserURL`.
 * @param {unknown} primaryError - Failure from the primary attempt.
 * @param {number} timeoutMs - Timeout forwarded to the fallback connection.
 * @returns {Promise<object>} Browser adapter produced by the fallback.
 * @throws The primary error (wrapped in `Error` if needed) when no endpoint is available, or a combined
 *   error (with the fallback failure as `cause`) when the fallback fails too.
 */
async function connectWithPlaywrightFallbackImpl(connectOptions, primaryError, timeoutMs) {
  const endpoint = connectOptions.browserWSEndpoint ?? connectOptions.browserURL;
  if (!endpoint) {
    if (primaryError instanceof Error) throw primaryError;
    throw new Error(String(primaryError));
  }
  logger.warn(`[connect-fallback] Rebrowser connect failed. Falling back to Playwright CDP compatibility mode for ${endpoint}.`);
  let fallbackBrowser;
  try {
    fallbackBrowser = await connectPlaywrightCdpFallback(endpoint, timeoutMs);
  } catch (fallbackError) {
    const primaryMessage = getUnknownErrorMessage(primaryError);
    const fallbackMessage = getUnknownErrorMessage(fallbackError);
    throw new Error(`Failed to connect to existing browser via both rebrowser-puppeteer and Playwright CDP compatibility fallback. Primary error: ${primaryMessage}. Fallback error: ${fallbackMessage}.`, { cause: fallbackError });
  }
  return fallbackBrowser;
}
|
|
1493
|
+
/**
 * Connect to an existing browser with a deadline, falling back to the Playwright CDP
 * path for failures that `shouldAttemptPlaywrightFallback` accepts.
 *
 * `connectAttemptRef.current` is incremented to give this call an attempt id. A
 * connection that completes after the deadline, or after the counter has moved on,
 * is disconnected (best effort) and otherwise ignored. When the deadline fires while
 * this attempt is still the current one, the counter is bumped again.
 *
 * @param {object} connectOptions - Options spread into `connect`, with `defaultViewport: null` added.
 * @param {string} target - Endpoint description used in error messages.
 * @param {string | object} endpointOrOptions - Original connection request (for error wording).
 * @param {number} timeoutMs - Deadline for the primary connection; also forwarded to the fallback.
 * @param {{ current: number }} connectAttemptRef - Shared attempt counter.
 * @returns {Promise<object>} Connected browser (primary or fallback).
 */
async function connectWithTimeoutImpl(connectOptions, target, endpointOrOptions, timeoutMs, connectAttemptRef) {
  connectAttemptRef.current += 1;
  const attemptId = connectAttemptRef.current;

  const connectBeforeDeadline = () => new Promise((resolve, reject) => {
    let settled = false;
    // True once this attempt has already settled or a newer attempt has taken over.
    const isObsolete = () => settled || connectAttemptRef.current !== attemptId;

    const timer = setTimeout(() => {
      settled = true;
      if (connectAttemptRef.current === attemptId) connectAttemptRef.current += 1;
      reject(buildConnectTimeoutError(target, endpointOrOptions, timeoutMs));
    }, timeoutMs);

    const onConnected = async (browser) => {
      if (isObsolete()) {
        // Nobody is waiting for this browser any more; release it quietly.
        try {
          await browser.disconnect();
        } catch {}
        return;
      }
      settled = true;
      clearTimeout(timer);
      resolve(browser);
    };

    const onFailed = (error) => {
      if (isObsolete()) return;
      settled = true;
      clearTimeout(timer);
      reject(normalizeConnectError(error, target, endpointOrOptions));
    };

    connect({
      ...connectOptions,
      defaultViewport: null
    }).then(onConnected).catch(onFailed);
  });

  try {
    return await connectBeforeDeadline();
  } catch (error) {
    if (!shouldAttemptPlaywrightFallback(error)) throw error;
    return await connectWithPlaywrightFallbackImpl(connectOptions, error, timeoutMs);
  }
}
|
|
1528
|
+
//#endregion
|
|
1529
|
+
//#region src/modules/collector/CodeCollectorFileQueryInternal.ts
|
|
1530
|
+
/**
 * Summarise every cached file without its content.
 *
 * @param {Map<string, object>} collectedFilesCache - Cache of collected files.
 * @returns {Array<{ url: *, size: *, type: *, truncated: boolean | undefined, originalSize: number | undefined }>}
 *   One entry per cached file; `truncated` / `originalSize` are only carried over from
 *   `file.metadata` when they have the expected primitive type.
 */
function getCollectedFilesSummaryImpl(collectedFilesCache) {
  const summaries = [];
  for (const file of collectedFilesCache.values()) {
    const metadata = file.metadata;
    const truncated = typeof metadata?.truncated === "boolean" ? metadata.truncated : void 0;
    const originalSize = typeof metadata?.originalSize === "number" ? metadata.originalSize : void 0;
    summaries.push({
      url: file.url,
      size: file.size,
      type: file.type,
      truncated,
      originalSize
    });
  }
  logger.info(`Returning summary of ${summaries.length} collected files`);
  return summaries;
}
|
|
1541
|
+
/**
 * Look up a cached file by its exact URL.
 *
 * @param {Map<string, object>} collectedFilesCache - Cache of collected files.
 * @param {string} url - Cache key.
 * @returns {object | null} The cached file, or `null` (with a warning logged) when absent.
 */
function getFileByUrlImpl(collectedFilesCache, url) {
  const cachedFile = collectedFilesCache.get(url);
  if (!cachedFile) {
    logger.warn(`File not found: ${url}`);
    return null;
  }
  const sizeKb = (cachedFile.size / 1024).toFixed(2);
  logger.info(`Returning file: ${url} (${sizeKb} KB)`);
  return cachedFile;
}
|
|
1550
|
+
/**
 * Return cached files whose URL matches a regular expression, bounded by a file
 * count and a cumulative size budget.
 *
 * Matches are taken in cache order. Selection stops at the first file that would
 * push the cumulative size over `maxTotalSize`; that case sets `truncated`. Hitting
 * the count limit alone only produces a warning.
 *
 * @param {Map<string, object>} collectedFilesCache - Cache of collected files.
 * @param {string} pattern - Regular-expression source tested against each file URL.
 * @param {number} limit - Maximum number of files to return.
 * @param {number} maxTotalSize - Cumulative size budget.
 * @returns {{ files: object[], totalSize: number, matched: number, returned: number, truncated: boolean }}
 */
function getFilesByPatternImpl(collectedFilesCache, pattern, limit, maxTotalSize) {
  const urlMatcher = new RegExp(pattern);
  const matched = Array.from(collectedFilesCache.values()).filter((file) => urlMatcher.test(file.url));
  const returned = [];
  let totalSize = 0;
  let truncated = false;
  let index = 0;
  while (index < matched.length && index < limit) {
    const candidate = matched[index];
    const fitsBudget = candidate && totalSize + candidate.size <= maxTotalSize;
    if (!fitsBudget) {
      truncated = true;
      break;
    }
    returned.push(candidate);
    totalSize += candidate.size;
    index += 1;
  }
  const limited = truncated || matched.length > limit;
  if (limited) {
    logger.warn(`Pattern "${pattern}" matched ${matched.length} files, returning ${returned.length} (limited by size/count)`);
  }
  logger.info(` Pattern "${pattern}": matched ${matched.length}, returning ${returned.length} files (${(totalSize / 1024).toFixed(2)} KB)`);
  return {
    files: returned,
    totalSize,
    matched: matched.length,
    returned: returned.length,
    truncated
  };
}
|
|
1577
|
+
/**
 * Return the highest-scoring cached files within a count and cumulative size budget.
 *
 * Files are ranked by `calculatePriorityScore` (descending). Selection walks the
 * ranking from the top and stops at the first file that would exceed `maxTotalSize`.
 *
 * @param {Map<string, object>} collectedFilesCache - Cache of collected files.
 * @param {number} topN - Maximum number of files to return.
 * @param {number} maxTotalSize - Cumulative size budget.
 * @returns {{ files: object[], totalSize: number, totalFiles: number }}
 */
function getTopPriorityFilesImpl(collectedFilesCache, topN, maxTotalSize) {
  const allFiles = Array.from(collectedFilesCache.values());
  const ranked = allFiles.map((file) => ({
    file,
    score: calculatePriorityScore(file)
  }));
  ranked.sort((left, right) => right.score - left.score);
  const candidateCount = Math.min(topN, ranked.length);
  const selected = [];
  let totalSize = 0;
  for (let index = 0; index < candidateCount; index += 1) {
    const candidate = ranked[index];
    const fitsBudget = candidate?.file && totalSize + candidate.file.size <= maxTotalSize;
    if (!fitsBudget) break;
    selected.push(candidate.file);
    totalSize += candidate.file.size;
  }
  logger.info(`Returning top ${selected.length}/${allFiles.length} priority files (${(totalSize / 1024).toFixed(2)} KB)`);
  return {
    files: selected,
    totalSize,
    totalFiles: allFiles.length
  };
}
|
|
1600
|
+
//#endregion
|
|
1601
|
+
//#region src/modules/collector/CodeCollectorLaunchOptions.ts
|
|
1602
|
+
/**
 * Command-line switches included in every resolved Chrome launch configuration.
 *
 * NOTE(review): several of these relax browser security (sandbox, same-origin policy,
 * site isolation, certificate validation) — confirm that is intended for every
 * environment this collector runs in.
 */
const DEFAULT_CHROME_ARGS = [
  // Sandbox / shared-memory switches.
  "--no-sandbox",
  "--disable-setuid-sandbox",
  "--disable-dev-shm-usage",
  // Automation-detection and extension switches.
  "--disable-blink-features=AutomationControlled",
  "--disable-extensions",
  "--disable-component-extensions-with-background-pages",
  // Web-security, isolation and certificate switches.
  "--disable-web-security",
  "--disable-features=IsolateOrigins,site-per-process",
  "--ignore-certificate-errors"
];
|
|
1613
|
+
/**
 * Normalise a possibly malformed list of strings.
 *
 * Non-array input yields an empty list. Non-string elements are dropped rather than
 * raising a TypeError, the remaining strings are trimmed, and blank entries are removed.
 *
 * @param {unknown} values - Candidate list, e.g. user-supplied launch arguments.
 * @returns {string[]} Trimmed, non-empty strings in their original order.
 */
function normalizeStringArray(values) {
  if (!Array.isArray(values)) return [];
  return values
    .filter((value) => typeof value === "string")
    .map((value) => value.trim())
    .filter((value) => value.length > 0);
}
|
|
1617
|
+
/**
 * Split a whitespace-separated flag string into individual flags.
 *
 * @param {string} value - e.g. the value part of `--js-flags=...`.
 * @returns {string[]} Non-empty flags in order of appearance.
 */
function splitJsFlags(value) {
  const flags = [];
  for (const piece of value.split(/\s+/u)) {
    const flag = piece.trim();
    if (flag.length > 0) flags.push(flag);
  }
  return flags;
}
|
|
1620
|
+
/**
 * Remove duplicate entries while keeping the first occurrence of each.
 *
 * @param {Iterable<string>} args - Arguments, possibly with repeats.
 * @returns {string[]} New array of unique arguments in first-seen order.
 */
function dedupeArgs(args) {
  // A Set iterates in insertion order, so first occurrences keep their position.
  return [...new Set(args)];
}
|
|
1630
|
+
/**
 * Collapse every `--js-flags` occurrence in `args` into a single trailing
 * `--js-flags=...` argument and apply the `--allow-natives-syntax` policy.
 *
 * Both `--js-flags <value>` (two arguments) and `--js-flags=<value>` are recognised.
 * Unless `enableV8NativesSyntax` is exactly `false`, `--allow-natives-syntax` is
 * added to the merged flags; when it is `false`, that flag is removed instead.
 * All other arguments are de-duplicated and kept in order.
 *
 * @param {string[]} args - Raw Chrome arguments.
 * @param {boolean | undefined} enableV8NativesSyntax - `false` disables the natives-syntax flag.
 * @returns {{ args: string[], v8NativeSyntaxEnabled: boolean }}
 */
function mergeJsFlags(args, enableV8NativesSyntax) {
  const JS_FLAGS_SWITCH = "--js-flags";
  const JS_FLAGS_PREFIX = "--js-flags=";
  const ALLOW_NATIVES_SYNTAX = "--allow-natives-syntax";
  const otherArgs = [];
  const collectedJsFlags = [];
  let position = 0;
  while (position < args.length) {
    const current = args[position];
    position += 1;
    if (!current) continue;
    if (current === JS_FLAGS_SWITCH) {
      const following = args[position];
      if (typeof following === "string") {
        collectedJsFlags.push(...splitJsFlags(following));
        // The value has been consumed together with the switch.
        position += 1;
        continue;
      }
    }
    if (current.startsWith(JS_FLAGS_PREFIX)) {
      collectedJsFlags.push(...splitJsFlags(current.slice(JS_FLAGS_PREFIX.length)));
      continue;
    }
    otherArgs.push(current);
  }
  const uniqueJsFlags = dedupeArgs(collectedJsFlags);
  let effectiveJsFlags;
  if (enableV8NativesSyntax === false) {
    effectiveJsFlags = uniqueJsFlags.filter((flag) => flag !== ALLOW_NATIVES_SYNTAX);
  } else {
    if (!uniqueJsFlags.includes(ALLOW_NATIVES_SYNTAX)) uniqueJsFlags.push(ALLOW_NATIVES_SYNTAX);
    effectiveJsFlags = uniqueJsFlags;
  }
  const uniqueOtherArgs = dedupeArgs(otherArgs);
  const mergedArgs = effectiveJsFlags.length > 0
    ? [...uniqueOtherArgs, `--js-flags=${effectiveJsFlags.join(" ")}`]
    : uniqueOtherArgs;
  return {
    args: mergedArgs,
    v8NativeSyntaxEnabled: effectiveJsFlags.includes(ALLOW_NATIVES_SYNTAX)
  };
}
|
|
1659
|
+
/**
 * Combine configuration, per-launch overrides and built-in defaults into the
 * effective Chrome launch options.
 *
 * Arguments are concatenated in the order: config args, override args, default args,
 * then a `--window-size` switch derived from `viewport`; the result is passed through
 * `mergeJsFlags`.
 *
 * @param {{ args?: unknown, headless?: * }} config - Collector configuration.
 * @param {{ args?: unknown, headless?: *, enableV8NativesSyntax?: boolean } | undefined} overrides - Per-launch overrides.
 * @param {string | undefined} executablePath - Browser executable, passed through unchanged.
 * @param {{ width: number, height: number }} viewport - Used for `--window-size`.
 * @returns {{ headless: *, args: string[], executablePath: string | undefined, v8NativeSyntaxEnabled: boolean }}
 */
function resolveChromeLaunchOptions(config, overrides, executablePath, viewport) {
  const configArgs = normalizeStringArray(config.args);
  const overrideArgs = normalizeStringArray(overrides?.args);
  const windowSizeArg = `--window-size=${viewport.width},${viewport.height}`;
  const combinedArgs = [...configArgs, ...overrideArgs, ...DEFAULT_CHROME_ARGS, windowSizeArg];
  const { args, v8NativeSyntaxEnabled } = mergeJsFlags(combinedArgs, overrides?.enableV8NativesSyntax);
  const headless = overrides?.headless ?? config.headless;
  return {
    headless,
    args,
    executablePath,
    v8NativeSyntaxEnabled
  };
}
|
|
1673
|
+
/**
 * Compare two resolved launch-option objects for equality.
 *
 * @param {object | null | undefined} left - Previously used options; a falsy value never matches.
 * @param {object} right - Newly resolved options.
 * @returns {boolean} `true` when `headless`, `executablePath`, `v8NativeSyntaxEnabled` and the
 *   ordered `args` list are all identical.
 */
function sameChromeLaunchOptions(left, right) {
  if (!left) return false;
  const scalarsMatch = left.headless === right.headless
    && left.executablePath === right.executablePath
    && left.v8NativeSyntaxEnabled === right.v8NativeSyntaxEnabled
    && left.args.length === right.args.length;
  if (!scalarsMatch) return false;
  for (let index = 0; index < left.args.length; index += 1) {
    if (left.args[index] !== right.args[index]) return false;
  }
  return true;
}
|
|
1678
|
+
//#endregion
|
|
1679
|
+
//#region src/modules/collector/CodeCollector.ts
|
|
1680
|
+
var CodeCollector = class CodeCollector {
|
|
1681
|
+
config;
|
|
1682
|
+
browser = null;
|
|
1683
|
+
collectedUrls = /* @__PURE__ */ new Set();
|
|
1684
|
+
initPromise = null;
|
|
1685
|
+
collectLock = null;
|
|
1686
|
+
connectAttemptRef = { current: 0 };
|
|
1687
|
+
MAX_COLLECTED_URLS;
|
|
1688
|
+
MAX_FILES_PER_COLLECT;
|
|
1689
|
+
MAX_RESPONSE_SIZE;
|
|
1690
|
+
MAX_SINGLE_FILE_SIZE;
|
|
1691
|
+
CONNECT_TIMEOUT_MS;
|
|
1692
|
+
viewport;
|
|
1693
|
+
userAgent;
|
|
1694
|
+
collectedFilesCache = /* @__PURE__ */ new Map();
|
|
1695
|
+
cache;
|
|
1696
|
+
cacheEnabled = true;
|
|
1697
|
+
smartCollector;
|
|
1698
|
+
compressor;
|
|
1699
|
+
cdpSession = null;
|
|
1700
|
+
browserTargetSessionManager = null;
|
|
1701
|
+
cdpListeners = {};
|
|
1702
|
+
activePageIndex = null;
|
|
1703
|
+
currentHeadless = null;
|
|
1704
|
+
currentLaunchOptions = null;
|
|
1705
|
+
explicitlyClosed = false;
|
|
1706
|
+
connectedToExistingBrowser = false;
|
|
1707
|
+
/** PID of the Chrome child process launched by puppeteer, used for force-kill fallback. */
|
|
1708
|
+
chromePid = null;
|
|
1709
|
+
static BROWSER_CLOSE_TIMEOUT_MS = 5e3;
|
|
1710
|
+
constructor(config) {
|
|
1711
|
+
this.config = config;
|
|
1712
|
+
this.MAX_COLLECTED_URLS = config.maxCollectedUrls ?? 1e4;
|
|
1713
|
+
this.MAX_FILES_PER_COLLECT = config.maxFilesPerCollect ?? 200;
|
|
1714
|
+
this.MAX_RESPONSE_SIZE = config.maxTotalContentSize ?? 512 * 1024;
|
|
1715
|
+
this.MAX_SINGLE_FILE_SIZE = config.maxSingleFileSize ?? 200 * 1024;
|
|
1716
|
+
this.CONNECT_TIMEOUT_MS = Number(process.env.JSHOOK_CONNECT_TIMEOUT_MS) || 6e4;
|
|
1717
|
+
this.viewport = config.viewport ?? {
|
|
1718
|
+
width: 1920,
|
|
1719
|
+
height: 1080
|
|
1720
|
+
};
|
|
1721
|
+
this.userAgent = config.userAgent ?? "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
|
|
1722
|
+
this.cache = new CodeCache();
|
|
1723
|
+
this.smartCollector = new SmartCodeCollector();
|
|
1724
|
+
this.compressor = new CodeCompressor();
|
|
1725
|
+
logger.info(` CodeCollector limits: maxCollect=${this.MAX_FILES_PER_COLLECT} files, maxResponse=${(this.MAX_RESPONSE_SIZE / 1024).toFixed(0)}KB, maxSingle=${(this.MAX_SINGLE_FILE_SIZE / 1024).toFixed(0)}KB`);
|
|
1726
|
+
logger.info(` Strategy: Collect ALL files -> Cache -> Return summary/partial data to fit MCP limits`);
|
|
1727
|
+
}
|
|
1728
|
+
setCacheEnabled(enabled) {
|
|
1729
|
+
this.cacheEnabled = enabled;
|
|
1730
|
+
logger.info(`Code cache ${enabled ? "enabled" : "disabled"}`);
|
|
1731
|
+
}
|
|
1732
|
+
async clearFileCache() {
|
|
1733
|
+
await this.cache.clear();
|
|
1734
|
+
}
|
|
1735
|
+
async getFileCacheStats() {
|
|
1736
|
+
return await this.cache.getStats();
|
|
1737
|
+
}
|
|
1738
|
+
async clearAllData() {
|
|
1739
|
+
logger.info("Clearing all collected data...");
|
|
1740
|
+
await this.cache.clear();
|
|
1741
|
+
this.compressor.clearCache();
|
|
1742
|
+
this.compressor.resetStats();
|
|
1743
|
+
this.collectedUrls.clear();
|
|
1744
|
+
this.collectedFilesCache.clear();
|
|
1745
|
+
logger.success("All data cleared");
|
|
1746
|
+
}
|
|
1747
|
+
async getAllStats() {
|
|
1748
|
+
return {
|
|
1749
|
+
cache: await this.cache.getStats(),
|
|
1750
|
+
compression: {
|
|
1751
|
+
...this.compressor.getStats(),
|
|
1752
|
+
cacheSize: this.compressor.getCacheSize()
|
|
1753
|
+
},
|
|
1754
|
+
collector: {
|
|
1755
|
+
collectedUrls: this.collectedUrls.size,
|
|
1756
|
+
maxCollectedUrls: this.MAX_COLLECTED_URLS
|
|
1757
|
+
}
|
|
1758
|
+
};
|
|
1759
|
+
}
|
|
1760
|
+
getCache() {
|
|
1761
|
+
return this.cache;
|
|
1762
|
+
}
|
|
1763
|
+
getCompressor() {
|
|
1764
|
+
return this.compressor;
|
|
1765
|
+
}
|
|
1766
|
+
cleanupCollectedUrls() {
|
|
1767
|
+
if (this.collectedUrls.size > this.MAX_COLLECTED_URLS) {
|
|
1768
|
+
logger.warn(`Collected URLs exceeded ${this.MAX_COLLECTED_URLS}, clearing...`);
|
|
1769
|
+
const urls = Array.from(this.collectedUrls);
|
|
1770
|
+
this.collectedUrls.clear();
|
|
1771
|
+
urls.slice(-Math.floor(this.MAX_COLLECTED_URLS / 2)).forEach((url) => this.collectedUrls.add(url));
|
|
1772
|
+
}
|
|
1773
|
+
}
|
|
1774
|
+
async init(headless) {
|
|
1775
|
+
await this.launch(headless === void 0 ? void 0 : { headless });
|
|
1776
|
+
}
|
|
1777
|
+
async launch(overrides) {
|
|
1778
|
+
if (this.initPromise) await this.initPromise;
|
|
1779
|
+
const executablePath = this.resolveExecutablePath();
|
|
1780
|
+
const launchOptions = resolveChromeLaunchOptions(this.config, overrides, executablePath, this.viewport);
|
|
1781
|
+
if (this.browser && overrides === void 0) {
|
|
1782
|
+
this.explicitlyClosed = false;
|
|
1783
|
+
return {
|
|
1784
|
+
action: "reused",
|
|
1785
|
+
launchOptions: this.currentLaunchOptions ?? launchOptions
|
|
1786
|
+
};
|
|
1787
|
+
}
|
|
1788
|
+
if (this.browser && !this.connectedToExistingBrowser && sameChromeLaunchOptions(this.currentLaunchOptions, launchOptions)) {
|
|
1789
|
+
this.explicitlyClosed = false;
|
|
1790
|
+
return {
|
|
1791
|
+
action: "reused",
|
|
1792
|
+
launchOptions
|
|
1793
|
+
};
|
|
1794
|
+
}
|
|
1795
|
+
const action = this.browser ? "relaunched" : "launched";
|
|
1796
|
+
const reason = this.browser ? this.connectedToExistingBrowser ? "replacing-existing-browser-connection" : "launch-options-changed" : void 0;
|
|
1797
|
+
this.explicitlyClosed = false;
|
|
1798
|
+
this.initPromise = this.launchInner(launchOptions);
|
|
1799
|
+
try {
|
|
1800
|
+
await this.initPromise;
|
|
1801
|
+
} finally {
|
|
1802
|
+
this.initPromise = null;
|
|
1803
|
+
}
|
|
1804
|
+
return {
|
|
1805
|
+
action,
|
|
1806
|
+
launchOptions,
|
|
1807
|
+
...reason ? { reason } : {}
|
|
1808
|
+
};
|
|
1809
|
+
}
|
|
1810
|
+
async launchInner(launchOptions) {
|
|
1811
|
+
if (this.browser) await this.disposeCurrentBrowser(false);
|
|
1812
|
+
const browserLaunchOptions = {
|
|
1813
|
+
headless: launchOptions.headless,
|
|
1814
|
+
args: launchOptions.args,
|
|
1815
|
+
defaultViewport: this.viewport,
|
|
1816
|
+
protocolTimeout: 6e4
|
|
1817
|
+
};
|
|
1818
|
+
if (launchOptions.executablePath) browserLaunchOptions.executablePath = launchOptions.executablePath;
|
|
1819
|
+
logger.info("Initializing browser with anti-detection...");
|
|
1820
|
+
this.browser = await launch(browserLaunchOptions);
|
|
1821
|
+
this.connectedToExistingBrowser = false;
|
|
1822
|
+
this.chromePid = this.browser.process()?.pid ?? null;
|
|
1823
|
+
if (this.chromePid) logger.debug(`Chrome child process PID: ${this.chromePid}`);
|
|
1824
|
+
this.currentHeadless = launchOptions.headless;
|
|
1825
|
+
this.currentLaunchOptions = launchOptions;
|
|
1826
|
+
this.browser.on("disconnected", () => {
|
|
1827
|
+
this.handleBrowserDisconnected();
|
|
1828
|
+
});
|
|
1829
|
+
logger.success("Browser initialized with enhanced anti-detection");
|
|
1830
|
+
}
|
|
1831
|
+
resolveExecutablePath() {
|
|
1832
|
+
const configuredPath = this.config.executablePath?.trim();
|
|
1833
|
+
if (configuredPath) {
|
|
1834
|
+
if (existsSync(configuredPath)) return configuredPath;
|
|
1835
|
+
throw new Error(`Configured browser executable was not found: ${configuredPath}. Set a valid executablePath or configure CHROME_PATH / PUPPETEER_EXECUTABLE_PATH / BROWSER_EXECUTABLE_PATH.`);
|
|
1836
|
+
}
|
|
1837
|
+
const detectedPath = findBrowserExecutable();
|
|
1838
|
+
if (detectedPath) return detectedPath;
|
|
1839
|
+
logger.info("No explicit browser executable configured. Falling back to Puppeteer-managed browser resolution.");
|
|
1840
|
+
}
|
|
1841
|
+
handleBrowserDisconnected() {
|
|
1842
|
+
logger.warn("Browser disconnected");
|
|
1843
|
+
this.browser = null;
|
|
1844
|
+
this.currentHeadless = null;
|
|
1845
|
+
this.currentLaunchOptions = null;
|
|
1846
|
+
this.connectedToExistingBrowser = false;
|
|
1847
|
+
this.chromePid = null;
|
|
1848
|
+
this.browserTargetSessionManager?.dispose();
|
|
1849
|
+
this.browserTargetSessionManager = null;
|
|
1850
|
+
if (this.cdpSession) {
|
|
1851
|
+
this.cdpSession = null;
|
|
1852
|
+
this.cdpListeners = {};
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
async disposeCurrentBrowser(markExplicitlyClosed) {
|
|
1856
|
+
await this.clearAllData();
|
|
1857
|
+
this.explicitlyClosed = markExplicitlyClosed;
|
|
1858
|
+
this.activePageIndex = null;
|
|
1859
|
+
const browser = this.browser;
|
|
1860
|
+
const disconnectOnly = this.connectedToExistingBrowser;
|
|
1861
|
+
const pid = this.chromePid;
|
|
1862
|
+
this.browser = null;
|
|
1863
|
+
this.currentHeadless = null;
|
|
1864
|
+
this.currentLaunchOptions = null;
|
|
1865
|
+
this.connectedToExistingBrowser = false;
|
|
1866
|
+
this.chromePid = null;
|
|
1867
|
+
await this.browserTargetSessionManager?.dispose();
|
|
1868
|
+
this.browserTargetSessionManager = null;
|
|
1869
|
+
if (this.cdpSession) {
|
|
1870
|
+
this.cdpSession = null;
|
|
1871
|
+
this.cdpListeners = {};
|
|
1872
|
+
}
|
|
1873
|
+
if (browser) if (disconnectOnly) await browser.disconnect();
|
|
1874
|
+
else await this.closeBrowserWithForceKill(browser, pid);
|
|
1875
|
+
}
|
|
1876
|
+
async close() {
|
|
1877
|
+
await this.disposeCurrentBrowser(true);
|
|
1878
|
+
logger.info("Browser closed and all data cleared");
|
|
1879
|
+
}
|
|
1880
|
+
/**
|
|
1881
|
+
* Close browser with a timeout guard. If browser.close() hangs or fails,
|
|
1882
|
+
* force-kill the Chrome child process by PID to prevent zombie processes.
|
|
1883
|
+
*/
|
|
1884
|
+
async closeBrowserWithForceKill(browser, pid) {
|
|
1885
|
+
try {
|
|
1886
|
+
await Promise.race([browser.close(), new Promise((_, reject) => setTimeout(() => reject(/* @__PURE__ */ new Error("browser.close() timed out")), CodeCollector.BROWSER_CLOSE_TIMEOUT_MS))]);
|
|
1887
|
+
} catch (error) {
|
|
1888
|
+
logger.warn("browser.close() failed or timed out, attempting force-kill:", error);
|
|
1889
|
+
CodeCollector.forceKillPid(pid);
|
|
1890
|
+
}
|
|
1891
|
+
}
|
|
1892
|
+
/** Force-kill a process by PID. Safe to call with null/invalid PIDs. */
|
|
1893
|
+
static forceKillPid(pid) {
|
|
1894
|
+
if (!pid) return;
|
|
1895
|
+
try {
|
|
1896
|
+
process.kill(pid, "SIGKILL");
|
|
1897
|
+
logger.info(`Force-killed Chrome process PID ${pid}`);
|
|
1898
|
+
} catch (error) {
|
|
1899
|
+
if (error.code !== "ESRCH") logger.warn(`Failed to force-kill Chrome PID ${pid}:`, error);
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
/** Get the tracked Chrome child process PID (null if not launched or already closed). */
|
|
1903
|
+
getChromePid() {
|
|
1904
|
+
return this.chromePid;
|
|
1905
|
+
}
|
|
1906
|
+
getPageTargets() {
|
|
1907
|
+
if (!this.browser) return [];
|
|
1908
|
+
return this.browser.targets().filter((target) => target.type() === "page");
|
|
1909
|
+
}
|
|
1910
|
+
async resolvePageTargetHandle(target, timeoutMs = 5e3) {
|
|
1911
|
+
const page = await Promise.race([target.page(), new Promise((_, reject) => {
|
|
1912
|
+
setTimeout(() => {
|
|
1913
|
+
reject(new PrerequisiteError(`Timed out after ${timeoutMs}ms while resolving a Puppeteer Page handle from the attached Chrome target.`));
|
|
1914
|
+
}, timeoutMs);
|
|
1915
|
+
})]);
|
|
1916
|
+
if (!page) throw new PrerequisiteError("Attached browser target does not expose a Puppeteer Page handle in the current Chrome remote debugging mode.");
|
|
1917
|
+
return page;
|
|
1918
|
+
}
|
|
1919
|
+
isExistingBrowserConnection() {
|
|
1920
|
+
return this.connectedToExistingBrowser;
|
|
1921
|
+
}
|
|
1922
|
+
async getActivePage() {
|
|
1923
|
+
if (!this.browser) {
|
|
1924
|
+
if (this.explicitlyClosed) throw new PrerequisiteError("Browser was explicitly closed. Call browser_launch or browser_attach first.");
|
|
1925
|
+
try {
|
|
1926
|
+
await this.init();
|
|
1927
|
+
} catch (error) {
|
|
1928
|
+
throw new PrerequisiteError(`Browser not available: ${error instanceof Error ? error.message : String(error)}`);
|
|
1929
|
+
}
|
|
1930
|
+
}
|
|
1931
|
+
const pageTargets = this.getPageTargets();
|
|
1932
|
+
if (pageTargets.length === 0) return await this.browser.newPage();
|
|
1933
|
+
if (this.activePageIndex !== null && this.activePageIndex < pageTargets.length) return await this.resolvePageTargetHandle(pageTargets[this.activePageIndex]);
|
|
1934
|
+
const lastTarget = pageTargets[pageTargets.length - 1];
|
|
1935
|
+
if (!lastTarget) throw new Error("Failed to get active page");
|
|
1936
|
+
return await this.resolvePageTargetHandle(lastTarget);
|
|
1937
|
+
}
|
|
1938
|
+
async getActivePageIndex() {
|
|
1939
|
+
const activePage = await this.getActivePage();
|
|
1940
|
+
const resolvedPages = await this.listResolvedPages();
|
|
1941
|
+
const exactMatch = resolvedPages.find((entry) => entry.page === activePage);
|
|
1942
|
+
if (exactMatch) return exactMatch.index;
|
|
1943
|
+
const activeUrl = activePage.url();
|
|
1944
|
+
return resolvedPages.find((entry) => entry.url === activeUrl)?.index ?? null;
|
|
1945
|
+
}
|
|
1946
|
+
async listPages() {
|
|
1947
|
+
if (!this.browser) return [];
|
|
1948
|
+
return this.getPageTargets().map((target, index) => ({
|
|
1949
|
+
index,
|
|
1950
|
+
url: target.url(),
|
|
1951
|
+
title: ""
|
|
1952
|
+
}));
|
|
1953
|
+
}
|
|
1954
|
+
async listResolvedPages(timeoutMs = 1500) {
|
|
1955
|
+
if (!this.browser) return [];
|
|
1956
|
+
const targets = this.getPageTargets();
|
|
1957
|
+
return (await Promise.all(targets.map(async (target, index) => {
|
|
1958
|
+
try {
|
|
1959
|
+
const page = await this.resolvePageTargetHandle(target, timeoutMs);
|
|
1960
|
+
let title = "";
|
|
1961
|
+
try {
|
|
1962
|
+
title = await Promise.race([page.title(), new Promise((resolve) => {
|
|
1963
|
+
setTimeout(() => resolve(""), timeoutMs);
|
|
1964
|
+
})]);
|
|
1965
|
+
} catch {
|
|
1966
|
+
title = "";
|
|
1967
|
+
}
|
|
1968
|
+
return {
|
|
1969
|
+
index,
|
|
1970
|
+
url: target.url(),
|
|
1971
|
+
title,
|
|
1972
|
+
page
|
|
1973
|
+
};
|
|
1974
|
+
} catch {
|
|
1975
|
+
return null;
|
|
1976
|
+
}
|
|
1977
|
+
}))).filter((page) => page !== null);
|
|
1978
|
+
}
|
|
1979
|
+
async selectPage(index) {
|
|
1980
|
+
if (!this.browser) throw new Error("Browser not connected");
|
|
1981
|
+
const pages = await this.listPages();
|
|
1982
|
+
if (index < 0 || index >= pages.length) throw new Error(`Page index ${index} out of range (0-${pages.length - 1})`);
|
|
1983
|
+
this.activePageIndex = index;
|
|
1984
|
+
logger.info(`Active page set to index ${index}: ${pages[index].url}`);
|
|
1985
|
+
}
|
|
1986
|
+
async createPage(url) {
|
|
1987
|
+
if (!this.browser) await this.init();
|
|
1988
|
+
const page = await this.browser.newPage();
|
|
1989
|
+
await page.setUserAgent(this.userAgent);
|
|
1990
|
+
await this.applyAntiDetection(page);
|
|
1991
|
+
if (url) await page.goto(url, {
|
|
1992
|
+
waitUntil: "networkidle2",
|
|
1993
|
+
timeout: this.config.timeout
|
|
1994
|
+
});
|
|
1995
|
+
logger.info(`New page created${url ? `: ${url}` : ""}`);
|
|
1996
|
+
return page;
|
|
1997
|
+
}
|
|
1998
|
+
async applyAntiDetection(page) {
|
|
1999
|
+
await page.evaluateOnNewDocument(() => {
|
|
2000
|
+
Object.defineProperty(navigator, "webdriver", { get: () => false });
|
|
2001
|
+
Object.defineProperty(navigator, "plugins", { get: () => [
|
|
2002
|
+
1,
|
|
2003
|
+
2,
|
|
2004
|
+
3,
|
|
2005
|
+
4,
|
|
2006
|
+
5
|
|
2007
|
+
] });
|
|
2008
|
+
Object.defineProperty(navigator, "languages", { get: () => ["en-US", "en"] });
|
|
2009
|
+
const win = window;
|
|
2010
|
+
if (!win.chrome) win.chrome = {
|
|
2011
|
+
runtime: {},
|
|
2012
|
+
loadTimes: function() {},
|
|
2013
|
+
csi: function() {},
|
|
2014
|
+
app: {}
|
|
2015
|
+
};
|
|
2016
|
+
const originalQuery = window.navigator.permissions.query;
|
|
2017
|
+
window.navigator.permissions.query = (parameters) => {
|
|
2018
|
+
if (parameters.name === "notifications") return Promise.resolve({ state: "denied" });
|
|
2019
|
+
return originalQuery(parameters);
|
|
2020
|
+
};
|
|
2021
|
+
});
|
|
2022
|
+
}
|
|
2023
|
+
async getStatus() {
|
|
2024
|
+
if (!this.browser) return {
|
|
2025
|
+
running: false,
|
|
2026
|
+
pagesCount: 0
|
|
2027
|
+
};
|
|
2028
|
+
try {
|
|
2029
|
+
const version = await this.browser.version();
|
|
2030
|
+
return {
|
|
2031
|
+
running: true,
|
|
2032
|
+
pagesCount: this.getPageTargets().length,
|
|
2033
|
+
version,
|
|
2034
|
+
effectiveHeadless: this.currentHeadless ?? void 0,
|
|
2035
|
+
launchSource: this.connectedToExistingBrowser ? "attached" : "launched",
|
|
2036
|
+
v8NativeSyntaxEnabled: this.currentLaunchOptions?.v8NativeSyntaxEnabled,
|
|
2037
|
+
launchArgs: this.currentLaunchOptions?.args ? [...this.currentLaunchOptions.args] : []
|
|
2038
|
+
};
|
|
2039
|
+
} catch (error) {
|
|
2040
|
+
logger.debug("Browser not running or disconnected:", error);
|
|
2041
|
+
return {
|
|
2042
|
+
running: false,
|
|
2043
|
+
pagesCount: 0
|
|
2044
|
+
};
|
|
2045
|
+
}
|
|
2046
|
+
}
|
|
2047
|
+
async collect(options) {
|
|
2048
|
+
while (this.collectLock) try {
|
|
2049
|
+
await this.collectLock;
|
|
2050
|
+
} catch {}
|
|
2051
|
+
let resolve;
|
|
2052
|
+
let reject;
|
|
2053
|
+
this.collectLock = new Promise((res, rej) => {
|
|
2054
|
+
resolve = res;
|
|
2055
|
+
reject = rej;
|
|
2056
|
+
});
|
|
2057
|
+
try {
|
|
2058
|
+
const result = await this.collectInner(options);
|
|
2059
|
+
resolve(result);
|
|
2060
|
+
return result;
|
|
2061
|
+
} catch (e) {
|
|
2062
|
+
reject(e);
|
|
2063
|
+
throw e;
|
|
2064
|
+
} finally {
|
|
2065
|
+
this.collectLock = null;
|
|
2066
|
+
}
|
|
2067
|
+
}
|
|
2068
|
+
async collectInner(options) {
|
|
2069
|
+
return collectInnerImpl(this, options);
|
|
2070
|
+
}
|
|
2071
|
+
shouldCollectUrl(url, filterRules) {
|
|
2072
|
+
return shouldCollectUrlImpl(url, filterRules);
|
|
2073
|
+
}
|
|
2074
|
+
async navigateWithRetry(page, url, options, maxRetries = 3) {
|
|
2075
|
+
return navigateWithRetryImpl(page, url, options, maxRetries);
|
|
2076
|
+
}
|
|
2077
|
+
async getPerformanceMetrics(page) {
|
|
2078
|
+
return getPerformanceMetricsImpl(page);
|
|
2079
|
+
}
|
|
2080
|
+
async collectPageMetadata(page) {
|
|
2081
|
+
return collectPageMetadataImpl(page);
|
|
2082
|
+
}
|
|
2083
|
+
async resolveConnectOptions(endpointOrOptions) {
|
|
2084
|
+
return resolveConnectOptionsImpl(endpointOrOptions);
|
|
2085
|
+
}
|
|
2086
|
+
async connectWithTimeout(connectOptions, target, endpointOrOptions) {
|
|
2087
|
+
return connectWithTimeoutImpl(connectOptions, target, endpointOrOptions, this.CONNECT_TIMEOUT_MS, this.connectAttemptRef);
|
|
2088
|
+
}
|
|
2089
|
+
async connect(endpointOrOptions) {
|
|
2090
|
+
this.explicitlyClosed = false;
|
|
2091
|
+
if (this.browser || this.browserTargetSessionManager || this.cdpSession) await this.disposeCurrentBrowser(false);
|
|
2092
|
+
const connectOptions = await this.resolveConnectOptions(endpointOrOptions);
|
|
2093
|
+
const target = connectOptions.browserWSEndpoint ?? connectOptions.browserURL ?? "auto-detected Chrome debugging endpoint";
|
|
2094
|
+
logger.info(`Connecting to existing browser: ${target}`);
|
|
2095
|
+
this.browser = await this.connectWithTimeout(connectOptions, target, endpointOrOptions);
|
|
2096
|
+
this.connectedToExistingBrowser = true;
|
|
2097
|
+
this.currentLaunchOptions = null;
|
|
2098
|
+
this.browser.on("disconnected", () => {
|
|
2099
|
+
this.handleBrowserDisconnected();
|
|
2100
|
+
});
|
|
2101
|
+
logger.success("Connected to existing browser successfully");
|
|
2102
|
+
}
|
|
2103
|
+
getBrowser() {
|
|
2104
|
+
return this.browser;
|
|
2105
|
+
}
|
|
2106
|
+
getBrowserTargetSessionManager() {
|
|
2107
|
+
if (!this.browserTargetSessionManager) this.browserTargetSessionManager = new BrowserTargetSessionManager(() => this.browser);
|
|
2108
|
+
return this.browserTargetSessionManager;
|
|
2109
|
+
}
|
|
2110
|
+
async listCdpTargets(filters) {
|
|
2111
|
+
return await this.getBrowserTargetSessionManager().listTargets(filters);
|
|
2112
|
+
}
|
|
2113
|
+
async attachCdpTarget(targetId) {
|
|
2114
|
+
return await this.getBrowserTargetSessionManager().attach(targetId);
|
|
2115
|
+
}
|
|
2116
|
+
async detachCdpTarget() {
|
|
2117
|
+
return await this.getBrowserTargetSessionManager().detach();
|
|
2118
|
+
}
|
|
2119
|
+
getAttachedTargetSession() {
|
|
2120
|
+
return this.browserTargetSessionManager?.getAttachedTargetSession() ?? null;
|
|
2121
|
+
}
|
|
2122
|
+
getAttachedTargetInfo() {
|
|
2123
|
+
return this.browserTargetSessionManager?.getAttachedTargetInfo() ?? null;
|
|
2124
|
+
}
|
|
2125
|
+
getCollectionStats() {
|
|
2126
|
+
return {
|
|
2127
|
+
totalCollected: this.collectedUrls.size,
|
|
2128
|
+
uniqueUrls: this.collectedUrls.size
|
|
2129
|
+
};
|
|
2130
|
+
}
|
|
2131
|
+
clearCache() {
|
|
2132
|
+
this.collectedUrls.clear();
|
|
2133
|
+
logger.info("Collection cache cleared");
|
|
2134
|
+
}
|
|
2135
|
+
getCollectedFilesSummary() {
|
|
2136
|
+
return getCollectedFilesSummaryImpl(this.collectedFilesCache);
|
|
2137
|
+
}
|
|
2138
|
+
getFileByUrl(url) {
|
|
2139
|
+
return getFileByUrlImpl(this.collectedFilesCache, url);
|
|
2140
|
+
}
|
|
2141
|
+
getFilesByPattern(pattern, limit = 20, maxTotalSize = this.MAX_RESPONSE_SIZE) {
|
|
2142
|
+
return getFilesByPatternImpl(this.collectedFilesCache, pattern, limit, maxTotalSize);
|
|
2143
|
+
}
|
|
2144
|
+
getTopPriorityFiles(topN = 10, maxTotalSize = this.MAX_RESPONSE_SIZE) {
|
|
2145
|
+
return getTopPriorityFilesImpl(this.collectedFilesCache, topN, maxTotalSize);
|
|
2146
|
+
}
|
|
2147
|
+
clearCollectedFilesCache() {
|
|
2148
|
+
const count = this.collectedFilesCache.size;
|
|
2149
|
+
this.collectedFilesCache.clear();
|
|
2150
|
+
logger.info(`Cleared collected files cache (${count} files)`);
|
|
2151
|
+
}
|
|
2152
|
+
};
|
|
2153
|
+
//#endregion
|
|
2154
|
+
//#region src/modules/collector/DOMInspector.evaluations.ts
|
|
2155
|
+
/**
 * Renders a value as JavaScript source text for embedding into a generated
 * script: the literal text `undefined` for undefined, JSON otherwise.
 */
const serializeForEvaluation = (value) => {
  if (value === undefined) return "undefined";
  return JSON.stringify(value);
};
|
|
2156
|
+
/**
 * Source text of an in-page helper, `walkShadowRoots()`, that is prepended to
 * generated evaluation scripts. It walks breadth-first from `document`
 * through every open shadow root and returns `{ roots, shadowRootCount }`,
 * where `roots` starts with `document` itself.
 */
const SHADOW_DOM_WALKER_SCRIPT = `
const walkShadowRoots = () => {
const roots = [document];
const queue = [document];
let shadowRootCount = 0;
while (queue.length > 0) {
const root = queue.shift();
if (!root) continue;
for (const element of Array.from(root.querySelectorAll('*'))) {
const shadowRoot = element.shadowRoot;
if (shadowRoot) {
roots.push(shadowRoot);
queue.push(shadowRoot);
shadowRootCount += 1;
}
}
}
return { roots, shadowRootCount };
};
`.trim();
|
|
2176
|
+
/**
 * Builds the body of an in-page function that runs `querySelectorAll` in the
 * document and in every open shadow root, returning up to `limit` element
 * descriptions plus diagnostics (`readyState`, `shadowRootCount`).
 *
 * @param {string} selector - CSS selector, embedded as a JSON string literal.
 * @param {number} [limit] - Maximum number of described elements.
 * @returns {string} Function-body source text.
 */
function buildQueryAllEvaluation(selector, limit) {
  const selectorLiteral = serializeForEvaluation(selector);
  const limitLiteral = serializeForEvaluation(limit);
  const script = `
${SHADOW_DOM_WALKER_SCRIPT}
const selector = ${selectorLiteral};
const maxLimit = ${limitLiteral};
const { roots, shadowRootCount } = walkShadowRoots();
const seen = new Set();
const results = [];
let totalMatches = 0;
for (const root of roots) {
const nodeList = Array.from(root.querySelectorAll(selector));
totalMatches += nodeList.length;
for (const element of nodeList) {
if (seen.has(element)) continue;
seen.add(element);
const attributes = {};
for (const attr of Array.from(element.attributes)) {
attributes[attr.name] = attr.value;
}
const rect = element.getBoundingClientRect();
const style = window.getComputedStyle(element);
const textContent = element.textContent?.trim() || '';
results.push({
found: true,
nodeName: element.nodeName,
attributes,
textContent: textContent.length > 500 ? textContent.substring(0, 500) + '...[truncated]' : textContent,
boundingBox: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
visible: style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0',
});
if (results.length >= maxLimit) break;
}
if (results.length >= maxLimit) break;
}
if (totalMatches > maxLimit) {
console.warn('[DOMInspector] Found ' + totalMatches + ' elements for "' + selector + '", limiting to ' + maxLimit);
}
return { elements: results, diagnostics: { readyState: document.readyState, shadowRootCount } };
`;
  return script.trim();
}
|
|
2216
|
+
/**
 * Builds the body of an in-page function that lists clickable elements
 * (buttons, button-like inputs and links with an href) in the document and
 * in every open shadow root.
 *
 * @param {string} [filterText] - When given, only elements whose text or
 *   value contains it (case-insensitive) are returned.
 * @returns {string} Function-body source text.
 */
function buildFindClickableEvaluation(filterText) {
  // In the generated script the first class name is derived defensively:
  // `className` is not a string on SVG elements (an SVG <a href> matches
  // 'a[href]'), and a class attribute with leading or only whitespace would
  // otherwise produce an invalid selector such as 'a.'.
  return `
${SHADOW_DOM_WALKER_SCRIPT}
const filter = ${serializeForEvaluation(filterText)};
const normalizedFilter = filter?.toLowerCase();
const { roots, shadowRootCount } = walkShadowRoots();
const results = [];
const seen = new Set();
const appendClickable = (element, type, fallbackSelector) => {
if (seen.has(element)) return;
seen.add(element);
const text = element.textContent?.trim() || (element.value ?? '').trim() || '';
if (normalizedFilter && !text.toLowerCase().includes(normalizedFilter)) return;
const rect = element.getBoundingClientRect();
const style = window.getComputedStyle(element);
const firstClass = typeof element.className === 'string'
? element.className.split(' ').filter(Boolean)[0]
: undefined;
let selector = fallbackSelector;
if (element.id) selector = '#' + element.id;
else if (firstClass) selector = fallbackSelector + '.' + firstClass;
results.push({
selector,
text,
type,
visible:
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0' &&
rect.width > 0 &&
rect.height > 0,
boundingBox: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
});
};
for (const root of roots) {
root
.querySelectorAll('button, input[type="button"], input[type="submit"], input[type="reset"]')
.forEach((button) => appendClickable(button, 'button', button.tagName.toLowerCase()));
root.querySelectorAll('a[href]').forEach((link) => appendClickable(link, 'link', 'a'));
}
return { elements: results, diagnostics: { readyState: document.readyState, shadowRootCount } };
`.trim();
}
|
|
2256
|
+
/**
 * In-page function: describes the first element matching `selector`.
 * Self-contained because it is handed to page.evaluate().
 *
 * @param {string} selector - CSS selector.
 * @returns {object} `{ found: false }` when nothing matches, otherwise node
 *   name, attributes, trimmed text, bounding box and a visibility flag.
 */
function querySelectorEvaluation(selector) {
  const node = document.querySelector(selector);
  if (!node) return { found: false };

  const attributes = {};
  Array.from(node.attributes).forEach((attr) => {
    attributes[attr.name] = attr.value;
  });

  const box = node.getBoundingClientRect();
  const computed = window.getComputedStyle(node);
  const visible = computed.display !== "none" && computed.visibility !== "hidden" && computed.opacity !== "0";

  return {
    found: true,
    nodeName: node.nodeName,
    attributes,
    textContent: node.textContent?.trim() || "",
    boundingBox: { x: box.x, y: box.y, width: box.width, height: box.height },
    visible
  };
}
|
|
2277
|
+
/**
 * In-page function: builds a simplified tree of `document.body`.
 * Self-contained because it is handed to page.evaluate().
 *
 * @param {number} depth - Deepest level to include (body is level 0).
 * @param {boolean} withText - Include trimmed text for elements whose only
 *   child node is a text node.
 * @returns {object|null} Nodes of the form `{ tag, id, class, text?, children? }`.
 */
function getStructureEvaluation(depth, withText) {
  // `className` is a string on HTML elements but an object on SVG elements,
  // so fall back to the raw attribute to always report a plain string.
  const readClass = (node) => {
    if (typeof node.className === "string") return node.className || undefined;
    return node.getAttribute?.("class") || undefined;
  };
  const buildTree = (node, currentDepth) => {
    if (currentDepth > depth) return null;
    const result = {
      tag: node.tagName,
      id: node.id || undefined,
      class: readClass(node)
    };
    // nodeType 3 is a text node.
    if (withText && node.childNodes.length === 1 && node.childNodes[0]?.nodeType === 3) result.text = node.textContent?.trim();
    const children = Array.from(node.children).map((child) => buildTree(child, currentDepth + 1)).filter((child) => child !== null);
    if (children.length > 0) result.children = children;
    return result;
  };
  return buildTree(document.body, 0);
}
|
|
2292
|
+
/**
 * In-page function: reads a fixed set of computed style properties for the
 * first element matching `selector`. Self-contained because it is handed to
 * page.evaluate().
 *
 * @param {string} selector - CSS selector.
 * @returns {Object<string,string>|null} Values keyed by camelCase property
 *   name, or null when no element matches.
 */
function getComputedStyleEvaluation(selector) {
  const element = document.querySelector(selector);
  if (!element) return null;
  const computed = window.getComputedStyle(element);
  const result = {};
  for (const prop of [
    "display",
    "visibility",
    "opacity",
    "position",
    "zIndex",
    "width",
    "height",
    "top",
    "left",
    "right",
    "bottom",
    "color",
    "backgroundColor",
    "fontSize",
    "fontFamily",
    "border",
    "padding",
    "margin",
    "overflow"
  ]) {
    // getPropertyValue() expects the hyphenated CSS name; a camelCase name
    // such as "zIndex" always yields "". Result keys stay camelCase.
    const cssName = prop.replace(/[A-Z]/g, (letter) => `-${letter.toLowerCase()}`);
    result[prop] = computed.getPropertyValue(cssName);
  }
  return result;
}
|
|
2320
|
+
/**
 * In-page function: starts a MutationObserver that logs every DOM change via
 * console.warn and stores it on `window.__domObserver`. Self-contained
 * because it is handed to page.evaluate().
 *
 * @param {object} [opts] - `selector` picks the observed node (default:
 *   document.body); `childList`, `attributes`, `characterData` and `subtree`
 *   are enabled unless explicitly set to false.
 */
function observeDOMChangesEvaluation(opts = {}) {
  const targetNode = opts.selector ? document.querySelector(opts.selector) : document.body;
  if (!targetNode) {
    console.error("Target node not found for MutationObserver");
    return;
  }
  // Stop an observer left by an earlier call. Overwriting the reference
  // alone would leave it running with no way to disconnect it, so every
  // change would be logged once per previous call.
  if (window.__domObserver) window.__domObserver.disconnect();
  const observer = new MutationObserver((mutations) => {
    mutations.forEach((mutation) => {
      console.warn("[DOM Change]", {
        type: mutation.type,
        target: mutation.target,
        addedNodes: mutation.addedNodes.length,
        removedNodes: mutation.removedNodes.length,
        attributeName: mutation.attributeName
      });
    });
  });
  observer.observe(targetNode, {
    childList: opts.childList !== false,
    attributes: opts.attributes !== false,
    characterData: opts.characterData !== false,
    subtree: opts.subtree !== false
  });
  window.__domObserver = observer;
}
|
|
2345
|
+
/**
 * In-page function: disconnects the observer stored on
 * `window.__domObserver`, if any, and removes that property.
 */
function stopObservingDOMEvaluation() {
  const host = window;
  const observer = host.__domObserver;
  if (!observer) return;
  observer.disconnect();
  delete host.__domObserver;
}
|
|
2352
|
+
/**
 * In-page function: finds elements that have a text node containing
 * `searchText` (at most the first 100 matches). Self-contained because it is
 * handed to page.evaluate().
 *
 * @param {string} searchText - Text to look for.
 * @param {string} [tagName] - Restrict matches to this element name.
 * @returns {Array<object>} Node name, trimmed text, a best-effort selector,
 *   bounding box and a visibility flag for each match.
 */
function findByTextEvaluation(searchText, tagName) {
  // XPath 1.0 string literals have no escape sequences: choose the quote
  // style that does not occur in the text, or join pieces with concat() when
  // both occur. Plain interpolation breaks as soon as the text contains `"`.
  const toXPathLiteral = (text) => {
    const value = String(text);
    if (!value.includes('"')) return `"${value}"`;
    if (!value.includes("'")) return `'${value}'`;
    const pieces = value.split('"').map((piece) => `"${piece}"`);
    return `concat(${pieces.join(`, '"', `)})`;
  };
  const literal = toXPathLiteral(searchText);
  const xpath = tagName ? `//${tagName}[contains(text(), ${literal})]` : `//*[contains(text(), ${literal})]`;
  const result = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
  const matchedElements = [];
  for (let i = 0; i < Math.min(result.snapshotLength, 100); i++) {
    const element = result.snapshotItem(i);
    if (!element) continue;
    const rect = element.getBoundingClientRect();
    const style = window.getComputedStyle(element);
    let selector = element.tagName.toLowerCase();
    if (element.id) selector = `#${element.id}`;
    else if (element.className) {
      // `className` is an object on SVG elements (e.g. <text>), where
      // calling split() would throw; read the attribute there instead.
      const classText = typeof element.className === "string" ? element.className : element.getAttribute?.("class") ?? "";
      const classes = classText.split(" ").filter(Boolean);
      if (classes.length > 0) selector = `${element.tagName.toLowerCase()}.${classes[0]}`;
    }
    matchedElements.push({
      found: true,
      nodeName: element.tagName,
      textContent: element.textContent?.trim(),
      selector,
      boundingBox: {
        x: rect.x,
        y: rect.y,
        width: rect.width,
        height: rect.height
      },
      visible: style.display !== "none" && style.visibility !== "hidden" && style.opacity !== "0"
    });
  }
  return matchedElements;
}
|
|
2383
|
+
/**
 * In-page function: computes an XPath for the first element matching
 * `selector`. Self-contained because it is handed to page.evaluate().
 *
 * Walks up from the element. The first ancestor (or the element itself)
 * with an id anchors the path as `//*[@id="..."]`; otherwise the path is
 * positional and rooted at `/html/body` or `/html`.
 *
 * @param {string} selector - CSS selector.
 * @returns {string|null} The XPath, or null when no element matches.
 */
function getXPathEvaluation(selector) {
  const element = document.querySelector(selector);
  if (!element) return null;
  const parts = [];
  let current = element;
  while (current && current !== document.body && current !== document.documentElement) {
    if (current.id) {
      parts.unshift(`//*[@id="${current.id}"]`);
      return parts.join("");
    }
    // Count preceding siblings with the same tag to get the 1-based position.
    let ix = 0;
    const siblings = current.parentElement?.children;
    if (siblings) for (let i = 0; i < siblings.length; i++) {
      const sibling = siblings[i];
      if (!sibling) continue;
      if (sibling === current) break;
      if (sibling.tagName === current.tagName) ix += 1;
    }
    parts.unshift(`/${current.tagName.toLowerCase()}[${ix + 1}]`);
    current = current.parentElement;
  }
  // Root the path where the walk actually stopped. Elements outside <body>
  // (e.g. inside <head>, or <html> itself) end at the document element and
  // must not be given a "/html/body" prefix.
  const prefix = current === document.documentElement ? "/html" : "/html/body";
  return prefix + parts.join("");
}
|
|
2406
|
+
/**
 * In-page function: reports whether the first element matching `selector`
 * lies entirely inside the viewport. Returns false when nothing matches.
 */
function isInViewportEvaluation(selector) {
  const target = document.querySelector(selector);
  if (!target) return false;
  const box = target.getBoundingClientRect();
  if (!(box.top >= 0 && box.left >= 0)) return false;
  const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
  if (!(box.bottom <= viewportHeight)) return false;
  const viewportWidth = window.innerWidth || document.documentElement.clientWidth;
  return box.right <= viewportWidth;
}
|
|
2412
|
+
//#endregion
|
|
2413
|
+
//#region src/modules/collector/DOMInspector.ts
|
|
2414
|
+
var DOMInspector = class {
|
|
2415
|
+
cdpSession = null;
|
|
2416
|
+
constructor(collector) {
|
|
2417
|
+
this.collector = collector;
|
|
2418
|
+
}
|
|
2419
|
+
async waitForReadyState(page, timeoutMs = 3e3) {
|
|
2420
|
+
const deadline = Date.now() + timeoutMs;
|
|
2421
|
+
let waitedForReadyState = false;
|
|
2422
|
+
let readyState = "unknown";
|
|
2423
|
+
while (Date.now() <= deadline) {
|
|
2424
|
+
readyState = await page.evaluate(() => document.readyState).catch(() => "unknown");
|
|
2425
|
+
if (readyState === "complete") break;
|
|
2426
|
+
waitedForReadyState = true;
|
|
2427
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
2428
|
+
}
|
|
2429
|
+
return {
|
|
2430
|
+
readyState,
|
|
2431
|
+
waitedForReadyState,
|
|
2432
|
+
frameCount: typeof page.frames === "function" ? page.frames().length : 1
|
|
2433
|
+
};
|
|
2434
|
+
}
|
|
2435
|
+
async querySelector(selector, _getAttributes = true) {
|
|
2436
|
+
try {
|
|
2437
|
+
const elementInfo = await (await this.collector.getActivePage()).evaluate(querySelectorEvaluation, selector);
|
|
2438
|
+
logger.info(`querySelector: ${selector} - ${elementInfo.found ? "found" : "not found"}`);
|
|
2439
|
+
return elementInfo;
|
|
2440
|
+
} catch (error) {
|
|
2441
|
+
logger.error(`querySelector failed for ${selector}:`, error);
|
|
2442
|
+
return { found: false };
|
|
2443
|
+
}
|
|
2444
|
+
}
|
|
2445
|
+
async querySelectorAll(selector, limit = DOM_QUERY_DEFAULT_LIMIT) {
|
|
2446
|
+
try {
|
|
2447
|
+
const page = await this.collector.getActivePage();
|
|
2448
|
+
const readyStateStatus = await this.waitForReadyState(page);
|
|
2449
|
+
const runQuery = async () => page.evaluate(new Function(buildQueryAllEvaluation(selector, limit)));
|
|
2450
|
+
let result = await runQuery();
|
|
2451
|
+
let retried = false;
|
|
2452
|
+
if (result.elements.length === 0 && result.diagnostics.readyState === "complete") {
|
|
2453
|
+
retried = true;
|
|
2454
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
2455
|
+
result = await runQuery();
|
|
2456
|
+
}
|
|
2457
|
+
const diagnostics = {
|
|
2458
|
+
readyState: result.diagnostics.readyState ?? readyStateStatus.readyState,
|
|
2459
|
+
frameCount: readyStateStatus.frameCount,
|
|
2460
|
+
shadowRootCount: result.diagnostics.shadowRootCount ?? 0,
|
|
2461
|
+
retried,
|
|
2462
|
+
waitedForReadyState: readyStateStatus.waitedForReadyState
|
|
2463
|
+
};
|
|
2464
|
+
logger.info(`querySelectorAll: ${selector} - found ${result.elements.length} elements (limit: ${limit}, readyState: ${diagnostics.readyState}, shadowRoots: ${diagnostics.shadowRootCount}, retried: ${retried})`);
|
|
2465
|
+
return {
|
|
2466
|
+
elements: result.elements,
|
|
2467
|
+
diagnostics
|
|
2468
|
+
};
|
|
2469
|
+
} catch (error) {
|
|
2470
|
+
logger.error(`querySelectorAll failed for ${selector}:`, error);
|
|
2471
|
+
return {
|
|
2472
|
+
elements: [],
|
|
2473
|
+
diagnostics: {
|
|
2474
|
+
readyState: "error",
|
|
2475
|
+
frameCount: 0,
|
|
2476
|
+
shadowRootCount: 0,
|
|
2477
|
+
retried: false,
|
|
2478
|
+
waitedForReadyState: false
|
|
2479
|
+
}
|
|
2480
|
+
};
|
|
2481
|
+
}
|
|
2482
|
+
}
|
|
2483
|
+
async getStructure(maxDepth = 3, includeText = true) {
|
|
2484
|
+
try {
|
|
2485
|
+
const structure = await (await this.collector.getActivePage()).evaluate(getStructureEvaluation, maxDepth, includeText);
|
|
2486
|
+
logger.info("DOM structure retrieved");
|
|
2487
|
+
return structure;
|
|
2488
|
+
} catch (error) {
|
|
2489
|
+
logger.error("getStructure failed:", error);
|
|
2490
|
+
return null;
|
|
2491
|
+
}
|
|
2492
|
+
}
|
|
2493
|
+
async findClickable(filterText) {
|
|
2494
|
+
try {
|
|
2495
|
+
const page = await this.collector.getActivePage();
|
|
2496
|
+
const readyStateStatus = await this.waitForReadyState(page);
|
|
2497
|
+
const runQuery = async () => page.evaluate(new Function(buildFindClickableEvaluation(filterText)));
|
|
2498
|
+
let result = await runQuery();
|
|
2499
|
+
let retried = false;
|
|
2500
|
+
if (result.elements.length === 0 && result.diagnostics.readyState === "complete") {
|
|
2501
|
+
retried = true;
|
|
2502
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
2503
|
+
result = await runQuery();
|
|
2504
|
+
}
|
|
2505
|
+
const diagnostics = {
|
|
2506
|
+
readyState: result.diagnostics.readyState ?? readyStateStatus.readyState,
|
|
2507
|
+
frameCount: readyStateStatus.frameCount,
|
|
2508
|
+
shadowRootCount: result.diagnostics.shadowRootCount ?? 0,
|
|
2509
|
+
retried,
|
|
2510
|
+
waitedForReadyState: readyStateStatus.waitedForReadyState
|
|
2511
|
+
};
|
|
2512
|
+
logger.info(`findClickable: found ${result.elements.length} elements${filterText ? ` (filtered by: ${filterText})` : ""} (readyState: ${diagnostics.readyState}, shadowRoots: ${diagnostics.shadowRootCount}, retried: ${retried})`);
|
|
2513
|
+
return {
|
|
2514
|
+
elements: result.elements,
|
|
2515
|
+
diagnostics
|
|
2516
|
+
};
|
|
2517
|
+
} catch (error) {
|
|
2518
|
+
logger.error("findClickable failed:", error);
|
|
2519
|
+
return {
|
|
2520
|
+
elements: [],
|
|
2521
|
+
diagnostics: {
|
|
2522
|
+
readyState: "error",
|
|
2523
|
+
frameCount: 0,
|
|
2524
|
+
shadowRootCount: 0,
|
|
2525
|
+
retried: false,
|
|
2526
|
+
waitedForReadyState: false
|
|
2527
|
+
}
|
|
2528
|
+
};
|
|
2529
|
+
}
|
|
2530
|
+
}
|
|
2531
|
+
async getComputedStyle(selector) {
|
|
2532
|
+
try {
|
|
2533
|
+
const styles = await (await this.collector.getActivePage()).evaluate(getComputedStyleEvaluation, selector);
|
|
2534
|
+
logger.info(`getComputedStyle: ${selector} - ${styles ? "found" : "not found"}`);
|
|
2535
|
+
return styles;
|
|
2536
|
+
} catch (error) {
|
|
2537
|
+
logger.error(`getComputedStyle failed for ${selector}:`, error);
|
|
2538
|
+
return null;
|
|
2539
|
+
}
|
|
2540
|
+
}
|
|
2541
|
+
async waitForElement(selector, timeout = DOM_WAIT_ELEMENT_TIMEOUT_MS) {
|
|
2542
|
+
try {
|
|
2543
|
+
await (await this.collector.getActivePage()).waitForSelector(selector, { timeout });
|
|
2544
|
+
return await this.querySelector(selector);
|
|
2545
|
+
} catch (error) {
|
|
2546
|
+
logger.error(`waitForElement timeout for ${selector}:`, error);
|
|
2547
|
+
return null;
|
|
2548
|
+
}
|
|
2549
|
+
}
|
|
2550
|
+
async observeDOMChanges(options = {}) {
|
|
2551
|
+
await (await this.collector.getActivePage()).evaluate(observeDOMChangesEvaluation, options);
|
|
2552
|
+
logger.info("DOM change observer started");
|
|
2553
|
+
}
|
|
2554
|
+
async stopObservingDOM() {
|
|
2555
|
+
await (await this.collector.getActivePage()).evaluate(stopObservingDOMEvaluation);
|
|
2556
|
+
logger.info("DOM change observer stopped");
|
|
2557
|
+
}
|
|
2558
|
+
async findByText(text, tag) {
|
|
2559
|
+
try {
|
|
2560
|
+
const elements = await (await this.collector.getActivePage()).evaluate(findByTextEvaluation, text, tag);
|
|
2561
|
+
logger.info(`findByText: "${text}" - found ${elements.length} elements`);
|
|
2562
|
+
return elements;
|
|
2563
|
+
} catch (error) {
|
|
2564
|
+
logger.error(`findByText failed for "${text}":`, error);
|
|
2565
|
+
return [];
|
|
2566
|
+
}
|
|
2567
|
+
}
|
|
2568
|
+
async getXPath(selector) {
|
|
2569
|
+
try {
|
|
2570
|
+
const xpath = await (await this.collector.getActivePage()).evaluate(getXPathEvaluation, selector);
|
|
2571
|
+
logger.info(`getXPath: ${selector} -> ${xpath}`);
|
|
2572
|
+
return xpath;
|
|
2573
|
+
} catch (error) {
|
|
2574
|
+
logger.error(`getXPath failed for ${selector}:`, error);
|
|
2575
|
+
return null;
|
|
2576
|
+
}
|
|
2577
|
+
}
|
|
2578
|
+
async isInViewport(selector) {
|
|
2579
|
+
try {
|
|
2580
|
+
const inViewport = await (await this.collector.getActivePage()).evaluate(isInViewportEvaluation, selector);
|
|
2581
|
+
logger.info(`isInViewport: ${selector} - ${inViewport}`);
|
|
2582
|
+
return inViewport;
|
|
2583
|
+
} catch (error) {
|
|
2584
|
+
logger.error(`isInViewport failed for ${selector}:`, error);
|
|
2585
|
+
return false;
|
|
2586
|
+
}
|
|
2587
|
+
}
|
|
2588
|
+
async close() {
|
|
2589
|
+
if (this.cdpSession) {
|
|
2590
|
+
await this.cdpSession.detach();
|
|
2591
|
+
this.cdpSession = null;
|
|
2592
|
+
logger.info("DOM Inspector CDP session closed");
|
|
2593
|
+
}
|
|
2594
|
+
}
|
|
2595
|
+
};
|
|
2596
|
+
//#endregion
|
|
2597
|
+
//#region src/modules/debugger/ScriptManager.impl.extract-function-tree.ts
|
|
2598
|
+
/** Return `error.message` for `Error` instances, otherwise the value converted with `String()`. */
const getErrorMessage = (error) => {
  if (error instanceof Error) return error.message;
  return String(error);
};
|
|
2599
|
+
/** Return `value` when it is a non-null object (arrays included), otherwise `null`. */
const asRecord = (value) => {
  const isObjectLike = typeof value === "object" && value !== null;
  return isObjectLike ? value : null;
};
|
|
2600
|
+
/** Return `value` when it is a function, otherwise `null`. */
const asCallable = (value) => {
  if (typeof value !== "function") return null;
  return value;
};
|
|
2601
|
+
/**
 * Find the callable export of a dynamically imported module, whatever shape it has.
 *
 * Candidates are tried in this order and the first function wins:
 * `module.default.default`, `module.default`, `module[namedExport]`, the module value itself.
 *
 * @param {*} moduleValue - Value produced by `import()`.
 * @param {string} namedExport - Name of the named export to fall back to.
 * @returns {Function|null} The first callable candidate, or `null` when none is a function.
 */
const resolveCallableExport = (moduleValue, namedExport) => {
  const namespace = asRecord(moduleValue);
  const primary = namespace?.default;

  const nestedDefault = asCallable(asRecord(primary)?.default);
  if (nestedDefault !== null) return nestedDefault;

  const directDefault = asCallable(primary);
  if (directDefault !== null) return directDefault;

  const named = asCallable(namespace?.[namedExport]);
  if (named !== null) return named;

  return asCallable(moduleValue);
};
|
|
2606
|
+
/**
 * Extract a named function and the same-script functions it calls, level by level.
 *
 * The script is parsed with Babel; every named `function` declaration and every
 * `const x = function/arrow` declarator is collected together with the plain
 * identifier calls found inside it. Starting from `functionName`, callees are then
 * followed breadth-first.
 *
 * @param {{getScriptSource: Function}} ctx - Object whose `getScriptSource(scriptId)` resolves to a script record with a `source` string.
 * @param {string} scriptId - Id handed to `ctx.getScriptSource`.
 * @param {string} functionName - Root function of the extraction.
 * @param {object} [options]
 * @param {number} [options.maxDepth=3] - Number of call-tree levels to include; the root is level 1.
 * @param {number} [options.maxSize=500] - Size threshold in KB; exceeding it only logs a warning.
 * @param {boolean} [options.includeComments=true] - Passed to the generator as its `comments` option.
 * @returns {Promise<{mainFunction: string, code: string, functions: object[], callGraph: object, totalSize: number, extractedCount: number}>}
 * @throws {Error} When the script has no source, the Babel packages cannot be loaded, or parsing fails.
 */
async function extractFunctionTreeCore(ctx, scriptId, functionName, options = {}) {
  const { maxDepth = 3, maxSize = 500, includeComments = true } = options;
  const script = await ctx.getScriptSource(scriptId);
  if (!script?.source) throw new Error(`Script not found: ${scriptId}`);

  let parser;
  let traverse;
  let generate;
  let t;
  try {
    parser = await import("@babel/parser");
    // traverse/generator ship in several export shapes; pick whichever is callable.
    const traverseCandidate = resolveCallableExport(await import("@babel/traverse"), "traverse");
    if (typeof traverseCandidate !== "function") throw new Error("Invalid @babel/traverse export shape");
    traverse = traverseCandidate;
    const generateCandidate = resolveCallableExport(await import("@babel/generator"), "generate");
    if (typeof generateCandidate !== "function") throw new Error("Invalid @babel/generator export shape");
    generate = generateCandidate;
    t = await import("@babel/types");
  } catch (error) {
    throw new Error(`Failed to load Babel dependencies. Please install: npm install @babel/parser @babel/traverse @babel/generator @babel/types\nError: ${getErrorMessage(error)}`, { cause: error });
  }

  let ast;
  try {
    ast = parser.parse(script.source, {
      sourceType: "unambiguous",
      plugins: ["jsx", "typescript"]
    });
  } catch (error) {
    throw new Error(`Failed to parse script ${scriptId}: ${getErrorMessage(error)}`, { cause: error });
  }

  const allFunctions = /* @__PURE__ */ new Map();
  const callGraph = {};

  // Collect the names of plain-identifier callees (`foo()`, not `obj.foo()`) under `path`.
  const extractDependencies = (path) => {
    const deps = /* @__PURE__ */ new Set();
    path.traverse({ CallExpression(callPath) {
      if (t.isIdentifier(callPath.node.callee)) deps.add(callPath.node.callee.name);
    } });
    return Array.from(deps);
  };

  // Record one function under `name`; a later definition with the same name overwrites an earlier one.
  const registerFunction = (name, path) => {
    const funcCode = generate(path.node, { comments: includeComments }).code;
    const deps = extractDependencies(path);
    allFunctions.set(name, {
      name,
      code: funcCode,
      startLine: path.node.loc?.start.line || 0,
      endLine: path.node.loc?.end.line || 0,
      dependencies: deps,
      size: funcCode.length
    });
    callGraph[name] = deps;
  };

  traverse(ast, {
    FunctionDeclaration(path) {
      const name = path.node.id?.name;
      if (!name) return;
      registerFunction(name, path);
    },
    VariableDeclarator(path) {
      const { id, init } = path.node;
      if (t.isIdentifier(id) && (t.isFunctionExpression(init) || t.isArrowFunctionExpression(init))) {
        registerFunction(id.name, path);
      }
    }
  });

  // Breadth-first walk, one call-tree level per iteration, so `maxDepth` limits
  // depth rather than the number of functions extracted.
  const extracted = /* @__PURE__ */ new Set();
  let frontier = [functionName];
  for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) {
    const nextFrontier = [];
    for (const current of frontier) {
      if (extracted.has(current)) continue;
      const func = allFunctions.get(current);
      if (!func) continue;
      extracted.add(current);
      for (const dep of func.dependencies) {
        if (!extracted.has(dep) && allFunctions.has(dep)) nextFrontier.push(dep);
      }
    }
    frontier = nextFrontier;
  }

  const functions = Array.from(extracted).map((name) => allFunctions.get(name)).filter(Boolean);
  const code = functions.map((f) => f.code).join("\n\n");
  const totalSize = code.length;
  if (totalSize > maxSize * 1024) logger.warn(`Extracted code size (${(totalSize / 1024).toFixed(2)}KB) exceeds limit (${maxSize}KB)`);
  logger.info(`extractFunctionTree: ${functionName} - extracted ${functions.length} functions (${(totalSize / 1024).toFixed(2)}KB)`);
  return {
    mainFunction: functionName,
    code,
    functions,
    callGraph,
    totalSize,
    extractedCount: functions.length
  };
}
|
|
2703
|
+
//#endregion
|
|
2704
|
+
//#region src/modules/debugger/ScriptManager.impl.class.ts
|
|
2705
|
+
/**
 * Tracks the scripts reported by a CDP `Debugger` session for the collector's
 * active page, lazily loads their sources, and offers URL lookup, text search,
 * a keyword index and fixed-size chunking over the loaded sources.
 *
 * The keyword index and the chunks are only populated for scripts whose source
 * has been fetched through `loadScriptSourceInternal`.
 */
var ScriptManager = class ScriptManager {
  /** Number of `Debugger.getScriptSource` requests issued concurrently per batch. */
  static SOURCE_LOAD_BATCH_SIZE = 8;
  /** `searchInScripts` yields after this many lines of one script. */
  static SEARCH_LINE_YIELD_INTERVAL = 250;
  /** `searchInScripts` yields after this many scripts. */
  static SEARCH_SCRIPT_YIELD_INTERVAL = 10;
  cdpSession = null;
  scripts = /* @__PURE__ */ new Map();
  scriptsByUrl = /* @__PURE__ */ new Map();
  initialized = false;
  initPromise;
  keywordIndex = /* @__PURE__ */ new Map();
  scriptChunks = /* @__PURE__ */ new Map();
  CHUNK_SIZE = 100 * 1024;

  /**
   * @param {object} collector - Must expose `getActivePage()`, which resolves to a page with `createCDPSession()`.
   */
  constructor(collector) {
    this.collector = collector;
  }

  /**
   * Build a predicate that tests script URLs against a `*` wildcard pattern.
   *
   * A URL matches when either interpretation of the pattern matches:
   * the pattern as a regular expression with `*` rewritten to `.*`, or the
   * pattern as literal text in which only `*` is a wildcard. The literal form
   * lets an exact URL containing `?`, `.`, `(` and similar characters match
   * itself; a pattern that is not a valid regular expression is matched
   * literally instead of throwing.
   *
   * @param {string} pattern - URL pattern, optionally containing `*`.
   * @returns {(candidate: string) => boolean}
   */
  static createUrlMatcher(pattern) {
    const literal = new RegExp(pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*"));
    let asRegex = null;
    try {
      asRegex = new RegExp(pattern.replace(/\*/g, ".*"));
    } catch {
      asRegex = null;
    }
    // Neither regex carries the `g` flag, so `test` keeps no state between calls.
    return (candidate) => (asRegex !== null && asRegex.test(candidate)) || literal.test(candidate);
  }

  /**
   * Initialize once; concurrent callers share the same in-flight promise.
   * @returns {Promise<void>}
   */
  async init() {
    if (this.initialized) return;
    if (this.initPromise) return this.initPromise;
    this.initPromise = this.doInit();
    try {
      return await this.initPromise;
    } finally {
      this.initPromise = void 0;
    }
  }

  /**
   * Open a CDP session on the active page, record every `Debugger.scriptParsed`
   * event and enable the debugger. If enabling fails, the session is detached and
   * forgotten so that a later call can retry instead of reusing a half-set-up session.
   * @returns {Promise<void>}
   */
  async doInit() {
    const page = await this.collector.getActivePage();
    const session = await page.createCDPSession();
    this.cdpSession = session;
    // Register the listener before enabling so no scriptParsed event is missed.
    session.on("Debugger.scriptParsed", (params) => {
      const scriptInfo = {
        scriptId: params.scriptId,
        url: params.url,
        startLine: params.startLine,
        startColumn: params.startColumn,
        endLine: params.endLine,
        endColumn: params.endColumn,
        sourceLength: params.length
      };
      this.scripts.set(params.scriptId, scriptInfo);
      if (params.url) {
        if (!this.scriptsByUrl.has(params.url)) this.scriptsByUrl.set(params.url, []);
        this.scriptsByUrl.get(params.url).push(scriptInfo);
      }
      logger.debug(`Script parsed: ${params.url || "inline"} (${params.scriptId})`);
    });
    try {
      await session.send("Debugger.enable");
    } catch (error) {
      if (this.cdpSession === session) this.cdpSession = null;
      try {
        await session.detach();
      } catch (detachError) {
        logger.warn("Failed to detach CDP session after init failure:", detachError);
      }
      throw error;
    }
    this.initialized = true;
    logger.info("ScriptManager initialized");
  }

  /**
   * Fetch and cache the source of `script`, then index and chunk it.
   * @param {object} script - Script record from `this.scripts`; mutated in place.
   * @returns {Promise<boolean>} `true` when the source is available, `false` when fetching failed.
   */
  async loadScriptSourceInternal(script) {
    if (script.source) return true;
    try {
      const { scriptSource } = await this.cdpSession.send("Debugger.getScriptSource", { scriptId: script.scriptId });
      script.source = scriptSource;
      script.sourceLength = scriptSource.length;
      this.buildKeywordIndex(script.scriptId, script.url, scriptSource);
      this.chunkScript(script.scriptId, scriptSource);
      return true;
    } catch (error) {
      logger.warn(`Failed to get source for script ${script.scriptId}:`, error);
      return false;
    }
  }

  /** Alias of {@link ScriptManager#init}. */
  async enable() {
    return this.init();
  }

  /**
   * List known scripts, optionally loading their sources in batches.
   * @param {boolean} [includeSource=false] - Load missing sources before returning.
   * @param {number} [maxScripts=1000] - Upper bound on the number of scripts returned.
   * @returns {Promise<object[]>} The first `maxScripts` script records.
   */
  async getAllScripts(includeSource = false, maxScripts = 1e3) {
    if (!this.cdpSession) await this.init();
    const scripts = Array.from(this.scripts.values());
    if (scripts.length > maxScripts) logger.warn(`Found ${scripts.length} scripts, limiting to ${maxScripts}. Increase maxScripts parameter if needed.`);
    const limitedScripts = scripts.slice(0, maxScripts);
    if (includeSource) {
      logger.warn(`Loading source code for ${limitedScripts.length} scripts. This may use significant memory.`);
      let loadedCount = 0;
      let failedCount = 0;
      const missingScripts = limitedScripts.filter((script) => !script.source);
      for (let batchStart = 0; batchStart < missingScripts.length; batchStart += ScriptManager.SOURCE_LOAD_BATCH_SIZE) {
        const batch = missingScripts.slice(batchStart, batchStart + ScriptManager.SOURCE_LOAD_BATCH_SIZE);
        const settled = await Promise.allSettled(batch.map(async (script) => {
          if (await this.loadScriptSourceInternal(script)) {
            loadedCount++;
            if (loadedCount % 10 === 0) logger.debug(`Loaded ${loadedCount}/${limitedScripts.length} scripts...`);
          } else failedCount++;
        }));
        for (const result of settled) if (result.status === "rejected") failedCount++;
        // NOTE(review): assumes `setImmediate` is the promise-returning variant
        // imported at file level (outside this block) - confirm.
        await setImmediate();
      }
      logger.info(`getAllScripts: ${limitedScripts.length} scripts (loaded: ${loadedCount}, failed: ${failedCount})`);
    } else logger.info(`getAllScripts: ${limitedScripts.length} scripts (source not included)`);
    return limitedScripts;
  }

  /**
   * Resolve one script by id, or by URL pattern, and make sure its source is loaded.
   * @param {string} [scriptId] - Takes precedence over `url` when both are given.
   * @param {string} [url] - URL pattern (see `createUrlMatcher`); the first matching URL's first script is used.
   * @returns {Promise<object|null>} The script record, or `null` when not found or not loadable.
   * @throws {Error} When neither `scriptId` nor `url` is provided.
   */
  async getScriptSource(scriptId, url) {
    if (!scriptId && !url) throw new Error("Either scriptId or url parameter must be provided");
    if (!this.cdpSession) await this.init();
    let targetScript;
    if (scriptId) targetScript = this.scripts.get(scriptId);
    else if (url) {
      const matchesUrl = ScriptManager.createUrlMatcher(url);
      for (const [scriptUrl, scripts] of this.scriptsByUrl.entries()) if (matchesUrl(scriptUrl)) {
        targetScript = scripts[0];
        break;
      }
    }
    if (!targetScript) {
      logger.warn(`Script not found: ${scriptId || url}`);
      return null;
    }
    if (!targetScript.source) {
      if (!await this.loadScriptSourceInternal(targetScript)) {
        logger.error(`Failed to get script source for ${targetScript.scriptId}`);
        return null;
      }
    }
    logger.info(`getScriptSource: ${targetScript.url || "inline"} (${targetScript.sourceLength} bytes)`);
    return targetScript;
  }

  /**
   * Collect every script whose URL matches `urlPattern`.
   * @param {string} urlPattern - URL pattern (see `createUrlMatcher`).
   * @returns {Promise<object[]>}
   */
  async findScriptsByUrl(urlPattern) {
    if (!this.cdpSession) await this.init();
    const matchesUrl = ScriptManager.createUrlMatcher(urlPattern);
    const results = [];
    for (const [url, scripts] of this.scriptsByUrl.entries()) if (matchesUrl(url)) results.push(...scripts);
    logger.info(`findScriptsByUrl: ${urlPattern} - found ${results.length} scripts`);
    return results;
  }

  /** Alias of {@link ScriptManager#clear}. */
  clearCache() {
    this.clear();
  }

  /**
   * Scan the sources of up to 500 scripts line by line.
   * @param {string} keyword - Literal text, or a regular expression source when `isRegex` is set.
   * @param {object} [options]
   * @param {boolean} [options.isRegex=false]
   * @param {boolean} [options.caseSensitive=false]
   * @param {number} [options.contextLines=3] - Lines of context on each side of a hit.
   * @param {number} [options.maxMatches=100] - Scanning stops once this many hits are collected.
   * @returns {Promise<{keyword: string, totalMatches: number, matches: object[]}>}
   */
  async searchInScripts(keyword, options = {}) {
    if (!this.cdpSession) await this.init();
    const { isRegex = false, caseSensitive = false, contextLines = 3, maxMatches = 100 } = options;
    const searchRegex = isRegex ? new RegExp(keyword, caseSensitive ? "g" : "gi") : new RegExp(keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), caseSensitive ? "g" : "gi");
    const matches = [];
    const scripts = await this.getAllScripts(true, 500);
    for (const [scriptIndex, script] of scripts.entries()) {
      if (!script.source) continue;
      if (matches.length >= maxMatches) break;
      const lines = script.source.split("\n");
      for (let i = 0; i < lines.length; i++) {
        // Once the cap is reached there is nothing left to collect in this script.
        if (matches.length >= maxMatches) break;
        const line = lines[i];
        if (!line) continue;
        const lineMatches = Array.from(line.matchAll(searchRegex));
        for (const match of lineMatches) {
          if (matches.length >= maxMatches) break;
          const startLine = Math.max(0, i - contextLines);
          const endLine = Math.min(lines.length - 1, i + contextLines);
          let context = lines.slice(startLine, endLine + 1).join("\n");
          if (context.length > 2e3) {
            // Very long context (e.g. minified code): fall back to a snippet around the hit.
            const matchIndex = match.index || 0;
            const snippetStart = Math.max(0, matchIndex - 100);
            const snippetEnd = Math.min(line.length, matchIndex + 100);
            context = (snippetStart > 0 ? "..." : "") + line.substring(snippetStart, snippetEnd) + (snippetEnd < line.length ? "..." : "");
          }
          matches.push({
            scriptId: script.scriptId,
            url: script.url || "inline",
            line: i + 1,
            column: match.index || 0,
            matchText: match[0],
            context
          });
        }
        if ((i + 1) % ScriptManager.SEARCH_LINE_YIELD_INTERVAL === 0) await setImmediate();
      }
      if ((scriptIndex + 1) % ScriptManager.SEARCH_SCRIPT_YIELD_INTERVAL === 0) await setImmediate();
    }
    logger.info(`searchInScripts: "${keyword}" - found ${matches.length} matches`);
    return {
      keyword,
      totalMatches: matches.length,
      matches
    };
  }

  /** Delegate to `extractFunctionTreeCore`, using this manager as the script source. */
  async extractFunctionTree(scriptId, functionName, options = {}) {
    return extractFunctionTreeCore(this, scriptId, functionName, options);
  }

  /** Forget all scripts, the keyword index and all chunks; the CDP session is left untouched. */
  clear() {
    this.scripts.clear();
    this.scriptsByUrl.clear();
    this.keywordIndex.clear();
    this.scriptChunks.clear();
    logger.info(" ScriptManager cleared - ready for new website");
  }

  /**
   * Clear all state, then disable the debugger and detach the session.
   * Failures while closing the session are logged, not rethrown.
   * @returns {Promise<void>}
   */
  async close() {
    this.initPromise = void 0;
    this.clear();
    if (this.cdpSession) {
      try {
        await this.cdpSession.send("Debugger.disable");
        await this.cdpSession.detach();
        logger.info("CDP session closed");
      } catch (error) {
        logger.warn("Failed to close CDP session:", error);
      }
      this.cdpSession = null;
    }
    this.initialized = false;
    logger.info(" ScriptManager closed");
  }

  /**
   * @returns {{totalScripts: number, totalUrls: number, indexedKeywords: number, totalChunks: number}}
   */
  getStats() {
    let totalChunks = 0;
    for (const chunks of this.scriptChunks.values()) totalChunks += chunks.length;
    return {
      totalScripts: this.scripts.size,
      totalUrls: this.scriptsByUrl.size,
      indexedKeywords: this.keywordIndex.size,
      totalChunks
    };
  }

  /**
   * Add every identifier-like token (3+ characters) of `content` to the keyword index.
   * Index keys are lowercased; each entry also keeps the token as written (`text`)
   * so case-sensitive lookups can be answered from the index.
   * @param {string} scriptId
   * @param {string} url
   * @param {string} content - Full script source.
   */
  buildKeywordIndex(scriptId, url, content) {
    const lines = content.split("\n");
    const keywordRegex = /\b[a-zA-Z_$][a-zA-Z0-9_$]{2,}\b/g;
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (!line) continue;
      const matches = Array.from(line.matchAll(keywordRegex));
      for (const match of matches) {
        const keyword = match[0].toLowerCase();
        const startLine = Math.max(0, i - 3);
        const endLine = Math.min(lines.length - 1, i + 3);
        let context = lines.slice(startLine, endLine + 1).join("\n");
        if (context.length > 1e3) {
          const matchIndex = match.index || 0;
          const snippetStart = Math.max(0, matchIndex - 50);
          const snippetEnd = Math.min(line.length, matchIndex + 50);
          context = (snippetStart > 0 ? "..." : "") + line.substring(snippetStart, snippetEnd) + (snippetEnd < line.length ? "..." : "");
        }
        const entry = {
          scriptId,
          url,
          line: i + 1,
          column: match.index || 0,
          context,
          text: match[0]
        };
        if (!this.keywordIndex.has(keyword)) this.keywordIndex.set(keyword, []);
        this.keywordIndex.get(keyword).push(entry);
      }
    }
    // The logged number is the size of the whole index, not only this script's share.
    logger.debug(` Indexed ${this.keywordIndex.size} keywords for ${url}`);
  }

  /**
   * Split `content` into consecutive pieces of `CHUNK_SIZE` characters and store them.
   * @param {string} scriptId
   * @param {string} content
   */
  chunkScript(scriptId, content) {
    const chunks = [];
    let offset = 0;
    let chunkIndex = 0;
    while (offset < content.length) {
      const chunk = content.substring(offset, offset + this.CHUNK_SIZE);
      chunks.push({
        scriptId,
        chunkIndex,
        content: chunk,
        size: chunk.length
      });
      offset += this.CHUNK_SIZE;
      chunkIndex++;
    }
    this.scriptChunks.set(scriptId, chunks);
    logger.debug(` Chunked script ${scriptId} into ${chunks.length} chunks`);
  }

  /**
   * @param {string} scriptId
   * @param {number} chunkIndex - Zero-based chunk position.
   * @returns {string|null} The chunk text, or `null` when the script or index is unknown.
   */
  getScriptChunk(scriptId, chunkIndex) {
    const chunks = this.scriptChunks.get(scriptId);
    if (!chunks || chunkIndex >= chunks.length) return null;
    const chunk = chunks[chunkIndex];
    return chunk ? chunk.content : null;
  }

  /**
   * Search via the keyword index (plain keywords) or fall back to `searchInScripts` (regex).
   *
   * The indexed path only sees scripts whose source has already been loaded; it does
   * not initialize the session or fetch sources itself. Index keys are lowercased, so
   * the lookup is always done with the lowercased keyword; with `caseSensitive`, hits
   * are additionally required to contain the keyword exactly as written and
   * `matchText` reports the original token.
   *
   * @param {string} keyword
   * @param {object} [options]
   * @param {boolean} [options.isRegex=false]
   * @param {boolean} [options.caseSensitive=false]
   * @param {number} [options.maxMatches=100]
   * @returns {Promise<{keyword: string, totalMatches: number, matches: object[], searchMethod: string}>}
   */
  async searchInScriptsEnhanced(keyword, options = {}) {
    const { isRegex = false, caseSensitive = false, maxMatches = 100 } = options;
    if (isRegex) {
      return {
        ...await this.searchInScripts(keyword, options),
        searchMethod: "regex"
      };
    }
    const loweredKeyword = keyword.toLowerCase();
    const matches = [];
    for (const [indexedKeyword, entries] of this.keywordIndex.entries()) {
      if (!indexedKeyword.includes(loweredKeyword)) continue;
      for (const entry of entries) {
        const originalText = entry.text ?? indexedKeyword;
        if (caseSensitive && !originalText.includes(keyword)) continue;
        matches.push({
          scriptId: entry.scriptId,
          url: entry.url,
          line: entry.line,
          column: entry.column,
          matchText: caseSensitive ? originalText : indexedKeyword,
          context: entry.context
        });
        if (matches.length >= maxMatches) break;
      }
      if (matches.length >= maxMatches) break;
    }
    logger.info(` Enhanced search (indexed) found ${matches.length} matches for "${keyword}"`);
    return {
      keyword,
      totalMatches: matches.length,
      matches,
      searchMethod: "indexed"
    };
  }
};
|
|
3002
|
+
//#endregion
|
|
3003
|
+
export { connectPlaywrightCdpFallback as i, DOMInspector as n, CodeCollector as r, ScriptManager as t };
|