real-browser-mcp-server 1.5.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -41
- package/dist/lib/cjs/index.d.ts +14 -1
- package/dist/lib/cjs/index.d.ts.map +1 -1
- package/dist/lib/cjs/index.js +5 -3
- package/dist/lib/cjs/index.js.map +1 -1
- package/dist/lib/cjs/module/pageController.d.ts +7 -1
- package/dist/lib/cjs/module/pageController.d.ts.map +1 -1
- package/dist/lib/cjs/module/pageController.js +4 -23
- package/dist/lib/cjs/module/pageController.js.map +1 -1
- package/dist/lib/cjs/module/turnstile.d.ts +4 -1
- package/dist/lib/cjs/module/turnstile.d.ts.map +1 -1
- package/dist/lib/cjs/module/turnstile.js +14 -13
- package/dist/lib/cjs/module/turnstile.js.map +1 -1
- package/dist/lib/esm/index.d.mjs +11 -0
- package/dist/lib/esm/index.mjs +4 -0
- package/dist/lib/esm/module/pageController.d.mjs +3 -0
- package/dist/lib/esm/module/turnstile.d.mjs +3 -0
- package/dist/scripts/generate-esm.d.ts +2 -0
- package/dist/scripts/generate-esm.d.ts.map +1 -0
- package/dist/scripts/generate-esm.js +100 -0
- package/dist/scripts/generate-esm.js.map +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +0 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/handlers/browser.d.ts +8 -7
- package/dist/src/mcp/handlers/browser.d.ts.map +1 -1
- package/dist/src/mcp/handlers/browser.js +4 -3
- package/dist/src/mcp/handlers/browser.js.map +1 -1
- package/dist/src/mcp/handlers/dom.d.ts +19 -18
- package/dist/src/mcp/handlers/dom.d.ts.map +1 -1
- package/dist/src/mcp/handlers/dom.js +7 -28
- package/dist/src/mcp/handlers/dom.js.map +1 -1
- package/dist/src/mcp/handlers/extract.js.map +1 -1
- package/dist/src/mcp/handlers/handler-utils.d.ts +14 -0
- package/dist/src/mcp/handlers/handler-utils.d.ts.map +1 -0
- package/dist/src/mcp/handlers/handler-utils.js +42 -0
- package/dist/src/mcp/handlers/handler-utils.js.map +1 -0
- package/dist/src/mcp/handlers/helpers.d.ts +0 -7
- package/dist/src/mcp/handlers/helpers.d.ts.map +1 -1
- package/dist/src/mcp/handlers/helpers.js +0 -15
- package/dist/src/mcp/handlers/helpers.js.map +1 -1
- package/dist/src/mcp/handlers/media-handlers.d.ts +2 -1
- package/dist/src/mcp/handlers/media-handlers.d.ts.map +1 -1
- package/dist/src/mcp/handlers/media-handlers.js +24 -8
- package/dist/src/mcp/handlers/media-handlers.js.map +1 -1
- package/dist/src/mcp/handlers/network-extractors.d.ts +2 -0
- package/dist/src/mcp/handlers/network-extractors.d.ts.map +1 -0
- package/dist/src/mcp/handlers/network-extractors.js +651 -0
- package/dist/src/mcp/handlers/network-extractors.js.map +1 -0
- package/dist/src/mcp/handlers/network-recorder.d.ts +119 -0
- package/dist/src/mcp/handlers/network-recorder.d.ts.map +1 -0
- package/dist/src/mcp/handlers/network-recorder.js +337 -0
- package/dist/src/mcp/handlers/network-recorder.js.map +1 -0
- package/dist/src/mcp/handlers/network.d.ts +30 -118
- package/dist/src/mcp/handlers/network.d.ts.map +1 -1
- package/dist/src/mcp/handlers/network.js +28 -1187
- package/dist/src/mcp/handlers/network.js.map +1 -1
- package/dist/src/mcp/handlers/state.d.ts +1 -0
- package/dist/src/mcp/handlers/state.d.ts.map +1 -1
- package/dist/src/mcp/handlers/state.js +17 -0
- package/dist/src/mcp/handlers/state.js.map +1 -1
- package/dist/src/mcp/handlers/utility-handlers.d.ts +8 -37
- package/dist/src/mcp/handlers/utility-handlers.d.ts.map +1 -1
- package/dist/src/mcp/handlers/utility-handlers.js +69 -31
- package/dist/src/mcp/handlers/utility-handlers.js.map +1 -1
- package/dist/src/mcp/handlers/vision-captcha.d.ts +221 -0
- package/dist/src/mcp/handlers/vision-captcha.d.ts.map +1 -0
- package/dist/src/mcp/handlers/vision-captcha.js +238 -0
- package/dist/src/mcp/handlers/vision-captcha.js.map +1 -0
- package/dist/src/mcp/handlers/vision-see-page.d.ts +32 -0
- package/dist/src/mcp/handlers/vision-see-page.d.ts.map +1 -0
- package/dist/src/mcp/handlers/vision-see-page.js +260 -0
- package/dist/src/mcp/handlers/vision-see-page.js.map +1 -0
- package/dist/src/mcp/handlers/vision.d.ts +50 -27
- package/dist/src/mcp/handlers/vision.d.ts.map +1 -1
- package/dist/src/mcp/handlers/vision.js +4 -606
- package/dist/src/mcp/handlers/vision.js.map +1 -1
- package/dist/src/mcp/index.d.ts.map +1 -1
- package/dist/src/mcp/index.js +11 -3
- package/dist/src/mcp/index.js.map +1 -1
- package/dist/src/shared/cache-manager.d.ts +0 -2
- package/dist/src/shared/cache-manager.d.ts.map +1 -1
- package/dist/src/shared/cache-manager.js +1 -3
- package/dist/src/shared/cache-manager.js.map +1 -1
- package/dist/src/shared/lib-core.d.ts +2 -1
- package/dist/src/shared/lib-core.d.ts.map +1 -1
- package/dist/src/shared/lib-core.js +5 -38
- package/dist/src/shared/lib-core.js.map +1 -1
- package/dist/src/shared/tools.d.ts.map +1 -1
- package/dist/src/shared/tools.js +6 -18
- package/dist/src/shared/tools.js.map +1 -1
- package/dist/src/types.d.ts +25 -6
- package/dist/src/types.d.ts.map +1 -1
- package/dist/test/cjs/test.js +29 -33
- package/dist/test/cjs/test.js.map +1 -1
- package/dist/test/mcp/smoke-test.d.ts.map +1 -1
- package/dist/test/mcp/smoke-test.js +9 -3
- package/dist/test/mcp/smoke-test.js.map +1 -1
- package/dist/test/unit/handler-test.d.ts +3 -0
- package/dist/test/unit/handler-test.d.ts.map +1 -0
- package/dist/test/unit/handler-test.js +133 -0
- package/dist/test/unit/handler-test.js.map +1 -0
- package/lib/esm/module/pageController.mjs +4 -22
- package/lib/esm/module/turnstile.mjs +16 -13
- package/package.json +5 -4
- package/typings.d.ts +5 -40
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.networkHandlers = void 0;
|
|
4
4
|
const state_1 = require("./state");
|
|
5
|
-
|
|
5
|
+
const network_recorder_1 = require("./network-recorder");
|
|
6
|
+
const network_extractors_1 = require("./network-extractors");
|
|
6
7
|
exports.networkHandlers = {
|
|
7
8
|
async redirect_tracer(params) {
|
|
8
9
|
const { page } = (0, state_1.requireBrowser)();
|
|
@@ -11,29 +12,19 @@ exports.networkHandlers = {
|
|
|
11
12
|
const redirects = [];
|
|
12
13
|
const jsNavigations = [];
|
|
13
14
|
let currentUrl = url;
|
|
14
|
-
// HTTP redirect handler
|
|
15
15
|
const responseHandler = (response) => {
|
|
16
16
|
if ([301, 302, 303, 307, 308].includes(response.status())) {
|
|
17
|
-
redirects.
|
|
18
|
-
url: response.url(),
|
|
19
|
-
status: response.status()
|
|
20
|
-
|
|
21
|
-
headers: includeHeaders ? response.headers() : undefined
|
|
22
|
-
});
|
|
23
|
-
(0, state_1.notifyProgress)('redirect_tracer', 'progress', `HTTP Redirect ${redirects.length}: ${response.status()}`, { status: response.status() });
|
|
17
|
+
if (redirects.length < maxRedirects) {
|
|
18
|
+
redirects.push({ url: response.url(), status: response.status(), type: 'http', headers: includeHeaders ? response.headers() : undefined });
|
|
19
|
+
(0, state_1.notifyProgress)('redirect_tracer', 'progress', `HTTP Redirect ${redirects.length}: ${response.status()}`, { status: response.status() });
|
|
20
|
+
}
|
|
24
21
|
}
|
|
25
22
|
};
|
|
26
|
-
// JS/Navigation handler for tracking window.location changes
|
|
27
23
|
const frameNavigatedHandler = (frame) => {
|
|
28
24
|
if (frame === page.mainFrame()) {
|
|
29
25
|
const newUrl = frame.url();
|
|
30
26
|
if (newUrl !== currentUrl && newUrl !== 'about:blank') {
|
|
31
|
-
jsNavigations.push({
|
|
32
|
-
url: newUrl,
|
|
33
|
-
type: 'js_navigation',
|
|
34
|
-
fromUrl: currentUrl,
|
|
35
|
-
timestamp: Date.now()
|
|
36
|
-
});
|
|
27
|
+
jsNavigations.push({ url: newUrl, type: 'js_navigation', fromUrl: currentUrl, timestamp: Date.now() });
|
|
37
28
|
(0, state_1.notifyProgress)('redirect_tracer', 'progress', `JS Navigation: ${newUrl}`, { type: 'js' });
|
|
38
29
|
currentUrl = newUrl;
|
|
39
30
|
}
|
|
@@ -43,9 +34,7 @@ exports.networkHandlers = {
|
|
|
43
34
|
page.on('framenavigated', frameNavigatedHandler);
|
|
44
35
|
try {
|
|
45
36
|
await page.goto(url, { waitUntil: 'networkidle', timeout });
|
|
46
|
-
// If followJS is enabled, wait a bit and check for meta refreshes and JS redirects
|
|
47
37
|
if (followJS) {
|
|
48
|
-
// Check for meta refresh tags
|
|
49
38
|
const metaRefresh = await page.evaluate(() => {
|
|
50
39
|
const meta = document.querySelector('meta[http-equiv="refresh"]');
|
|
51
40
|
if (meta) {
|
|
@@ -55,14 +44,8 @@ exports.networkHandlers = {
|
|
|
55
44
|
}
|
|
56
45
|
return null;
|
|
57
46
|
}).catch(() => null);
|
|
58
|
-
if (metaRefresh)
|
|
59
|
-
jsNavigations.push({
|
|
60
|
-
url: metaRefresh,
|
|
61
|
-
type: 'meta_refresh',
|
|
62
|
-
fromUrl: page.url()
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
// Extract any onclick/href javascript: URLs
|
|
47
|
+
if (metaRefresh)
|
|
48
|
+
jsNavigations.push({ url: metaRefresh, type: 'meta_refresh', fromUrl: page.url() });
|
|
66
49
|
const jsLinks = await page.evaluate(() => {
|
|
67
50
|
const links = [];
|
|
68
51
|
document.querySelectorAll('a[href^="javascript:"], [onclick]').forEach(el => {
|
|
@@ -73,9 +56,8 @@ exports.networkHandlers = {
|
|
|
73
56
|
if (match)
|
|
74
57
|
links.push({ url: match[1], type: 'onclick' });
|
|
75
58
|
}
|
|
76
|
-
if (href && href.includes('location'))
|
|
59
|
+
if (href && href.includes('location'))
|
|
77
60
|
links.push({ url: href, type: 'javascript_href' });
|
|
78
|
-
}
|
|
79
61
|
});
|
|
80
62
|
return links;
|
|
81
63
|
}).catch(() => []);
|
|
@@ -87,1183 +69,42 @@ exports.networkHandlers = {
|
|
|
87
69
|
}
|
|
88
70
|
page.off('response', responseHandler);
|
|
89
71
|
page.off('framenavigated', frameNavigatedHandler);
|
|
90
|
-
const allRedirects = [
|
|
91
|
-
...redirects,
|
|
92
|
-
...jsNavigations.filter(nav => nav.url && nav.url.startsWith('http'))
|
|
93
|
-
];
|
|
72
|
+
const allRedirects = [...redirects, ...jsNavigations.filter(nav => nav.url && nav.url.startsWith('http'))];
|
|
94
73
|
(0, state_1.notifyProgress)('redirect_tracer', 'completed', `Found ${redirects.length} HTTP + ${jsNavigations.length} JS redirects`, { httpRedirects: redirects.length, jsNavigations: jsNavigations.length, finalUrl: page.url() });
|
|
95
|
-
return {
|
|
96
|
-
success: true,
|
|
97
|
-
originalUrl: url,
|
|
98
|
-
finalUrl: page.url(),
|
|
99
|
-
redirectCount: allRedirects.length,
|
|
100
|
-
httpRedirects: redirects,
|
|
101
|
-
jsNavigations: jsNavigations,
|
|
102
|
-
allRedirects: allRedirects
|
|
103
|
-
};
|
|
74
|
+
return { success: true, originalUrl: url, finalUrl: page.url(), redirectCount: allRedirects.length, httpRedirects: redirects, jsNavigations, allRedirects };
|
|
104
75
|
},
|
|
105
76
|
async network_recorder(params = {}) {
|
|
106
77
|
const { page } = (0, state_1.requireBrowser)();
|
|
107
|
-
const { action = 'get', filter = {},
|
|
78
|
+
const { action = 'get', filter = {}, captureXhrBody = false } = params;
|
|
108
79
|
switch (action) {
|
|
109
|
-
case 'start':
|
|
110
|
-
state_1.state.networkRecords = [];
|
|
111
|
-
state_1.state.isRecordingNetwork = true;
|
|
112
|
-
// ====== FEATURE 2: Pre-page-load Runtime API Interception ======
|
|
113
|
-
// Inject BEFORE any JS runs — catches calls from obfuscated/webpack code
|
|
114
|
-
try {
|
|
115
|
-
await page.addInitScript(() => {
|
|
116
|
-
window.__interceptedApis = [];
|
|
117
|
-
window.__wsMessages = [];
|
|
118
|
-
// --- Monkey-patch fetch ---
|
|
119
|
-
const origFetch = window.fetch;
|
|
120
|
-
window.fetch = function (...args) {
|
|
121
|
-
try {
|
|
122
|
-
const url = typeof args[0] === 'string' ? args[0] : (args[0]?.url || String(args[0]));
|
|
123
|
-
const opts = args[1] || {};
|
|
124
|
-
const entry = {
|
|
125
|
-
type: 'fetch', url, method: opts.method || 'GET',
|
|
126
|
-
headers: opts.headers ? JSON.parse(JSON.stringify(opts.headers)) : null,
|
|
127
|
-
body: typeof opts.body === 'string' ? opts.body.substring(0, 2000) : null,
|
|
128
|
-
timestamp: Date.now()
|
|
129
|
-
};
|
|
130
|
-
window.__interceptedApis.push(entry);
|
|
131
|
-
}
|
|
132
|
-
catch (e) { }
|
|
133
|
-
return origFetch.apply(this, args);
|
|
134
|
-
};
|
|
135
|
-
// --- Monkey-patch XMLHttpRequest ---
|
|
136
|
-
const origOpen = XMLHttpRequest.prototype.open;
|
|
137
|
-
const origSend = XMLHttpRequest.prototype.send;
|
|
138
|
-
const origSetHeader = XMLHttpRequest.prototype.setRequestHeader;
|
|
139
|
-
XMLHttpRequest.prototype.open = function (method, url, ...rest) {
|
|
140
|
-
this.__iUrl = url;
|
|
141
|
-
this.__iMethod = method;
|
|
142
|
-
this.__iHeaders = {};
|
|
143
|
-
return origOpen.apply(this, [method, url, ...rest]);
|
|
144
|
-
};
|
|
145
|
-
XMLHttpRequest.prototype.setRequestHeader = function (name, value) {
|
|
146
|
-
if (this.__iHeaders)
|
|
147
|
-
this.__iHeaders[name] = value;
|
|
148
|
-
return origSetHeader.apply(this, [name, value]);
|
|
149
|
-
};
|
|
150
|
-
XMLHttpRequest.prototype.send = function (body) {
|
|
151
|
-
try {
|
|
152
|
-
window.__interceptedApis.push({
|
|
153
|
-
type: 'xhr', url: this.__iUrl, method: this.__iMethod,
|
|
154
|
-
headers: this.__iHeaders || null,
|
|
155
|
-
body: typeof body === 'string' ? body.substring(0, 2000) : null,
|
|
156
|
-
timestamp: Date.now()
|
|
157
|
-
});
|
|
158
|
-
}
|
|
159
|
-
catch (e) { }
|
|
160
|
-
return origSend.apply(this, [body]);
|
|
161
|
-
};
|
|
162
|
-
// --- Monkey-patch navigator.sendBeacon ---
|
|
163
|
-
if (navigator.sendBeacon) {
|
|
164
|
-
const origBeacon = navigator.sendBeacon.bind(navigator);
|
|
165
|
-
navigator.sendBeacon = function (url, data) {
|
|
166
|
-
try {
|
|
167
|
-
window.__interceptedApis.push({
|
|
168
|
-
type: 'beacon', url, method: 'POST',
|
|
169
|
-
body: typeof data === 'string' ? data.substring(0, 2000) : null,
|
|
170
|
-
timestamp: Date.now()
|
|
171
|
-
});
|
|
172
|
-
}
|
|
173
|
-
catch (e) { }
|
|
174
|
-
return origBeacon(url, data);
|
|
175
|
-
};
|
|
176
|
-
}
|
|
177
|
-
// ====== FEATURE 3: WebSocket Recording ======
|
|
178
|
-
const OrigWS = window.WebSocket;
|
|
179
|
-
window.WebSocket = function (url, protocols) {
|
|
180
|
-
const ws = protocols ? new OrigWS(url, protocols) : new OrigWS(url);
|
|
181
|
-
const wsId = window.__wsMessages.length;
|
|
182
|
-
const wsEntry = { id: wsId, url, openedAt: Date.now(), messages: [], status: 'connecting' };
|
|
183
|
-
window.__wsMessages.push(wsEntry);
|
|
184
|
-
ws.addEventListener('open', () => { wsEntry.status = 'open'; });
|
|
185
|
-
ws.addEventListener('close', (e) => { wsEntry.status = 'closed'; wsEntry.closedAt = Date.now(); wsEntry.closeCode = e.code; });
|
|
186
|
-
ws.addEventListener('error', () => { wsEntry.status = 'error'; });
|
|
187
|
-
ws.addEventListener('message', (e) => {
|
|
188
|
-
try {
|
|
189
|
-
let data = e.data;
|
|
190
|
-
let dataType = 'text';
|
|
191
|
-
if (data instanceof Blob) {
|
|
192
|
-
dataType = 'blob';
|
|
193
|
-
data = `[Blob ${data.size} bytes]`;
|
|
194
|
-
}
|
|
195
|
-
else if (data instanceof ArrayBuffer) {
|
|
196
|
-
dataType = 'binary';
|
|
197
|
-
data = `[ArrayBuffer ${data.byteLength} bytes]`;
|
|
198
|
-
}
|
|
199
|
-
else if (typeof data === 'string' && data.length > 5000) {
|
|
200
|
-
data = data.substring(0, 5000) + '...';
|
|
201
|
-
}
|
|
202
|
-
wsEntry.messages.push({ direction: 'received', data, dataType, timestamp: Date.now() });
|
|
203
|
-
}
|
|
204
|
-
catch (e) { }
|
|
205
|
-
});
|
|
206
|
-
// Intercept send
|
|
207
|
-
const origWsSend = ws.send.bind(ws);
|
|
208
|
-
ws.send = function (data) {
|
|
209
|
-
try {
|
|
210
|
-
let sendData = data;
|
|
211
|
-
let dataType = 'text';
|
|
212
|
-
if (data instanceof Blob) {
|
|
213
|
-
dataType = 'blob';
|
|
214
|
-
sendData = `[Blob ${data.size} bytes]`;
|
|
215
|
-
}
|
|
216
|
-
else if (data instanceof ArrayBuffer) {
|
|
217
|
-
dataType = 'binary';
|
|
218
|
-
sendData = `[ArrayBuffer ${data.byteLength} bytes]`;
|
|
219
|
-
}
|
|
220
|
-
else if (typeof data === 'string' && data.length > 5000) {
|
|
221
|
-
sendData = data.substring(0, 5000) + '...';
|
|
222
|
-
}
|
|
223
|
-
wsEntry.messages.push({ direction: 'sent', data: sendData, dataType, timestamp: Date.now() });
|
|
224
|
-
}
|
|
225
|
-
catch (e) { }
|
|
226
|
-
return origWsSend(data);
|
|
227
|
-
};
|
|
228
|
-
return ws;
|
|
229
|
-
};
|
|
230
|
-
window.WebSocket.prototype = OrigWS.prototype;
|
|
231
|
-
window.WebSocket.CONNECTING = OrigWS.CONNECTING;
|
|
232
|
-
window.WebSocket.OPEN = OrigWS.OPEN;
|
|
233
|
-
window.WebSocket.CLOSING = OrigWS.CLOSING;
|
|
234
|
-
window.WebSocket.CLOSED = OrigWS.CLOSED;
|
|
235
|
-
});
|
|
236
|
-
}
|
|
237
|
-
catch (e) { /* addInitScript may fail on already-loaded pages, that's OK */ }
|
|
238
|
-
// Request handler
|
|
239
|
-
page.on('request', req => {
|
|
240
|
-
if (state_1.state.isRecordingNetwork) {
|
|
241
|
-
state_1.state.networkRecords.push({
|
|
242
|
-
type: 'request',
|
|
243
|
-
url: req.url(),
|
|
244
|
-
method: req.method(),
|
|
245
|
-
resourceType: req.resourceType(),
|
|
246
|
-
headers: req.headers(),
|
|
247
|
-
timestamp: Date.now()
|
|
248
|
-
});
|
|
249
|
-
}
|
|
250
|
-
});
|
|
251
|
-
// Response handler for capturing video/media URLs
|
|
252
|
-
page.on('response', async (res) => {
|
|
253
|
-
if (state_1.state.isRecordingNetwork) {
|
|
254
|
-
const url = res.url();
|
|
255
|
-
const contentType = res.headers()['content-type'] || '';
|
|
256
|
-
const isMedia = contentType.includes('video') ||
|
|
257
|
-
contentType.includes('audio') ||
|
|
258
|
-
contentType.includes('mpegurl') ||
|
|
259
|
-
url.includes('.m3u8') ||
|
|
260
|
-
url.includes('.mpd') ||
|
|
261
|
-
url.includes('.mp4') ||
|
|
262
|
-
url.includes('.ts');
|
|
263
|
-
const isApiCall = contentType.includes('json') ||
|
|
264
|
-
contentType.includes('x-www-form-urlencoded') ||
|
|
265
|
-
url.match(/\.(php|api|json|do|action)($|\?)/) ||
|
|
266
|
-
(res.request().resourceType() === 'xhr') ||
|
|
267
|
-
(res.request().resourceType() === 'fetch');
|
|
268
|
-
const isBinaryBody = isMedia ||
|
|
269
|
-
contentType.includes('octet-stream') ||
|
|
270
|
-
contentType.includes('application/pdf') ||
|
|
271
|
-
contentType.includes('image/') ||
|
|
272
|
-
contentType.includes('font/') ||
|
|
273
|
-
contentType.includes('zip') ||
|
|
274
|
-
contentType.includes('rar');
|
|
275
|
-
const record = {
|
|
276
|
-
type: 'response',
|
|
277
|
-
url: url,
|
|
278
|
-
method: res.request().method(),
|
|
279
|
-
status: res.status(),
|
|
280
|
-
contentType: contentType,
|
|
281
|
-
isMedia: isMedia,
|
|
282
|
-
isApiCall: isApiCall,
|
|
283
|
-
resourceType: res.request().resourceType(),
|
|
284
|
-
timestamp: Date.now()
|
|
285
|
-
};
|
|
286
|
-
// For media URLs, try to get more details
|
|
287
|
-
if (isMedia) {
|
|
288
|
-
record.mediaType = url.includes('.m3u8') ? 'hls' :
|
|
289
|
-
url.includes('.mpd') ? 'dash' :
|
|
290
|
-
url.includes('.mp4') ? 'mp4' : 'other';
|
|
291
|
-
}
|
|
292
|
-
// Capture request/response body for API calls
|
|
293
|
-
if (isApiCall) {
|
|
294
|
-
try {
|
|
295
|
-
const postData = res.request().postData();
|
|
296
|
-
if (postData)
|
|
297
|
-
record.requestBody = postData.substring(0, 2000);
|
|
298
|
-
if (isBinaryBody) {
|
|
299
|
-
record.responseBody = `[binary/media body omitted: ${contentType || 'unknown content-type'}]`;
|
|
300
|
-
}
|
|
301
|
-
else {
|
|
302
|
-
const responseBody = await res.text().catch(() => null);
|
|
303
|
-
if (responseBody) {
|
|
304
|
-
record.responseBody = responseBody.substring(0, 5000);
|
|
305
|
-
record.responseTruncated = responseBody.length > 5000;
|
|
306
|
-
try {
|
|
307
|
-
record.responseJson = JSON.parse(responseBody);
|
|
308
|
-
}
|
|
309
|
-
catch (e) { }
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
}
|
|
313
|
-
catch (e) { }
|
|
314
|
-
}
|
|
315
|
-
state_1.state.networkRecords.push(record);
|
|
316
|
-
}
|
|
317
|
-
});
|
|
318
|
-
// Frame navigation handler for JS redirects
|
|
319
|
-
page.on('framenavigated', frame => {
|
|
320
|
-
if (state_1.state.isRecordingNetwork && frame === page.mainFrame()) {
|
|
321
|
-
state_1.state.networkRecords.push({
|
|
322
|
-
type: 'navigation',
|
|
323
|
-
url: frame.url(),
|
|
324
|
-
timestamp: Date.now()
|
|
325
|
-
});
|
|
326
|
-
}
|
|
327
|
-
});
|
|
328
|
-
(0, state_1.notifyProgress)('network_recorder', 'started', 'Power recording started (requests + responses + API interception + WebSocket + navigations)');
|
|
329
|
-
break;
|
|
80
|
+
case 'start': return (0, network_recorder_1.startRecording)(page).then(() => ({ success: true, message: 'Recording started' }));
|
|
330
81
|
case 'stop':
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
break;
|
|
82
|
+
(0, network_recorder_1.stopRecording)();
|
|
83
|
+
return { success: true };
|
|
334
84
|
case 'clear':
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
case '
|
|
344
|
-
// Special action to get only media URLs
|
|
345
|
-
const mediaRecords = state_1.state.networkRecords.filter(r => r.isMedia);
|
|
346
|
-
return {
|
|
347
|
-
success: true,
|
|
348
|
-
count: mediaRecords.length,
|
|
349
|
-
mediaUrls: mediaRecords.map(r => ({ url: r.url, type: r.mediaType }))
|
|
350
|
-
};
|
|
351
|
-
case 'get_navigations':
|
|
352
|
-
// Get only navigation events (for tracking JS redirects)
|
|
353
|
-
const navRecords = state_1.state.networkRecords.filter(r => r.type === 'navigation');
|
|
354
|
-
return {
|
|
355
|
-
success: true,
|
|
356
|
-
count: navRecords.length,
|
|
357
|
-
navigations: navRecords
|
|
358
|
-
};
|
|
359
|
-
case 'get_api_calls': {
|
|
360
|
-
const apiRecords = state_1.state.networkRecords.filter(r => r.isApiCall);
|
|
361
|
-
return {
|
|
362
|
-
success: true,
|
|
363
|
-
count: apiRecords.length,
|
|
364
|
-
apiCalls: apiRecords.map(r => ({
|
|
365
|
-
url: r.url, method: r.method || 'GET', status: r.status,
|
|
366
|
-
contentType: r.contentType, resourceType: r.resourceType,
|
|
367
|
-
requestBody: r.requestBody || null, responseBody: r.responseBody || null,
|
|
368
|
-
responseJson: r.responseJson || null, timestamp: r.timestamp
|
|
369
|
-
}))
|
|
370
|
-
};
|
|
371
|
-
}
|
|
372
|
-
// ====== FEATURE 2: Get Intercepted APIs (from monkey-patched fetch/XHR/beacon) ======
|
|
373
|
-
case 'get_intercepted_apis': {
|
|
374
|
-
try {
|
|
375
|
-
const intercepted = await page.evaluate(() => window.__interceptedApis || []);
|
|
376
|
-
return {
|
|
377
|
-
success: true,
|
|
378
|
-
count: intercepted.length,
|
|
379
|
-
interceptedApis: intercepted,
|
|
380
|
-
note: 'These are runtime-intercepted API calls captured via monkey-patched fetch/XHR/sendBeacon (pre-page-load injection)'
|
|
381
|
-
};
|
|
382
|
-
}
|
|
383
|
-
catch (e) {
|
|
384
|
-
return { success: false, error: 'Failed to retrieve intercepted APIs: ' + e.message, interceptedApis: [] };
|
|
385
|
-
}
|
|
386
|
-
}
|
|
387
|
-
// ====== FEATURE 3: Get WebSocket Messages ======
|
|
388
|
-
case 'get_websockets': {
|
|
389
|
-
try {
|
|
390
|
-
const wsData = await page.evaluate(() => window.__wsMessages || []);
|
|
391
|
-
const totalMessages = wsData.reduce((sum, ws) => sum + ws.messages.length, 0);
|
|
392
|
-
return {
|
|
393
|
-
success: true,
|
|
394
|
-
count: wsData.length,
|
|
395
|
-
totalMessages: totalMessages,
|
|
396
|
-
websockets: wsData,
|
|
397
|
-
note: 'WebSocket connections and messages captured via constructor monkey-patch'
|
|
398
|
-
};
|
|
399
|
-
}
|
|
400
|
-
catch (e) {
|
|
401
|
-
return { success: false, error: 'Failed to retrieve WebSocket data: ' + e.message, websockets: [] };
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
// ====== FEATURE 4: GraphQL Inspector ======
|
|
405
|
-
case 'get_graphql': {
|
|
406
|
-
const gqlRecords = state_1.state.networkRecords.filter((r) => r.isApiCall && r.requestBody && (r.requestBody.includes('"query"') || r.requestBody.includes('query '))).map((r) => {
|
|
407
|
-
let parsedQuery = null, parsedVariables = null, operationName = null;
|
|
408
|
-
try {
|
|
409
|
-
const body = JSON.parse(r.requestBody);
|
|
410
|
-
parsedQuery = body.query;
|
|
411
|
-
parsedVariables = body.variables;
|
|
412
|
-
operationName = body.operationName;
|
|
413
|
-
}
|
|
414
|
-
catch (e) { }
|
|
415
|
-
return {
|
|
416
|
-
url: r.url,
|
|
417
|
-
method: r.method,
|
|
418
|
-
operationName: operationName || 'unknown',
|
|
419
|
-
query: parsedQuery || r.requestBody,
|
|
420
|
-
variables: parsedVariables,
|
|
421
|
-
response: r.responseJson || r.responseBody,
|
|
422
|
-
timestamp: r.timestamp
|
|
423
|
-
};
|
|
424
|
-
});
|
|
425
|
-
return { success: true, count: gqlRecords.length, graphql: gqlRecords };
|
|
426
|
-
}
|
|
427
|
-
// ====== FEATURE 5: HAR Exporter ======
|
|
428
|
-
case 'export_har': {
|
|
429
|
-
// ponytail: minimal HAR 1.2 mapping without dependencies
|
|
430
|
-
const har = {
|
|
431
|
-
log: {
|
|
432
|
-
version: '1.2',
|
|
433
|
-
creator: { name: 'Real Browser MCP', version: '1.0' },
|
|
434
|
-
entries: state_1.state.networkRecords.filter((r) => r.type === 'response').map((r) => ({
|
|
435
|
-
startedDateTime: new Date(r.timestamp).toISOString(),
|
|
436
|
-
request: {
|
|
437
|
-
method: r.method || 'GET',
|
|
438
|
-
url: r.url,
|
|
439
|
-
headers: Object.entries(r.headers || {}).map(([name, value]) => ({ name, value: String(value) })),
|
|
440
|
-
postData: r.requestBody ? { text: r.requestBody } : undefined
|
|
441
|
-
},
|
|
442
|
-
response: {
|
|
443
|
-
status: r.status || 200,
|
|
444
|
-
content: {
|
|
445
|
-
mimeType: r.contentType || 'text/plain',
|
|
446
|
-
text: r.responseBody || ''
|
|
447
|
-
}
|
|
448
|
-
},
|
|
449
|
-
time: 0
|
|
450
|
-
}))
|
|
451
|
-
}
|
|
452
|
-
};
|
|
453
|
-
return { success: true, count: har.log.entries.length, har };
|
|
454
|
-
}
|
|
455
|
-
}
|
|
456
|
-
let records = state_1.state.networkRecords;
|
|
457
|
-
if (filter.resourceType) {
|
|
458
|
-
records = records.filter(r => r.resourceType === filter.resourceType);
|
|
85
|
+
await (0, network_recorder_1.clearRecording)(page);
|
|
86
|
+
return { success: true };
|
|
87
|
+
case 'get_media': return (0, network_recorder_1.getMediaRecords)();
|
|
88
|
+
case 'get_navigations': return (0, network_recorder_1.getNavigationRecords)();
|
|
89
|
+
case 'get_api_calls': return (0, network_recorder_1.getApiCallRecords)();
|
|
90
|
+
case 'get_intercepted_apis': return await (0, network_recorder_1.getInterceptedApis)(page);
|
|
91
|
+
case 'get_websockets': return await (0, network_recorder_1.getWebSocketRecords)(page);
|
|
92
|
+
case 'get_graphql': return (0, network_recorder_1.getGraphQLRecords)();
|
|
93
|
+
case 'export_har': return (0, network_recorder_1.exportHAR)();
|
|
459
94
|
}
|
|
460
|
-
|
|
461
|
-
const regex = new RegExp(filter.urlPattern);
|
|
462
|
-
records = records.filter(r => regex.test(r.url));
|
|
463
|
-
}
|
|
464
|
-
if (filter.type) {
|
|
465
|
-
records = records.filter(r => r.type === filter.type);
|
|
466
|
-
}
|
|
467
|
-
if (filter.mediaOnly) {
|
|
468
|
-
records = records.filter(r => r.isMedia);
|
|
469
|
-
}
|
|
470
|
-
return { success: true, recording: state_1.state.isRecordingNetwork, count: records.length, records: records.slice(-200) };
|
|
95
|
+
return (0, network_recorder_1.getFilteredRecords)(filter);
|
|
471
96
|
},
|
|
472
97
|
async extract_data(params = {}) {
|
|
473
|
-
|
|
474
|
-
const { type = 'auto', pattern, selector, jsonPath, source = 'all', autoDecode = true, flags = 'gi', types = ['all'], includeTitle = true, includeCanonical = true, maxMatches = 100, maxJsonObjects = 50, waitForSelector = false, selectorTimeout = 10000 } = params;
|
|
475
|
-
(0, state_1.notifyProgress)('extract_data', 'started', `Extracting data (type: ${type})...`);
|
|
476
|
-
const results = {
|
|
477
|
-
success: true,
|
|
478
|
-
type,
|
|
479
|
-
url: page.url(),
|
|
480
|
-
extracted: {}
|
|
481
|
-
};
|
|
482
|
-
// Helper: Extract regex matches
|
|
483
|
-
const extractRegex = async (regexPattern, regexFlags, contentSource) => {
|
|
484
|
-
let content;
|
|
485
|
-
if (contentSource === 'html') {
|
|
486
|
-
content = await page.content();
|
|
487
|
-
}
|
|
488
|
-
else if (contentSource === 'scripts') {
|
|
489
|
-
content = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
|
|
490
|
-
}
|
|
491
|
-
else if (contentSource === 'text') {
|
|
492
|
-
content = await page.evaluate(() => document.body.innerText);
|
|
493
|
-
}
|
|
494
|
-
else {
|
|
495
|
-
// 'all' - search in both HTML and scripts
|
|
496
|
-
const html = await page.content();
|
|
497
|
-
const scripts = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
|
|
498
|
-
content = html + '\n' + scripts;
|
|
499
|
-
}
|
|
500
|
-
const regex = new RegExp(regexPattern, regexFlags);
|
|
501
|
-
const matches = content.match(regex) || [];
|
|
502
|
-
return {
|
|
503
|
-
pattern: regexPattern,
|
|
504
|
-
flags: regexFlags,
|
|
505
|
-
matchCount: matches.length,
|
|
506
|
-
matches: matches.slice(0, maxMatches)
|
|
507
|
-
};
|
|
508
|
-
};
|
|
509
|
-
// Helper: Extract JSON data
|
|
510
|
-
const extractJson = async (jsonSource, sel, path) => {
|
|
511
|
-
const jsonData = [];
|
|
512
|
-
if (jsonSource === 'ld+json') {
|
|
513
|
-
const ldJson = await page.$$eval('script[type="application/ld+json"]', scripts => scripts.map(s => {
|
|
514
|
-
try {
|
|
515
|
-
return JSON.parse(s.textContent);
|
|
516
|
-
}
|
|
517
|
-
catch {
|
|
518
|
-
return null;
|
|
519
|
-
}
|
|
520
|
-
}).filter(Boolean));
|
|
521
|
-
jsonData.push(...ldJson);
|
|
522
|
-
}
|
|
523
|
-
else if (jsonSource === 'scripts') {
|
|
524
|
-
const content = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
|
|
525
|
-
// Look for JSON objects in scripts
|
|
526
|
-
const jsonRegex = /\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}|\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]/g;
|
|
527
|
-
const matches = content.match(jsonRegex) || [];
|
|
528
|
-
for (const match of matches.slice(0, maxJsonObjects)) {
|
|
529
|
-
try {
|
|
530
|
-
const parsed = JSON.parse(match);
|
|
531
|
-
jsonData.push(parsed);
|
|
532
|
-
}
|
|
533
|
-
catch { }
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
else if (jsonSource === 'api') {
|
|
537
|
-
// Try to find API responses in page data
|
|
538
|
-
const apiData = await page.evaluate(() => {
|
|
539
|
-
const data = [];
|
|
540
|
-
// Look for common API data storage patterns
|
|
541
|
-
if (window.__DATA__)
|
|
542
|
-
data.push(window.__DATA__);
|
|
543
|
-
if (window.__INITIAL_STATE__)
|
|
544
|
-
data.push(window.__INITIAL_STATE__);
|
|
545
|
-
if (window.__APP_DATA__)
|
|
546
|
-
data.push(window.__APP_DATA__);
|
|
547
|
-
if (window.data)
|
|
548
|
-
data.push(window.data);
|
|
549
|
-
if (window.config)
|
|
550
|
-
data.push(window.config);
|
|
551
|
-
return data;
|
|
552
|
-
});
|
|
553
|
-
jsonData.push(...apiData);
|
|
554
|
-
}
|
|
555
|
-
else if (sel) {
|
|
556
|
-
try {
|
|
557
|
-
const text = await page.$eval(sel, el => el.textContent);
|
|
558
|
-
const parsed = JSON.parse(text);
|
|
559
|
-
jsonData.push(parsed);
|
|
560
|
-
}
|
|
561
|
-
catch { }
|
|
562
|
-
}
|
|
563
|
-
else {
|
|
564
|
-
// 'page' - try all sources
|
|
565
|
-
const ldJson = await page.$$eval('script[type="application/ld+json"]', scripts => scripts.map(s => {
|
|
566
|
-
try {
|
|
567
|
-
return JSON.parse(s.textContent);
|
|
568
|
-
}
|
|
569
|
-
catch {
|
|
570
|
-
return null;
|
|
571
|
-
}
|
|
572
|
-
}).filter(Boolean));
|
|
573
|
-
jsonData.push(...ldJson);
|
|
574
|
-
}
|
|
575
|
-
// Apply JSONPath if specified
|
|
576
|
-
if (path && jsonData.length > 0) {
|
|
577
|
-
// Simple JSONPath implementation
|
|
578
|
-
const getPath = (obj, pathStr) => {
|
|
579
|
-
const parts = pathStr.replace(/^\$\./, '').split('.');
|
|
580
|
-
let current = obj;
|
|
581
|
-
for (const part of parts) {
|
|
582
|
-
if (current === null || current === undefined)
|
|
583
|
-
return undefined;
|
|
584
|
-
if (part.includes('[') && part.includes(']')) {
|
|
585
|
-
const arrName = part.substring(0, part.indexOf('['));
|
|
586
|
-
const idx = parseInt(part.match(/\[(\d+)\]/)?.[1] || '0');
|
|
587
|
-
current = current[arrName]?.[idx];
|
|
588
|
-
}
|
|
589
|
-
else {
|
|
590
|
-
current = current[part];
|
|
591
|
-
}
|
|
592
|
-
}
|
|
593
|
-
return current;
|
|
594
|
-
};
|
|
595
|
-
return jsonData.map(obj => ({
|
|
596
|
-
original: obj,
|
|
597
|
-
extracted: getPath(obj, path)
|
|
598
|
-
}));
|
|
599
|
-
}
|
|
600
|
-
return jsonData;
|
|
601
|
-
};
|
|
602
|
-
// Helper: Extract meta tags
|
|
603
|
-
const extractMeta = async (metaTypes) => {
|
|
604
|
-
const meta = await page.evaluate(([includeTitle, includeCanonical]) => {
|
|
605
|
-
const result = { meta: {}, og: {}, twitter: {} };
|
|
606
|
-
document.querySelectorAll('meta').forEach(tag => {
|
|
607
|
-
const name = tag.getAttribute('name') || tag.getAttribute('property');
|
|
608
|
-
const content = tag.getAttribute('content');
|
|
609
|
-
if (name && content) {
|
|
610
|
-
if (name.startsWith('og:')) {
|
|
611
|
-
result.og[name.replace('og:', '')] = content;
|
|
612
|
-
}
|
|
613
|
-
else if (name.startsWith('twitter:')) {
|
|
614
|
-
result.twitter[name.replace('twitter:', '')] = content;
|
|
615
|
-
}
|
|
616
|
-
else {
|
|
617
|
-
result.meta[name] = content;
|
|
618
|
-
}
|
|
619
|
-
}
|
|
620
|
-
});
|
|
621
|
-
if (includeTitle) {
|
|
622
|
-
result.title = document.title;
|
|
623
|
-
}
|
|
624
|
-
if (includeCanonical) {
|
|
625
|
-
result.canonical = document.querySelector('link[rel="canonical"]')?.href;
|
|
626
|
-
}
|
|
627
|
-
return result;
|
|
628
|
-
}, [includeTitle, includeCanonical]);
|
|
629
|
-
// Filter by requested types
|
|
630
|
-
const filtered = {};
|
|
631
|
-
if (metaTypes.includes('all')) {
|
|
632
|
-
return meta;
|
|
633
|
-
}
|
|
634
|
-
if (metaTypes.includes('meta'))
|
|
635
|
-
filtered.meta = meta.meta;
|
|
636
|
-
if (metaTypes.includes('og'))
|
|
637
|
-
filtered.og = meta.og;
|
|
638
|
-
if (metaTypes.includes('twitter'))
|
|
639
|
-
filtered.twitter = meta.twitter;
|
|
640
|
-
if (includeTitle)
|
|
641
|
-
filtered.title = meta.title;
|
|
642
|
-
if (includeCanonical)
|
|
643
|
-
filtered.canonical = meta.canonical;
|
|
644
|
-
return filtered;
|
|
645
|
-
};
|
|
646
|
-
// Helper: Extract structured data from selector
|
|
647
|
-
const extractStructured = async (sel, wait = false, timeout = 10000) => {
|
|
648
|
-
if (wait) {
|
|
649
|
-
await page.waitForSelector(sel, { timeout });
|
|
650
|
-
}
|
|
651
|
-
const element = await page.$(sel);
|
|
652
|
-
if (!element) {
|
|
653
|
-
return { error: `Element not found: ${sel}` };
|
|
654
|
-
}
|
|
655
|
-
const data = await element.evaluate(el => ({
|
|
656
|
-
tagName: el.tagName,
|
|
657
|
-
text: el.innerText,
|
|
658
|
-
html: el.innerHTML,
|
|
659
|
-
attributes: Object.fromEntries([...el.attributes].map(a => [a.name, a.value])),
|
|
660
|
-
childCount: el.children.length,
|
|
661
|
-
boundingBox: el.getBoundingClientRect ? {
|
|
662
|
-
x: el.getBoundingClientRect().x,
|
|
663
|
-
y: el.getBoundingClientRect().y,
|
|
664
|
-
width: el.getBoundingClientRect().width,
|
|
665
|
-
height: el.getBoundingClientRect().height
|
|
666
|
-
} : null
|
|
667
|
-
}));
|
|
668
|
-
return data;
|
|
669
|
-
};
|
|
670
|
-
// Helper: Auto-detect and extract all
|
|
671
|
-
const extractAuto = async () => {
|
|
672
|
-
const autoResults = {
|
|
673
|
-
meta: null,
|
|
674
|
-
json: null,
|
|
675
|
-
structured: null,
|
|
676
|
-
patterns: []
|
|
677
|
-
};
|
|
678
|
-
// Extract meta tags
|
|
679
|
-
try {
|
|
680
|
-
autoResults.meta = await extractMeta(['all']);
|
|
681
|
-
}
|
|
682
|
-
catch (e) { }
|
|
683
|
-
// Extract JSON-LD
|
|
684
|
-
try {
|
|
685
|
-
autoResults.json = await extractJson('ld+json');
|
|
686
|
-
}
|
|
687
|
-
catch (e) { }
|
|
688
|
-
// Look for common data patterns
|
|
689
|
-
const commonPatterns = [
|
|
690
|
-
{ name: 'emails', pattern: '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' },
|
|
691
|
-
{ name: 'phones', pattern: '(\+?1?[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}' },
|
|
692
|
-
{ name: 'urls', pattern: 'https?://[^\s<>"{}|\\^`\[\]]+' },
|
|
693
|
-
{ name: 'ipv4', pattern: '\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b' }
|
|
694
|
-
];
|
|
695
|
-
const pageText = await page.evaluate(() => document.body.innerText);
|
|
696
|
-
for (const { name, pattern } of commonPatterns) {
|
|
697
|
-
const regex = new RegExp(pattern, 'gi');
|
|
698
|
-
const matches = [...new Set(pageText.match(regex) || [])];
|
|
699
|
-
if (matches.length > 0) {
|
|
700
|
-
autoResults.patterns.push({ type: name, count: matches.length, samples: matches.slice(0, 10) });
|
|
701
|
-
}
|
|
702
|
-
}
|
|
703
|
-
return autoResults;
|
|
704
|
-
};
|
|
705
|
-
// Helper: Extract Links (merged from link_harvester)
|
|
706
|
-
const extractLinks = async () => {
|
|
707
|
-
const { includeHidden = true, searchIframes = false } = params;
|
|
708
|
-
const doExtract = async (context) => {
|
|
709
|
-
return await context.evaluate(({ incHidden }) => {
|
|
710
|
-
const allLinks = [];
|
|
711
|
-
const seenUrls = new Set();
|
|
712
|
-
const addLink = (href, text, source, element) => {
|
|
713
|
-
if (!href || seenUrls.has(href))
|
|
714
|
-
return;
|
|
715
|
-
if (!href.startsWith('http') && !href.startsWith('//'))
|
|
716
|
-
return;
|
|
717
|
-
if (href.startsWith('//'))
|
|
718
|
-
href = window.location.protocol + href;
|
|
719
|
-
seenUrls.add(href);
|
|
720
|
-
allLinks.push({
|
|
721
|
-
href,
|
|
722
|
-
text: (text || '').trim().substring(0, 100),
|
|
723
|
-
source,
|
|
724
|
-
hidden: element ? (element.offsetParent === null || getComputedStyle(element).display === 'none' || getComputedStyle(element).visibility === 'hidden') : false
|
|
725
|
-
});
|
|
726
|
-
};
|
|
727
|
-
document.querySelectorAll('a[href]').forEach(a => addLink(a.href, a.textContent, 'anchor', a));
|
|
728
|
-
const dataAttrs = ['data-href', 'data-url', 'data-link', 'data-src', 'data-file', 'data-download'];
|
|
729
|
-
dataAttrs.forEach(attr => document.querySelectorAll(`[${attr}]`).forEach(el => addLink(el.getAttribute(attr), el.textContent, `${attr}`, el)));
|
|
730
|
-
if (incHidden) {
|
|
731
|
-
document.querySelectorAll('[onclick]').forEach(el => {
|
|
732
|
-
const onclick = el.getAttribute('onclick');
|
|
733
|
-
if (!onclick)
|
|
734
|
-
return;
|
|
735
|
-
const urlMatches = onclick.match(/https?:\/\/[^\s"'<>]+/gi) || [];
|
|
736
|
-
urlMatches.forEach(url => addLink(url, el.textContent, 'onclick', el));
|
|
737
|
-
const hrefMatch = onclick.match(/location\.href\s*=\s*['"]([^'"]+)['"]/);
|
|
738
|
-
if (hrefMatch)
|
|
739
|
-
addLink(hrefMatch[1], el.textContent, 'onclick-location', el);
|
|
740
|
-
const openMatch = onclick.match(/window\.open\s*\(\s*['"]([^'"]+)['"]/);
|
|
741
|
-
if (openMatch)
|
|
742
|
-
addLink(openMatch[1], el.textContent, 'onclick-window-open', el);
|
|
743
|
-
});
|
|
744
|
-
const scripts = [...document.querySelectorAll('script')].slice(0, 20);
|
|
745
|
-
scripts.forEach(script => {
|
|
746
|
-
const content = script.textContent || '';
|
|
747
|
-
const patterns = [
|
|
748
|
-
/["']?(https?:\/\/[^"'\s<>]+\.(mp4|mkv|avi|m3u8|mpd|zip|rar|pdf))[^"'\s<>]*["']?/gi,
|
|
749
|
-
/download[_-]?url\s*[:=]\s*["']([^"']+)["']/gi,
|
|
750
|
-
/file\s*[:=]\s*["']([^"']+)["']/gi
|
|
751
|
-
];
|
|
752
|
-
patterns.forEach(pattern => {
|
|
753
|
-
let match;
|
|
754
|
-
while ((match = pattern.exec(content)) !== null)
|
|
755
|
-
addLink(match[1], 'script-extracted', 'script', null);
|
|
756
|
-
});
|
|
757
|
-
});
|
|
758
|
-
}
|
|
759
|
-
document.querySelectorAll('a[href^="javascript:"]').forEach(a => {
|
|
760
|
-
const match = a.getAttribute('href')?.match(/https?:\/\/[^\s"'<>]+/gi);
|
|
761
|
-
if (match)
|
|
762
|
-
match.forEach(url => addLink(url, a.textContent, 'javascript-href', a));
|
|
763
|
-
});
|
|
764
|
-
document.querySelectorAll('input[type="hidden"]').forEach((input) => {
|
|
765
|
-
if (input.value && (input.value.startsWith('http') || input.value.startsWith('//')))
|
|
766
|
-
addLink(input.value, input.name || input.id, 'hidden-input', input);
|
|
767
|
-
});
|
|
768
|
-
const metaRefresh = document.querySelector('meta[http-equiv="refresh"]');
|
|
769
|
-
if (metaRefresh) {
|
|
770
|
-
const match = metaRefresh.getAttribute('content')?.match(/url=(.+)/i);
|
|
771
|
-
if (match)
|
|
772
|
-
addLink(match[1].trim().replace(/['"]/g, ''), 'meta-refresh', 'meta', null);
|
|
773
|
-
}
|
|
774
|
-
document.querySelectorAll('iframe[src]').forEach((iframe) => addLink(iframe.src, 'iframe', 'iframe', iframe));
|
|
775
|
-
return allLinks;
|
|
776
|
-
}, { incHidden: includeHidden }).catch(() => []);
|
|
777
|
-
};
|
|
778
|
-
let links = await doExtract(page);
|
|
779
|
-
if (searchIframes) {
|
|
780
|
-
const frames = page.frames();
|
|
781
|
-
for (let i = 1; i < frames.length && i < 5; i++) {
|
|
782
|
-
try {
|
|
783
|
-
const frame = frames[i];
|
|
784
|
-
if (frame.url() && frame.url() !== 'about:blank') {
|
|
785
|
-
const frameLinks = await doExtract(frame);
|
|
786
|
-
frameLinks.forEach((link) => link.source = `iframe:${link.source}`);
|
|
787
|
-
links = [...links, ...frameLinks];
|
|
788
|
-
}
|
|
789
|
-
}
|
|
790
|
-
catch (e) { }
|
|
791
|
-
}
|
|
792
|
-
}
|
|
793
|
-
if (!includeHidden)
|
|
794
|
-
links = links.filter((link) => !link.hidden);
|
|
795
|
-
const seen = new Set();
|
|
796
|
-
return links.filter((link) => {
|
|
797
|
-
if (seen.has(link.href))
|
|
798
|
-
return false;
|
|
799
|
-
seen.add(link.href);
|
|
800
|
-
return true;
|
|
801
|
-
});
|
|
802
|
-
};
|
|
803
|
-
// Main switch based on type
|
|
804
|
-
switch (type) {
|
|
805
|
-
case 'links': {
|
|
806
|
-
const links = await extractLinks();
|
|
807
|
-
results.extracted = { count: links.length, links };
|
|
808
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `Links: ${links.length} extracted`);
|
|
809
|
-
break;
|
|
810
|
-
}
|
|
811
|
-
case 'regex': {
|
|
812
|
-
if (!pattern) {
|
|
813
|
-
return { success: false, error: 'Pattern is required for regex extraction' };
|
|
814
|
-
}
|
|
815
|
-
results.extracted = await extractRegex(pattern, flags, source);
|
|
816
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `Regex: ${results.extracted.matchCount} matches`);
|
|
817
|
-
break;
|
|
818
|
-
}
|
|
819
|
-
case 'json': {
|
|
820
|
-
results.extracted = await extractJson(source, selector, jsonPath);
|
|
821
|
-
results.count = Array.isArray(results.extracted) ? results.extracted.length : 0;
|
|
822
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `JSON: ${results.count} objects`);
|
|
823
|
-
break;
|
|
824
|
-
}
|
|
825
|
-
case 'meta': {
|
|
826
|
-
results.extracted = await extractMeta(types);
|
|
827
|
-
const tagCount = Object.values(results.extracted).reduce((sum, val) => {
|
|
828
|
-
if (typeof val === 'object' && val !== null) {
|
|
829
|
-
return sum + Object.keys(val).length;
|
|
830
|
-
}
|
|
831
|
-
return sum + (val ? 1 : 0);
|
|
832
|
-
}, 0);
|
|
833
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `Meta: ${tagCount} tags`);
|
|
834
|
-
break;
|
|
835
|
-
}
|
|
836
|
-
case 'structured': {
|
|
837
|
-
if (!selector) {
|
|
838
|
-
return { success: false, error: 'Selector is required for structured extraction. 💡 AI HINT: Run see_page(annotate: true) first to discover valid selectors or annotation IDs.' };
|
|
839
|
-
}
|
|
840
|
-
results.extracted = await extractStructured(selector, waitForSelector, selectorTimeout);
|
|
841
|
-
if (results.extracted.error) {
|
|
842
|
-
results.success = false;
|
|
843
|
-
results.error = results.extracted.error;
|
|
844
|
-
delete results.extracted;
|
|
845
|
-
}
|
|
846
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', results.success ? 'Structured data extracted' : 'Extraction failed');
|
|
847
|
-
break;
|
|
848
|
-
}
|
|
849
|
-
case 'auto': {
|
|
850
|
-
results.extracted = await extractAuto();
|
|
851
|
-
const summary = [];
|
|
852
|
-
if (results.extracted.meta)
|
|
853
|
-
summary.push('meta');
|
|
854
|
-
if (results.extracted.json?.length)
|
|
855
|
-
summary.push('json');
|
|
856
|
-
if (results.extracted.patterns?.length)
|
|
857
|
-
summary.push('patterns');
|
|
858
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `Auto: ${summary.join(', ')}`);
|
|
859
|
-
break;
|
|
860
|
-
}
|
|
861
|
-
case 'deobfuscate': {
|
|
862
|
-
(0, state_1.notifyProgress)('extract_data', 'in_progress', 'Deobfuscating JavaScript (enhanced)...');
|
|
863
|
-
const scriptContents = await page.evaluate(() => {
|
|
864
|
-
return Array.from(document.querySelectorAll('script')).map(s => s.textContent).join('\n');
|
|
865
|
-
}).catch(() => '');
|
|
866
|
-
const externalScripts = await page.evaluate(() => {
|
|
867
|
-
return Array.from(document.querySelectorAll('script[src]')).map(s => s.src);
|
|
868
|
-
}).catch(() => []);
|
|
869
|
-
let allJs = scriptContents;
|
|
870
|
-
for (const src of externalScripts.slice(0, 10)) {
|
|
871
|
-
try {
|
|
872
|
-
const resp = await fetch(src);
|
|
873
|
-
allJs += '\n' + await resp.text();
|
|
874
|
-
}
|
|
875
|
-
catch (e) { }
|
|
876
|
-
}
|
|
877
|
-
const deobfuscated = {
|
|
878
|
-
stringArrays: [], decodedStrings: [], functionMappings: [],
|
|
879
|
-
apiEndpoints: [], urls: [], fetchCalls: [],
|
|
880
|
-
webpackModules: [], evalUnpacked: [], resolvedConcats: [], unicodeDecoded: []
|
|
881
|
-
};
|
|
882
|
-
// 1. Original _0x style string arrays
|
|
883
|
-
const arrayPattern = /(?:const|var|let)\s+(_0x[a-f0-9]+)\s*=\s*\[([^\]]{20,})\]/g;
|
|
884
|
-
let match;
|
|
885
|
-
while ((match = arrayPattern.exec(allJs)) !== null) {
|
|
886
|
-
const varName = match[1];
|
|
887
|
-
try {
|
|
888
|
-
const items = match[2].match(/'([^']*)'|"([^"]*)"/g) || [];
|
|
889
|
-
const decoded = items.map(s => s.replace(/^['"]|['"]$/g, ''));
|
|
890
|
-
deobfuscated.stringArrays.push({ variable: varName, count: decoded.length, strings: decoded });
|
|
891
|
-
deobfuscated.decodedStrings.push(...decoded);
|
|
892
|
-
}
|
|
893
|
-
catch (e) { }
|
|
894
|
-
}
|
|
895
|
-
// 2. Hex-encoded strings
|
|
896
|
-
const hexStrings = [...new Set((allJs.match(/(?:'(?:\\x[0-9a-f]{2})+[^']*'|"(?:\\x[0-9a-f]{2})+[^"]*")/gi) || []))];
|
|
897
|
-
for (const hs of hexStrings.slice(0, 50)) {
|
|
898
|
-
try {
|
|
899
|
-
const decoded = hs.slice(1, -1).replace(/\\x([0-9a-f]{2})/gi, (_, h) => String.fromCharCode(parseInt(h, 16)));
|
|
900
|
-
if (decoded.length > 2)
|
|
901
|
-
deobfuscated.decodedStrings.push(decoded);
|
|
902
|
-
}
|
|
903
|
-
catch (e) { }
|
|
904
|
-
}
|
|
905
|
-
// 3. NEW: Unicode escape sequences (\u0066\u0065\u0074\u0063\u0068 → fetch)
|
|
906
|
-
const unicodePattern = /(?:'(?:\\u[0-9a-f]{4})+[^']*'|"(?:\\u[0-9a-f]{4})+[^"]*")/gi;
|
|
907
|
-
const unicodeMatches = allJs.match(unicodePattern) || [];
|
|
908
|
-
for (const um of unicodeMatches.slice(0, 50)) {
|
|
909
|
-
try {
|
|
910
|
-
const decoded = um.slice(1, -1).replace(/\\u([0-9a-f]{4})/gi, (_, h) => String.fromCharCode(parseInt(h, 16)));
|
|
911
|
-
if (decoded.length > 1) {
|
|
912
|
-
deobfuscated.unicodeDecoded.push(decoded);
|
|
913
|
-
deobfuscated.decodedStrings.push(decoded);
|
|
914
|
-
}
|
|
915
|
-
}
|
|
916
|
-
catch (e) { }
|
|
917
|
-
}
|
|
918
|
-
// 4. NEW: Eval unpacker — eval(function(p,a,c,k,e,d){...})
|
|
919
|
-
const evalPattern = /eval\s*\(\s*function\s*\(\s*p\s*,\s*a\s*,\s*c\s*,\s*k\s*,\s*e\s*,?\s*[dr]?\s*\)\s*\{[^}]*\}\s*\(\s*'([^']*)'(?:\s*,\s*(\d+)){2}\s*,\s*'([^']*)'/g;
|
|
920
|
-
let evalMatch;
|
|
921
|
-
while ((evalMatch = evalPattern.exec(allJs)) !== null) {
|
|
922
|
-
try {
|
|
923
|
-
const p = evalMatch[1], a = parseInt(evalMatch[2]) || 62;
|
|
924
|
-
const keywords = evalMatch[3].split('|');
|
|
925
|
-
const unpacked = p.replace(/\b\w+\b/g, w => {
|
|
926
|
-
const n = parseInt(w, a);
|
|
927
|
-
return (n < keywords.length && keywords[n]) ? keywords[n] : w;
|
|
928
|
-
});
|
|
929
|
-
deobfuscated.evalUnpacked.push(unpacked.substring(0, 3000));
|
|
930
|
-
// Extract strings from unpacked code
|
|
931
|
-
const unpackedStrings = unpacked.match(/['"]([^'"]{3,})['"]/g) || [];
|
|
932
|
-
for (const s of unpackedStrings.slice(0, 100)) {
|
|
933
|
-
deobfuscated.decodedStrings.push(s.replace(/^['"]|['"]$/g, ''));
|
|
934
|
-
}
|
|
935
|
-
}
|
|
936
|
-
catch (e) { }
|
|
937
|
-
}
|
|
938
|
-
// Also handle simpler eval patterns
|
|
939
|
-
const simpleEval = /eval\s*\(\s*['"]([^'"]{10,})['"]\s*\)/g;
|
|
940
|
-
let seMatch;
|
|
941
|
-
while ((seMatch = simpleEval.exec(allJs)) !== null) {
|
|
942
|
-
deobfuscated.evalUnpacked.push(seMatch[1].substring(0, 2000));
|
|
943
|
-
}
|
|
944
|
-
// 5. NEW: Webpack module detection
|
|
945
|
-
const webpackPatterns = [
|
|
946
|
-
/(?:__webpack_require__|__webpack_modules__)\s*\[\s*['"]?(\w+)['"]?\s*\]/g,
|
|
947
|
-
/(?:const|var|let)\s+\w+\s*=\s*\{[\s\S]{0,50}__webpack_require__/g,
|
|
948
|
-
/\(\s*function\s*\(\s*modules\s*\)\s*\{[\s\S]{0,200}__webpack_require__/g
|
|
949
|
-
];
|
|
950
|
-
const webpackExports = allJs.match(/(?:module\.exports|exports\.\w+)\s*=\s*['"]([^'"]+)['"]/g) || [];
|
|
951
|
-
for (const exp of webpackExports.slice(0, 30)) {
|
|
952
|
-
const val = exp.match(/=\s*['"]([^'"]+)['"]/);
|
|
953
|
-
if (val) {
|
|
954
|
-
deobfuscated.webpackModules.push(val[1]);
|
|
955
|
-
deobfuscated.decodedStrings.push(val[1]);
|
|
956
|
-
}
|
|
957
|
-
}
|
|
958
|
-
// Detect webpack chunk loading and module IDs
|
|
959
|
-
const chunkIds = allJs.match(/webpackChunk\w*\.push\s*\(\s*\[\s*\[([^\]]+)\]/g) || [];
|
|
960
|
-
for (const ci of chunkIds.slice(0, 10)) {
|
|
961
|
-
deobfuscated.webpackModules.push(`chunk: ${ci.substring(0, 100)}`);
|
|
962
|
-
}
|
|
963
|
-
// 6. NEW: Terser/UglifyJS single-letter variable mappings
|
|
964
|
-
const terserPattern = /(?:var|let|const)\s+([a-z])\s*=\s*['"]([^'"]{2,})['"]/gi;
|
|
965
|
-
let terserMatch;
|
|
966
|
-
const terserMappings = {};
|
|
967
|
-
while ((terserMatch = terserPattern.exec(allJs)) !== null) {
|
|
968
|
-
const varName = terserMatch[1], value = terserMatch[2];
|
|
969
|
-
if (value.length > 2 && value.length < 200) {
|
|
970
|
-
terserMappings[varName] = value;
|
|
971
|
-
deobfuscated.functionMappings.push({ variable: varName, value: value });
|
|
972
|
-
deobfuscated.decodedStrings.push(value);
|
|
973
|
-
}
|
|
974
|
-
}
|
|
975
|
-
// 7. NEW: String concatenation resolution ("htt"+"ps://" → "https://")
|
|
976
|
-
const concatPattern = /(?:['"][^'"]*['"]\s*\+\s*){2,}['"][^'"]*['"]/g;
|
|
977
|
-
const concatMatches = allJs.match(concatPattern) || [];
|
|
978
|
-
for (const cm of concatMatches.slice(0, 50)) {
|
|
979
|
-
try {
|
|
980
|
-
const parts = cm.match(/['"]([^'"]*)['"]|(['"])/g) || [];
|
|
981
|
-
const resolved = parts.map(p => p.replace(/^['"]|['"]$/g, '')).join('');
|
|
982
|
-
if (resolved.length > 3) {
|
|
983
|
-
deobfuscated.resolvedConcats.push(resolved);
|
|
984
|
-
deobfuscated.decodedStrings.push(resolved);
|
|
985
|
-
}
|
|
986
|
-
}
|
|
987
|
-
catch (e) { }
|
|
988
|
-
}
|
|
989
|
-
// 8. NEW: Array rotation detection — function with push/shift on array
|
|
990
|
-
const rotationPattern = /function\s+\w*\s*\(\s*(_0x[a-f0-9]+)\s*,\s*\w+\s*\)\s*\{[\s\S]{0,500}push\s*\(\s*\1\s*\.\s*shift\s*\(\s*\)\s*\)/g;
|
|
991
|
-
const rotations = allJs.match(rotationPattern) || [];
|
|
992
|
-
if (rotations.length > 0) {
|
|
993
|
-
deobfuscated.functionMappings.push({ type: 'array_rotation', count: rotations.length, note: 'Array rotation functions detected — strings may be shifted' });
|
|
994
|
-
}
|
|
995
|
-
// Extract URLs and API endpoints from all decoded strings
|
|
996
|
-
deobfuscated.urls = [...new Set(deobfuscated.decodedStrings.filter((s) => s.match(/^(https?:\/\/|\/)/) || s.match(/\.(php|json|api|asp|jsp)$/i)))].slice(0, 50);
|
|
997
|
-
deobfuscated.apiEndpoints = [...new Set(deobfuscated.decodedStrings.filter((s) => s.match(/^\/[a-z]/i) && s.length > 3 && s.length < 100))].slice(0, 30);
|
|
998
|
-
// Find fetch patterns
|
|
999
|
-
const fetchPatterns = allJs.match(/fetch\s*\(\s*['"]([^'"]+)['"]/g) || [];
|
|
1000
|
-
deobfuscated.fetchCalls = fetchPatterns.map(f => f.replace(/fetch\s*\(\s*['"]/, '').replace(/['"]$/, '')).slice(0, 20);
|
|
1001
|
-
deobfuscated.decodedStrings = [...new Set(deobfuscated.decodedStrings)].slice(0, 500);
|
|
1002
|
-
results.extracted = deobfuscated;
|
|
1003
|
-
const summary = `${deobfuscated.stringArrays.length} arrays, ${deobfuscated.decodedStrings.length} strings, ${deobfuscated.evalUnpacked.length} eval unpacked, ${deobfuscated.webpackModules.length} webpack modules, ${deobfuscated.resolvedConcats.length} concats resolved, ${deobfuscated.unicodeDecoded.length} unicode decoded`;
|
|
1004
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `Deobfuscated(enhanced): ${summary}`);
|
|
1005
|
-
break;
|
|
1006
|
-
}
|
|
1007
|
-
case 'apiDiscovery': {
|
|
1008
|
-
(0, state_1.notifyProgress)('extract_data', 'in_progress', 'Discovering hidden API endpoints...');
|
|
1009
|
-
const apiResults = {
|
|
1010
|
-
fetchEndpoints: [], xhrEndpoints: [], formActions: [],
|
|
1011
|
-
scriptSources: [], inlineApiPatterns: [], postBodies: [], dynamicApis: []
|
|
1012
|
-
};
|
|
1013
|
-
// 1. Intercept runtime fetch/XHR
|
|
1014
|
-
try {
|
|
1015
|
-
const runtimeApis = await page.evaluate(() => {
|
|
1016
|
-
return new Promise((resolve) => {
|
|
1017
|
-
const found = [];
|
|
1018
|
-
if (window.__capturedApis) {
|
|
1019
|
-
resolve(window.__capturedApis);
|
|
1020
|
-
return;
|
|
1021
|
-
}
|
|
1022
|
-
const origFetch = window.fetch;
|
|
1023
|
-
window.fetch = function (...args) {
|
|
1024
|
-
try {
|
|
1025
|
-
const url = typeof args[0] === 'string' ? args[0] : args[0]?.url;
|
|
1026
|
-
const opts = args[1] || {};
|
|
1027
|
-
found.push({
|
|
1028
|
-
type: 'fetch', url, method: opts.method || 'GET',
|
|
1029
|
-
body: typeof opts.body === 'string' ? opts.body.substring(0, 500) : null
|
|
1030
|
-
});
|
|
1031
|
-
}
|
|
1032
|
-
catch (e) { }
|
|
1033
|
-
return origFetch.apply(this, args);
|
|
1034
|
-
};
|
|
1035
|
-
const origOpen = XMLHttpRequest.prototype.open;
|
|
1036
|
-
const origSend = XMLHttpRequest.prototype.send;
|
|
1037
|
-
XMLHttpRequest.prototype.open = function (method, url, ...rest) { this.__apiUrl = url; this.__apiMethod = method; return origOpen.apply(this, [method, url, ...rest]); };
|
|
1038
|
-
XMLHttpRequest.prototype.send = function (body) {
|
|
1039
|
-
found.push({
|
|
1040
|
-
type: 'xhr', url: this.__apiUrl, method: this.__apiMethod,
|
|
1041
|
-
body: typeof body === 'string' ? body.substring(0, 500) : null
|
|
1042
|
-
});
|
|
1043
|
-
return origSend.apply(this, [body]);
|
|
1044
|
-
};
|
|
1045
|
-
window.__capturedApis = found;
|
|
1046
|
-
setTimeout(() => resolve(found), 3000);
|
|
1047
|
-
});
|
|
1048
|
-
});
|
|
1049
|
-
apiResults.dynamicApis = runtimeApis;
|
|
1050
|
-
}
|
|
1051
|
-
catch (e) {
|
|
1052
|
-
apiResults.dynamicApis = [];
|
|
1053
|
-
}
|
|
1054
|
-
// 2. Static analysis
|
|
1055
|
-
const allScriptContent = await page.evaluate(() => {
|
|
1056
|
-
return Array.from(document.querySelectorAll('script')).map(s => s.textContent).join('\n');
|
|
1057
|
-
}).catch(() => '');
|
|
1058
|
-
const fetchRegex = /fetch\s*\(\s*(?:['"`]([^'"`]+)['"`]|([a-zA-Z_$][a-zA-Z0-9_$]*))/g;
|
|
1059
|
-
let fMatch;
|
|
1060
|
-
while ((fMatch = fetchRegex.exec(allScriptContent)) !== null) {
|
|
1061
|
-
apiResults.fetchEndpoints.push((fMatch[1] || fMatch[2]));
|
|
1062
|
-
}
|
|
1063
|
-
apiResults.fetchEndpoints = [...new Set(apiResults.fetchEndpoints)].slice(0, 30);
|
|
1064
|
-
const xhrRegex = /\.open\s*\(\s*['"](?:GET|POST|PUT|DELETE)['"]\s*,\s*['"`]([^'"`]+)['"`]/gi;
|
|
1065
|
-
let xMatch;
|
|
1066
|
-
while ((xMatch = xhrRegex.exec(allScriptContent)) !== null) {
|
|
1067
|
-
apiResults.xhrEndpoints.push(xMatch[1]);
|
|
1068
|
-
}
|
|
1069
|
-
apiResults.xhrEndpoints = [...new Set(apiResults.xhrEndpoints)].slice(0, 30);
|
|
1070
|
-
apiResults.formActions = await page.evaluate(() => {
|
|
1071
|
-
return Array.from(document.querySelectorAll('form[action]')).map((f) => ({ action: f.action, method: f.method || 'GET', id: f.id || null }));
|
|
1072
|
-
}).catch(() => []);
|
|
1073
|
-
const postBodyPatterns = allScriptContent.match(/(?:URLSearchParams|FormData|JSON\.stringify)\s*\(\s*\{[^}]{5,200}\}/g) || [];
|
|
1074
|
-
apiResults.postBodies = postBodyPatterns.slice(0, 10);
|
|
1075
|
-
const apiUrlPattern = /['"`]((?:https?:\/\/[^'"`]+|\/)(?:[a-zA-Z0-9_\-\/]+\.(?:php|json|api|asp|aspx|do|action))[^'"`]*)['"`]/g;
|
|
1076
|
-
let apiMatch;
|
|
1077
|
-
while ((apiMatch = apiUrlPattern.exec(allScriptContent)) !== null) {
|
|
1078
|
-
apiResults.inlineApiPatterns.push(apiMatch[1]);
|
|
1079
|
-
}
|
|
1080
|
-
apiResults.inlineApiPatterns = [...new Set(apiResults.inlineApiPatterns)].slice(0, 30);
|
|
1081
|
-
apiResults.scriptSources = await page.evaluate(() => {
|
|
1082
|
-
return Array.from(document.querySelectorAll('script[src]')).map(s => s.src);
|
|
1083
|
-
}).catch(() => []);
|
|
1084
|
-
results.extracted = apiResults;
|
|
1085
|
-
const totalFound = apiResults.fetchEndpoints.length + apiResults.xhrEndpoints.length +
|
|
1086
|
-
apiResults.inlineApiPatterns.length + apiResults.dynamicApis.length;
|
|
1087
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `API Discovery: ${totalFound} endpoints found`);
|
|
1088
|
-
break;
|
|
1089
|
-
}
|
|
1090
|
-
// ====== FEATURE 4: Response Auto-Decryption ======
|
|
1091
|
-
case 'decrypt': {
|
|
1092
|
-
(0, state_1.notifyProgress)('extract_data', 'in_progress', 'Auto-decrypting data...');
|
|
1093
|
-
const { encryptedData, autoFindKey = true } = params;
|
|
1094
|
-
const decryptResults = {
|
|
1095
|
-
original: null, decoded: [], detectedEncoding: [], extractedKeys: [], aesDecrypted: null
|
|
1096
|
-
};
|
|
1097
|
-
// Get data to decrypt — from param or from page
|
|
1098
|
-
let dataToDecrypt = encryptedData;
|
|
1099
|
-
if (!dataToDecrypt) {
|
|
1100
|
-
// Try to get from clipboard or last API response
|
|
1101
|
-
const lastApiResponse = state_1.state.networkRecords.filter(r => r.responseBody).pop();
|
|
1102
|
-
if (lastApiResponse)
|
|
1103
|
-
dataToDecrypt = lastApiResponse.responseBody;
|
|
1104
|
-
}
|
|
1105
|
-
if (!dataToDecrypt) {
|
|
1106
|
-
return { success: false, error: 'No data to decrypt. Provide encryptedData parameter or start network_recorder first.' };
|
|
1107
|
-
}
|
|
1108
|
-
decryptResults.original = dataToDecrypt.substring(0, 500);
|
|
1109
|
-
// 1. Base64 chain decode (recursive, up to 5 levels)
|
|
1110
|
-
let b64Data = dataToDecrypt.trim();
|
|
1111
|
-
for (let level = 0; level < 5; level++) {
|
|
1112
|
-
if (!/^[A-Za-z0-9+/=]+$/.test(b64Data) || b64Data.length < 4)
|
|
1113
|
-
break;
|
|
1114
|
-
try {
|
|
1115
|
-
const decoded = Buffer.from(b64Data, 'base64').toString('utf-8');
|
|
1116
|
-
if (decoded && decoded.length > 0 && !/[\x00-\x08\x0e-\x1f]/.test(decoded.substring(0, 100))) {
|
|
1117
|
-
decryptResults.decoded.push({ level: level + 1, type: 'base64', value: decoded.substring(0, 5000) });
|
|
1118
|
-
decryptResults.detectedEncoding.push('base64');
|
|
1119
|
-
// Check if result is JSON
|
|
1120
|
-
try {
|
|
1121
|
-
const json = JSON.parse(decoded);
|
|
1122
|
-
decryptResults.decoded.push({ level: level + 1, type: 'base64_json', value: json });
|
|
1123
|
-
}
|
|
1124
|
-
catch (e) { }
|
|
1125
|
-
b64Data = decoded; // Continue chain
|
|
1126
|
-
}
|
|
1127
|
-
else
|
|
1128
|
-
break;
|
|
1129
|
-
}
|
|
1130
|
-
catch (e) {
|
|
1131
|
-
break;
|
|
1132
|
-
}
|
|
1133
|
-
}
|
|
1134
|
-
// 2. Hex decode
|
|
1135
|
-
const hexClean = dataToDecrypt.replace(/\s+/g, '');
|
|
1136
|
-
if (/^[0-9a-f]+$/i.test(hexClean) && hexClean.length >= 6 && hexClean.length % 2 === 0) {
|
|
1137
|
-
try {
|
|
1138
|
-
const hexDecoded = Buffer.from(hexClean, 'hex').toString('utf-8');
|
|
1139
|
-
if (hexDecoded && !/[\x00-\x08\x0e-\x1f]/.test(hexDecoded.substring(0, 50))) {
|
|
1140
|
-
decryptResults.decoded.push({ type: 'hex', value: hexDecoded.substring(0, 5000) });
|
|
1141
|
-
decryptResults.detectedEncoding.push('hex');
|
|
1142
|
-
}
|
|
1143
|
-
}
|
|
1144
|
-
catch (e) { }
|
|
1145
|
-
}
|
|
1146
|
-
// 3. URL decode (multi-level)
|
|
1147
|
-
if (dataToDecrypt.includes('%')) {
|
|
1148
|
-
try {
|
|
1149
|
-
let urlDecoded = decodeURIComponent(dataToDecrypt);
|
|
1150
|
-
decryptResults.decoded.push({ type: 'url', value: urlDecoded.substring(0, 5000) });
|
|
1151
|
-
decryptResults.detectedEncoding.push('url');
|
|
1152
|
-
// Double URL decode
|
|
1153
|
-
if (urlDecoded.includes('%')) {
|
|
1154
|
-
urlDecoded = decodeURIComponent(urlDecoded);
|
|
1155
|
-
decryptResults.decoded.push({ type: 'url_double', value: urlDecoded.substring(0, 5000) });
|
|
1156
|
-
}
|
|
1157
|
-
}
|
|
1158
|
-
catch (e) { }
|
|
1159
|
-
}
|
|
1160
|
-
// 4. ROT13
|
|
1161
|
-
try {
|
|
1162
|
-
const rot13 = dataToDecrypt.replace(/[a-zA-Z]/g, (c) => {
|
|
1163
|
-
const base = c <= 'Z' ? 65 : 97;
|
|
1164
|
-
return String.fromCharCode(((c.charCodeAt(0) - base + 13) % 26) + base);
|
|
1165
|
-
});
|
|
1166
|
-
if (rot13 !== dataToDecrypt && (rot13.includes('http') || rot13.includes('www') || rot13.includes('.com'))) {
|
|
1167
|
-
decryptResults.decoded.push({ type: 'rot13', value: rot13.substring(0, 5000) });
|
|
1168
|
-
decryptResults.detectedEncoding.push('rot13');
|
|
1169
|
-
}
|
|
1170
|
-
}
|
|
1171
|
-
catch (e) { }
|
|
1172
|
-
// 5. Auto-extract encryption keys from page scripts
|
|
1173
|
-
if (autoFindKey) {
|
|
1174
|
-
try {
|
|
1175
|
-
const keys = await page.evaluate(() => {
|
|
1176
|
-
const scripts = Array.from(document.querySelectorAll('script')).map(s => s.textContent).join('\n');
|
|
1177
|
-
const found = [];
|
|
1178
|
-
// CryptoJS patterns
|
|
1179
|
-
const cryptoPatterns = [
|
|
1180
|
-
/CryptoJS\.AES\.decrypt\s*\(\s*\w+\s*,\s*['"]([^'"]+)['"]/g,
|
|
1181
|
-
/CryptoJS\.AES\.encrypt\s*\(\s*\w+\s*,\s*['"]([^'"]+)['"]/g,
|
|
1182
|
-
/CryptoJS\.enc\.Utf8\.parse\s*\(\s*['"]([^'"]+)['"]/g,
|
|
1183
|
-
/(?:secret|key|pass|password|iv|salt)\s*[:=]\s*['"]([^'"]{8,})['"]/gi,
|
|
1184
|
-
/aes(?:Key|_key|Secret)\s*[:=]\s*['"]([^'"]{8,})['"]/gi
|
|
1185
|
-
];
|
|
1186
|
-
for (const pat of cryptoPatterns) {
|
|
1187
|
-
let m;
|
|
1188
|
-
while ((m = pat.exec(scripts)) !== null) {
|
|
1189
|
-
found.push({ pattern: pat.source.substring(0, 50), key: m[1] });
|
|
1190
|
-
}
|
|
1191
|
-
}
|
|
1192
|
-
return found;
|
|
1193
|
-
});
|
|
1194
|
-
decryptResults.extractedKeys = keys.slice(0, 20);
|
|
1195
|
-
}
|
|
1196
|
-
catch (e) { }
|
|
1197
|
-
}
|
|
1198
|
-
// 6. AES decryption — try with extracted keys or user-provided key
|
|
1199
|
-
const aesKey = params.aesKey || (decryptResults.extractedKeys[0]?.key);
|
|
1200
|
-
if (aesKey && dataToDecrypt.length > 10) {
|
|
1201
|
-
try {
|
|
1202
|
-
const crypto = require('crypto');
|
|
1203
|
-
// Try AES-256-CBC
|
|
1204
|
-
for (const keyEncoding of ['utf8', 'hex', 'base64']) {
|
|
1205
|
-
try {
|
|
1206
|
-
let keyBuf;
|
|
1207
|
-
if (keyEncoding === 'utf8')
|
|
1208
|
-
keyBuf = Buffer.alloc(32); // pad to 32 bytes
|
|
1209
|
-
else
|
|
1210
|
-
keyBuf = Buffer.from(aesKey, keyEncoding);
|
|
1211
|
-
if (keyEncoding === 'utf8') {
|
|
1212
|
-
const kb = Buffer.from(aesKey, 'utf8');
|
|
1213
|
-
kb.copy(keyBuf);
|
|
1214
|
-
}
|
|
1215
|
-
// Try to decode the data from base64 first
|
|
1216
|
-
const dataBuf = Buffer.from(dataToDecrypt, 'base64');
|
|
1217
|
-
if (dataBuf.length > 16) {
|
|
1218
|
-
// IV might be first 16 bytes
|
|
1219
|
-
const iv = params.aesIV ? Buffer.from(params.aesIV, keyEncoding) : dataBuf.slice(0, 16);
|
|
1220
|
-
const encrypted = params.aesIV ? dataBuf : dataBuf.slice(16);
|
|
1221
|
-
const decipher = crypto.createDecipheriv('aes-256-cbc', keyBuf, iv);
|
|
1222
|
-
decipher.setAutoPadding(true);
|
|
1223
|
-
let decrypted = decipher.update(encrypted, undefined, 'utf8');
|
|
1224
|
-
decrypted += decipher.final('utf8');
|
|
1225
|
-
if (decrypted && decrypted.length > 0) {
|
|
1226
|
-
decryptResults.aesDecrypted = decrypted.substring(0, 5000);
|
|
1227
|
-
decryptResults.detectedEncoding.push('aes-256-cbc');
|
|
1228
|
-
// Try JSON parse
|
|
1229
|
-
try {
|
|
1230
|
-
decryptResults.aesDecrypted = JSON.parse(decrypted);
|
|
1231
|
-
}
|
|
1232
|
-
catch (e) { }
|
|
1233
|
-
break;
|
|
1234
|
-
}
|
|
1235
|
-
}
|
|
1236
|
-
}
|
|
1237
|
-
catch (e) {
|
|
1238
|
-
continue;
|
|
1239
|
-
}
|
|
1240
|
-
}
|
|
1241
|
-
}
|
|
1242
|
-
catch (e) { }
|
|
1243
|
-
}
|
|
1244
|
-
results.extracted = decryptResults;
|
|
1245
|
-
const decodedCount = decryptResults.decoded.length + (decryptResults.aesDecrypted ? 1 : 0);
|
|
1246
|
-
(0, state_1.notifyProgress)('extract_data', 'completed', `Decrypted: ${decodedCount} decodings, ${decryptResults.extractedKeys.length} keys found, encodings: ${decryptResults.detectedEncoding.join(', ') || 'none'}`);
|
|
1247
|
-
break;
|
|
1248
|
-
}
|
|
1249
|
-
default:
|
|
1250
|
-
return { success: false, error: `Unknown type: ${type}. Supported: regex, json, meta, structured, auto, deobfuscate, apiDiscovery, decrypt` };
|
|
1251
|
-
}
|
|
1252
|
-
return results;
|
|
98
|
+
return (0, network_extractors_1.extractData)(params);
|
|
1253
99
|
},
|
|
1254
100
|
async replay_request(params) {
|
|
1255
101
|
const { page } = (0, state_1.requireBrowser)();
|
|
1256
102
|
const { url, method = 'GET', headers, body } = params;
|
|
1257
103
|
(0, state_1.notifyProgress)('replay_request', 'started', `Replaying ${method} to ${url}`);
|
|
1258
|
-
// ponytail: reuse page.evaluate to fire native fetch, bypasses CORS & uses exact browser context auth/cookies
|
|
1259
104
|
try {
|
|
1260
105
|
const result = await page.evaluate(async ({ u, m, h, b }) => {
|
|
1261
106
|
const res = await fetch(u, { method: m, headers: h, body: b });
|
|
1262
|
-
return {
|
|
1263
|
-
status: res.status,
|
|
1264
|
-
headers: Object.fromEntries(res.headers.entries()),
|
|
1265
|
-
body: await res.text().catch(() => null)
|
|
1266
|
-
};
|
|
107
|
+
return { status: res.status, headers: Object.fromEntries(res.headers.entries()), body: await res.text().catch(() => null) };
|
|
1267
108
|
}, { u: url, m: method, h: headers || {}, b: body });
|
|
1268
109
|
(0, state_1.notifyProgress)('replay_request', 'completed', `Replay finished with status ${result.status}`);
|
|
1269
110
|
return { success: true, result };
|