real-browser-mcp-server 1.5.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/README.md +46 -41
  2. package/dist/lib/cjs/index.d.ts +14 -1
  3. package/dist/lib/cjs/index.d.ts.map +1 -1
  4. package/dist/lib/cjs/index.js +5 -3
  5. package/dist/lib/cjs/index.js.map +1 -1
  6. package/dist/lib/cjs/module/pageController.d.ts +7 -1
  7. package/dist/lib/cjs/module/pageController.d.ts.map +1 -1
  8. package/dist/lib/cjs/module/pageController.js +4 -23
  9. package/dist/lib/cjs/module/pageController.js.map +1 -1
  10. package/dist/lib/cjs/module/turnstile.d.ts +4 -1
  11. package/dist/lib/cjs/module/turnstile.d.ts.map +1 -1
  12. package/dist/lib/cjs/module/turnstile.js +14 -13
  13. package/dist/lib/cjs/module/turnstile.js.map +1 -1
  14. package/dist/lib/esm/index.d.mjs +11 -0
  15. package/dist/lib/esm/index.mjs +4 -0
  16. package/dist/lib/esm/module/pageController.d.mjs +3 -0
  17. package/dist/lib/esm/module/turnstile.d.mjs +3 -0
  18. package/dist/scripts/generate-esm.d.ts +2 -0
  19. package/dist/scripts/generate-esm.d.ts.map +1 -0
  20. package/dist/scripts/generate-esm.js +100 -0
  21. package/dist/scripts/generate-esm.js.map +1 -0
  22. package/dist/src/index.d.ts.map +1 -1
  23. package/dist/src/index.js +0 -1
  24. package/dist/src/index.js.map +1 -1
  25. package/dist/src/mcp/handlers/browser.d.ts +8 -7
  26. package/dist/src/mcp/handlers/browser.d.ts.map +1 -1
  27. package/dist/src/mcp/handlers/browser.js +4 -3
  28. package/dist/src/mcp/handlers/browser.js.map +1 -1
  29. package/dist/src/mcp/handlers/dom.d.ts +19 -18
  30. package/dist/src/mcp/handlers/dom.d.ts.map +1 -1
  31. package/dist/src/mcp/handlers/dom.js +7 -28
  32. package/dist/src/mcp/handlers/dom.js.map +1 -1
  33. package/dist/src/mcp/handlers/extract.js.map +1 -1
  34. package/dist/src/mcp/handlers/handler-utils.d.ts +14 -0
  35. package/dist/src/mcp/handlers/handler-utils.d.ts.map +1 -0
  36. package/dist/src/mcp/handlers/handler-utils.js +42 -0
  37. package/dist/src/mcp/handlers/handler-utils.js.map +1 -0
  38. package/dist/src/mcp/handlers/helpers.d.ts +0 -7
  39. package/dist/src/mcp/handlers/helpers.d.ts.map +1 -1
  40. package/dist/src/mcp/handlers/helpers.js +0 -15
  41. package/dist/src/mcp/handlers/helpers.js.map +1 -1
  42. package/dist/src/mcp/handlers/media-handlers.d.ts +2 -1
  43. package/dist/src/mcp/handlers/media-handlers.d.ts.map +1 -1
  44. package/dist/src/mcp/handlers/media-handlers.js +24 -8
  45. package/dist/src/mcp/handlers/media-handlers.js.map +1 -1
  46. package/dist/src/mcp/handlers/network-extractors.d.ts +2 -0
  47. package/dist/src/mcp/handlers/network-extractors.d.ts.map +1 -0
  48. package/dist/src/mcp/handlers/network-extractors.js +651 -0
  49. package/dist/src/mcp/handlers/network-extractors.js.map +1 -0
  50. package/dist/src/mcp/handlers/network-recorder.d.ts +119 -0
  51. package/dist/src/mcp/handlers/network-recorder.d.ts.map +1 -0
  52. package/dist/src/mcp/handlers/network-recorder.js +337 -0
  53. package/dist/src/mcp/handlers/network-recorder.js.map +1 -0
  54. package/dist/src/mcp/handlers/network.d.ts +30 -118
  55. package/dist/src/mcp/handlers/network.d.ts.map +1 -1
  56. package/dist/src/mcp/handlers/network.js +28 -1187
  57. package/dist/src/mcp/handlers/network.js.map +1 -1
  58. package/dist/src/mcp/handlers/state.d.ts +1 -0
  59. package/dist/src/mcp/handlers/state.d.ts.map +1 -1
  60. package/dist/src/mcp/handlers/state.js +17 -0
  61. package/dist/src/mcp/handlers/state.js.map +1 -1
  62. package/dist/src/mcp/handlers/utility-handlers.d.ts +8 -37
  63. package/dist/src/mcp/handlers/utility-handlers.d.ts.map +1 -1
  64. package/dist/src/mcp/handlers/utility-handlers.js +69 -31
  65. package/dist/src/mcp/handlers/utility-handlers.js.map +1 -1
  66. package/dist/src/mcp/handlers/vision-captcha.d.ts +221 -0
  67. package/dist/src/mcp/handlers/vision-captcha.d.ts.map +1 -0
  68. package/dist/src/mcp/handlers/vision-captcha.js +238 -0
  69. package/dist/src/mcp/handlers/vision-captcha.js.map +1 -0
  70. package/dist/src/mcp/handlers/vision-see-page.d.ts +32 -0
  71. package/dist/src/mcp/handlers/vision-see-page.d.ts.map +1 -0
  72. package/dist/src/mcp/handlers/vision-see-page.js +260 -0
  73. package/dist/src/mcp/handlers/vision-see-page.js.map +1 -0
  74. package/dist/src/mcp/handlers/vision.d.ts +50 -27
  75. package/dist/src/mcp/handlers/vision.d.ts.map +1 -1
  76. package/dist/src/mcp/handlers/vision.js +4 -606
  77. package/dist/src/mcp/handlers/vision.js.map +1 -1
  78. package/dist/src/mcp/index.d.ts.map +1 -1
  79. package/dist/src/mcp/index.js +11 -3
  80. package/dist/src/mcp/index.js.map +1 -1
  81. package/dist/src/shared/cache-manager.d.ts +0 -2
  82. package/dist/src/shared/cache-manager.d.ts.map +1 -1
  83. package/dist/src/shared/cache-manager.js +1 -3
  84. package/dist/src/shared/cache-manager.js.map +1 -1
  85. package/dist/src/shared/lib-core.d.ts +2 -1
  86. package/dist/src/shared/lib-core.d.ts.map +1 -1
  87. package/dist/src/shared/lib-core.js +5 -38
  88. package/dist/src/shared/lib-core.js.map +1 -1
  89. package/dist/src/shared/tools.d.ts.map +1 -1
  90. package/dist/src/shared/tools.js +6 -18
  91. package/dist/src/shared/tools.js.map +1 -1
  92. package/dist/src/types.d.ts +25 -6
  93. package/dist/src/types.d.ts.map +1 -1
  94. package/dist/test/cjs/test.js +29 -33
  95. package/dist/test/cjs/test.js.map +1 -1
  96. package/dist/test/mcp/smoke-test.d.ts.map +1 -1
  97. package/dist/test/mcp/smoke-test.js +9 -3
  98. package/dist/test/mcp/smoke-test.js.map +1 -1
  99. package/dist/test/unit/handler-test.d.ts +3 -0
  100. package/dist/test/unit/handler-test.d.ts.map +1 -0
  101. package/dist/test/unit/handler-test.js +133 -0
  102. package/dist/test/unit/handler-test.js.map +1 -0
  103. package/lib/esm/module/pageController.mjs +4 -22
  104. package/lib/esm/module/turnstile.mjs +16 -13
  105. package/package.json +5 -4
  106. package/typings.d.ts +5 -40
@@ -2,7 +2,8 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.networkHandlers = void 0;
4
4
  const state_1 = require("./state");
5
- // Auto-generated network handlers
5
+ const network_recorder_1 = require("./network-recorder");
6
+ const network_extractors_1 = require("./network-extractors");
6
7
  exports.networkHandlers = {
7
8
  async redirect_tracer(params) {
8
9
  const { page } = (0, state_1.requireBrowser)();
@@ -11,29 +12,19 @@ exports.networkHandlers = {
11
12
  const redirects = [];
12
13
  const jsNavigations = [];
13
14
  let currentUrl = url;
14
- // HTTP redirect handler
15
15
  const responseHandler = (response) => {
16
16
  if ([301, 302, 303, 307, 308].includes(response.status())) {
17
- redirects.push({
18
- url: response.url(),
19
- status: response.status(),
20
- type: 'http',
21
- headers: includeHeaders ? response.headers() : undefined
22
- });
23
- (0, state_1.notifyProgress)('redirect_tracer', 'progress', `HTTP Redirect ${redirects.length}: ${response.status()}`, { status: response.status() });
17
+ if (redirects.length < maxRedirects) {
18
+ redirects.push({ url: response.url(), status: response.status(), type: 'http', headers: includeHeaders ? response.headers() : undefined });
19
+ (0, state_1.notifyProgress)('redirect_tracer', 'progress', `HTTP Redirect ${redirects.length}: ${response.status()}`, { status: response.status() });
20
+ }
24
21
  }
25
22
  };
26
- // JS/Navigation handler for tracking window.location changes
27
23
  const frameNavigatedHandler = (frame) => {
28
24
  if (frame === page.mainFrame()) {
29
25
  const newUrl = frame.url();
30
26
  if (newUrl !== currentUrl && newUrl !== 'about:blank') {
31
- jsNavigations.push({
32
- url: newUrl,
33
- type: 'js_navigation',
34
- fromUrl: currentUrl,
35
- timestamp: Date.now()
36
- });
27
+ jsNavigations.push({ url: newUrl, type: 'js_navigation', fromUrl: currentUrl, timestamp: Date.now() });
37
28
  (0, state_1.notifyProgress)('redirect_tracer', 'progress', `JS Navigation: ${newUrl}`, { type: 'js' });
38
29
  currentUrl = newUrl;
39
30
  }
@@ -43,9 +34,7 @@ exports.networkHandlers = {
43
34
  page.on('framenavigated', frameNavigatedHandler);
44
35
  try {
45
36
  await page.goto(url, { waitUntil: 'networkidle', timeout });
46
- // If followJS is enabled, wait a bit and check for meta refreshes and JS redirects
47
37
  if (followJS) {
48
- // Check for meta refresh tags
49
38
  const metaRefresh = await page.evaluate(() => {
50
39
  const meta = document.querySelector('meta[http-equiv="refresh"]');
51
40
  if (meta) {
@@ -55,14 +44,8 @@ exports.networkHandlers = {
55
44
  }
56
45
  return null;
57
46
  }).catch(() => null);
58
- if (metaRefresh) {
59
- jsNavigations.push({
60
- url: metaRefresh,
61
- type: 'meta_refresh',
62
- fromUrl: page.url()
63
- });
64
- }
65
- // Extract any onclick/href javascript: URLs
47
+ if (metaRefresh)
48
+ jsNavigations.push({ url: metaRefresh, type: 'meta_refresh', fromUrl: page.url() });
66
49
  const jsLinks = await page.evaluate(() => {
67
50
  const links = [];
68
51
  document.querySelectorAll('a[href^="javascript:"], [onclick]').forEach(el => {
@@ -73,9 +56,8 @@ exports.networkHandlers = {
73
56
  if (match)
74
57
  links.push({ url: match[1], type: 'onclick' });
75
58
  }
76
- if (href && href.includes('location')) {
59
+ if (href && href.includes('location'))
77
60
  links.push({ url: href, type: 'javascript_href' });
78
- }
79
61
  });
80
62
  return links;
81
63
  }).catch(() => []);
@@ -87,1183 +69,42 @@ exports.networkHandlers = {
87
69
  }
88
70
  page.off('response', responseHandler);
89
71
  page.off('framenavigated', frameNavigatedHandler);
90
- const allRedirects = [
91
- ...redirects,
92
- ...jsNavigations.filter(nav => nav.url && nav.url.startsWith('http'))
93
- ];
72
+ const allRedirects = [...redirects, ...jsNavigations.filter(nav => nav.url && nav.url.startsWith('http'))];
94
73
  (0, state_1.notifyProgress)('redirect_tracer', 'completed', `Found ${redirects.length} HTTP + ${jsNavigations.length} JS redirects`, { httpRedirects: redirects.length, jsNavigations: jsNavigations.length, finalUrl: page.url() });
95
- return {
96
- success: true,
97
- originalUrl: url,
98
- finalUrl: page.url(),
99
- redirectCount: allRedirects.length,
100
- httpRedirects: redirects,
101
- jsNavigations: jsNavigations,
102
- allRedirects: allRedirects
103
- };
74
+ return { success: true, originalUrl: url, finalUrl: page.url(), redirectCount: allRedirects.length, httpRedirects: redirects, jsNavigations, allRedirects };
104
75
  },
105
76
  async network_recorder(params = {}) {
106
77
  const { page } = (0, state_1.requireBrowser)();
107
- const { action = 'get', filter = {}, captureResponses = false } = params;
78
+ const { action = 'get', filter = {}, captureXhrBody = false } = params;
108
79
  switch (action) {
109
- case 'start':
110
- state_1.state.networkRecords = [];
111
- state_1.state.isRecordingNetwork = true;
112
- // ====== FEATURE 2: Pre-page-load Runtime API Interception ======
113
- // Inject BEFORE any JS runs — catches calls from obfuscated/webpack code
114
- try {
115
- await page.addInitScript(() => {
116
- window.__interceptedApis = [];
117
- window.__wsMessages = [];
118
- // --- Monkey-patch fetch ---
119
- const origFetch = window.fetch;
120
- window.fetch = function (...args) {
121
- try {
122
- const url = typeof args[0] === 'string' ? args[0] : (args[0]?.url || String(args[0]));
123
- const opts = args[1] || {};
124
- const entry = {
125
- type: 'fetch', url, method: opts.method || 'GET',
126
- headers: opts.headers ? JSON.parse(JSON.stringify(opts.headers)) : null,
127
- body: typeof opts.body === 'string' ? opts.body.substring(0, 2000) : null,
128
- timestamp: Date.now()
129
- };
130
- window.__interceptedApis.push(entry);
131
- }
132
- catch (e) { }
133
- return origFetch.apply(this, args);
134
- };
135
- // --- Monkey-patch XMLHttpRequest ---
136
- const origOpen = XMLHttpRequest.prototype.open;
137
- const origSend = XMLHttpRequest.prototype.send;
138
- const origSetHeader = XMLHttpRequest.prototype.setRequestHeader;
139
- XMLHttpRequest.prototype.open = function (method, url, ...rest) {
140
- this.__iUrl = url;
141
- this.__iMethod = method;
142
- this.__iHeaders = {};
143
- return origOpen.apply(this, [method, url, ...rest]);
144
- };
145
- XMLHttpRequest.prototype.setRequestHeader = function (name, value) {
146
- if (this.__iHeaders)
147
- this.__iHeaders[name] = value;
148
- return origSetHeader.apply(this, [name, value]);
149
- };
150
- XMLHttpRequest.prototype.send = function (body) {
151
- try {
152
- window.__interceptedApis.push({
153
- type: 'xhr', url: this.__iUrl, method: this.__iMethod,
154
- headers: this.__iHeaders || null,
155
- body: typeof body === 'string' ? body.substring(0, 2000) : null,
156
- timestamp: Date.now()
157
- });
158
- }
159
- catch (e) { }
160
- return origSend.apply(this, [body]);
161
- };
162
- // --- Monkey-patch navigator.sendBeacon ---
163
- if (navigator.sendBeacon) {
164
- const origBeacon = navigator.sendBeacon.bind(navigator);
165
- navigator.sendBeacon = function (url, data) {
166
- try {
167
- window.__interceptedApis.push({
168
- type: 'beacon', url, method: 'POST',
169
- body: typeof data === 'string' ? data.substring(0, 2000) : null,
170
- timestamp: Date.now()
171
- });
172
- }
173
- catch (e) { }
174
- return origBeacon(url, data);
175
- };
176
- }
177
- // ====== FEATURE 3: WebSocket Recording ======
178
- const OrigWS = window.WebSocket;
179
- window.WebSocket = function (url, protocols) {
180
- const ws = protocols ? new OrigWS(url, protocols) : new OrigWS(url);
181
- const wsId = window.__wsMessages.length;
182
- const wsEntry = { id: wsId, url, openedAt: Date.now(), messages: [], status: 'connecting' };
183
- window.__wsMessages.push(wsEntry);
184
- ws.addEventListener('open', () => { wsEntry.status = 'open'; });
185
- ws.addEventListener('close', (e) => { wsEntry.status = 'closed'; wsEntry.closedAt = Date.now(); wsEntry.closeCode = e.code; });
186
- ws.addEventListener('error', () => { wsEntry.status = 'error'; });
187
- ws.addEventListener('message', (e) => {
188
- try {
189
- let data = e.data;
190
- let dataType = 'text';
191
- if (data instanceof Blob) {
192
- dataType = 'blob';
193
- data = `[Blob ${data.size} bytes]`;
194
- }
195
- else if (data instanceof ArrayBuffer) {
196
- dataType = 'binary';
197
- data = `[ArrayBuffer ${data.byteLength} bytes]`;
198
- }
199
- else if (typeof data === 'string' && data.length > 5000) {
200
- data = data.substring(0, 5000) + '...';
201
- }
202
- wsEntry.messages.push({ direction: 'received', data, dataType, timestamp: Date.now() });
203
- }
204
- catch (e) { }
205
- });
206
- // Intercept send
207
- const origWsSend = ws.send.bind(ws);
208
- ws.send = function (data) {
209
- try {
210
- let sendData = data;
211
- let dataType = 'text';
212
- if (data instanceof Blob) {
213
- dataType = 'blob';
214
- sendData = `[Blob ${data.size} bytes]`;
215
- }
216
- else if (data instanceof ArrayBuffer) {
217
- dataType = 'binary';
218
- sendData = `[ArrayBuffer ${data.byteLength} bytes]`;
219
- }
220
- else if (typeof data === 'string' && data.length > 5000) {
221
- sendData = data.substring(0, 5000) + '...';
222
- }
223
- wsEntry.messages.push({ direction: 'sent', data: sendData, dataType, timestamp: Date.now() });
224
- }
225
- catch (e) { }
226
- return origWsSend(data);
227
- };
228
- return ws;
229
- };
230
- window.WebSocket.prototype = OrigWS.prototype;
231
- window.WebSocket.CONNECTING = OrigWS.CONNECTING;
232
- window.WebSocket.OPEN = OrigWS.OPEN;
233
- window.WebSocket.CLOSING = OrigWS.CLOSING;
234
- window.WebSocket.CLOSED = OrigWS.CLOSED;
235
- });
236
- }
237
- catch (e) { /* addInitScript may fail on already-loaded pages, that's OK */ }
238
- // Request handler
239
- page.on('request', req => {
240
- if (state_1.state.isRecordingNetwork) {
241
- state_1.state.networkRecords.push({
242
- type: 'request',
243
- url: req.url(),
244
- method: req.method(),
245
- resourceType: req.resourceType(),
246
- headers: req.headers(),
247
- timestamp: Date.now()
248
- });
249
- }
250
- });
251
- // Response handler for capturing video/media URLs
252
- page.on('response', async (res) => {
253
- if (state_1.state.isRecordingNetwork) {
254
- const url = res.url();
255
- const contentType = res.headers()['content-type'] || '';
256
- const isMedia = contentType.includes('video') ||
257
- contentType.includes('audio') ||
258
- contentType.includes('mpegurl') ||
259
- url.includes('.m3u8') ||
260
- url.includes('.mpd') ||
261
- url.includes('.mp4') ||
262
- url.includes('.ts');
263
- const isApiCall = contentType.includes('json') ||
264
- contentType.includes('x-www-form-urlencoded') ||
265
- url.match(/\.(php|api|json|do|action)($|\?)/) ||
266
- (res.request().resourceType() === 'xhr') ||
267
- (res.request().resourceType() === 'fetch');
268
- const isBinaryBody = isMedia ||
269
- contentType.includes('octet-stream') ||
270
- contentType.includes('application/pdf') ||
271
- contentType.includes('image/') ||
272
- contentType.includes('font/') ||
273
- contentType.includes('zip') ||
274
- contentType.includes('rar');
275
- const record = {
276
- type: 'response',
277
- url: url,
278
- method: res.request().method(),
279
- status: res.status(),
280
- contentType: contentType,
281
- isMedia: isMedia,
282
- isApiCall: isApiCall,
283
- resourceType: res.request().resourceType(),
284
- timestamp: Date.now()
285
- };
286
- // For media URLs, try to get more details
287
- if (isMedia) {
288
- record.mediaType = url.includes('.m3u8') ? 'hls' :
289
- url.includes('.mpd') ? 'dash' :
290
- url.includes('.mp4') ? 'mp4' : 'other';
291
- }
292
- // Capture request/response body for API calls
293
- if (isApiCall) {
294
- try {
295
- const postData = res.request().postData();
296
- if (postData)
297
- record.requestBody = postData.substring(0, 2000);
298
- if (isBinaryBody) {
299
- record.responseBody = `[binary/media body omitted: ${contentType || 'unknown content-type'}]`;
300
- }
301
- else {
302
- const responseBody = await res.text().catch(() => null);
303
- if (responseBody) {
304
- record.responseBody = responseBody.substring(0, 5000);
305
- record.responseTruncated = responseBody.length > 5000;
306
- try {
307
- record.responseJson = JSON.parse(responseBody);
308
- }
309
- catch (e) { }
310
- }
311
- }
312
- }
313
- catch (e) { }
314
- }
315
- state_1.state.networkRecords.push(record);
316
- }
317
- });
318
- // Frame navigation handler for JS redirects
319
- page.on('framenavigated', frame => {
320
- if (state_1.state.isRecordingNetwork && frame === page.mainFrame()) {
321
- state_1.state.networkRecords.push({
322
- type: 'navigation',
323
- url: frame.url(),
324
- timestamp: Date.now()
325
- });
326
- }
327
- });
328
- (0, state_1.notifyProgress)('network_recorder', 'started', 'Power recording started (requests + responses + API interception + WebSocket + navigations)');
329
- break;
80
+ case 'start': return (0, network_recorder_1.startRecording)(page).then(() => ({ success: true, message: 'Recording started' }));
330
81
  case 'stop':
331
- state_1.state.isRecordingNetwork = false;
332
- (0, state_1.notifyProgress)('network_recorder', 'completed', `Recording stopped: ${state_1.state.networkRecords.length} events captured`);
333
- break;
82
+ (0, network_recorder_1.stopRecording)();
83
+ return { success: true };
334
84
  case 'clear':
335
- state_1.state.networkRecords = [];
336
- // Also clear intercepted data
337
- try {
338
- await page.evaluate(() => { window.__interceptedApis = []; window.__wsMessages = []; });
339
- }
340
- catch (e) { }
341
- (0, state_1.notifyProgress)('network_recorder', 'completed', 'Network records cleared');
342
- break;
343
- case 'get_media':
344
- // Special action to get only media URLs
345
- const mediaRecords = state_1.state.networkRecords.filter(r => r.isMedia);
346
- return {
347
- success: true,
348
- count: mediaRecords.length,
349
- mediaUrls: mediaRecords.map(r => ({ url: r.url, type: r.mediaType }))
350
- };
351
- case 'get_navigations':
352
- // Get only navigation events (for tracking JS redirects)
353
- const navRecords = state_1.state.networkRecords.filter(r => r.type === 'navigation');
354
- return {
355
- success: true,
356
- count: navRecords.length,
357
- navigations: navRecords
358
- };
359
- case 'get_api_calls': {
360
- const apiRecords = state_1.state.networkRecords.filter(r => r.isApiCall);
361
- return {
362
- success: true,
363
- count: apiRecords.length,
364
- apiCalls: apiRecords.map(r => ({
365
- url: r.url, method: r.method || 'GET', status: r.status,
366
- contentType: r.contentType, resourceType: r.resourceType,
367
- requestBody: r.requestBody || null, responseBody: r.responseBody || null,
368
- responseJson: r.responseJson || null, timestamp: r.timestamp
369
- }))
370
- };
371
- }
372
- // ====== FEATURE 2: Get Intercepted APIs (from monkey-patched fetch/XHR/beacon) ======
373
- case 'get_intercepted_apis': {
374
- try {
375
- const intercepted = await page.evaluate(() => window.__interceptedApis || []);
376
- return {
377
- success: true,
378
- count: intercepted.length,
379
- interceptedApis: intercepted,
380
- note: 'These are runtime-intercepted API calls captured via monkey-patched fetch/XHR/sendBeacon (pre-page-load injection)'
381
- };
382
- }
383
- catch (e) {
384
- return { success: false, error: 'Failed to retrieve intercepted APIs: ' + e.message, interceptedApis: [] };
385
- }
386
- }
387
- // ====== FEATURE 3: Get WebSocket Messages ======
388
- case 'get_websockets': {
389
- try {
390
- const wsData = await page.evaluate(() => window.__wsMessages || []);
391
- const totalMessages = wsData.reduce((sum, ws) => sum + ws.messages.length, 0);
392
- return {
393
- success: true,
394
- count: wsData.length,
395
- totalMessages: totalMessages,
396
- websockets: wsData,
397
- note: 'WebSocket connections and messages captured via constructor monkey-patch'
398
- };
399
- }
400
- catch (e) {
401
- return { success: false, error: 'Failed to retrieve WebSocket data: ' + e.message, websockets: [] };
402
- }
403
- }
404
- // ====== FEATURE 4: GraphQL Inspector ======
405
- case 'get_graphql': {
406
- const gqlRecords = state_1.state.networkRecords.filter((r) => r.isApiCall && r.requestBody && (r.requestBody.includes('"query"') || r.requestBody.includes('query '))).map((r) => {
407
- let parsedQuery = null, parsedVariables = null, operationName = null;
408
- try {
409
- const body = JSON.parse(r.requestBody);
410
- parsedQuery = body.query;
411
- parsedVariables = body.variables;
412
- operationName = body.operationName;
413
- }
414
- catch (e) { }
415
- return {
416
- url: r.url,
417
- method: r.method,
418
- operationName: operationName || 'unknown',
419
- query: parsedQuery || r.requestBody,
420
- variables: parsedVariables,
421
- response: r.responseJson || r.responseBody,
422
- timestamp: r.timestamp
423
- };
424
- });
425
- return { success: true, count: gqlRecords.length, graphql: gqlRecords };
426
- }
427
- // ====== FEATURE 5: HAR Exporter ======
428
- case 'export_har': {
429
- // ponytail: minimal HAR 1.2 mapping without dependencies
430
- const har = {
431
- log: {
432
- version: '1.2',
433
- creator: { name: 'Real Browser MCP', version: '1.0' },
434
- entries: state_1.state.networkRecords.filter((r) => r.type === 'response').map((r) => ({
435
- startedDateTime: new Date(r.timestamp).toISOString(),
436
- request: {
437
- method: r.method || 'GET',
438
- url: r.url,
439
- headers: Object.entries(r.headers || {}).map(([name, value]) => ({ name, value: String(value) })),
440
- postData: r.requestBody ? { text: r.requestBody } : undefined
441
- },
442
- response: {
443
- status: r.status || 200,
444
- content: {
445
- mimeType: r.contentType || 'text/plain',
446
- text: r.responseBody || ''
447
- }
448
- },
449
- time: 0
450
- }))
451
- }
452
- };
453
- return { success: true, count: har.log.entries.length, har };
454
- }
455
- }
456
- let records = state_1.state.networkRecords;
457
- if (filter.resourceType) {
458
- records = records.filter(r => r.resourceType === filter.resourceType);
85
+ await (0, network_recorder_1.clearRecording)(page);
86
+ return { success: true };
87
+ case 'get_media': return (0, network_recorder_1.getMediaRecords)();
88
+ case 'get_navigations': return (0, network_recorder_1.getNavigationRecords)();
89
+ case 'get_api_calls': return (0, network_recorder_1.getApiCallRecords)();
90
+ case 'get_intercepted_apis': return await (0, network_recorder_1.getInterceptedApis)(page);
91
+ case 'get_websockets': return await (0, network_recorder_1.getWebSocketRecords)(page);
92
+ case 'get_graphql': return (0, network_recorder_1.getGraphQLRecords)();
93
+ case 'export_har': return (0, network_recorder_1.exportHAR)();
459
94
  }
460
- if (filter.urlPattern) {
461
- const regex = new RegExp(filter.urlPattern);
462
- records = records.filter(r => regex.test(r.url));
463
- }
464
- if (filter.type) {
465
- records = records.filter(r => r.type === filter.type);
466
- }
467
- if (filter.mediaOnly) {
468
- records = records.filter(r => r.isMedia);
469
- }
470
- return { success: true, recording: state_1.state.isRecordingNetwork, count: records.length, records: records.slice(-200) };
95
+ return (0, network_recorder_1.getFilteredRecords)(filter);
471
96
  },
472
97
  async extract_data(params = {}) {
473
- const { page } = (0, state_1.requireBrowser)();
474
- const { type = 'auto', pattern, selector, jsonPath, source = 'all', autoDecode = true, flags = 'gi', types = ['all'], includeTitle = true, includeCanonical = true, maxMatches = 100, maxJsonObjects = 50, waitForSelector = false, selectorTimeout = 10000 } = params;
475
- (0, state_1.notifyProgress)('extract_data', 'started', `Extracting data (type: ${type})...`);
476
- const results = {
477
- success: true,
478
- type,
479
- url: page.url(),
480
- extracted: {}
481
- };
482
- // Helper: Extract regex matches
483
- const extractRegex = async (regexPattern, regexFlags, contentSource) => {
484
- let content;
485
- if (contentSource === 'html') {
486
- content = await page.content();
487
- }
488
- else if (contentSource === 'scripts') {
489
- content = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
490
- }
491
- else if (contentSource === 'text') {
492
- content = await page.evaluate(() => document.body.innerText);
493
- }
494
- else {
495
- // 'all' - search in both HTML and scripts
496
- const html = await page.content();
497
- const scripts = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
498
- content = html + '\n' + scripts;
499
- }
500
- const regex = new RegExp(regexPattern, regexFlags);
501
- const matches = content.match(regex) || [];
502
- return {
503
- pattern: regexPattern,
504
- flags: regexFlags,
505
- matchCount: matches.length,
506
- matches: matches.slice(0, maxMatches)
507
- };
508
- };
509
- // Helper: Extract JSON data
510
- const extractJson = async (jsonSource, sel, path) => {
511
- const jsonData = [];
512
- if (jsonSource === 'ld+json') {
513
- const ldJson = await page.$$eval('script[type="application/ld+json"]', scripts => scripts.map(s => {
514
- try {
515
- return JSON.parse(s.textContent);
516
- }
517
- catch {
518
- return null;
519
- }
520
- }).filter(Boolean));
521
- jsonData.push(...ldJson);
522
- }
523
- else if (jsonSource === 'scripts') {
524
- const content = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
525
- // Look for JSON objects in scripts
526
- const jsonRegex = /\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}|\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]/g;
527
- const matches = content.match(jsonRegex) || [];
528
- for (const match of matches.slice(0, maxJsonObjects)) {
529
- try {
530
- const parsed = JSON.parse(match);
531
- jsonData.push(parsed);
532
- }
533
- catch { }
534
- }
535
- }
536
- else if (jsonSource === 'api') {
537
- // Try to find API responses in page data
538
- const apiData = await page.evaluate(() => {
539
- const data = [];
540
- // Look for common API data storage patterns
541
- if (window.__DATA__)
542
- data.push(window.__DATA__);
543
- if (window.__INITIAL_STATE__)
544
- data.push(window.__INITIAL_STATE__);
545
- if (window.__APP_DATA__)
546
- data.push(window.__APP_DATA__);
547
- if (window.data)
548
- data.push(window.data);
549
- if (window.config)
550
- data.push(window.config);
551
- return data;
552
- });
553
- jsonData.push(...apiData);
554
- }
555
- else if (sel) {
556
- try {
557
- const text = await page.$eval(sel, el => el.textContent);
558
- const parsed = JSON.parse(text);
559
- jsonData.push(parsed);
560
- }
561
- catch { }
562
- }
563
- else {
564
- // 'page' - try all sources
565
- const ldJson = await page.$$eval('script[type="application/ld+json"]', scripts => scripts.map(s => {
566
- try {
567
- return JSON.parse(s.textContent);
568
- }
569
- catch {
570
- return null;
571
- }
572
- }).filter(Boolean));
573
- jsonData.push(...ldJson);
574
- }
575
- // Apply JSONPath if specified
576
- if (path && jsonData.length > 0) {
577
- // Simple JSONPath implementation
578
- const getPath = (obj, pathStr) => {
579
- const parts = pathStr.replace(/^\$\./, '').split('.');
580
- let current = obj;
581
- for (const part of parts) {
582
- if (current === null || current === undefined)
583
- return undefined;
584
- if (part.includes('[') && part.includes(']')) {
585
- const arrName = part.substring(0, part.indexOf('['));
586
- const idx = parseInt(part.match(/\[(\d+)\]/)?.[1] || '0');
587
- current = current[arrName]?.[idx];
588
- }
589
- else {
590
- current = current[part];
591
- }
592
- }
593
- return current;
594
- };
595
- return jsonData.map(obj => ({
596
- original: obj,
597
- extracted: getPath(obj, path)
598
- }));
599
- }
600
- return jsonData;
601
- };
602
- // Helper: Extract meta tags
603
- const extractMeta = async (metaTypes) => {
604
- const meta = await page.evaluate(([includeTitle, includeCanonical]) => {
605
- const result = { meta: {}, og: {}, twitter: {} };
606
- document.querySelectorAll('meta').forEach(tag => {
607
- const name = tag.getAttribute('name') || tag.getAttribute('property');
608
- const content = tag.getAttribute('content');
609
- if (name && content) {
610
- if (name.startsWith('og:')) {
611
- result.og[name.replace('og:', '')] = content;
612
- }
613
- else if (name.startsWith('twitter:')) {
614
- result.twitter[name.replace('twitter:', '')] = content;
615
- }
616
- else {
617
- result.meta[name] = content;
618
- }
619
- }
620
- });
621
- if (includeTitle) {
622
- result.title = document.title;
623
- }
624
- if (includeCanonical) {
625
- result.canonical = document.querySelector('link[rel="canonical"]')?.href;
626
- }
627
- return result;
628
- }, [includeTitle, includeCanonical]);
629
- // Filter by requested types
630
- const filtered = {};
631
- if (metaTypes.includes('all')) {
632
- return meta;
633
- }
634
- if (metaTypes.includes('meta'))
635
- filtered.meta = meta.meta;
636
- if (metaTypes.includes('og'))
637
- filtered.og = meta.og;
638
- if (metaTypes.includes('twitter'))
639
- filtered.twitter = meta.twitter;
640
- if (includeTitle)
641
- filtered.title = meta.title;
642
- if (includeCanonical)
643
- filtered.canonical = meta.canonical;
644
- return filtered;
645
- };
646
- // Helper: Extract structured data from selector
647
- const extractStructured = async (sel, wait = false, timeout = 10000) => {
648
- if (wait) {
649
- await page.waitForSelector(sel, { timeout });
650
- }
651
- const element = await page.$(sel);
652
- if (!element) {
653
- return { error: `Element not found: ${sel}` };
654
- }
655
- const data = await element.evaluate(el => ({
656
- tagName: el.tagName,
657
- text: el.innerText,
658
- html: el.innerHTML,
659
- attributes: Object.fromEntries([...el.attributes].map(a => [a.name, a.value])),
660
- childCount: el.children.length,
661
- boundingBox: el.getBoundingClientRect ? {
662
- x: el.getBoundingClientRect().x,
663
- y: el.getBoundingClientRect().y,
664
- width: el.getBoundingClientRect().width,
665
- height: el.getBoundingClientRect().height
666
- } : null
667
- }));
668
- return data;
669
- };
670
- // Helper: Auto-detect and extract all
671
- const extractAuto = async () => {
672
- const autoResults = {
673
- meta: null,
674
- json: null,
675
- structured: null,
676
- patterns: []
677
- };
678
- // Extract meta tags
679
- try {
680
- autoResults.meta = await extractMeta(['all']);
681
- }
682
- catch (e) { }
683
- // Extract JSON-LD
684
- try {
685
- autoResults.json = await extractJson('ld+json');
686
- }
687
- catch (e) { }
688
- // Look for common data patterns
689
- const commonPatterns = [
690
- { name: 'emails', pattern: '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' },
691
- { name: 'phones', pattern: '(\+?1?[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}' },
692
- { name: 'urls', pattern: 'https?://[^\s<>"{}|\\^`\[\]]+' },
693
- { name: 'ipv4', pattern: '\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b' }
694
- ];
695
- const pageText = await page.evaluate(() => document.body.innerText);
696
- for (const { name, pattern } of commonPatterns) {
697
- const regex = new RegExp(pattern, 'gi');
698
- const matches = [...new Set(pageText.match(regex) || [])];
699
- if (matches.length > 0) {
700
- autoResults.patterns.push({ type: name, count: matches.length, samples: matches.slice(0, 10) });
701
- }
702
- }
703
- return autoResults;
704
- };
705
- // Helper: Extract Links (merged from link_harvester)
706
- const extractLinks = async () => {
707
- const { includeHidden = true, searchIframes = false } = params;
708
- const doExtract = async (context) => {
709
- return await context.evaluate(({ incHidden }) => {
710
- const allLinks = [];
711
- const seenUrls = new Set();
712
- const addLink = (href, text, source, element) => {
713
- if (!href || seenUrls.has(href))
714
- return;
715
- if (!href.startsWith('http') && !href.startsWith('//'))
716
- return;
717
- if (href.startsWith('//'))
718
- href = window.location.protocol + href;
719
- seenUrls.add(href);
720
- allLinks.push({
721
- href,
722
- text: (text || '').trim().substring(0, 100),
723
- source,
724
- hidden: element ? (element.offsetParent === null || getComputedStyle(element).display === 'none' || getComputedStyle(element).visibility === 'hidden') : false
725
- });
726
- };
727
- document.querySelectorAll('a[href]').forEach(a => addLink(a.href, a.textContent, 'anchor', a));
728
- const dataAttrs = ['data-href', 'data-url', 'data-link', 'data-src', 'data-file', 'data-download'];
729
- dataAttrs.forEach(attr => document.querySelectorAll(`[${attr}]`).forEach(el => addLink(el.getAttribute(attr), el.textContent, `${attr}`, el)));
730
- if (incHidden) {
731
- document.querySelectorAll('[onclick]').forEach(el => {
732
- const onclick = el.getAttribute('onclick');
733
- if (!onclick)
734
- return;
735
- const urlMatches = onclick.match(/https?:\/\/[^\s"'<>]+/gi) || [];
736
- urlMatches.forEach(url => addLink(url, el.textContent, 'onclick', el));
737
- const hrefMatch = onclick.match(/location\.href\s*=\s*['"]([^'"]+)['"]/);
738
- if (hrefMatch)
739
- addLink(hrefMatch[1], el.textContent, 'onclick-location', el);
740
- const openMatch = onclick.match(/window\.open\s*\(\s*['"]([^'"]+)['"]/);
741
- if (openMatch)
742
- addLink(openMatch[1], el.textContent, 'onclick-window-open', el);
743
- });
744
- const scripts = [...document.querySelectorAll('script')].slice(0, 20);
745
- scripts.forEach(script => {
746
- const content = script.textContent || '';
747
- const patterns = [
748
- /["']?(https?:\/\/[^"'\s<>]+\.(mp4|mkv|avi|m3u8|mpd|zip|rar|pdf))[^"'\s<>]*["']?/gi,
749
- /download[_-]?url\s*[:=]\s*["']([^"']+)["']/gi,
750
- /file\s*[:=]\s*["']([^"']+)["']/gi
751
- ];
752
- patterns.forEach(pattern => {
753
- let match;
754
- while ((match = pattern.exec(content)) !== null)
755
- addLink(match[1], 'script-extracted', 'script', null);
756
- });
757
- });
758
- }
759
- document.querySelectorAll('a[href^="javascript:"]').forEach(a => {
760
- const match = a.getAttribute('href')?.match(/https?:\/\/[^\s"'<>]+/gi);
761
- if (match)
762
- match.forEach(url => addLink(url, a.textContent, 'javascript-href', a));
763
- });
764
- document.querySelectorAll('input[type="hidden"]').forEach((input) => {
765
- if (input.value && (input.value.startsWith('http') || input.value.startsWith('//')))
766
- addLink(input.value, input.name || input.id, 'hidden-input', input);
767
- });
768
- const metaRefresh = document.querySelector('meta[http-equiv="refresh"]');
769
- if (metaRefresh) {
770
- const match = metaRefresh.getAttribute('content')?.match(/url=(.+)/i);
771
- if (match)
772
- addLink(match[1].trim().replace(/['"]/g, ''), 'meta-refresh', 'meta', null);
773
- }
774
- document.querySelectorAll('iframe[src]').forEach((iframe) => addLink(iframe.src, 'iframe', 'iframe', iframe));
775
- return allLinks;
776
- }, { incHidden: includeHidden }).catch(() => []);
777
- };
778
- let links = await doExtract(page);
779
- if (searchIframes) {
780
- const frames = page.frames();
781
- for (let i = 1; i < frames.length && i < 5; i++) {
782
- try {
783
- const frame = frames[i];
784
- if (frame.url() && frame.url() !== 'about:blank') {
785
- const frameLinks = await doExtract(frame);
786
- frameLinks.forEach((link) => link.source = `iframe:${link.source}`);
787
- links = [...links, ...frameLinks];
788
- }
789
- }
790
- catch (e) { }
791
- }
792
- }
793
- if (!includeHidden)
794
- links = links.filter((link) => !link.hidden);
795
- const seen = new Set();
796
- return links.filter((link) => {
797
- if (seen.has(link.href))
798
- return false;
799
- seen.add(link.href);
800
- return true;
801
- });
802
- };
803
- // Main switch based on type
804
- switch (type) {
805
- case 'links': {
806
- const links = await extractLinks();
807
- results.extracted = { count: links.length, links };
808
- (0, state_1.notifyProgress)('extract_data', 'completed', `Links: ${links.length} extracted`);
809
- break;
810
- }
811
- case 'regex': {
812
- if (!pattern) {
813
- return { success: false, error: 'Pattern is required for regex extraction' };
814
- }
815
- results.extracted = await extractRegex(pattern, flags, source);
816
- (0, state_1.notifyProgress)('extract_data', 'completed', `Regex: ${results.extracted.matchCount} matches`);
817
- break;
818
- }
819
- case 'json': {
820
- results.extracted = await extractJson(source, selector, jsonPath);
821
- results.count = Array.isArray(results.extracted) ? results.extracted.length : 0;
822
- (0, state_1.notifyProgress)('extract_data', 'completed', `JSON: ${results.count} objects`);
823
- break;
824
- }
825
- case 'meta': {
826
- results.extracted = await extractMeta(types);
827
- const tagCount = Object.values(results.extracted).reduce((sum, val) => {
828
- if (typeof val === 'object' && val !== null) {
829
- return sum + Object.keys(val).length;
830
- }
831
- return sum + (val ? 1 : 0);
832
- }, 0);
833
- (0, state_1.notifyProgress)('extract_data', 'completed', `Meta: ${tagCount} tags`);
834
- break;
835
- }
836
- case 'structured': {
837
- if (!selector) {
838
- return { success: false, error: 'Selector is required for structured extraction. 💡 AI HINT: Run see_page(annotate: true) first to discover valid selectors or annotation IDs.' };
839
- }
840
- results.extracted = await extractStructured(selector, waitForSelector, selectorTimeout);
841
- if (results.extracted.error) {
842
- results.success = false;
843
- results.error = results.extracted.error;
844
- delete results.extracted;
845
- }
846
- (0, state_1.notifyProgress)('extract_data', 'completed', results.success ? 'Structured data extracted' : 'Extraction failed');
847
- break;
848
- }
849
- case 'auto': {
850
- results.extracted = await extractAuto();
851
- const summary = [];
852
- if (results.extracted.meta)
853
- summary.push('meta');
854
- if (results.extracted.json?.length)
855
- summary.push('json');
856
- if (results.extracted.patterns?.length)
857
- summary.push('patterns');
858
- (0, state_1.notifyProgress)('extract_data', 'completed', `Auto: ${summary.join(', ')}`);
859
- break;
860
- }
861
- case 'deobfuscate': {
862
- (0, state_1.notifyProgress)('extract_data', 'in_progress', 'Deobfuscating JavaScript (enhanced)...');
863
- const scriptContents = await page.evaluate(() => {
864
- return Array.from(document.querySelectorAll('script')).map(s => s.textContent).join('\n');
865
- }).catch(() => '');
866
- const externalScripts = await page.evaluate(() => {
867
- return Array.from(document.querySelectorAll('script[src]')).map(s => s.src);
868
- }).catch(() => []);
869
- let allJs = scriptContents;
870
- for (const src of externalScripts.slice(0, 10)) {
871
- try {
872
- const resp = await fetch(src);
873
- allJs += '\n' + await resp.text();
874
- }
875
- catch (e) { }
876
- }
877
- const deobfuscated = {
878
- stringArrays: [], decodedStrings: [], functionMappings: [],
879
- apiEndpoints: [], urls: [], fetchCalls: [],
880
- webpackModules: [], evalUnpacked: [], resolvedConcats: [], unicodeDecoded: []
881
- };
882
- // 1. Original _0x style string arrays
883
- const arrayPattern = /(?:const|var|let)\s+(_0x[a-f0-9]+)\s*=\s*\[([^\]]{20,})\]/g;
884
- let match;
885
- while ((match = arrayPattern.exec(allJs)) !== null) {
886
- const varName = match[1];
887
- try {
888
- const items = match[2].match(/'([^']*)'|"([^"]*)"/g) || [];
889
- const decoded = items.map(s => s.replace(/^['"]|['"]$/g, ''));
890
- deobfuscated.stringArrays.push({ variable: varName, count: decoded.length, strings: decoded });
891
- deobfuscated.decodedStrings.push(...decoded);
892
- }
893
- catch (e) { }
894
- }
895
- // 2. Hex-encoded strings
896
- const hexStrings = [...new Set((allJs.match(/(?:'(?:\\x[0-9a-f]{2})+[^']*'|"(?:\\x[0-9a-f]{2})+[^"]*")/gi) || []))];
897
- for (const hs of hexStrings.slice(0, 50)) {
898
- try {
899
- const decoded = hs.slice(1, -1).replace(/\\x([0-9a-f]{2})/gi, (_, h) => String.fromCharCode(parseInt(h, 16)));
900
- if (decoded.length > 2)
901
- deobfuscated.decodedStrings.push(decoded);
902
- }
903
- catch (e) { }
904
- }
905
- // 3. NEW: Unicode escape sequences (\u0066\u0065\u0074\u0063\u0068 → fetch)
906
- const unicodePattern = /(?:'(?:\\u[0-9a-f]{4})+[^']*'|"(?:\\u[0-9a-f]{4})+[^"]*")/gi;
907
- const unicodeMatches = allJs.match(unicodePattern) || [];
908
- for (const um of unicodeMatches.slice(0, 50)) {
909
- try {
910
- const decoded = um.slice(1, -1).replace(/\\u([0-9a-f]{4})/gi, (_, h) => String.fromCharCode(parseInt(h, 16)));
911
- if (decoded.length > 1) {
912
- deobfuscated.unicodeDecoded.push(decoded);
913
- deobfuscated.decodedStrings.push(decoded);
914
- }
915
- }
916
- catch (e) { }
917
- }
918
- // 4. NEW: Eval unpacker — eval(function(p,a,c,k,e,d){...})
919
- const evalPattern = /eval\s*\(\s*function\s*\(\s*p\s*,\s*a\s*,\s*c\s*,\s*k\s*,\s*e\s*,?\s*[dr]?\s*\)\s*\{[^}]*\}\s*\(\s*'([^']*)'(?:\s*,\s*(\d+)){2}\s*,\s*'([^']*)'/g;
920
- let evalMatch;
921
- while ((evalMatch = evalPattern.exec(allJs)) !== null) {
922
- try {
923
- const p = evalMatch[1], a = parseInt(evalMatch[2]) || 62;
924
- const keywords = evalMatch[3].split('|');
925
- const unpacked = p.replace(/\b\w+\b/g, w => {
926
- const n = parseInt(w, a);
927
- return (n < keywords.length && keywords[n]) ? keywords[n] : w;
928
- });
929
- deobfuscated.evalUnpacked.push(unpacked.substring(0, 3000));
930
- // Extract strings from unpacked code
931
- const unpackedStrings = unpacked.match(/['"]([^'"]{3,})['"]/g) || [];
932
- for (const s of unpackedStrings.slice(0, 100)) {
933
- deobfuscated.decodedStrings.push(s.replace(/^['"]|['"]$/g, ''));
934
- }
935
- }
936
- catch (e) { }
937
- }
938
- // Also handle simpler eval patterns
939
- const simpleEval = /eval\s*\(\s*['"]([^'"]{10,})['"]\s*\)/g;
940
- let seMatch;
941
- while ((seMatch = simpleEval.exec(allJs)) !== null) {
942
- deobfuscated.evalUnpacked.push(seMatch[1].substring(0, 2000));
943
- }
944
- // 5. NEW: Webpack module detection
945
- const webpackPatterns = [
946
- /(?:__webpack_require__|__webpack_modules__)\s*\[\s*['"]?(\w+)['"]?\s*\]/g,
947
- /(?:const|var|let)\s+\w+\s*=\s*\{[\s\S]{0,50}__webpack_require__/g,
948
- /\(\s*function\s*\(\s*modules\s*\)\s*\{[\s\S]{0,200}__webpack_require__/g
949
- ];
950
- const webpackExports = allJs.match(/(?:module\.exports|exports\.\w+)\s*=\s*['"]([^'"]+)['"]/g) || [];
951
- for (const exp of webpackExports.slice(0, 30)) {
952
- const val = exp.match(/=\s*['"]([^'"]+)['"]/);
953
- if (val) {
954
- deobfuscated.webpackModules.push(val[1]);
955
- deobfuscated.decodedStrings.push(val[1]);
956
- }
957
- }
958
- // Detect webpack chunk loading and module IDs
959
- const chunkIds = allJs.match(/webpackChunk\w*\.push\s*\(\s*\[\s*\[([^\]]+)\]/g) || [];
960
- for (const ci of chunkIds.slice(0, 10)) {
961
- deobfuscated.webpackModules.push(`chunk: ${ci.substring(0, 100)}`);
962
- }
963
- // 6. NEW: Terser/UglifyJS single-letter variable mappings
964
- const terserPattern = /(?:var|let|const)\s+([a-z])\s*=\s*['"]([^'"]{2,})['"]/gi;
965
- let terserMatch;
966
- const terserMappings = {};
967
- while ((terserMatch = terserPattern.exec(allJs)) !== null) {
968
- const varName = terserMatch[1], value = terserMatch[2];
969
- if (value.length > 2 && value.length < 200) {
970
- terserMappings[varName] = value;
971
- deobfuscated.functionMappings.push({ variable: varName, value: value });
972
- deobfuscated.decodedStrings.push(value);
973
- }
974
- }
975
- // 7. NEW: String concatenation resolution ("htt"+"ps://" → "https://")
976
- const concatPattern = /(?:['"][^'"]*['"]\s*\+\s*){2,}['"][^'"]*['"]/g;
977
- const concatMatches = allJs.match(concatPattern) || [];
978
- for (const cm of concatMatches.slice(0, 50)) {
979
- try {
980
- const parts = cm.match(/['"]([^'"]*)['"]|(['"])/g) || [];
981
- const resolved = parts.map(p => p.replace(/^['"]|['"]$/g, '')).join('');
982
- if (resolved.length > 3) {
983
- deobfuscated.resolvedConcats.push(resolved);
984
- deobfuscated.decodedStrings.push(resolved);
985
- }
986
- }
987
- catch (e) { }
988
- }
989
- // 8. NEW: Array rotation detection — function with push/shift on array
990
- const rotationPattern = /function\s+\w*\s*\(\s*(_0x[a-f0-9]+)\s*,\s*\w+\s*\)\s*\{[\s\S]{0,500}push\s*\(\s*\1\s*\.\s*shift\s*\(\s*\)\s*\)/g;
991
- const rotations = allJs.match(rotationPattern) || [];
992
- if (rotations.length > 0) {
993
- deobfuscated.functionMappings.push({ type: 'array_rotation', count: rotations.length, note: 'Array rotation functions detected — strings may be shifted' });
994
- }
995
- // Extract URLs and API endpoints from all decoded strings
996
- deobfuscated.urls = [...new Set(deobfuscated.decodedStrings.filter((s) => s.match(/^(https?:\/\/|\/)/) || s.match(/\.(php|json|api|asp|jsp)$/i)))].slice(0, 50);
997
- deobfuscated.apiEndpoints = [...new Set(deobfuscated.decodedStrings.filter((s) => s.match(/^\/[a-z]/i) && s.length > 3 && s.length < 100))].slice(0, 30);
998
- // Find fetch patterns
999
- const fetchPatterns = allJs.match(/fetch\s*\(\s*['"]([^'"]+)['"]/g) || [];
1000
- deobfuscated.fetchCalls = fetchPatterns.map(f => f.replace(/fetch\s*\(\s*['"]/, '').replace(/['"]$/, '')).slice(0, 20);
1001
- deobfuscated.decodedStrings = [...new Set(deobfuscated.decodedStrings)].slice(0, 500);
1002
- results.extracted = deobfuscated;
1003
- const summary = `${deobfuscated.stringArrays.length} arrays, ${deobfuscated.decodedStrings.length} strings, ${deobfuscated.evalUnpacked.length} eval unpacked, ${deobfuscated.webpackModules.length} webpack modules, ${deobfuscated.resolvedConcats.length} concats resolved, ${deobfuscated.unicodeDecoded.length} unicode decoded`;
1004
- (0, state_1.notifyProgress)('extract_data', 'completed', `Deobfuscated(enhanced): ${summary}`);
1005
- break;
1006
- }
1007
- case 'apiDiscovery': {
1008
- (0, state_1.notifyProgress)('extract_data', 'in_progress', 'Discovering hidden API endpoints...');
1009
- const apiResults = {
1010
- fetchEndpoints: [], xhrEndpoints: [], formActions: [],
1011
- scriptSources: [], inlineApiPatterns: [], postBodies: [], dynamicApis: []
1012
- };
1013
- // 1. Intercept runtime fetch/XHR
1014
- try {
1015
- const runtimeApis = await page.evaluate(() => {
1016
- return new Promise((resolve) => {
1017
- const found = [];
1018
- if (window.__capturedApis) {
1019
- resolve(window.__capturedApis);
1020
- return;
1021
- }
1022
- const origFetch = window.fetch;
1023
- window.fetch = function (...args) {
1024
- try {
1025
- const url = typeof args[0] === 'string' ? args[0] : args[0]?.url;
1026
- const opts = args[1] || {};
1027
- found.push({
1028
- type: 'fetch', url, method: opts.method || 'GET',
1029
- body: typeof opts.body === 'string' ? opts.body.substring(0, 500) : null
1030
- });
1031
- }
1032
- catch (e) { }
1033
- return origFetch.apply(this, args);
1034
- };
1035
- const origOpen = XMLHttpRequest.prototype.open;
1036
- const origSend = XMLHttpRequest.prototype.send;
1037
- XMLHttpRequest.prototype.open = function (method, url, ...rest) { this.__apiUrl = url; this.__apiMethod = method; return origOpen.apply(this, [method, url, ...rest]); };
1038
- XMLHttpRequest.prototype.send = function (body) {
1039
- found.push({
1040
- type: 'xhr', url: this.__apiUrl, method: this.__apiMethod,
1041
- body: typeof body === 'string' ? body.substring(0, 500) : null
1042
- });
1043
- return origSend.apply(this, [body]);
1044
- };
1045
- window.__capturedApis = found;
1046
- setTimeout(() => resolve(found), 3000);
1047
- });
1048
- });
1049
- apiResults.dynamicApis = runtimeApis;
1050
- }
1051
- catch (e) {
1052
- apiResults.dynamicApis = [];
1053
- }
1054
- // 2. Static analysis
1055
- const allScriptContent = await page.evaluate(() => {
1056
- return Array.from(document.querySelectorAll('script')).map(s => s.textContent).join('\n');
1057
- }).catch(() => '');
1058
- const fetchRegex = /fetch\s*\(\s*(?:['"`]([^'"`]+)['"`]|([a-zA-Z_$][a-zA-Z0-9_$]*))/g;
1059
- let fMatch;
1060
- while ((fMatch = fetchRegex.exec(allScriptContent)) !== null) {
1061
- apiResults.fetchEndpoints.push((fMatch[1] || fMatch[2]));
1062
- }
1063
- apiResults.fetchEndpoints = [...new Set(apiResults.fetchEndpoints)].slice(0, 30);
1064
- const xhrRegex = /\.open\s*\(\s*['"](?:GET|POST|PUT|DELETE)['"]\s*,\s*['"`]([^'"`]+)['"`]/gi;
1065
- let xMatch;
1066
- while ((xMatch = xhrRegex.exec(allScriptContent)) !== null) {
1067
- apiResults.xhrEndpoints.push(xMatch[1]);
1068
- }
1069
- apiResults.xhrEndpoints = [...new Set(apiResults.xhrEndpoints)].slice(0, 30);
1070
- apiResults.formActions = await page.evaluate(() => {
1071
- return Array.from(document.querySelectorAll('form[action]')).map((f) => ({ action: f.action, method: f.method || 'GET', id: f.id || null }));
1072
- }).catch(() => []);
1073
- const postBodyPatterns = allScriptContent.match(/(?:URLSearchParams|FormData|JSON\.stringify)\s*\(\s*\{[^}]{5,200}\}/g) || [];
1074
- apiResults.postBodies = postBodyPatterns.slice(0, 10);
1075
- const apiUrlPattern = /['"`]((?:https?:\/\/[^'"`]+|\/)(?:[a-zA-Z0-9_\-\/]+\.(?:php|json|api|asp|aspx|do|action))[^'"`]*)['"`]/g;
1076
- let apiMatch;
1077
- while ((apiMatch = apiUrlPattern.exec(allScriptContent)) !== null) {
1078
- apiResults.inlineApiPatterns.push(apiMatch[1]);
1079
- }
1080
- apiResults.inlineApiPatterns = [...new Set(apiResults.inlineApiPatterns)].slice(0, 30);
1081
- apiResults.scriptSources = await page.evaluate(() => {
1082
- return Array.from(document.querySelectorAll('script[src]')).map(s => s.src);
1083
- }).catch(() => []);
1084
- results.extracted = apiResults;
1085
- const totalFound = apiResults.fetchEndpoints.length + apiResults.xhrEndpoints.length +
1086
- apiResults.inlineApiPatterns.length + apiResults.dynamicApis.length;
1087
- (0, state_1.notifyProgress)('extract_data', 'completed', `API Discovery: ${totalFound} endpoints found`);
1088
- break;
1089
- }
1090
- // ====== FEATURE 4: Response Auto-Decryption ======
1091
- case 'decrypt': {
1092
- (0, state_1.notifyProgress)('extract_data', 'in_progress', 'Auto-decrypting data...');
1093
- const { encryptedData, autoFindKey = true } = params;
1094
- const decryptResults = {
1095
- original: null, decoded: [], detectedEncoding: [], extractedKeys: [], aesDecrypted: null
1096
- };
1097
- // Get data to decrypt — from param or from page
1098
- let dataToDecrypt = encryptedData;
1099
- if (!dataToDecrypt) {
1100
- // Try to get from clipboard or last API response
1101
- const lastApiResponse = state_1.state.networkRecords.filter(r => r.responseBody).pop();
1102
- if (lastApiResponse)
1103
- dataToDecrypt = lastApiResponse.responseBody;
1104
- }
1105
- if (!dataToDecrypt) {
1106
- return { success: false, error: 'No data to decrypt. Provide encryptedData parameter or start network_recorder first.' };
1107
- }
1108
- decryptResults.original = dataToDecrypt.substring(0, 500);
1109
- // 1. Base64 chain decode (recursive, up to 5 levels)
1110
- let b64Data = dataToDecrypt.trim();
1111
- for (let level = 0; level < 5; level++) {
1112
- if (!/^[A-Za-z0-9+/=]+$/.test(b64Data) || b64Data.length < 4)
1113
- break;
1114
- try {
1115
- const decoded = Buffer.from(b64Data, 'base64').toString('utf-8');
1116
- if (decoded && decoded.length > 0 && !/[\x00-\x08\x0e-\x1f]/.test(decoded.substring(0, 100))) {
1117
- decryptResults.decoded.push({ level: level + 1, type: 'base64', value: decoded.substring(0, 5000) });
1118
- decryptResults.detectedEncoding.push('base64');
1119
- // Check if result is JSON
1120
- try {
1121
- const json = JSON.parse(decoded);
1122
- decryptResults.decoded.push({ level: level + 1, type: 'base64_json', value: json });
1123
- }
1124
- catch (e) { }
1125
- b64Data = decoded; // Continue chain
1126
- }
1127
- else
1128
- break;
1129
- }
1130
- catch (e) {
1131
- break;
1132
- }
1133
- }
1134
- // 2. Hex decode
1135
- const hexClean = dataToDecrypt.replace(/\s+/g, '');
1136
- if (/^[0-9a-f]+$/i.test(hexClean) && hexClean.length >= 6 && hexClean.length % 2 === 0) {
1137
- try {
1138
- const hexDecoded = Buffer.from(hexClean, 'hex').toString('utf-8');
1139
- if (hexDecoded && !/[\x00-\x08\x0e-\x1f]/.test(hexDecoded.substring(0, 50))) {
1140
- decryptResults.decoded.push({ type: 'hex', value: hexDecoded.substring(0, 5000) });
1141
- decryptResults.detectedEncoding.push('hex');
1142
- }
1143
- }
1144
- catch (e) { }
1145
- }
1146
- // 3. URL decode (multi-level)
1147
- if (dataToDecrypt.includes('%')) {
1148
- try {
1149
- let urlDecoded = decodeURIComponent(dataToDecrypt);
1150
- decryptResults.decoded.push({ type: 'url', value: urlDecoded.substring(0, 5000) });
1151
- decryptResults.detectedEncoding.push('url');
1152
- // Double URL decode
1153
- if (urlDecoded.includes('%')) {
1154
- urlDecoded = decodeURIComponent(urlDecoded);
1155
- decryptResults.decoded.push({ type: 'url_double', value: urlDecoded.substring(0, 5000) });
1156
- }
1157
- }
1158
- catch (e) { }
1159
- }
1160
- // 4. ROT13
1161
- try {
1162
- const rot13 = dataToDecrypt.replace(/[a-zA-Z]/g, (c) => {
1163
- const base = c <= 'Z' ? 65 : 97;
1164
- return String.fromCharCode(((c.charCodeAt(0) - base + 13) % 26) + base);
1165
- });
1166
- if (rot13 !== dataToDecrypt && (rot13.includes('http') || rot13.includes('www') || rot13.includes('.com'))) {
1167
- decryptResults.decoded.push({ type: 'rot13', value: rot13.substring(0, 5000) });
1168
- decryptResults.detectedEncoding.push('rot13');
1169
- }
1170
- }
1171
- catch (e) { }
1172
- // 5. Auto-extract encryption keys from page scripts
1173
- if (autoFindKey) {
1174
- try {
1175
- const keys = await page.evaluate(() => {
1176
- const scripts = Array.from(document.querySelectorAll('script')).map(s => s.textContent).join('\n');
1177
- const found = [];
1178
- // CryptoJS patterns
1179
- const cryptoPatterns = [
1180
- /CryptoJS\.AES\.decrypt\s*\(\s*\w+\s*,\s*['"]([^'"]+)['"]/g,
1181
- /CryptoJS\.AES\.encrypt\s*\(\s*\w+\s*,\s*['"]([^'"]+)['"]/g,
1182
- /CryptoJS\.enc\.Utf8\.parse\s*\(\s*['"]([^'"]+)['"]/g,
1183
- /(?:secret|key|pass|password|iv|salt)\s*[:=]\s*['"]([^'"]{8,})['"]/gi,
1184
- /aes(?:Key|_key|Secret)\s*[:=]\s*['"]([^'"]{8,})['"]/gi
1185
- ];
1186
- for (const pat of cryptoPatterns) {
1187
- let m;
1188
- while ((m = pat.exec(scripts)) !== null) {
1189
- found.push({ pattern: pat.source.substring(0, 50), key: m[1] });
1190
- }
1191
- }
1192
- return found;
1193
- });
1194
- decryptResults.extractedKeys = keys.slice(0, 20);
1195
- }
1196
- catch (e) { }
1197
- }
1198
- // 6. AES decryption — try with extracted keys or user-provided key
1199
- const aesKey = params.aesKey || (decryptResults.extractedKeys[0]?.key);
1200
- if (aesKey && dataToDecrypt.length > 10) {
1201
- try {
1202
- const crypto = require('crypto');
1203
- // Try AES-256-CBC
1204
- for (const keyEncoding of ['utf8', 'hex', 'base64']) {
1205
- try {
1206
- let keyBuf;
1207
- if (keyEncoding === 'utf8')
1208
- keyBuf = Buffer.alloc(32); // pad to 32 bytes
1209
- else
1210
- keyBuf = Buffer.from(aesKey, keyEncoding);
1211
- if (keyEncoding === 'utf8') {
1212
- const kb = Buffer.from(aesKey, 'utf8');
1213
- kb.copy(keyBuf);
1214
- }
1215
- // Try to decode the data from base64 first
1216
- const dataBuf = Buffer.from(dataToDecrypt, 'base64');
1217
- if (dataBuf.length > 16) {
1218
- // IV might be first 16 bytes
1219
- const iv = params.aesIV ? Buffer.from(params.aesIV, keyEncoding) : dataBuf.slice(0, 16);
1220
- const encrypted = params.aesIV ? dataBuf : dataBuf.slice(16);
1221
- const decipher = crypto.createDecipheriv('aes-256-cbc', keyBuf, iv);
1222
- decipher.setAutoPadding(true);
1223
- let decrypted = decipher.update(encrypted, undefined, 'utf8');
1224
- decrypted += decipher.final('utf8');
1225
- if (decrypted && decrypted.length > 0) {
1226
- decryptResults.aesDecrypted = decrypted.substring(0, 5000);
1227
- decryptResults.detectedEncoding.push('aes-256-cbc');
1228
- // Try JSON parse
1229
- try {
1230
- decryptResults.aesDecrypted = JSON.parse(decrypted);
1231
- }
1232
- catch (e) { }
1233
- break;
1234
- }
1235
- }
1236
- }
1237
- catch (e) {
1238
- continue;
1239
- }
1240
- }
1241
- }
1242
- catch (e) { }
1243
- }
1244
- results.extracted = decryptResults;
1245
- const decodedCount = decryptResults.decoded.length + (decryptResults.aesDecrypted ? 1 : 0);
1246
- (0, state_1.notifyProgress)('extract_data', 'completed', `Decrypted: ${decodedCount} decodings, ${decryptResults.extractedKeys.length} keys found, encodings: ${decryptResults.detectedEncoding.join(', ') || 'none'}`);
1247
- break;
1248
- }
1249
- default:
1250
- return { success: false, error: `Unknown type: ${type}. Supported: regex, json, meta, structured, auto, deobfuscate, apiDiscovery, decrypt` };
1251
- }
1252
- return results;
98
+ return (0, network_extractors_1.extractData)(params);
1253
99
  },
1254
100
  async replay_request(params) {
1255
101
  const { page } = (0, state_1.requireBrowser)();
1256
102
  const { url, method = 'GET', headers, body } = params;
1257
103
  (0, state_1.notifyProgress)('replay_request', 'started', `Replaying ${method} to ${url}`);
1258
- // ponytail: reuse page.evaluate to fire native fetch, bypasses CORS & uses exact browser context auth/cookies
1259
104
  try {
1260
105
  const result = await page.evaluate(async ({ u, m, h, b }) => {
1261
106
  const res = await fetch(u, { method: m, headers: h, body: b });
1262
- return {
1263
- status: res.status,
1264
- headers: Object.fromEntries(res.headers.entries()),
1265
- body: await res.text().catch(() => null)
1266
- };
107
+ return { status: res.status, headers: Object.fromEntries(res.headers.entries()), body: await res.text().catch(() => null) };
1267
108
  }, { u: url, m: method, h: headers || {}, b: body });
1268
109
  (0, state_1.notifyProgress)('replay_request', 'completed', `Replay finished with status ${result.status}`);
1269
110
  return { success: true, result };