real-browser-mcp-server 1.5.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/README.md +46 -41
  2. package/dist/lib/cjs/index.d.ts +14 -1
  3. package/dist/lib/cjs/index.d.ts.map +1 -1
  4. package/dist/lib/cjs/index.js +5 -3
  5. package/dist/lib/cjs/index.js.map +1 -1
  6. package/dist/lib/cjs/module/pageController.d.ts +7 -1
  7. package/dist/lib/cjs/module/pageController.d.ts.map +1 -1
  8. package/dist/lib/cjs/module/pageController.js +4 -23
  9. package/dist/lib/cjs/module/pageController.js.map +1 -1
  10. package/dist/lib/cjs/module/turnstile.d.ts +4 -1
  11. package/dist/lib/cjs/module/turnstile.d.ts.map +1 -1
  12. package/dist/lib/cjs/module/turnstile.js +14 -13
  13. package/dist/lib/cjs/module/turnstile.js.map +1 -1
  14. package/dist/lib/esm/index.d.mjs +11 -0
  15. package/dist/lib/esm/index.mjs +4 -0
  16. package/dist/lib/esm/module/pageController.d.mjs +3 -0
  17. package/dist/lib/esm/module/turnstile.d.mjs +3 -0
  18. package/dist/scripts/generate-esm.d.ts +2 -0
  19. package/dist/scripts/generate-esm.d.ts.map +1 -0
  20. package/dist/scripts/generate-esm.js +100 -0
  21. package/dist/scripts/generate-esm.js.map +1 -0
  22. package/dist/src/index.d.ts.map +1 -1
  23. package/dist/src/index.js +0 -1
  24. package/dist/src/index.js.map +1 -1
  25. package/dist/src/mcp/handlers/browser.d.ts +8 -7
  26. package/dist/src/mcp/handlers/browser.d.ts.map +1 -1
  27. package/dist/src/mcp/handlers/browser.js +4 -3
  28. package/dist/src/mcp/handlers/browser.js.map +1 -1
  29. package/dist/src/mcp/handlers/dom.d.ts +19 -18
  30. package/dist/src/mcp/handlers/dom.d.ts.map +1 -1
  31. package/dist/src/mcp/handlers/dom.js +7 -28
  32. package/dist/src/mcp/handlers/dom.js.map +1 -1
  33. package/dist/src/mcp/handlers/extract.js.map +1 -1
  34. package/dist/src/mcp/handlers/handler-utils.d.ts +14 -0
  35. package/dist/src/mcp/handlers/handler-utils.d.ts.map +1 -0
  36. package/dist/src/mcp/handlers/handler-utils.js +42 -0
  37. package/dist/src/mcp/handlers/handler-utils.js.map +1 -0
  38. package/dist/src/mcp/handlers/helpers.d.ts +0 -7
  39. package/dist/src/mcp/handlers/helpers.d.ts.map +1 -1
  40. package/dist/src/mcp/handlers/helpers.js +0 -15
  41. package/dist/src/mcp/handlers/helpers.js.map +1 -1
  42. package/dist/src/mcp/handlers/media-handlers.d.ts +2 -1
  43. package/dist/src/mcp/handlers/media-handlers.d.ts.map +1 -1
  44. package/dist/src/mcp/handlers/media-handlers.js +24 -8
  45. package/dist/src/mcp/handlers/media-handlers.js.map +1 -1
  46. package/dist/src/mcp/handlers/network-extractors.d.ts +2 -0
  47. package/dist/src/mcp/handlers/network-extractors.d.ts.map +1 -0
  48. package/dist/src/mcp/handlers/network-extractors.js +651 -0
  49. package/dist/src/mcp/handlers/network-extractors.js.map +1 -0
  50. package/dist/src/mcp/handlers/network-recorder.d.ts +119 -0
  51. package/dist/src/mcp/handlers/network-recorder.d.ts.map +1 -0
  52. package/dist/src/mcp/handlers/network-recorder.js +337 -0
  53. package/dist/src/mcp/handlers/network-recorder.js.map +1 -0
  54. package/dist/src/mcp/handlers/network.d.ts +30 -118
  55. package/dist/src/mcp/handlers/network.d.ts.map +1 -1
  56. package/dist/src/mcp/handlers/network.js +28 -1187
  57. package/dist/src/mcp/handlers/network.js.map +1 -1
  58. package/dist/src/mcp/handlers/state.d.ts +1 -0
  59. package/dist/src/mcp/handlers/state.d.ts.map +1 -1
  60. package/dist/src/mcp/handlers/state.js +17 -0
  61. package/dist/src/mcp/handlers/state.js.map +1 -1
  62. package/dist/src/mcp/handlers/utility-handlers.d.ts +8 -37
  63. package/dist/src/mcp/handlers/utility-handlers.d.ts.map +1 -1
  64. package/dist/src/mcp/handlers/utility-handlers.js +69 -31
  65. package/dist/src/mcp/handlers/utility-handlers.js.map +1 -1
  66. package/dist/src/mcp/handlers/vision-captcha.d.ts +221 -0
  67. package/dist/src/mcp/handlers/vision-captcha.d.ts.map +1 -0
  68. package/dist/src/mcp/handlers/vision-captcha.js +238 -0
  69. package/dist/src/mcp/handlers/vision-captcha.js.map +1 -0
  70. package/dist/src/mcp/handlers/vision-see-page.d.ts +32 -0
  71. package/dist/src/mcp/handlers/vision-see-page.d.ts.map +1 -0
  72. package/dist/src/mcp/handlers/vision-see-page.js +260 -0
  73. package/dist/src/mcp/handlers/vision-see-page.js.map +1 -0
  74. package/dist/src/mcp/handlers/vision.d.ts +50 -27
  75. package/dist/src/mcp/handlers/vision.d.ts.map +1 -1
  76. package/dist/src/mcp/handlers/vision.js +4 -606
  77. package/dist/src/mcp/handlers/vision.js.map +1 -1
  78. package/dist/src/mcp/index.d.ts.map +1 -1
  79. package/dist/src/mcp/index.js +11 -3
  80. package/dist/src/mcp/index.js.map +1 -1
  81. package/dist/src/shared/cache-manager.d.ts +0 -2
  82. package/dist/src/shared/cache-manager.d.ts.map +1 -1
  83. package/dist/src/shared/cache-manager.js +1 -3
  84. package/dist/src/shared/cache-manager.js.map +1 -1
  85. package/dist/src/shared/lib-core.d.ts +2 -1
  86. package/dist/src/shared/lib-core.d.ts.map +1 -1
  87. package/dist/src/shared/lib-core.js +5 -38
  88. package/dist/src/shared/lib-core.js.map +1 -1
  89. package/dist/src/shared/tools.d.ts.map +1 -1
  90. package/dist/src/shared/tools.js +6 -18
  91. package/dist/src/shared/tools.js.map +1 -1
  92. package/dist/src/types.d.ts +25 -6
  93. package/dist/src/types.d.ts.map +1 -1
  94. package/dist/test/cjs/test.js +29 -33
  95. package/dist/test/cjs/test.js.map +1 -1
  96. package/dist/test/mcp/smoke-test.d.ts.map +1 -1
  97. package/dist/test/mcp/smoke-test.js +9 -3
  98. package/dist/test/mcp/smoke-test.js.map +1 -1
  99. package/dist/test/unit/handler-test.d.ts +3 -0
  100. package/dist/test/unit/handler-test.d.ts.map +1 -0
  101. package/dist/test/unit/handler-test.js +133 -0
  102. package/dist/test/unit/handler-test.js.map +1 -0
  103. package/lib/esm/module/pageController.mjs +4 -22
  104. package/lib/esm/module/turnstile.mjs +16 -13
  105. package/package.json +5 -4
  106. package/typings.d.ts +5 -40
@@ -0,0 +1,651 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.extractData = extractData;
4
+ const state_1 = require("./state");
5
/**
 * Extract data from the page of the currently active browser.
 *
 * The `type` field of `params` selects the extraction strategy:
 *   - `links`        anchors, data-* attributes, onclick handlers, inline scripts,
 *                    hidden inputs, meta refresh and iframe sources
 *   - `regex`        run `pattern` (with `flags`) against the content chosen by `source`
 *   - `json`         collect JSON from ld+json blocks, inline scripts, well-known
 *                    window globals or a selector, optionally narrowed by `jsonPath`
 *   - `meta`         <meta> tags grouped into meta / og / twitter
 *   - `structured`   tag name, text, html, attributes and bounding box of `selector`
 *   - `auto`         meta + ld+json + a few common text patterns (default)
 *   - `deobfuscate`, `apiDiscovery`, `decrypt` delegate to sibling functions
 *
 * @param {object} [params] Extraction options; see the destructuring below for
 *   the recognised keys and their defaults.
 * @returns {Promise<object>} `{ success, type, url, extracted, ... }` on success,
 *   or an object with `success: false` and `error` when the request cannot be served.
 */
async function extractData(params = {}) {
    const { page } = (0, state_1.requireBrowser)();
    const {
        type = 'auto', pattern, selector, jsonPath, source = 'all', autoDecode = true, flags = 'gi',
        types = ['all'], includeTitle = true, includeCanonical = true, maxMatches = 100,
        maxJsonObjects = 50, waitForSelector = false, selectorTimeout = 10000,
    } = params;
    const notify = (status, message) => (0, state_1.notifyProgress)('extract_data', status, message);
    notify('started', `Extracting data (type: ${type})...`);
    const results = { success: true, type, url: page.url(), extracted: {} };

    // Text of every <script> element, joined by newlines.
    const readScriptText = () => page.$$eval('script', (scripts) => scripts.map((s) => s.textContent).join('\n'));

    // Parsed contents of every ld+json block; blocks that fail to parse are dropped.
    const readLdJson = () => page.$$eval('script[type="application/ld+json"]', (scripts) => scripts
        .map((s) => {
            try {
                return JSON.parse(s.textContent);
            }
            catch {
                return null;
            }
        })
        .filter(Boolean));

    const extractRegex = async (regexPattern, regexFlags, contentSource) => {
        let content;
        if (contentSource === 'html') {
            content = await page.content();
        }
        else if (contentSource === 'scripts') {
            content = await readScriptText();
        }
        else if (contentSource === 'text') {
            content = await page.evaluate(() => document.body.innerText);
        }
        else {
            const html = await page.content();
            const scripts = await readScriptText();
            content = `${html}\n${scripts}`;
        }
        const matches = content.match(new RegExp(regexPattern, regexFlags)) || [];
        const decodeMatch = (m) => {
            // Without the `g` flag the match array can hold `undefined` for groups
            // that did not participate; decoding those would yield "undefined".
            if (typeof m !== 'string') {
                return m;
            }
            try {
                return decodeURIComponent(m);
            }
            catch {
                return m; // malformed percent-encoding: keep the raw match
            }
        };
        const finalMatches = autoDecode ? matches.map(decodeMatch) : matches;
        return { pattern: regexPattern, flags: regexFlags, matchCount: finalMatches.length, matches: finalMatches.slice(0, maxMatches) };
    };

    const extractJson = async (jsonSource, sel, path) => {
        const jsonData = [];
        if (jsonSource === 'scripts') {
            const content = await readScriptText();
            // Matches objects/arrays with at most one level of nesting.
            const jsonRegex = /\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}|\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]/g;
            for (const candidate of (content.match(jsonRegex) || []).slice(0, maxJsonObjects)) {
                try {
                    jsonData.push(JSON.parse(candidate));
                }
                catch {
                    // not valid JSON; skip this candidate
                }
            }
        }
        else if (jsonSource === 'api') {
            const apiData = await page.evaluate(() => {
                const data = [];
                if (window.__DATA__)
                    data.push(window.__DATA__);
                if (window.__INITIAL_STATE__)
                    data.push(window.__INITIAL_STATE__);
                if (window.__APP_DATA__)
                    data.push(window.__APP_DATA__);
                if (window.data)
                    data.push(window.data);
                if (window.config)
                    data.push(window.config);
                return data;
            });
            jsonData.push(...apiData);
        }
        else if (jsonSource !== 'ld+json' && sel) {
            try {
                const text = await page.$eval(sel, (el) => el.textContent);
                jsonData.push(JSON.parse(text));
            }
            catch {
                // selector missing or content is not JSON; leave jsonData empty
            }
        }
        else {
            // Explicit 'ld+json' and the fallback for any other source both land here.
            jsonData.push(...(await readLdJson()));
        }
        if (!path || jsonData.length === 0) {
            return jsonData;
        }
        const readPath = (obj, pathStr) => {
            let current = obj;
            for (const part of pathStr.replace(/^\$\./, '').split('.')) {
                if (current === null || current === undefined) {
                    return undefined;
                }
                if (part.includes('[') && part.includes(']')) {
                    const arrName = part.substring(0, part.indexOf('['));
                    const idx = Number.parseInt(part.match(/\[(\d+)\]/)?.[1] || '0', 10);
                    // An empty name (e.g. "[0]") indexes the current value itself.
                    const container = arrName ? current[arrName] : current;
                    current = container?.[idx];
                }
                else {
                    current = current[part];
                }
            }
            return current;
        };
        return jsonData.map((obj) => ({ original: obj, extracted: readPath(obj, path) }));
    };

    const extractMeta = async (metaTypes) => {
        const meta = await page.evaluate(([incTitle, incCanonical]) => {
            const result = { meta: {}, og: {}, twitter: {} };
            document.querySelectorAll('meta').forEach((tag) => {
                const name = tag.getAttribute('name') || tag.getAttribute('property');
                const content = tag.getAttribute('content');
                if (!name || !content)
                    return;
                if (name.startsWith('og:'))
                    result.og[name.replace('og:', '')] = content;
                else if (name.startsWith('twitter:'))
                    result.twitter[name.replace('twitter:', '')] = content;
                else
                    result.meta[name] = content;
            });
            if (incTitle)
                result.title = document.title;
            if (incCanonical)
                result.canonical = document.querySelector('link[rel="canonical"]')?.href;
            return result;
        }, [includeTitle, includeCanonical]);
        if (metaTypes.includes('all')) {
            return meta;
        }
        const filtered = {};
        for (const group of ['meta', 'og', 'twitter']) {
            if (metaTypes.includes(group)) {
                filtered[group] = meta[group];
            }
        }
        if (includeTitle) {
            filtered.title = meta.title;
        }
        if (includeCanonical) {
            filtered.canonical = meta.canonical;
        }
        return filtered;
    };

    const extractStructured = async (sel, wait = false, timeout = 10000) => {
        if (wait) {
            await page.waitForSelector(sel, { timeout });
        }
        const element = await page.$(sel);
        if (!element) {
            return { error: `Element not found: ${sel}` };
        }
        return element.evaluate((el) => {
            const rect = el.getBoundingClientRect ? el.getBoundingClientRect() : null;
            return {
                tagName: el.tagName, text: el.innerText, html: el.innerHTML,
                attributes: Object.fromEntries([...el.attributes].map((a) => [a.name, a.value])),
                childCount: el.children.length,
                boundingBox: rect ? { x: rect.x, y: rect.y, width: rect.width, height: rect.height } : null,
            };
        });
    };

    const extractAuto = async () => {
        const autoResults = { meta: null, json: null, structured: null, patterns: [] };
        // Best effort: a failure in one sub-extraction must not abort the others.
        try {
            autoResults.meta = await extractMeta(['all']);
        }
        catch (e) { }
        try {
            autoResults.json = await extractJson('ld+json');
        }
        catch (e) { }
        const commonPatterns = [
            { name: 'emails', pattern: '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' },
            { name: 'phones', pattern: '(\\+?1?[-.\\s]?)?\\(?[0-9]{3}\\)?[-.\\s]?[0-9]{3}[-.\\s]?[0-9]{4}' },
            { name: 'urls', pattern: 'https?://[^\\s<>"{}|\\\\^`\\[\\]]+' },
            { name: 'ipv4', pattern: '\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b' },
        ];
        const pageText = await page.evaluate(() => document.body.innerText);
        for (const { name, pattern: source } of commonPatterns) {
            const matches = [...new Set(pageText.match(new RegExp(source, 'gi')) || [])];
            if (matches.length > 0) {
                autoResults.patterns.push({ type: name, count: matches.length, samples: matches.slice(0, 10) });
            }
        }
        return autoResults;
    };

    const extractLinks = async () => {
        const { includeHidden = true, searchIframes = false } = params;
        // Runs inside the page/frame; everything in the callback must be self-contained.
        const collectFrom = (context) => context.evaluate(({ incHidden }) => {
            const allLinks = [];
            const seenUrls = new Set();
            const isHidden = (element) => {
                if (!element)
                    return false;
                if (element.offsetParent === null)
                    return true;
                const style = getComputedStyle(element);
                return style.display === 'none' || style.visibility === 'hidden';
            };
            const addLink = (rawHref, text, source, element) => {
                if (!rawHref)
                    return;
                if (!rawHref.startsWith('http') && !rawHref.startsWith('//'))
                    return;
                // Normalise protocol-relative URLs before the duplicate check so that
                // "//host/x" and "https://host/x" collapse into one entry.
                const href = rawHref.startsWith('//') ? window.location.protocol + rawHref : rawHref;
                if (seenUrls.has(href))
                    return;
                seenUrls.add(href);
                allLinks.push({ href, text: (text || '').trim().substring(0, 100), source, hidden: isHidden(element) });
            };
            document.querySelectorAll('a[href]').forEach((a) => addLink(a.href, a.textContent, 'anchor', a));
            for (const attr of ['data-href', 'data-url', 'data-link', 'data-src', 'data-file', 'data-download']) {
                document.querySelectorAll(`[${attr}]`).forEach((el) => addLink(el.getAttribute(attr), el.textContent, attr, el));
            }
            if (incHidden) {
                document.querySelectorAll('[onclick]').forEach((el) => {
                    const onclick = el.getAttribute('onclick');
                    if (!onclick)
                        return;
                    (onclick.match(/https?:\/\/[^\s"'<>]+/gi) || []).forEach((url) => addLink(url, el.textContent, 'onclick', el));
                    const hrefMatch = onclick.match(/location\.href\s*=\s*['"]([^'"]+)['"]/);
                    if (hrefMatch)
                        addLink(hrefMatch[1], el.textContent, 'onclick-location', el);
                    const openMatch = onclick.match(/window\.open\s*\(\s*['"]([^'"]+)['"]/);
                    if (openMatch)
                        addLink(openMatch[1], el.textContent, 'onclick-window-open', el);
                });
                // Only the first 20 scripts are scanned.
                [...document.querySelectorAll('script')].slice(0, 20).forEach((script) => {
                    const content = script.textContent || '';
                    const scriptPatterns = [
                        /["']?(https?:\/\/[^"'\s<>]+\.(mp4|mkv|avi|m3u8|mpd|zip|rar|pdf))[^"'\s<>]*["']?/gi,
                        /download[_-]?url\s*[:=]\s*["']([^"']+)["']/gi,
                        /file\s*[:=]\s*["']([^"']+)["']/gi,
                    ];
                    for (const scriptPattern of scriptPatterns) {
                        for (const found of content.matchAll(scriptPattern)) {
                            addLink(found[1], 'script-extracted', 'script', null);
                        }
                    }
                });
            }
            document.querySelectorAll('a[href^="javascript:"]').forEach((a) => {
                const urls = a.getAttribute('href')?.match(/https?:\/\/[^\s"'<>]+/gi);
                if (urls)
                    urls.forEach((url) => addLink(url, a.textContent, 'javascript-href', a));
            });
            document.querySelectorAll('input[type="hidden"]').forEach((input) => {
                if (input.value && (input.value.startsWith('http') || input.value.startsWith('//')))
                    addLink(input.value, input.name || input.id, 'hidden-input', input);
            });
            const metaRefresh = document.querySelector('meta[http-equiv="refresh"]');
            if (metaRefresh) {
                const target = metaRefresh.getAttribute('content')?.match(/url=(.+)/i);
                if (target)
                    addLink(target[1].trim().replace(/['"]/g, ''), 'meta-refresh', 'meta', null);
            }
            document.querySelectorAll('iframe[src]').forEach((iframe) => addLink(iframe.src, 'iframe', 'iframe', iframe));
            return allLinks;
        }, { incHidden: includeHidden }).catch(() => []);

        let links = await collectFrom(page);
        if (searchIframes) {
            const frames = page.frames();
            // Index 0 is skipped and at most four further frames are scanned.
            for (let i = 1; i < frames.length && i < 5; i++) {
                try {
                    const frame = frames[i];
                    if (frame.url() && frame.url() !== 'about:blank') {
                        const frameLinks = await collectFrom(frame);
                        frameLinks.forEach((link) => { link.source = `iframe:${link.source}`; });
                        links = [...links, ...frameLinks];
                    }
                }
                catch (e) {
                    // best effort: a frame that cannot be read is skipped
                }
            }
        }
        if (!includeHidden) {
            links = links.filter((link) => !link.hidden);
        }
        // The same URL can show up in several frames; keep the first occurrence.
        const seen = new Set();
        return links.filter((link) => {
            if (seen.has(link.href))
                return false;
            seen.add(link.href);
            return true;
        });
    };

    switch (type) {
        case 'links': {
            const links = await extractLinks();
            results.extracted = { count: links.length, links };
            notify('completed', `Links: ${links.length} extracted`);
            break;
        }
        case 'regex': {
            if (!pattern) {
                return { success: false, error: 'Pattern is required for regex extraction' };
            }
            results.extracted = await extractRegex(pattern, flags, source);
            notify('completed', `Regex: ${results.extracted.matchCount} matches`);
            break;
        }
        case 'json': {
            results.extracted = await extractJson(source, selector, jsonPath);
            results.count = Array.isArray(results.extracted) ? results.extracted.length : 0;
            notify('completed', `JSON: ${results.count} objects`);
            break;
        }
        case 'meta': {
            results.extracted = await extractMeta(types);
            const tagCount = Object.values(results.extracted).reduce((sum, val) => {
                if (typeof val === 'object' && val !== null)
                    return sum + Object.keys(val).length;
                return sum + (val ? 1 : 0);
            }, 0);
            notify('completed', `Meta: ${tagCount} tags`);
            break;
        }
        case 'structured': {
            if (!selector) {
                return { success: false, error: 'Selector is required for structured extraction. Run see_page(annotate: true) first to discover valid selectors or annotation IDs.' };
            }
            results.extracted = await extractStructured(selector, waitForSelector, selectorTimeout);
            if (results.extracted.error) {
                results.success = false;
                results.error = results.extracted.error;
                delete results.extracted;
            }
            notify('completed', results.success ? 'Structured data extracted' : 'Extraction failed');
            break;
        }
        case 'auto': {
            results.extracted = await extractAuto();
            const summary = [];
            if (results.extracted.meta)
                summary.push('meta');
            if (results.extracted.json?.length)
                summary.push('json');
            if (results.extracted.patterns?.length)
                summary.push('patterns');
            notify('completed', `Auto: ${summary.join(', ')}`);
            break;
        }
        case 'deobfuscate': {
            results.extracted = await deobfuscateJS(page);
            notify('completed', `Deobfuscated: ${results.extracted.decodedStrings.length} strings`);
            break;
        }
        case 'apiDiscovery': {
            results.extracted = await discoverAPIs(page);
            const total = results.extracted.fetchEndpoints.length + results.extracted.xhrEndpoints.length +
                results.extracted.inlineApiPatterns.length + results.extracted.dynamicApis.length;
            notify('completed', `API Discovery: ${total} endpoints found`);
            break;
        }
        case 'decrypt': {
            const outcome = await decryptData(page, params);
            // decryptData signals "nothing to decrypt" with `success: false`; such a
            // result may carry no `decoded` list, so it must not be counted.
            if (outcome.success === false) {
                results.success = false;
                results.error = outcome.error;
                delete results.extracted;
                notify('completed', 'Decryption failed');
                break;
            }
            results.extracted = outcome;
            const decodedCount = outcome.decoded.length + (outcome.aesDecrypted ? 1 : 0);
            notify('completed', `Decrypted: ${decodedCount} decodings`);
            break;
        }
        default:
            return { success: false, error: `Unknown type: ${type}. Supported: regex, json, meta, structured, auto, deobfuscate, apiDiscovery, decrypt, links` };
    }
    return results;
}
350
/**
 * Collect readable strings from the JavaScript of a page.
 *
 * Inline script text is read through `page.evaluate`; up to ten external
 * scripts are additionally downloaded with the global `fetch` of this process.
 * The combined source is then scanned for:
 *   - `_0x…` string arrays
 *   - `\xNN` and `\uNNNN` escaped string literals
 *   - `eval(function(p,a,c,k,e,d){…}(…))` packed payloads and plain `eval('…')`
 *   - `module.exports = '…'` style exports and `webpackChunk….push` ids
 *   - single-letter variables assigned a string literal
 *   - chains of concatenated string literals
 *   - `fetch('…')` calls
 *
 * @param {object} page Object exposing `evaluate(fn)` that returns a promise.
 * @returns {Promise<object>} Buckets of findings; `decodedStrings` is the
 *   de-duplicated union (capped at 500) from which `urls` and `apiEndpoints`
 *   are derived.
 */
async function deobfuscateJS(page) {
    const scriptContents = await page.evaluate(() => Array.from(document.querySelectorAll('script')).map((s) => s.textContent).join('\n')).catch(() => '');
    const externalScripts = await page.evaluate(() => Array.from(document.querySelectorAll('script[src]')).map((s) => s.src)).catch(() => []);
    let allJs = scriptContents;
    for (const src of externalScripts.slice(0, 10)) {
        try {
            const resp = await fetch(src);
            const body = await resp.text();
            allJs += `\n${body}`;
        }
        catch (e) {
            // best effort: a script that cannot be downloaded is skipped
        }
    }
    const deobfuscated = {
        stringArrays: [], decodedStrings: [], functionMappings: [],
        apiEndpoints: [], urls: [], fetchCalls: [],
        webpackModules: [], evalUnpacked: [], resolvedConcats: [], unicodeDecoded: [],
    };
    const stripQuotes = (s) => s.replace(/^['"]|['"]$/g, '');

    // `_0x…` string arrays.
    const arrayPattern = /(?:const|var|let)\s+(_0x[a-f0-9]+)\s*=\s*\[([^\]]{20,})\]/g;
    for (const found of allJs.matchAll(arrayPattern)) {
        const strings = (found[2].match(/'([^']*)'|"([^"]*)"/g) || []).map(stripQuotes);
        deobfuscated.stringArrays.push({ variable: found[1], count: strings.length, strings });
        deobfuscated.decodedStrings.push(...strings);
    }

    // String literals that start with \xNN escapes (first 50 distinct ones).
    const hexStrings = [...new Set(allJs.match(/(?:'(?:\\x[0-9a-f]{2})+[^']*'|"(?:\\x[0-9a-f]{2})+[^"]*")/gi) || [])];
    for (const literal of hexStrings.slice(0, 50)) {
        const decoded = literal.slice(1, -1).replace(/\\x([0-9a-f]{2})/gi, (_, h) => String.fromCharCode(Number.parseInt(h, 16)));
        if (decoded.length > 2) {
            deobfuscated.decodedStrings.push(decoded);
        }
    }

    // String literals that start with \uNNNN escapes (first 50).
    const unicodeMatches = allJs.match(/(?:'(?:\\u[0-9a-f]{4})+[^']*'|"(?:\\u[0-9a-f]{4})+[^"]*")/gi) || [];
    for (const literal of unicodeMatches.slice(0, 50)) {
        const decoded = literal.slice(1, -1).replace(/\\u([0-9a-f]{4})/gi, (_, h) => String.fromCharCode(Number.parseInt(h, 16)));
        if (decoded.length > 1) {
            deobfuscated.unicodeDecoded.push(decoded);
            deobfuscated.decodedStrings.push(decoded);
        }
    }

    // Packed eval: eval(function(p,a,c,k,e,d){…}('payload', radix, count, 'k0|k1|…' …
    // The body is matched lazily up to the first "}(" + quote so bodies containing
    // nested braces are accepted; radix and count are captured separately because
    // a repeated group would only retain its last repetition.
    const evalPattern = /eval\s*\(\s*function\s*\(\s*p\s*,\s*a\s*,\s*c\s*,\s*k\s*,\s*e\s*,?\s*[dr]?\s*\)\s*\{[\s\S]*?\}\s*\(\s*'((?:[^'\\]|\\[\s\S])*)'\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*'((?:[^'\\]|\\[\s\S])*)'/g;
    for (const packed of allJs.matchAll(evalPattern)) {
        const payload = packed[1].replace(/\\(['\\])/g, '$1');
        const radix = Number.parseInt(packed[2], 10) || 62;
        const keywords = packed[4].split('|');
        const unpacked = unpackPackedPayload(payload, radix, keywords);
        deobfuscated.evalUnpacked.push(unpacked.substring(0, 3000));
        const unpackedStrings = unpacked.match(/['"]([^'"]{3,})['"]/g) || [];
        for (const s of unpackedStrings.slice(0, 100)) {
            deobfuscated.decodedStrings.push(stripQuotes(s));
        }
    }

    // eval('…') with a plain string argument.
    for (const found of allJs.matchAll(/eval\s*\(\s*['"]([^'"]{10,})['"]\s*\)/g)) {
        deobfuscated.evalUnpacked.push(found[1].substring(0, 2000));
    }

    // String-valued exports and webpack chunk pushes.
    const webpackExports = allJs.match(/(?:module\.exports|exports\.\w+)\s*=\s*['"]([^'"]+)['"]/g) || [];
    for (const exp of webpackExports.slice(0, 30)) {
        const val = exp.match(/=\s*['"]([^'"]+)['"]/);
        if (val) {
            deobfuscated.webpackModules.push(val[1]);
            deobfuscated.decodedStrings.push(val[1]);
        }
    }
    const chunkIds = allJs.match(/webpackChunk\w*\.push\s*\(\s*\[\s*\[([^\]]+)\]/g) || [];
    for (const chunk of chunkIds.slice(0, 10)) {
        deobfuscated.webpackModules.push(`chunk: ${chunk.substring(0, 100)}`);
    }

    // Single-letter variables holding a string literal.
    const terserPattern = /(?:var|let|const)\s+([a-z])\s*=\s*['"]([^'"]{2,})['"]/gi;
    for (const [, variable, value] of allJs.matchAll(terserPattern)) {
        if (value.length > 2 && value.length < 200) {
            deobfuscated.functionMappings.push({ variable, value });
            deobfuscated.decodedStrings.push(value);
        }
    }

    // 'a' + 'b' + 'c' chains (three or more literals), joined back together.
    const concatMatches = allJs.match(/(?:['"][^'"]*['"]\s*\+\s*){2,}['"][^'"]*['"]/g) || [];
    for (const chain of concatMatches.slice(0, 50)) {
        const parts = chain.match(/['"]([^'"]*)['"]|(['"])/g) || [];
        const resolved = parts.map(stripQuotes).join('');
        if (resolved.length > 3) {
            deobfuscated.resolvedConcats.push(resolved);
            deobfuscated.decodedStrings.push(resolved);
        }
    }

    const fetchPatterns = allJs.match(/fetch\s*\(\s*['"]([^'"]+)['"]/g) || [];
    deobfuscated.fetchCalls = fetchPatterns.map((f) => f.replace(/fetch\s*\(\s*['"]/, '').replace(/['"]$/, '')).slice(0, 20);
    deobfuscated.urls = [...new Set(deobfuscated.decodedStrings.filter((s) => s.match(/^(https?:\/\/|\/)/) || s.match(/\.(php|json|api|asp|jsp)$/i)))].slice(0, 50);
    deobfuscated.apiEndpoints = [...new Set(deobfuscated.decodedStrings.filter((s) => s.match(/^\/[a-z]/i) && s.length > 3 && s.length < 100))].slice(0, 30);
    deobfuscated.decodedStrings = [...new Set(deobfuscated.decodedStrings)].slice(0, 500);
    return deobfuscated;
}

/**
 * Replace every word of a packed payload by the keyword it indexes.
 *
 * Words are read as numbers in `radix` using the digits 0-9, a-z (10-35) and
 * A-Z (36-61). `parseInt` cannot be used for this because it rejects radices
 * above 36 and treats letters case-insensitively. A word that is not a valid
 * number in `radix`, or whose keyword is missing or empty, is left unchanged.
 */
function unpackPackedPayload(payload, radix, keywords) {
    const digitValue = (ch) => {
        if (ch >= '0' && ch <= '9')
            return ch.charCodeAt(0) - 48;
        if (ch >= 'a' && ch <= 'z')
            return ch.charCodeAt(0) - 87;
        if (ch >= 'A' && ch <= 'Z')
            return ch.charCodeAt(0) - 29;
        return -1;
    };
    return payload.replace(/\b\w+\b/g, (word) => {
        let index = 0;
        for (const ch of word) {
            const digit = digitValue(ch);
            if (digit < 0 || digit >= radix) {
                return word;
            }
            index = index * radix + digit;
        }
        return (index < keywords.length && keywords[index]) ? keywords[index] : word;
    });
}
456
/**
 * Discover API endpoints used by a page.
 *
 * Two sources are combined:
 *   1. Runtime capture: `window.fetch` and `XMLHttpRequest.prototype.open/send`
 *      are wrapped inside the page and every call is appended to
 *      `window.__capturedApis`. On the first invocation the function waits
 *      `captureWindowMs` before returning what was captured; if the hooks are
 *      already installed the existing list is returned immediately. The hooks
 *      are left in place.
 *   2. Static scan of inline script text for `fetch(...)`, `.open('METHOD', ...)`,
 *      request-body builders and URL-looking string literals, plus form actions
 *      and external script sources.
 *
 * @param {object} page Object exposing `evaluate(fn, ...args)` that returns a promise.
 * @param {object} [options]
 * @param {number} [options.captureWindowMs=3000] How long to wait for runtime
 *   calls after installing the hooks.
 * @returns {Promise<object>} `{ fetchEndpoints, xhrEndpoints, formActions,
 *   scriptSources, inlineApiPatterns, postBodies, dynamicApis }`.
 */
async function discoverAPIs(page, options = {}) {
    const { captureWindowMs = 3000 } = options;
    const apiResults = {
        fetchEndpoints: [], xhrEndpoints: [], formActions: [],
        scriptSources: [], inlineApiPatterns: [], postBodies: [], dynamicApis: [],
    };
    try {
        apiResults.dynamicApis = await page.evaluate((windowMs) => new Promise((resolve) => {
            if (window.__capturedApis) {
                resolve(window.__capturedApis);
                return;
            }
            const found = [];
            // fetch() and XHR.open() accept strings, URL objects and (fetch only)
            // Request objects; reduce all of them to a plain string.
            const describeUrl = (input) => {
                if (typeof input === 'string')
                    return input;
                if (typeof URL !== 'undefined' && input instanceof URL)
                    return input.href;
                return input?.url;
            };
            const clip = (body) => (typeof body === 'string' ? body.substring(0, 500) : null);
            const origFetch = window.fetch;
            window.fetch = function (...args) {
                try {
                    const opts = args[1] || {};
                    found.push({ type: 'fetch', url: describeUrl(args[0]), method: opts.method || 'GET', body: clip(opts.body) });
                }
                catch (e) {
                    // recording must never break the page's own request
                }
                return origFetch.apply(this, args);
            };
            const origOpen = XMLHttpRequest.prototype.open;
            const origSend = XMLHttpRequest.prototype.send;
            XMLHttpRequest.prototype.open = function (method, url, ...rest) {
                this.__apiUrl = describeUrl(url);
                this.__apiMethod = method;
                return origOpen.apply(this, [method, url, ...rest]);
            };
            XMLHttpRequest.prototype.send = function (body) {
                found.push({ type: 'xhr', url: this.__apiUrl, method: this.__apiMethod, body: clip(body) });
                return origSend.apply(this, [body]);
            };
            window.__capturedApis = found;
            setTimeout(() => resolve(found), windowMs);
        }), captureWindowMs);
    }
    catch (e) {
        apiResults.dynamicApis = [];
    }

    const allScriptContent = await page.evaluate(() => Array.from(document.querySelectorAll('script')).map((s) => s.textContent).join('\n')).catch(() => '');
    const uniqueFirst = (values, limit) => [...new Set(values)].slice(0, limit);

    // fetch('literal') or fetch(identifier): identifiers are reported by name.
    const fetchRegex = /fetch\s*\(\s*(?:['"`]([^'"`]+)['"`]|([a-zA-Z_$][a-zA-Z0-9_$]*))/g;
    const fetchHits = Array.from(allScriptContent.matchAll(fetchRegex), (m) => m[1] || m[2]);
    apiResults.fetchEndpoints = uniqueFirst(fetchHits, 30);

    const xhrRegex = /\.open\s*\(\s*['"](?:GET|POST|PUT|DELETE)['"]\s*,\s*['"`]([^'"`]+)['"`]/gi;
    const xhrHits = Array.from(allScriptContent.matchAll(xhrRegex), (m) => m[1]);
    apiResults.xhrEndpoints = uniqueFirst(xhrHits, 30);

    apiResults.formActions = await page.evaluate(() => Array.from(document.querySelectorAll('form[action]')).map((f) => ({ action: f.action, method: f.method || 'GET', id: f.id || null }))).catch(() => []);

    const postBodyPatterns = allScriptContent.match(/(?:URLSearchParams|FormData|JSON\.stringify)\s*\(\s*\{[^}]{5,200}\}/g) || [];
    apiResults.postBodies = postBodyPatterns.slice(0, 10);

    const apiUrlPattern = /['"`]((?:https?:\/\/[^'"`]+|\/)(?:[a-zA-Z0-9_\-\/]+\.(?:php|json|api|asp|aspx|do|action))[^'"`]*)['"`]/g;
    const inlineHits = Array.from(allScriptContent.matchAll(apiUrlPattern), (m) => m[1]);
    apiResults.inlineApiPatterns = uniqueFirst(inlineHits, 30);

    apiResults.scriptSources = await page.evaluate(() => Array.from(document.querySelectorAll('script[src]')).map((s) => s.src)).catch(() => []);
    return apiResults;
}
517
/**
 * Try a series of decodings on a piece of data and report which ones produced
 * plausible text.
 *
 * The input is `params.encryptedData`; when that is missing, the body of the
 * most recent entry in `state_1.state.networkRecords` that has a `responseBody`
 * is used. Attempted in order:
 *   - base64, up to five nested levels (each level also tried as JSON)
 *   - hex
 *   - URL decoding (single and double)
 *   - ROT13, kept only when the result contains "http", "www" or ".com"
 *   - AES-256-CBC with `params.aesKey`, or with the first key-looking string
 *     literal found in the page's scripts when `autoFindKey` is on. The key is
 *     tried as utf8 (zero-padded/truncated to 32 bytes), hex and base64. The IV
 *     is `params.aesIV` or the first 16 bytes of the base64-decoded data.
 *
 * @param {object} page Object exposing `evaluate(fn)`; only used when `autoFindKey` is on.
 * @param {object} params `{ encryptedData?, autoFindKey = true, aesKey?, aesIV? }`.
 * @returns {Promise<object>} `{ original, decoded, detectedEncoding, extractedKeys,
 *   aesDecrypted }`. When there is nothing to decrypt the same shape is returned
 *   with empty values plus `success: false` and `error`.
 */
async function decryptData(page, params) {
    const { encryptedData, autoFindKey = true } = params;
    const decryptResults = { original: null, decoded: [], detectedEncoding: [], extractedKeys: [], aesDecrypted: null };
    const looksBinary = /[\x00-\x08\x0e-\x1f]/;

    let dataToDecrypt = encryptedData;
    if (!dataToDecrypt) {
        const records = state_1.state?.networkRecords ?? [];
        const lastApiResponse = records.filter((r) => r.responseBody).pop();
        if (lastApiResponse) {
            dataToDecrypt = lastApiResponse.responseBody;
        }
    }
    if (!dataToDecrypt) {
        // Keep the regular result shape so callers can read `decoded` safely.
        return { ...decryptResults, success: false, error: 'No data to decrypt. Provide encryptedData parameter or start network_recorder first.' };
    }
    if (typeof dataToDecrypt !== 'string') {
        // Everything below works on strings; bodies may arrive as Buffers or parsed JSON.
        dataToDecrypt = Buffer.isBuffer(dataToDecrypt)
            ? dataToDecrypt.toString('utf8')
            : (JSON.stringify(dataToDecrypt) ?? String(dataToDecrypt));
    }
    decryptResults.original = dataToDecrypt.substring(0, 500);

    // base64, peeled layer by layer while the result still looks like base64 text.
    let b64Data = dataToDecrypt.trim();
    for (let level = 0; level < 5; level++) {
        if (!/^[A-Za-z0-9+/=]+$/.test(b64Data) || b64Data.length < 4) {
            break;
        }
        const decoded = Buffer.from(b64Data, 'base64').toString('utf-8');
        if (!decoded || looksBinary.test(decoded.substring(0, 100))) {
            break;
        }
        decryptResults.decoded.push({ level: level + 1, type: 'base64', value: decoded.substring(0, 5000) });
        decryptResults.detectedEncoding.push('base64');
        try {
            decryptResults.decoded.push({ level: level + 1, type: 'base64_json', value: JSON.parse(decoded) });
        }
        catch (e) {
            // decoded text is not JSON; only the plain entry is kept
        }
        b64Data = decoded;
    }

    // hex
    const hexClean = dataToDecrypt.replace(/\s+/g, '');
    if (/^[0-9a-f]+$/i.test(hexClean) && hexClean.length >= 6 && hexClean.length % 2 === 0) {
        const hexDecoded = Buffer.from(hexClean, 'hex').toString('utf-8');
        if (hexDecoded && !looksBinary.test(hexDecoded.substring(0, 50))) {
            decryptResults.decoded.push({ type: 'hex', value: hexDecoded.substring(0, 5000) });
            decryptResults.detectedEncoding.push('hex');
        }
    }

    // URL encoding, including one extra pass for double-encoded input.
    if (dataToDecrypt.includes('%')) {
        try {
            let urlDecoded = decodeURIComponent(dataToDecrypt);
            decryptResults.decoded.push({ type: 'url', value: urlDecoded.substring(0, 5000) });
            decryptResults.detectedEncoding.push('url');
            if (urlDecoded.includes('%')) {
                urlDecoded = decodeURIComponent(urlDecoded);
                decryptResults.decoded.push({ type: 'url_double', value: urlDecoded.substring(0, 5000) });
            }
        }
        catch (e) {
            // malformed percent-encoding: keep whatever was decoded so far
        }
    }

    // ROT13
    const rot13 = dataToDecrypt.replace(/[a-zA-Z]/g, (c) => {
        const base = c <= 'Z' ? 65 : 97;
        return String.fromCharCode(((c.charCodeAt(0) - base + 13) % 26) + base);
    });
    if (rot13 !== dataToDecrypt && (rot13.includes('http') || rot13.includes('www') || rot13.includes('.com'))) {
        decryptResults.decoded.push({ type: 'rot13', value: rot13.substring(0, 5000) });
        decryptResults.detectedEncoding.push('rot13');
    }

    if (autoFindKey) {
        try {
            const keys = await page.evaluate(() => {
                const scripts = Array.from(document.querySelectorAll('script')).map((s) => s.textContent).join('\n');
                const found = [];
                const cryptoPatterns = [
                    /CryptoJS\.AES\.decrypt\s*\(\s*\w+\s*,\s*['"]([^'"]+)['"]/g,
                    /CryptoJS\.AES\.encrypt\s*\(\s*\w+\s*,\s*['"]([^'"]+)['"]/g,
                    /CryptoJS\.enc\.Utf8\.parse\s*\(\s*['"]([^'"]+)['"]/g,
                    /(?:secret|key|pass|password|iv|salt)\s*[:=]\s*['"]([^'"]{8,})['"]/gi,
                    /aes(?:Key|_key|Secret)\s*[:=]\s*['"]([^'"]{8,})['"]/gi
                ];
                for (const pat of cryptoPatterns) {
                    let m;
                    while ((m = pat.exec(scripts)) !== null)
                        found.push({ pattern: pat.source.substring(0, 50), key: m[1] });
                }
                return found;
            });
            decryptResults.extractedKeys = keys.slice(0, 20);
        }
        catch (e) {
            // best effort: key discovery is optional
        }
    }

    const aesKey = params.aesKey || (decryptResults.extractedKeys[0]?.key);
    if (aesKey && dataToDecrypt.length > 10) {
        try {
            const crypto = require('crypto');
            for (const keyEncoding of ['utf8', 'hex', 'base64']) {
                try {
                    let keyBuf;
                    if (keyEncoding === 'utf8') {
                        // Zero-pad (or truncate) the passphrase to the 32 bytes AES-256 needs.
                        keyBuf = Buffer.alloc(32);
                        Buffer.from(aesKey, 'utf8').copy(keyBuf);
                    }
                    else {
                        keyBuf = Buffer.from(aesKey, keyEncoding);
                    }
                    const dataBuf = Buffer.from(dataToDecrypt, 'base64');
                    if (dataBuf.length <= 16) {
                        continue;
                    }
                    // Without an explicit IV the first block of the data is used as IV.
                    const iv = params.aesIV ? Buffer.from(params.aesIV, keyEncoding) : dataBuf.subarray(0, 16);
                    const encrypted = params.aesIV ? dataBuf : dataBuf.subarray(16);
                    const decipher = crypto.createDecipheriv('aes-256-cbc', keyBuf, iv);
                    decipher.setAutoPadding(true);
                    let decrypted = decipher.update(encrypted, undefined, 'utf8');
                    decrypted += decipher.final('utf8');
                    if (decrypted && decrypted.length > 0) {
                        decryptResults.aesDecrypted = decrypted.substring(0, 5000);
                        decryptResults.detectedEncoding.push('aes-256-cbc');
                        try {
                            decryptResults.aesDecrypted = JSON.parse(decrypted);
                        }
                        catch (e) {
                            // plaintext is not JSON; keep the truncated string
                        }
                        break;
                    }
                }
                catch (e) {
                    // wrong key length or bad padding for this encoding: try the next one
                    continue;
                }
            }
        }
        catch (e) {
            // crypto module unavailable: AES is skipped
        }
    }
    return decryptResults;
}
651
+ //# sourceMappingURL=network-extractors.js.map