@shriyanss/js-recon 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
1
+ import chalk from "chalk";
2
+ import fs from "fs";
3
+ import frameworkDetect from "../techDetect/index.js";
4
+ import CONFIG from "../globalConfig.js";
5
+ import _traverse from "@babel/traverse";
6
+ const traverse = _traverse.default;
7
+ import { URL } from "url";
8
+
9
+ // Next.js
10
+ import subsequentRequests from "./next_js/next_SubsequentRequests.js";
11
+ import next_getJSScript from "./next_js/next_GetJSScript.js";
12
+ import next_getLazyResources from "./next_js/next_GetLazyResources.js";
13
+
14
+ // Nuxt.js
15
+ import nuxt_getFromPageSource from "./nuxt_js/nuxt_getFromPageSource.js";
16
+ import nuxt_stringAnalysisJSFiles from "./nuxt_js/nuxt_stringAnalysisJSFiles.js";
17
+ import nuxt_astParse from "./nuxt_js/nuxt_astParse.js";
18
+
19
+ // generic
20
+ import downloadFiles from "./downloadFilesUtil.js";
21
+ import downloadLoadedJs from "./downloadLoadedJsUtil.js";
22
+
23
+ // import global vars
24
+ import * as globals from "./globals.js";
25
+
26
/**
 * Downloads the lazy-loaded JavaScript files for a URL, or for every URL listed in a file.
 *
 * For each target the framework is detected with `frameworkDetect`:
 *   - "next": script-tag URLs, webpack lazy resources and (optionally) URLs found
 *     through subsequent requests are combined, de-duplicated and downloaded.
 *   - "nuxt": URLs from the page source and from string analysis are collected,
 *     each of those files is passed to `nuxt_astParse`, and everything is downloaded.
 *   - "vue": only the detection evidence is printed; nothing is downloaded.
 *   - nothing detected: falls back to `downloadLoadedJs`.
 *
 * @param {string} url - A URL, or the path to a file containing one URL per line.
 * @param {string} output - The directory where downloaded files will be saved.
 * @param {boolean} strictScope - When true, the host of each processed URL is pushed to the scope;
 *   otherwise the scope is set to `inputScope`.
 * @param {string[]} inputScope - Scope passed to `globals.setScope` when `strictScope` is false.
 * @param {number} threads - Value passed to `globals.setMaxReqQueue` and to `subsequentRequests`.
 * @param {boolean} subsequentRequestsFlag - Whether to include JS files from subsequent requests (Next.js only).
 * @param {string} urlsFile - The JSON file handed to `subsequentRequests`.
 * @returns {Promise<void>}
 */
const lazyLoad = async (
  url,
  output,
  strictScope,
  inputScope,
  threads,
  subsequentRequestsFlag,
  urlsFile,
) => {
  console.log(chalk.cyan("[i] Loading 'Lazy Load' module"));

  let urls;

  // `url` is either a path to a list of URLs or a single URL
  if (fs.existsSync(url)) {
    // Split on both LF and CRLF and trim every entry, so that a list saved with
    // Windows line endings does not yield URLs carrying a trailing "\r".
    urls = fs
      .readFileSync(url, "utf-8")
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter((line) => line !== "");
  } else if (url.match(/https?:\/\/[a-zA-Z0-9\-_\.:]+/)) {
    urls = [url];
  } else {
    console.log(chalk.red("[!] Invalid URL or file path"));
    return;
  }

  for (const targetUrl of urls) {
    console.log(chalk.cyan(`[i] Processing ${targetUrl}`));

    if (strictScope) {
      // A single malformed line must not abort the remaining targets.
      let targetHost;
      try {
        targetHost = new URL(targetUrl).host;
      } catch (err) {
        console.log(chalk.red(`[!] Skipping invalid URL: ${targetUrl}`));
        continue;
      }
      globals.pushToScope(targetHost);
    } else {
      globals.setScope(inputScope);
    }

    globals.setMaxReqQueue(threads);
    globals.clearJsUrls(); // start every target with an empty global js_urls list

    const tech = await frameworkDetect(targetUrl);

    if (!tech) {
      console.log(chalk.red("[!] Framework not detected :("));
      console.log(chalk.magenta(CONFIG.notFoundMessage));
      console.log(chalk.yellow("[i] Trying to download loaded JS files"));
      const js_urls = await downloadLoadedJs(targetUrl);
      if (js_urls && js_urls.length > 0) {
        console.log(chalk.green(`[✓] Found ${js_urls.length} JS chunks`));
        await downloadFiles(js_urls, output);
      }
      continue;
    }

    if (tech.name === "next") {
      console.log(chalk.green("[✓] Next.js detected"));
      console.log(chalk.yellow(`Evidence: ${tech.evidence}`));

      // JS files referenced from the page's script tags
      const jsFilesFromScriptTag = await next_getJSScript(targetUrl);

      // lazy resources derived from the webpack runtime
      const lazyResourcesFromWebpack = await next_getLazyResources(targetUrl);

      let lazyResourcesFromSubsequentRequests;
      if (subsequentRequestsFlag) {
        lazyResourcesFromSubsequentRequests = await subsequentRequests(
          targetUrl,
          urlsFile,
          threads,
          output,
          globals.getJsUrls(),
        );
      }

      // The helpers above also push into the global js_urls list, so it is
      // merged in explicitly; the Set removes the resulting duplicates.
      const jsFilesToDownload = [
        ...new Set([
          ...(jsFilesFromScriptTag || []),
          ...(lazyResourcesFromWebpack || []),
          ...(lazyResourcesFromSubsequentRequests || []),
          ...globals.getJsUrls(),
        ]),
      ];

      await downloadFiles(jsFilesToDownload, output);
    } else if (tech.name === "vue") {
      console.log(chalk.green("[✓] Vue.js detected"));
      console.log(chalk.yellow(`Evidence: ${tech.evidence}`));
    } else if (tech.name === "nuxt") {
      console.log(chalk.green("[✓] Nuxt.js detected"));
      console.log(chalk.yellow(`Evidence: ${tech.evidence}`));

      const jsFilesFromPageSource = await nuxt_getFromPageSource(targetUrl);
      const jsFilesFromStringAnalysis = await nuxt_stringAnalysisJSFiles(targetUrl);

      // `|| []` mirrors the Next.js branch: a helper returning nothing must not
      // make the spread throw.
      const discoveredFiles = [
        ...new Set([
          ...(jsFilesFromPageSource || []),
          ...(jsFilesFromStringAnalysis || []),
        ]),
      ];

      const jsFilesFromAST = [];
      console.log(chalk.cyan("[i] Analyzing functions in the files found"));
      for (const jsFile of discoveredFiles) {
        jsFilesFromAST.push(...((await nuxt_astParse(jsFile)) || []));
      }

      const jsFilesToDownload = [
        ...new Set([
          ...discoveredFiles,
          ...jsFilesFromAST,
          ...globals.getJsUrls(),
        ]),
      ];

      await downloadFiles(jsFilesToDownload, output);
    }
  }
};
166
+
167
+ export default lazyLoad;
@@ -0,0 +1,99 @@
1
+ // lazyLoad/nextGetJSScript.js
2
+ import chalk from "chalk";
3
+ import { URL } from "url";
4
+ import * as cheerio from "cheerio";
5
+ import makeRequest from "../../utility/makeReq.js";
6
+ import { getJsUrls, pushToJsUrls } from "../globals.js";
7
+
8
/**
 * Fetches the given URL and collects JavaScript file URLs from its script tags.
 *
 * Two sources are used:
 *   - `src` attributes that look like a path to a `.js` file; these are resolved
 *     against the page URL (absolute, protocol-relative, root-relative and
 *     relative values are all handled by the URL parser).
 *   - inline scripts, which are searched for `static/chunks/<name>.js`; each hit
 *     is appended to the directory of an already known same-host chunk URL.
 *
 * Every URL found is pushed to the shared js_urls list (without duplicates).
 *
 * @param {string} url - The URL of the webpage to fetch and parse.
 * @returns {Promise<string[]>} - Resolves to the shared js_urls list as returned
 *   by `getJsUrls()`, which may also hold entries added before this call.
 */
const next_getJSScript = async (url) => {
  // get the page source
  const res = await makeRequest(url);
  const pageSource = await res.text();

  // cheerio to parse the page source
  const $ = cheerio.load(pageSource);

  const pageHost = new URL(url).host;

  // push a URL to the shared list unless it is already there
  const addJsUrl = (candidate) => {
    if (!getJsUrls().includes(candidate)) {
      pushToJsUrls(candidate);
    }
  };

  // directory of the first known same-host URL that lives under static/chunks/
  const findChunkDirectory = () => {
    for (const js_url of getJsUrls()) {
      let parsed;
      try {
        parsed = new URL(js_url);
      } catch (err) {
        continue; // ignore entries that are not valid absolute URLs
      }
      if (parsed.host === pageHost && parsed.pathname.includes("static/chunks/")) {
        return js_url.replace(/\/[^\/]+\.js.*$/, "");
      }
    }
    return undefined;
  };

  for (const scriptTag of $("script")) {
    const src = $(scriptTag).attr("src");

    // Same acceptance test as before: the src must contain "/", at least one
    // more character, and then ".js".
    if (src !== undefined && src.match(/\/.+\.js/)) {
      // Resolve through the URL parser instead of string concatenation, so
      // absolute (`https://cdn/x.js`) and protocol-relative (`//cdn/x.js`)
      // values are kept intact rather than being prefixed with the page URL.
      let absoluteUrl;
      try {
        absoluteUrl = new URL(src, url).href;
      } catch (err) {
        continue; // unparsable src, nothing to download
      }
      addJsUrl(absoluteUrl);
    } else {
      // an inline script (or a non-JS src) may still mention static chunk paths
      const js_script = $(scriptTag).html() ?? "";
      const matches = js_script.match(/static\/chunks\/[a-zA-Z0-9_\-]+\.js/g);
      if (!matches) {
        continue;
      }

      // src-based URLs normally appear before inline scripts, so a chunk
      // directory is usually known by now; without one the hits cannot be resolved
      const js_path_dir = findChunkDirectory();
      if (!js_path_dir) {
        continue;
      }

      for (const match of new Set(matches)) {
        const filename = match.replace("static/chunks/", "");
        addJsUrl(js_path_dir + "/" + filename);
      }
    }
  }

  console.log(
    chalk.green(`[✓] Found ${getJsUrls().length} JS files from the script tags`),
  );

  return getJsUrls();
};
98
+
99
+ export default next_getJSScript;
@@ -0,0 +1,201 @@
1
+ import chalk from "chalk";
2
+ import puppeteer from "puppeteer";
3
+ import parser from "@babel/parser";
4
+ import _traverse from "@babel/traverse";
5
+ const traverse = _traverse.default;
6
+ import inquirer from "inquirer";
7
+ import CONFIG from "../../globalConfig.js";
8
+ import makeRequest from "../../utility/makeReq.js";
9
+ import execFunc from "../../utility/runSandboxed.js";
10
+ import { getJsUrls, pushToJsUrls } from "../globals.js"; // Import js_urls functions
11
+
12
/**
 * Loads the page in headless Chromium, records the JavaScript files it
 * requests, and derives further chunk URLs from the webpack runtime file.
 *
 * Steps:
 *   1. Every GET request whose URL looks like a `.js` file is pushed to the
 *      shared js_urls list.
 *   2. The last recorded URL matching `/webpack*.js` is downloaded and parsed.
 *   3. The last function whose source ends (within 15 characters) in `".js"`
 *      is shown to the user, who must confirm it interactively.
 *   4. That function is run through `execFunc` once for every distinct integer
 *      literal in its source; results containing "undefined" are dropped.
 *   5. Each resulting path is combined with the directory of the webpack file.
 *
 * @param {string} url - The URL of the webpage to load.
 * @returns {Promise<string[]|undefined>} - The derived chunk URLs; an empty
 *   array when no suitable function is found, the user declines or execution
 *   fails; `undefined` when no webpack JS file was requested by the page.
 */
const next_getLazyResources = async (url) => {
  const browser = await puppeteer.launch({
    headless: true,
  });

  // try/finally so the browser process is closed even if navigation fails
  try {
    const page = await browser.newPage();

    await page.setRequestInterception(true);

    page.on("request", async (request) => {
      const req_url = request.url();

      // Host class allows digits, upper case and a port, matching the URL
      // check used for the tool's input URL.
      if (
        request.method() === "GET" &&
        req_url.match(/https?:\/\/[a-zA-Z0-9\._\-:]+\/.+\.js/)
      ) {
        if (!getJsUrls().includes(req_url)) {
          pushToJsUrls(req_url);
        }
      }

      await request.continue();
    });

    await page.goto(url);
  } finally {
    await browser.close();
  }

  // the last URL that looks like a webpack runtime file wins
  let webpack_js;
  for (const js_url of getJsUrls()) {
    if (js_url.match(/\/webpack.*\.js/)) {
      console.log(chalk.green(`[✓] Found webpack JS file at ${js_url}`));
      webpack_js = js_url;
    }
  }

  if (!webpack_js) {
    console.log(chalk.red("[!] No webpack JS file found"));
    console.log(chalk.magenta(CONFIG.notFoundMessage));
    return; // undefined signals "no webpack file"
  }

  // download and parse the webpack JS file
  const res = await makeRequest(webpack_js);
  const webpack_js_source = await res.text();

  const ast = parser.parse(webpack_js_source, {
    sourceType: "unambiguous",
    plugins: ["jsx", "typescript"],
  });

  // collect the source text of every function-like node
  const functions = [];
  const record = (type, name, node) => {
    functions.push({
      name,
      type,
      source: webpack_js_source.slice(node.start, node.end),
    });
  };

  traverse(ast, {
    FunctionDeclaration(path) {
      record("FunctionDeclaration", path.node.id?.name || "(anonymous)", path.node);
    },
    FunctionExpression(path) {
      record("FunctionExpression", path.parent.id?.name || "(anonymous)", path.node);
    },
    ArrowFunctionExpression(path) {
      record("ArrowFunctionExpression", path.parent.id?.name || "(anonymous)", path.node);
    },
    ObjectMethod(path) {
      record("ObjectMethod", path.node.key.name, path.node);
    },
    ClassMethod(path) {
      record("ClassMethod", path.node.key.name, path.node);
    },
  });

  // pick the function whose source ends with `".js"` (last match wins)
  let final_Func;
  for (const func of functions) {
    if (func.source.match(/"\.js".{0,15}$/)) {
      console.log(
        chalk.green(`[✓] Found JS chunk having the following source`),
      );
      console.log(chalk.yellow(func.source));
      final_Func = func.source;
    }
  }

  if (!final_Func) {
    console.log(chalk.red("[!] No suitable function found in webpack JS for lazy loading."));
    return [];
  }

  // the function is executed below, so ask the user to confirm it first
  const { confirmed: user_verified } = await inquirer.prompt([
    {
      type: "confirm",
      name: "confirmed",
      message: "Is this the correct function?",
      default: true,
    },
  ]);

  if (user_verified !== true) {
    console.log(chalk.red("[!] Not executing function."));
    return [];
  }
  console.log(
    chalk.cyan("[i] Proceeding with the selected function to fetch files"),
  );

  const urlBuilderFunc = `(() => (${final_Func}))()`;

  const js_paths = [];
  try {
    // rather than fuzzing, use the integer literals found in the function
    // source as chunk ids; each distinct id is executed only once
    const chunkIds = new Set(
      (final_Func.match(/\d+/g) || []).map((digits) => Number.parseInt(digits, 10)),
    );
    for (const chunkId of chunkIds) {
      const output = execFunc(urlBuilderFunc, chunkId);
      if (!output.includes("undefined")) {
        js_paths.push(output);
      }
    }
  } catch (err) {
    console.error("Unsafe or invalid code:", err.message);
    return [];
  }

  if (js_paths.length > 0) {
    console.log(chalk.green(`[✓] Found ${js_paths.length} JS chunks`));
  }

  // build the final URLs relative to the directory of the webpack file
  const webpack_dir = webpack_js.split("/").slice(0, -1).join("/");
  const final_urls = js_paths.map((js_path) => {
    // Strip the file name (any characters except "/", so names containing
    // "-" or "_" are handled) to get the directory part of the chunk path,
    // then swap that directory inside webpack_dir for the full chunk path.
    const js_path_dir = js_path.replace(/\/[^\/]+\.js.*$/, "");
    return webpack_dir.replace(js_path_dir, js_path);
  });

  return final_urls;
};
200
+
201
+ export default next_getLazyResources;
@@ -0,0 +1,138 @@
1
+ import chalk from "chalk";
2
+ import fs from "fs";
3
+ import path from "path";
4
+ import { getURLDirectory } from "../../utility/urlUtils.js";
5
+ // custom request module
6
+ import makeRequest from "../../utility/makeReq.js";
7
+
8
// Number of subsequent-request fetches currently in flight. It is compared
// with `max_queue` and incremented/decremented, so it must start as a number.
let queue = 0;
// Upper bound for `queue`; assigned from the `threads` argument on each run.
let max_queue;
10
+
11
/**
 * Extracts every `static/chunks/….js` path (with or without a leading slash)
 * that occurs in the given text.
 *
 * Matches are returned in order of appearance; repeated paths are kept.
 *
 * @param {string} js_content - The text to search through.
 *
 * @returns {Promise<string[]>} Resolves to the matched paths, or to an empty
 * array when there are none.
 */
const findStaticFiles = async (js_content) => {
  const chunkPathPattern = /\/?static\/chunks\/[a-zA-Z0-9\._\-\/]+\.js/g;

  // keep only the full match text of each hit
  return Array.from(js_content.matchAll(chunkPathPattern), (hit) => hit[0]);
};
35
+
36
/**
 * Returns the origin of `urlString` followed by its path with the last
 * non-empty segment removed; query string and fragment are dropped.
 *
 * Example: 'http://something.com/business/api' -> 'http://something.com/business'.
 * A URL with zero or one path segment yields the origin followed by '/'.
 *
 * @param {string} urlString - An absolute URL.
 * @returns {string} The origin plus the parent path.
 */
const getURLDirectoryServer = (urlString) => {
  const { origin, pathname } = new URL(urlString);

  // drop empty segments produced by leading, trailing or doubled slashes
  const segments = pathname.split("/").filter((segment) => segment !== "");
  const parentSegments = segments.slice(0, -1);

  return origin + "/" + parentSegments.join("/");
};
44
+
45
/**
 * Requests every endpoint listed in `urlsFile` with the `RSC: 1` header and
 * collects the `static/chunks/….js` files referenced by the responses.
 *
 * A response is used only when its status is 200 and its content type
 * includes `text/x-component`. Each such body is saved as
 * `<output>/<host>/___subsequent_requests/<directory>/index.js`, and the chunk
 * paths found in it are turned into absolute URLs using the prefix of an
 * already known same-host chunk URL from `js_urls`.
 *
 * Errors for a single endpoint are logged and do not stop the other requests.
 *
 * @param {string} url - Base URL; each endpoint is appended to it as-is.
 * @param {string} urlsFile - Path to a JSON file whose `paths` property is the
 *   array of endpoints. Reading or parsing failures are thrown to the caller.
 * @param {number} threads - Maximum number of requests in flight at once.
 * @param {string} output - Root directory for the saved responses.
 * @param {string[]} js_urls - JS URLs that are already known.
 * @returns {Promise<string[]>} De-duplicated chunk URLs not present in `js_urls`.
 */
const subsequentRequests = async (url, urlsFile, threads, output, js_urls) => {
  max_queue = threads;
  let staticJSURLs = [];

  console.log(chalk.cyan(`[i] Fetching JS files from subsequent requests`));

  // open the urls file, and load the paths (JSON)
  const endpoints = JSON.parse(fs.readFileSync(urlsFile, "utf-8")).paths;

  const CHUNK_MARKER = "static/chunks/";

  // The part of a known same-host chunk URL that precedes "static/chunks/".
  // It is the same for every endpoint, so it is computed once. Cutting at the
  // marker (instead of stripping only the file name) also gives the right
  // prefix when the first known chunk lives in a sub-directory.
  const findChunkBaseUrl = () => {
    let targetHost;
    try {
      targetHost = new URL(url).host;
    } catch (e) {
      return undefined;
    }
    for (const js_url of js_urls) {
      let parsed;
      try {
        parsed = new URL(js_url);
      } catch (e) {
        continue; // ignore entries that are not valid absolute URLs
      }
      if (parsed.host === targetHost && parsed.pathname.includes(CHUNK_MARKER)) {
        return js_url.slice(0, js_url.indexOf(CHUNK_MARKER));
      }
    }
    return undefined;
  };
  const chunkBaseUrl = findChunkBaseUrl();

  // make requests to all of them with the special header
  const reqPromises = endpoints.map(async (endpoint) => {
    const reqUrl = url + endpoint;

    // wait while the number of in-flight requests is at the limit
    while (queue >= max_queue) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    queue++;

    try {
      const res = await makeRequest(reqUrl, {
        headers: {
          RSC: "1",
        },
      });

      // a missing content-type header yields null, hence the optional call
      const isComponentResponse =
        res &&
        res.status === 200 &&
        res.headers.get("content-type")?.includes("text/x-component");
      if (!isComponentResponse) {
        return;
      }

      const text = await res.text();

      const { host, directory } = getURLDirectory(reqUrl);

      // save the contents under "___subsequent_requests/"; `recursive` creates
      // all missing parent directories and is a no-op when the path exists
      const output_path = path.join(
        output,
        host,
        "___subsequent_requests",
        directory,
      );
      fs.mkdirSync(output_path, { recursive: true });
      fs.writeFileSync(path.join(output_path, "index.js"), text);

      // find the static ones from the JS resp
      const staticFiles = await findStaticFiles(text);
      if (staticFiles.length === 0) {
        return;
      }

      if (chunkBaseUrl === undefined) {
        console.log(
          chalk.yellow(
            `[!] Cannot resolve ${staticFiles.length} chunk path(s) from ${reqUrl}: no known static/chunks URL on this host`,
          ),
        );
        return;
      }

      for (const file of staticFiles) {
        // matches may start with "/", while the base already ends with one
        const absolutePath = chunkBaseUrl + file.replace(/^\//, "");
        if (!js_urls.includes(absolutePath)) {
          staticJSURLs.push(absolutePath);
        }
      }
    } catch (e) {
      console.log(chalk.red(`[!] Error fetching ${reqUrl}: ${e}`));
    } finally {
      // release the slot on every exit path
      queue--;
    }
  });

  await Promise.all(reqPromises);

  staticJSURLs = [...new Set(staticJSURLs)];

  console.log(chalk.green(`[✓] Found ${staticJSURLs.length} JS chunks from subsequent requests`));

  return staticJSURLs;
};
137
+
138
+ export default subsequentRequests;