sentinel-scanner 1.1.0-alpha.1 → 1.1.0

@@ -0,0 +1,193 @@
+ import fs from "node:fs";
+ import path from "node:path";
+ import type { ArgumentsCamelCase, CommandModule } from "yargs";
+ import SpiderScanner from "../modules/spider";
+ import { createLogger } from "../utils";
+
+ export type SpiderScannerCLIOptions = {
+   url: string;
+   depth?: number;
+   output?: string;
+   concurrency?: number;
+   timeout?: number;
+   retries?: number;
+ };
+
+ const cliLogger = createLogger("CLI");
+
+ export const spiderCommand: CommandModule = {
+   command: "spider",
+   describe:
+     "Crawl a website and get an array of URLs which are internal to the website",
+   builder: (yargs) => {
+     return yargs
+       .option("url", {
+         alias: "u",
+         type: "string",
+         description: "The URL of the website to scan",
+         demandOption: true,
+         coerce: (url) => {
+           try {
+             new URL(url);
+
+             return url;
+           } catch (error) {
+             throw new Error(`Invalid URL: ${url}`);
+           }
+         },
+       })
+       .option("depth", {
+         alias: "d",
+         type: "number",
+         description: "The maximum depth to crawl",
+         default: 250,
+         coerce: (depth) => {
+           if (depth < 0) {
+             throw new Error("Depth must be a positive number");
+           }
+
+           if (depth > 250) {
+             throw new Error("Depth must be less than 250");
+           }
+
+           return depth;
+         },
+       })
+       .option("output", {
+         alias: "o",
+         type: "string",
+         description:
+           "The output file to write the results to. Must be a JSON file",
+         coerce: (output) => {
+           try {
+             // Should throw an error if the path is invalid
+             // Should be a JSON file
+             const resolvedPath = path.resolve(output);
+             const parsedPath = path.parse(resolvedPath);
+
+             if (parsedPath.ext !== ".json") {
+               throw new Error("Output file must be a JSON file");
+             }
+
+             if (fs.existsSync(resolvedPath)) {
+               throw new Error("Output file already exists");
+             }
+
+             return resolvedPath;
+           } catch (error) {
+             throw new Error(`Invalid output file: ${output}`);
+           }
+         },
+         default: getDefaultFilePath(),
+       })
+       .option("concurrency", {
+         alias: "c",
+         type: "number",
+         description: "The number of concurrent requests to make",
+         default: 10,
+         coerce: (concurrency) => {
+           if (concurrency < 1) {
+             throw new Error("Concurrency must be a positive number");
+           }
+
+           if (concurrency > 20) {
+             throw new Error("Concurrency must be less than 20");
+           }
+
+           return concurrency;
+         },
+       })
+       .option("timeout", {
+         alias: "t",
+         type: "number",
+         description: "The timeout for each request in milliseconds",
+         default: 5000,
+         coerce: (timeout) => {
+           if (timeout < 0) {
+             throw new Error("Timeout must be a positive number");
+           }
+
+           if (timeout > 25_000) {
+             throw new Error("Timeout must be less than 25,000");
+           }
+
+           return timeout;
+         },
+       })
+       .option("retries", {
+         alias: "r",
+         type: "number",
+         description: "The number of retries for each request",
+         default: 3,
+         coerce: (retries) => {
+           if (retries < 0) {
+             throw new Error("Retries must be a positive number");
+           }
+
+           if (retries > 10) {
+             throw new Error("Retries must be less than 10");
+           }
+
+           return retries;
+         },
+       });
+   },
+   handler: async (args) => {
+     try {
+       const argData = args as ArgumentsCamelCase<SpiderScannerCLIOptions>;
+
+       const scanner = new SpiderScanner(argData.url, {
+         depth: argData.depth ?? 250,
+         concurrency: argData.concurrency ?? 10,
+         timeout: argData.timeout ?? 5000,
+         retries: argData.retries ?? 3,
+       });
+
+       cliLogger.info("Starting to crawl website");
+
+       const results = await scanner.crawl();
+
+       if (argData.output) {
+         fs.writeFileSync(argData.output, JSON.stringify(results, null, 2));
+         cliLogger.info(`Results written to ${argData.output}`);
+       } else {
+         const resolvedPath = getDefaultFilePath();
+         fs.writeFileSync(resolvedPath, JSON.stringify(results, null, 2));
+         cliLogger.info(`Results written to ${resolvedPath}`);
+       }
+     } catch (error) {
+       if (error instanceof Error) {
+         cliLogger.error(error.message);
+       }
+       cliLogger.error("Failed to run spider command");
+       process.exit(1);
+     }
+   },
+ };
+
+ const getDefaultFilePath = () => {
+   try {
+     const resolvedDir = path.resolve("sentinel_output");
+     // Check if the directory exists
+     if (!fs.existsSync(resolvedDir)) {
+       fs.mkdirSync(resolvedDir);
+     }
+
+     const resolvedPath = path.resolve(
+       `sentinel_output/spider_${Date.now()}.json`,
+     );
+     // Check if the file already exists
+     if (fs.existsSync(resolvedPath)) {
+       throw new Error("Output file already exists");
+     }
+     const parsedPath = path.parse(resolvedPath);
+
+     if (parsedPath.ext !== ".json") {
+       throw new Error("Output file must be a JSON file");
+     }
+
+     return resolvedPath;
+   } catch (error) {
+     throw new Error("Invalid output file");
+   }
+ };
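
Note: the file header for the hunk above was not captured in this diff; judging by its imports, it is a new file one level below `src` that exports the `spider` command as a self-contained yargs `CommandModule`. A minimal sketch of how such a module could be wired into a CLI entry point (the `./commands/spider` path and the `.strict()`/`.demandCommand()` settings are assumptions, not shown in this diff):

```ts
import yargs from "yargs";
import { hideBin } from "yargs/helpers";
// Hypothetical path; the real location of spiderCommand is not shown in this diff.
import { spiderCommand } from "./commands/spider";

yargs(hideBin(process.argv))
  // A CommandModule bundles command, describe, builder, and handler,
  // so registration is a single call.
  .command(spiderCommand)
  .demandCommand(1, "Specify a command to run")
  .strict()
  .parse();
```

Because every option declares a `coerce` function, invalid input (a malformed URL, a depth over 250, a non-JSON output path) is rejected by yargs before the handler runs. One quirk worth noting: `output` also has a `default` of `getDefaultFilePath()`, which runs when the option list is built and creates `sentinel_output/` as a side effect, so `argData.output` is always set and the handler's `else` branch is effectively unreachable.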
package/src/index.ts CHANGED
@@ -1,86 +1,3 @@
- #!/usr/bin/env node
+ import SpiderScanner, { type SpiderScannerOptions } from "./modules/spider";
 
- import yargs from "yargs";
- import { hideBin } from "yargs/helpers";
- import { SpiderScanner } from "./modules";
-
- const commandHandler = yargs(hideBin(process.argv));
-
- /**
-  * Command to scan for XSS vulnerabilities
-  *
-  * @param {string} url - URL to scan
-  * @param {string} wordlist - Path to wordlist file
-  * @returns {void}
-  *
-  * @example
-  * npx sentinel-scanner xss --url https://example.com
-  */
- commandHandler.command(
-   "xss",
-   "Scan for XSS vulnerabilities",
-   {
-     url: {
-       describe: "URL to scan",
-       demandOption: true,
-       type: "string",
-       coerce: (value) => {
-         try {
-           new URL(value);
-           return value;
-         } catch (err) {
-           throw new Error("Invalid URL format");
-         }
-       },
-     },
-     wordlist: {
-       describe: "Path to wordlist file",
-       type: "string",
-     },
-   },
-   (argv) => {
-     console.log("Scanning for XSS vulnerabilities...");
-     console.log(`URL: ${argv.url}`);
-     console.log(`Wordlist: ${argv.wordlist || "Default"}`);
-   },
- );
-
- // Command to Spider a website
- commandHandler.command(
-   "spider",
-   "Scan a website for vulnerabilities",
-   {
-     url: {
-       describe: "URL to scan",
-       demandOption: true,
-       type: "string",
-       coerce: (value) => {
-         try {
-           new URL(value);
-           return value;
-         } catch (err) {
-           throw new Error("Invalid URL format");
-         }
-       },
-     },
-   },
-   (argv) => {
-     const spider = new SpiderScanner(argv.url);
-
-     spider.crawl().then((output) => {
-       console.log(
-         JSON.stringify(
-           {
-             forms: output.forms,
-             links: output.links,
-           },
-           null,
-           2,
-         ),
-       );
-     });
-   },
- );
-
- // Parse arguments and handle commands
- commandHandler.parse();
+ export { SpiderScanner, type SpiderScannerOptions };
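
With the shebang and inline yargs setup removed, `src/index.ts` becomes a plain library entry point. A hedged sketch of programmatic use, assuming the package's `main` field resolves to this file:

```ts
import { SpiderScanner, type SpiderScannerOptions } from "sentinel-scanner";

// All options are optional; the constructor supplies its own defaults
// (depth 250, concurrency 5, retries 3, timeout 5000).
const options: SpiderScannerOptions = {
  depth: 50, // visit at most 50 URLs instead of the default 250
  concurrency: 5,
  timeout: 5000, // per-request timeout in milliseconds
  retries: 3,
};

const scanner = new SpiderScanner("https://example.com", options);

// crawl() now resolves to a plain array of internal URLs
// (see the spider module diff below).
scanner.crawl().then((links) => {
  console.log(`Found ${links.length} internal links`);
});
```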
@@ -1,31 +1,43 @@
  import fetch from "isomorphic-fetch";
  import jsdom from "jsdom";
  import UserAgent from "user-agents";
- import Logger from "../../lib/logger";
+ import { createLogger } from "../../utils";
 
- export type FormOutput = {
-   id: number;
-   url: string;
-   fields: Array<{ name: string; id: string; class: string; type: string }>;
- };
-
- export type CrawlOutput = {
-   links: string[];
-   forms: FormOutput[];
- };
+ export interface SpiderScannerOptions {
+   depth?: number;
+   concurrency?: number;
+   retries?: number;
+   timeout?: number;
+ }
 
  export default class SpiderScanner {
    private header: Record<string, string> = {
      "User-Agent": new UserAgent().toString(),
    };
    private url: URL;
-   private logger = new Logger("Spider");
+   private logger = createLogger("SpiderScanner");
+
+   private depth: number;
+   private concurrency: number;
+   private retries: number;
+   private timeout: number;
+
+   constructor(url: string, options: SpiderScannerOptions = {}) {
+     const {
+       depth = 250,
+       concurrency = 5,
+       retries = 3,
+       timeout = 5000,
+     } = options;
+     this.depth = depth;
+     this.concurrency = concurrency;
+     this.retries = retries;
+     this.timeout = timeout;
 
-   constructor(url: string) {
      try {
        this.url = new URL(url);
        this.logger.info(
-         `Initialized with URL: ${url} & User-Agent: ${this.header["User-Agent"]}`,
+         `Initialized with URL: ${url}, User-Agent: ${this.header["User-Agent"]}`,
        );
      } catch (error) {
        if (error instanceof TypeError) {
@@ -37,7 +49,6 @@ export default class SpiderScanner {
      }
    }
 
-   // Normalize domains (removes 'www.')
    private normalizeDomain(domain: string): string {
      return domain.startsWith("www.") ? domain.slice(4) : domain;
    }
@@ -61,20 +72,42 @@
      }
    }
 
-   private async fetchUrl(url: string): Promise<string | null> {
-     try {
-       this.logger.debug(`Fetching URL: ${url}`);
-       const response = await fetch(url, { headers: this.header });
-       if (!response.ok) {
+   private async fetchWithRetries(
+     url: string,
+     retries: number,
+   ): Promise<string | null> {
+     for (let attempt = 1; attempt <= retries; attempt++) {
+       const controller = new AbortController();
+       const timeoutId = setTimeout(() => controller.abort(), this.timeout);
+
+       try {
+         this.logger.debug(`Fetching URL (Attempt ${attempt}): ${url}`);
+         const randomUserAgent = new UserAgent().toString();
+         this.logger.info(`Changing User-Agent to: ${randomUserAgent}`);
+         this.header["User-Agent"] = randomUserAgent;
+         const response = await fetch(url, {
+           headers: this.header,
+           signal: controller.signal,
+           redirect: "follow",
+         });
+
+         clearTimeout(timeoutId);
+
+         if (response.ok) {
+           this.logger.info(`Successfully fetched URL: ${url}`);
+           return await response.text();
+         }
+
          this.logger.warn(`Failed to fetch URL (${response.status}): ${url}`);
-         return null;
+       } catch (error) {
+         if ((error as Error).name === "AbortError") {
+           this.logger.warn(`Fetch timed out: ${url}`);
+         } else {
+           this.logger.error(`Error fetching URL: ${url} - ${error}`);
+         }
        }
-       this.logger.info(`Successfully fetched URL: ${url}`);
-       return await response.text();
-     } catch (error) {
-       this.logger.error(`Error fetching URL: ${url} - ${error}`);
-       return null;
      }
+     return null;
    }
 
    private extractLinks(html: string): string[] {
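
The single-shot `fetchUrl` above is replaced by `fetchWithRetries`, which rotates the User-Agent on every attempt and combines a per-attempt timeout (an `AbortController` aborted by `setTimeout`) with a bounded retry loop. The same pattern in isolation, as a sketch: `fetchText` is a hypothetical name, and it assumes a global `fetch` (Node 18+ or the isomorphic-fetch polyfill used above):

```ts
// Retry-with-timeout sketch mirroring fetchWithRetries.
async function fetchText(
  url: string,
  retries = 3,
  timeoutMs = 5000,
): Promise<string | null> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const controller = new AbortController();
    // Abort the in-flight request once the timeout elapses.
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const response = await fetch(url, { signal: controller.signal });
      if (response.ok) return await response.text();
      // Non-OK status: fall through and retry.
    } catch {
      // AbortError means the attempt timed out; anything else is a
      // network-level failure. Either way, move on to the next attempt.
    } finally {
      // Clearing the timer in finally also covers the error paths,
      // so no timer is left pending after a failed attempt.
      clearTimeout(timer);
    }
  }
  return null; // all attempts exhausted
}
```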
@@ -89,36 +122,40 @@ export default class SpiderScanner {
      return internalLinks.map((link) => this.convertRelativeUrlToAbsolute(link));
    }
 
-   private extractForms(html: string): FormOutput[] {
-     const { JSDOM } = jsdom;
-     const dom = new JSDOM(html);
-     const forms = Array.from(dom.window.document.querySelectorAll("form"));
-     this.logger.debug(`Extracted ${forms.length} forms from HTML content`);
-
-     return forms.map((form, index) => {
-       const fields = Array.from(form.querySelectorAll("input")).map(
-         (input) => ({
-           name: input.name,
-           id: input.id,
-           class: input.className,
-           type: input.type,
-         }),
-       );
-
-       return {
-         id: index,
-         url: this.url.href,
-         fields,
-       };
-     });
-   }
-
-   // Main function to scan the website with concurrency support and return both links and forms
-   public async crawl(depth = 250, concurrency = 5): Promise<CrawlOutput> {
+   public async crawl(): Promise<Array<string>> {
      const visited = new Set<string>();
      const queue = new Set<string>([this.url.href]);
      const resultLinks = new Set<string>();
-     const resultForms = new Set<FormOutput>();
+
+     // Assets to ignore
+     const assetExtensions = [
+       ".css",
+       ".js",
+       ".png",
+       ".jpg",
+       ".jpeg",
+       ".gif",
+       ".svg",
+       ".ico",
+       ".webp",
+       ".mp4",
+       ".mp3",
+       ".wav",
+       ".avi",
+       ".mov",
+       ".webm",
+       ".pdf",
+       ".doc",
+       ".docx",
+       ".xls",
+       ".xlsx",
+       ".ppt",
+       ".pptx",
+       ".zip",
+       ".rar",
+       ".tar",
+       ".gz",
+     ];
 
      const fetchAndExtract = async (currentUrl: string) => {
        if (visited.has(currentUrl)) {
@@ -128,19 +165,22 @@
        visited.add(currentUrl);
        this.logger.info(`Visiting URL: ${currentUrl}`);
 
-       const html = await this.fetchUrl(currentUrl);
+       const html = await this.fetchWithRetries(currentUrl, this.retries);
        if (!html) return;
 
-       // Extract links and forms
        const links = this.extractLinks(html);
-       const forms = this.extractForms(html);
 
-       for (const form of forms) {
-         resultForms.add(form);
+       // Filter out asset links
+       for (const link of links) {
+         if (assetExtensions.some((ext) => link.endsWith(ext))) {
+           this.logger.debug(`Ignoring asset link: ${link}`);
+           continue;
+         }
+         this.logger.debug(`Found link: ${link}`);
        }
 
        for (const link of links) {
-         if (!visited.has(link) && queue.size < depth) {
+         if (!visited.has(link) && queue.size < this.depth) {
            queue.add(link);
            this.logger.debug(`Added to queue: ${link}`);
          }
@@ -149,7 +189,7 @@
      };
 
      const processBatch = async () => {
-       const batch = Array.from(queue).slice(0, concurrency);
+       const batch = Array.from(queue).slice(0, this.concurrency);
        for (const url of batch) {
          queue.delete(url);
        }
@@ -157,19 +197,16 @@
      };
 
      this.logger.info(
-       `Starting crawl with depth: ${depth}, concurrency: ${concurrency}`,
+       `Starting crawl with depth: ${this.depth}, concurrency: ${this.concurrency}`,
      );
-     while (queue.size > 0 && visited.size < depth) {
+     while (queue.size > 0 && visited.size < this.depth) {
        await processBatch();
      }
 
     this.logger.info(
-       `Crawling completed. Total pages visited: ${resultLinks.size}, Total forms found: ${resultForms.size}`,
+       `Crawling completed. Total pages visited: ${resultLinks.size}`,
      );
 
-     return {
-       links: Array.from(resultLinks),
-       forms: Array.from(resultForms),
-     };
+     return Array.from(resultLinks);
    }
  }
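
`crawl()` now drains the queue in batches of `this.concurrency`, and despite its name `depth` caps the total number of URLs queued and visited rather than traversal depth. The batching idea in isolation (a sketch; the `Promise.all` call over each batch sits in unchanged context lines not shown in this diff):

```ts
// Batch-limited concurrency over a work queue: take up to `concurrency`
// items, remove them from the queue, process them in parallel, repeat.
async function drainQueue(
  queue: Set<string>,
  concurrency: number,
  worker: (url: string) => Promise<void>,
): Promise<void> {
  while (queue.size > 0) {
    const batch = Array.from(queue).slice(0, concurrency);
    for (const url of batch) queue.delete(url);
    // The next batch starts only after every request in this one completes.
    await Promise.all(batch.map(worker));
  }
}
```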
@@ -0,0 +1,29 @@
+ import winston from "winston";
+
+ export const createLogger = (label: string) =>
+   winston.createLogger({
+     levels: {
+       error: 0,
+       warn: 1,
+       info: 2,
+       http: 3,
+       verbose: 4,
+       debug: 5,
+       silly: 6,
+     },
+     format: winston.format.combine(
+       winston.format.label({ label }),
+       winston.format.colorize(),
+       winston.format.timestamp({
+         format: () => {
+           return new Date().toLocaleString("en-US");
+         },
+       }),
+       winston.format.align(),
+       winston.format.printf(
+         (info) =>
+           `\x1b[34m(${info.label})\x1b[0m \x1b[33m${info.timestamp}\x1b[0m [${info.level}]: ${info.message}`,
+       ),
+     ),
+     transports: [new winston.transports.Console()],
+   });
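
This new winston-based `createLogger` factory replaces the hand-rolled `Logger` class deleted below. Usage as implied by the call sites in this diff:

```ts
import { createLogger } from "./utils";

const logger = createLogger("Demo");

logger.info("Starting up"); // e.g. "(Demo) 11/27/2024, 10:15:00 AM [info]: Starting up"
logger.warn("Queue nearly full");
logger.error("Request failed");

// Caveat: createLogger defines custom level priorities but sets no `level`,
// so winston's default level ("info") applies and debug/verbose/silly calls,
// including SpiderScanner's logger.debug(...) lines, are dropped unless a
// level is configured.
logger.debug("Not printed by default");
```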
package/tsconfig.json CHANGED
@@ -2,9 +2,10 @@
    "include": ["./src/**/*.ts"],
    "compilerOptions": {
      "lib": ["es2023"],
-     "module": "ESNext",
+     "module": "CommonJS",
      "target": "es2022",
      "moduleResolution": "node",
+     "allowSyntheticDefaultImports": true,
 
      "rootDir": "./src",
      "outDir": "build",
@@ -17,13 +18,7 @@
      "forceConsistentCasingInFileNames": true,
      "declaration": true,
      "resolveJsonModule": true,
-     "emitDeclarationOnly": true,
-
-     // These two options can help resolve ESM issues
-     "allowSyntheticDefaultImports": true,
-     "isolatedModules": true,
-
-     // Ensure TypeScript recognizes .ts file extensions in ESM
-     "allowImportingTsExtensions": true
-   }
+     "allowImportingTsExtensions": false
+   },
+   "exclude": ["src/__tests__/**/*"]
  }
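
The compiler output flips from ESM (`ESNext`) to CommonJS, and `allowSyntheticDefaultImports` moves up next to the module settings, still enabled, so default-style imports of CommonJS dependencies keep type-checking. For example:

```ts
// With "allowSyntheticDefaultImports": true this default import type-checks
// even though isomorphic-fetch has no ES-module default export:
import fetch from "isomorphic-fetch";

// Without the flag, the import-assignment form would be required:
// import fetch = require("isomorphic-fetch");
```

Setting `allowImportingTsExtensions` to false is consistent with the rest of the change: TypeScript only permits that flag when no JavaScript is emitted, and `emitDeclarationOnly` was removed in the same hunk.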
@@ -1 +0,0 @@
1
- import { describe, it, test } from "node:test";
package/src/lib/logger.ts DELETED
@@ -1,43 +0,0 @@
- export default class Logger {
-   private moduleName: string;
-   private colors = {
-     error: "\x1b[31m",
-     info: "\x1b[32m",
-     warn: "\x1b[33m",
-     debug: "\x1b[35m",
-     reset: "\x1b[0m",
-     module: "\x1b[46m",
-   };
-
-   constructor(moduleName: string) {
-     this.moduleName = moduleName;
-   }
-
-   private formatMessage(
-     level: keyof typeof this.colors,
-     ...message: string[]
-   ): string {
-     const timestamp = new Date().toTimeString().split(" ")[0];
-     return `[${level}] ${this.colors[level]}${this.colors.reset}${this.colors[level]}[${timestamp}]${this.colors.reset} ${this.colors.module}[${this.moduleName}]${this.colors.reset} ${this.colors[level]}${message}${this.colors.reset}`;
-   }
-
-   public error(...message: string[]): void {
-     console.error(this.formatMessage("error", ...message));
-   }
-
-   public info(...message: string[]): void {
-     console.info(this.formatMessage("info", ...message));
-   }
-
-   public warn(...message: string[]): void {
-     console.warn(this.formatMessage("warn", ...message));
-   }
-
-   public log(...message: string[]): void {
-     console.log(this.formatMessage("info", ...message));
-   }
-
-   public debug(...message: string[]): void {
-     console.debug(this.formatMessage("debug", ...message));
-   }
- }
@@ -1,3 +0,0 @@
- import SpiderScanner from "./spider";
-
- export { SpiderScanner };