sentinel-scanner 1.1.0-alpha.1 → 1.1.0

@@ -0,0 +1,193 @@
+ import fs from "node:fs";
+ import path from "node:path";
+ import type { ArgumentsCamelCase, CommandModule } from "yargs";
+ import SpiderScanner from "../modules/spider";
+ import { createLogger } from "../utils";
+
+ export type SpiderScannerCLIOptions = {
+   url: string;
+   depth?: number;
+   output?: string;
+   concurrency?: number;
+   timeout?: number;
+   retries?: number;
+ };
+
+ const cliLogger = createLogger("CLI");
+
+ export const spiderCommand: CommandModule = {
+   command: "spider",
+   describe:
+     "Crawl a website and get an array of URLs which are internal to the website",
+   builder: (yargs) => {
+     return yargs
+       .option("url", {
+         alias: "u",
+         type: "string",
+         description: "The URL of the website to scan",
+         demandOption: true,
+         coerce: (url) => {
+           try {
+             new URL(url);
+
+             return url;
+           } catch (error) {
+             throw new Error(`Invalid URL: ${url}`);
+           }
+         },
+       })
+       .option("depth", {
+         alias: "d",
+         type: "number",
+         description: "The maximum depth to crawl",
+         default: 250,
+         coerce: (depth) => {
+           if (depth < 0) {
+             throw new Error("Depth must be a positive number");
+           }
+
+           if (depth > 250) {
+             throw new Error("Depth must be less than 250");
+           }
+
+           return depth;
+         },
+       })
+       .option("output", {
+         alias: "o",
+         type: "string",
+         description:
+           "The output file to write the results to. Must be a JSON file",
+         coerce: (output) => {
+           try {
+             // Should throw an error if the path is invalid
+             // Should be a JSON file
+             const resolvedPath = path.resolve(output);
+             const parsedPath = path.parse(resolvedPath);
+
+             if (parsedPath.ext !== ".json") {
+               throw new Error("Output file must be a JSON file");
+             }
+
+             if (fs.existsSync(resolvedPath)) {
+               throw new Error("Output file already exists");
+             }
+
+             return resolvedPath;
+           } catch (error) {
+             throw new Error(`Invalid output file: ${output}`);
+           }
+         },
+         default: getDefaultFilePath(),
+       })
+       .option("concurrency", {
+         alias: "c",
+         type: "number",
+         description: "The number of concurrent requests to make",
+         default: 10,
+         coerce: (concurrency) => {
+           if (concurrency < 1) {
+             throw new Error("Concurrency must be a positive number");
+           }
+
+           if (concurrency > 20) {
+             throw new Error("Concurrency must be less than 20");
+           }
+
+           return concurrency;
+         },
+       })
+       .option("timeout", {
+         alias: "t",
+         type: "number",
+         description: "The timeout for each request in milliseconds",
+         default: 5000,
+         coerce: (timeout) => {
+           if (timeout < 0) {
+             throw new Error("Timeout must be a positive number");
+           }
+
+           if (timeout > 25_000) {
+             throw new Error("Timeout must be less than 25,000");
+           }
+
+           return timeout;
+         },
+       })
+       .option("retries", {
+         alias: "r",
+         type: "number",
+         description: "The number of retries for each request",
+         default: 3,
+         coerce: (retries) => {
+           if (retries < 0) {
+             throw new Error("Retries must be a positive number");
+           }
+
+           if (retries > 10) {
+             throw new Error("Retries must be less than 10");
+           }
+
+           return retries;
+         },
+       });
+   },
+   handler: async (args) => {
+     try {
+       const argData = args as ArgumentsCamelCase<SpiderScannerCLIOptions>;
+
+       const scanner = new SpiderScanner(argData.url, {
+         depth: argData.depth ?? 250,
+         concurrency: argData.concurrency ?? 10,
+         timeout: argData.timeout ?? 5000,
+         retries: argData.retries ?? 3,
+       });
+
+       cliLogger.info("Starting to crawl website");
+
+       const results = await scanner.crawl();
+
+       if (argData.output) {
+         fs.writeFileSync(argData.output, JSON.stringify(results, null, 2));
+         cliLogger.info(`Results written to ${argData.output}`);
+       } else {
+         const resolvedPath = getDefaultFilePath();
+         fs.writeFileSync(resolvedPath, JSON.stringify(results, null, 2));
+         cliLogger.info(`Results written to ${resolvedPath}`);
+       }
+     } catch (error) {
+       if (error instanceof Error) {
+         cliLogger.error(error.message);
+       }
+       cliLogger.error("Failed to run spider command");
+       process.exit(1);
+     }
+   },
+ };
+
+ const getDefaultFilePath = () => {
+   try {
+     const resolvedDir = path.resolve("sentinel_output");
+     // Check if the directory exists
+     if (!fs.existsSync(resolvedDir)) {
+       fs.mkdirSync(resolvedDir);
+     }
+
+     const resolvedPath = path.resolve(
+       `sentinel_output/spider_${Date.now()}.json`,
+     );
+     // Check if the file already exists
+     if (fs.existsSync(resolvedPath)) {
+       throw new Error("Output file already exists");
+     }
+     const parsedPath = path.parse(resolvedPath);
+
+     if (parsedPath.ext !== ".json") {
+       throw new Error("Output file must be a JSON file");
+     }
+
+     return resolvedPath;
+   } catch (error) {
+     throw new Error("Invalid output file");
+   }
+ };
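
Note: the file header for the hunk above was not captured in this diff; judging by its imports, it is a new file one level below `src` that exports the `spider` command as a self-contained yargs `CommandModule`. A minimal sketch of how such a module could be wired into a CLI entry point (the `./commands/spider` path and the `.strict()`/`.demandCommand()` settings are assumptions, not shown in this diff):

```ts
import yargs from "yargs";
import { hideBin } from "yargs/helpers";
// Hypothetical path; the real location of spiderCommand is not shown in this diff.
import { spiderCommand } from "./commands/spider";

yargs(hideBin(process.argv))
  // A CommandModule bundles command, describe, builder, and handler,
  // so registration is a single call.
  .command(spiderCommand)
  .demandCommand(1, "Specify a command to run")
  .strict()
  .parse();
```

Because every option declares a `coerce` function, invalid input (a malformed URL, a depth over 250, a non-JSON output path) is rejected by yargs before the handler runs. One quirk worth noting: `output` also has a `default` of `getDefaultFilePath()`, which runs when the option list is built and creates `sentinel_output/` as a side effect, so `argData.output` is always set and the handler's `else` branch is effectively unreachable.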
package/src/index.ts CHANGED
@@ -1,86 +1,3 @@
- #!/usr/bin/env node
+ import SpiderScanner, { type SpiderScannerOptions } from "./modules/spider";
 
- import yargs from "yargs";
- import { hideBin } from "yargs/helpers";
- import { SpiderScanner } from "./modules";
-
- const commandHandler = yargs(hideBin(process.argv));
-
- /**
-  * Command to scan for XSS vulnerabilities
-  *
-  * @param {string} url - URL to scan
-  * @param {string} wordlist - Path to wordlist file
-  * @returns {void}
-  *
-  * @example
-  * npx sentinel-scanner xss --url https://example.com
-  */
- commandHandler.command(
-   "xss",
-   "Scan for XSS vulnerabilities",
-   {
-     url: {
-       describe: "URL to scan",
-       demandOption: true,
-       type: "string",
-       coerce: (value) => {
-         try {
-           new URL(value);
-           return value;
-         } catch (err) {
-           throw new Error("Invalid URL format");
-         }
-       },
-     },
-     wordlist: {
-       describe: "Path to wordlist file",
-       type: "string",
-     },
-   },
-   (argv) => {
-     console.log("Scanning for XSS vulnerabilities...");
-     console.log(`URL: ${argv.url}`);
-     console.log(`Wordlist: ${argv.wordlist || "Default"}`);
-   },
- );
-
- // Command to Spider a website
- commandHandler.command(
-   "spider",
-   "Scan a website for vulnerabilities",
-   {
-     url: {
-       describe: "URL to scan",
-       demandOption: true,
-       type: "string",
-       coerce: (value) => {
-         try {
-           new URL(value);
-           return value;
-         } catch (err) {
-           throw new Error("Invalid URL format");
-         }
-       },
-     },
-   },
-   (argv) => {
-     const spider = new SpiderScanner(argv.url);
-
-     spider.crawl().then((output) => {
-       console.log(
-         JSON.stringify(
-           {
-             forms: output.forms,
-             links: output.links,
-           },
-           null,
-           2,
-         ),
-       );
-     });
-   },
- );
-
- // Parse arguments and handle commands
- commandHandler.parse();
+ export { SpiderScanner, type SpiderScannerOptions };
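
With the shebang and inline yargs setup removed, `src/index.ts` becomes a plain library entry point. A hedged sketch of programmatic use, assuming the package's `main` field resolves to this file:

```ts
import { SpiderScanner, type SpiderScannerOptions } from "sentinel-scanner";

// All options are optional; the constructor supplies its own defaults
// (depth 250, concurrency 5, retries 3, timeout 5000).
const options: SpiderScannerOptions = {
  depth: 50, // visit at most 50 URLs instead of the default 250
  concurrency: 5,
  timeout: 5000, // per-request timeout in milliseconds
  retries: 3,
};

const scanner = new SpiderScanner("https://example.com", options);

// crawl() now resolves to a plain array of internal URLs
// (see the spider module diff below).
scanner.crawl().then((links) => {
  console.log(`Found ${links.length} internal links`);
});
```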
@@ -1,31 +1,43 @@
  import fetch from "isomorphic-fetch";
  import jsdom from "jsdom";
  import UserAgent from "user-agents";
- import Logger from "../../lib/logger";
+ import { createLogger } from "../../utils";
 
- export type FormOutput = {
-   id: number;
-   url: string;
-   fields: Array<{ name: string; id: string; class: string; type: string }>;
- };
-
- export type CrawlOutput = {
-   links: string[];
-   forms: FormOutput[];
- };
+ export interface SpiderScannerOptions {
+   depth?: number;
+   concurrency?: number;
+   retries?: number;
+   timeout?: number;
+ }
 
  export default class SpiderScanner {
    private header: Record<string, string> = {
      "User-Agent": new UserAgent().toString(),
    };
    private url: URL;
-   private logger = new Logger("Spider");
+   private logger = createLogger("SpiderScanner");
+
+   private depth: number;
+   private concurrency: number;
+   private retries: number;
+   private timeout: number;
+
+   constructor(url: string, options: SpiderScannerOptions = {}) {
+     const {
+       depth = 250,
+       concurrency = 5,
+       retries = 3,
+       timeout = 5000,
+     } = options;
+     this.depth = depth;
+     this.concurrency = concurrency;
+     this.retries = retries;
+     this.timeout = timeout;
 
-   constructor(url: string) {
      try {
        this.url = new URL(url);
        this.logger.info(
-         `Initialized with URL: ${url} & User-Agent: ${this.header["User-Agent"]}`,
+         `Initialized with URL: ${url}, User-Agent: ${this.header["User-Agent"]}`,
        );
      } catch (error) {
        if (error instanceof TypeError) {
@@ -37,7 +49,6 @@ export default class SpiderScanner {
      }
    }
 
-   // Normalize domains (removes 'www.')
    private normalizeDomain(domain: string): string {
      return domain.startsWith("www.") ? domain.slice(4) : domain;
    }
@@ -61,20 +72,42 @@
      }
    }
 
-   private async fetchUrl(url: string): Promise<string | null> {
-     try {
-       this.logger.debug(`Fetching URL: ${url}`);
-       const response = await fetch(url, { headers: this.header });
-       if (!response.ok) {
+   private async fetchWithRetries(
+     url: string,
+     retries: number,
+   ): Promise<string | null> {
+     for (let attempt = 1; attempt <= retries; attempt++) {
+       const controller = new AbortController();
+       const timeoutId = setTimeout(() => controller.abort(), this.timeout);
+
+       try {
+         this.logger.debug(`Fetching URL (Attempt ${attempt}): ${url}`);
+         const randomUserAgent = new UserAgent().toString();
+         this.logger.info(`Changing User-Agent to: ${randomUserAgent}`);
+         this.header["User-Agent"] = randomUserAgent;
+         const response = await fetch(url, {
+           headers: this.header,
+           signal: controller.signal,
+           redirect: "follow",
+         });
+
+         clearTimeout(timeoutId);
+
+         if (response.ok) {
+           this.logger.info(`Successfully fetched URL: ${url}`);
+           return await response.text();
+         }
+
          this.logger.warn(`Failed to fetch URL (${response.status}): ${url}`);
-         return null;
+       } catch (error) {
+         if ((error as Error).name === "AbortError") {
+           this.logger.warn(`Fetch timed out: ${url}`);
+         } else {
+           this.logger.error(`Error fetching URL: ${url} - ${error}`);
+         }
        }
-       this.logger.info(`Successfully fetched URL: ${url}`);
-       return await response.text();
-     } catch (error) {
-       this.logger.error(`Error fetching URL: ${url} - ${error}`);
-       return null;
      }
+     return null;
    }
 
    private extractLinks(html: string): string[] {
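
The single-shot `fetchUrl` above is replaced by `fetchWithRetries`, which rotates the User-Agent on every attempt and combines a per-attempt timeout (an `AbortController` aborted by `setTimeout`) with a bounded retry loop. The same pattern in isolation, as a sketch: `fetchText` is a hypothetical name, and it assumes a global `fetch` (Node 18+ or the isomorphic-fetch polyfill used above):

```ts
// Retry-with-timeout sketch mirroring fetchWithRetries.
async function fetchText(
  url: string,
  retries = 3,
  timeoutMs = 5000,
): Promise<string | null> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const controller = new AbortController();
    // Abort the in-flight request once the timeout elapses.
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const response = await fetch(url, { signal: controller.signal });
      if (response.ok) return await response.text();
      // Non-OK status: fall through and retry.
    } catch {
      // AbortError means the attempt timed out; anything else is a
      // network-level failure. Either way, move on to the next attempt.
    } finally {
      // Clearing the timer in finally also covers the error paths,
      // so no timer is left pending after a failed attempt.
      clearTimeout(timer);
    }
  }
  return null; // all attempts exhausted
}
```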
@@ -89,36 +122,40 @@ export default class SpiderScanner {
      return internalLinks.map((link) => this.convertRelativeUrlToAbsolute(link));
    }
 
-   private extractForms(html: string): FormOutput[] {
-     const { JSDOM } = jsdom;
-     const dom = new JSDOM(html);
-     const forms = Array.from(dom.window.document.querySelectorAll("form"));
-     this.logger.debug(`Extracted ${forms.length} forms from HTML content`);
-
-     return forms.map((form, index) => {
-       const fields = Array.from(form.querySelectorAll("input")).map(
-         (input) => ({
-           name: input.name,
-           id: input.id,
-           class: input.className,
-           type: input.type,
-         }),
-       );
-
-       return {
-         id: index,
-         url: this.url.href,
-         fields,
-       };
-     });
-   }
-
-   // Main function to scan the website with concurrency support and return both links and forms
-   public async crawl(depth = 250, concurrency = 5): Promise<CrawlOutput> {
+   public async crawl(): Promise<Array<string>> {
      const visited = new Set<string>();
      const queue = new Set<string>([this.url.href]);
      const resultLinks = new Set<string>();
-     const resultForms = new Set<FormOutput>();
+
+     // Assets to ignore
+     const assetExtensions = [
+       ".css",
+       ".js",
+       ".png",
+       ".jpg",
+       ".jpeg",
+       ".gif",
+       ".svg",
+       ".ico",
+       ".webp",
+       ".mp4",
+       ".mp3",
+       ".wav",
+       ".avi",
+       ".mov",
+       ".webm",
+       ".pdf",
+       ".doc",
+       ".docx",
+       ".xls",
+       ".xlsx",
+       ".ppt",
+       ".pptx",
+       ".zip",
+       ".rar",
+       ".tar",
+       ".gz",
+     ];
 
      const fetchAndExtract = async (currentUrl: string) => {
        if (visited.has(currentUrl)) {
@@ -128,19 +165,22 @@
        visited.add(currentUrl);
        this.logger.info(`Visiting URL: ${currentUrl}`);
 
-       const html = await this.fetchUrl(currentUrl);
+       const html = await this.fetchWithRetries(currentUrl, this.retries);
        if (!html) return;
 
-       // Extract links and forms
        const links = this.extractLinks(html);
-       const forms = this.extractForms(html);
 
-       for (const form of forms) {
-         resultForms.add(form);
+       // Filter out asset links
+       for (const link of links) {
+         if (assetExtensions.some((ext) => link.endsWith(ext))) {
+           this.logger.debug(`Ignoring asset link: ${link}`);
+           continue;
+         }
+         this.logger.debug(`Found link: ${link}`);
        }
 
        for (const link of links) {
-         if (!visited.has(link) && queue.size < depth) {
+         if (!visited.has(link) && queue.size < this.depth) {
            queue.add(link);
            this.logger.debug(`Added to queue: ${link}`);
          }
@@ -149,7 +189,7 @@
      };
 
      const processBatch = async () => {
-       const batch = Array.from(queue).slice(0, concurrency);
+       const batch = Array.from(queue).slice(0, this.concurrency);
        for (const url of batch) {
          queue.delete(url);
        }
@@ -157,19 +197,16 @@
      };
 
      this.logger.info(
-       `Starting crawl with depth: ${depth}, concurrency: ${concurrency}`,
+       `Starting crawl with depth: ${this.depth}, concurrency: ${this.concurrency}`,
      );
-     while (queue.size > 0 && visited.size < depth) {
+     while (queue.size > 0 && visited.size < this.depth) {
        await processBatch();
      }
 
     this.logger.info(
-       `Crawling completed. Total pages visited: ${resultLinks.size}, Total forms found: ${resultForms.size}`,
+       `Crawling completed. Total pages visited: ${resultLinks.size}`,
      );
 
-     return {
-       links: Array.from(resultLinks),
-       forms: Array.from(resultForms),
-     };
+     return Array.from(resultLinks);
    }
  }
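
`crawl()` now drains the queue in batches of `this.concurrency`, and despite its name `depth` caps the total number of URLs queued and visited rather than traversal depth. The batching idea in isolation (a sketch; the `Promise.all` call over each batch sits in unchanged context lines not shown in this diff):

```ts
// Batch-limited concurrency over a work queue: take up to `concurrency`
// items, remove them from the queue, process them in parallel, repeat.
async function drainQueue(
  queue: Set<string>,
  concurrency: number,
  worker: (url: string) => Promise<void>,
): Promise<void> {
  while (queue.size > 0) {
    const batch = Array.from(queue).slice(0, concurrency);
    for (const url of batch) queue.delete(url);
    // The next batch starts only after every request in this one completes.
    await Promise.all(batch.map(worker));
  }
}
```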
@@ -0,0 +1,29 @@
+ import winston from "winston";
+
+ export const createLogger = (label: string) =>
+   winston.createLogger({
+     levels: {
+       error: 0,
+       warn: 1,
+       info: 2,
+       http: 3,
+       verbose: 4,
+       debug: 5,
+       silly: 6,
+     },
+     format: winston.format.combine(
+       winston.format.label({ label }),
+       winston.format.colorize(),
+       winston.format.timestamp({
+         format: () => {
+           return new Date().toLocaleString("en-US");
+         },
+       }),
+       winston.format.align(),
+       winston.format.printf(
+         (info) =>
+           `\x1b[34m(${info.label})\x1b[0m \x1b[33m${info.timestamp}\x1b[0m [${info.level}]: ${info.message}`,
+       ),
+     ),
+     transports: [new winston.transports.Console()],
+   });
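
This new winston-based `createLogger` factory replaces the hand-rolled `Logger` class deleted below. Usage as implied by the call sites in this diff:

```ts
import { createLogger } from "./utils";

const logger = createLogger("Demo");

logger.info("Starting up"); // e.g. "(Demo) 11/27/2024, 10:15:00 AM [info]: Starting up"
logger.warn("Queue nearly full");
logger.error("Request failed");

// Caveat: createLogger defines custom level priorities but sets no `level`,
// so winston's default level ("info") applies and debug/verbose/silly calls,
// including SpiderScanner's logger.debug(...) lines, are dropped unless a
// level is configured.
logger.debug("Not printed by default");
```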
package/tsconfig.json CHANGED
@@ -2,9 +2,10 @@
    "include": ["./src/**/*.ts"],
    "compilerOptions": {
      "lib": ["es2023"],
-     "module": "ESNext",
+     "module": "CommonJS",
      "target": "es2022",
      "moduleResolution": "node",
+     "allowSyntheticDefaultImports": true,
 
      "rootDir": "./src",
      "outDir": "build",
@@ -17,13 +18,7 @@
      "forceConsistentCasingInFileNames": true,
      "declaration": true,
      "resolveJsonModule": true,
-     "emitDeclarationOnly": true,
-
-     // These two options can help resolve ESM issues
-     "allowSyntheticDefaultImports": true,
-     "isolatedModules": true,
-
-     // Ensure TypeScript recognizes .ts file extensions in ESM
-     "allowImportingTsExtensions": true
-   }
+     "allowImportingTsExtensions": false
+   },
+   "exclude": ["src/__tests__/**/*"]
  }
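
The compiler output flips from ESM (`ESNext`) to CommonJS, and `allowSyntheticDefaultImports` moves up next to the module settings, still enabled, so default-style imports of CommonJS dependencies keep type-checking. For example:

```ts
// With "allowSyntheticDefaultImports": true this default import type-checks
// even though isomorphic-fetch has no ES-module default export:
import fetch from "isomorphic-fetch";

// Without the flag, the import-assignment form would be required:
// import fetch = require("isomorphic-fetch");
```

Setting `allowImportingTsExtensions` to false is consistent with the rest of the change: TypeScript only permits that flag when no JavaScript is emitted, and `emitDeclarationOnly` was removed in the same hunk.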
@@ -1 +0,0 @@
1
- import { describe, it, test } from "node:test";
package/src/lib/logger.ts DELETED
@@ -1,43 +0,0 @@
- export default class Logger {
-   private moduleName: string;
-   private colors = {
-     error: "\x1b[31m",
-     info: "\x1b[32m",
-     warn: "\x1b[33m",
-     debug: "\x1b[35m",
-     reset: "\x1b[0m",
-     module: "\x1b[46m",
-   };
-
-   constructor(moduleName: string) {
-     this.moduleName = moduleName;
-   }
-
-   private formatMessage(
-     level: keyof typeof this.colors,
-     ...message: string[]
-   ): string {
-     const timestamp = new Date().toTimeString().split(" ")[0];
-     return `[${level}] ${this.colors[level]}${this.colors.reset}${this.colors[level]}[${timestamp}]${this.colors.reset} ${this.colors.module}[${this.moduleName}]${this.colors.reset} ${this.colors[level]}${message}${this.colors.reset}`;
-   }
-
-   public error(...message: string[]): void {
-     console.error(this.formatMessage("error", ...message));
-   }
-
-   public info(...message: string[]): void {
-     console.info(this.formatMessage("info", ...message));
-   }
-
-   public warn(...message: string[]): void {
-     console.warn(this.formatMessage("warn", ...message));
-   }
-
-   public log(...message: string[]): void {
-     console.log(this.formatMessage("info", ...message));
-   }
-
-   public debug(...message: string[]): void {
-     console.debug(this.formatMessage("debug", ...message));
-   }
- }
@@ -1,3 +0,0 @@
- import SpiderScanner from "./spider";
-
- export { SpiderScanner };