sentinel-scanner 1.1.0-alpha.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
+ import fs from "node:fs";
+ import path from "node:path";
+ import type { ArgumentsCamelCase, CommandModule } from "yargs";
+ import SpiderScanner from "../modules/spider";
+ import { createLogger } from "../utils";
+
+ export type SpiderScannerCLIOptions = {
+   url: string;
+   depth?: number;
+   output?: string;
+   concurrency?: number;
+   timeout?: number;
+   retries?: number;
+ };
+
+ const cliLogger = createLogger("CLI");
+
+ export const spiderCommand: CommandModule = {
+   command: "spider",
+   describe:
+     "Crawl a website and collect the URLs that are internal to it",
+   builder: (yargs) => {
+     return yargs
+       .option("url", {
+         alias: "u",
+         type: "string",
+         description: "The URL of the website to scan",
+         demandOption: true,
+         coerce: (url) => {
+           try {
+             new URL(url);
+
+             return url;
+           } catch (error) {
+             throw new Error(`Invalid URL: ${url}`);
+           }
+         },
+       })
+       .option("depth", {
+         alias: "d",
+         type: "number",
+         description: "The maximum depth to crawl",
+         default: 250,
+         coerce: (depth) => {
+           if (depth < 0) {
+             throw new Error("Depth must be a non-negative number");
+           }
+
+           if (depth > 250) {
+             throw new Error("Depth must not exceed 250");
+           }
+
+           return depth;
+         },
+       })
+       .option("output", {
+         alias: "o",
+         type: "string",
+         description:
+           "The output file to write the results to. Must be a JSON file",
+         coerce: (output) => {
+           try {
+             // Throws if the path is invalid; the file must be a JSON file
+             const resolvedPath = path.resolve(output);
+             const parsedPath = path.parse(resolvedPath);
+
+             if (parsedPath.ext !== ".json") {
+               throw new Error("Output file must be a JSON file");
+             }
+
+             if (fs.existsSync(resolvedPath)) {
+               throw new Error("Output file already exists");
+             }
+
+             return resolvedPath;
+           } catch (error) {
+             throw new Error(`Invalid output file: ${output}`);
+           }
+         },
+         default: getDefaultFilePath(),
+       })
+       .option("concurrency", {
+         alias: "c",
+         type: "number",
+         description: "The number of concurrent requests to make",
+         default: 10,
+         coerce: (concurrency) => {
+           if (concurrency < 1) {
+             throw new Error("Concurrency must be a positive number");
+           }
+
+           if (concurrency > 20) {
+             throw new Error("Concurrency must not exceed 20");
+           }
+
+           return concurrency;
+         },
+       })
+       .option("timeout", {
+         alias: "t",
+         type: "number",
+         description: "The timeout for each request in milliseconds",
+         default: 5000,
+         coerce: (timeout) => {
+           if (timeout < 0) {
+             throw new Error("Timeout must be a non-negative number");
+           }
+
+           if (timeout > 25_000) {
+             throw new Error("Timeout must not exceed 25,000 ms");
+           }
+
+           return timeout;
+         },
+       })
+       .option("retries", {
+         alias: "r",
+         type: "number",
+         description: "The number of retries for each request",
+         default: 3,
+         coerce: (retries) => {
+           if (retries < 0) {
+             throw new Error("Retries must be a non-negative number");
+           }
+
+           if (retries > 10) {
+             throw new Error("Retries must not exceed 10");
+           }
+
+           return retries;
+         },
+       });
+   },
+   handler: async (args) => {
+     try {
+       const argData = args as ArgumentsCamelCase<SpiderScannerCLIOptions>;
+
+       const scanner = new SpiderScanner(argData.url, {
+         depth: argData.depth ?? 250,
+         concurrency: argData.concurrency ?? 10,
+         timeout: argData.timeout ?? 5000,
+         retries: argData.retries ?? 3,
+       });
+
+       cliLogger.info("Starting to crawl website");
+
+       const results = await scanner.crawl();
+
+       if (argData.output) {
+         fs.writeFileSync(argData.output, JSON.stringify(results, null, 2));
+         cliLogger.info(`Results written to ${argData.output}`);
+       } else {
+         const resolvedPath = getDefaultFilePath();
+         fs.writeFileSync(resolvedPath, JSON.stringify(results, null, 2));
+         cliLogger.info(`Results written to ${resolvedPath}`);
+       }
+     } catch (error) {
+       if (error instanceof Error) {
+         cliLogger.error(error.message);
+       }
+       cliLogger.error("Failed to run spider command");
+       process.exit(1);
+     }
+   },
+ };
+
+ const getDefaultFilePath = () => {
+   try {
+     const resolvedDir = path.resolve("sentinel_output");
+     // Create the output directory if it does not exist
+     if (!fs.existsSync(resolvedDir)) {
+       fs.mkdirSync(resolvedDir);
+     }
+
+     const resolvedPath = path.resolve(
+       `sentinel_output/spider_${Date.now()}.json`,
+     );
+     // Guard against clobbering an existing file
+     if (fs.existsSync(resolvedPath)) {
+       throw new Error("Output file already exists");
+     }
+     const parsedPath = path.parse(resolvedPath);
+
+     if (parsedPath.ext !== ".json") {
+       throw new Error("Output file must be a JSON file");
+     }
+
+     return resolvedPath;
+   } catch (error) {
+     throw new Error("Invalid output file");
+   }
+ };
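The command module above is self-contained, but this diff does not show the CLI entrypoint that registers it. A minimal sketch of that wiring, assuming the new file lives at src/commands/spider.ts (the path and the entrypoint itself are assumptions, not part of the diff):

#!/usr/bin/env node
import yargs from "yargs";
import { hideBin } from "yargs/helpers";
// Hypothetical path; the diff does not show where this file lives.
import { spiderCommand } from "./commands/spider";

yargs(hideBin(process.argv))
  .command(spiderCommand)
  .demandCommand(1, "Specify a command to run")
  .strict()
  .parse();

With that wiring, the command would be invoked as `npx sentinel-scanner spider -u https://example.com -d 50 -o scan.json`, with `--concurrency`, `--timeout`, and `--retries` validated by the coerce handlers above.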
package/src/index.ts CHANGED
@@ -1,86 +1,3 @@
- #!/usr/bin/env node
+ import SpiderScanner, { type SpiderScannerOptions } from "./modules/spider";
 
- import yargs from "yargs";
- import { hideBin } from "yargs/helpers";
- import { SpiderScanner } from "./modules";
-
- const commandHandler = yargs(hideBin(process.argv));
-
- /**
-  * Command to scan for XSS vulnerabilities
-  *
-  * @param {string} url - URL to scan
-  * @param {string} wordlist - Path to wordlist file
-  * @returns {void}
-  *
-  * @example
-  * npx sentinel-scanner xss --url https://example.com
-  */
- commandHandler.command(
-   "xss",
-   "Scan for XSS vulnerabilities",
-   {
-     url: {
-       describe: "URL to scan",
-       demandOption: true,
-       type: "string",
-       coerce: (value) => {
-         try {
-           new URL(value);
-           return value;
-         } catch (err) {
-           throw new Error("Invalid URL format");
-         }
-       },
-     },
-     wordlist: {
-       describe: "Path to wordlist file",
-       type: "string",
-     },
-   },
-   (argv) => {
-     console.log("Scanning for XSS vulnerabilities...");
-     console.log(`URL: ${argv.url}`);
-     console.log(`Wordlist: ${argv.wordlist || "Default"}`);
-   },
- );
-
- // Command to Spider a website
- commandHandler.command(
-   "spider",
-   "Scan a website for vulnerabilities",
-   {
-     url: {
-       describe: "URL to scan",
-       demandOption: true,
-       type: "string",
-       coerce: (value) => {
-         try {
-           new URL(value);
-           return value;
-         } catch (err) {
-           throw new Error("Invalid URL format");
-         }
-       },
-     },
-   },
-   (argv) => {
-     const spider = new SpiderScanner(argv.url);
-
-     spider.crawl().then((output) => {
-       console.log(
-         JSON.stringify(
-           {
-             forms: output.forms,
-             links: output.links,
-           },
-           null,
-           2,
-         ),
-       );
-     });
-   },
- );
-
- // Parse arguments and handle commands
- commandHandler.parse();
+ export { SpiderScanner, type SpiderScannerOptions };
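With the yargs entrypoint gone from src/index.ts, the package's main entry now behaves as a plain library export. A short consumption sketch, assuming the published entry point resolves to this module (example.com is a placeholder target):

import SpiderScanner, { type SpiderScannerOptions } from "sentinel-scanner";

const options: SpiderScannerOptions = {
  depth: 50,        // stop queueing once 50 URLs are queued/visited
  concurrency: 5,
  timeout: 5000,    // per-request timeout in milliseconds
  retries: 2,
};

const scanner = new SpiderScanner("https://example.com", options);

// crawl() resolves to an array of internal URLs found on the site.
scanner.crawl().then((links) => {
  console.log(`Found ${links.length} internal links`);
});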
@@ -1,31 +1,43 @@
  import fetch from "isomorphic-fetch";
  import jsdom from "jsdom";
  import UserAgent from "user-agents";
- import Logger from "../../lib/logger";
+ import { createLogger } from "../../utils";
 
- export type FormOutput = {
-   id: number;
-   url: string;
-   fields: Array<{ name: string; id: string; class: string; type: string }>;
- };
-
- export type CrawlOutput = {
-   links: string[];
-   forms: FormOutput[];
- };
+ export interface SpiderScannerOptions {
+   depth?: number;
+   concurrency?: number;
+   retries?: number;
+   timeout?: number;
+ }
 
  export default class SpiderScanner {
    private header: Record<string, string> = {
      "User-Agent": new UserAgent().toString(),
    };
    private url: URL;
-   private logger = new Logger("Spider");
+   private logger = createLogger("SpiderScanner");
+
+   private depth: number;
+   private concurrency: number;
+   private retries: number;
+   private timeout: number;
+
+   constructor(url: string, options: SpiderScannerOptions = {}) {
+     const {
+       depth = 250,
+       concurrency = 5,
+       retries = 3,
+       timeout = 5000,
+     } = options;
+     this.depth = depth;
+     this.concurrency = concurrency;
+     this.retries = retries;
+     this.timeout = timeout;
 
-   constructor(url: string) {
      try {
        this.url = new URL(url);
        this.logger.info(
-         `Initialized with URL: ${url} & User-Agent: ${this.header["User-Agent"]}`,
+         `Initialized with URL: ${url}, User-Agent: ${this.header["User-Agent"]}`,
        );
      } catch (error) {
        if (error instanceof TypeError) {
@@ -37,7 +49,6 @@ export default class SpiderScanner {
      }
    }
 
-   // Normalize domains (removes 'www.')
    private normalizeDomain(domain: string): string {
      return domain.startsWith("www.") ? domain.slice(4) : domain;
    }
@@ -61,20 +72,42 @@ export default class SpiderScanner {
      }
    }
 
-   private async fetchUrl(url: string): Promise<string | null> {
-     try {
-       this.logger.debug(`Fetching URL: ${url}`);
-       const response = await fetch(url, { headers: this.header });
-       if (!response.ok) {
+   private async fetchWithRetries(
+     url: string,
+     retries: number,
+   ): Promise<string | null> {
+     for (let attempt = 1; attempt <= retries; attempt++) {
+       const controller = new AbortController();
+       const timeoutId = setTimeout(() => controller.abort(), this.timeout);
+
+       try {
+         this.logger.debug(`Fetching URL (Attempt ${attempt}): ${url}`);
+         const randomUserAgent = new UserAgent().toString();
+         this.logger.info(`Changing User-Agent to: ${randomUserAgent}`);
+         this.header["User-Agent"] = randomUserAgent;
+         const response = await fetch(url, {
+           headers: this.header,
+           signal: controller.signal,
+           redirect: "follow",
+         });
+
+         clearTimeout(timeoutId);
+
+         if (response.ok) {
+           this.logger.info(`Successfully fetched URL: ${url}`);
+           return await response.text();
+         }
+
          this.logger.warn(`Failed to fetch URL (${response.status}): ${url}`);
-         return null;
+       } catch (error) {
+         if ((error as Error).name === "AbortError") {
+           this.logger.warn(`Fetch timed out: ${url}`);
+         } else {
+           this.logger.error(`Error fetching URL: ${url} - ${error}`);
+         }
        }
-       this.logger.info(`Successfully fetched URL: ${url}`);
-       return await response.text();
-     } catch (error) {
-       this.logger.error(`Error fetching URL: ${url} - ${error}`);
-       return null;
-     }
    }
+     return null;
  }
 
  private extractLinks(html: string): string[] {
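fetchWithRetries drives each attempt's timeout through an AbortController armed with setTimeout. The same pattern in isolation, as a hedged sketch assuming Node 18+ global fetch; the linear backoff between attempts is illustrative only and not something the package does:

// Minimal sketch of fetch-with-timeout-and-retry (assumes Node 18+ fetch).
async function fetchTextWithRetries(
  url: string,
  retries: number,
  timeoutMs: number,
): Promise<string | null> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const response = await fetch(url, { signal: controller.signal });
      if (response.ok) return await response.text();
    } catch {
      // AbortError (timeout) or a network error; fall through to retry.
    } finally {
      clearTimeout(timeoutId); // always disarm the timer
    }
    await new Promise((r) => setTimeout(r, attempt * 500)); // illustrative backoff
  }
  return null;
}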
@@ -89,36 +122,40 @@ export default class SpiderScanner {
      return internalLinks.map((link) => this.convertRelativeUrlToAbsolute(link));
    }
 
-   private extractForms(html: string): FormOutput[] {
-     const { JSDOM } = jsdom;
-     const dom = new JSDOM(html);
-     const forms = Array.from(dom.window.document.querySelectorAll("form"));
-     this.logger.debug(`Extracted ${forms.length} forms from HTML content`);
-
-     return forms.map((form, index) => {
-       const fields = Array.from(form.querySelectorAll("input")).map(
-         (input) => ({
-           name: input.name,
-           id: input.id,
-           class: input.className,
-           type: input.type,
-         }),
-       );
-
-       return {
-         id: index,
-         url: this.url.href,
-         fields,
-       };
-     });
-   }
-
-   // Main function to scan the website with concurrency support and return both links and forms
-   public async crawl(depth = 250, concurrency = 5): Promise<CrawlOutput> {
+   public async crawl(): Promise<Array<string>> {
      const visited = new Set<string>();
      const queue = new Set<string>([this.url.href]);
      const resultLinks = new Set<string>();
-     const resultForms = new Set<FormOutput>();
+
+     // Assets to ignore
+     const assetExtensions = [
+       ".css",
+       ".js",
+       ".png",
+       ".jpg",
+       ".jpeg",
+       ".gif",
+       ".svg",
+       ".ico",
+       ".webp",
+       ".mp4",
+       ".mp3",
+       ".wav",
+       ".avi",
+       ".mov",
+       ".webm",
+       ".pdf",
+       ".doc",
+       ".docx",
+       ".xls",
+       ".xlsx",
+       ".ppt",
+       ".pptx",
+       ".zip",
+       ".rar",
+       ".tar",
+       ".gz",
+     ];
 
      const fetchAndExtract = async (currentUrl: string) => {
        if (visited.has(currentUrl)) {
@@ -128,19 +165,22 @@ export default class SpiderScanner {
        visited.add(currentUrl);
        this.logger.info(`Visiting URL: ${currentUrl}`);
 
-       const html = await this.fetchUrl(currentUrl);
+       const html = await this.fetchWithRetries(currentUrl, this.retries);
        if (!html) return;
 
-       // Extract links and forms
        const links = this.extractLinks(html);
-       const forms = this.extractForms(html);
 
-       for (const form of forms) {
-         resultForms.add(form);
+       // Filter out asset links so they are never queued for crawling
+       const pageLinks: string[] = [];
+       for (const link of links) {
+         if (assetExtensions.some((ext) => link.endsWith(ext))) {
+           this.logger.debug(`Ignoring asset link: ${link}`);
+           continue;
+         }
+         this.logger.debug(`Found link: ${link}`);
+         pageLinks.push(link);
        }
 
-       for (const link of links) {
-         if (!visited.has(link) && queue.size < depth) {
+       for (const link of pageLinks) {
+         if (!visited.has(link) && queue.size < this.depth) {
            queue.add(link);
            this.logger.debug(`Added to queue: ${link}`);
          }
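Note that the filter above matches extensions against the raw link string, so a link such as /app.js?v=2 would slip past an endsWith test. A hedged alternative that tests the URL's pathname instead; isAssetUrl is a hypothetical helper, not part of the package:

// Compare against the URL's pathname so query strings and fragments
// don't defeat the extension check. Hypothetical helper for illustration.
const isAssetUrl = (link: string, extensions: string[]): boolean => {
  try {
    const { pathname } = new URL(link);
    return extensions.some((ext) => pathname.toLowerCase().endsWith(ext));
  } catch {
    return false; // unparsable links are left for the crawler to reject
  }
};

// Example: both of these would be treated as assets.
isAssetUrl("https://example.com/app.js?v=2", [".js"]); // true
isAssetUrl("https://example.com/logo.PNG", [".png"]); // true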
@@ -149,7 +189,7 @@ export default class SpiderScanner {
      };
 
      const processBatch = async () => {
-       const batch = Array.from(queue).slice(0, concurrency);
+       const batch = Array.from(queue).slice(0, this.concurrency);
        for (const url of batch) {
          queue.delete(url);
        }
@@ -157,19 +197,16 @@ export default class SpiderScanner {
      };
 
      this.logger.info(
-       `Starting crawl with depth: ${depth}, concurrency: ${concurrency}`,
+       `Starting crawl with depth: ${this.depth}, concurrency: ${this.concurrency}`,
      );
-     while (queue.size > 0 && visited.size < depth) {
+     while (queue.size > 0 && visited.size < this.depth) {
        await processBatch();
      }
 
      this.logger.info(
-       `Crawling completed. Total pages visited: ${resultLinks.size}, Total forms found: ${resultForms.size}`,
+       `Crawling completed. Total pages visited: ${resultLinks.size}`,
      );
 
-     return {
-       links: Array.from(resultLinks),
-       forms: Array.from(resultForms),
-     };
+     return Array.from(resultLinks);
    }
  }
@@ -0,0 +1,29 @@
+ import winston from "winston";
+
+ export const createLogger = (label: string) =>
+   winston.createLogger({
+     levels: {
+       error: 0,
+       warn: 1,
+       info: 2,
+       http: 3,
+       verbose: 4,
+       debug: 5,
+       silly: 6,
+     },
+     format: winston.format.combine(
+       winston.format.label({ label }),
+       winston.format.colorize(),
+       winston.format.timestamp({
+         format: () => {
+           return new Date().toLocaleString("en-US");
+         },
+       }),
+       winston.format.align(),
+       winston.format.printf(
+         (info) =>
+           `\x1b[34m(${info.label})\x1b[0m \x1b[33m${info.timestamp}\x1b[0m [${info.level}]: ${info.message}`,
+       ),
+     ),
+     transports: [new winston.transports.Console()],
+   });
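A short usage sketch for the new logger (not from the package). createLogger defines custom levels but passes no level, and winston defaults to the info level, so debug calls such as the crawler's queue logging are suppressed unless the level is raised:

import { createLogger } from "./utils";

const log = createLogger("Demo");

log.info("visible at the default level");
log.debug("suppressed: winston's default level is info");

// Raising the level enables the crawler's debug output as well.
log.level = "debug";
log.debug("now visible");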
package/tsconfig.json CHANGED
@@ -2,9 +2,10 @@
    "include": ["./src/**/*.ts"],
    "compilerOptions": {
      "lib": ["es2023"],
-     "module": "ESNext",
+     "module": "CommonJS",
      "target": "es2022",
      "moduleResolution": "node",
+     "allowSyntheticDefaultImports": true,
 
      "rootDir": "./src",
      "outDir": "build",
@@ -17,13 +18,7 @@
      "forceConsistentCasingInFileNames": true,
      "declaration": true,
      "resolveJsonModule": true,
-     "emitDeclarationOnly": true,
-
-     // These two options can help resolve ESM issues
-     "allowSyntheticDefaultImports": true,
-     "isolatedModules": true,
-
-     // Ensure TypeScript recognizes .ts file extensions in ESM
-     "allowImportingTsExtensions": true
-   }
+     "allowImportingTsExtensions": false
+   },
+   "exclude": ["src/__tests__/**/*"]
  }
@@ -1 +0,0 @@
- import { describe, it, test } from "node:test";
package/src/lib/logger.ts DELETED
@@ -1,43 +0,0 @@
- export default class Logger {
-   private moduleName: string;
-   private colors = {
-     error: "\x1b[31m",
-     info: "\x1b[32m",
-     warn: "\x1b[33m",
-     debug: "\x1b[35m",
-     reset: "\x1b[0m",
-     module: "\x1b[46m",
-   };
-
-   constructor(moduleName: string) {
-     this.moduleName = moduleName;
-   }
-
-   private formatMessage(
-     level: keyof typeof this.colors,
-     ...message: string[]
-   ): string {
-     const timestamp = new Date().toTimeString().split(" ")[0];
-     return `[${level}] ${this.colors[level]}${this.colors.reset}${this.colors[level]}[${timestamp}]${this.colors.reset} ${this.colors.module}[${this.moduleName}]${this.colors.reset} ${this.colors[level]}${message}${this.colors.reset}`;
-   }
-
-   public error(...message: string[]): void {
-     console.error(this.formatMessage("error", ...message));
-   }
-
-   public info(...message: string[]): void {
-     console.info(this.formatMessage("info", ...message));
-   }
-
-   public warn(...message: string[]): void {
-     console.warn(this.formatMessage("warn", ...message));
-   }
-
-   public log(...message: string[]): void {
-     console.log(this.formatMessage("info", ...message));
-   }
-
-   public debug(...message: string[]): void {
-     console.debug(this.formatMessage("debug", ...message));
-   }
- }
@@ -1,3 +0,0 @@
- import SpiderScanner from "./spider";
-
- export { SpiderScanner };