sentinel-scanner 2.4.1 → 2.5.0

This diff shows the changes between publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in those public registries.
Files changed (63)
  1. package/.cspell.json +19 -51
  2. package/.github/ISSUE_TEMPLATE/config.yml +1 -1
  3. package/.github/PULL_REQUEST_TEMPLATE.md +2 -2
  4. package/.github/workflows/stale.yaml +20 -0
  5. package/.github/workflows/webapp-scanner.yml +31 -19
  6. package/.github/workflows/welcome.yaml +9 -55
  7. package/.husky/pre-commit +35 -0
  8. package/.vscode/extensions.json +7 -0
  9. package/.vscode/launch.json +20 -0
  10. package/.vscode/settings.json +32 -0
  11. package/.vscode/tasks.json +24 -0
  12. package/CHANGELOG.md +7 -3
  13. package/CODE_OF_CONDUCT.md +4 -1
  14. package/CONTRIBUTING.md +2 -2
  15. package/README.md +5 -0
  16. package/api-extractor.json +30 -30
  17. package/biome.json +6 -32
  18. package/build/index.d.ts +0 -147
  19. package/build/index.js +111 -2633
  20. package/package.json +69 -102
  21. package/scripts/build.ts +68 -78
  22. package/scripts/test.ts +55 -0
  23. package/src/__tests__/spider.test.ts +44 -0
  24. package/src/commands/spider.ts +61 -126
  25. package/src/index.ts +23 -26
  26. package/src/spider/index.ts +345 -0
  27. package/src/spider/types/index.ts +21 -0
  28. package/src/spider/types/schema.ts +54 -0
  29. package/src/utils/index.ts +199 -3
  30. package/tsconfig.json +19 -18
  31. package/.github/assets/header.png +0 -0
  32. package/.github/dependabot.yml +0 -11
  33. package/.github/workflows/pr.yaml +0 -64
  34. package/.nsprc +0 -3
  35. package/build/bin.js +0 -2679
  36. package/build/xhr-sync-worker.js +0 -59
  37. package/docs/CNAME +0 -1
  38. package/docs/disclaimer.md +0 -68
  39. package/docs/headers/details.md +0 -114
  40. package/docs/headers/index.md +0 -73
  41. package/docs/index.md +0 -82
  42. package/docs/ports/index.md +0 -86
  43. package/docs/scoring.md +0 -91
  44. package/docs/spider/index.md +0 -61
  45. package/docs/sql-injection/details.md +0 -109
  46. package/docs/sql-injection/index.md +0 -73
  47. package/docs/xss/details.md +0 -92
  48. package/docs/xss/index.md +0 -73
  49. package/scripts/extras/document-shim.js +0 -4
  50. package/src/bin.ts +0 -29
  51. package/src/commands/header.ts +0 -150
  52. package/src/commands/ports.ts +0 -175
  53. package/src/commands/sqli.ts +0 -150
  54. package/src/commands/xss.ts +0 -149
  55. package/src/modules/headers/headers.ts +0 -161
  56. package/src/modules/headers/index.ts +0 -179
  57. package/src/modules/ports/index.ts +0 -311
  58. package/src/modules/spider/index.ts +0 -178
  59. package/src/modules/sqli/index.ts +0 -486
  60. package/src/modules/sqli/payloads.json +0 -156
  61. package/src/modules/xss/index.ts +0 -401
  62. package/src/modules/xss/payloads.json +0 -2692
  63. package/src/utils/types.ts +0 -7
package/src/commands/spider.ts CHANGED
@@ -1,17 +1,9 @@
  import fs from "node:fs";
  import path from "node:path";
  import type { ArgumentsCamelCase, CommandModule } from "yargs";
- import SpiderScanner from "../modules/spider/index.js";
- import { createLogger } from "../utils/index.js";
-
- export type SpiderScannerCLIOptions = {
- url: string;
- depth?: number;
- output?: string;
- concurrency?: number;
- timeout?: number;
- retries?: number;
- };
+ import { Spider } from "../spider/index.ts";
+ import type { SpiderConstructorOptions } from "../spider/types/index.ts";
+ import { createLogger } from "../utils/index.ts";

  const cliLogger = createLogger("CLI");

@@ -21,145 +13,88 @@ export const spiderCommand: CommandModule = {
  "Crawl a website and get an array of URLs which are internal to the website",
  builder: (yargs) => {
  return yargs
- .option("url", {
- alias: "u",
+ .option("seed", {
+ alias: "s",
+ describe: "The seed URL to start crawling",
  type: "string",
- description: "The URL of the website to scan",
  demandOption: true,
- coerce: (url) => {
+ coerce: (arg) => {
  try {
- new URL(url);
+ new URL(arg);

- return url;
+ return arg;
  } catch (error) {
- throw new Error(`Invalid URL: ${url}`);
+ cliLogger.error(error instanceof Error ? error.message : error);
+ process.exit(1);
  }
  },
  })
- .option("depth", {
+ .option("maxDepth", {
  alias: "d",
+ describe: "The maximum depth to crawl",
  type: "number",
- description: "The maximum depth to crawl",
  default: 250,
- coerce: (depth) => {
- if (depth < 0) {
- throw new Error("Depth must be a positive number");
- }
-
- if (depth > 250) {
- throw new Error("Depth must be less than 250");
- }
-
- return depth;
- },
  })
- .option("output", {
- alias: "o",
- type: "string",
- description:
- "The output file to write the results to. Must be a JSON file",
- coerce: (output) => {
- try {
- // Should throw an error if the path is invalid
- // Should Be A JSON File
- const resolvedPath = path.resolve(output);
- const parsedPath = path.parse(resolvedPath);
-
- if (parsedPath.ext !== ".json") {
- throw new Error("Output file must be a JSON file");
- }
-
- if (fs.existsSync(resolvedPath)) {
- throw new Error("Output file already exists");
- }
-
- return resolvedPath;
- } catch (error) {
- throw new Error(`Invalid output file: ${output}`);
- }
- },
- default: getDefaultFilePath(),
+ .option("maxRetries", {
+ alias: "r",
+ describe: "The maximum retries for a failed request",
+ type: "number",
+ default: 3,
  })
  .option("concurrency", {
  alias: "c",
+ describe: "The number of concurrent requests",
  type: "number",
- description: "The number of concurrent requests to make",
- default: 10,
- coerce: (concurrency) => {
- if (concurrency < 1) {
- throw new Error("Concurrency must be a positive number");
- }
-
- if (concurrency > 20) {
- throw new Error("Concurrency must be less than 20");
- }
-
- return concurrency;
- },
+ default: 30,
+ })
+ .option("ignoreExternalLinks", {
+ alias: "i",
+ describe: "Ignore external links",
+ type: "boolean",
+ default: true,
  })
  .option("timeout", {
  alias: "t",
+ describe: "Request timeout in milliseconds",
  type: "number",
- description: "The timeout for each request in milliseconds",
- default: 5000,
- coerce: (timeout) => {
- if (timeout < 0) {
- throw new Error("Timeout must be a positive number");
- }
-
- if (timeout > 25_000) {
- throw new Error("Timeout must be less than 25,000");
- }
-
- return timeout;
- },
+ default: 8000,
  })
- .option("retries", {
- alias: "r",
- type: "number",
- description: "The number of retries for each request",
- default: 3,
- coerce: (retries) => {
- if (retries < 0) {
- throw new Error("Retries must be a positive number");
- }
-
- if (retries > 10) {
- throw new Error("Retries must be less than 10");
- }
-
- return retries;
- },
+ .option("output", {
+ alias: "o",
+ describe: "Output file path",
+ type: "string",
+ default: getDefaultFilePath(),
  });
  },
- handler: async (args) => {
+ handler: async (yargs) => {
+ const args = yargs as ArgumentsCamelCase<{
+ seed: string;
+ maxDepth: number;
+ maxRetries: number;
+ concurrency: number;
+ ignoreExternalLinks: boolean;
+ timeout: number;
+ output: string;
+ }>;
+ const opts: SpiderConstructorOptions = {
+ seed: args.seed,
+ maxDepth: args.maxDepth || 250,
+ maxRetries: args.maxRetries || 3,
+ concurrency: args.concurrency || 30,
+ ignoreExternalLinks:
+ args.ignoreExternalLinks === undefined
+ ? true
+ : args.ignoreExternalLinks,
+ timeout: args.timeout || 8000,
+ };
+
+ const scanner = new Spider(opts);
  try {
- const argData = args as ArgumentsCamelCase<SpiderScannerCLIOptions>;
-
- const scanner = new SpiderScanner(argData.url, {
- depth: argData.depth ?? 250,
- concurrency: argData.concurrency ?? 10,
- timeout: argData.timeout ?? 5000,
- retries: argData.retries ?? 3,
- });
-
- cliLogger.info("Starting to crawl website");
-
- const results = await scanner.crawl();
-
- if (argData.output) {
- fs.writeFileSync(argData.output, JSON.stringify(results, null, 2));
- cliLogger.info(`Results written to ${argData.output}`);
- } else {
- const resolvedPath = getDefaultFilePath();
- fs.writeFileSync(resolvedPath, JSON.stringify(results, null, 2));
- cliLogger.info(`Results written to ${resolvedPath}`);
- }
+ const results = await scanner.scan();
+ fs.writeFileSync(args.output, JSON.stringify(results, null, 2));
+ cliLogger.info(`Results saved to ${args.output}`);
  } catch (error) {
- if (error instanceof Error) {
- cliLogger.error(error.message);
- }
- cliLogger.error("Failed to run spider command");
+ cliLogger.error(error instanceof Error ? error.message : error);
  process.exit(1);
  }
  },
@@ -187,7 +122,7 @@ const getDefaultFilePath = () => {
  }

  return resolvedPath;
- } catch (error) {
+ } catch (_) {
  throw new Error("Invalid output file");
  }
  };
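Taken together, the rewrite replaces the old --url/--depth/--retries flags with --seed, --maxDepth, and --maxRetries, and moves range validation out of the yargs coerce callbacks into the Spider's zod schema. A rough sketch of exercising the reworked command in isolation (the "spider" command name, seed URL, and output path are illustrative assumptions, not taken from this diff):

```ts
import yargs from "yargs";
import { spiderCommand } from "./src/commands/spider.ts";

// Hand-built argv showing the new flag names; running this would really crawl the seed.
await yargs([
  "spider",                        // assumed command name registered by spiderCommand
  "--seed", "https://example.com", // replaces --url / -u
  "--maxDepth", "3",               // replaces --depth / -d
  "--concurrency", "10",
  "--output", "./crawl.json",      // hypothetical output path
])
  .command(spiderCommand)
  .demandCommand()
  .strict()
  .parseAsync();
```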
package/src/index.ts CHANGED
@@ -1,27 +1,24 @@
- import HeaderScanner, {
- type HeadersData,
- type HeaderScannerOptions,
- } from "./modules/headers/index.js";
- import PortsScanner, { type PortsScannerOpts } from "./modules/ports/index.js";
- import SpiderScanner, {
- type SpiderScannerOptions,
- } from "./modules/spider/index.js";
- import SqliScanner, {
- type SqliConstructorOpts,
- type SQLErrors,
- type SupportedDatabases,
- } from "./modules/sqli/index.js";
- import XSSScanner, { type XSSConstructorOpts } from "./modules/xss/index.js";
- import { Vulnerability } from "./utils/types.js";
+ #!/usr/bin/env node --no-warnings

- export { SpiderScanner, type SpiderScannerOptions };
- export { XSSScanner, type XSSConstructorOpts };
- export { Vulnerability };
- export { HeaderScanner, type HeadersData, type HeaderScannerOptions };
- export {
- SqliScanner,
- type SqliConstructorOpts,
- type SQLErrors,
- type SupportedDatabases,
- };
- export { PortsScanner, type PortsScannerOpts };
+ import yargs from "yargs";
+ import { hideBin } from "yargs/helpers";
+ import { spiderCommand } from "./commands/spider.ts";
+ import { getPackageData } from "./utils/index.ts";
+
+ const { name, version } = getPackageData();
+
+ const commandHandler = yargs(hideBin(process.argv));
+
+ commandHandler.demandCommand();
+ commandHandler.version(version);
+ commandHandler.scriptName(name);
+ commandHandler.usage("Usage: $0 <command> [options]");
+ commandHandler.help().alias("help", "h");
+ commandHandler.version().alias("version", "v");
+ commandHandler.strict();
+ commandHandler.showHelpOnFail(true);
+
+ commandHandler.command(spiderCommand);
+
+ commandHandler.version().alias("version", "v");
+ commandHandler.parse();
package/src/spider/index.ts ADDED
@@ -0,0 +1,345 @@
+ import { parse } from "node-html-parser";
+ import {
+ chunkArray,
+ createLogger,
+ safeStringify,
+ withRetries,
+ } from "../utils/index.ts";
+ import type { SpiderConstructorOptions, SpiderResults } from "./types/index.ts";
+ import { SpiderConstructorOptionsSchema } from "./types/schema.ts";
+
+ /**
+ * The Spider class is used to scan a web application by crawling through the URLs and extracting information.
+ * The Spider class uses a breadth-first search algorithm to crawl through the URLs.
+ */
+ export class Spider {
+ /**
+ * The logger instance for the Spider class.
+ * We use this to log messages to the console.
+ */
+ private logger = createLogger("Spider");
+ /**
+ * The options provided to the Spider constructor.
+ * These options are used to configure the behavior of the Spider.
+ *
+ * @see SpiderConstructorOptionsSchema
+ */
+ private options: SpiderConstructorOptions;
+
+ constructor(opts: SpiderConstructorOptions) {
+ /**
+ * Validate the options provided to the Spider constructor.
+ */
+ const result = SpiderConstructorOptionsSchema.safeParse(opts);
+
+ if (result.error !== undefined || !result.data) {
+ /**
+ * If the options are invalid, we should throw an error and exit the process.
+ */
+ this.logger.error("Invalid options provided to the Spider constructor.");
+ throw new Error(
+ `Invalid options provided to the Spider constructor: ${safeStringify(
+ result.error,
+ )}`,
+ );
+ }
+
+ /**
+ * If the options are valid, we can proceed with the initialization of the Spider.
+ */
+ this.options = SpiderConstructorOptionsSchema.parse(opts);
+
+ /**
+ * Log the options provided to the Spider constructor.
+ */
+ this.logger.info(
+ `Spider created with options: ${safeStringify(this.options)}`,
+ );
+ }
+
+ private isInternalUrl(url: string): boolean {
+ /**
+ * Check if the URL starts with the seed URL.
+ * If it does, then it is an internal URL.
+ * Otherwise, it is an external URL.
+ */
+ return new URL(url).origin === new URL(this.options.seed).origin;
+ }
+
+ /**
+ * Fetches the page at the given URL.
+ * @param url - The URL of the page to fetch.
+ * @returns A promise that resolves to the fetched page content as a string.
+ */
+ private async fetchPage(url: string): Promise<string | null> {
+ const fetchUrl = (url: string) => {
+ this.logger.info(`Fetching URL: ${url}`);
+ /**
+ * We return a promise that resolves when the first of the following promises resolves.
+ * This allows us to handle cases where the request takes too long to complete.
+ */
+ return Promise.race([
+ /**
+ * We use the `fetch` API to fetch the page at the given URL.
+ */
+ fetch(url, {
+ /**
+ * We set the `redirect` option to "follow" to follow redirects.
+ *
+ * @see https://developer.mozilla.org/en-US/docs/Web/API/WindowOrWorkerGlobalScope/fetch#parameters
+ */
+ redirect: "follow",
+ })
+ /**
+ * We extract the text content of the response.
+ * This will be the HTML content of the page.
+ */
+ .then((res) => res.text()),
+ /**
+ * We create a promise that resolves to null after the specified timeout.
+ * This handles cases where the request takes too long to complete.
+ */
+ new Promise<string | null>((resolve) =>
+ setTimeout(() => resolve(null), this.options.timeout),
+ ),
+ ]);
+ };
+
+ /**
+ * Fetch the page at the given URL.
+ * We use the `withRetries` utility function to retry the fetch operation
+ * in case of a failure.
+ */
+ return await withRetries(fetchUrl, [url], this.options.maxRetries);
+ }
+
+ private normalizeUrl(baseUrl: string, href: string): string | null {
+ try {
+ if (href.startsWith("http://") || href.startsWith("https://")) {
+ return new URL(href).toString();
+ }
+
+ if (href.startsWith("/")) {
+ return new URL(href, baseUrl).toString();
+ }
+
+ const url = new URL(href, baseUrl);
+
+ return url.toString();
+ } catch (error) {
+ /**
+ * If an error occurs while normalizing the URL, log the error and return null.
+ */
+ this.logger.error(`Error normalizing URL: ${href}`);
+ this.logger.error(error);
+ return null;
+ }
+ }
+
+ /**
+ * Extracts URLs from the given HTML content using a URL regex and a base URL.
+ *
+ * @param html - The HTML content from which to extract URLs.
+ * @param baseUrl - The base URL used to normalize the extracted URLs.
+ * @returns An array of extracted URLs.
+ */
+ private extractUrls(html: string, baseUrl: string) {
+ const extracted = new Set<string>();
+
+ /**
+ * Parse the HTML content using the `parse` function from the `node-html-parser` package.
+ */
+ const root = parse(html);
+
+ /**
+ * Find all the anchor elements in the HTML content.
+ */
+ const anchors = root
+ .querySelectorAll("a")
+ .concat(root.querySelectorAll("link"))
+ .concat(root.querySelectorAll("area"))
+ .concat(root.querySelectorAll("base"));
+
+ /**
+ * Iterate over the anchor elements.
+ */
+ for (const anchor of anchors) {
+ /**
+ * Extract the `href` attribute from the anchor element.
+ */
+ const href = anchor.getAttribute("href");
+
+ /**
+ * If the `href` attribute is not present, skip to the next anchor element.
+ */
+ if (!href) {
+ continue;
+ }
+
+ /**
+ * Normalize the extracted URL using the base URL.
+ */
+ const normalized = this.normalizeUrl(baseUrl, href);
+
+ if (normalized) {
+ if (
+ this.options.ignoreExternalLinks &&
+ !this.isInternalUrl(normalized)
+ ) {
+ this.logger.info(`Ignoring external URL: ${normalized}`);
+ continue;
+ }
+
+ extracted.add(normalized);
+ }
+ }
+
+ /**
+ * Return the array of extracted URLs.
+ */
+ return Array.from(extracted);
+ }
+
+ /**
+ * Scans the web application by crawling through the URLs and extracting information.
+ * Returns the spider results containing the seed URL and the visited URLs.
+ *
+ * @returns A promise that resolves to the spider results.
+ * @see SpiderResults
+ */
+ public async scan(): Promise<SpiderResults> {
+ this.logger.info("Starting scan...");
+ /**
+ * Create a set to keep track of visited URLs.
+ * This set will be used to avoid visiting the same URL multiple times.
+ * Initially, the set is empty.
+ */
+ const visited = new Set<string>();
+ /**
+ * Create a queue of URLs to visit.
+ * Initially, the queue contains only the seed URL.
+ */
+ const queue = new Set<string>([this.options.seed]);
+
+ /**
+ * Process a URL.
+ * This function fetches the content of the URL, extracts URLs from the content, and adds the extracted URLs to the queue.
+ * It also adds the current URL to the set of visited URLs.
+ *
+ * @param url - The URL to process.
+ * @returns A promise that resolves to an array of extracted URLs.
+ */
+ const processUrl = async (url: string) => {
+ this.logger.info(`Processing URL: ${url}`);
+ /**
+ * Fetch the page at the given URL.
+ */
+ const pageContent = await this.fetchPage(url);
+
+ /**
+ * Extract URLs from the fetched page content.
+ * and log the number of URLs extracted.
+ */
+ if (!pageContent) {
+ this.logger.warn(`Failed to fetch URL: ${url}`);
+ return [];
+ }
+
+ const extractedUrls = this.extractUrls(pageContent, url);
+ this.logger.info(`Extracted ${extractedUrls.length} URLs`);
+
+ /**
+ * Add the current URL to the set of visited URLs.
+ */
+ visited.add(url);
+
+ /**
+ * Return the extracted URLs.
+ */
+ return extractedUrls;
+ };
+
+ /**
+ * Process a batch of URLs.
+ * This function fetches the content of the URLs in the batch,
+ * extracts URLs from the content, and adds the extracted URLs to the queue.
+ * It also removes the processed URLs from the queue.
+ *
+ * @param batch - The batch of URLs to process.
+ * @returns A promise that resolves when the batch is processed.
+ */
+ const processBatch = async (batch: string[]) => {
+ /**
+ * Process the URLs in the current batch.
+ */
+ const promises = batch.map(processUrl);
+ /**
+ * Wait for all the promises to resolve.
+ */
+ const results = await Promise.all(promises);
+ /**
+ * Flatten the results to get a single array of URLs.
+ * Then log the number of URLs processed.
+ */
+ const urls = results.flat();
+ this.logger.info(`Processed ${batch.length} URLs`);
+
+ /**
+ * Add the extracted URLs to the queue.
+ */
+ for (const url of urls) {
+ this.logger.info(`Adding URL to queue: ${url}`);
+ if (!visited.has(url)) {
+ this.logger.info(`URL not visited: ${url}`);
+ queue.add(url);
+ visited.add(url);
+ }
+ }
+
+ /**
+ * Remove the processed URLs from the queue.
+ */
+ for (const url of batch) {
+ queue.delete(url);
+ }
+ };
+
+ /**
+ * Initialize the current depth to 0.
+ */
+ let currentDepth = 0;
+
+ while (queue.size > 0 && currentDepth < this.options.maxDepth) {
+ this.logger.info(`Processing depth: ${currentDepth}`);
+ /**
+ * Split the queue into batches of URLs.
+ */
+ const batches = chunkArray(Array.from(queue), this.options.concurrency);
+ /**
+ * Iterate over the batches of URLs.
+ */
+ for (const batch of batches) {
+ /**
+ * Process the current batch of URLs.
+ */
+ await withRetries(processBatch, [batch], this.options.maxRetries);
+ }
+
+ /**
+ * Increment the current depth.
+ */
+ currentDepth++;
+ this.logger.silly(`Processed depth: ${currentDepth}`);
+ }
+
+ /**
+ * Return The Spider Results
+ *
+ * @see SpiderResults
+ */
+ return {
+ seed: this.options.seed,
+ urls: Array.from(visited),
+ };
+ }
+ }
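The class above is a breadth-first crawler: scan() drains the queue in concurrency-sized batches (chunkArray), fetches each page with a timeout and retries (Promise.race plus withRetries), and collects every internal link it finds. A minimal usage sketch, assuming the class is imported directly from this source file; the seed URL and option values are illustrative:

```ts
import { Spider } from "./src/spider/index.ts";

const spider = new Spider({
  seed: "https://example.com", // illustrative seed URL
  maxDepth: 5,
  maxRetries: 3,
  concurrency: 10,
  ignoreExternalLinks: true,
  timeout: 8000,
});

// scan() resolves to { seed, urls } once the queue is exhausted or maxDepth is reached.
const { seed, urls } = await spider.scan();
console.log(`Crawled ${urls.length} internal URLs starting from ${seed}`);
```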
package/src/spider/types/index.ts ADDED
@@ -0,0 +1,21 @@
+ import type { z } from "zod";
+ import type {
+ SpiderConstructorOptionsSchema,
+ SpiderResultSchema,
+ } from "./schema.ts";
+
+ /**
+ * Represents the options for constructing a Spider object.
+ *
+ * @see SpiderConstructorOptionsSchema
+ */
+ export type SpiderConstructorOptions = z.infer<
+ typeof SpiderConstructorOptionsSchema
+ >;
+
+ /**
+ * Represents the result of a Spider object.
+ *
+ * @see SpiderResultSchema
+ */
+ export type SpiderResults = z.infer<typeof SpiderResultSchema>;
package/src/spider/types/schema.ts ADDED
@@ -0,0 +1,54 @@
+ import { z } from "zod";
+
+ /**
+ * Options for constructing a Spider instance.
+ */
+ export const SpiderConstructorOptionsSchema = z
+ .object({
+ /**
+ * The seed URL for the spider to start crawling from.
+ */
+ seed: z.string().url(),
+
+ /**
+ * The maximum depth of crawling. Defaults to 250.
+ */
+ maxDepth: z.number().int().positive().max(250).default(250),
+
+ /**
+ * The concurrency level for crawling. Defaults to 10.
+ */
+ concurrency: z.number().int().positive().max(30).default(30),
+
+ /**
+ * Whether to ignore external links. Defaults to true.
+ */
+ ignoreExternalLinks: z.boolean().default(true),
+ /**
+ * The maximum number of retries for failed requests. Defaults to 3.
+ */
+ maxRetries: z.number().int().positive().max(10).default(3),
+
+ /**
+ * The timeout for requests in milliseconds. Defaults to 5000.
+ */
+ timeout: z.number().int().positive().max(60_000).default(5000),
+ })
+ /**
+ * Ensure that default values are applied when the options are not provided.
+ */
+ .strict();
+
+ /**
+ * Represents the result of a spider operation.
+ */
+ export const SpiderResultSchema = z.object({
+ /**
+ * The seed URL used for the spider operation.
+ */
+ seed: z.string(),
+ /**
+ * An array of URLs found during the spider operation.
+ */
+ urls: z.array(z.string()),
+ });
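As a rough illustration of what .default() and .strict() buy here (standard zod behavior; the values are illustrative, not from the package): omitted fields are filled with their defaults at parse time, while unknown keys, such as the old 2.4.x depth option, are rejected outright.

```ts
import { SpiderConstructorOptionsSchema } from "./src/spider/types/schema.ts";

// Only the required field: the remaining options come from the schema defaults.
const parsed = SpiderConstructorOptionsSchema.parse({ seed: "https://example.com" });
// -> { seed: "https://example.com", maxDepth: 250, concurrency: 30,
//      ignoreExternalLinks: true, maxRetries: 3, timeout: 5000 }

// .strict() fails validation on unexpected keys instead of silently stripping them.
const legacy = SpiderConstructorOptionsSchema.safeParse({
  seed: "https://example.com",
  depth: 10, // old option name from 2.4.x
});
console.log(legacy.success); // false
```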