@crawlith/core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analyze.d.ts +29 -8
  4. package/dist/analysis/analyze.js +325 -221
  5. package/dist/analysis/clustering.d.ts +23 -0
  6. package/dist/analysis/clustering.js +206 -0
  7. package/dist/analysis/content.d.ts +1 -1
  8. package/dist/analysis/content.js +11 -5
  9. package/dist/analysis/duplicate.d.ts +34 -0
  10. package/dist/analysis/duplicate.js +305 -0
  11. package/dist/analysis/heading.d.ts +116 -0
  12. package/dist/analysis/heading.js +356 -0
  13. package/dist/analysis/images.d.ts +1 -1
  14. package/dist/analysis/images.js +6 -5
  15. package/dist/analysis/links.d.ts +1 -1
  16. package/dist/analysis/links.js +8 -8
  17. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  18. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  19. package/dist/analysis/scoring.js +4 -1
  20. package/dist/analysis/seo.d.ts +8 -4
  21. package/dist/analysis/seo.js +41 -30
  22. package/dist/analysis/soft404.d.ts +17 -0
  23. package/dist/analysis/soft404.js +62 -0
  24. package/dist/analysis/structuredData.d.ts +1 -1
  25. package/dist/analysis/structuredData.js +5 -4
  26. package/dist/application/index.d.ts +2 -0
  27. package/dist/application/index.js +2 -0
  28. package/dist/application/usecase.d.ts +3 -0
  29. package/dist/application/usecase.js +1 -0
  30. package/dist/application/usecases.d.ts +114 -0
  31. package/dist/application/usecases.js +201 -0
  32. package/dist/audit/index.js +1 -1
  33. package/dist/audit/transport.d.ts +1 -1
  34. package/dist/audit/transport.js +5 -4
  35. package/dist/audit/types.d.ts +1 -0
  36. package/dist/constants.d.ts +17 -0
  37. package/dist/constants.js +23 -0
  38. package/dist/core/scope/scopeManager.js +3 -0
  39. package/dist/crawler/crawl.d.ts +2 -2
  40. package/dist/crawler/crawler.d.ts +17 -5
  41. package/dist/crawler/crawler.js +259 -94
  42. package/dist/crawler/fetcher.d.ts +1 -1
  43. package/dist/crawler/fetcher.js +6 -6
  44. package/dist/crawler/metricsRunner.d.ts +21 -1
  45. package/dist/crawler/metricsRunner.js +181 -60
  46. package/dist/crawler/normalize.d.ts +41 -0
  47. package/dist/crawler/normalize.js +119 -3
  48. package/dist/crawler/parser.d.ts +1 -3
  49. package/dist/crawler/parser.js +2 -49
  50. package/dist/crawler/resolver.d.ts +11 -0
  51. package/dist/crawler/resolver.js +67 -0
  52. package/dist/crawler/sitemap.d.ts +4 -1
  53. package/dist/crawler/sitemap.js +24 -18
  54. package/dist/crawler/trap.d.ts +5 -1
  55. package/dist/crawler/trap.js +23 -2
  56. package/dist/db/CrawlithDB.d.ts +110 -0
  57. package/dist/db/CrawlithDB.js +500 -0
  58. package/dist/db/graphLoader.js +15 -32
  59. package/dist/db/index.d.ts +9 -1
  60. package/dist/db/index.js +39 -31
  61. package/dist/db/migrations.d.ts +2 -0
  62. package/dist/db/{schema.js → migrations.js} +90 -43
  63. package/dist/db/pluginRegistry.d.ts +9 -0
  64. package/dist/db/pluginRegistry.js +19 -0
  65. package/dist/db/repositories/EdgeRepository.d.ts +5 -0
  66. package/dist/db/repositories/EdgeRepository.js +7 -0
  67. package/dist/db/repositories/MetricsRepository.d.ts +13 -8
  68. package/dist/db/repositories/MetricsRepository.js +14 -6
  69. package/dist/db/repositories/PageRepository.d.ts +5 -3
  70. package/dist/db/repositories/PageRepository.js +68 -17
  71. package/dist/db/repositories/SiteRepository.d.ts +6 -0
  72. package/dist/db/repositories/SiteRepository.js +4 -0
  73. package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
  74. package/dist/db/repositories/SnapshotRepository.js +48 -10
  75. package/dist/db/reset.d.ts +9 -0
  76. package/dist/db/reset.js +32 -0
  77. package/dist/db/statements.d.ts +12 -0
  78. package/dist/db/statements.js +40 -0
  79. package/dist/diff/compare.d.ts +0 -5
  80. package/dist/diff/compare.js +0 -12
  81. package/dist/diff/service.d.ts +16 -0
  82. package/dist/diff/service.js +41 -0
  83. package/dist/domain/index.d.ts +4 -0
  84. package/dist/domain/index.js +4 -0
  85. package/dist/events.d.ts +8 -0
  86. package/dist/graph/graph.d.ts +20 -42
  87. package/dist/graph/graph.js +12 -16
  88. package/dist/graph/hits.d.ts +23 -0
  89. package/dist/graph/hits.js +111 -0
  90. package/dist/graph/metrics.d.ts +0 -4
  91. package/dist/graph/metrics.js +19 -15
  92. package/dist/graph/pagerank.d.ts +17 -4
  93. package/dist/graph/pagerank.js +126 -93
  94. package/dist/index.d.ts +27 -9
  95. package/dist/index.js +27 -9
  96. package/dist/lock/lockManager.d.ts +1 -0
  97. package/dist/lock/lockManager.js +15 -0
  98. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  99. package/dist/plugin-system/plugin-cli.js +31 -0
  100. package/dist/plugin-system/plugin-config.d.ts +16 -0
  101. package/dist/plugin-system/plugin-config.js +36 -0
  102. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  103. package/dist/plugin-system/plugin-loader.js +122 -0
  104. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  105. package/dist/plugin-system/plugin-registry.js +167 -0
  106. package/dist/plugin-system/plugin-types.d.ts +205 -0
  107. package/dist/plugin-system/plugin-types.js +1 -0
  108. package/dist/ports/index.d.ts +9 -0
  109. package/dist/ports/index.js +1 -0
  110. package/dist/report/export.d.ts +3 -0
  111. package/dist/report/export.js +81 -0
  112. package/dist/report/insight.d.ts +27 -0
  113. package/dist/report/insight.js +103 -0
  114. package/dist/scoring/health.d.ts +17 -11
  115. package/dist/scoring/health.js +183 -140
  116. package/dist/utils/chalk.d.ts +6 -0
  117. package/dist/utils/chalk.js +41 -0
  118. package/dist/utils/secureConfig.d.ts +23 -0
  119. package/dist/utils/secureConfig.js +128 -0
  120. package/package.json +10 -4
  121. package/CHANGELOG.md +0 -13
  122. package/dist/db/schema.d.ts +0 -2
  123. package/dist/graph/cluster.d.ts +0 -6
  124. package/dist/graph/cluster.js +0 -221
  125. package/dist/graph/duplicate.d.ts +0 -10
  126. package/dist/graph/duplicate.js +0 -302
  127. package/dist/scoring/hits.d.ts +0 -10
  128. package/dist/scoring/hits.js +0 -131
  129. package/scripts/copy-assets.js +0 -37
  130. package/src/analysis/analysis_list.html +0 -35
  131. package/src/analysis/analysis_page.html +0 -123
  132. package/src/analysis/analyze.ts +0 -505
  133. package/src/analysis/content.ts +0 -62
  134. package/src/analysis/images.ts +0 -28
  135. package/src/analysis/links.ts +0 -41
  136. package/src/analysis/scoring.ts +0 -66
  137. package/src/analysis/seo.ts +0 -82
  138. package/src/analysis/structuredData.ts +0 -62
  139. package/src/analysis/templates.ts +0 -9
  140. package/src/audit/dns.ts +0 -49
  141. package/src/audit/headers.ts +0 -98
  142. package/src/audit/index.ts +0 -66
  143. package/src/audit/scoring.ts +0 -232
  144. package/src/audit/transport.ts +0 -258
  145. package/src/audit/types.ts +0 -102
  146. package/src/core/network/proxyAdapter.ts +0 -21
  147. package/src/core/network/rateLimiter.ts +0 -39
  148. package/src/core/network/redirectController.ts +0 -47
  149. package/src/core/network/responseLimiter.ts +0 -34
  150. package/src/core/network/retryPolicy.ts +0 -57
  151. package/src/core/scope/domainFilter.ts +0 -45
  152. package/src/core/scope/scopeManager.ts +0 -52
  153. package/src/core/scope/subdomainPolicy.ts +0 -39
  154. package/src/core/security/ipGuard.ts +0 -171
  155. package/src/crawler/crawl.ts +0 -9
  156. package/src/crawler/crawler.ts +0 -601
  157. package/src/crawler/extract.ts +0 -39
  158. package/src/crawler/fetcher.ts +0 -251
  159. package/src/crawler/metricsRunner.ts +0 -137
  160. package/src/crawler/normalize.ts +0 -108
  161. package/src/crawler/parser.ts +0 -190
  162. package/src/crawler/sitemap.ts +0 -76
  163. package/src/crawler/trap.ts +0 -96
  164. package/src/db/graphLoader.ts +0 -135
  165. package/src/db/index.ts +0 -75
  166. package/src/db/repositories/EdgeRepository.ts +0 -43
  167. package/src/db/repositories/MetricsRepository.ts +0 -63
  168. package/src/db/repositories/PageRepository.ts +0 -228
  169. package/src/db/repositories/SiteRepository.ts +0 -43
  170. package/src/db/repositories/SnapshotRepository.ts +0 -99
  171. package/src/db/schema.ts +0 -177
  172. package/src/diff/compare.ts +0 -84
  173. package/src/events.ts +0 -16
  174. package/src/graph/cluster.ts +0 -246
  175. package/src/graph/duplicate.ts +0 -350
  176. package/src/graph/graph.ts +0 -192
  177. package/src/graph/metrics.ts +0 -125
  178. package/src/graph/pagerank.ts +0 -126
  179. package/src/graph/simhash.ts +0 -76
  180. package/src/index.ts +0 -33
  181. package/src/lock/hashKey.ts +0 -51
  182. package/src/lock/lockManager.ts +0 -132
  183. package/src/lock/pidCheck.ts +0 -13
  184. package/src/report/crawl.html +0 -879
  185. package/src/report/crawlExport.ts +0 -58
  186. package/src/report/crawl_template.ts +0 -9
  187. package/src/report/html.ts +0 -27
  188. package/src/scoring/health.ts +0 -241
  189. package/src/scoring/hits.ts +0 -153
  190. package/src/scoring/orphanSeverity.ts +0 -176
  191. package/src/utils/version.ts +0 -18
  192. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  193. package/tests/analysis.unit.test.ts +0 -142
  194. package/tests/analyze.integration.test.ts +0 -133
  195. package/tests/analyze_markdown.test.ts +0 -98
  196. package/tests/audit/audit.test.ts +0 -101
  197. package/tests/audit/dns.test.ts +0 -31
  198. package/tests/audit/headers.test.ts +0 -45
  199. package/tests/audit/scoring.test.ts +0 -133
  200. package/tests/audit/security.test.ts +0 -12
  201. package/tests/audit/transport.test.ts +0 -111
  202. package/tests/clustering.test.ts +0 -118
  203. package/tests/clustering_risk.test.ts +0 -118
  204. package/tests/crawler.test.ts +0 -364
  205. package/tests/db/index.test.ts +0 -134
  206. package/tests/db/repositories.test.ts +0 -115
  207. package/tests/db.test.ts +0 -159
  208. package/tests/db_repos.test.ts +0 -72
  209. package/tests/diff.test.ts +0 -67
  210. package/tests/duplicate.test.ts +0 -110
  211. package/tests/extract.test.ts +0 -86
  212. package/tests/fetcher.test.ts +0 -110
  213. package/tests/fetcher_safety.test.ts +0 -91
  214. package/tests/fixtures/analyze-crawl.json +0 -26
  215. package/tests/graph/graph.test.ts +0 -100
  216. package/tests/graphLoader.test.ts +0 -124
  217. package/tests/hits.test.ts +0 -134
  218. package/tests/html_report.test.ts +0 -59
  219. package/tests/ipGuard.test.ts +0 -73
  220. package/tests/lock/lockManager.test.ts +0 -198
  221. package/tests/metrics.test.ts +0 -196
  222. package/tests/normalize.test.ts +0 -88
  223. package/tests/orphanSeverity.test.ts +0 -160
  224. package/tests/pagerank.test.ts +0 -98
  225. package/tests/parser.test.ts +0 -117
  226. package/tests/proxy_safety.test.ts +0 -57
  227. package/tests/redirect_safety.test.ts +0 -77
  228. package/tests/renderAnalysisCsv.test.ts +0 -183
  229. package/tests/safety.test.ts +0 -126
  230. package/tests/scope.test.ts +0 -84
  231. package/tests/scoring.test.ts +0 -60
  232. package/tests/sitemap.test.ts +0 -100
  233. package/tests/soft404.test.ts +0 -41
  234. package/tests/ssrf_fix.test.ts +0 -69
  235. package/tests/trap.test.ts +0 -39
  236. package/tests/visualization_data.test.ts +0 -46
  237. package/tsconfig.json +0 -11
@@ -1,258 +0,0 @@
1
- import https from 'node:https';
2
- import http from 'node:http';
3
- import tls from 'node:tls';
4
- import { URL } from 'node:url';
5
- import { IPGuard } from '../core/security/ipGuard.js';
6
- import { TransportDiagnostics, PerformanceMetrics, CertificateInfo, RedirectInfo, AuditIssue } from './types.js';
7
- import { IncomingMessage } from 'node:http';
8
-
9
/**
 * Outcome of one HTTP(S) request performed by `executeRequest`.
 */
interface RequestResult {
  /** The URL string that was requested. */
  url: string;
  /** The response message object handed to the request callback. */
  response: IncomingMessage;
  /** Concatenation of every chunk read from the response stream. */
  body: Buffer;
  /** Durations of the individual request phases. */
  timings: {
    dns: number;
    tcp: number;
    tls: number;
    ttfb: number;
    total: number;
  };
  /** Socket the response arrived on (a TLS socket for https requests). */
  socket: any;
  /** Absolute redirect target for a 3xx response with a usable Location header, else null. */
  redirectUrl: string | null;
}
23
-
24
/**
 * Requests `targetUrl`, following at most 10 redirects, and gathers transport
 * diagnostics (TLS, certificate, HTTP details), timing metrics and audit issues
 * for the final response.
 *
 * The hostname of every hop is checked with `IPGuard.validateHost` before the
 * request for that hop is issued.
 *
 * @param targetUrl - URL to start from.
 * @param timeout - Forwarded unchanged to `executeRequest` for every hop.
 * @returns Diagnostics and metrics for the final (non-redirect) response, plus the issues collected.
 * @throws Error if a hop fails the IPGuard check, if a request or the processing
 *   of its result fails (the original error is attached as `cause`), or if the
 *   redirect limit is exhausted.
 */
export async function analyzeTransport(targetUrl: string, timeout: number): Promise<{
  transport: TransportDiagnostics;
  performance: PerformanceMetrics;
  issues: AuditIssue[];
}> {
  const maxRedirects = 10;
  const redirects: RedirectInfo[] = [];
  const issues: AuditIssue[] = [];
  let currentUrl = targetUrl;
  let redirectCount = 0;
  // Sum of the total request time of every hop that answered with a redirect.
  let totalRedirectTime = 0;

  for (let hop = 0; hop < maxRedirects; hop += 1) {
    const { hostname } = new URL(currentUrl);
    const hostIsSafe = await IPGuard.validateHost(hostname);
    if (!hostIsSafe) {
      throw new Error(`Blocked: Redirect to internal/private IP prohibited (${currentUrl})`);
    }

    try {
      const result = await executeRequest(currentUrl, timeout);

      if (result.redirectUrl) {
        // Record the hop and move on to its target.
        redirects.push({
          url: currentUrl,
          statusCode: result.response.statusCode || 0,
          location: result.redirectUrl
        });
        redirectCount += 1;
        totalRedirectTime += result.timings.total;
        currentUrl = result.redirectUrl;
        continue;
      }

      // No redirect: this is the final response.
      const { response, body, timings, socket } = result;

      let certificate: CertificateInfo | null = null;
      let tlsVersion: string | null = null;
      let cipherSuite: string | null = null;
      let negotiatedAlpn: string | null = null;

      if (socket instanceof tls.TLSSocket) {
        const peerCert = socket.getPeerCertificate(true);
        tlsVersion = socket.getProtocol();
        const cipher = socket.getCipher();
        cipherSuite = cipher ? cipher.name : null;
        negotiatedAlpn = socket.alpnProtocol || null;

        const hasCertificate = peerCert && Object.keys(peerCert).length > 0;
        if (hasCertificate) {
          const msPerDay = 1000 * 60 * 60 * 24;
          const msUntilExpiry = new Date(peerCert.valid_to).getTime() - Date.now();

          certificate = {
            subject: peerCert.subject?.CN || 'Unknown',
            issuer: peerCert.issuer?.CN || 'Unknown',
            validFrom: peerCert.valid_from,
            validTo: peerCert.valid_to,
            daysUntilExpiry: Math.floor(msUntilExpiry / msPerDay),
            // Self-signed is inferred purely from issuer CN matching subject CN.
            isSelfSigned: peerCert.issuer && peerCert.subject && peerCert.issuer.CN === peerCert.subject.CN,
            isValidChain: socket.authorized,
            fingerprint: peerCert.fingerprint,
            serialNumber: peerCert.serialNumber,
            subjectAltName: peerCert.subjectaltname
          };

          if (!socket.authorized) {
            issues.push({
              id: 'cert-invalid',
              severity: 'severe',
              category: 'tls',
              message: `Certificate validation failed: ${socket.authorizationError}`,
              scorePenalty: 30
            });
          }
        }
      }

      const { headers, httpVersion } = response;

      const contentEncoding = headers['content-encoding'];
      const compression: string[] = contentEncoding ? [contentEncoding] : [];

      // A missing Connection header counts as keep-alive.
      const connectionHeader = headers['connection'];
      const keepAlive = !connectionHeader || connectionHeader.toLowerCase() !== 'close';

      // Header size is measured on a reconstructed status line + header block.
      const statusLine = `HTTP/${response.httpVersion} ${response.statusCode} ${response.statusMessage}\r\n`;
      const headerLines = Object.entries(headers).map(([name, value]) => `${name}: ${value}`);
      const headerSize = Buffer.byteLength(statusLine + headerLines.join('\r\n') + '\r\n\r\n');

      const transport: TransportDiagnostics = {
        tlsVersion,
        cipherSuite,
        alpnProtocol: negotiatedAlpn || (httpVersion === '2.0' ? 'h2' : 'http/1.1'),
        certificate,
        httpVersion,
        compression,
        keepAlive,
        transferEncoding: (headers['transfer-encoding'] as string) || null,
        redirectCount,
        redirects,
        serverHeader: (headers['server'] as string) || null,
        headers
      };

      const metrics: PerformanceMetrics = {
        dnsLookupTime: timings.dns,
        tcpConnectTime: timings.tcp,
        tlsHandshakeTime: timings.tls,
        ttfb: timings.ttfb,
        totalTime: timings.total + totalRedirectTime,
        htmlSize: body.length,
        headerSize,
        redirectTime: totalRedirectTime
      };

      return { transport, performance: metrics, issues };
    } catch (error: any) {
      throw new Error(`Transport analysis failed for ${currentUrl}: ${error.message}`, { cause: error });
    }
  }

  throw new Error(`Too many redirects (limit: ${maxRedirects})`);
}
156
-
157
/**
 * Performs a single GET request (redirects are NOT followed) on a fresh
 * connection and measures the duration of each phase.
 *
 * Certificate verification is disabled (`rejectUnauthorized: false`) so the
 * request completes even for invalid certificates; the caller can inspect the
 * returned socket to see whether the chain was authorized.
 *
 * @param urlStr - Absolute URL to request.
 * @param timeout - Passed as the request's `timeout` option.
 * @returns The response, its body, phase timings, the socket, and the resolved
 *   redirect target (null when the response is not a 3xx with a parseable Location).
 * @throws (rejects) on an invalid URL, a request error, a timeout, or a response
 *   stream that fails or closes before the message is complete.
 */
function executeRequest(urlStr: string, timeout: number): Promise<RequestResult> {
  return new Promise((resolve, reject) => {
    let url: URL;
    try {
      url = new URL(urlStr);
    } catch (_e) {
      return reject(new Error(`Invalid URL: ${urlStr}`));
    }

    const isHttps = url.protocol === 'https:';
    const requestModule = isHttps ? https : http;

    const timings = {
      dns: 0,
      tcp: 0,
      tls: 0,
      ttfb: 0,
      total: 0
    };

    const t0 = performance.now();
    let tDNS = t0;
    let tTCP = t0;
    let tTLS = t0;
    let tReqSent = 0;

    // agent: false forces a new connection so connection phases are actually measured.
    const options = {
      method: 'GET',
      timeout,
      rejectUnauthorized: false,
      agent: false,
      headers: {
        'User-Agent': 'Crawlith/Audit',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br'
      }
    };

    const req = requestModule.request(url, options, (res) => {
      // TTFB: time from request sent (or start, if 'finish' has not fired) to headers received.
      timings.ttfb = performance.now() - (tReqSent || t0);

      const chunks: Buffer[] = [];
      res.on('data', (chunk) => chunks.push(chunk));

      // Once the response has started, failures surface on `res`, not `req`.
      // Without these listeners a connection dropped mid-body would leave the
      // promise pending forever. Both are no-ops after a successful 'end'.
      res.on('error', (err) => reject(err));
      res.on('close', () => {
        if (!res.complete) {
          reject(new Error('Response closed before the full message was received'));
        }
      });

      res.on('end', () => {
        timings.total = performance.now() - t0;
        const body = Buffer.concat(chunks);

        let redirectUrl: string | null = null;
        if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
          try {
            // Location may be relative; resolve it against the requested URL.
            redirectUrl = new URL(res.headers.location, urlStr).toString();
          } catch (_e) {
            // Unparseable Location: treat the response as final.
          }
        }

        resolve({
          url: urlStr,
          response: res,
          body,
          timings,
          socket: res.socket,
          redirectUrl
        });
      });
    });

    req.on('socket', (socket) => {
      socket.on('lookup', () => {
        tDNS = performance.now();
        timings.dns = tDNS - t0;
      });
      socket.on('connect', () => {
        tTCP = performance.now();
        // If no 'lookup' event fired, tDNS is still t0 and dns stays 0.
        timings.tcp = tTCP - tDNS;
      });
      socket.on('secureConnect', () => {
        tTLS = performance.now();
        timings.tls = tTLS - tTCP;
      });
    });

    req.on('finish', () => {
      tReqSent = performance.now();
    });

    req.on('error', (err) => reject(err));
    req.on('timeout', () => {
      // The 'timeout' event does not abort the request by itself.
      req.destroy();
      reject(new Error('Request timed out'));
    });

    req.end();
  });
}
@@ -1,102 +0,0 @@
1
-
2
/**
 * Complete result of auditing one URL: per-area diagnostics plus the overall
 * score, grade and list of issues.
 */
export interface AuditResult {
  /** URL the audit refers to. */
  url: string;
  /** TLS and HTTP protocol diagnostics. */
  transport: TransportDiagnostics;
  /** Security header evaluation. */
  securityHeaders: SecurityHeadersResult;
  /** DNS diagnostics. */
  dns: DnsDiagnostics;
  /** Timing and size metrics. */
  performance: PerformanceMetrics;
  /** Overall numeric score. */
  score: number;
  /** Letter grade. */
  grade: 'A' | 'B' | 'C' | 'D' | 'F';
  /** Issues found during the audit. */
  issues: AuditIssue[];
}
12
-
13
/**
 * TLS and HTTP protocol details observed for a response.
 */
export interface TransportDiagnostics {
  // ---- TLS / SSL ----
  /** TLS protocol version, or null when unavailable. */
  tlsVersion: string | null;
  /** Cipher suite name, or null when unavailable. */
  cipherSuite: string | null;
  /** ALPN protocol, e.g. `http/1.1` or `h2`. */
  alpnProtocol: string | null;
  /** Peer certificate summary, or null when unavailable. */
  certificate: CertificateInfo | null;

  // ---- HTTP protocol ----
  /** HTTP version of the response. */
  httpVersion: string;
  /** Content encodings in use, e.g. gzip, br, deflate. */
  compression: string[];
  /** Whether the connection is kept alive. */
  keepAlive: boolean;
  /** Transfer-Encoding value, or null. */
  transferEncoding: string | null;
  /** Number of redirects followed. */
  redirectCount: number;
  /** One entry per redirect followed. */
  redirects: RedirectInfo[];
  /** Server header value, or null. */
  serverHeader: string | null;
  /** Response headers. */
  headers: Record<string, string | string[] | undefined>;
}
30
-
31
/**
 * Summary of a TLS certificate.
 */
export interface CertificateInfo {
  /** Certificate issuer. */
  issuer: string;
  /** Certificate subject. */
  subject: string;
  /** Start of the validity period. */
  validFrom: string;
  /** End of the validity period. */
  validTo: string;
  /** Days remaining until the certificate expires. */
  daysUntilExpiry: number;
  /** Whether the certificate is considered self-signed. */
  isSelfSigned: boolean;
  /**
   * Whether the chain validated. Basic check: relies on Node's TLS
   * `rejectUnauthorized: true` result if possible, or a manual check.
   */
  isValidChain: boolean;
  /** Certificate fingerprint. */
  fingerprint: string;
  /** Certificate serial number. */
  serialNumber: string;
  /** Subject alternative names, when present. */
  subjectAltName?: string;
}
43
-
44
/**
 * A single redirect hop.
 */
export interface RedirectInfo {
  /** URL that answered with the redirect. */
  url: string;
  /** HTTP status code of the redirect response. */
  statusCode: number;
  /** Redirect target, or null. */
  location: string | null;
}
49
-
50
/**
 * Evaluation of the security-related response headers.
 */
export interface SecurityHeadersResult {
  /** Status of Strict-Transport-Security. */
  strictTransportSecurity: HeaderStatus;
  /** Status of Content-Security-Policy. */
  contentSecurityPolicy: HeaderStatus;
  /** Status of X-Frame-Options. */
  xFrameOptions: HeaderStatus;
  /** Status of X-Content-Type-Options. */
  xContentTypeOptions: HeaderStatus;
  /** Status of Referrer-Policy. */
  referrerPolicy: HeaderStatus;
  /** Status of Permissions-Policy. */
  permissionsPolicy: HeaderStatus;

  /** Raw header values. */
  details: Record<string, string>;
  /** Partial score contribution (0-100, normalized for the headers section). */
  score: number;
}
61
-
62
/**
 * Presence and validity of one response header.
 */
export interface HeaderStatus {
  /** Whether the header is present. */
  present: boolean;
  /** Header value, or null. */
  value: string | null;
  /** Result of a simple syntax check. */
  valid: boolean;
  /** Problems found with the header, if any. */
  issues?: string[];
}
68
-
69
/**
 * DNS lookup results for a host.
 */
export interface DnsDiagnostics {
  /** A records. */
  a: string[];
  /** AAAA records. */
  aaaa: string[];
  /** CNAME records. */
  cname: string[];
  /** Reverse lookup results. */
  reverse: string[];
  /** Number of IP addresses. */
  ipCount: number;
  /** Whether IPv6 is supported. */
  ipv6Support: boolean;
  /** Resolution time. */
  resolutionTime: number;
}
78
-
79
/**
 * Timing and size measurements for a request.
 */
export interface PerformanceMetrics {
  /** DNS lookup time in ms. */
  dnsLookupTime: number;
  /** TCP connect time in ms. */
  tcpConnectTime: number;
  /** TLS handshake time in ms. */
  tlsHandshakeTime: number;
  /** Time to first byte in ms. */
  ttfb: number;
  /** Total time in ms. */
  totalTime: number;
  /** HTML size in bytes. */
  htmlSize: number;
  /** Header size in bytes. */
  headerSize: number;
  /** Accumulated time spent in redirects. */
  redirectTime?: number;
}
89
-
90
/**
 * One finding produced by the audit.
 */
export interface AuditIssue {
  /** Unique code, used for tests and filtering. */
  id: string;
  /** How serious the finding is. */
  severity: 'critical' | 'severe' | 'moderate' | 'minor' | 'info';
  /** Audit area the finding belongs to. */
  category: 'tls' | 'http' | 'headers' | 'dns' | 'performance';
  /** Description of the finding. */
  message: string;
  /** Score penalty associated with the finding. */
  scorePenalty: number;
}
97
-
98
/**
 * Optional settings for an audit run.
 */
export interface AuditOptions {
  /** Timeout setting. */
  timeout?: number;
  /** Verbose flag. */
  verbose?: boolean;
  /** Debug flag. */
  debug?: boolean;
}
@@ -1,21 +0,0 @@
1
- import { ProxyAgent } from 'undici';
2
-
3
/**
 * Holds an optional undici `ProxyAgent` built from a proxy URL.
 */
export class ProxyAdapter {
  private agent?: ProxyAgent;

  /**
   * @param proxyUrl - Proxy URL; when omitted or empty no agent is created.
   * @throws Error if the URL cannot be parsed or the agent cannot be constructed.
   */
  constructor(proxyUrl?: string) {
    if (!proxyUrl) {
      return;
    }

    try {
      // Parsing is done only for validation; the result is discarded.
      new URL(proxyUrl);
      this.agent = new ProxyAgent(proxyUrl);
    } catch {
      throw new Error(`Invalid proxy URL: ${proxyUrl}`);
    }
  }

  /** The proxy agent, or undefined when no proxy URL was given. */
  get dispatcher() {
    return this.agent;
  }
}
@@ -1,39 +0,0 @@
1
/**
 * Per-host token-bucket rate limiter.
 *
 * Each host gets its own bucket. A bucket holds at most `max(1, rate)` tokens;
 * the floor of 1 guarantees that a whole token can always accumulate, so rates
 * below one request per second throttle instead of blocking forever.
 * The rate is expected to be a positive number.
 */
export class RateLimiter {
  private buckets: Map<string, { tokens: number; lastRefill: number }> = new Map();
  private rate: number; // tokens per second

  /**
   * @param rate - Tokens (requests) per second allowed for each host.
   */
  constructor(rate: number = 2) {
    this.rate = rate;
  }

  /**
   * Resolves once a token is available for `host`, consuming it.
   * The first call for a host returns immediately.
   *
   * @param host - Key identifying the bucket.
   * @param crawlDelay - Optional delay in seconds; when positive, the refill
   *   rate is capped at `1 / crawlDelay` tokens per second.
   */
  async waitForToken(host: string, crawlDelay: number = 0): Promise<void> {
    const effectiveRate = crawlDelay > 0 ? Math.min(this.rate, 1 / crawlDelay) : this.rate;
    const interval = 1000 / effectiveRate; // ms needed to refill one token
    // Capacity must be >= 1, otherwise `tokens >= 1` below can never become true.
    const capacity = Math.max(1, this.rate);

    const bucket = this.buckets.get(host);
    if (!bucket) {
      // New host: start with a full bucket and spend one token on this call.
      this.buckets.set(host, { tokens: capacity - 1, lastRefill: Date.now() });
      return;
    }

    while (true) {
      const now = Date.now();
      const elapsed = now - bucket.lastRefill;

      if (elapsed > 0) {
        bucket.tokens = Math.min(capacity, bucket.tokens + elapsed / interval);
        bucket.lastRefill = now;
      }

      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return;
      }

      const waitTime = Math.max(0, interval - (Date.now() - bucket.lastRefill));
      await new Promise(resolve => setTimeout(resolve, waitTime));
    }
  }
}
@@ -1,47 +0,0 @@
1
/**
 * Tracks the redirect hops of one request chain, enforcing a hop limit and
 * detecting loops.
 */
export class RedirectController {
  private maxHops: number;
  private currentHops: number = 0;
  private history: Set<string> = new Set();

  /**
   * @param maxHops - Maximum number of hops that may be recorded.
   * @param seedUrl - Optional starting URL; it is remembered so that a redirect
   *   back to it counts as a loop.
   */
  constructor(maxHops: number = 5, seedUrl?: string) {
    this.maxHops = maxHops;
    if (seedUrl) {
      this.history.add(this.normalize(seedUrl));
    }
  }

  /**
   * Registers a hop to `url`.
   *
   * @returns `null` when the hop is accepted; `'redirect_loop'` when the URL
   *   (ignoring its fragment) was seen before; `'redirect_limit_exceeded'` when
   *   the hop budget is already used up. The loop check is performed first.
   */
  nextHop(url: string): 'redirect_limit_exceeded' | 'redirect_loop' | null {
    const key = this.normalize(url);

    const seenBefore = this.history.has(key);
    if (seenBefore) {
      return 'redirect_loop';
    }

    const budgetSpent = this.currentHops >= this.maxHops;
    if (budgetSpent) {
      return 'redirect_limit_exceeded';
    }

    this.currentHops += 1;
    this.history.add(key);
    return null;
  }

  /** Number of hops accepted so far. */
  get hops(): number {
    return this.currentHops;
  }

  /** Drops the fragment from a URL; a string that is not a valid URL is returned unchanged. */
  private normalize(url: string): string {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return url;
    }
    parsed.hash = '';
    return parsed.toString();
  }
}
@@ -1,34 +0,0 @@
1
- import { Readable } from 'stream';
2
-
3
/**
 * Helpers for reading response bodies with a size cap.
 */
export class ResponseLimiter {
  /**
   * Reads `stream` to completion and decodes it as UTF-8.
   *
   * @param stream - Source stream; chunks that are not Buffers are converted with `Buffer.from`.
   * @param maxBytes - Maximum number of bytes accepted.
   * @param onOversized - Called with the byte count reached when the cap is exceeded.
   * @returns The decoded body.
   * @throws (rejects) `Error('Oversized response')` once more than `maxBytes`
   *   bytes were received (the stream is destroyed first), or the stream's own error.
   */
  static async streamToString(
    stream: Readable,
    maxBytes: number,
    onOversized?: (bytes: number) => void
  ): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      const pieces: Buffer[] = [];
      let received = 0;

      const handleChunk = (chunk: any): void => {
        const piece = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
        received += piece.length;

        if (received > maxBytes) {
          // Stop reading before reporting and failing.
          stream.destroy();
          if (onOversized) onOversized(received);
          reject(new Error('Oversized response'));
          return;
        }

        pieces.push(piece);
      };

      const handleEnd = (): void => {
        // Decode once at the end so multi-byte characters split across chunks survive.
        resolve(Buffer.concat(pieces).toString('utf-8'));
      };

      stream.on('data', handleChunk);
      stream.on('end', handleEnd);
      stream.on('error', (err) => {
        reject(err);
      });
    });
  }
}
@@ -1,57 +0,0 @@
1
/**
 * Settings for {@link RetryPolicy.execute}.
 */
export interface RetryConfig {
  /** Number of retries after the first attempt. */
  maxRetries: number;
  /** Delay before the first retry, in milliseconds; doubled for each further retry. */
  baseDelay: number;
}

/**
 * Retry helper with exponential backoff and jitter.
 */
export class RetryPolicy {
  static DEFAULT_CONFIG: RetryConfig = {
    maxRetries: 3,
    baseDelay: 500
  };

  /** Error codes treated as transient network failures. */
  private static readonly NETWORK_ERROR_CODES: ReadonlySet<unknown> = new Set<unknown>([
    'ETIMEDOUT',
    'ECONNRESET',
    'EADDRINUSE',
    'ECONNREFUSED',
    'EPIPE',
    'ENOTFOUND',
    'ENETUNREACH',
    'EAI_AGAIN'
  ]);

  /**
   * Runs `operation`, retrying while it throws a retryable error.
   *
   * The operation is always attempted at least once: a negative or NaN
   * `maxRetries` is treated as 0 and a fractional value is rounded down.
   * The delay before retry `n` (0-based) is `baseDelay * 2^n` with ±10% jitter.
   *
   * @param operation - Receives the 0-based attempt number.
   * @param isRetryable - Decides whether a thrown error warrants another attempt.
   * @param config - Retry settings; defaults to {@link RetryPolicy.DEFAULT_CONFIG}.
   * @returns The first successful result of `operation`.
   * @throws The error of the last attempt, when it is not retryable or retries are exhausted.
   */
  static async execute<T>(
    operation: (attempt: number) => Promise<T>,
    isRetryable: (error: any) => boolean,
    config: RetryConfig = RetryPolicy.DEFAULT_CONFIG
  ): Promise<T> {
    // `> 0` is false for NaN and negatives, so those collapse to "no retries".
    const maxRetries = config.maxRetries > 0 ? Math.floor(config.maxRetries) : 0;

    for (let attempt = 0; ; attempt++) {
      try {
        return await operation(attempt);
      } catch (error) {
        if (attempt >= maxRetries || !isRetryable(error)) {
          throw error;
        }

        const delay = config.baseDelay * Math.pow(2, attempt);
        const jitter = delay * 0.1 * (Math.random() * 2 - 1);
        const finalDelay = Math.max(0, delay + jitter);

        await new Promise(resolve => setTimeout(resolve, finalDelay));
      }
    }
  }

  /** True for HTTP 429 and every 5xx status. */
  static isRetryableStatus(status: number): boolean {
    return status === 429 || (status >= 500 && status <= 599);
  }

  /** True when `error.code` (or `error.cause.code`) is one of the known transient network error codes. */
  static isNetworkError(error: any): boolean {
    const code = error?.code || error?.cause?.code;
    return RetryPolicy.NETWORK_ERROR_CODES.has(code);
  }
}
@@ -1,45 +0,0 @@
1
/**
 * Hostname allow/deny filter. The deny list always wins; an empty allow list
 * means "allow everything that is not denied".
 */
export class DomainFilter {
  private allowed: Set<string>;
  private denied: Set<string>;

  /**
   * @param allowed - Hostnames to allow; empty means no allow-list restriction.
   * @param denied - Hostnames to reject.
   */
  constructor(allowed: string[] = [], denied: string[] = []) {
    const toKey = (host: string): string => this.normalize(host);
    this.allowed = new Set(allowed.map(toKey));
    this.denied = new Set(denied.map(toKey));
  }

  /**
   * Canonical form of a hostname (not a URL): lowercased, trimmed, one trailing
   * dot removed, then passed through the URL parser so that its hostname
   * normalization (e.g. punycode) applies. Falls back to the cleaned string
   * when the parser rejects it.
   */
  private normalize(hostname: string): string {
    const lowered = hostname.toLowerCase().trim();
    const cleaned = lowered.endsWith('.') ? lowered.slice(0, -1) : lowered;

    try {
      // Dummy scheme: only the parser's hostname handling is of interest.
      return new URL(`http://${cleaned}`).hostname;
    } catch {
      return cleaned;
    }
  }

  /**
   * @returns false when the hostname is denied, or when an allow list exists
   *   and the hostname is not on it; true otherwise.
   */
  isAllowed(hostname: string): boolean {
    const key = this.normalize(hostname);

    if (this.denied.has(key)) {
      return false;
    }

    const unrestricted = this.allowed.size === 0;
    return unrestricted || this.allowed.has(key);
  }
}
@@ -1,52 +0,0 @@
1
- import { DomainFilter } from './domainFilter.js';
2
- import { SubdomainPolicy } from './subdomainPolicy.js';
3
-
4
/**
 * Settings that define which hosts a crawl may visit.
 */
export interface ScopeOptions {
  /** Hostnames that are explicitly allowed. */
  allowedDomains?: string[];
  /** Hostnames that are explicitly rejected. */
  deniedDomains?: string[];
  /** Whether subdomains of the root host are in scope. */
  includeSubdomains?: boolean;
  /** URL whose host is the root of the scope. */
  rootUrl: string;
}

/** Verdict of {@link ScopeManager.isUrlEligible}. */
export type EligibilityResult = 'allowed' | 'blocked_by_domain_filter' | 'blocked_subdomain';

/**
 * Decides whether a URL is in scope by combining the allow/deny domain filter
 * with the subdomain policy.
 */
export class ScopeManager {
  private domainFilter: DomainFilter;
  private subdomainPolicy: SubdomainPolicy;
  private explicitAllowed: Set<string>;

  constructor(options: ScopeOptions) {
    this.domainFilter = new DomainFilter(options.allowedDomains, options.deniedDomains);
    this.subdomainPolicy = new SubdomainPolicy(options.rootUrl, options.includeSubdomains);
    this.explicitAllowed = new Set(
      (options.allowedDomains || []).map(d => ScopeManager.normalizeHost(d))
    );
  }

  /**
   * Canonicalizes an allow-list entry the same way URL hostnames are produced:
   * lowercased, trimmed, one trailing dot removed, then run through the URL
   * parser (e.g. for punycode). Without the parser step an internationalized
   * entry could never equal the hostname that `new URL(url).hostname` yields
   * in `isUrlEligible`.
   */
  private static normalizeHost(host: string): string {
    let h = host.toLowerCase().trim();
    if (h.endsWith('.')) h = h.slice(0, -1);
    try {
      return new URL(`http://${h}`).hostname;
    } catch {
      return h;
    }
  }

  /**
   * Classifies `url`.
   *
   * @returns `'blocked_by_domain_filter'` for an unparseable URL or a host the
   *   domain filter rejects; `'allowed'` for a host on the explicit allow list
   *   or accepted by the subdomain policy; `'blocked_subdomain'` otherwise.
   */
  isUrlEligible(url: string): EligibilityResult {
    let hostname: string;
    try {
      hostname = new URL(url).hostname.toLowerCase();
      if (hostname.endsWith('.')) hostname = hostname.slice(0, -1);
    } catch {
      return 'blocked_by_domain_filter'; // Invalid URL is effectively blocked
    }

    if (!this.domainFilter.isAllowed(hostname)) {
      return 'blocked_by_domain_filter';
    }

    // An explicitly allowed host is in scope regardless of the subdomain policy.
    if (this.explicitAllowed.has(hostname)) {
      return 'allowed';
    }

    if (!this.subdomainPolicy.isAllowed(hostname)) {
      return 'blocked_subdomain';
    }

    return 'allowed';
  }
}
@@ -1,39 +0,0 @@
1
/**
 * Decides whether a hostname belongs to the root host of a crawl, optionally
 * including its subdomains.
 */
export class SubdomainPolicy {
  private rootHost: string;
  private includeSubdomains: boolean;

  /**
   * @param rootUrl - URL whose hostname becomes the root host. If it cannot be
   *   parsed, the root host is empty and no hostname is allowed.
   * @param includeSubdomains - Whether subdomains of the root host are allowed.
   */
  constructor(rootUrl: string, includeSubdomains: boolean = false) {
    try {
      this.rootHost = new URL(rootUrl).hostname.toLowerCase();
      if (this.rootHost.endsWith('.')) {
        this.rootHost = this.rootHost.slice(0, -1);
      }
    } catch {
      this.rootHost = '';
    }
    this.includeSubdomains = includeSubdomains;
  }

  /**
   * @param hostname - Hostname to test; case, surrounding whitespace and one
   *   trailing dot are ignored.
   * @returns true for the root host itself and, when subdomains are included,
   *   for any hostname ending in `.<rootHost>`.
   */
  isAllowed(hostname: string): boolean {
    // No root host (invalid root URL): nothing can match. Without this guard
    // an empty hostname would "equal" the empty root, and a hostname ending in
    // a dot would pass the suffix check against ".".
    if (!this.rootHost) {
      return false;
    }

    let target = hostname.toLowerCase().trim();
    if (target.endsWith('.')) {
      target = target.slice(0, -1);
    }

    // Exact match is always allowed.
    if (target === this.rootHost) {
      return true;
    }

    if (!this.includeSubdomains) {
      return false;
    }

    // Label-based check: the leading dot prevents "badexample.com" from
    // matching root "example.com".
    return target.endsWith(`.${this.rootHost}`);
  }
}