@crawlith/core 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +70 -0
- package/dist/analysis/analyze.d.ts +29 -8
- package/dist/analysis/analyze.js +325 -221
- package/dist/analysis/clustering.d.ts +23 -0
- package/dist/analysis/clustering.js +206 -0
- package/dist/analysis/content.d.ts +1 -1
- package/dist/analysis/content.js +11 -5
- package/dist/analysis/duplicate.d.ts +34 -0
- package/dist/analysis/duplicate.js +305 -0
- package/dist/analysis/heading.d.ts +116 -0
- package/dist/analysis/heading.js +356 -0
- package/dist/analysis/images.d.ts +1 -1
- package/dist/analysis/images.js +6 -5
- package/dist/analysis/links.d.ts +1 -1
- package/dist/analysis/links.js +8 -8
- package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
- package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
- package/dist/analysis/scoring.js +4 -1
- package/dist/analysis/seo.d.ts +8 -4
- package/dist/analysis/seo.js +41 -30
- package/dist/analysis/soft404.d.ts +17 -0
- package/dist/analysis/soft404.js +62 -0
- package/dist/analysis/structuredData.d.ts +1 -1
- package/dist/analysis/structuredData.js +5 -4
- package/dist/application/index.d.ts +2 -0
- package/dist/application/index.js +2 -0
- package/dist/application/usecase.d.ts +3 -0
- package/dist/application/usecase.js +1 -0
- package/dist/application/usecases.d.ts +114 -0
- package/dist/application/usecases.js +201 -0
- package/dist/audit/index.js +1 -1
- package/dist/audit/transport.d.ts +1 -1
- package/dist/audit/transport.js +5 -4
- package/dist/audit/types.d.ts +1 -0
- package/dist/constants.d.ts +17 -0
- package/dist/constants.js +23 -0
- package/dist/core/scope/scopeManager.js +3 -0
- package/dist/crawler/crawl.d.ts +2 -2
- package/dist/crawler/crawler.d.ts +17 -5
- package/dist/crawler/crawler.js +259 -94
- package/dist/crawler/fetcher.d.ts +1 -1
- package/dist/crawler/fetcher.js +6 -6
- package/dist/crawler/metricsRunner.d.ts +21 -1
- package/dist/crawler/metricsRunner.js +181 -60
- package/dist/crawler/normalize.d.ts +41 -0
- package/dist/crawler/normalize.js +119 -3
- package/dist/crawler/parser.d.ts +1 -3
- package/dist/crawler/parser.js +2 -49
- package/dist/crawler/resolver.d.ts +11 -0
- package/dist/crawler/resolver.js +67 -0
- package/dist/crawler/sitemap.d.ts +4 -1
- package/dist/crawler/sitemap.js +24 -18
- package/dist/crawler/trap.d.ts +5 -1
- package/dist/crawler/trap.js +23 -2
- package/dist/db/CrawlithDB.d.ts +110 -0
- package/dist/db/CrawlithDB.js +500 -0
- package/dist/db/graphLoader.js +15 -32
- package/dist/db/index.d.ts +9 -1
- package/dist/db/index.js +39 -31
- package/dist/db/migrations.d.ts +2 -0
- package/dist/db/{schema.js → migrations.js} +90 -43
- package/dist/db/pluginRegistry.d.ts +9 -0
- package/dist/db/pluginRegistry.js +19 -0
- package/dist/db/repositories/EdgeRepository.d.ts +5 -0
- package/dist/db/repositories/EdgeRepository.js +7 -0
- package/dist/db/repositories/MetricsRepository.d.ts +13 -8
- package/dist/db/repositories/MetricsRepository.js +14 -6
- package/dist/db/repositories/PageRepository.d.ts +5 -3
- package/dist/db/repositories/PageRepository.js +68 -17
- package/dist/db/repositories/SiteRepository.d.ts +6 -0
- package/dist/db/repositories/SiteRepository.js +4 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
- package/dist/db/repositories/SnapshotRepository.js +48 -10
- package/dist/db/reset.d.ts +9 -0
- package/dist/db/reset.js +32 -0
- package/dist/db/statements.d.ts +12 -0
- package/dist/db/statements.js +40 -0
- package/dist/diff/compare.d.ts +0 -5
- package/dist/diff/compare.js +0 -12
- package/dist/diff/service.d.ts +16 -0
- package/dist/diff/service.js +41 -0
- package/dist/domain/index.d.ts +4 -0
- package/dist/domain/index.js +4 -0
- package/dist/events.d.ts +8 -0
- package/dist/graph/graph.d.ts +20 -42
- package/dist/graph/graph.js +12 -16
- package/dist/graph/hits.d.ts +23 -0
- package/dist/graph/hits.js +111 -0
- package/dist/graph/metrics.d.ts +0 -4
- package/dist/graph/metrics.js +19 -15
- package/dist/graph/pagerank.d.ts +17 -4
- package/dist/graph/pagerank.js +126 -93
- package/dist/index.d.ts +27 -9
- package/dist/index.js +27 -9
- package/dist/lock/lockManager.d.ts +1 -0
- package/dist/lock/lockManager.js +15 -0
- package/dist/plugin-system/plugin-cli.d.ts +10 -0
- package/dist/plugin-system/plugin-cli.js +31 -0
- package/dist/plugin-system/plugin-config.d.ts +16 -0
- package/dist/plugin-system/plugin-config.js +36 -0
- package/dist/plugin-system/plugin-loader.d.ts +17 -0
- package/dist/plugin-system/plugin-loader.js +122 -0
- package/dist/plugin-system/plugin-registry.d.ts +25 -0
- package/dist/plugin-system/plugin-registry.js +167 -0
- package/dist/plugin-system/plugin-types.d.ts +205 -0
- package/dist/plugin-system/plugin-types.js +1 -0
- package/dist/ports/index.d.ts +9 -0
- package/dist/ports/index.js +1 -0
- package/dist/report/export.d.ts +3 -0
- package/dist/report/export.js +81 -0
- package/dist/report/insight.d.ts +27 -0
- package/dist/report/insight.js +103 -0
- package/dist/scoring/health.d.ts +17 -11
- package/dist/scoring/health.js +183 -140
- package/dist/utils/chalk.d.ts +6 -0
- package/dist/utils/chalk.js +41 -0
- package/dist/utils/secureConfig.d.ts +23 -0
- package/dist/utils/secureConfig.js +128 -0
- package/package.json +10 -4
- package/CHANGELOG.md +0 -13
- package/dist/db/schema.d.ts +0 -2
- package/dist/graph/cluster.d.ts +0 -6
- package/dist/graph/cluster.js +0 -221
- package/dist/graph/duplicate.d.ts +0 -10
- package/dist/graph/duplicate.js +0 -302
- package/dist/scoring/hits.d.ts +0 -10
- package/dist/scoring/hits.js +0 -131
- package/scripts/copy-assets.js +0 -37
- package/src/analysis/analysis_list.html +0 -35
- package/src/analysis/analysis_page.html +0 -123
- package/src/analysis/analyze.ts +0 -505
- package/src/analysis/content.ts +0 -62
- package/src/analysis/images.ts +0 -28
- package/src/analysis/links.ts +0 -41
- package/src/analysis/scoring.ts +0 -66
- package/src/analysis/seo.ts +0 -82
- package/src/analysis/structuredData.ts +0 -62
- package/src/analysis/templates.ts +0 -9
- package/src/audit/dns.ts +0 -49
- package/src/audit/headers.ts +0 -98
- package/src/audit/index.ts +0 -66
- package/src/audit/scoring.ts +0 -232
- package/src/audit/transport.ts +0 -258
- package/src/audit/types.ts +0 -102
- package/src/core/network/proxyAdapter.ts +0 -21
- package/src/core/network/rateLimiter.ts +0 -39
- package/src/core/network/redirectController.ts +0 -47
- package/src/core/network/responseLimiter.ts +0 -34
- package/src/core/network/retryPolicy.ts +0 -57
- package/src/core/scope/domainFilter.ts +0 -45
- package/src/core/scope/scopeManager.ts +0 -52
- package/src/core/scope/subdomainPolicy.ts +0 -39
- package/src/core/security/ipGuard.ts +0 -171
- package/src/crawler/crawl.ts +0 -9
- package/src/crawler/crawler.ts +0 -601
- package/src/crawler/extract.ts +0 -39
- package/src/crawler/fetcher.ts +0 -251
- package/src/crawler/metricsRunner.ts +0 -137
- package/src/crawler/normalize.ts +0 -108
- package/src/crawler/parser.ts +0 -190
- package/src/crawler/sitemap.ts +0 -76
- package/src/crawler/trap.ts +0 -96
- package/src/db/graphLoader.ts +0 -135
- package/src/db/index.ts +0 -75
- package/src/db/repositories/EdgeRepository.ts +0 -43
- package/src/db/repositories/MetricsRepository.ts +0 -63
- package/src/db/repositories/PageRepository.ts +0 -228
- package/src/db/repositories/SiteRepository.ts +0 -43
- package/src/db/repositories/SnapshotRepository.ts +0 -99
- package/src/db/schema.ts +0 -177
- package/src/diff/compare.ts +0 -84
- package/src/events.ts +0 -16
- package/src/graph/cluster.ts +0 -246
- package/src/graph/duplicate.ts +0 -350
- package/src/graph/graph.ts +0 -192
- package/src/graph/metrics.ts +0 -125
- package/src/graph/pagerank.ts +0 -126
- package/src/graph/simhash.ts +0 -76
- package/src/index.ts +0 -33
- package/src/lock/hashKey.ts +0 -51
- package/src/lock/lockManager.ts +0 -132
- package/src/lock/pidCheck.ts +0 -13
- package/src/report/crawl.html +0 -879
- package/src/report/crawlExport.ts +0 -58
- package/src/report/crawl_template.ts +0 -9
- package/src/report/html.ts +0 -27
- package/src/scoring/health.ts +0 -241
- package/src/scoring/hits.ts +0 -153
- package/src/scoring/orphanSeverity.ts +0 -176
- package/src/utils/version.ts +0 -18
- package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
- package/tests/analysis.unit.test.ts +0 -142
- package/tests/analyze.integration.test.ts +0 -133
- package/tests/analyze_markdown.test.ts +0 -98
- package/tests/audit/audit.test.ts +0 -101
- package/tests/audit/dns.test.ts +0 -31
- package/tests/audit/headers.test.ts +0 -45
- package/tests/audit/scoring.test.ts +0 -133
- package/tests/audit/security.test.ts +0 -12
- package/tests/audit/transport.test.ts +0 -111
- package/tests/clustering.test.ts +0 -118
- package/tests/clustering_risk.test.ts +0 -118
- package/tests/crawler.test.ts +0 -364
- package/tests/db/index.test.ts +0 -134
- package/tests/db/repositories.test.ts +0 -115
- package/tests/db.test.ts +0 -159
- package/tests/db_repos.test.ts +0 -72
- package/tests/diff.test.ts +0 -67
- package/tests/duplicate.test.ts +0 -110
- package/tests/extract.test.ts +0 -86
- package/tests/fetcher.test.ts +0 -110
- package/tests/fetcher_safety.test.ts +0 -91
- package/tests/fixtures/analyze-crawl.json +0 -26
- package/tests/graph/graph.test.ts +0 -100
- package/tests/graphLoader.test.ts +0 -124
- package/tests/hits.test.ts +0 -134
- package/tests/html_report.test.ts +0 -59
- package/tests/ipGuard.test.ts +0 -73
- package/tests/lock/lockManager.test.ts +0 -198
- package/tests/metrics.test.ts +0 -196
- package/tests/normalize.test.ts +0 -88
- package/tests/orphanSeverity.test.ts +0 -160
- package/tests/pagerank.test.ts +0 -98
- package/tests/parser.test.ts +0 -117
- package/tests/proxy_safety.test.ts +0 -57
- package/tests/redirect_safety.test.ts +0 -77
- package/tests/renderAnalysisCsv.test.ts +0 -183
- package/tests/safety.test.ts +0 -126
- package/tests/scope.test.ts +0 -84
- package/tests/scoring.test.ts +0 -60
- package/tests/sitemap.test.ts +0 -100
- package/tests/soft404.test.ts +0 -41
- package/tests/ssrf_fix.test.ts +0 -69
- package/tests/trap.test.ts +0 -39
- package/tests/visualization_data.test.ts +0 -46
- package/tsconfig.json +0 -11
package/src/audit/transport.ts
DELETED
|
@@ -1,258 +0,0 @@
|
|
|
1
|
-
import https from 'node:https';
|
|
2
|
-
import http from 'node:http';
|
|
3
|
-
import tls from 'node:tls';
|
|
4
|
-
import { URL } from 'node:url';
|
|
5
|
-
import { IPGuard } from '../core/security/ipGuard.js';
|
|
6
|
-
import { TransportDiagnostics, PerformanceMetrics, CertificateInfo, RedirectInfo, AuditIssue } from './types.js';
|
|
7
|
-
import { IncomingMessage } from 'node:http';
|
|
8
|
-
|
|
9
|
-
/**
 * Per-phase timings of one request, in milliseconds
 * (each value is a difference of `performance.now()` readings).
 */
type RequestTimings = {
  /** Start of request until the socket's 'lookup' event. */
  dns: number;
  /** End of DNS phase until the socket's 'connect' event. */
  tcp: number;
  /** 'connect' until 'secureConnect'; stays 0 when no TLS handshake happens. */
  tls: number;
  /** Request flushed until the response callback fires. */
  ttfb: number;
  /** Start of request until the response body has ended. */
  total: number;
};

/**
 * Result of a single HTTP(S) request hop. Redirects are reported through
 * `redirectUrl`, not followed.
 */
interface RequestResult {
  /** The URL that was requested. */
  url: string;
  /** The Node response object for this hop. */
  response: IncomingMessage;
  /** Raw response body bytes as received. */
  body: Buffer;
  timings: RequestTimings;
  /** Underlying socket of the response (inspected for TLS details by the caller). */
  socket: any;
  /** Absolute redirect target for a 3xx with a usable Location header, otherwise null. */
  redirectUrl: string | null;
}
|
|
23
|
-
|
|
24
|
-
/**
 * Requests `targetUrl`, following up to 10 redirects manually, and collects
 * transport-level diagnostics for the final response.
 *
 * Every hop's hostname is validated with `IPGuard.validateHost` before the
 * request is made, so a redirect cannot steer the audit to a blocked host.
 *
 * @param targetUrl - Absolute URL to analyse.
 * @param timeout - Timeout in milliseconds handed to each individual request.
 * @returns TLS/HTTP diagnostics, timing metrics and any issues detected.
 * @throws Error when a hop is blocked by the IP guard, when a request fails
 *         (wrapped with the failing URL; the original error is the `cause`),
 *         or when the redirect limit is exhausted.
 */
export async function analyzeTransport(targetUrl: string, timeout: number): Promise<{
  transport: TransportDiagnostics;
  performance: PerformanceMetrics;
  issues: AuditIssue[];
}> {
  const maxRedirects = 10;
  let currentUrl = targetUrl;
  let redirectCount = 0;
  const redirects: RedirectInfo[] = [];
  const issues: AuditIssue[] = [];

  // Accumulated total time of every hop that answered with a redirect.
  let totalRedirectTime = 0;

  for (let hop = 0; hop < maxRedirects; hop++) {
    const urlObj = new URL(currentUrl);
    const isSafe = await IPGuard.validateHost(urlObj.hostname);
    if (!isSafe) {
      throw new Error(`Blocked: Redirect to internal/private IP prohibited (${currentUrl})`);
    }

    try {
      const result = await executeRequest(currentUrl, timeout);

      if (result.redirectUrl) {
        redirectCount++;
        totalRedirectTime += result.timings.total;

        redirects.push({
          url: currentUrl,
          statusCode: result.response.statusCode || 0,
          location: result.redirectUrl
        });

        currentUrl = result.redirectUrl;
        continue;
      }

      // Final destination reached
      const { response, body, timings, socket } = result;

      // Collect certificate / TLS session info (only available on TLS sockets)
      let certInfo: CertificateInfo | null = null;
      let tlsVersion: string | null = null;
      let cipherSuite: string | null = null;
      let alpnProtocol: string | null = null;

      if (socket instanceof tls.TLSSocket) {
        const cert = socket.getPeerCertificate(true);
        tlsVersion = socket.getProtocol();
        const cipher = socket.getCipher();
        cipherSuite = cipher ? cipher.name : null;
        alpnProtocol = socket.alpnProtocol || null;

        if (cert && Object.keys(cert).length > 0) {
          certInfo = {
            subject: (cert.subject && cert.subject.CN) ? cert.subject.CN : 'Unknown',
            issuer: (cert.issuer && cert.issuer.CN) ? cert.issuer.CN : 'Unknown',
            validFrom: cert.valid_from,
            validTo: cert.valid_to,
            daysUntilExpiry: Math.floor((new Date(cert.valid_to).getTime() - Date.now()) / (1000 * 60 * 60 * 24)),
            // Coerce: a missing issuer/subject would otherwise leak `undefined` into a boolean field.
            isSelfSigned: Boolean(cert.issuer && cert.subject && cert.issuer.CN === cert.subject.CN),
            isValidChain: socket.authorized,
            fingerprint: cert.fingerprint,
            serialNumber: cert.serialNumber,
            subjectAltName: cert.subjectaltname
          };

          if (!socket.authorized) {
            issues.push({
              id: 'cert-invalid',
              severity: 'severe',
              category: 'tls',
              message: `Certificate validation failed: ${socket.authorizationError}`,
              scorePenalty: 30
            });
          }
        }
      }

      const httpVersion = response.httpVersion;

      // Content-Encoding may list several codings ("gzip, br"); report each one separately.
      const contentEncoding = response.headers['content-encoding'];
      const compression: string[] = contentEncoding
        ? contentEncoding
            .split(',')
            .map((coding) => coding.trim())
            .filter((coding) => coding.length > 0)
        : [];

      const connectionHeader = response.headers['connection'];
      const keepAlive = connectionHeader ? connectionHeader.toLowerCase() !== 'close' : true;
      const serverHeader = (response.headers['server'] as string) || null;

      // Header size is estimated by re-serialising the parsed status line and headers.
      const headerText = `HTTP/${response.httpVersion} ${response.statusCode} ${response.statusMessage}\r\n` +
        Object.entries(response.headers).map(([k, v]) => `${k}: ${v}`).join('\r\n') +
        '\r\n\r\n';
      const headerSize = Buffer.byteLength(headerText);
      const htmlSize = body.length;

      const transport: TransportDiagnostics = {
        tlsVersion,
        cipherSuite,
        alpnProtocol: alpnProtocol || (httpVersion === '2.0' ? 'h2' : 'http/1.1'),
        certificate: certInfo,
        httpVersion,
        compression,
        keepAlive,
        transferEncoding: (response.headers['transfer-encoding'] as string) || null,
        redirectCount,
        redirects,
        serverHeader,
        headers: response.headers
      };

      const performance: PerformanceMetrics = {
        dnsLookupTime: timings.dns,
        tcpConnectTime: timings.tcp,
        tlsHandshakeTime: timings.tls,
        ttfb: timings.ttfb,
        totalTime: timings.total + totalRedirectTime,
        htmlSize,
        headerSize,
        redirectTime: totalRedirectTime
      };

      return { transport, performance, issues };

    } catch (error: unknown) {
      // Non-Error throwables have no `.message`; fall back to their string form.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Transport analysis failed for ${currentUrl}: ${reason}`, { cause: error });
    }
  }

  throw new Error(`Too many redirects (limit: ${maxRedirects})`);
}
|
|
156
|
-
|
|
157
|
-
/**
 * Performs one GET request on a fresh connection and records per-phase timings.
 *
 * Redirects are not followed: for a 3xx response carrying a parsable
 * `Location` header, the resolved absolute target is returned in `redirectUrl`.
 * Certificate errors do not abort the request (`rejectUnauthorized: false`),
 * so the caller can inspect the socket's authorization state itself.
 *
 * @param urlStr - Absolute http(s) URL to request.
 * @param timeout - Timeout in milliseconds passed to the request options.
 * @returns The response, its buffered body, timings, socket and redirect target.
 *          Rejects on an invalid URL, an unsupported protocol, a request or
 *          response stream error, or a timeout.
 */
function executeRequest(urlStr: string, timeout: number): Promise<RequestResult> {
  return new Promise((resolve, reject) => {
    let url: URL;
    try {
      url = new URL(urlStr);
    } catch (_e) {
      return reject(new Error(`Invalid URL: ${urlStr}`));
    }

    // Only http(s) can be served by the modules below; fail with a clear message otherwise.
    if (url.protocol !== 'https:' && url.protocol !== 'http:') {
      return reject(new Error(`Unsupported protocol "${url.protocol}" in URL: ${urlStr}`));
    }

    const isHttps = url.protocol === 'https:';
    const requestModule = isHttps ? https : http;

    const timings = {
      dns: 0,
      tcp: 0,
      tls: 0,
      ttfb: 0,
      total: 0
    };

    const t0 = performance.now();
    // When no 'lookup' event fires, the DNS phase stays at zero and the TCP
    // phase is measured from t0.
    let tDNS = t0;
    let tTCP = t0;
    let tReqSent = 0;

    // We use agent: false to force new connection for accurate timing
    const options = {
      method: 'GET',
      timeout,
      rejectUnauthorized: false,
      agent: false,
      headers: {
        'User-Agent': 'Crawlith/Audit',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br'
      }
    };

    const req = requestModule.request(url, options, (res) => {
      // TTFB: Time from request sent to first byte of headers received
      timings.ttfb = performance.now() - (tReqSent || t0);

      const chunks: Buffer[] = [];
      res.on('data', (chunk) => chunks.push(chunk));
      // Without this handler a response aborted mid-body would leave the promise pending forever.
      res.on('error', (err) => reject(err));
      res.on('end', () => {
        timings.total = performance.now() - t0;
        const body = Buffer.concat(chunks);

        let redirectUrl: string | null = null;
        if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
          try {
            redirectUrl = new URL(res.headers.location, urlStr).toString();
          } catch (_e) {
            // Unparsable Location: treat the response as a final (non-redirect) answer.
          }
        }

        resolve({
          url: urlStr,
          response: res,
          body,
          timings,
          socket: res.socket,
          redirectUrl
        });
      });
    });

    req.on('socket', (socket) => {
      socket.on('lookup', () => {
        tDNS = performance.now();
        timings.dns = tDNS - t0;
      });
      socket.on('connect', () => {
        tTCP = performance.now();
        timings.tcp = tTCP - tDNS;
      });
      socket.on('secureConnect', () => {
        timings.tls = performance.now() - tTCP;
      });
    });

    req.on('finish', () => {
      tReqSent = performance.now();
    });

    req.on('error', (err) => reject(err));
    req.on('timeout', () => {
      req.destroy();
      reject(new Error('Request timed out'));
    });

    req.end();
  });
}
|
package/src/audit/types.ts
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
/**
 * Complete audit report for one URL: the individual diagnostic sections plus
 * the overall score, grade and issue list.
 */
export interface AuditResult {
  /** URL the audit refers to. */
  url: string;

  /** TLS and HTTP protocol diagnostics. */
  transport: TransportDiagnostics;

  /** Security response-header evaluation. */
  securityHeaders: SecurityHeadersResult;

  /** DNS record diagnostics. */
  dns: DnsDiagnostics;

  /** Timing and size measurements. */
  performance: PerformanceMetrics;

  /** Overall numeric score. */
  score: number;

  /** Letter grade for the audit. */
  grade: 'A' | 'B' | 'C' | 'D' | 'F';

  /** Issues collected across all sections. */
  issues: AuditIssue[];
}
|
|
12
|
-
|
|
13
|
-
/** TLS session and HTTP protocol details observed for the final response. */
export interface TransportDiagnostics {
  // ---- TLS / SSL ----

  /** Negotiated TLS protocol version, or null when not available. */
  tlsVersion: string | null;

  /** Name of the negotiated cipher suite, or null when not available. */
  cipherSuite: string | null;

  /** Application protocol, e.g. `http/1.1` or `h2`. */
  alpnProtocol: string | null;

  /** Peer certificate details, or null when none were collected. */
  certificate: CertificateInfo | null;

  // ---- HTTP protocol ----

  /** HTTP version string of the response. */
  httpVersion: string;

  /** Content codings of the response, e.g. gzip, br, deflate. */
  compression: string[];

  /** Whether the connection is kept alive. */
  keepAlive: boolean;

  /** Value of the Transfer-Encoding header, or null. */
  transferEncoding: string | null;

  /** Number of redirects encountered. */
  redirectCount: number;

  /** The recorded redirect hops. */
  redirects: RedirectInfo[];

  /** Value of the Server header, or null. */
  serverHeader: string | null;

  /** Response headers keyed by header name. */
  headers: Record<string, string | string[] | undefined>;
}
|
|
30
|
-
|
|
31
|
-
/** Summary of a server's TLS certificate. */
export interface CertificateInfo {
  /** Issuer name. */
  issuer: string;

  /** Subject name. */
  subject: string;

  /** Start of the validity period, as a date string. */
  validFrom: string;

  /** End of the validity period, as a date string. */
  validTo: string;

  /** Whole days remaining until `validTo`. */
  daysUntilExpiry: number;

  /** Whether the certificate appears to be self-signed. */
  isSelfSigned: boolean;

  /**
   * Basic chain check, relying on node tls rejectUnauthorized: true result
   * if possible, or manual check.
   */
  isValidChain: boolean;

  /** Certificate fingerprint. */
  fingerprint: string;

  /** Certificate serial number. */
  serialNumber: string;

  /** Subject Alternative Name entries, when present. */
  subjectAltName?: string;
}
|
|
43
|
-
|
|
44
|
-
/** One hop of a redirect chain. */
export interface RedirectInfo {
  /** URL that responded with the redirect. */
  url: string;

  /** HTTP status code of that response. */
  statusCode: number;

  /** Redirect target, or null when none is known. */
  location: string | null;
}
|
|
49
|
-
|
|
50
|
-
/** Evaluation of the security-related response headers. */
export interface SecurityHeadersResult {
  /** Status of Strict-Transport-Security. */
  strictTransportSecurity: HeaderStatus;

  /** Status of Content-Security-Policy. */
  contentSecurityPolicy: HeaderStatus;

  /** Status of X-Frame-Options. */
  xFrameOptions: HeaderStatus;

  /** Status of X-Content-Type-Options. */
  xContentTypeOptions: HeaderStatus;

  /** Status of Referrer-Policy. */
  referrerPolicy: HeaderStatus;

  /** Status of Permissions-Policy. */
  permissionsPolicy: HeaderStatus;

  /** Raw header values. */
  details: Record<string, string>;

  /** Partial score contribution (0-100 normalized for headers section). */
  score: number;
}
|
|
61
|
-
|
|
62
|
-
/** Presence and validity of a single response header. */
export interface HeaderStatus {
  /** Whether the header was present. */
  present: boolean;

  /** Header value, or null. */
  value: string | null;

  /** Result of a simple syntax check. */
  valid: boolean;

  /** Problems found with the header, if any. */
  issues?: string[];
}
|
|
68
|
-
|
|
69
|
-
/** DNS lookup results for the audited host. */
export interface DnsDiagnostics {
  /** A record values. */
  a: string[];

  /** AAAA record values. */
  aaaa: string[];

  /** CNAME record values. */
  cname: string[];

  /** Reverse-lookup results. */
  reverse: string[];

  /** Number of IP addresses. */
  ipCount: number;

  /** Whether IPv6 is supported. */
  ipv6Support: boolean;

  /** Time taken to resolve. */
  resolutionTime: number;
}
|
|
78
|
-
|
|
79
|
-
/** Timing (milliseconds) and size (bytes) measurements for a request. */
export interface PerformanceMetrics {
  /** DNS lookup time in ms. */
  dnsLookupTime: number;

  /** TCP connect time in ms. */
  tcpConnectTime: number;

  /** TLS handshake time in ms. */
  tlsHandshakeTime: number;

  /** Time to first byte in ms. */
  ttfb: number;

  /** Total time in ms. */
  totalTime: number;

  /** Body size in bytes. */
  htmlSize: number;

  /** Header size in bytes. */
  headerSize: number;

  /** Accumulated time spent in redirects. */
  redirectTime?: number;
}
|
|
89
|
-
|
|
90
|
-
/** A single finding produced by the audit. */
export interface AuditIssue {
  /** Unique code for tests/filtering. */
  id: string;

  /** How serious the finding is. */
  severity: 'critical' | 'severe' | 'moderate' | 'minor' | 'info';

  /** Audit area the finding belongs to. */
  category: 'tls' | 'http' | 'headers' | 'dns' | 'performance';

  /** Human-readable description. */
  message: string;

  /** Score penalty associated with the finding. */
  scorePenalty: number;
}
|
|
97
|
-
|
|
98
|
-
/** Optional settings for an audit run. */
export interface AuditOptions {
  /** Request timeout. */
  timeout?: number;

  /** Verbose-output flag. */
  verbose?: boolean;

  /** Debug-output flag. */
  debug?: boolean;
}
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { ProxyAgent } from 'undici';
|
|
2
|
-
|
|
3
|
-
/**
 * Wraps an optional undici `ProxyAgent` built from a proxy URL.
 * Without a proxy URL no agent is created and `dispatcher` is undefined.
 */
export class ProxyAdapter {
  private agent?: ProxyAgent;

  /**
   * @param proxyUrl - Proxy URL; when omitted or empty, no agent is created.
   * @throws Error if the URL cannot be parsed or the agent cannot be constructed.
   */
  constructor(proxyUrl?: string) {
    if (!proxyUrl) {
      return;
    }

    try {
      // Parsing is done purely for validation; the result is discarded.
      new URL(proxyUrl);
      this.agent = new ProxyAgent(proxyUrl);
    } catch {
      throw new Error(`Invalid proxy URL: ${proxyUrl}`);
    }
  }

  /** The configured proxy agent, or undefined when no proxy URL was given. */
  get dispatcher() {
    return this.agent;
  }
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
/**
 * Per-host token-bucket rate limiter.
 *
 * Each host gets its own bucket. Without a crawl delay a bucket can hold up
 * to `rate` tokens (minimum one), allowing short bursts. With a crawl delay
 * the bucket holds a single token, so consecutive requests to the host are
 * spaced by at least the effective interval.
 */
export class RateLimiter {
  private buckets: Map<string, { tokens: number; lastRefill: number }> = new Map();
  private rate: number; // tokens per second

  /**
   * @param rate - Maximum requests per second per host; must be greater than zero.
   * @throws RangeError if `rate` is zero, negative or NaN (such a limiter could
   *         never hand out a second token).
   */
  constructor(rate: number = 2) {
    if (!(rate > 0)) {
      throw new RangeError(`RateLimiter rate must be a positive number, got ${rate}`);
    }
    this.rate = rate;
  }

  /**
   * Resolves once a request to `host` is allowed to proceed.
   *
   * @param host - Key identifying the bucket.
   * @param crawlDelay - Minimum delay between requests in seconds; values
   *        less than or equal to zero mean "no crawl delay".
   */
  async waitForToken(host: string, crawlDelay: number = 0): Promise<void> {
    const effectiveRate = crawlDelay > 0 ? Math.min(this.rate, 1 / crawlDelay) : this.rate;
    const interval = 1000 / effectiveRate;

    // Capacity must be at least 1, otherwise `tokens >= 1` can never become true.
    // A crawl delay is a spacing guarantee, so it disables bursting entirely.
    const capacity = crawlDelay > 0 ? 1 : Math.max(1, this.rate);

    const bucket = this.buckets.get(host);
    if (!bucket) {
      // First request for this host consumes one token from a full bucket.
      this.buckets.set(host, { tokens: capacity - 1, lastRefill: Date.now() });
      return;
    }

    while (true) {
      const now = Date.now();
      const elapsed = now - bucket.lastRefill;

      if (elapsed > 0) {
        bucket.tokens = Math.min(capacity, bucket.tokens + elapsed / interval);
        bucket.lastRefill = now;
      }

      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return;
      }

      // Sleep only for the missing fraction of a token (at least 1 ms to avoid spinning).
      const waitTime = Math.max(1, Math.ceil((1 - bucket.tokens) * interval));
      await new Promise(resolve => setTimeout(resolve, waitTime));
    }
  }
}
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
/**
 * Tracks a redirect chain, enforcing a hop limit and detecting loops.
 * URLs are compared with their fragment removed.
 */
export class RedirectController {
  private currentHops = 0;
  private history = new Set<string>();

  /**
   * @param maxHops - Maximum number of hops that may be recorded.
   * @param seedUrl - Optional starting URL; a redirect back to it counts as a loop.
   */
  constructor(private maxHops: number = 5, seedUrl?: string) {
    if (seedUrl) {
      this.history.add(this.normalize(seedUrl));
    }
  }

  /**
   * Registers a redirect to `url`.
   *
   * @returns `null` when the hop is accepted (and recorded), `'redirect_loop'`
   *          when the URL was seen before, or `'redirect_limit_exceeded'` when
   *          the hop limit is already used up. Loop detection takes precedence.
   */
  nextHop(url: string): 'redirect_limit_exceeded' | 'redirect_loop' | null {
    const key = this.normalize(url);

    const verdict = this.history.has(key)
      ? 'redirect_loop'
      : this.currentHops >= this.maxHops
        ? 'redirect_limit_exceeded'
        : null;

    if (verdict === null) {
      this.history.add(key);
      this.currentHops += 1;
    }
    return verdict;
  }

  /** Number of hops accepted so far. */
  get hops(): number {
    return this.currentHops;
  }

  /** Canonical form used for loop detection: the URL without its hash; unparsable input is returned as-is. */
  private normalize(url: string): string {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return url;
    }
    parsed.hash = '';
    return parsed.toString();
  }
}
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import { Readable } from 'stream';
|
|
2
|
-
|
|
3
|
-
/** Helpers for reading response bodies with a hard size cap. */
export class ResponseLimiter {
  /**
   * Buffers `stream` and decodes it as UTF-8, aborting once more than
   * `maxBytes` bytes have been received.
   *
   * @param stream - Source stream; it is destroyed when the limit is exceeded.
   * @param maxBytes - Maximum number of bytes accepted (inclusive).
   * @param onOversized - Optional callback invoked with the byte count seen
   *        at the moment the limit was exceeded.
   * @returns The decoded body. Rejects with `Oversized response` when the
   *          limit is exceeded, with the stream's error if it fails, or with
   *          `Stream closed before end` if it closes without ending.
   */
  static async streamToString(
    stream: Readable,
    maxBytes: number,
    onOversized?: (bytes: number) => void
  ): Promise<string> {
    return new Promise((resolve, reject) => {
      let accumulated = 0;
      const chunks: Buffer[] = [];

      stream.on('data', (chunk: any) => {
        const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
        accumulated += buffer.length;
        if (accumulated > maxBytes) {
          stream.destroy();
          if (onOversized) onOversized(accumulated);
          reject(new Error('Oversized response'));
          return;
        }
        chunks.push(buffer);
      });

      stream.on('end', () => {
        resolve(Buffer.concat(chunks).toString('utf-8'));
      });

      stream.on('error', (err) => {
        reject(err);
      });

      // A stream destroyed without an error emits neither 'end' nor 'error';
      // without this the promise would stay pending forever. When the promise
      // has already settled, this extra reject is a no-op.
      stream.on('close', () => {
        reject(new Error('Stream closed before end'));
      });
    });
  }
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
/** Settings controlling how often and how quickly an operation is retried. */
export interface RetryConfig {
  /** Number of retries allowed after the first attempt. */
  maxRetries: number;

  /** Delay before the first retry, in milliseconds; later retries back off exponentially from it. */
  baseDelay: number;
}
|
|
5
|
-
|
|
6
|
-
/** Retry helper with exponential backoff and jitter, plus classifiers for retryable failures. */
export class RetryPolicy {
  /** Defaults: 3 retries, starting at a 500 ms delay. */
  static DEFAULT_CONFIG: RetryConfig = {
    maxRetries: 3,
    baseDelay: 500
  };

  /**
   * Runs `operation`, retrying while `isRetryable` approves the error and
   * retries remain. The delay before retry `n` (0-based) is
   * `baseDelay * 2^n`, varied by up to ±10% jitter.
   *
   * @param operation - Work to perform; receives the 0-based attempt number.
   * @param isRetryable - Decides whether a thrown error warrants another attempt.
   * @param config - Retry limits; a negative or NaN `maxRetries` is treated as 0,
   *        so the operation always runs at least once.
   * @returns The operation's result.
   * @throws The last error thrown by `operation` once retries are exhausted
   *         or the error is not retryable.
   */
  static async execute<T>(
    operation: (attempt: number) => Promise<T>,
    isRetryable: (error: any) => boolean,
    config: RetryConfig = RetryPolicy.DEFAULT_CONFIG
  ): Promise<T> {
    // Guard against configurations that would skip the loop entirely and throw `undefined`.
    const maxRetries = config.maxRetries > 0 ? config.maxRetries : 0;
    let lastError: unknown;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await operation(attempt);
      } catch (error) {
        lastError = error;

        if (attempt >= maxRetries || !isRetryable(error)) {
          throw error;
        }

        const delay = config.baseDelay * Math.pow(2, attempt);
        const jitter = delay * 0.1 * (Math.random() * 2 - 1);
        const finalDelay = Math.max(0, delay + jitter);

        await new Promise(resolve => setTimeout(resolve, finalDelay));
      }
    }

    // Only reachable with a fractional maxRetries, where the loop ends after a retryable failure.
    throw lastError;
  }

  /** True for HTTP 429 and any 5xx status. */
  static isRetryableStatus(status: number): boolean {
    return status === 429 || (status >= 500 && status <= 599);
  }

  /** True when the error (or its `cause`) carries one of the listed network error codes. */
  static isNetworkError(error: any): boolean {
    const code = error?.code || error?.cause?.code;
    return [
      'ETIMEDOUT',
      'ECONNRESET',
      'EADDRINUSE',
      'ECONNREFUSED',
      'EPIPE',
      'ENOTFOUND',
      'ENETUNREACH',
      'EAI_AGAIN'
    ].includes(code);
  }
}
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
/**
 * Allow/deny list for hostnames. The deny list always wins; an empty allow
 * list means "everything not denied is allowed".
 */
export class DomainFilter {
  private allowed: Set<string>;
  private denied: Set<string>;

  /**
   * @param allowed - Hostnames to allow; when empty, no allow-list restriction applies.
   * @param denied - Hostnames to reject.
   */
  constructor(allowed: string[] = [], denied: string[] = []) {
    this.allowed = new Set(allowed.map(entry => this.normalize(entry)));
    this.denied = new Set(denied.map(entry => this.normalize(entry)));
  }

  /**
   * Canonicalises a hostname (not a URL): lower-cases, trims, drops one
   * trailing dot, then lets the URL parser normalise it (e.g. punycode).
   * If the parser rejects the value, the cleaned-up string is used as-is.
   */
  private normalize(hostname: string): string {
    const cleaned = hostname.toLowerCase().trim();
    const withoutDot = cleaned.endsWith('.') ? cleaned.slice(0, -1) : cleaned;

    try {
      // Wrapping in a dummy URL delegates normalisation to the platform's URL logic.
      return new URL(`http://${withoutDot}`).hostname;
    } catch {
      return withoutDot;
    }
  }

  /** Returns whether `hostname` passes the deny list and, if one is configured, the allow list. */
  isAllowed(hostname: string): boolean {
    const host = this.normalize(hostname);

    // Deny list takes precedence over everything else.
    if (this.denied.has(host)) {
      return false;
    }

    // With no allow list configured, anything not denied passes.
    return this.allowed.size === 0 || this.allowed.has(host);
  }
}
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
import { DomainFilter } from './domainFilter.js';
|
|
2
|
-
import { SubdomainPolicy } from './subdomainPolicy.js';
|
|
3
|
-
|
|
4
|
-
/** Configuration describing which hosts are in scope. */
export interface ScopeOptions {
  /** Hostnames that are explicitly allowed. */
  allowedDomains?: string[];

  /** Hostnames that are always rejected. */
  deniedDomains?: string[];

  /** Whether subdomains of the root host are in scope. */
  includeSubdomains?: boolean;

  /** URL whose host defines the root of the scope. */
  rootUrl: string;
}
|
|
10
|
-
|
|
11
|
-
/** Outcome of a scope check: allowed, or the reason the URL was blocked. */
export type EligibilityResult =
  | 'allowed'
  | 'blocked_by_domain_filter'
  | 'blocked_subdomain';
|
|
12
|
-
|
|
13
|
-
/**
 * Decides whether a URL is in scope by combining the allow/deny domain
 * filter with the subdomain policy derived from the root URL.
 */
export class ScopeManager {
  private domainFilter: DomainFilter;
  private subdomainPolicy: SubdomainPolicy;
  private explicitAllowed: Set<string>;

  /**
   * @param options - Root URL, optional allow/deny lists and subdomain flag.
   */
  constructor(options: ScopeOptions) {
    this.domainFilter = new DomainFilter(options.allowedDomains, options.deniedDomains);
    this.subdomainPolicy = new SubdomainPolicy(options.rootUrl, options.includeSubdomains);
    // Canonicalise allow-list entries the same way URL hostnames are produced,
    // so entries such as IDN names or "host:port" still match in isUrlEligible.
    this.explicitAllowed = new Set(
      (options.allowedDomains || []).map(domain => ScopeManager.normalizeHost(domain))
    );
  }

  /**
   * Lower-cases, trims and strips one trailing dot, then runs the value
   * through the URL parser; falls back to the cleaned string when the parser
   * rejects it.
   */
  private static normalizeHost(value: string): string {
    let host = value.toLowerCase().trim();
    if (host.endsWith('.')) host = host.slice(0, -1);
    try {
      return new URL(`http://${host}`).hostname;
    } catch {
      return host;
    }
  }

  /**
   * Classifies `url` as allowed or blocked.
   *
   * Order of checks: the domain filter first (an unparsable URL is reported
   * as `blocked_by_domain_filter`), then the explicit allow list, which
   * bypasses the subdomain policy, then the subdomain policy.
   */
  isUrlEligible(url: string): EligibilityResult {
    let hostname: string;
    try {
      hostname = new URL(url).hostname.toLowerCase();
      if (hostname.endsWith('.')) hostname = hostname.slice(0, -1);
    } catch {
      return 'blocked_by_domain_filter'; // Invalid URL is effectively blocked
    }

    if (!this.domainFilter.isAllowed(hostname)) {
      return 'blocked_by_domain_filter';
    }

    // If explicit whitelist is used, and this domain is in it, allow it
    if (this.explicitAllowed.has(hostname)) {
      return 'allowed';
    }

    if (!this.subdomainPolicy.isAllowed(hostname)) {
      return 'blocked_subdomain';
    }

    return 'allowed';
  }
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
/**
 * Decides whether a hostname belongs to the crawl's root host, optionally
 * including its subdomains.
 */
export class SubdomainPolicy {
  private rootHost: string;
  private includeSubdomains: boolean;

  /**
   * @param rootUrl - URL whose hostname becomes the root host. If it cannot
   *        be parsed, no root host is set and every hostname is rejected.
   * @param includeSubdomains - Whether subdomains of the root host are allowed.
   */
  constructor(rootUrl: string, includeSubdomains: boolean = false) {
    try {
      this.rootHost = new URL(rootUrl).hostname.toLowerCase();
      if (this.rootHost.endsWith('.')) {
        this.rootHost = this.rootHost.slice(0, -1);
      }
    } catch {
      this.rootHost = '';
    }
    this.includeSubdomains = includeSubdomains;
  }

  /**
   * @param hostname - Hostname to test; compared case-insensitively, ignoring
   *        surrounding whitespace and one trailing dot.
   * @returns true for the root host itself and, when subdomains are included,
   *          for any host ending in `.<rootHost>`.
   */
  isAllowed(hostname: string): boolean {
    // Without a root host there is nothing to match against; in particular an
    // empty hostname must not count as an "exact match" of the empty root.
    if (this.rootHost === '') {
      return false;
    }

    let target = hostname.toLowerCase().trim();
    if (target.endsWith('.')) {
      target = target.slice(0, -1);
    }

    // Exact match is always allowed
    if (target === this.rootHost) {
      return true;
    }

    if (!this.includeSubdomains) {
      return false;
    }

    // Label-based check: requiring the leading dot keeps look-alike hosts
    // such as "notexample.com" from matching "example.com".
    return target.endsWith(`.${this.rootHost}`);
  }
}
|