@crawlith/core 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +70 -0
- package/dist/analysis/analysis_list.html +35 -0
- package/dist/analysis/analysis_page.html +123 -0
- package/dist/analysis/analyze.d.ts +40 -5
- package/dist/analysis/analyze.js +395 -347
- package/dist/analysis/clustering.d.ts +23 -0
- package/dist/analysis/clustering.js +206 -0
- package/dist/analysis/content.d.ts +1 -1
- package/dist/analysis/content.js +11 -5
- package/dist/analysis/duplicate.d.ts +34 -0
- package/dist/analysis/duplicate.js +305 -0
- package/dist/analysis/heading.d.ts +116 -0
- package/dist/analysis/heading.js +356 -0
- package/dist/analysis/images.d.ts +1 -1
- package/dist/analysis/images.js +6 -5
- package/dist/analysis/links.d.ts +1 -1
- package/dist/analysis/links.js +8 -8
- package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
- package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
- package/dist/analysis/scoring.js +11 -2
- package/dist/analysis/seo.d.ts +8 -4
- package/dist/analysis/seo.js +41 -30
- package/dist/analysis/soft404.d.ts +17 -0
- package/dist/analysis/soft404.js +62 -0
- package/dist/analysis/structuredData.d.ts +1 -1
- package/dist/analysis/structuredData.js +5 -4
- package/dist/analysis/templates.d.ts +2 -0
- package/dist/analysis/templates.js +7 -0
- package/dist/application/index.d.ts +2 -0
- package/dist/application/index.js +2 -0
- package/dist/application/usecase.d.ts +3 -0
- package/dist/application/usecase.js +1 -0
- package/dist/application/usecases.d.ts +114 -0
- package/dist/application/usecases.js +201 -0
- package/dist/audit/index.js +1 -1
- package/dist/audit/transport.d.ts +1 -1
- package/dist/audit/transport.js +5 -4
- package/dist/audit/types.d.ts +1 -0
- package/dist/constants.d.ts +17 -0
- package/dist/constants.js +23 -0
- package/dist/core/scope/scopeManager.js +3 -0
- package/dist/core/security/ipGuard.d.ts +11 -0
- package/dist/core/security/ipGuard.js +71 -3
- package/dist/crawler/crawl.d.ts +4 -22
- package/dist/crawler/crawl.js +4 -335
- package/dist/crawler/crawler.d.ts +87 -0
- package/dist/crawler/crawler.js +683 -0
- package/dist/crawler/extract.d.ts +4 -1
- package/dist/crawler/extract.js +7 -2
- package/dist/crawler/fetcher.d.ts +2 -1
- package/dist/crawler/fetcher.js +26 -11
- package/dist/crawler/metricsRunner.d.ts +23 -1
- package/dist/crawler/metricsRunner.js +202 -72
- package/dist/crawler/normalize.d.ts +41 -0
- package/dist/crawler/normalize.js +119 -3
- package/dist/crawler/parser.d.ts +1 -3
- package/dist/crawler/parser.js +2 -49
- package/dist/crawler/resolver.d.ts +11 -0
- package/dist/crawler/resolver.js +67 -0
- package/dist/crawler/sitemap.d.ts +6 -0
- package/dist/crawler/sitemap.js +27 -17
- package/dist/crawler/trap.d.ts +5 -1
- package/dist/crawler/trap.js +23 -2
- package/dist/db/CrawlithDB.d.ts +110 -0
- package/dist/db/CrawlithDB.js +500 -0
- package/dist/db/graphLoader.js +42 -30
- package/dist/db/index.d.ts +11 -0
- package/dist/db/index.js +41 -29
- package/dist/db/migrations.d.ts +2 -0
- package/dist/db/{schema.js → migrations.js} +90 -43
- package/dist/db/pluginRegistry.d.ts +9 -0
- package/dist/db/pluginRegistry.js +19 -0
- package/dist/db/repositories/EdgeRepository.d.ts +13 -0
- package/dist/db/repositories/EdgeRepository.js +20 -0
- package/dist/db/repositories/MetricsRepository.d.ts +16 -8
- package/dist/db/repositories/MetricsRepository.js +28 -7
- package/dist/db/repositories/PageRepository.d.ts +15 -2
- package/dist/db/repositories/PageRepository.js +169 -25
- package/dist/db/repositories/SiteRepository.d.ts +9 -0
- package/dist/db/repositories/SiteRepository.js +13 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
- package/dist/db/repositories/SnapshotRepository.js +64 -5
- package/dist/db/reset.d.ts +9 -0
- package/dist/db/reset.js +32 -0
- package/dist/db/statements.d.ts +12 -0
- package/dist/db/statements.js +40 -0
- package/dist/diff/compare.d.ts +0 -5
- package/dist/diff/compare.js +0 -12
- package/dist/diff/service.d.ts +16 -0
- package/dist/diff/service.js +41 -0
- package/dist/domain/index.d.ts +4 -0
- package/dist/domain/index.js +4 -0
- package/dist/events.d.ts +56 -0
- package/dist/events.js +1 -0
- package/dist/graph/graph.d.ts +36 -42
- package/dist/graph/graph.js +26 -17
- package/dist/graph/hits.d.ts +23 -0
- package/dist/graph/hits.js +111 -0
- package/dist/graph/metrics.d.ts +0 -4
- package/dist/graph/metrics.js +25 -9
- package/dist/graph/pagerank.d.ts +17 -4
- package/dist/graph/pagerank.js +126 -91
- package/dist/graph/simhash.d.ts +6 -0
- package/dist/graph/simhash.js +14 -0
- package/dist/index.d.ts +29 -8
- package/dist/index.js +29 -8
- package/dist/lock/hashKey.js +1 -1
- package/dist/lock/lockManager.d.ts +5 -1
- package/dist/lock/lockManager.js +38 -13
- package/dist/plugin-system/plugin-cli.d.ts +10 -0
- package/dist/plugin-system/plugin-cli.js +31 -0
- package/dist/plugin-system/plugin-config.d.ts +16 -0
- package/dist/plugin-system/plugin-config.js +36 -0
- package/dist/plugin-system/plugin-loader.d.ts +17 -0
- package/dist/plugin-system/plugin-loader.js +122 -0
- package/dist/plugin-system/plugin-registry.d.ts +25 -0
- package/dist/plugin-system/plugin-registry.js +167 -0
- package/dist/plugin-system/plugin-types.d.ts +205 -0
- package/dist/plugin-system/plugin-types.js +1 -0
- package/dist/ports/index.d.ts +9 -0
- package/dist/ports/index.js +1 -0
- package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
- package/dist/report/crawlExport.d.ts +3 -0
- package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
- package/dist/report/crawl_template.d.ts +1 -0
- package/dist/report/crawl_template.js +7 -0
- package/dist/report/export.d.ts +3 -0
- package/dist/report/export.js +81 -0
- package/dist/report/html.js +15 -216
- package/dist/report/insight.d.ts +27 -0
- package/dist/report/insight.js +103 -0
- package/dist/scoring/health.d.ts +56 -0
- package/dist/scoring/health.js +213 -0
- package/dist/utils/chalk.d.ts +6 -0
- package/dist/utils/chalk.js +41 -0
- package/dist/utils/secureConfig.d.ts +23 -0
- package/dist/utils/secureConfig.js +128 -0
- package/package.json +12 -6
- package/CHANGELOG.md +0 -7
- package/dist/db/schema.d.ts +0 -2
- package/dist/graph/cluster.d.ts +0 -6
- package/dist/graph/cluster.js +0 -173
- package/dist/graph/duplicate.d.ts +0 -10
- package/dist/graph/duplicate.js +0 -251
- package/dist/report/sitegraphExport.d.ts +0 -3
- package/dist/report/sitegraph_template.d.ts +0 -1
- package/dist/report/sitegraph_template.js +0 -630
- package/dist/scoring/hits.d.ts +0 -9
- package/dist/scoring/hits.js +0 -111
- package/src/analysis/analyze.ts +0 -548
- package/src/analysis/content.ts +0 -62
- package/src/analysis/images.ts +0 -28
- package/src/analysis/links.ts +0 -41
- package/src/analysis/scoring.ts +0 -59
- package/src/analysis/seo.ts +0 -82
- package/src/analysis/structuredData.ts +0 -62
- package/src/audit/dns.ts +0 -49
- package/src/audit/headers.ts +0 -98
- package/src/audit/index.ts +0 -66
- package/src/audit/scoring.ts +0 -232
- package/src/audit/transport.ts +0 -258
- package/src/audit/types.ts +0 -102
- package/src/core/network/proxyAdapter.ts +0 -21
- package/src/core/network/rateLimiter.ts +0 -39
- package/src/core/network/redirectController.ts +0 -47
- package/src/core/network/responseLimiter.ts +0 -34
- package/src/core/network/retryPolicy.ts +0 -57
- package/src/core/scope/domainFilter.ts +0 -45
- package/src/core/scope/scopeManager.ts +0 -52
- package/src/core/scope/subdomainPolicy.ts +0 -39
- package/src/core/security/ipGuard.ts +0 -92
- package/src/crawler/crawl.ts +0 -382
- package/src/crawler/extract.ts +0 -34
- package/src/crawler/fetcher.ts +0 -233
- package/src/crawler/metricsRunner.ts +0 -124
- package/src/crawler/normalize.ts +0 -108
- package/src/crawler/parser.ts +0 -190
- package/src/crawler/sitemap.ts +0 -73
- package/src/crawler/trap.ts +0 -96
- package/src/db/graphLoader.ts +0 -105
- package/src/db/index.ts +0 -70
- package/src/db/repositories/EdgeRepository.ts +0 -29
- package/src/db/repositories/MetricsRepository.ts +0 -49
- package/src/db/repositories/PageRepository.ts +0 -128
- package/src/db/repositories/SiteRepository.ts +0 -32
- package/src/db/repositories/SnapshotRepository.ts +0 -74
- package/src/db/schema.ts +0 -177
- package/src/diff/compare.ts +0 -84
- package/src/graph/cluster.ts +0 -192
- package/src/graph/duplicate.ts +0 -286
- package/src/graph/graph.ts +0 -172
- package/src/graph/metrics.ts +0 -110
- package/src/graph/pagerank.ts +0 -125
- package/src/graph/simhash.ts +0 -61
- package/src/index.ts +0 -30
- package/src/lock/hashKey.ts +0 -51
- package/src/lock/lockManager.ts +0 -124
- package/src/lock/pidCheck.ts +0 -13
- package/src/report/html.ts +0 -227
- package/src/report/sitegraphExport.ts +0 -58
- package/src/scoring/hits.ts +0 -131
- package/src/scoring/orphanSeverity.ts +0 -176
- package/src/utils/version.ts +0 -18
- package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
- package/tests/analysis.unit.test.ts +0 -98
- package/tests/analyze.integration.test.ts +0 -98
- package/tests/audit/dns.test.ts +0 -31
- package/tests/audit/headers.test.ts +0 -45
- package/tests/audit/scoring.test.ts +0 -133
- package/tests/audit/security.test.ts +0 -12
- package/tests/audit/transport.test.ts +0 -112
- package/tests/clustering.test.ts +0 -118
- package/tests/crawler.test.ts +0 -358
- package/tests/db.test.ts +0 -159
- package/tests/diff.test.ts +0 -67
- package/tests/duplicate.test.ts +0 -110
- package/tests/fetcher.test.ts +0 -106
- package/tests/fetcher_safety.test.ts +0 -85
- package/tests/fixtures/analyze-crawl.json +0 -26
- package/tests/hits.test.ts +0 -134
- package/tests/html_report.test.ts +0 -58
- package/tests/lock/lockManager.test.ts +0 -138
- package/tests/metrics.test.ts +0 -196
- package/tests/normalize.test.ts +0 -101
- package/tests/orphanSeverity.test.ts +0 -160
- package/tests/pagerank.test.ts +0 -98
- package/tests/parser.test.ts +0 -117
- package/tests/proxy_safety.test.ts +0 -57
- package/tests/redirect_safety.test.ts +0 -73
- package/tests/safety.test.ts +0 -114
- package/tests/scope.test.ts +0 -66
- package/tests/scoring.test.ts +0 -59
- package/tests/sitemap.test.ts +0 -88
- package/tests/soft404.test.ts +0 -41
- package/tests/trap.test.ts +0 -39
- package/tests/visualization_data.test.ts +0 -46
- package/tsconfig.json +0 -11
package/src/audit/scoring.ts
DELETED
|
@@ -1,232 +0,0 @@
|
|
|
1
|
-
/* eslint-disable no-useless-assignment */
|
|
2
|
-
import { TransportDiagnostics, DnsDiagnostics, SecurityHeadersResult, PerformanceMetrics, AuditIssue } from './types.js';
|
|
3
|
-
|
|
4
|
-
/**
 * Points awarded per audit category by `calculateScore`.
 * The four budgets (30 + 20 + 30 + 20) add up to 100.
 */
interface CategoryScores {
  /** Transport security points (TLS version and certificate), budgeted at 30. */
  transport: number;
  /** Response security header points, budgeted at 20. */
  security: number;
  /** Performance points, budgeted at 30. */
  performance: number;
  /** Infrastructure points (IPv6 and IP redundancy), budgeted at 20. */
  infrastructure: number;
}
|
|
10
|
-
|
|
11
|
-
/**
 * Combines transport, DNS, header and performance diagnostics into a 0-100
 * audit score with a letter grade.
 *
 * Point budget:
 * - transport 30: TLS version >= 1.2 (15) + valid, non-self-signed certificate (15)
 * - response security headers 20: `headers.score` (0-100) scaled to 0-20
 * - performance 30: HTTP/2 (5), compression (5), TTFB < 800 ms (10),
 *   at most 3 redirects (5), HTML smaller than 1 MB (5)
 * - infrastructure 20: IPv6 support (10), more than one IP (10)
 *
 * Any issue with severity `critical` (including ones passed in through
 * `existingIssues`) caps the total at 39 so the grade is `F`.
 *
 * @param transport - TLS and HTTP diagnostics.
 * @param dns - DNS diagnostics; only `ipv6Support` and `ipCount` are read.
 * @param headers - Security header results; `score` is expected on a 0-100 scale.
 * @param performance - Timing and size metrics; only `ttfb` and `htmlSize` are read.
 * @param existingIssues - Issues collected earlier; copied, never mutated.
 * @returns The rounded score, the grade derived from that same rounded score,
 *   the combined issue list and the raw (unrounded) per-category points.
 */
export function calculateScore(
  transport: TransportDiagnostics,
  dns: DnsDiagnostics,
  headers: SecurityHeadersResult,
  performance: PerformanceMetrics,
  existingIssues: AuditIssue[]
): { score: number; grade: 'A' | 'B' | 'C' | 'D' | 'F'; issues: AuditIssue[]; categoryScores: CategoryScores } {
  const issues: AuditIssue[] = [...existingIssues];
  let transportScore = 0; // Max 30
  let performanceScore = 0; // Max 30
  let infrastructureScore = 0; // Max 20

  // 1. Transport Security (30 pts)
  // TLS version (15 pts)
  if (transport.tlsVersion) {
    // "TLSv1.3" -> 1.3; anything that does not parse (NaN) fails the comparison
    // below and is therefore reported as deprecated.
    const version = parseFloat(transport.tlsVersion.replace('v', '').replace('TLS', '').trim());
    if (version >= 1.2) {
      transportScore += 15;
    } else {
      issues.push({
        id: 'tls-old',
        severity: 'severe',
        category: 'tls',
        message: `Deprecated TLS version: ${transport.tlsVersion}`,
        scorePenalty: 15
      });
    }
  } else if (!transport.certificate) {
    // Neither a TLS version nor a certificate: treat as plain HTTP.
    // (A certificate without a detected version earns no points but raises no issue.)
    issues.push({
      id: 'no-https',
      severity: 'critical',
      category: 'tls',
      message: 'Site is not using HTTPS',
      scorePenalty: 30
    });
  }

  // Certificate (15 pts)
  const certificate = transport.certificate;
  if (certificate) {
    // An invalid chain is expected to be reported upstream; here it simply earns no points.
    if (certificate.isValidChain && !certificate.isSelfSigned) {
      transportScore += 15;
    }

    if (certificate.daysUntilExpiry < 30 && certificate.daysUntilExpiry >= 0) {
      issues.push({
        id: 'cert-expiring-soon',
        severity: 'moderate',
        category: 'tls',
        message: `Certificate expires in ${certificate.daysUntilExpiry} days`,
        scorePenalty: 5
      });
      // Deduct from whatever transport points were earned, never dropping
      // below zero when none were awarded.
      transportScore = Math.max(0, transportScore - 5);
    } else if (certificate.daysUntilExpiry < 0) {
      issues.push({
        id: 'cert-expired',
        severity: 'critical',
        category: 'tls',
        message: `Certificate expired on ${certificate.validTo}`,
        scorePenalty: 30
      });
      transportScore = 0; // Reset transport score
    }
  }

  // 2. Response Security (Headers) (20 pts)
  // headers.score is 0-100. Map to 0-20.
  const securityScore = (headers.score / 100) * 20;

  // Add issues for missing critical headers
  if (!headers.strictTransportSecurity.present) {
    issues.push({
      id: 'hsts-missing',
      severity: 'moderate',
      category: 'headers',
      message: 'Missing Strict-Transport-Security header',
      scorePenalty: 5
    });
  }
  if (!headers.contentSecurityPolicy.present) {
    issues.push({
      id: 'csp-missing',
      severity: 'moderate',
      category: 'headers',
      message: 'Missing Content-Security-Policy header',
      scorePenalty: 5
    });
  }

  // 3. Performance (30 pts)
  // HTTP/2 (5 pts)
  if (transport.alpnProtocol === 'h2' || transport.httpVersion === '2.0') {
    performanceScore += 5;
  } else {
    issues.push({
      id: 'no-h2',
      severity: 'minor',
      category: 'performance',
      message: 'HTTP/2 not supported',
      scorePenalty: 5
    });
  }

  // Compression (5 pts)
  if (transport.compression.length > 0) {
    performanceScore += 5;
  } else {
    issues.push({
      id: 'no-compression',
      severity: 'moderate',
      category: 'performance',
      message: 'No compression enabled (gzip/br)',
      scorePenalty: 5
    });
  }

  // TTFB (10 pts)
  if (performance.ttfb < 800) {
    performanceScore += 10;
  } else {
    issues.push({
      id: 'slow-ttfb',
      severity: 'moderate',
      category: 'performance',
      message: `Slow TTFB: ${performance.ttfb.toFixed(0)}ms`,
      scorePenalty: 10
    });
  }

  // Redirects (5 pts)
  if (transport.redirectCount <= 3) {
    performanceScore += 5;
  } else {
    issues.push({
      id: 'too-many-redirects',
      severity: 'moderate',
      category: 'performance',
      message: `Too many redirects: ${transport.redirectCount}`,
      scorePenalty: 5
    });
  }

  // HTML Size (5 pts)
  if (performance.htmlSize < 1024 * 1024) { // 1MB
    performanceScore += 5;
  } else {
    issues.push({
      id: 'large-html',
      severity: 'minor',
      category: 'performance',
      message: `HTML size > 1MB: ${(performance.htmlSize / 1024 / 1024).toFixed(2)}MB`,
      scorePenalty: 5
    });
  }

  // 4. Infrastructure (20 pts)
  // IPv6 (10 pts)
  if (dns.ipv6Support) {
    infrastructureScore += 10;
  } else {
    issues.push({
      id: 'no-ipv6',
      severity: 'minor',
      category: 'dns',
      message: 'No IPv6 DNS records found',
      scorePenalty: 5
    });
  }

  // Redundancy (10 pts)
  if (dns.ipCount > 1) {
    infrastructureScore += 10;
  } else {
    issues.push({
      id: 'single-ip',
      severity: 'minor',
      category: 'dns',
      message: 'Single IP address detected (no redundancy)',
      scorePenalty: 5
    });
  }

  let totalScore = transportScore + securityScore + performanceScore + infrastructureScore;

  // Critical override: any critical issue caps the result at F (<40).
  if (issues.some(issue => issue.severity === 'critical')) {
    totalScore = Math.min(totalScore, 39);
  }

  // Round first, then grade, so the reported score and grade always agree
  // (e.g. 89.5 is reported as 90 and graded as 90).
  const score = Math.round(totalScore);

  return {
    score,
    grade: getGrade(score),
    issues,
    categoryScores: {
      transport: transportScore,
      security: securityScore,
      performance: performanceScore,
      infrastructure: infrastructureScore
    }
  };
}
|
|
225
|
-
|
|
226
|
-
/**
 * Maps a numeric score to its letter grade.
 *
 * Lower bounds: A >= 90, B >= 75, C >= 60, D >= 40; anything else
 * (including NaN, which fails every comparison) is F.
 */
function getGrade(score: number): 'A' | 'B' | 'C' | 'D' | 'F' {
  // Ordered from the highest band down; the first floor the score reaches wins.
  const bands: ReadonlyArray<readonly [number, 'A' | 'B' | 'C' | 'D']> = [
    [90, 'A'],
    [75, 'B'],
    [60, 'C'],
    [40, 'D'],
  ];

  for (const [floor, letter] of bands) {
    if (score >= floor) {
      return letter;
    }
  }
  return 'F';
}
|
package/src/audit/transport.ts
DELETED
|
@@ -1,258 +0,0 @@
|
|
|
1
|
-
import https from 'node:https';
|
|
2
|
-
import http from 'node:http';
|
|
3
|
-
import tls from 'node:tls';
|
|
4
|
-
import { URL } from 'node:url';
|
|
5
|
-
import { IPGuard } from '../core/security/ipGuard.js';
|
|
6
|
-
import { TransportDiagnostics, PerformanceMetrics, CertificateInfo, RedirectInfo, AuditIssue } from './types.js';
|
|
7
|
-
import { IncomingMessage } from 'node:http';
|
|
8
|
-
|
|
9
|
-
/** Outcome of a single HTTP(S) request issued by `executeRequest`. */
interface RequestResult {
  /** URL string the request was issued for. */
  url: string;
  /** Node response object, handed over after its body has been fully read. */
  response: IncomingMessage;
  /** Concatenated response body chunks. */
  body: Buffer;
  /** Phase durations in milliseconds, measured with `performance.now()`. */
  timings: {
    /** Request start until the socket's `lookup` event. */
    dns: number;
    /** DNS completion until the socket's `connect` event. */
    tcp: number;
    /** TCP connect until `secureConnect`; stays 0 when that event never fires. */
    tls: number;
    /** Request finished sending until the response callback fires. */
    ttfb: number;
    /** Request start until the response `end` event. */
    total: number;
  };
  /** Socket the response arrived on (`res.socket`). */
  socket: any;
  /** Absolute redirect target for 3xx responses with a usable Location header, else null. */
  redirectUrl: string | null;
}
|
|
23
|
-
|
|
24
|
-
/**
 * Requests `targetUrl`, following redirects manually, and reports transport,
 * certificate and timing diagnostics for the final (non-redirect) response.
 *
 * Every hop's hostname is checked with `IPGuard.validateHost` before the
 * request is sent. At most 10 requests are issued; if the last of them is
 * still a redirect the call fails with "Too many redirects".
 *
 * @param targetUrl - Absolute URL to start from.
 * @param timeout - Per-request timeout, passed through to `executeRequest`.
 * @returns Diagnostics for the final response, timing metrics (redirect time
 *   accumulated across hops) and any issues detected here (`cert-invalid`).
 * @throws Error when a hop's host is rejected by IPGuard, when a request fails
 *   (message prefixed with the failing URL, original error attached as
 *   `cause`), or when the redirect budget is exhausted.
 */
export async function analyzeTransport(targetUrl: string, timeout: number): Promise<{
  transport: TransportDiagnostics;
  performance: PerformanceMetrics;
  issues: AuditIssue[];
}> {
  const maxRedirects = 10;
  let currentUrl = targetUrl;
  let redirectCount = 0;
  const redirects: RedirectInfo[] = [];
  const issues: AuditIssue[] = [];

  // Cumulative time spent on hops that answered with a redirect.
  let totalRedirectTime = 0;

  for (let i = 0; i < maxRedirects; i++) {
    const urlObj = new URL(currentUrl);
    const isSafe = await IPGuard.validateHost(urlObj.hostname);
    if (!isSafe) {
      throw new Error(`Blocked: Redirect to internal/private IP prohibited (${currentUrl})`);
    }

    try {
      const result = await executeRequest(currentUrl, timeout);

      if (result.redirectUrl) {
        redirectCount++;
        totalRedirectTime += result.timings.total;

        redirects.push({
          url: currentUrl,
          statusCode: result.response.statusCode || 0,
          location: result.redirectUrl
        });

        currentUrl = result.redirectUrl;
        continue;
      }

      // Final destination reached
      const { response, body, timings, socket } = result;

      // Collect certificate / TLS details (only available on TLS sockets).
      let certInfo: CertificateInfo | null = null;
      let tlsVersion: string | null = null;
      let cipherSuite: string | null = null;
      let alpnProtocol: string | null = null;

      if (socket instanceof tls.TLSSocket) {
        const cert = socket.getPeerCertificate(true);
        tlsVersion = socket.getProtocol();
        const cipher = socket.getCipher();
        cipherSuite = cipher ? cipher.name : null;
        alpnProtocol = socket.alpnProtocol || null;

        // getPeerCertificate may hand back an empty object; skip it in that case.
        if (cert && Object.keys(cert).length > 0) {
          certInfo = {
            subject: (cert.subject && cert.subject.CN) ? cert.subject.CN : 'Unknown',
            issuer: (cert.issuer && cert.issuer.CN) ? cert.issuer.CN : 'Unknown',
            validFrom: cert.valid_from,
            validTo: cert.valid_to,
            daysUntilExpiry: Math.floor((new Date(cert.valid_to).getTime() - Date.now()) / (1000 * 60 * 60 * 24)),
            // Heuristic: issuer CN equals subject CN. Coerced so the field is a
            // real boolean even when issuer or subject is missing.
            isSelfSigned: Boolean(cert.issuer && cert.subject && cert.issuer.CN === cert.subject.CN),
            isValidChain: socket.authorized,
            fingerprint: cert.fingerprint,
            serialNumber: cert.serialNumber,
            subjectAltName: cert.subjectaltname
          };

          if (!socket.authorized) {
            issues.push({
              id: 'cert-invalid',
              severity: 'severe',
              category: 'tls',
              message: `Certificate validation failed: ${socket.authorizationError}`,
              scorePenalty: 30
            });
          }
        }
      }

      const httpVersion = response.httpVersion;
      const contentEncoding = response.headers['content-encoding'];
      const compression: string[] = [];
      if (contentEncoding) {
        compression.push(contentEncoding);
      }

      // Keep-alive is assumed unless the server explicitly answers "Connection: close".
      const connectionHeader = response.headers['connection'];
      const keepAlive = connectionHeader ? connectionHeader.toLowerCase() !== 'close' : true;
      const serverHeader = (response.headers['server'] as string) || null;

      // Approximate header size by re-serialising the status line and parsed headers.
      const headerText = `HTTP/${response.httpVersion} ${response.statusCode} ${response.statusMessage}\r\n` +
        Object.entries(response.headers).map(([k, v]) => `${k}: ${v}`).join('\r\n') +
        '\r\n\r\n';
      const headerSize = Buffer.byteLength(headerText);
      const htmlSize = body.length;

      const transport: TransportDiagnostics = {
        tlsVersion,
        cipherSuite,
        alpnProtocol: alpnProtocol || (httpVersion === '2.0' ? 'h2' : 'http/1.1'),
        certificate: certInfo,
        httpVersion,
        compression,
        keepAlive,
        transferEncoding: (response.headers['transfer-encoding'] as string) || null,
        redirectCount,
        redirects,
        serverHeader,
        headers: response.headers
      };

      const performance: PerformanceMetrics = {
        dnsLookupTime: timings.dns,
        tcpConnectTime: timings.tcp,
        tlsHandshakeTime: timings.tls,
        ttfb: timings.ttfb,
        totalTime: timings.total + totalRedirectTime,
        htmlSize,
        headerSize,
        redirectTime: totalRedirectTime
      };

      return { transport, performance, issues };

    } catch (error: unknown) {
      // Non-Error throwables have no `.message`; stringify them instead of
      // reporting "undefined".
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Transport analysis failed for ${currentUrl}: ${reason}`, { cause: error });
    }
  }

  throw new Error(`Too many redirects (limit: ${maxRedirects})`);
}
|
|
156
|
-
|
|
157
|
-
/**
 * Performs one GET request on a fresh connection (no redirect following) and
 * resolves once the whole body has been read.
 *
 * Certificate validation is deliberately disabled (`rejectUnauthorized: false`)
 * so that sites with broken certificates can still be inspected by the caller.
 *
 * @param urlStr - Absolute URL; `https:` uses the https module, anything else http.
 * @param timeout - Socket timeout in milliseconds, passed as the request `timeout` option.
 * @returns The response, its body, phase timings in milliseconds, the socket
 *   and, for 3xx responses with a parseable Location header, the absolute
 *   redirect target.
 *   Rejects with `Invalid URL: …` for unparseable input, `Request timed out`
 *   on timeout, or the underlying request/response error.
 */
function executeRequest(urlStr: string, timeout: number): Promise<RequestResult> {
  return new Promise((resolve, reject) => {
    let url: URL;
    try {
      url = new URL(urlStr);
    } catch (_e) {
      return reject(new Error(`Invalid URL: ${urlStr}`));
    }

    const isHttps = url.protocol === 'https:';
    const requestModule = isHttps ? https : http;

    const timings = {
      dns: 0,
      tcp: 0,
      tls: 0,
      ttfb: 0,
      total: 0
    };

    const t0 = performance.now();
    // Both default to t0 so that a missing `lookup` event (e.g. IP literal
    // host) yields dns = 0 and tcp measured from the request start.
    let tDNS = t0;
    let tTCP = t0;
    let tReqSent = 0;

    // We use agent: false to force new connection for accurate timing
    const options = {
      method: 'GET',
      timeout,
      rejectUnauthorized: false,
      agent: false,
      headers: {
        'User-Agent': 'Crawlith/Audit',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br'
      }
    };

    const req = requestModule.request(url, options, (res) => {
      // TTFB: Time from request sent to first byte of headers received
      timings.ttfb = performance.now() - (tReqSent || t0);

      const chunks: Buffer[] = [];
      res.on('data', (chunk) => chunks.push(chunk));
      // Without this listener a connection closed mid-body never emits 'end'
      // and the promise would stay pending forever.
      res.on('error', (err) => reject(err));
      res.on('end', () => {
        timings.total = performance.now() - t0;
        const body = Buffer.concat(chunks);

        let redirectUrl: string | null = null;
        if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
          try {
            // Location may be relative; resolve it against the requested URL.
            redirectUrl = new URL(res.headers.location, urlStr).toString();
          } catch (_e) {
            // Unparseable Location: treat the response as final instead of failing.
          }
        }

        resolve({
          url: urlStr,
          response: res,
          body,
          timings,
          socket: res.socket,
          redirectUrl
        });
      });
    });

    req.on('socket', (socket) => {
      socket.on('lookup', () => {
        tDNS = performance.now();
        timings.dns = tDNS - t0;
      });
      socket.on('connect', () => {
        tTCP = performance.now();
        timings.tcp = tTCP - tDNS;
      });
      socket.on('secureConnect', () => {
        timings.tls = performance.now() - tTCP;
      });
    });

    req.on('finish', () => {
      tReqSent = performance.now();
    });

    req.on('error', (err) => reject(err));
    req.on('timeout', () => {
      // 'timeout' only signals idleness; the request must be torn down manually.
      req.destroy();
      reject(new Error('Request timed out'));
    });

    req.end();
  });
}
|
package/src/audit/types.ts
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
/** Aggregated audit output for one URL. */
export interface AuditResult {
  /** URL this result belongs to. */
  url: string;
  /** TLS and HTTP protocol diagnostics. */
  transport: TransportDiagnostics;
  /** Per-header security findings. */
  securityHeaders: SecurityHeadersResult;
  /** DNS diagnostics. */
  dns: DnsDiagnostics;
  /** Timing and size measurements. */
  performance: PerformanceMetrics;
  /** Overall numeric score. */
  score: number;
  /** Letter grade matching the score. */
  grade: 'A' | 'B' | 'C' | 'D' | 'F';
  /** Every issue raised during the audit. */
  issues: AuditIssue[];
}
|
|
12
|
-
|
|
13
|
-
/** TLS and HTTP protocol facts about a response. */
export interface TransportDiagnostics {
  // --- TLS / SSL ---

  /** Negotiated protocol string, or null when unavailable. */
  tlsVersion: string | null;
  /** Negotiated cipher name, or null when unavailable. */
  cipherSuite: string | null;
  /** ALPN protocol: http/1.1, h2 */
  alpnProtocol: string | null;
  /** Peer certificate details, or null when none was obtained. */
  certificate: CertificateInfo | null;

  // --- HTTP protocol ---

  /** HTTP version string of the response. */
  httpVersion: string;
  /** Content encodings seen on the response: gzip, br, deflate */
  compression: string[];
  /** Whether the connection is kept alive. */
  keepAlive: boolean;
  /** Transfer-Encoding header value, or null when absent. */
  transferEncoding: string | null;
  /** Number of redirects followed. */
  redirectCount: number;
  /** One entry per redirect hop. */
  redirects: RedirectInfo[];
  /** Server header value, or null when absent. */
  serverHeader: string | null;
  /** Response headers keyed by name. */
  headers: Record<string, string | string[] | undefined>;
}
|
|
30
|
-
|
|
31
|
-
/** Summary of a peer TLS certificate. */
export interface CertificateInfo {
  /** Issuer common name. */
  issuer: string;
  /** Subject common name. */
  subject: string;
  /** Start of the validity period, as a date string. */
  validFrom: string;
  /** End of the validity period, as a date string. */
  validTo: string;
  /** Whole days until `validTo`; negative once the certificate has expired. */
  daysUntilExpiry: number;
  /** Whether the certificate is considered self-signed. */
  isSelfSigned: boolean;
  /** Whether the chain validated; `analyzeTransport` fills this from the TLS socket's `authorized` flag. */
  isValidChain: boolean;
  /** Certificate fingerprint. */
  fingerprint: string;
  /** Certificate serial number. */
  serialNumber: string;
  /** Subject alternative names, when present. */
  subjectAltName?: string;
}
|
|
43
|
-
|
|
44
|
-
/** One hop in a redirect chain. */
export interface RedirectInfo {
  /** URL that answered with a redirect. */
  url: string;
  /** HTTP status code of that answer. */
  statusCode: number;
  /** Redirect target, or null when none is known. */
  location: string | null;
}
|
|
49
|
-
|
|
50
|
-
/** Findings for the security-related response headers. */
export interface SecurityHeadersResult {
  /** Strict-Transport-Security status. */
  strictTransportSecurity: HeaderStatus;
  /** Content-Security-Policy status. */
  contentSecurityPolicy: HeaderStatus;
  /** X-Frame-Options status. */
  xFrameOptions: HeaderStatus;
  /** X-Content-Type-Options status. */
  xContentTypeOptions: HeaderStatus;
  /** Referrer-Policy status. */
  referrerPolicy: HeaderStatus;
  /** Permissions-Policy status. */
  permissionsPolicy: HeaderStatus;

  /** Raw header values. */
  details: Record<string, string>;
  /** Partial score contribution, normalized to 0-100 for the headers section. */
  score: number;
}
|
|
61
|
-
|
|
62
|
-
/** Presence and validity of a single response header. */
export interface HeaderStatus {
  /** Whether the header was present. */
  present: boolean;
  /** Header value, or null when there is none. */
  value: string | null;
  /** Result of a simple syntax check. */
  valid: boolean;
  /** Optional list of problems found with the header. */
  issues?: string[];
}
|
|
68
|
-
|
|
69
|
-
/** DNS lookup results for the audited host. */
export interface DnsDiagnostics {
  /** Entries of the `a` lookup (presumably IPv4 addresses — confirm against the DNS module). */
  a: string[];
  /** Entries of the `aaaa` lookup (presumably IPv6 addresses — confirm against the DNS module). */
  aaaa: string[];
  /** Entries of the `cname` lookup. */
  cname: string[];
  /** Entries of the reverse lookup. */
  reverse: string[];
  /** Number of IP addresses found; `calculateScore` treats more than one as redundancy. */
  ipCount: number;
  /** Whether IPv6 is supported; when false `calculateScore` reports missing IPv6 DNS records. */
  ipv6Support: boolean;
  /** Time taken by the resolution (unit not visible here — presumably ms). */
  resolutionTime: number;
}
|
|
78
|
-
|
|
79
|
-
/** Timing and size measurements for a fetched page. */
export interface PerformanceMetrics {
  /** DNS lookup time, in ms. */
  dnsLookupTime: number;
  /** TCP connect time, in ms. */
  tcpConnectTime: number;
  /** TLS handshake time, in ms. */
  tlsHandshakeTime: number;
  /** Time to first byte, in ms. */
  ttfb: number;
  /** Total time, in ms. */
  totalTime: number;
  /** HTML body size, in bytes. */
  htmlSize: number;
  /** Header size, in bytes. */
  headerSize: number;
  /** Accumulated time spent in redirects. */
  redirectTime?: number;
}
|
|
89
|
-
|
|
90
|
-
/** A single finding raised by the audit. */
export interface AuditIssue {
  /** Unique code for tests/filtering. */
  id: string;
  /** How serious the finding is. */
  severity: 'critical' | 'severe' | 'moderate' | 'minor' | 'info';
  /** Area of the audit the finding belongs to. */
  category: 'tls' | 'http' | 'headers' | 'dns' | 'performance';
  /** Human-readable description. */
  message: string;
  /** Points associated with the finding. */
  scorePenalty: number;
}
|
|
97
|
-
|
|
98
|
-
/** Optional settings accepted by the audit. */
export interface AuditOptions {
  /** Timeout for the audit's requests (unit not visible here). */
  timeout?: number;
  /** Verbose-output flag. */
  verbose?: boolean;
  /** Debug-output flag. */
  debug?: boolean;
}
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { ProxyAgent } from 'undici';
|
|
2
|
-
|
|
3
|
-
/**
 * Holds an optional undici `ProxyAgent` created from a proxy URL.
 * Without a proxy URL the adapter stays empty and `dispatcher` is undefined.
 */
export class ProxyAdapter {
  private agent?: ProxyAgent;

  /**
   * @param proxyUrl - Proxy URL; omitted or empty means "no proxy".
   * @throws Error `Invalid proxy URL: …` when the URL does not parse or the
   *   agent cannot be constructed from it.
   */
  constructor(proxyUrl?: string) {
    if (!proxyUrl) {
      return;
    }

    try {
      // Parsed only for validation; the agent receives the original string.
      new URL(proxyUrl);
      this.agent = new ProxyAgent(proxyUrl);
    } catch {
      throw new Error(`Invalid proxy URL: ${proxyUrl}`);
    }
  }

  /** The configured proxy agent, or undefined when no proxy URL was given. */
  get dispatcher() {
    return this.agent;
  }
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
/**
 * Per-host token-bucket rate limiter.
 *
 * Each host gets its own bucket that refills continuously. Without a crawl
 * delay a host may burst up to `rate` requests (at least one) before being
 * throttled to `rate` requests per second. With a crawl delay the bucket
 * holds a single token, so consecutive requests are always spaced by at
 * least the effective interval.
 */
export class RateLimiter {
  private buckets: Map<string, { tokens: number; lastRefill: number }> = new Map();
  private rate: number; // tokens per second

  /**
   * @param rate - Tokens (requests) per second; must be a positive finite number.
   * @throws RangeError when `rate` is zero, negative, NaN or infinite — such a
   *   rate can never produce a token and would make waiters spin forever.
   */
  constructor(rate: number = 2) {
    if (!Number.isFinite(rate) || rate <= 0) {
      throw new RangeError(`RateLimiter rate must be a positive finite number, got ${rate}`);
    }
    this.rate = rate;
  }

  /**
   * Resolves once a request to `host` is allowed, consuming one token.
   *
   * @param host - Bucket key; the first call for a host returns immediately.
   * @param crawlDelay - Minimum delay between requests in seconds; values
   *   <= 0 mean "no crawl delay". The slower of `rate` and `1 / crawlDelay` wins.
   */
  async waitForToken(host: string, crawlDelay: number = 0): Promise<void> {
    const hasCrawlDelay = crawlDelay > 0;
    const effectiveRate = hasCrawlDelay ? Math.min(this.rate, 1 / crawlDelay) : this.rate;
    const interval = 1000 / effectiveRate; // ms needed to earn one token

    // Capacity must be at least 1 or a token could never be taken (rates
    // below 1/s). A crawl delay forbids bursting, so it pins capacity to 1.
    const capacity = hasCrawlDelay ? 1 : Math.max(1, this.rate);

    const bucket = this.buckets.get(host);
    if (!bucket) {
      // The first request is served right away and counts against the bucket.
      this.buckets.set(host, { tokens: capacity - 1, lastRefill: Date.now() });
      return;
    }

    while (true) {
      const now = Date.now();
      // Ignore a clock that moved backwards instead of removing tokens.
      const elapsed = Math.max(0, now - bucket.lastRefill);

      // Clamp on every pass so tokens saved up under a larger capacity
      // (e.g. before a crawl delay was known) cannot bypass the current one.
      bucket.tokens = Math.min(capacity, bucket.tokens + elapsed / interval);
      if (elapsed > 0) {
        bucket.lastRefill = now;
      }

      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return;
      }

      // Sleep only for the missing fraction of a token; at least 1 ms so the
      // loop never spins.
      const waitTime = Math.max(1, Math.ceil((1 - bucket.tokens) * interval));
      await new Promise(resolve => setTimeout(resolve, waitTime));
    }
  }
}
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
/**
 * Tracks the redirect chain of one request: counts accepted hops and
 * remembers every URL seen so loops can be detected.
 */
export class RedirectController {
  private limit: number;
  private count: number = 0;
  private visited: Set<string> = new Set();

  /**
   * @param maxHops - Maximum number of hops that may be accepted.
   * @param seedUrl - Optional starting URL; a redirect back to it counts as a loop.
   */
  constructor(maxHops: number = 5, seedUrl?: string) {
    this.limit = maxHops;
    if (seedUrl) {
      this.visited.add(this.withoutFragment(seedUrl));
    }
  }

  /**
   * Registers the next redirect target.
   *
   * Loop detection runs before the hop limit, so a repeated URL is reported
   * as a loop even when the limit is already reached. Rejected hops are not
   * recorded or counted.
   *
   * @returns null when the hop is accepted, otherwise the reason it is blocked.
   */
  nextHop(url: string): 'redirect_limit_exceeded' | 'redirect_loop' | null {
    const key = this.withoutFragment(url);

    const isRepeat = this.visited.has(key);
    if (isRepeat) {
      return 'redirect_loop';
    }

    const budgetSpent = this.count >= this.limit;
    if (budgetSpent) {
      return 'redirect_limit_exceeded';
    }

    this.visited.add(key);
    this.count += 1;
    return null;
  }

  /** Number of hops accepted so far. */
  get hops(): number {
    return this.count;
  }

  /**
   * Canonical form used for loop detection: the parsed URL with its fragment
   * removed. Input that does not parse is used verbatim.
   */
  private withoutFragment(url: string): string {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return url;
    }
    parsed.hash = '';
    return parsed.toString();
  }
}
|