@crawlith/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analysis_list.html +35 -0
  4. package/dist/analysis/analysis_page.html +123 -0
  5. package/dist/analysis/analyze.d.ts +40 -5
  6. package/dist/analysis/analyze.js +395 -347
  7. package/dist/analysis/clustering.d.ts +23 -0
  8. package/dist/analysis/clustering.js +206 -0
  9. package/dist/analysis/content.d.ts +1 -1
  10. package/dist/analysis/content.js +11 -5
  11. package/dist/analysis/duplicate.d.ts +34 -0
  12. package/dist/analysis/duplicate.js +305 -0
  13. package/dist/analysis/heading.d.ts +116 -0
  14. package/dist/analysis/heading.js +356 -0
  15. package/dist/analysis/images.d.ts +1 -1
  16. package/dist/analysis/images.js +6 -5
  17. package/dist/analysis/links.d.ts +1 -1
  18. package/dist/analysis/links.js +8 -8
  19. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  20. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  21. package/dist/analysis/scoring.js +11 -2
  22. package/dist/analysis/seo.d.ts +8 -4
  23. package/dist/analysis/seo.js +41 -30
  24. package/dist/analysis/soft404.d.ts +17 -0
  25. package/dist/analysis/soft404.js +62 -0
  26. package/dist/analysis/structuredData.d.ts +1 -1
  27. package/dist/analysis/structuredData.js +5 -4
  28. package/dist/analysis/templates.d.ts +2 -0
  29. package/dist/analysis/templates.js +7 -0
  30. package/dist/application/index.d.ts +2 -0
  31. package/dist/application/index.js +2 -0
  32. package/dist/application/usecase.d.ts +3 -0
  33. package/dist/application/usecase.js +1 -0
  34. package/dist/application/usecases.d.ts +114 -0
  35. package/dist/application/usecases.js +201 -0
  36. package/dist/audit/index.js +1 -1
  37. package/dist/audit/transport.d.ts +1 -1
  38. package/dist/audit/transport.js +5 -4
  39. package/dist/audit/types.d.ts +1 -0
  40. package/dist/constants.d.ts +17 -0
  41. package/dist/constants.js +23 -0
  42. package/dist/core/scope/scopeManager.js +3 -0
  43. package/dist/core/security/ipGuard.d.ts +11 -0
  44. package/dist/core/security/ipGuard.js +71 -3
  45. package/dist/crawler/crawl.d.ts +4 -22
  46. package/dist/crawler/crawl.js +4 -335
  47. package/dist/crawler/crawler.d.ts +87 -0
  48. package/dist/crawler/crawler.js +683 -0
  49. package/dist/crawler/extract.d.ts +4 -1
  50. package/dist/crawler/extract.js +7 -2
  51. package/dist/crawler/fetcher.d.ts +2 -1
  52. package/dist/crawler/fetcher.js +26 -11
  53. package/dist/crawler/metricsRunner.d.ts +23 -1
  54. package/dist/crawler/metricsRunner.js +202 -72
  55. package/dist/crawler/normalize.d.ts +41 -0
  56. package/dist/crawler/normalize.js +119 -3
  57. package/dist/crawler/parser.d.ts +1 -3
  58. package/dist/crawler/parser.js +2 -49
  59. package/dist/crawler/resolver.d.ts +11 -0
  60. package/dist/crawler/resolver.js +67 -0
  61. package/dist/crawler/sitemap.d.ts +6 -0
  62. package/dist/crawler/sitemap.js +27 -17
  63. package/dist/crawler/trap.d.ts +5 -1
  64. package/dist/crawler/trap.js +23 -2
  65. package/dist/db/CrawlithDB.d.ts +110 -0
  66. package/dist/db/CrawlithDB.js +500 -0
  67. package/dist/db/graphLoader.js +42 -30
  68. package/dist/db/index.d.ts +11 -0
  69. package/dist/db/index.js +41 -29
  70. package/dist/db/migrations.d.ts +2 -0
  71. package/dist/db/{schema.js → migrations.js} +90 -43
  72. package/dist/db/pluginRegistry.d.ts +9 -0
  73. package/dist/db/pluginRegistry.js +19 -0
  74. package/dist/db/repositories/EdgeRepository.d.ts +13 -0
  75. package/dist/db/repositories/EdgeRepository.js +20 -0
  76. package/dist/db/repositories/MetricsRepository.d.ts +16 -8
  77. package/dist/db/repositories/MetricsRepository.js +28 -7
  78. package/dist/db/repositories/PageRepository.d.ts +15 -2
  79. package/dist/db/repositories/PageRepository.js +169 -25
  80. package/dist/db/repositories/SiteRepository.d.ts +9 -0
  81. package/dist/db/repositories/SiteRepository.js +13 -0
  82. package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
  83. package/dist/db/repositories/SnapshotRepository.js +64 -5
  84. package/dist/db/reset.d.ts +9 -0
  85. package/dist/db/reset.js +32 -0
  86. package/dist/db/statements.d.ts +12 -0
  87. package/dist/db/statements.js +40 -0
  88. package/dist/diff/compare.d.ts +0 -5
  89. package/dist/diff/compare.js +0 -12
  90. package/dist/diff/service.d.ts +16 -0
  91. package/dist/diff/service.js +41 -0
  92. package/dist/domain/index.d.ts +4 -0
  93. package/dist/domain/index.js +4 -0
  94. package/dist/events.d.ts +56 -0
  95. package/dist/events.js +1 -0
  96. package/dist/graph/graph.d.ts +36 -42
  97. package/dist/graph/graph.js +26 -17
  98. package/dist/graph/hits.d.ts +23 -0
  99. package/dist/graph/hits.js +111 -0
  100. package/dist/graph/metrics.d.ts +0 -4
  101. package/dist/graph/metrics.js +25 -9
  102. package/dist/graph/pagerank.d.ts +17 -4
  103. package/dist/graph/pagerank.js +126 -91
  104. package/dist/graph/simhash.d.ts +6 -0
  105. package/dist/graph/simhash.js +14 -0
  106. package/dist/index.d.ts +29 -8
  107. package/dist/index.js +29 -8
  108. package/dist/lock/hashKey.js +1 -1
  109. package/dist/lock/lockManager.d.ts +5 -1
  110. package/dist/lock/lockManager.js +38 -13
  111. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  112. package/dist/plugin-system/plugin-cli.js +31 -0
  113. package/dist/plugin-system/plugin-config.d.ts +16 -0
  114. package/dist/plugin-system/plugin-config.js +36 -0
  115. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  116. package/dist/plugin-system/plugin-loader.js +122 -0
  117. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  118. package/dist/plugin-system/plugin-registry.js +167 -0
  119. package/dist/plugin-system/plugin-types.d.ts +205 -0
  120. package/dist/plugin-system/plugin-types.js +1 -0
  121. package/dist/ports/index.d.ts +9 -0
  122. package/dist/ports/index.js +1 -0
  123. package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
  124. package/dist/report/crawlExport.d.ts +3 -0
  125. package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
  126. package/dist/report/crawl_template.d.ts +1 -0
  127. package/dist/report/crawl_template.js +7 -0
  128. package/dist/report/export.d.ts +3 -0
  129. package/dist/report/export.js +81 -0
  130. package/dist/report/html.js +15 -216
  131. package/dist/report/insight.d.ts +27 -0
  132. package/dist/report/insight.js +103 -0
  133. package/dist/scoring/health.d.ts +56 -0
  134. package/dist/scoring/health.js +213 -0
  135. package/dist/utils/chalk.d.ts +6 -0
  136. package/dist/utils/chalk.js +41 -0
  137. package/dist/utils/secureConfig.d.ts +23 -0
  138. package/dist/utils/secureConfig.js +128 -0
  139. package/package.json +12 -6
  140. package/CHANGELOG.md +0 -7
  141. package/dist/db/schema.d.ts +0 -2
  142. package/dist/graph/cluster.d.ts +0 -6
  143. package/dist/graph/cluster.js +0 -173
  144. package/dist/graph/duplicate.d.ts +0 -10
  145. package/dist/graph/duplicate.js +0 -251
  146. package/dist/report/sitegraphExport.d.ts +0 -3
  147. package/dist/report/sitegraph_template.d.ts +0 -1
  148. package/dist/report/sitegraph_template.js +0 -630
  149. package/dist/scoring/hits.d.ts +0 -9
  150. package/dist/scoring/hits.js +0 -111
  151. package/src/analysis/analyze.ts +0 -548
  152. package/src/analysis/content.ts +0 -62
  153. package/src/analysis/images.ts +0 -28
  154. package/src/analysis/links.ts +0 -41
  155. package/src/analysis/scoring.ts +0 -59
  156. package/src/analysis/seo.ts +0 -82
  157. package/src/analysis/structuredData.ts +0 -62
  158. package/src/audit/dns.ts +0 -49
  159. package/src/audit/headers.ts +0 -98
  160. package/src/audit/index.ts +0 -66
  161. package/src/audit/scoring.ts +0 -232
  162. package/src/audit/transport.ts +0 -258
  163. package/src/audit/types.ts +0 -102
  164. package/src/core/network/proxyAdapter.ts +0 -21
  165. package/src/core/network/rateLimiter.ts +0 -39
  166. package/src/core/network/redirectController.ts +0 -47
  167. package/src/core/network/responseLimiter.ts +0 -34
  168. package/src/core/network/retryPolicy.ts +0 -57
  169. package/src/core/scope/domainFilter.ts +0 -45
  170. package/src/core/scope/scopeManager.ts +0 -52
  171. package/src/core/scope/subdomainPolicy.ts +0 -39
  172. package/src/core/security/ipGuard.ts +0 -92
  173. package/src/crawler/crawl.ts +0 -382
  174. package/src/crawler/extract.ts +0 -34
  175. package/src/crawler/fetcher.ts +0 -233
  176. package/src/crawler/metricsRunner.ts +0 -124
  177. package/src/crawler/normalize.ts +0 -108
  178. package/src/crawler/parser.ts +0 -190
  179. package/src/crawler/sitemap.ts +0 -73
  180. package/src/crawler/trap.ts +0 -96
  181. package/src/db/graphLoader.ts +0 -105
  182. package/src/db/index.ts +0 -70
  183. package/src/db/repositories/EdgeRepository.ts +0 -29
  184. package/src/db/repositories/MetricsRepository.ts +0 -49
  185. package/src/db/repositories/PageRepository.ts +0 -128
  186. package/src/db/repositories/SiteRepository.ts +0 -32
  187. package/src/db/repositories/SnapshotRepository.ts +0 -74
  188. package/src/db/schema.ts +0 -177
  189. package/src/diff/compare.ts +0 -84
  190. package/src/graph/cluster.ts +0 -192
  191. package/src/graph/duplicate.ts +0 -286
  192. package/src/graph/graph.ts +0 -172
  193. package/src/graph/metrics.ts +0 -110
  194. package/src/graph/pagerank.ts +0 -125
  195. package/src/graph/simhash.ts +0 -61
  196. package/src/index.ts +0 -30
  197. package/src/lock/hashKey.ts +0 -51
  198. package/src/lock/lockManager.ts +0 -124
  199. package/src/lock/pidCheck.ts +0 -13
  200. package/src/report/html.ts +0 -227
  201. package/src/report/sitegraphExport.ts +0 -58
  202. package/src/scoring/hits.ts +0 -131
  203. package/src/scoring/orphanSeverity.ts +0 -176
  204. package/src/utils/version.ts +0 -18
  205. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  206. package/tests/analysis.unit.test.ts +0 -98
  207. package/tests/analyze.integration.test.ts +0 -98
  208. package/tests/audit/dns.test.ts +0 -31
  209. package/tests/audit/headers.test.ts +0 -45
  210. package/tests/audit/scoring.test.ts +0 -133
  211. package/tests/audit/security.test.ts +0 -12
  212. package/tests/audit/transport.test.ts +0 -112
  213. package/tests/clustering.test.ts +0 -118
  214. package/tests/crawler.test.ts +0 -358
  215. package/tests/db.test.ts +0 -159
  216. package/tests/diff.test.ts +0 -67
  217. package/tests/duplicate.test.ts +0 -110
  218. package/tests/fetcher.test.ts +0 -106
  219. package/tests/fetcher_safety.test.ts +0 -85
  220. package/tests/fixtures/analyze-crawl.json +0 -26
  221. package/tests/hits.test.ts +0 -134
  222. package/tests/html_report.test.ts +0 -58
  223. package/tests/lock/lockManager.test.ts +0 -138
  224. package/tests/metrics.test.ts +0 -196
  225. package/tests/normalize.test.ts +0 -101
  226. package/tests/orphanSeverity.test.ts +0 -160
  227. package/tests/pagerank.test.ts +0 -98
  228. package/tests/parser.test.ts +0 -117
  229. package/tests/proxy_safety.test.ts +0 -57
  230. package/tests/redirect_safety.test.ts +0 -73
  231. package/tests/safety.test.ts +0 -114
  232. package/tests/scope.test.ts +0 -66
  233. package/tests/scoring.test.ts +0 -59
  234. package/tests/sitemap.test.ts +0 -88
  235. package/tests/soft404.test.ts +0 -41
  236. package/tests/trap.test.ts +0 -39
  237. package/tests/visualization_data.test.ts +0 -46
  238. package/tsconfig.json +0 -11
@@ -1,232 +0,0 @@
1
- /* eslint-disable no-useless-assignment */
2
- import { TransportDiagnostics, DnsDiagnostics, SecurityHeadersResult, PerformanceMetrics, AuditIssue } from './types.js';
3
-
4
/**
 * Per-category point breakdown produced by `calculateScore`.
 * The four values are added together to form the overall audit score.
 */
interface CategoryScores {
  /** Points earned for TLS version and certificate checks. */
  transport: number;
  /** Points derived from the security-header score. */
  security: number;
  /** Points earned for protocol, compression, TTFB, redirect and size checks. */
  performance: number;
  /** Points earned for the DNS-based checks (IPv6 support, multiple IPs). */
  infrastructure: number;
}
10
-
11
/**
 * Combine transport, DNS, security-header and performance diagnostics into an
 * overall audit score with a letter grade.
 *
 * Points are accumulated per category and summed:
 * transport (max 30), security headers (max 20), performance (max 30) and
 * infrastructure (max 20). Every failed check also appends an issue.
 * If any issue (pre-existing or newly added) has severity `critical`, the
 * total is capped at 39 so the grade is always `F`.
 *
 * @param transport      TLS / HTTP diagnostics of the final response.
 * @param dns            DNS diagnostics (IPv6 support and IP count are used).
 * @param headers        Security-header analysis; `headers.score` (0-100) is scaled to 0-20.
 * @param performance    Timing and size metrics (TTFB and HTML size are used).
 * @param existingIssues Issues collected by earlier stages; copied, never mutated.
 * @returns The rounded total score, its grade, all issues, and the per-category points.
 */
export function calculateScore(
  transport: TransportDiagnostics,
  dns: DnsDiagnostics,
  headers: SecurityHeadersResult,
  performance: PerformanceMetrics,
  existingIssues: AuditIssue[]
): { score: number; grade: 'A' | 'B' | 'C' | 'D' | 'F'; issues: AuditIssue[]; categoryScores: CategoryScores } {
  // Work on a copy so the caller's array is left untouched.
  const issues: AuditIssue[] = [...existingIssues];
  let transportScore = 0; // Max 30
  let performanceScore = 0; // Max 30
  let infrastructureScore = 0; // Max 20

  // 1. Transport Security (30 pts)
  // TLS version (15 pts)
  if (transport.tlsVersion) {
    // "TLSv1.3" -> 1.3. Anything that does not parse to a number >= 1.2
    // (including NaN) falls through to the "deprecated" branch.
    const version = parseFloat(transport.tlsVersion.replace('v', '').replace('TLS', '').trim());
    if (version >= 1.2) {
      transportScore += 15;
    } else {
      issues.push({
        id: 'tls-old',
        severity: 'severe',
        category: 'tls',
        message: `Deprecated TLS version: ${transport.tlsVersion}`,
        scorePenalty: 15
      });
    }
  } else if (!transport.certificate) {
    // Neither a TLS version nor a certificate: the site was served over plain HTTP.
    issues.push({
      id: 'no-https',
      severity: 'critical',
      category: 'tls',
      message: 'Site is not using HTTPS',
      scorePenalty: 30
    });
  }

  // Certificate (15 pts)
  const certificate = transport.certificate;
  if (certificate) {
    // An invalid chain is reported by the transport stage; here it simply earns no points.
    if (certificate.isValidChain && !certificate.isSelfSigned) {
      transportScore += 15;
    }

    if (certificate.daysUntilExpiry < 30 && certificate.daysUntilExpiry >= 0) {
      issues.push({
        id: 'cert-expiring-soon',
        severity: 'moderate',
        category: 'tls',
        message: `Certificate expires in ${certificate.daysUntilExpiry} days`,
        scorePenalty: 5
      });
      // Deduct the penalty, but never below zero: when no transport points
      // were awarded above, the category must not turn negative.
      transportScore = Math.max(0, transportScore - 5);
    } else if (certificate.daysUntilExpiry < 0) {
      issues.push({
        id: 'cert-expired',
        severity: 'critical',
        category: 'tls',
        message: `Certificate expired on ${certificate.validTo}`,
        scorePenalty: 30
      });
      transportScore = 0; // An expired certificate voids all transport points.
    }
  }

  // 2. Response Security (Headers) (20 pts)
  // headers.score is 0-100. Map to 0-20.
  const securityScore = (headers.score / 100) * 20;

  // Add issues for missing critical headers
  if (!headers.strictTransportSecurity.present) {
    issues.push({
      id: 'hsts-missing',
      severity: 'moderate',
      category: 'headers',
      message: 'Missing Strict-Transport-Security header',
      scorePenalty: 5
    });
  }
  if (!headers.contentSecurityPolicy.present) {
    issues.push({
      id: 'csp-missing',
      severity: 'moderate',
      category: 'headers',
      message: 'Missing Content-Security-Policy header',
      scorePenalty: 5
    });
  }

  // 3. Performance (30 pts)
  // HTTP/2 (5 pts)
  if (transport.alpnProtocol === 'h2' || transport.httpVersion === '2.0') {
    performanceScore += 5;
  } else {
    issues.push({
      id: 'no-h2',
      severity: 'minor',
      category: 'performance',
      message: 'HTTP/2 not supported',
      scorePenalty: 5
    });
  }

  // Compression (5 pts)
  if (transport.compression.length > 0) {
    performanceScore += 5;
  } else {
    issues.push({
      id: 'no-compression',
      severity: 'moderate',
      category: 'performance',
      message: 'No compression enabled (gzip/br)',
      scorePenalty: 5
    });
  }

  // TTFB (10 pts)
  if (performance.ttfb < 800) {
    performanceScore += 10;
  } else {
    issues.push({
      id: 'slow-ttfb',
      severity: 'moderate',
      category: 'performance',
      message: `Slow TTFB: ${performance.ttfb.toFixed(0)}ms`,
      scorePenalty: 10
    });
  }

  // Redirects (5 pts)
  if (transport.redirectCount <= 3) {
    performanceScore += 5;
  } else {
    issues.push({
      id: 'too-many-redirects',
      severity: 'moderate',
      category: 'performance',
      message: `Too many redirects: ${transport.redirectCount}`,
      scorePenalty: 5
    });
  }

  // HTML Size (5 pts)
  if (performance.htmlSize < 1024 * 1024) { // 1MB
    performanceScore += 5;
  } else {
    issues.push({
      id: 'large-html',
      severity: 'minor',
      category: 'performance',
      message: `HTML size > 1MB: ${(performance.htmlSize / 1024 / 1024).toFixed(2)}MB`,
      scorePenalty: 5
    });
  }

  // 4. Infrastructure (20 pts)
  // IPv6 (10 pts)
  if (dns.ipv6Support) {
    infrastructureScore += 10;
  } else {
    issues.push({
      id: 'no-ipv6',
      severity: 'minor',
      category: 'dns',
      message: 'No IPv6 DNS records found',
      scorePenalty: 5
    });
  }

  // Redundancy (10 pts)
  if (dns.ipCount > 1) {
    infrastructureScore += 10;
  } else {
    issues.push({
      id: 'single-ip',
      severity: 'minor',
      category: 'dns',
      message: 'Single IP address detected (no redundancy)',
      scorePenalty: 5
    });
  }

  let totalScore = transportScore + securityScore + performanceScore + infrastructureScore;

  // Critical override: any critical issue forces a failing grade.
  if (issues.some((issue) => issue.severity === 'critical')) {
    totalScore = Math.min(totalScore, 39); // Cap at F (<40)
  }

  const grade = getGrade(totalScore);

  return {
    score: Math.round(totalScore),
    grade,
    issues,
    categoryScores: {
      transport: transportScore,
      security: securityScore,
      performance: performanceScore,
      infrastructure: infrastructureScore
    }
  };
}
225
-
226
/**
 * Translate a numeric score into a letter grade.
 *
 * Cut-offs (inclusive lower bounds): A >= 90, B >= 75, C >= 60, D >= 40;
 * everything else (including NaN) is an F.
 */
function getGrade(score: number): 'A' | 'B' | 'C' | 'D' | 'F' {
  // Ordered from the highest threshold down; the first one met wins.
  const cutoffs: Array<[number, 'A' | 'B' | 'C' | 'D']> = [
    [90, 'A'],
    [75, 'B'],
    [60, 'C'],
    [40, 'D']
  ];

  for (const [minimum, letter] of cutoffs) {
    if (score >= minimum) {
      return letter;
    }
  }
  return 'F';
}
@@ -1,258 +0,0 @@
1
- import https from 'node:https';
2
- import http from 'node:http';
3
- import tls from 'node:tls';
4
- import { URL } from 'node:url';
5
- import { IPGuard } from '../core/security/ipGuard.js';
6
- import { TransportDiagnostics, PerformanceMetrics, CertificateInfo, RedirectInfo, AuditIssue } from './types.js';
7
- import { IncomingMessage } from 'node:http';
8
-
9
/**
 * Outcome of one HTTP(S) request made by `executeRequest`
 * (a single hop; redirects are not followed at this level).
 */
interface RequestResult {
  /** URL that was requested. */
  url: string;
  /** The Node.js response object (status, headers, HTTP version). */
  response: IncomingMessage;
  /** Fully buffered response body. */
  body: Buffer;
  /** Phase timings in milliseconds, measured with `performance.now()`. */
  timings: {
    /** Start of request until the socket's `lookup` event. */
    dns: number;
    /** DNS completion until the socket's `connect` event. */
    tcp: number;
    /** TCP connect until `secureConnect`; stays 0 when that event never fires. */
    tls: number;
    /** Request fully written until response headers arrive. */
    ttfb: number;
    /** Start of request until the response body ended. */
    total: number;
  };
  /**
   * Socket the response arrived on. Typed from `IncomingMessage` instead of
   * `any` so callers keep type checking; narrow with
   * `instanceof tls.TLSSocket` to reach TLS-specific members.
   */
  socket: IncomingMessage['socket'];
  /** Absolute redirect target for a 3xx response with a usable `Location`, otherwise null. */
  redirectUrl: string | null;
}
23
-
24
/**
 * Request `targetUrl`, following redirects manually (at most 10 requests),
 * and collect transport-level diagnostics for the final response.
 *
 * Every hop's hostname is passed to `IPGuard.validateHost` before the
 * request is made; a hop that is not approved aborts the analysis.
 *
 * @param targetUrl Absolute URL to analyse.
 * @param timeout   Per-request timeout handed to `executeRequest`.
 * @returns TLS/HTTP diagnostics, timing/size metrics, and any issues found
 *          while inspecting the certificate.
 * @throws Error when a hop is blocked by the IP guard, when a request fails
 *         (wrapped, with the original error as `cause`), or when the
 *         redirect limit is exhausted.
 */
export async function analyzeTransport(targetUrl: string, timeout: number): Promise<{
  transport: TransportDiagnostics;
  performance: PerformanceMetrics;
  issues: AuditIssue[];
}> {
  const maxRedirects = 10;
  let currentUrl = targetUrl;
  let redirectCount = 0;
  const redirects: RedirectInfo[] = [];
  const issues: AuditIssue[] = [];

  // Total time spent on intermediate (redirect) responses.
  let totalRedirectTime = 0;

  for (let i = 0; i < maxRedirects; i++) {
    // Re-validate on every hop so a redirect cannot lead to a blocked host.
    const urlObj = new URL(currentUrl);
    const isSafe = await IPGuard.validateHost(urlObj.hostname);
    if (!isSafe) {
      throw new Error(`Blocked: Redirect to internal/private IP prohibited (${currentUrl})`);
    }

    try {
      const result = await executeRequest(currentUrl, timeout);

      if (result.redirectUrl) {
        redirectCount++;
        totalRedirectTime += result.timings.total;

        redirects.push({
          url: currentUrl,
          statusCode: result.response.statusCode || 0,
          location: result.redirectUrl
        });

        currentUrl = result.redirectUrl;
        continue;
      }

      // Final destination reached
      const { response, body, timings, socket } = result;

      // Collect certificate / TLS session info (only available on TLS sockets).
      let certInfo: CertificateInfo | null = null;
      let tlsVersion: string | null = null;
      let cipherSuite: string | null = null;
      let alpnProtocol: string | null = null;

      if (socket instanceof tls.TLSSocket) {
        const cert = socket.getPeerCertificate(true);
        tlsVersion = socket.getProtocol();
        const cipher = socket.getCipher();
        cipherSuite = cipher ? cipher.name : null;
        alpnProtocol = socket.alpnProtocol || null;

        // An empty object means no peer certificate was available.
        if (cert && Object.keys(cert).length > 0) {
          certInfo = {
            subject: (cert.subject && cert.subject.CN) ? cert.subject.CN : 'Unknown',
            issuer: (cert.issuer && cert.issuer.CN) ? cert.issuer.CN : 'Unknown',
            validFrom: cert.valid_from,
            validTo: cert.valid_to,
            daysUntilExpiry: Math.floor((new Date(cert.valid_to).getTime() - Date.now()) / (1000 * 60 * 60 * 24)),
            // Heuristic: issuer CN equals subject CN. Coerced so the field is
            // always a real boolean even when issuer/subject are missing.
            isSelfSigned: Boolean(cert.issuer && cert.subject && cert.issuer.CN === cert.subject.CN),
            isValidChain: socket.authorized,
            fingerprint: cert.fingerprint,
            serialNumber: cert.serialNumber,
            subjectAltName: cert.subjectaltname
          };

          if (!socket.authorized) {
            issues.push({
              id: 'cert-invalid',
              severity: 'severe',
              category: 'tls',
              message: `Certificate validation failed: ${socket.authorizationError}`,
              scorePenalty: 30
            });
          }
        }
      }

      const httpVersion = response.httpVersion;

      // Content-Encoding may list several codings ("gzip, br"); keep one entry per coding.
      const contentEncoding = response.headers['content-encoding'];
      const compression: string[] = contentEncoding
        ? contentEncoding
            .split(',')
            .map((coding) => coding.trim())
            .filter((coding) => coding.length > 0)
        : [];

      // Without a Connection header the connection is treated as keep-alive.
      const connectionHeader = response.headers['connection'];
      const keepAlive = connectionHeader ? connectionHeader.toLowerCase() !== 'close' : true;
      const serverHeader = (response.headers['server'] as string) || null;

      // Approximate the header block size by re-serialising status line + headers.
      const headerText = `HTTP/${response.httpVersion} ${response.statusCode} ${response.statusMessage}\r\n` +
        Object.entries(response.headers).map(([k, v]) => `${k}: ${v}`).join('\r\n') +
        '\r\n\r\n';
      const headerSize = Buffer.byteLength(headerText);
      const htmlSize = body.length;

      const transport: TransportDiagnostics = {
        tlsVersion,
        cipherSuite,
        alpnProtocol: alpnProtocol || (httpVersion === '2.0' ? 'h2' : 'http/1.1'),
        certificate: certInfo,
        httpVersion,
        compression,
        keepAlive,
        transferEncoding: (response.headers['transfer-encoding'] as string) || null,
        redirectCount,
        redirects,
        serverHeader,
        headers: response.headers
      };

      const performance: PerformanceMetrics = {
        dnsLookupTime: timings.dns,
        tcpConnectTime: timings.tcp,
        tlsHandshakeTime: timings.tls,
        ttfb: timings.ttfb,
        totalTime: timings.total + totalRedirectTime,
        htmlSize,
        headerSize,
        redirectTime: totalRedirectTime
      };

      return { transport, performance, issues };

    } catch (error: unknown) {
      // Anything can be thrown; only Error instances carry a usable message.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Transport analysis failed for ${currentUrl}: ${reason}`, { cause: error });
    }
  }

  throw new Error(`Too many redirects (limit: ${maxRedirects})`);
}
156
-
157
/**
 * Perform a single GET request (no redirect following) on a fresh
 * connection and record per-phase timings.
 *
 * Certificate verification is disabled (`rejectUnauthorized: false`) so that
 * sites with broken certificates can still be inspected by the caller.
 *
 * @param urlStr  Absolute http/https URL.
 * @param timeout Socket timeout passed to the request options.
 * @returns Resolves with the response, buffered body, timings, socket and the
 *          resolved redirect target (null when the response is not a usable redirect).
 *          Rejects for an unparsable URL, a request or response stream error,
 *          or a timeout.
 */
function executeRequest(urlStr: string, timeout: number): Promise<RequestResult> {
  return new Promise((resolve, reject) => {
    let url: URL;
    try {
      url = new URL(urlStr);
    } catch {
      return reject(new Error(`Invalid URL: ${urlStr}`));
    }

    const isHttps = url.protocol === 'https:';
    const requestModule = isHttps ? https : http;

    const timings = {
      dns: 0,
      tcp: 0,
      tls: 0,
      ttfb: 0,
      total: 0
    };

    // Phase markers default to t0 so a phase whose event never fires
    // (e.g. no DNS lookup for an IP literal) measures as zero.
    const t0 = performance.now();
    let tDNS = t0;
    let tTCP = t0;
    let tTLS = t0;
    let tReqSent = 0;

    // We use agent: false to force new connection for accurate timing
    const options = {
      method: 'GET',
      timeout,
      rejectUnauthorized: false,
      agent: false,
      headers: {
        'User-Agent': 'Crawlith/Audit',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br'
      }
    };

    const req = requestModule.request(url, options, (res) => {
      // TTFB: Time from request sent to first byte of headers received
      timings.ttfb = performance.now() - (tReqSent || t0);

      const chunks: Buffer[] = [];
      res.on('data', (chunk) => chunks.push(chunk));

      // If the connection drops mid-body, 'end' never fires; without this
      // listener the promise would stay pending forever.
      res.on('error', (err) => reject(err));

      res.on('end', () => {
        timings.total = performance.now() - t0;
        const body = Buffer.concat(chunks);

        let redirectUrl: string | null = null;
        if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
          try {
            // Location may be relative; resolve it against the requested URL.
            redirectUrl = new URL(res.headers.location, urlStr).toString();
          } catch {
            // Unparsable Location: treat the response as final rather than failing.
          }
        }

        resolve({
          url: urlStr,
          response: res,
          body,
          timings,
          socket: res.socket,
          redirectUrl
        });
      });
    });

    req.on('socket', (socket) => {
      socket.on('lookup', () => {
        tDNS = performance.now();
        timings.dns = tDNS - t0;
      });
      socket.on('connect', () => {
        tTCP = performance.now();
        // tDNS is still t0 when no lookup happened, so this then spans from t0.
        timings.tcp = tTCP - tDNS;
      });
      socket.on('secureConnect', () => {
        tTLS = performance.now();
        timings.tls = tTLS - tTCP;
      });
    });

    req.on('finish', () => {
      tReqSent = performance.now();
    });

    req.on('error', (err) => reject(err));
    req.on('timeout', () => {
      // The 'timeout' event only signals idleness; the request must be torn down explicitly.
      req.destroy();
      reject(new Error('Request timed out'));
    });

    req.end();
  });
}
@@ -1,102 +0,0 @@
1
-
2
/** Complete result of auditing one URL: raw diagnostics plus the derived score. */
export interface AuditResult {
  /** The audited URL. */
  url: string;
  /** TLS and HTTP protocol diagnostics. */
  transport: TransportDiagnostics;
  /** Security-header analysis. */
  securityHeaders: SecurityHeadersResult;
  /** DNS diagnostics. */
  dns: DnsDiagnostics;
  /** Timing and size metrics. */
  performance: PerformanceMetrics;
  /** Overall numeric score. */
  score: number;
  /** Letter grade corresponding to `score`. */
  grade: 'A' | 'B' | 'C' | 'D' | 'F';
  /** Every issue detected during the audit. */
  issues: AuditIssue[];
}
12
-
13
/** TLS session and HTTP protocol facts observed on the final response. */
export interface TransportDiagnostics {
  // --- TLS / SSL ---

  /** Negotiated TLS protocol version, or null when unavailable. */
  tlsVersion: string | null;
  /** Name of the negotiated cipher suite, or null when unavailable. */
  cipherSuite: string | null;
  /** Negotiated application protocol, e.g. `http/1.1` or `h2`. */
  alpnProtocol: string | null;
  /** Peer certificate details, or null when none was available. */
  certificate: CertificateInfo | null;

  // --- HTTP protocol ---

  /** HTTP version reported for the response. */
  httpVersion: string;
  /** Content encodings applied to the response, e.g. gzip, br, deflate. */
  compression: string[];
  /** Whether the connection is kept alive. */
  keepAlive: boolean;
  /** Value of the Transfer-Encoding header, or null when absent. */
  transferEncoding: string | null;
  /** Number of redirects followed before the final response. */
  redirectCount: number;
  /** One entry per followed redirect. */
  redirects: RedirectInfo[];
  /** Value of the Server header, or null when absent. */
  serverHeader: string | null;
  /** Response headers as received. */
  headers: Record<string, string | string[] | undefined>;
}
30
-
31
/** Summary of the peer certificate presented by the server. */
export interface CertificateInfo {
  /** Issuer of the certificate. */
  issuer: string;
  /** Subject of the certificate. */
  subject: string;
  /** Start of the validity period. */
  validFrom: string;
  /** End of the validity period. */
  validTo: string;
  /** Days remaining until `validTo`; negative once expired. */
  daysUntilExpiry: number;
  /** Whether the certificate appears to be self-signed. */
  isSelfSigned: boolean;
  /**
   * Whether the chain validated. Basic check, relying on node tls
   * rejectUnauthorized: true result if possible, or manual check.
   */
  isValidChain: boolean;
  /** Certificate fingerprint. */
  fingerprint: string;
  /** Certificate serial number. */
  serialNumber: string;
  /** Subject Alternative Name string, when present. */
  subjectAltName?: string;
}
43
-
44
/** One hop in a redirect chain. */
export interface RedirectInfo {
  /** URL that responded with the redirect. */
  url: string;
  /** HTTP status code of that response. */
  statusCode: number;
  /** Redirect target, or null when none was given. */
  location: string | null;
}
49
-
50
/** Per-header status for the inspected security headers, plus an aggregate score. */
export interface SecurityHeadersResult {
  /** Status of Strict-Transport-Security. */
  strictTransportSecurity: HeaderStatus;
  /** Status of Content-Security-Policy. */
  contentSecurityPolicy: HeaderStatus;
  /** Status of X-Frame-Options. */
  xFrameOptions: HeaderStatus;
  /** Status of X-Content-Type-Options. */
  xContentTypeOptions: HeaderStatus;
  /** Status of Referrer-Policy. */
  referrerPolicy: HeaderStatus;
  /** Status of Permissions-Policy. */
  permissionsPolicy: HeaderStatus;

  /** Raw header values. */
  details: Record<string, string>;
  /** Partial score contribution (0-100, normalized for the headers section). */
  score: number;
}
61
-
62
/** Presence and validity of a single response header. */
export interface HeaderStatus {
  /** Whether the header was present. */
  present: boolean;
  /** Header value, or null when there is none. */
  value: string | null;
  /** Result of a simple syntax check on the value. */
  valid: boolean;
  /** Optional list of problems found with the header. */
  issues?: string[];
}
68
-
69
/** DNS lookup results for the audited host. */
export interface DnsDiagnostics {
  /** A record values. */
  a: string[];
  /** AAAA record values. */
  aaaa: string[];
  /** CNAME record values. */
  cname: string[];
  /** Reverse-lookup results. */
  reverse: string[];
  /** Number of IP addresses found. */
  ipCount: number;
  /** Whether IPv6 is supported. */
  ipv6Support: boolean;
  /** Time taken by DNS resolution (presumably milliseconds — confirm against the DNS module). */
  resolutionTime: number;
}
78
-
79
/** Timing (milliseconds) and size (bytes) measurements for the audited request. */
export interface PerformanceMetrics {
  /** DNS lookup time, in ms. */
  dnsLookupTime: number;
  /** TCP connect time, in ms. */
  tcpConnectTime: number;
  /** TLS handshake time, in ms. */
  tlsHandshakeTime: number;
  /** Time to first byte, in ms. */
  ttfb: number;
  /** Total time, in ms. */
  totalTime: number;
  /** HTML size, in bytes. */
  htmlSize: number;
  /** Header size, in bytes. */
  headerSize: number;
  /** Accumulated time spent in redirects. */
  redirectTime?: number;
}
89
-
90
/** A single finding produced by the audit. */
export interface AuditIssue {
  /** Unique code for tests/filtering. */
  id: string;
  /** How serious the finding is. */
  severity: 'critical' | 'severe' | 'moderate' | 'minor' | 'info';
  /** Area of the audit the finding belongs to. */
  category: 'tls' | 'http' | 'headers' | 'dns' | 'performance';
  /** Human-readable description. */
  message: string;
  /** Penalty value associated with the finding. */
  scorePenalty: number;
}
97
-
98
/** Optional settings accepted by the audit. */
export interface AuditOptions {
  /** Request timeout (presumably milliseconds — confirm against the caller). */
  timeout?: number;
  /** Verbose output flag. */
  verbose?: boolean;
  /** Debug output flag. */
  debug?: boolean;
}
@@ -1,21 +0,0 @@
1
- import { ProxyAgent } from 'undici';
2
-
3
/**
 * Holds an optional `ProxyAgent` created from a proxy URL and exposes it
 * as a dispatcher.
 */
export class ProxyAdapter {
  private agent?: ProxyAgent;

  /**
   * @param proxyUrl Proxy URL; when omitted or empty, no agent is created.
   * @throws Error `Invalid proxy URL: <value>` when the URL cannot be parsed
   *         or the agent cannot be constructed from it.
   */
  constructor(proxyUrl?: string) {
    if (!proxyUrl) {
      return;
    }

    try {
      // Parse first so malformed input fails before an agent is built.
      new URL(proxyUrl);
      this.agent = new ProxyAgent(proxyUrl);
    } catch {
      throw new Error(`Invalid proxy URL: ${proxyUrl}`);
    }
  }

  /** The configured agent, or undefined when no proxy was supplied. */
  get dispatcher() {
    return this.agent;
  }
}
@@ -1,39 +0,0 @@
1
/**
 * Per-host token-bucket rate limiter.
 *
 * Each host gets its own bucket. Tokens are refilled continuously according
 * to the effective rate, and a call to `waitForToken` resolves once it has
 * taken one token from the host's bucket.
 */
export class RateLimiter {
  private buckets: Map<string, { tokens: number; lastRefill: number }> = new Map();
  private rate: number; // tokens per second

  /**
   * @param rate Maximum sustained requests per second per host (default 2).
   */
  constructor(rate: number = 2) {
    this.rate = rate;
  }

  /**
   * Wait until a request to `host` is allowed.
   *
   * @param host       Key identifying the bucket.
   * @param crawlDelay Minimum seconds between requests; when positive, the
   *                   refill rate becomes `min(rate, 1 / crawlDelay)`.
   * @returns Resolves once a token has been consumed. The first call for a
   *          host resolves immediately.
   */
  async waitForToken(host: string, crawlDelay: number = 0): Promise<void> {
    const effectiveRate = crawlDelay > 0 ? Math.min(this.rate, 1 / crawlDelay) : this.rate;
    const interval = 1000 / effectiveRate; // ms needed to earn one token

    // A bucket must be able to hold at least one whole token. Capping at
    // `rate` alone meant a rate below 1/s could never accumulate a full
    // token, so every call after the first waited forever.
    const capacity = Math.max(1, this.rate);

    const bucket = this.buckets.get(host);
    if (!bucket) {
      // First request for this host: start with a full bucket and spend one token.
      this.buckets.set(host, { tokens: capacity - 1, lastRefill: Date.now() });
      return;
    }

    while (true) {
      const now = Date.now();
      const elapsed = now - bucket.lastRefill;

      if (elapsed > 0) {
        const newTokens = elapsed / interval;
        bucket.tokens = Math.min(capacity, bucket.tokens + newTokens);
        bucket.lastRefill = now;
      }

      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return;
      }

      // Sleep for the remainder of the current interval, then re-check.
      const waitTime = Math.max(0, interval - (Date.now() - bucket.lastRefill));
      await new Promise(resolve => setTimeout(resolve, waitTime));
    }
  }
}
@@ -1,47 +0,0 @@
1
/**
 * Tracks the hops of one redirect chain, enforcing a hop limit and
 * detecting loops (URLs are compared with their fragment removed).
 */
export class RedirectController {
  private maxHops: number;
  private currentHops: number = 0;
  private history: Set<string> = new Set();

  /**
   * @param maxHops Maximum number of hops that may be recorded (default 5).
   * @param seedUrl Optional starting URL; pre-registered so that a redirect
   *                back to it counts as a loop.
   */
  constructor(maxHops: number = 5, seedUrl?: string) {
    this.maxHops = maxHops;
    if (seedUrl) {
      const seedKey = this.normalize(seedUrl);
      this.history.add(seedKey);
    }
  }

  /**
   * Register the next redirect target.
   *
   * @returns `null` when the hop is accepted, `'redirect_loop'` when the URL
   *          was already seen, or `'redirect_limit_exceeded'` when the hop
   *          budget is used up. The loop check takes precedence.
   */
  nextHop(url: string): 'redirect_limit_exceeded' | 'redirect_loop' | null {
    const key = this.normalize(url);
    const alreadySeen = this.history.has(key);

    if (alreadySeen) {
      return 'redirect_loop';
    }

    const budgetSpent = this.currentHops >= this.maxHops;
    if (budgetSpent) {
      return 'redirect_limit_exceeded';
    }

    // Accepted: count the hop and remember the URL.
    this.currentHops += 1;
    this.history.add(key);
    return null;
  }

  /** Number of hops accepted so far. */
  get hops(): number {
    return this.currentHops;
  }

  /** Drop the fragment from a URL; unparsable input is returned unchanged. */
  private normalize(url: string): string {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return url;
    }
    parsed.hash = ''; // Fragments are irrelevant for loop detection.
    return parsed.toString();
  }
}