@crawlith/core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analyze.d.ts +29 -8
  4. package/dist/analysis/analyze.js +325 -221
  5. package/dist/analysis/clustering.d.ts +23 -0
  6. package/dist/analysis/clustering.js +206 -0
  7. package/dist/analysis/content.d.ts +1 -1
  8. package/dist/analysis/content.js +11 -5
  9. package/dist/analysis/duplicate.d.ts +34 -0
  10. package/dist/analysis/duplicate.js +305 -0
  11. package/dist/analysis/heading.d.ts +116 -0
  12. package/dist/analysis/heading.js +356 -0
  13. package/dist/analysis/images.d.ts +1 -1
  14. package/dist/analysis/images.js +6 -5
  15. package/dist/analysis/links.d.ts +1 -1
  16. package/dist/analysis/links.js +8 -8
  17. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  18. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  19. package/dist/analysis/scoring.js +4 -1
  20. package/dist/analysis/seo.d.ts +8 -4
  21. package/dist/analysis/seo.js +41 -30
  22. package/dist/analysis/soft404.d.ts +17 -0
  23. package/dist/analysis/soft404.js +62 -0
  24. package/dist/analysis/structuredData.d.ts +1 -1
  25. package/dist/analysis/structuredData.js +5 -4
  26. package/dist/application/index.d.ts +2 -0
  27. package/dist/application/index.js +2 -0
  28. package/dist/application/usecase.d.ts +3 -0
  29. package/dist/application/usecase.js +1 -0
  30. package/dist/application/usecases.d.ts +114 -0
  31. package/dist/application/usecases.js +201 -0
  32. package/dist/audit/index.js +1 -1
  33. package/dist/audit/transport.d.ts +1 -1
  34. package/dist/audit/transport.js +5 -4
  35. package/dist/audit/types.d.ts +1 -0
  36. package/dist/constants.d.ts +17 -0
  37. package/dist/constants.js +23 -0
  38. package/dist/core/scope/scopeManager.js +3 -0
  39. package/dist/crawler/crawl.d.ts +2 -2
  40. package/dist/crawler/crawler.d.ts +17 -5
  41. package/dist/crawler/crawler.js +259 -94
  42. package/dist/crawler/fetcher.d.ts +1 -1
  43. package/dist/crawler/fetcher.js +6 -6
  44. package/dist/crawler/metricsRunner.d.ts +21 -1
  45. package/dist/crawler/metricsRunner.js +181 -60
  46. package/dist/crawler/normalize.d.ts +41 -0
  47. package/dist/crawler/normalize.js +119 -3
  48. package/dist/crawler/parser.d.ts +1 -3
  49. package/dist/crawler/parser.js +2 -49
  50. package/dist/crawler/resolver.d.ts +11 -0
  51. package/dist/crawler/resolver.js +67 -0
  52. package/dist/crawler/sitemap.d.ts +4 -1
  53. package/dist/crawler/sitemap.js +24 -18
  54. package/dist/crawler/trap.d.ts +5 -1
  55. package/dist/crawler/trap.js +23 -2
  56. package/dist/db/CrawlithDB.d.ts +110 -0
  57. package/dist/db/CrawlithDB.js +500 -0
  58. package/dist/db/graphLoader.js +15 -32
  59. package/dist/db/index.d.ts +9 -1
  60. package/dist/db/index.js +39 -31
  61. package/dist/db/migrations.d.ts +2 -0
  62. package/dist/db/{schema.js → migrations.js} +90 -43
  63. package/dist/db/pluginRegistry.d.ts +9 -0
  64. package/dist/db/pluginRegistry.js +19 -0
  65. package/dist/db/repositories/EdgeRepository.d.ts +5 -0
  66. package/dist/db/repositories/EdgeRepository.js +7 -0
  67. package/dist/db/repositories/MetricsRepository.d.ts +13 -8
  68. package/dist/db/repositories/MetricsRepository.js +14 -6
  69. package/dist/db/repositories/PageRepository.d.ts +5 -3
  70. package/dist/db/repositories/PageRepository.js +68 -17
  71. package/dist/db/repositories/SiteRepository.d.ts +6 -0
  72. package/dist/db/repositories/SiteRepository.js +4 -0
  73. package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
  74. package/dist/db/repositories/SnapshotRepository.js +48 -10
  75. package/dist/db/reset.d.ts +9 -0
  76. package/dist/db/reset.js +32 -0
  77. package/dist/db/statements.d.ts +12 -0
  78. package/dist/db/statements.js +40 -0
  79. package/dist/diff/compare.d.ts +0 -5
  80. package/dist/diff/compare.js +0 -12
  81. package/dist/diff/service.d.ts +16 -0
  82. package/dist/diff/service.js +41 -0
  83. package/dist/domain/index.d.ts +4 -0
  84. package/dist/domain/index.js +4 -0
  85. package/dist/events.d.ts +8 -0
  86. package/dist/graph/graph.d.ts +20 -42
  87. package/dist/graph/graph.js +12 -16
  88. package/dist/graph/hits.d.ts +23 -0
  89. package/dist/graph/hits.js +111 -0
  90. package/dist/graph/metrics.d.ts +0 -4
  91. package/dist/graph/metrics.js +19 -15
  92. package/dist/graph/pagerank.d.ts +17 -4
  93. package/dist/graph/pagerank.js +126 -93
  94. package/dist/index.d.ts +27 -9
  95. package/dist/index.js +27 -9
  96. package/dist/lock/lockManager.d.ts +1 -0
  97. package/dist/lock/lockManager.js +15 -0
  98. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  99. package/dist/plugin-system/plugin-cli.js +31 -0
  100. package/dist/plugin-system/plugin-config.d.ts +16 -0
  101. package/dist/plugin-system/plugin-config.js +36 -0
  102. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  103. package/dist/plugin-system/plugin-loader.js +122 -0
  104. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  105. package/dist/plugin-system/plugin-registry.js +167 -0
  106. package/dist/plugin-system/plugin-types.d.ts +205 -0
  107. package/dist/plugin-system/plugin-types.js +1 -0
  108. package/dist/ports/index.d.ts +9 -0
  109. package/dist/ports/index.js +1 -0
  110. package/dist/report/export.d.ts +3 -0
  111. package/dist/report/export.js +81 -0
  112. package/dist/report/insight.d.ts +27 -0
  113. package/dist/report/insight.js +103 -0
  114. package/dist/scoring/health.d.ts +17 -11
  115. package/dist/scoring/health.js +183 -140
  116. package/dist/utils/chalk.d.ts +6 -0
  117. package/dist/utils/chalk.js +41 -0
  118. package/dist/utils/secureConfig.d.ts +23 -0
  119. package/dist/utils/secureConfig.js +128 -0
  120. package/package.json +10 -4
  121. package/CHANGELOG.md +0 -13
  122. package/dist/db/schema.d.ts +0 -2
  123. package/dist/graph/cluster.d.ts +0 -6
  124. package/dist/graph/cluster.js +0 -221
  125. package/dist/graph/duplicate.d.ts +0 -10
  126. package/dist/graph/duplicate.js +0 -302
  127. package/dist/scoring/hits.d.ts +0 -10
  128. package/dist/scoring/hits.js +0 -131
  129. package/scripts/copy-assets.js +0 -37
  130. package/src/analysis/analysis_list.html +0 -35
  131. package/src/analysis/analysis_page.html +0 -123
  132. package/src/analysis/analyze.ts +0 -505
  133. package/src/analysis/content.ts +0 -62
  134. package/src/analysis/images.ts +0 -28
  135. package/src/analysis/links.ts +0 -41
  136. package/src/analysis/scoring.ts +0 -66
  137. package/src/analysis/seo.ts +0 -82
  138. package/src/analysis/structuredData.ts +0 -62
  139. package/src/analysis/templates.ts +0 -9
  140. package/src/audit/dns.ts +0 -49
  141. package/src/audit/headers.ts +0 -98
  142. package/src/audit/index.ts +0 -66
  143. package/src/audit/scoring.ts +0 -232
  144. package/src/audit/transport.ts +0 -258
  145. package/src/audit/types.ts +0 -102
  146. package/src/core/network/proxyAdapter.ts +0 -21
  147. package/src/core/network/rateLimiter.ts +0 -39
  148. package/src/core/network/redirectController.ts +0 -47
  149. package/src/core/network/responseLimiter.ts +0 -34
  150. package/src/core/network/retryPolicy.ts +0 -57
  151. package/src/core/scope/domainFilter.ts +0 -45
  152. package/src/core/scope/scopeManager.ts +0 -52
  153. package/src/core/scope/subdomainPolicy.ts +0 -39
  154. package/src/core/security/ipGuard.ts +0 -171
  155. package/src/crawler/crawl.ts +0 -9
  156. package/src/crawler/crawler.ts +0 -601
  157. package/src/crawler/extract.ts +0 -39
  158. package/src/crawler/fetcher.ts +0 -251
  159. package/src/crawler/metricsRunner.ts +0 -137
  160. package/src/crawler/normalize.ts +0 -108
  161. package/src/crawler/parser.ts +0 -190
  162. package/src/crawler/sitemap.ts +0 -76
  163. package/src/crawler/trap.ts +0 -96
  164. package/src/db/graphLoader.ts +0 -135
  165. package/src/db/index.ts +0 -75
  166. package/src/db/repositories/EdgeRepository.ts +0 -43
  167. package/src/db/repositories/MetricsRepository.ts +0 -63
  168. package/src/db/repositories/PageRepository.ts +0 -228
  169. package/src/db/repositories/SiteRepository.ts +0 -43
  170. package/src/db/repositories/SnapshotRepository.ts +0 -99
  171. package/src/db/schema.ts +0 -177
  172. package/src/diff/compare.ts +0 -84
  173. package/src/events.ts +0 -16
  174. package/src/graph/cluster.ts +0 -246
  175. package/src/graph/duplicate.ts +0 -350
  176. package/src/graph/graph.ts +0 -192
  177. package/src/graph/metrics.ts +0 -125
  178. package/src/graph/pagerank.ts +0 -126
  179. package/src/graph/simhash.ts +0 -76
  180. package/src/index.ts +0 -33
  181. package/src/lock/hashKey.ts +0 -51
  182. package/src/lock/lockManager.ts +0 -132
  183. package/src/lock/pidCheck.ts +0 -13
  184. package/src/report/crawl.html +0 -879
  185. package/src/report/crawlExport.ts +0 -58
  186. package/src/report/crawl_template.ts +0 -9
  187. package/src/report/html.ts +0 -27
  188. package/src/scoring/health.ts +0 -241
  189. package/src/scoring/hits.ts +0 -153
  190. package/src/scoring/orphanSeverity.ts +0 -176
  191. package/src/utils/version.ts +0 -18
  192. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  193. package/tests/analysis.unit.test.ts +0 -142
  194. package/tests/analyze.integration.test.ts +0 -133
  195. package/tests/analyze_markdown.test.ts +0 -98
  196. package/tests/audit/audit.test.ts +0 -101
  197. package/tests/audit/dns.test.ts +0 -31
  198. package/tests/audit/headers.test.ts +0 -45
  199. package/tests/audit/scoring.test.ts +0 -133
  200. package/tests/audit/security.test.ts +0 -12
  201. package/tests/audit/transport.test.ts +0 -111
  202. package/tests/clustering.test.ts +0 -118
  203. package/tests/clustering_risk.test.ts +0 -118
  204. package/tests/crawler.test.ts +0 -364
  205. package/tests/db/index.test.ts +0 -134
  206. package/tests/db/repositories.test.ts +0 -115
  207. package/tests/db.test.ts +0 -159
  208. package/tests/db_repos.test.ts +0 -72
  209. package/tests/diff.test.ts +0 -67
  210. package/tests/duplicate.test.ts +0 -110
  211. package/tests/extract.test.ts +0 -86
  212. package/tests/fetcher.test.ts +0 -110
  213. package/tests/fetcher_safety.test.ts +0 -91
  214. package/tests/fixtures/analyze-crawl.json +0 -26
  215. package/tests/graph/graph.test.ts +0 -100
  216. package/tests/graphLoader.test.ts +0 -124
  217. package/tests/hits.test.ts +0 -134
  218. package/tests/html_report.test.ts +0 -59
  219. package/tests/ipGuard.test.ts +0 -73
  220. package/tests/lock/lockManager.test.ts +0 -198
  221. package/tests/metrics.test.ts +0 -196
  222. package/tests/normalize.test.ts +0 -88
  223. package/tests/orphanSeverity.test.ts +0 -160
  224. package/tests/pagerank.test.ts +0 -98
  225. package/tests/parser.test.ts +0 -117
  226. package/tests/proxy_safety.test.ts +0 -57
  227. package/tests/redirect_safety.test.ts +0 -77
  228. package/tests/renderAnalysisCsv.test.ts +0 -183
  229. package/tests/safety.test.ts +0 -126
  230. package/tests/scope.test.ts +0 -84
  231. package/tests/scoring.test.ts +0 -60
  232. package/tests/sitemap.test.ts +0 -100
  233. package/tests/soft404.test.ts +0 -41
  234. package/tests/ssrf_fix.test.ts +0 -69
  235. package/tests/trap.test.ts +0 -39
  236. package/tests/visualization_data.test.ts +0 -46
  237. package/tsconfig.json +0 -11
package/src/analysis/scoring.ts DELETED
@@ -1,66 +0,0 @@
1
- import { Metrics } from '../graph/metrics.js';
2
- import type { PageAnalysis } from './analyze.js';
3
-
4
- export interface SiteScore {
5
- seoHealthScore: number;
6
- authorityEntropyOrphanScore: number;
7
- overallScore: number;
8
- }
9
-
10
- export function scorePageSeo(page: PageAnalysis): number {
11
- if (page.meta.crawlStatus === 'blocked_by_robots') {
12
- return 0;
13
- }
14
- const titleMeta = (scoreTextStatus(page.title.status) + scoreTextStatus(page.metaDescription.status)) / 2;
15
- const h1 = page.h1.status === 'ok' ? 100 : page.h1.status === 'warning' ? 60 : 10;
16
- const wordQuality = Math.min(100, (page.content.wordCount / 600) * 100) * 0.7 + Math.min(100, page.content.textHtmlRatio * 500) * 0.3;
17
- const thin = 100 - page.thinScore;
18
- const imageDen = Math.max(1, page.images.totalImages);
19
- const imageAlt = Math.max(0, 100 - ((page.images.missingAlt + page.images.emptyAlt) / imageDen) * 100);
20
- const structured = page.structuredData.present ? (page.structuredData.valid ? 100 : 40) : 30;
21
- const linkBalance = Math.max(0, 100 - Math.abs(page.links.externalRatio - 0.3) * 200);
22
-
23
- const score =
24
- titleMeta * 0.15 +
25
- h1 * 0.1 +
26
- wordQuality * 0.2 +
27
- thin * 0.2 +
28
- imageAlt * 0.1 +
29
- structured * 0.1 +
30
- linkBalance * 0.15;
31
-
32
- return Number(Math.max(0, Math.min(100, score)).toFixed(2));
33
- }
34
-
35
- function scoreTextStatus(status: PageAnalysis['title']['status']): number {
36
- switch (status) {
37
- case 'ok': return 100;
38
- case 'duplicate': return 45;
39
- case 'too_short': return 60;
40
- case 'too_long': return 60;
41
- case 'missing': return 0;
42
- }
43
- }
44
-
45
- export function aggregateSiteScore(metrics: Metrics, pages: PageAnalysis[]): SiteScore {
46
- const seoHealthScore = pages.length === 0 ? 0 : pages.reduce((acc, page) => acc + page.seoScore, 0) / pages.length;
47
-
48
- const avgAuthority = metrics.topAuthorityPages.length === 0
49
- ? 0
50
- : metrics.topAuthorityPages.reduce((acc, item) => acc + item.authority, 0) / metrics.topAuthorityPages.length;
51
- const entropyScore = Math.max(0, 100 - Math.abs(metrics.structuralEntropy - 2) * 25);
52
- const orphanPenalty = metrics.totalPages === 0 ? 0 : (metrics.orphanPages.length / metrics.totalPages) * 100;
53
- const authorityEntropyOrphanScore = Math.max(0, Math.min(100, (avgAuthority * 100 * 0.4) + (entropyScore * 0.35) + ((100 - orphanPenalty) * 0.25)));
54
-
55
- let overallScore = Number((seoHealthScore * 0.7 + authorityEntropyOrphanScore * 0.3).toFixed(2));
56
-
57
- if (pages.some(p => p.meta.crawlStatus === 'blocked_by_robots')) {
58
- overallScore = 0;
59
- }
60
-
61
- return {
62
- seoHealthScore: Number(seoHealthScore.toFixed(2)),
63
- authorityEntropyOrphanScore: Number(authorityEntropyOrphanScore.toFixed(2)),
64
- overallScore
65
- };
66
- }
package/src/analysis/seo.ts DELETED
@@ -1,82 +0,0 @@
1
- import { load } from 'cheerio';
2
-
3
- export type SeoStatus = 'ok' | 'missing' | 'too_short' | 'too_long' | 'duplicate';
4
-
5
- export interface TextFieldAnalysis {
6
- value: string | null;
7
- length: number;
8
- status: SeoStatus;
9
- }
10
-
11
- export interface H1Analysis {
12
- count: number;
13
- status: 'ok' | 'critical' | 'warning';
14
- matchesTitle: boolean;
15
- }
16
-
17
- function normalizedText(value: string | null): string {
18
- return (value ?? '').trim().toLowerCase();
19
- }
20
-
21
- export function analyzeTitle(html: string): TextFieldAnalysis {
22
- const $ = load(html);
23
- const title = $('title').first().text().trim();
24
- if (!title) {
25
- return { value: null, length: 0, status: 'missing' };
26
- }
27
-
28
- if (title.length < 50) return { value: title, length: title.length, status: 'too_short' };
29
- if (title.length > 60) return { value: title, length: title.length, status: 'too_long' };
30
- return { value: title, length: title.length, status: 'ok' };
31
- }
32
-
33
- export function analyzeMetaDescription(html: string): TextFieldAnalysis {
34
- const $ = load(html);
35
- const raw = $('meta[name="description"]').attr('content');
36
- if (raw === undefined) {
37
- return { value: null, length: 0, status: 'missing' };
38
- }
39
-
40
- const description = raw.trim();
41
- if (!description) {
42
- return { value: '', length: 0, status: 'missing' };
43
- }
44
-
45
- if (description.length < 140) return { value: description, length: description.length, status: 'too_short' };
46
- if (description.length > 160) return { value: description, length: description.length, status: 'too_long' };
47
- return { value: description, length: description.length, status: 'ok' };
48
- }
49
-
50
- export function applyDuplicateStatuses<T extends TextFieldAnalysis>(fields: T[]): T[] {
51
- const counts = new Map<string, number>();
52
- for (const field of fields) {
53
- const key = normalizedText(field.value);
54
- if (!key) continue;
55
- counts.set(key, (counts.get(key) || 0) + 1);
56
- }
57
-
58
- return fields.map((field) => {
59
- const key = normalizedText(field.value);
60
- if (!key) return field;
61
- if ((counts.get(key) || 0) > 1) {
62
- return { ...field, status: 'duplicate' };
63
- }
64
- return field;
65
- });
66
- }
67
-
68
- export function analyzeH1(html: string, titleValue: string | null): H1Analysis {
69
- const $ = load(html);
70
- const h1Values = $('h1').toArray().map((el) => $(el).text().trim()).filter(Boolean);
71
- const count = h1Values.length;
72
- const first = h1Values[0] || null;
73
- const matchesTitle = Boolean(first && titleValue && normalizedText(first) === normalizedText(titleValue));
74
-
75
- if (count === 0) {
76
- return { count, status: 'critical', matchesTitle };
77
- }
78
- if (count > 1) {
79
- return { count, status: 'warning', matchesTitle };
80
- }
81
- return { count, status: 'ok', matchesTitle };
82
- }
package/src/analysis/structuredData.ts DELETED
@@ -1,62 +0,0 @@
1
- import { load } from 'cheerio';
2
-
3
- export interface StructuredDataResult {
4
- present: boolean;
5
- types: string[];
6
- valid: boolean;
7
- }
8
-
9
- export function analyzeStructuredData(html: string): StructuredDataResult {
10
- const $ = load(html);
11
- const scripts = $('script[type="application/ld+json"]').toArray();
12
- if (scripts.length === 0) {
13
- return { present: false, types: [], valid: false };
14
- }
15
-
16
- const types = new Set<string>();
17
- let valid = true;
18
-
19
- for (const script of scripts) {
20
- const raw = $(script).text().trim();
21
- if (!raw) {
22
- valid = false;
23
- continue;
24
- }
25
-
26
- try {
27
- const parsed = JSON.parse(raw);
28
- extractTypes(parsed, types);
29
- } catch {
30
- valid = false;
31
- }
32
- }
33
-
34
- return {
35
- present: true,
36
- valid,
37
- types: Array.from(types)
38
- };
39
- }
40
-
41
- function extractTypes(input: unknown, types: Set<string>): void {
42
- if (Array.isArray(input)) {
43
- input.forEach((item) => extractTypes(item, types));
44
- return;
45
- }
46
-
47
- if (!input || typeof input !== 'object') return;
48
-
49
- const maybeType = (input as Record<string, unknown>)['@type'];
50
- if (typeof maybeType === 'string') {
51
- types.add(maybeType);
52
- } else if (Array.isArray(maybeType)) {
53
- for (const item of maybeType) {
54
- if (typeof item === 'string') types.add(item);
55
- }
56
- }
57
-
58
- const graph = (input as Record<string, unknown>)['@graph'];
59
- if (Array.isArray(graph)) {
60
- graph.forEach((item) => extractTypes(item, types));
61
- }
62
- }
package/src/analysis/templates.ts DELETED
@@ -1,9 +0,0 @@
1
- import fs from 'node:fs';
2
- import path from 'node:path';
3
- import { fileURLToPath } from 'node:url';
4
-
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = path.dirname(__filename);
7
-
8
- export const ANALYSIS_LIST_TEMPLATE = fs.readFileSync(path.join(__dirname, 'analysis_list.html'), 'utf-8');
9
- export const ANALYSIS_PAGE_TEMPLATE = fs.readFileSync(path.join(__dirname, 'analysis_page.html'), 'utf-8');
package/src/audit/dns.ts DELETED
@@ -1,49 +0,0 @@
1
- import dns from 'node:dns/promises';
2
- import { DnsDiagnostics } from './types.js';
3
-
4
- export async function resolveDns(hostname: string): Promise<DnsDiagnostics> {
5
- const start = performance.now();
6
-
7
- const result: DnsDiagnostics = {
8
- a: [],
9
- aaaa: [],
10
- cname: [],
11
- reverse: [],
12
- ipCount: 0,
13
- ipv6Support: false,
14
- resolutionTime: 0
15
- };
16
-
17
- try {
18
- // We run these in parallel
19
- const [a, aaaa, cname] = await Promise.all([
20
- dns.resolve4(hostname).catch(() => [] as string[]),
21
- dns.resolve6(hostname).catch(() => [] as string[]),
22
- dns.resolveCname(hostname).catch(() => [] as string[])
23
- ]);
24
-
25
- result.a = a;
26
- result.aaaa = aaaa;
27
- result.cname = cname;
28
- result.ipCount = a.length + aaaa.length;
29
- result.ipv6Support = aaaa.length > 0;
30
-
31
- // Try reverse lookup on first IP if available
32
- const ipToReverse = a.length > 0 ? a[0] : (aaaa.length > 0 ? aaaa[0] : null);
33
-
34
- if (ipToReverse) {
35
- try {
36
- result.reverse = await dns.reverse(ipToReverse);
37
- } catch {
38
- // Reverse lookup failed, ignore
39
- }
40
- }
41
-
42
- } catch (_error) {
43
- // DNS resolution failed entirely or other error
44
- // We return empty result but with time measured
45
- }
46
-
47
- result.resolutionTime = performance.now() - start;
48
- return result;
49
- }
package/src/audit/headers.ts DELETED
@@ -1,98 +0,0 @@
1
- import { SecurityHeadersResult, HeaderStatus } from './types.js';
2
-
3
- export function analyzeHeaders(headers: Record<string, string | string[] | undefined>): SecurityHeadersResult {
4
- const normalized: Record<string, string> = {};
5
- for (const [key, value] of Object.entries(headers)) {
6
- if (typeof value === 'string') {
7
- normalized[key.toLowerCase()] = value;
8
- } else if (Array.isArray(value)) {
9
- normalized[key.toLowerCase()] = value.join(', ');
10
- }
11
- }
12
-
13
- const result: SecurityHeadersResult = {
14
- strictTransportSecurity: checkHSTS(normalized['strict-transport-security']),
15
- contentSecurityPolicy: checkCSP(normalized['content-security-policy']),
16
- xFrameOptions: checkXFrameOptions(normalized['x-frame-options']),
17
- xContentTypeOptions: checkXContentTypeOptions(normalized['x-content-type-options']),
18
- referrerPolicy: checkReferrerPolicy(normalized['referrer-policy']),
19
- permissionsPolicy: checkPermissionsPolicy(normalized['permissions-policy']),
20
- details: normalized,
21
- score: 0
22
- };
23
-
24
- // Calculate internal score (0-100) based on presence and validity
25
- let score = 0;
26
- const weights = {
27
- hsts: 30,
28
- csp: 25,
29
- xframe: 15,
30
- xcontent: 15,
31
- referrer: 10,
32
- permissions: 5
33
- };
34
-
35
- if (result.strictTransportSecurity.valid) score += weights.hsts;
36
- if (result.contentSecurityPolicy.valid) score += weights.csp;
37
- if (result.xFrameOptions.valid) score += weights.xframe;
38
- if (result.xContentTypeOptions.valid) score += weights.xcontent;
39
- if (result.referrerPolicy.valid) score += weights.referrer;
40
- if (result.permissionsPolicy.valid) score += weights.permissions;
41
-
42
- result.score = score;
43
-
44
- return result;
45
- }
46
-
47
- function checkHSTS(value: string | undefined): HeaderStatus {
48
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing HSTS header'] };
49
-
50
- const valid = value.includes('max-age=');
51
- const issues: string[] = [];
52
- if (!valid) issues.push('Missing max-age directive');
53
- if (!value.includes('includeSubDomains')) issues.push('Missing includeSubDomains');
54
-
55
- return { present: true, value, valid, issues };
56
- }
57
-
58
- function checkCSP(value: string | undefined): HeaderStatus {
59
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing CSP header'] };
60
-
61
- // Basic check: non-empty
62
- return { present: true, value, valid: value.length > 0, issues: [] };
63
- }
64
-
65
- function checkXFrameOptions(value: string | undefined): HeaderStatus {
66
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing X-Frame-Options'] };
67
-
68
- const upper = value.toUpperCase();
69
- const valid = upper === 'DENY' || upper === 'SAMEORIGIN';
70
- return {
71
- present: true,
72
- value,
73
- valid,
74
- issues: valid ? [] : [`Invalid value: ${value}`]
75
- };
76
- }
77
-
78
- function checkXContentTypeOptions(value: string | undefined): HeaderStatus {
79
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing X-Content-Type-Options'] };
80
-
81
- const valid = value.toLowerCase() === 'nosniff';
82
- return {
83
- present: true,
84
- value,
85
- valid,
86
- issues: valid ? [] : [`Invalid value: ${value}`]
87
- };
88
- }
89
-
90
- function checkReferrerPolicy(value: string | undefined): HeaderStatus {
91
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing Referrer-Policy'] };
92
- return { present: true, value, valid: true, issues: [] };
93
- }
94
-
95
- function checkPermissionsPolicy(value: string | undefined): HeaderStatus {
96
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing Permissions-Policy'] };
97
- return { present: true, value, valid: true, issues: [] };
98
- }
package/src/audit/index.ts DELETED
@@ -1,66 +0,0 @@
1
- import { resolveDns } from './dns.js';
2
- import { analyzeTransport } from './transport.js';
3
- import { analyzeHeaders } from './headers.js';
4
- import { calculateScore } from './scoring.js';
5
- import { AuditResult, AuditOptions } from './types.js';
6
- import { URL } from 'node:url';
7
- import { IPGuard } from '../core/security/ipGuard.js';
8
-
9
- export async function auditUrl(urlStr: string, options: AuditOptions = {}): Promise<AuditResult> {
10
- const timeout = options.timeout || 10000;
11
-
12
- // 1. Basic URL validation
13
- let url: URL;
14
- try {
15
- url = new URL(urlStr);
16
- if (!['http:', 'https:'].includes(url.protocol)) {
17
- throw new Error('Only HTTP and HTTPS protocols are supported');
18
- }
19
- } catch (error: any) {
20
- throw new Error(`Invalid URL: ${error.message}`, { cause: error });
21
- }
22
-
23
- // 2. SSRF Guard
24
- const isSafe = await IPGuard.validateHost(url.hostname);
25
- if (!isSafe) {
26
- throw new Error('Access to internal or private infrastructure is prohibited');
27
- }
28
-
29
- // 3. Parallelize DNS and Transport
30
- // We handle transport errors differently as they are fatal for the audit (e.g. connection refused)
31
- // DNS errors might return partial results but usually if transport works, DNS worked (unless transport used IP)
32
-
33
- const dnsPromise = resolveDns(url.hostname);
34
- const transportPromise = analyzeTransport(urlStr, timeout);
35
-
36
- const [dnsResult, transportResult] = await Promise.all([
37
- dnsPromise,
38
- transportPromise
39
- ]);
40
-
41
- // 3. Analyze Headers
42
- const headersResult = analyzeHeaders(transportResult.transport.headers);
43
-
44
- // 4. Calculate Score
45
- const scoringResult = calculateScore(
46
- transportResult.transport,
47
- dnsResult,
48
- headersResult,
49
- transportResult.performance,
50
- transportResult.issues
51
- );
52
-
53
- // 5. Build Result
54
- const result: AuditResult = {
55
- url: urlStr,
56
- transport: transportResult.transport,
57
- securityHeaders: headersResult,
58
- dns: dnsResult,
59
- performance: transportResult.performance,
60
- score: scoringResult.score,
61
- grade: scoringResult.grade,
62
- issues: scoringResult.issues
63
- };
64
-
65
- return result;
66
- }
package/src/audit/scoring.ts DELETED
@@ -1,232 +0,0 @@
1
- /* eslint-disable no-useless-assignment */
2
- import { TransportDiagnostics, DnsDiagnostics, SecurityHeadersResult, PerformanceMetrics, AuditIssue } from './types.js';
3
-
4
- interface CategoryScores {
5
- transport: number;
6
- security: number;
7
- performance: number;
8
- infrastructure: number;
9
- }
10
-
11
- export function calculateScore(
12
- transport: TransportDiagnostics,
13
- dns: DnsDiagnostics,
14
- headers: SecurityHeadersResult,
15
- performance: PerformanceMetrics,
16
- existingIssues: AuditIssue[]
17
- ): { score: number; grade: 'A' | 'B' | 'C' | 'D' | 'F'; issues: AuditIssue[]; categoryScores: CategoryScores } {
18
-
19
- const issues: AuditIssue[] = [...existingIssues];
20
- let transportScore = 0; // Max 30
21
- let securityScore = 0; // Max 20
22
- let performanceScore = 0; // Max 30
23
- let infrastructureScore = 0; // Max 20
24
-
25
- // 1. Transport Security (30 pts)
26
- // TLS Version
27
- if (transport.tlsVersion) {
28
- const version = parseFloat(transport.tlsVersion.replace('v', '').replace('TLS', '').trim());
29
- if (version >= 1.2) {
30
- transportScore += 15;
31
- } else {
32
- issues.push({
33
- id: 'tls-old',
34
- severity: 'severe',
35
- category: 'tls',
36
- message: `Deprecated TLS version: ${transport.tlsVersion}`,
37
- scorePenalty: 15
38
- });
39
- }
40
- } else if (transport.certificate) {
41
- // HTTPS but no version detected? Unlikely.
42
- } else {
43
- // HTTP only?
44
- issues.push({
45
- id: 'no-https',
46
- severity: 'critical',
47
- category: 'tls',
48
- message: 'Site is not using HTTPS',
49
- scorePenalty: 30
50
- });
51
- }
52
-
53
- // Certificate
54
- if (transport.certificate) {
55
- if (transport.certificate.isValidChain && !transport.certificate.isSelfSigned) {
56
- transportScore += 15;
57
- } else {
58
- // Already caught in transport.ts, but let's ensure score reflects it
59
- // If issues has cert-invalid, we don't add points.
60
- }
61
-
62
- if (transport.certificate.daysUntilExpiry < 30 && transport.certificate.daysUntilExpiry >= 0) {
63
- issues.push({
64
- id: 'cert-expiring-soon',
65
- severity: 'moderate',
66
- category: 'tls',
67
- message: `Certificate expires in ${transport.certificate.daysUntilExpiry} days`,
68
- scorePenalty: 5
69
- });
70
- // Penalty applied to transport score logic implicitly by not reaching max,
71
- // but here we are adding up points.
72
- // Let's deduct from the 15 points we might have given.
73
- transportScore -= 5;
74
- } else if (transport.certificate.daysUntilExpiry < 0) {
75
- issues.push({
76
- id: 'cert-expired',
77
- severity: 'critical',
78
- category: 'tls',
79
- message: `Certificate expired on ${transport.certificate.validTo}`,
80
- scorePenalty: 30
81
- });
82
- transportScore = 0; // Reset transport score
83
- }
84
- }
85
-
86
- // 2. Response Security (Headers) (20 pts)
87
- // headers.score is 0-100. Map to 0-20.
88
- securityScore = (headers.score / 100) * 20;
89
-
90
- // Add issues for missing critical headers
91
- if (!headers.strictTransportSecurity.present) {
92
- issues.push({
93
- id: 'hsts-missing',
94
- severity: 'moderate',
95
- category: 'headers',
96
- message: 'Missing Strict-Transport-Security header',
97
- scorePenalty: 5
98
- });
99
- }
100
- if (!headers.contentSecurityPolicy.present) {
101
- issues.push({
102
- id: 'csp-missing',
103
- severity: 'moderate',
104
- category: 'headers',
105
- message: 'Missing Content-Security-Policy header',
106
- scorePenalty: 5
107
- });
108
- }
109
-
110
- // 3. Performance (30 pts)
111
- // HTTP/2 (5 pts)
112
- if (transport.alpnProtocol === 'h2' || transport.httpVersion === '2.0') {
113
- performanceScore += 5;
114
- } else {
115
- issues.push({
116
- id: 'no-h2',
117
- severity: 'minor',
118
- category: 'performance',
119
- message: 'HTTP/2 not supported',
120
- scorePenalty: 5
121
- });
122
- }
123
-
124
- // Compression (5 pts)
125
- if (transport.compression.length > 0) {
126
- performanceScore += 5;
127
- } else {
128
- issues.push({
129
- id: 'no-compression',
130
- severity: 'moderate',
131
- category: 'performance',
132
- message: 'No compression enabled (gzip/br)',
133
- scorePenalty: 5
134
- });
135
- }
136
-
137
- // TTFB (10 pts)
138
- if (performance.ttfb < 800) {
139
- performanceScore += 10;
140
- } else {
141
- issues.push({
142
- id: 'slow-ttfb',
143
- severity: 'moderate',
144
- category: 'performance',
145
- message: `Slow TTFB: ${performance.ttfb.toFixed(0)}ms`,
146
- scorePenalty: 10
147
- });
148
- }
149
-
150
- // Redirects (5 pts)
151
- if (transport.redirectCount <= 3) {
152
- performanceScore += 5;
153
- } else {
154
- issues.push({
155
- id: 'too-many-redirects',
156
- severity: 'moderate',
157
- category: 'performance',
158
- message: `Too many redirects: ${transport.redirectCount}`,
159
- scorePenalty: 5
160
- });
161
- }
162
-
163
- // HTML Size (5 pts)
164
- if (performance.htmlSize < 1024 * 1024) { // 1MB
165
- performanceScore += 5;
166
- } else {
167
- issues.push({
168
- id: 'large-html',
169
- severity: 'minor',
170
- category: 'performance',
171
- message: `HTML size > 1MB: ${(performance.htmlSize / 1024 / 1024).toFixed(2)}MB`,
172
- scorePenalty: 5
173
- });
174
- }
175
-
176
- // 4. Infrastructure (20 pts)
177
- // IPv6 (10 pts)
178
- if (dns.ipv6Support) {
179
- infrastructureScore += 10;
180
- } else {
181
- issues.push({
182
- id: 'no-ipv6',
183
- severity: 'minor',
184
- category: 'dns',
185
- message: 'No IPv6 DNS records found',
186
- scorePenalty: 5
187
- });
188
- }
189
-
190
- // Redundancy (10 pts)
191
- if (dns.ipCount > 1) {
192
- infrastructureScore += 10;
193
- } else {
194
- issues.push({
195
- id: 'single-ip',
196
- severity: 'minor',
197
- category: 'dns',
198
- message: 'Single IP address detected (no redundancy)',
199
- scorePenalty: 5
200
- });
201
- }
202
-
203
- let totalScore = transportScore + securityScore + performanceScore + infrastructureScore;
204
-
205
- // Critical Overrides
206
- const criticalIssues = issues.filter(i => i.severity === 'critical');
207
- if (criticalIssues.length > 0) {
208
- totalScore = Math.min(totalScore, 39); // Cap at F (<40)
209
- }
210
-
211
- const grade = getGrade(totalScore);
212
-
213
- return {
214
- score: Math.round(totalScore),
215
- grade,
216
- issues,
217
- categoryScores: {
218
- transport: transportScore,
219
- security: securityScore,
220
- performance: performanceScore,
221
- infrastructure: infrastructureScore
222
- }
223
- };
224
- }
225
-
226
- function getGrade(score: number): 'A' | 'B' | 'C' | 'D' | 'F' {
227
- if (score >= 90) return 'A';
228
- if (score >= 75) return 'B';
229
- if (score >= 60) return 'C';
230
- if (score >= 40) return 'D';
231
- return 'F';
232
- }