@crawlith/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analysis_list.html +35 -0
  4. package/dist/analysis/analysis_page.html +123 -0
  5. package/dist/analysis/analyze.d.ts +40 -5
  6. package/dist/analysis/analyze.js +395 -347
  7. package/dist/analysis/clustering.d.ts +23 -0
  8. package/dist/analysis/clustering.js +206 -0
  9. package/dist/analysis/content.d.ts +1 -1
  10. package/dist/analysis/content.js +11 -5
  11. package/dist/analysis/duplicate.d.ts +34 -0
  12. package/dist/analysis/duplicate.js +305 -0
  13. package/dist/analysis/heading.d.ts +116 -0
  14. package/dist/analysis/heading.js +356 -0
  15. package/dist/analysis/images.d.ts +1 -1
  16. package/dist/analysis/images.js +6 -5
  17. package/dist/analysis/links.d.ts +1 -1
  18. package/dist/analysis/links.js +8 -8
  19. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  20. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  21. package/dist/analysis/scoring.js +11 -2
  22. package/dist/analysis/seo.d.ts +8 -4
  23. package/dist/analysis/seo.js +41 -30
  24. package/dist/analysis/soft404.d.ts +17 -0
  25. package/dist/analysis/soft404.js +62 -0
  26. package/dist/analysis/structuredData.d.ts +1 -1
  27. package/dist/analysis/structuredData.js +5 -4
  28. package/dist/analysis/templates.d.ts +2 -0
  29. package/dist/analysis/templates.js +7 -0
  30. package/dist/application/index.d.ts +2 -0
  31. package/dist/application/index.js +2 -0
  32. package/dist/application/usecase.d.ts +3 -0
  33. package/dist/application/usecase.js +1 -0
  34. package/dist/application/usecases.d.ts +114 -0
  35. package/dist/application/usecases.js +201 -0
  36. package/dist/audit/index.js +1 -1
  37. package/dist/audit/transport.d.ts +1 -1
  38. package/dist/audit/transport.js +5 -4
  39. package/dist/audit/types.d.ts +1 -0
  40. package/dist/constants.d.ts +17 -0
  41. package/dist/constants.js +23 -0
  42. package/dist/core/scope/scopeManager.js +3 -0
  43. package/dist/core/security/ipGuard.d.ts +11 -0
  44. package/dist/core/security/ipGuard.js +71 -3
  45. package/dist/crawler/crawl.d.ts +4 -22
  46. package/dist/crawler/crawl.js +4 -335
  47. package/dist/crawler/crawler.d.ts +87 -0
  48. package/dist/crawler/crawler.js +683 -0
  49. package/dist/crawler/extract.d.ts +4 -1
  50. package/dist/crawler/extract.js +7 -2
  51. package/dist/crawler/fetcher.d.ts +2 -1
  52. package/dist/crawler/fetcher.js +26 -11
  53. package/dist/crawler/metricsRunner.d.ts +23 -1
  54. package/dist/crawler/metricsRunner.js +202 -72
  55. package/dist/crawler/normalize.d.ts +41 -0
  56. package/dist/crawler/normalize.js +119 -3
  57. package/dist/crawler/parser.d.ts +1 -3
  58. package/dist/crawler/parser.js +2 -49
  59. package/dist/crawler/resolver.d.ts +11 -0
  60. package/dist/crawler/resolver.js +67 -0
  61. package/dist/crawler/sitemap.d.ts +6 -0
  62. package/dist/crawler/sitemap.js +27 -17
  63. package/dist/crawler/trap.d.ts +5 -1
  64. package/dist/crawler/trap.js +23 -2
  65. package/dist/db/CrawlithDB.d.ts +110 -0
  66. package/dist/db/CrawlithDB.js +500 -0
  67. package/dist/db/graphLoader.js +42 -30
  68. package/dist/db/index.d.ts +11 -0
  69. package/dist/db/index.js +41 -29
  70. package/dist/db/migrations.d.ts +2 -0
  71. package/dist/db/{schema.js → migrations.js} +90 -43
  72. package/dist/db/pluginRegistry.d.ts +9 -0
  73. package/dist/db/pluginRegistry.js +19 -0
  74. package/dist/db/repositories/EdgeRepository.d.ts +13 -0
  75. package/dist/db/repositories/EdgeRepository.js +20 -0
  76. package/dist/db/repositories/MetricsRepository.d.ts +16 -8
  77. package/dist/db/repositories/MetricsRepository.js +28 -7
  78. package/dist/db/repositories/PageRepository.d.ts +15 -2
  79. package/dist/db/repositories/PageRepository.js +169 -25
  80. package/dist/db/repositories/SiteRepository.d.ts +9 -0
  81. package/dist/db/repositories/SiteRepository.js +13 -0
  82. package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
  83. package/dist/db/repositories/SnapshotRepository.js +64 -5
  84. package/dist/db/reset.d.ts +9 -0
  85. package/dist/db/reset.js +32 -0
  86. package/dist/db/statements.d.ts +12 -0
  87. package/dist/db/statements.js +40 -0
  88. package/dist/diff/compare.d.ts +0 -5
  89. package/dist/diff/compare.js +0 -12
  90. package/dist/diff/service.d.ts +16 -0
  91. package/dist/diff/service.js +41 -0
  92. package/dist/domain/index.d.ts +4 -0
  93. package/dist/domain/index.js +4 -0
  94. package/dist/events.d.ts +56 -0
  95. package/dist/events.js +1 -0
  96. package/dist/graph/graph.d.ts +36 -42
  97. package/dist/graph/graph.js +26 -17
  98. package/dist/graph/hits.d.ts +23 -0
  99. package/dist/graph/hits.js +111 -0
  100. package/dist/graph/metrics.d.ts +0 -4
  101. package/dist/graph/metrics.js +25 -9
  102. package/dist/graph/pagerank.d.ts +17 -4
  103. package/dist/graph/pagerank.js +126 -91
  104. package/dist/graph/simhash.d.ts +6 -0
  105. package/dist/graph/simhash.js +14 -0
  106. package/dist/index.d.ts +29 -8
  107. package/dist/index.js +29 -8
  108. package/dist/lock/hashKey.js +1 -1
  109. package/dist/lock/lockManager.d.ts +5 -1
  110. package/dist/lock/lockManager.js +38 -13
  111. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  112. package/dist/plugin-system/plugin-cli.js +31 -0
  113. package/dist/plugin-system/plugin-config.d.ts +16 -0
  114. package/dist/plugin-system/plugin-config.js +36 -0
  115. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  116. package/dist/plugin-system/plugin-loader.js +122 -0
  117. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  118. package/dist/plugin-system/plugin-registry.js +167 -0
  119. package/dist/plugin-system/plugin-types.d.ts +205 -0
  120. package/dist/plugin-system/plugin-types.js +1 -0
  121. package/dist/ports/index.d.ts +9 -0
  122. package/dist/ports/index.js +1 -0
  123. package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
  124. package/dist/report/crawlExport.d.ts +3 -0
  125. package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
  126. package/dist/report/crawl_template.d.ts +1 -0
  127. package/dist/report/crawl_template.js +7 -0
  128. package/dist/report/export.d.ts +3 -0
  129. package/dist/report/export.js +81 -0
  130. package/dist/report/html.js +15 -216
  131. package/dist/report/insight.d.ts +27 -0
  132. package/dist/report/insight.js +103 -0
  133. package/dist/scoring/health.d.ts +56 -0
  134. package/dist/scoring/health.js +213 -0
  135. package/dist/utils/chalk.d.ts +6 -0
  136. package/dist/utils/chalk.js +41 -0
  137. package/dist/utils/secureConfig.d.ts +23 -0
  138. package/dist/utils/secureConfig.js +128 -0
  139. package/package.json +12 -6
  140. package/CHANGELOG.md +0 -7
  141. package/dist/db/schema.d.ts +0 -2
  142. package/dist/graph/cluster.d.ts +0 -6
  143. package/dist/graph/cluster.js +0 -173
  144. package/dist/graph/duplicate.d.ts +0 -10
  145. package/dist/graph/duplicate.js +0 -251
  146. package/dist/report/sitegraphExport.d.ts +0 -3
  147. package/dist/report/sitegraph_template.d.ts +0 -1
  148. package/dist/report/sitegraph_template.js +0 -630
  149. package/dist/scoring/hits.d.ts +0 -9
  150. package/dist/scoring/hits.js +0 -111
  151. package/src/analysis/analyze.ts +0 -548
  152. package/src/analysis/content.ts +0 -62
  153. package/src/analysis/images.ts +0 -28
  154. package/src/analysis/links.ts +0 -41
  155. package/src/analysis/scoring.ts +0 -59
  156. package/src/analysis/seo.ts +0 -82
  157. package/src/analysis/structuredData.ts +0 -62
  158. package/src/audit/dns.ts +0 -49
  159. package/src/audit/headers.ts +0 -98
  160. package/src/audit/index.ts +0 -66
  161. package/src/audit/scoring.ts +0 -232
  162. package/src/audit/transport.ts +0 -258
  163. package/src/audit/types.ts +0 -102
  164. package/src/core/network/proxyAdapter.ts +0 -21
  165. package/src/core/network/rateLimiter.ts +0 -39
  166. package/src/core/network/redirectController.ts +0 -47
  167. package/src/core/network/responseLimiter.ts +0 -34
  168. package/src/core/network/retryPolicy.ts +0 -57
  169. package/src/core/scope/domainFilter.ts +0 -45
  170. package/src/core/scope/scopeManager.ts +0 -52
  171. package/src/core/scope/subdomainPolicy.ts +0 -39
  172. package/src/core/security/ipGuard.ts +0 -92
  173. package/src/crawler/crawl.ts +0 -382
  174. package/src/crawler/extract.ts +0 -34
  175. package/src/crawler/fetcher.ts +0 -233
  176. package/src/crawler/metricsRunner.ts +0 -124
  177. package/src/crawler/normalize.ts +0 -108
  178. package/src/crawler/parser.ts +0 -190
  179. package/src/crawler/sitemap.ts +0 -73
  180. package/src/crawler/trap.ts +0 -96
  181. package/src/db/graphLoader.ts +0 -105
  182. package/src/db/index.ts +0 -70
  183. package/src/db/repositories/EdgeRepository.ts +0 -29
  184. package/src/db/repositories/MetricsRepository.ts +0 -49
  185. package/src/db/repositories/PageRepository.ts +0 -128
  186. package/src/db/repositories/SiteRepository.ts +0 -32
  187. package/src/db/repositories/SnapshotRepository.ts +0 -74
  188. package/src/db/schema.ts +0 -177
  189. package/src/diff/compare.ts +0 -84
  190. package/src/graph/cluster.ts +0 -192
  191. package/src/graph/duplicate.ts +0 -286
  192. package/src/graph/graph.ts +0 -172
  193. package/src/graph/metrics.ts +0 -110
  194. package/src/graph/pagerank.ts +0 -125
  195. package/src/graph/simhash.ts +0 -61
  196. package/src/index.ts +0 -30
  197. package/src/lock/hashKey.ts +0 -51
  198. package/src/lock/lockManager.ts +0 -124
  199. package/src/lock/pidCheck.ts +0 -13
  200. package/src/report/html.ts +0 -227
  201. package/src/report/sitegraphExport.ts +0 -58
  202. package/src/scoring/hits.ts +0 -131
  203. package/src/scoring/orphanSeverity.ts +0 -176
  204. package/src/utils/version.ts +0 -18
  205. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  206. package/tests/analysis.unit.test.ts +0 -98
  207. package/tests/analyze.integration.test.ts +0 -98
  208. package/tests/audit/dns.test.ts +0 -31
  209. package/tests/audit/headers.test.ts +0 -45
  210. package/tests/audit/scoring.test.ts +0 -133
  211. package/tests/audit/security.test.ts +0 -12
  212. package/tests/audit/transport.test.ts +0 -112
  213. package/tests/clustering.test.ts +0 -118
  214. package/tests/crawler.test.ts +0 -358
  215. package/tests/db.test.ts +0 -159
  216. package/tests/diff.test.ts +0 -67
  217. package/tests/duplicate.test.ts +0 -110
  218. package/tests/fetcher.test.ts +0 -106
  219. package/tests/fetcher_safety.test.ts +0 -85
  220. package/tests/fixtures/analyze-crawl.json +0 -26
  221. package/tests/hits.test.ts +0 -134
  222. package/tests/html_report.test.ts +0 -58
  223. package/tests/lock/lockManager.test.ts +0 -138
  224. package/tests/metrics.test.ts +0 -196
  225. package/tests/normalize.test.ts +0 -101
  226. package/tests/orphanSeverity.test.ts +0 -160
  227. package/tests/pagerank.test.ts +0 -98
  228. package/tests/parser.test.ts +0 -117
  229. package/tests/proxy_safety.test.ts +0 -57
  230. package/tests/redirect_safety.test.ts +0 -73
  231. package/tests/safety.test.ts +0 -114
  232. package/tests/scope.test.ts +0 -66
  233. package/tests/scoring.test.ts +0 -59
  234. package/tests/sitemap.test.ts +0 -88
  235. package/tests/soft404.test.ts +0 -41
  236. package/tests/trap.test.ts +0 -39
  237. package/tests/visualization_data.test.ts +0 -46
  238. package/tsconfig.json +0 -11
package/src/analysis/content.ts DELETED
@@ -1,62 +0,0 @@
1
- import { load } from 'cheerio';
2
-
3
- export interface ContentAnalysis {
4
- wordCount: number;
5
- textHtmlRatio: number;
6
- uniqueSentenceCount: number;
7
- }
8
-
9
- export interface ThinScoreWeights {
10
- lowWordWeight: number;
11
- ratioWeight: number;
12
- dupWeight: number;
13
- }
14
-
15
- const DEFAULT_WEIGHTS: ThinScoreWeights = {
16
- lowWordWeight: 0.4,
17
- ratioWeight: 0.35,
18
- dupWeight: 0.25
19
- };
20
-
21
- export function analyzeContent(html: string): ContentAnalysis {
22
- const $ = load(html || '<html></html>');
23
- $('script,style,nav,footer').remove();
24
-
25
- const text = $('body').length ? $('body').text() : $.text();
26
- const cleanText = text.replace(/\s+/g, ' ').trim();
27
-
28
- const words = cleanText ? cleanText.split(/\s+/).filter(Boolean) : [];
29
- const wordCount = words.length;
30
-
31
- const htmlLength = Math.max(html.length, 1);
32
- const textHtmlRatio = cleanText.length / htmlLength;
33
-
34
- const sentenceSet = new Set(
35
- cleanText
36
- .split(/[.!?]+/)
37
- .map((item) => item.trim().toLowerCase())
38
- .filter(Boolean)
39
- );
40
-
41
- return {
42
- wordCount,
43
- textHtmlRatio,
44
- uniqueSentenceCount: sentenceSet.size
45
- };
46
- }
47
-
48
- export function calculateThinContentScore(
49
- content: ContentAnalysis,
50
- duplicationScore: number,
51
- weights: ThinScoreWeights = DEFAULT_WEIGHTS
52
- ): number {
53
- const wordScore = content.wordCount >= 300 ? 0 : 100 - Math.min(100, (content.wordCount / 300) * 100);
54
- const textRatioScore = content.textHtmlRatio >= 0.2 ? 0 : 100 - Math.min(100, (content.textHtmlRatio / 0.2) * 100);
55
-
56
- const raw =
57
- weights.lowWordWeight * wordScore +
58
- weights.ratioWeight * textRatioScore +
59
- weights.dupWeight * duplicationScore;
60
-
61
- return Math.max(0, Math.min(100, Number(raw.toFixed(2))));
62
- }
package/src/analysis/images.ts DELETED
@@ -1,28 +0,0 @@
1
- import { load } from 'cheerio';
2
-
3
- export interface ImageAltAnalysis {
4
- totalImages: number;
5
- missingAlt: number;
6
- emptyAlt: number;
7
- }
8
-
9
- export function analyzeImageAlts(html: string): ImageAltAnalysis {
10
- const $ = load(html);
11
- let missingAlt = 0;
12
- let emptyAlt = 0;
13
-
14
- $('img').each((_idx, el) => {
15
- const alt = $(el).attr('alt');
16
- if (alt === undefined) {
17
- missingAlt += 1;
18
- return;
19
- }
20
-
21
- if (!alt.trim()) {
22
- emptyAlt += 1;
23
- }
24
- });
25
-
26
- const totalImages = $('img').length;
27
- return { totalImages, missingAlt, emptyAlt };
28
- }
package/src/analysis/links.ts DELETED
@@ -1,41 +0,0 @@
1
- import { load } from 'cheerio';
2
- import { normalizeUrl } from '../crawler/normalize.js';
3
-
4
- export interface LinkRatioAnalysis {
5
- internalLinks: number;
6
- externalLinks: number;
7
- nofollowCount: number;
8
- externalRatio: number;
9
- }
10
-
11
- export function analyzeLinks(html: string, pageUrl: string, rootUrl: string): LinkRatioAnalysis {
12
- const $ = load(html);
13
- const rootOrigin = new URL(rootUrl).origin;
14
-
15
- let internalLinks = 0;
16
- let externalLinks = 0;
17
- let nofollowCount = 0;
18
-
19
- $('a[href]').each((_idx, el) => {
20
- const href = $(el).attr('href');
21
- if (!href) return;
22
- const normalized = normalizeUrl(href, pageUrl, { stripQuery: false });
23
- if (!normalized) return;
24
-
25
- const rel = ($(el).attr('rel') || '').toLowerCase();
26
- if (rel.includes('nofollow')) {
27
- nofollowCount += 1;
28
- }
29
-
30
- if (new URL(normalized).origin === rootOrigin) {
31
- internalLinks += 1;
32
- } else {
33
- externalLinks += 1;
34
- }
35
- });
36
-
37
- const total = internalLinks + externalLinks;
38
- const externalRatio = total === 0 ? 0 : externalLinks / total;
39
-
40
- return { internalLinks, externalLinks, nofollowCount, externalRatio };
41
- }
package/src/analysis/scoring.ts DELETED
@@ -1,59 +0,0 @@
1
- import { Metrics } from '../graph/metrics.js';
2
- import type { PageAnalysis } from './analyze.js';
3
-
4
- export interface SiteScore {
5
- seoHealthScore: number;
6
- authorityEntropyOrphanScore: number;
7
- overallScore: number;
8
- }
9
-
10
- export function scorePageSeo(page: PageAnalysis): number {
11
- const titleMeta = (scoreTextStatus(page.title.status) + scoreTextStatus(page.metaDescription.status)) / 2;
12
- const h1 = page.h1.status === 'ok' ? 100 : page.h1.status === 'warning' ? 60 : 10;
13
- const wordQuality = Math.min(100, (page.content.wordCount / 600) * 100) * 0.7 + Math.min(100, page.content.textHtmlRatio * 500) * 0.3;
14
- const thin = 100 - page.thinScore;
15
- const imageDen = Math.max(1, page.images.totalImages);
16
- const imageAlt = Math.max(0, 100 - ((page.images.missingAlt + page.images.emptyAlt) / imageDen) * 100);
17
- const structured = page.structuredData.present ? (page.structuredData.valid ? 100 : 40) : 30;
18
- const linkBalance = Math.max(0, 100 - Math.abs(page.links.externalRatio - 0.3) * 200);
19
-
20
- const score =
21
- titleMeta * 0.15 +
22
- h1 * 0.1 +
23
- wordQuality * 0.2 +
24
- thin * 0.2 +
25
- imageAlt * 0.1 +
26
- structured * 0.1 +
27
- linkBalance * 0.15;
28
-
29
- return Number(Math.max(0, Math.min(100, score)).toFixed(2));
30
- }
31
-
32
- function scoreTextStatus(status: PageAnalysis['title']['status']): number {
33
- switch (status) {
34
- case 'ok': return 100;
35
- case 'duplicate': return 45;
36
- case 'too_short': return 60;
37
- case 'too_long': return 60;
38
- case 'missing': return 0;
39
- }
40
- }
41
-
42
- export function aggregateSiteScore(metrics: Metrics, pages: PageAnalysis[]): SiteScore {
43
- const seoHealthScore = pages.length === 0 ? 0 : pages.reduce((acc, page) => acc + page.seoScore, 0) / pages.length;
44
-
45
- const avgAuthority = metrics.topAuthorityPages.length === 0
46
- ? 0
47
- : metrics.topAuthorityPages.reduce((acc, item) => acc + item.authority, 0) / metrics.topAuthorityPages.length;
48
- const entropyScore = Math.max(0, 100 - Math.abs(metrics.structuralEntropy - 2) * 25);
49
- const orphanPenalty = metrics.totalPages === 0 ? 0 : (metrics.orphanPages.length / metrics.totalPages) * 100;
50
- const authorityEntropyOrphanScore = Math.max(0, Math.min(100, (avgAuthority * 100 * 0.4) + (entropyScore * 0.35) + ((100 - orphanPenalty) * 0.25)));
51
-
52
- const overallScore = Number((seoHealthScore * 0.7 + authorityEntropyOrphanScore * 0.3).toFixed(2));
53
-
54
- return {
55
- seoHealthScore: Number(seoHealthScore.toFixed(2)),
56
- authorityEntropyOrphanScore: Number(authorityEntropyOrphanScore.toFixed(2)),
57
- overallScore
58
- };
59
- }
package/src/analysis/seo.ts DELETED
@@ -1,82 +0,0 @@
1
- import { load } from 'cheerio';
2
-
3
- export type SeoStatus = 'ok' | 'missing' | 'too_short' | 'too_long' | 'duplicate';
4
-
5
- export interface TextFieldAnalysis {
6
- value: string | null;
7
- length: number;
8
- status: SeoStatus;
9
- }
10
-
11
- export interface H1Analysis {
12
- count: number;
13
- status: 'ok' | 'critical' | 'warning';
14
- matchesTitle: boolean;
15
- }
16
-
17
- function normalizedText(value: string | null): string {
18
- return (value ?? '').trim().toLowerCase();
19
- }
20
-
21
- export function analyzeTitle(html: string): TextFieldAnalysis {
22
- const $ = load(html);
23
- const title = $('title').first().text().trim();
24
- if (!title) {
25
- return { value: null, length: 0, status: 'missing' };
26
- }
27
-
28
- if (title.length < 50) return { value: title, length: title.length, status: 'too_short' };
29
- if (title.length > 60) return { value: title, length: title.length, status: 'too_long' };
30
- return { value: title, length: title.length, status: 'ok' };
31
- }
32
-
33
- export function analyzeMetaDescription(html: string): TextFieldAnalysis {
34
- const $ = load(html);
35
- const raw = $('meta[name="description"]').attr('content');
36
- if (raw === undefined) {
37
- return { value: null, length: 0, status: 'missing' };
38
- }
39
-
40
- const description = raw.trim();
41
- if (!description) {
42
- return { value: '', length: 0, status: 'missing' };
43
- }
44
-
45
- if (description.length < 140) return { value: description, length: description.length, status: 'too_short' };
46
- if (description.length > 160) return { value: description, length: description.length, status: 'too_long' };
47
- return { value: description, length: description.length, status: 'ok' };
48
- }
49
-
50
- export function applyDuplicateStatuses<T extends TextFieldAnalysis>(fields: T[]): T[] {
51
- const counts = new Map<string, number>();
52
- for (const field of fields) {
53
- const key = normalizedText(field.value);
54
- if (!key) continue;
55
- counts.set(key, (counts.get(key) || 0) + 1);
56
- }
57
-
58
- return fields.map((field) => {
59
- const key = normalizedText(field.value);
60
- if (!key) return field;
61
- if ((counts.get(key) || 0) > 1) {
62
- return { ...field, status: 'duplicate' };
63
- }
64
- return field;
65
- });
66
- }
67
-
68
- export function analyzeH1(html: string, titleValue: string | null): H1Analysis {
69
- const $ = load(html);
70
- const h1Values = $('h1').toArray().map((el) => $(el).text().trim()).filter(Boolean);
71
- const count = h1Values.length;
72
- const first = h1Values[0] || null;
73
- const matchesTitle = Boolean(first && titleValue && normalizedText(first) === normalizedText(titleValue));
74
-
75
- if (count === 0) {
76
- return { count, status: 'critical', matchesTitle };
77
- }
78
- if (count > 1) {
79
- return { count, status: 'warning', matchesTitle };
80
- }
81
- return { count, status: 'ok', matchesTitle };
82
- }
package/src/analysis/structuredData.ts DELETED
@@ -1,62 +0,0 @@
1
- import { load } from 'cheerio';
2
-
3
- export interface StructuredDataResult {
4
- present: boolean;
5
- types: string[];
6
- valid: boolean;
7
- }
8
-
9
- export function analyzeStructuredData(html: string): StructuredDataResult {
10
- const $ = load(html);
11
- const scripts = $('script[type="application/ld+json"]').toArray();
12
- if (scripts.length === 0) {
13
- return { present: false, types: [], valid: false };
14
- }
15
-
16
- const types = new Set<string>();
17
- let valid = true;
18
-
19
- for (const script of scripts) {
20
- const raw = $(script).text().trim();
21
- if (!raw) {
22
- valid = false;
23
- continue;
24
- }
25
-
26
- try {
27
- const parsed = JSON.parse(raw);
28
- extractTypes(parsed, types);
29
- } catch {
30
- valid = false;
31
- }
32
- }
33
-
34
- return {
35
- present: true,
36
- valid,
37
- types: Array.from(types)
38
- };
39
- }
40
-
41
- function extractTypes(input: unknown, types: Set<string>): void {
42
- if (Array.isArray(input)) {
43
- input.forEach((item) => extractTypes(item, types));
44
- return;
45
- }
46
-
47
- if (!input || typeof input !== 'object') return;
48
-
49
- const maybeType = (input as Record<string, unknown>)['@type'];
50
- if (typeof maybeType === 'string') {
51
- types.add(maybeType);
52
- } else if (Array.isArray(maybeType)) {
53
- for (const item of maybeType) {
54
- if (typeof item === 'string') types.add(item);
55
- }
56
- }
57
-
58
- const graph = (input as Record<string, unknown>)['@graph'];
59
- if (Array.isArray(graph)) {
60
- graph.forEach((item) => extractTypes(item, types));
61
- }
62
- }
package/src/audit/dns.ts DELETED
@@ -1,49 +0,0 @@
1
- import dns from 'node:dns/promises';
2
- import { DnsDiagnostics } from './types.js';
3
-
4
- export async function resolveDns(hostname: string): Promise<DnsDiagnostics> {
5
- const start = performance.now();
6
-
7
- const result: DnsDiagnostics = {
8
- a: [],
9
- aaaa: [],
10
- cname: [],
11
- reverse: [],
12
- ipCount: 0,
13
- ipv6Support: false,
14
- resolutionTime: 0
15
- };
16
-
17
- try {
18
- // We run these in parallel
19
- const [a, aaaa, cname] = await Promise.all([
20
- dns.resolve4(hostname).catch(() => [] as string[]),
21
- dns.resolve6(hostname).catch(() => [] as string[]),
22
- dns.resolveCname(hostname).catch(() => [] as string[])
23
- ]);
24
-
25
- result.a = a;
26
- result.aaaa = aaaa;
27
- result.cname = cname;
28
- result.ipCount = a.length + aaaa.length;
29
- result.ipv6Support = aaaa.length > 0;
30
-
31
- // Try reverse lookup on first IP if available
32
- const ipToReverse = a.length > 0 ? a[0] : (aaaa.length > 0 ? aaaa[0] : null);
33
-
34
- if (ipToReverse) {
35
- try {
36
- result.reverse = await dns.reverse(ipToReverse);
37
- } catch {
38
- // Reverse lookup failed, ignore
39
- }
40
- }
41
-
42
- } catch (_error) {
43
- // DNS resolution failed entirely or other error
44
- // We return empty result but with time measured
45
- }
46
-
47
- result.resolutionTime = performance.now() - start;
48
- return result;
49
- }
package/src/audit/headers.ts DELETED
@@ -1,98 +0,0 @@
1
- import { SecurityHeadersResult, HeaderStatus } from './types.js';
2
-
3
- export function analyzeHeaders(headers: Record<string, string | string[] | undefined>): SecurityHeadersResult {
4
- const normalized: Record<string, string> = {};
5
- for (const [key, value] of Object.entries(headers)) {
6
- if (typeof value === 'string') {
7
- normalized[key.toLowerCase()] = value;
8
- } else if (Array.isArray(value)) {
9
- normalized[key.toLowerCase()] = value.join(', ');
10
- }
11
- }
12
-
13
- const result: SecurityHeadersResult = {
14
- strictTransportSecurity: checkHSTS(normalized['strict-transport-security']),
15
- contentSecurityPolicy: checkCSP(normalized['content-security-policy']),
16
- xFrameOptions: checkXFrameOptions(normalized['x-frame-options']),
17
- xContentTypeOptions: checkXContentTypeOptions(normalized['x-content-type-options']),
18
- referrerPolicy: checkReferrerPolicy(normalized['referrer-policy']),
19
- permissionsPolicy: checkPermissionsPolicy(normalized['permissions-policy']),
20
- details: normalized,
21
- score: 0
22
- };
23
-
24
- // Calculate internal score (0-100) based on presence and validity
25
- let score = 0;
26
- const weights = {
27
- hsts: 30,
28
- csp: 25,
29
- xframe: 15,
30
- xcontent: 15,
31
- referrer: 10,
32
- permissions: 5
33
- };
34
-
35
- if (result.strictTransportSecurity.valid) score += weights.hsts;
36
- if (result.contentSecurityPolicy.valid) score += weights.csp;
37
- if (result.xFrameOptions.valid) score += weights.xframe;
38
- if (result.xContentTypeOptions.valid) score += weights.xcontent;
39
- if (result.referrerPolicy.valid) score += weights.referrer;
40
- if (result.permissionsPolicy.valid) score += weights.permissions;
41
-
42
- result.score = score;
43
-
44
- return result;
45
- }
46
-
47
- function checkHSTS(value: string | undefined): HeaderStatus {
48
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing HSTS header'] };
49
-
50
- const valid = value.includes('max-age=');
51
- const issues: string[] = [];
52
- if (!valid) issues.push('Missing max-age directive');
53
- if (!value.includes('includeSubDomains')) issues.push('Missing includeSubDomains');
54
-
55
- return { present: true, value, valid, issues };
56
- }
57
-
58
- function checkCSP(value: string | undefined): HeaderStatus {
59
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing CSP header'] };
60
-
61
- // Basic check: non-empty
62
- return { present: true, value, valid: value.length > 0, issues: [] };
63
- }
64
-
65
- function checkXFrameOptions(value: string | undefined): HeaderStatus {
66
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing X-Frame-Options'] };
67
-
68
- const upper = value.toUpperCase();
69
- const valid = upper === 'DENY' || upper === 'SAMEORIGIN';
70
- return {
71
- present: true,
72
- value,
73
- valid,
74
- issues: valid ? [] : [`Invalid value: ${value}`]
75
- };
76
- }
77
-
78
- function checkXContentTypeOptions(value: string | undefined): HeaderStatus {
79
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing X-Content-Type-Options'] };
80
-
81
- const valid = value.toLowerCase() === 'nosniff';
82
- return {
83
- present: true,
84
- value,
85
- valid,
86
- issues: valid ? [] : [`Invalid value: ${value}`]
87
- };
88
- }
89
-
90
- function checkReferrerPolicy(value: string | undefined): HeaderStatus {
91
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing Referrer-Policy'] };
92
- return { present: true, value, valid: true, issues: [] };
93
- }
94
-
95
- function checkPermissionsPolicy(value: string | undefined): HeaderStatus {
96
- if (!value) return { present: false, value: null, valid: false, issues: ['Missing Permissions-Policy'] };
97
- return { present: true, value, valid: true, issues: [] };
98
- }
package/src/audit/index.ts DELETED
@@ -1,66 +0,0 @@
1
- import { resolveDns } from './dns.js';
2
- import { analyzeTransport } from './transport.js';
3
- import { analyzeHeaders } from './headers.js';
4
- import { calculateScore } from './scoring.js';
5
- import { AuditResult, AuditOptions } from './types.js';
6
- import { URL } from 'node:url';
7
- import { IPGuard } from '../core/security/ipGuard.js';
8
-
9
- export async function auditUrl(urlStr: string, options: AuditOptions = {}): Promise<AuditResult> {
10
- const timeout = options.timeout || 10000;
11
-
12
- // 1. Basic URL validation
13
- let url: URL;
14
- try {
15
- url = new URL(urlStr);
16
- if (!['http:', 'https:'].includes(url.protocol)) {
17
- throw new Error('Only HTTP and HTTPS protocols are supported');
18
- }
19
- } catch (error: any) {
20
- throw new Error(`Invalid URL: ${error.message}`, { cause: error });
21
- }
22
-
23
- // 2. SSRF Guard
24
- const isSafe = await IPGuard.validateHost(url.hostname);
25
- if (!isSafe) {
26
- throw new Error('Access to internal or private infrastructure is prohibited');
27
- }
28
-
29
- // 3. Parallelize DNS and Transport
30
- // We handle transport errors differently as they are fatal for the audit (e.g. connection refused)
31
- // DNS errors might return partial results but usually if transport works, DNS worked (unless transport used IP)
32
-
33
- const dnsPromise = resolveDns(url.hostname);
34
- const transportPromise = analyzeTransport(urlStr, timeout);
35
-
36
- const [dnsResult, transportResult] = await Promise.all([
37
- dnsPromise,
38
- transportPromise
39
- ]);
40
-
41
- // 3. Analyze Headers
42
- const headersResult = analyzeHeaders(transportResult.transport.headers);
43
-
44
- // 4. Calculate Score
45
- const scoringResult = calculateScore(
46
- transportResult.transport,
47
- dnsResult,
48
- headersResult,
49
- transportResult.performance,
50
- transportResult.issues
51
- );
52
-
53
- // 5. Build Result
54
- const result: AuditResult = {
55
- url: urlStr,
56
- transport: transportResult.transport,
57
- securityHeaders: headersResult,
58
- dns: dnsResult,
59
- performance: transportResult.performance,
60
- score: scoringResult.score,
61
- grade: scoringResult.grade,
62
- issues: scoringResult.issues
63
- };
64
-
65
- return result;
66
- }