@crawlith/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analysis_list.html +35 -0
  4. package/dist/analysis/analysis_page.html +123 -0
  5. package/dist/analysis/analyze.d.ts +40 -5
  6. package/dist/analysis/analyze.js +395 -347
  7. package/dist/analysis/clustering.d.ts +23 -0
  8. package/dist/analysis/clustering.js +206 -0
  9. package/dist/analysis/content.d.ts +1 -1
  10. package/dist/analysis/content.js +11 -5
  11. package/dist/analysis/duplicate.d.ts +34 -0
  12. package/dist/analysis/duplicate.js +305 -0
  13. package/dist/analysis/heading.d.ts +116 -0
  14. package/dist/analysis/heading.js +356 -0
  15. package/dist/analysis/images.d.ts +1 -1
  16. package/dist/analysis/images.js +6 -5
  17. package/dist/analysis/links.d.ts +1 -1
  18. package/dist/analysis/links.js +8 -8
  19. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  20. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  21. package/dist/analysis/scoring.js +11 -2
  22. package/dist/analysis/seo.d.ts +8 -4
  23. package/dist/analysis/seo.js +41 -30
  24. package/dist/analysis/soft404.d.ts +17 -0
  25. package/dist/analysis/soft404.js +62 -0
  26. package/dist/analysis/structuredData.d.ts +1 -1
  27. package/dist/analysis/structuredData.js +5 -4
  28. package/dist/analysis/templates.d.ts +2 -0
  29. package/dist/analysis/templates.js +7 -0
  30. package/dist/application/index.d.ts +2 -0
  31. package/dist/application/index.js +2 -0
  32. package/dist/application/usecase.d.ts +3 -0
  33. package/dist/application/usecase.js +1 -0
  34. package/dist/application/usecases.d.ts +114 -0
  35. package/dist/application/usecases.js +201 -0
  36. package/dist/audit/index.js +1 -1
  37. package/dist/audit/transport.d.ts +1 -1
  38. package/dist/audit/transport.js +5 -4
  39. package/dist/audit/types.d.ts +1 -0
  40. package/dist/constants.d.ts +17 -0
  41. package/dist/constants.js +23 -0
  42. package/dist/core/scope/scopeManager.js +3 -0
  43. package/dist/core/security/ipGuard.d.ts +11 -0
  44. package/dist/core/security/ipGuard.js +71 -3
  45. package/dist/crawler/crawl.d.ts +4 -22
  46. package/dist/crawler/crawl.js +4 -335
  47. package/dist/crawler/crawler.d.ts +87 -0
  48. package/dist/crawler/crawler.js +683 -0
  49. package/dist/crawler/extract.d.ts +4 -1
  50. package/dist/crawler/extract.js +7 -2
  51. package/dist/crawler/fetcher.d.ts +2 -1
  52. package/dist/crawler/fetcher.js +26 -11
  53. package/dist/crawler/metricsRunner.d.ts +23 -1
  54. package/dist/crawler/metricsRunner.js +202 -72
  55. package/dist/crawler/normalize.d.ts +41 -0
  56. package/dist/crawler/normalize.js +119 -3
  57. package/dist/crawler/parser.d.ts +1 -3
  58. package/dist/crawler/parser.js +2 -49
  59. package/dist/crawler/resolver.d.ts +11 -0
  60. package/dist/crawler/resolver.js +67 -0
  61. package/dist/crawler/sitemap.d.ts +6 -0
  62. package/dist/crawler/sitemap.js +27 -17
  63. package/dist/crawler/trap.d.ts +5 -1
  64. package/dist/crawler/trap.js +23 -2
  65. package/dist/db/CrawlithDB.d.ts +110 -0
  66. package/dist/db/CrawlithDB.js +500 -0
  67. package/dist/db/graphLoader.js +42 -30
  68. package/dist/db/index.d.ts +11 -0
  69. package/dist/db/index.js +41 -29
  70. package/dist/db/migrations.d.ts +2 -0
  71. package/dist/db/{schema.js → migrations.js} +90 -43
  72. package/dist/db/pluginRegistry.d.ts +9 -0
  73. package/dist/db/pluginRegistry.js +19 -0
  74. package/dist/db/repositories/EdgeRepository.d.ts +13 -0
  75. package/dist/db/repositories/EdgeRepository.js +20 -0
  76. package/dist/db/repositories/MetricsRepository.d.ts +16 -8
  77. package/dist/db/repositories/MetricsRepository.js +28 -7
  78. package/dist/db/repositories/PageRepository.d.ts +15 -2
  79. package/dist/db/repositories/PageRepository.js +169 -25
  80. package/dist/db/repositories/SiteRepository.d.ts +9 -0
  81. package/dist/db/repositories/SiteRepository.js +13 -0
  82. package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
  83. package/dist/db/repositories/SnapshotRepository.js +64 -5
  84. package/dist/db/reset.d.ts +9 -0
  85. package/dist/db/reset.js +32 -0
  86. package/dist/db/statements.d.ts +12 -0
  87. package/dist/db/statements.js +40 -0
  88. package/dist/diff/compare.d.ts +0 -5
  89. package/dist/diff/compare.js +0 -12
  90. package/dist/diff/service.d.ts +16 -0
  91. package/dist/diff/service.js +41 -0
  92. package/dist/domain/index.d.ts +4 -0
  93. package/dist/domain/index.js +4 -0
  94. package/dist/events.d.ts +56 -0
  95. package/dist/events.js +1 -0
  96. package/dist/graph/graph.d.ts +36 -42
  97. package/dist/graph/graph.js +26 -17
  98. package/dist/graph/hits.d.ts +23 -0
  99. package/dist/graph/hits.js +111 -0
  100. package/dist/graph/metrics.d.ts +0 -4
  101. package/dist/graph/metrics.js +25 -9
  102. package/dist/graph/pagerank.d.ts +17 -4
  103. package/dist/graph/pagerank.js +126 -91
  104. package/dist/graph/simhash.d.ts +6 -0
  105. package/dist/graph/simhash.js +14 -0
  106. package/dist/index.d.ts +29 -8
  107. package/dist/index.js +29 -8
  108. package/dist/lock/hashKey.js +1 -1
  109. package/dist/lock/lockManager.d.ts +5 -1
  110. package/dist/lock/lockManager.js +38 -13
  111. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  112. package/dist/plugin-system/plugin-cli.js +31 -0
  113. package/dist/plugin-system/plugin-config.d.ts +16 -0
  114. package/dist/plugin-system/plugin-config.js +36 -0
  115. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  116. package/dist/plugin-system/plugin-loader.js +122 -0
  117. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  118. package/dist/plugin-system/plugin-registry.js +167 -0
  119. package/dist/plugin-system/plugin-types.d.ts +205 -0
  120. package/dist/plugin-system/plugin-types.js +1 -0
  121. package/dist/ports/index.d.ts +9 -0
  122. package/dist/ports/index.js +1 -0
  123. package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
  124. package/dist/report/crawlExport.d.ts +3 -0
  125. package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
  126. package/dist/report/crawl_template.d.ts +1 -0
  127. package/dist/report/crawl_template.js +7 -0
  128. package/dist/report/export.d.ts +3 -0
  129. package/dist/report/export.js +81 -0
  130. package/dist/report/html.js +15 -216
  131. package/dist/report/insight.d.ts +27 -0
  132. package/dist/report/insight.js +103 -0
  133. package/dist/scoring/health.d.ts +56 -0
  134. package/dist/scoring/health.js +213 -0
  135. package/dist/utils/chalk.d.ts +6 -0
  136. package/dist/utils/chalk.js +41 -0
  137. package/dist/utils/secureConfig.d.ts +23 -0
  138. package/dist/utils/secureConfig.js +128 -0
  139. package/package.json +12 -6
  140. package/CHANGELOG.md +0 -7
  141. package/dist/db/schema.d.ts +0 -2
  142. package/dist/graph/cluster.d.ts +0 -6
  143. package/dist/graph/cluster.js +0 -173
  144. package/dist/graph/duplicate.d.ts +0 -10
  145. package/dist/graph/duplicate.js +0 -251
  146. package/dist/report/sitegraphExport.d.ts +0 -3
  147. package/dist/report/sitegraph_template.d.ts +0 -1
  148. package/dist/report/sitegraph_template.js +0 -630
  149. package/dist/scoring/hits.d.ts +0 -9
  150. package/dist/scoring/hits.js +0 -111
  151. package/src/analysis/analyze.ts +0 -548
  152. package/src/analysis/content.ts +0 -62
  153. package/src/analysis/images.ts +0 -28
  154. package/src/analysis/links.ts +0 -41
  155. package/src/analysis/scoring.ts +0 -59
  156. package/src/analysis/seo.ts +0 -82
  157. package/src/analysis/structuredData.ts +0 -62
  158. package/src/audit/dns.ts +0 -49
  159. package/src/audit/headers.ts +0 -98
  160. package/src/audit/index.ts +0 -66
  161. package/src/audit/scoring.ts +0 -232
  162. package/src/audit/transport.ts +0 -258
  163. package/src/audit/types.ts +0 -102
  164. package/src/core/network/proxyAdapter.ts +0 -21
  165. package/src/core/network/rateLimiter.ts +0 -39
  166. package/src/core/network/redirectController.ts +0 -47
  167. package/src/core/network/responseLimiter.ts +0 -34
  168. package/src/core/network/retryPolicy.ts +0 -57
  169. package/src/core/scope/domainFilter.ts +0 -45
  170. package/src/core/scope/scopeManager.ts +0 -52
  171. package/src/core/scope/subdomainPolicy.ts +0 -39
  172. package/src/core/security/ipGuard.ts +0 -92
  173. package/src/crawler/crawl.ts +0 -382
  174. package/src/crawler/extract.ts +0 -34
  175. package/src/crawler/fetcher.ts +0 -233
  176. package/src/crawler/metricsRunner.ts +0 -124
  177. package/src/crawler/normalize.ts +0 -108
  178. package/src/crawler/parser.ts +0 -190
  179. package/src/crawler/sitemap.ts +0 -73
  180. package/src/crawler/trap.ts +0 -96
  181. package/src/db/graphLoader.ts +0 -105
  182. package/src/db/index.ts +0 -70
  183. package/src/db/repositories/EdgeRepository.ts +0 -29
  184. package/src/db/repositories/MetricsRepository.ts +0 -49
  185. package/src/db/repositories/PageRepository.ts +0 -128
  186. package/src/db/repositories/SiteRepository.ts +0 -32
  187. package/src/db/repositories/SnapshotRepository.ts +0 -74
  188. package/src/db/schema.ts +0 -177
  189. package/src/diff/compare.ts +0 -84
  190. package/src/graph/cluster.ts +0 -192
  191. package/src/graph/duplicate.ts +0 -286
  192. package/src/graph/graph.ts +0 -172
  193. package/src/graph/metrics.ts +0 -110
  194. package/src/graph/pagerank.ts +0 -125
  195. package/src/graph/simhash.ts +0 -61
  196. package/src/index.ts +0 -30
  197. package/src/lock/hashKey.ts +0 -51
  198. package/src/lock/lockManager.ts +0 -124
  199. package/src/lock/pidCheck.ts +0 -13
  200. package/src/report/html.ts +0 -227
  201. package/src/report/sitegraphExport.ts +0 -58
  202. package/src/scoring/hits.ts +0 -131
  203. package/src/scoring/orphanSeverity.ts +0 -176
  204. package/src/utils/version.ts +0 -18
  205. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  206. package/tests/analysis.unit.test.ts +0 -98
  207. package/tests/analyze.integration.test.ts +0 -98
  208. package/tests/audit/dns.test.ts +0 -31
  209. package/tests/audit/headers.test.ts +0 -45
  210. package/tests/audit/scoring.test.ts +0 -133
  211. package/tests/audit/security.test.ts +0 -12
  212. package/tests/audit/transport.test.ts +0 -112
  213. package/tests/clustering.test.ts +0 -118
  214. package/tests/crawler.test.ts +0 -358
  215. package/tests/db.test.ts +0 -159
  216. package/tests/diff.test.ts +0 -67
  217. package/tests/duplicate.test.ts +0 -110
  218. package/tests/fetcher.test.ts +0 -106
  219. package/tests/fetcher_safety.test.ts +0 -85
  220. package/tests/fixtures/analyze-crawl.json +0 -26
  221. package/tests/hits.test.ts +0 -134
  222. package/tests/html_report.test.ts +0 -58
  223. package/tests/lock/lockManager.test.ts +0 -138
  224. package/tests/metrics.test.ts +0 -196
  225. package/tests/normalize.test.ts +0 -101
  226. package/tests/orphanSeverity.test.ts +0 -160
  227. package/tests/pagerank.test.ts +0 -98
  228. package/tests/parser.test.ts +0 -117
  229. package/tests/proxy_safety.test.ts +0 -57
  230. package/tests/redirect_safety.test.ts +0 -73
  231. package/tests/safety.test.ts +0 -114
  232. package/tests/scope.test.ts +0 -66
  233. package/tests/scoring.test.ts +0 -59
  234. package/tests/sitemap.test.ts +0 -88
  235. package/tests/soft404.test.ts +0 -41
  236. package/tests/trap.test.ts +0 -39
  237. package/tests/visualization_data.test.ts +0 -46
  238. package/tsconfig.json +0 -11
@@ -1,98 +0,0 @@
1
- import { describe, expect, test } from 'vitest';
2
- import { analyzeTitle, analyzeMetaDescription, applyDuplicateStatuses, analyzeH1 } from '../src/analysis/seo.js';
3
- import { analyzeContent, calculateThinContentScore } from '../src/analysis/content.js';
4
- import { analyzeStructuredData } from '../src/analysis/structuredData.js';
5
- import { analyzeLinks } from '../src/analysis/links.js';
6
- import { analyzeImageAlts } from '../src/analysis/images.js';
7
-
8
- describe('SEO module', () => {
9
- test('analyze title edge cases', () => {
10
- expect(analyzeTitle('<html></html>').status).toBe('missing');
11
- expect(analyzeTitle('<title>short</title>').status).toBe('too_short');
12
- expect(analyzeTitle(`<title>${'a'.repeat(61)}</title>`).status).toBe('too_long');
13
- expect(analyzeTitle(`<title>${'a'.repeat(55)}</title>`).status).toBe('ok');
14
- });
15
-
16
- test('duplicate detection', () => {
17
- const values = applyDuplicateStatuses([
18
- { value: 'Same', length: 4, status: 'ok' as const },
19
- { value: 'same', length: 4, status: 'ok' as const },
20
- { value: null, length: 0, status: 'missing' as const }
21
- ]);
22
- expect(values[0].status).toBe('duplicate');
23
- expect(values[1].status).toBe('duplicate');
24
- expect(values[2].status).toBe('missing');
25
- });
26
-
27
- test('meta description boundaries', () => {
28
- expect(analyzeMetaDescription('<meta name="description" content="">').status).toBe('missing');
29
- expect(analyzeMetaDescription('<html></html>').status).toBe('missing');
30
- expect(analyzeMetaDescription('<meta name="description" content="short">').status).toBe('too_short');
31
- expect(analyzeMetaDescription(`<meta name="description" content="${'x'.repeat(150)}">`).status).toBe('ok');
32
- expect(analyzeMetaDescription(`<meta name="description" content="${'x'.repeat(170)}">`).status).toBe('too_long');
33
- });
34
-
35
- test('h1 variations', () => {
36
- expect(analyzeH1('<h1>One</h1>', 'Title').status).toBe('ok');
37
- expect(analyzeH1('<h1>One</h1><h1>Two</h1>', 'Title').status).toBe('warning');
38
- const noH1 = analyzeH1('<p>none</p>', 'Title');
39
- expect(noH1.status).toBe('critical');
40
- const same = analyzeH1('<h1>same</h1>', 'Same');
41
- expect(same.matchesTitle).toBe(true);
42
- });
43
- });
44
-
45
- describe('content module', () => {
46
- test('word count strips nav/footer/script/style', () => {
47
- const html = '<body><nav>skip me</nav><p>keep words here</p><footer>skip</footer><script>var x</script><style>.x{}</style></body>';
48
- const result = analyzeContent(html);
49
- expect(result.wordCount).toBe(3);
50
- expect(result.uniqueSentenceCount).toBe(1);
51
- expect(result.textHtmlRatio).toBeGreaterThan(0);
52
- });
53
-
54
- test('thin score boundaries', () => {
55
- expect(calculateThinContentScore({ wordCount: 600, textHtmlRatio: 0.5, uniqueSentenceCount: 4 }, 0)).toBe(0);
56
- expect(calculateThinContentScore({ wordCount: 0, textHtmlRatio: 0, uniqueSentenceCount: 1 }, 100)).toBe(100);
57
- });
58
-
59
- test('content handles malformed/empty html', () => {
60
- expect(analyzeContent('').wordCount).toBe(0);
61
- expect(analyzeContent('<div><span>broken').wordCount).toBeGreaterThanOrEqual(1);
62
- });
63
- });
64
-
65
- describe('structured data', () => {
66
- test('valid and invalid JSON-LD parsing', () => {
67
- const valid = analyzeStructuredData('<script type="application/ld+json">{"@type":"Article"}</script>');
68
- expect(valid.present).toBe(true);
69
- expect(valid.valid).toBe(true);
70
- expect(valid.types).toContain('Article');
71
-
72
- const invalid = analyzeStructuredData('<script type="application/ld+json">{invalid}</script>');
73
- expect(invalid.present).toBe(true);
74
- expect(invalid.valid).toBe(false);
75
-
76
- const missing = analyzeStructuredData('<p>none</p>');
77
- expect(missing.present).toBe(false);
78
- });
79
- });
80
-
81
- describe('links and images', () => {
82
- test('link ratio calculation', () => {
83
- const html = '<a href="/a">A</a><a href="https://other.com">B</a><a href="https://other.com" rel="nofollow">C</a>';
84
- const links = analyzeLinks(html, 'https://example.com/page', 'https://example.com');
85
- expect(links.internalLinks).toBe(1);
86
- expect(links.externalLinks).toBe(2);
87
- expect(links.nofollowCount).toBe(1);
88
- expect(links.externalRatio).toBeCloseTo(2 / 3);
89
- });
90
-
91
- test('image alt detection', () => {
92
- const html = '<img src="a"><img src="b" alt=""><img src="c" alt="ok">';
93
- const imgs = analyzeImageAlts(html);
94
- expect(imgs.totalImages).toBe(3);
95
- expect(imgs.missingAlt).toBe(1);
96
- expect(imgs.emptyAlt).toBe(1);
97
- });
98
- });
@@ -1,98 +0,0 @@
1
- import { describe, expect, test } from 'vitest';
2
- import path from 'node:path';
3
- import fs from 'node:fs/promises';
4
- import { analyzeSite, renderAnalysisHtml } from '../src/analysis/analyze.js';
5
-
6
- describe('analyze integration', () => {
7
- const fixturePath = path.resolve(import.meta.dirname, 'fixtures/analyze-crawl.json');
8
-
9
- test('analyzes full crawl fixture and schema', async () => {
10
- const result = await analyzeSite('https://example.com', { fromCrawl: fixturePath });
11
-
12
- expect(result.site_summary.pages_analyzed).toBe(3);
13
- expect(result.site_summary.duplicate_titles).toBe(2);
14
- expect(result.site_summary.avg_seo_score).toBeGreaterThanOrEqual(0);
15
- expect(result.pages[0]).toHaveProperty('title');
16
- expect(result.pages[0]).toHaveProperty('content');
17
- expect(result.pages[0]).toHaveProperty('links');
18
- expect(result.site_scores.overallScore).toBeGreaterThanOrEqual(0);
19
- expect(result.site_scores.overallScore).toBeLessThanOrEqual(100);
20
- });
21
-
22
- test('module filter flags behavior', async () => {
23
- const seoOnly = await analyzeSite('https://example.com', { fromCrawl: fixturePath, seo: true });
24
- expect(seoOnly.pages[0].content.wordCount).toBe(0);
25
- expect(seoOnly.pages[0].images.totalImages).toBe(0);
26
-
27
- const contentOnly = await analyzeSite('https://example.com', { fromCrawl: fixturePath, content: true });
28
- expect(contentOnly.pages[0].title.status).toBe('missing');
29
- expect(contentOnly.pages[0].thinScore).toBeGreaterThanOrEqual(0);
30
-
31
- const accessibilityOnly = await analyzeSite('https://example.com', { fromCrawl: fixturePath, accessibility: true });
32
- expect(accessibilityOnly.pages[0].images.totalImages).toBeGreaterThan(0);
33
- expect(accessibilityOnly.pages[0].title.status).toBe('missing');
34
- });
35
-
36
- test('html report generation', async () => {
37
- const result = await analyzeSite('https://example.com', { fromCrawl: fixturePath });
38
- const html = renderAnalysisHtml(result);
39
- expect(html).toContain('<table');
40
- expect(html).toContain('Analysis');
41
- });
42
-
43
- test('default database loading', async () => {
44
- // Force in-memory DB for this test
45
- process.env.CRAWLITH_DB_PATH = ':memory:';
46
-
47
- // Close existing DB connection if any to ensure fresh start
48
- const { getDb, closeDb } = await import('../src/db/index.js');
49
- closeDb();
50
-
51
- // Setup repositories
52
- const { SiteRepository } = await import('../src/db/repositories/SiteRepository.js');
53
- const { SnapshotRepository } = await import('../src/db/repositories/SnapshotRepository.js');
54
- const { PageRepository } = await import('../src/db/repositories/PageRepository.js');
55
-
56
- const db = getDb();
57
- const siteRepo = new SiteRepository(db);
58
- const snapshotRepo = new SnapshotRepository(db);
59
- const pageRepo = new PageRepository(db);
60
-
61
- // Create site and snapshot
62
- const siteId = siteRepo.createSite('example.com');
63
- const snapshotId = snapshotRepo.createSnapshot(siteId, 'full', 'running');
64
-
65
- // Parse fixture and load pages into db
66
- const rawYaml = await fs.readFile(fixturePath, 'utf-8');
67
- const rawData = JSON.parse(rawYaml);
68
- (rawData.pages || rawData.nodes).forEach((p: any) => {
69
- pageRepo.upsertPage({
70
- site_id: siteId,
71
- normalized_url: p.url,
72
- last_seen_snapshot_id: snapshotId,
73
- http_status: p.status || 200,
74
- html: p.html || '',
75
- depth: p.depth || 0,
76
- });
77
- });
78
-
79
- snapshotRepo.updateSnapshotStatus(snapshotId, 'completed', { node_count: 3, edge_count: 0 });
80
-
81
- try {
82
- const result = await analyzeSite('https://example.com', {});
83
- expect(result.site_summary.pages_analyzed).toBe(3);
84
- } finally {
85
- closeDb();
86
- delete process.env.CRAWLITH_DB_PATH;
87
- }
88
- });
89
-
90
- test('handles large html and js-only content', async () => {
91
- const hugeText = '<html><body><script>document.write("x")</script>' + '<p>word </p>'.repeat(1000) + '</body></html>';
92
- const tmpFile = path.resolve(import.meta.dirname, 'fixtures/large-analyze.json');
93
- await fs.writeFile(tmpFile, JSON.stringify({ pages: [{ url: 'https://example.com/', status: 200, depth: 0, html: hugeText }] }));
94
- const result = await analyzeSite('https://example.com', { fromCrawl: tmpFile });
95
- expect(result.pages[0].content.wordCount).toBe(1000);
96
- await fs.unlink(tmpFile);
97
- });
98
- });
@@ -1,31 +0,0 @@
1
- import { describe, it, expect, vi } from 'vitest';
2
- import { resolveDns } from '../../src/audit/dns.js';
3
- import dns from 'node:dns/promises';
4
-
5
- vi.mock('node:dns/promises');
6
-
7
- describe('DNS Diagnostics', () => {
8
- it('should resolve all records', async () => {
9
- vi.spyOn(dns, 'resolve4').mockResolvedValue(['1.1.1.1']);
10
- vi.spyOn(dns, 'resolve6').mockResolvedValue(['2606::1']);
11
- vi.spyOn(dns, 'resolveCname').mockRejectedValue(new Error('ENODATA'));
12
- vi.spyOn(dns, 'reverse').mockResolvedValue(['one.one.one.one']);
13
-
14
- const result = await resolveDns('example.com');
15
- expect(result.a).toEqual(['1.1.1.1']);
16
- expect(result.aaaa).toEqual(['2606::1']);
17
- expect(result.ipv6Support).toBe(true);
18
- expect(result.reverse).toEqual(['one.one.one.one']);
19
- expect(result.resolutionTime).toBeGreaterThanOrEqual(0);
20
- });
21
-
22
- it('should handle failures gracefully', async () => {
23
- vi.spyOn(dns, 'resolve4').mockRejectedValue(new Error('ENOTFOUND'));
24
- vi.spyOn(dns, 'resolve6').mockRejectedValue(new Error('ENOTFOUND'));
25
- vi.spyOn(dns, 'resolveCname').mockRejectedValue(new Error('ENOTFOUND'));
26
-
27
- const result = await resolveDns('invalid.com');
28
- expect(result.a).toEqual([]);
29
- expect(result.ipCount).toBe(0);
30
- });
31
- });
@@ -1,45 +0,0 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { analyzeHeaders } from '../../src/audit/headers.js';
3
-
4
- describe('Headers Analysis', () => {
5
- it('should detect all secure headers', () => {
6
- const headers = {
7
- 'strict-transport-security': 'max-age=31536000; includeSubDomains',
8
- 'content-security-policy': "default-src 'self'",
9
- 'x-frame-options': 'DENY',
10
- 'x-content-type-options': 'nosniff',
11
- 'referrer-policy': 'strict-origin-when-cross-origin',
12
- 'permissions-policy': 'geolocation=()'
13
- };
14
- const result = analyzeHeaders(headers);
15
- expect(result.score).toBe(100);
16
- expect(result.strictTransportSecurity.valid).toBe(true);
17
- });
18
-
19
- it('should handle missing headers', () => {
20
- const headers = {};
21
- const result = analyzeHeaders(headers);
22
- expect(result.score).toBe(0);
23
- expect(result.strictTransportSecurity.present).toBe(false);
24
- });
25
-
26
- it('should validate HSTS properly', () => {
27
- const headers = {
28
- 'strict-transport-security': 'max-age=0'
29
- };
30
- // valid requires max-age
31
- const result = analyzeHeaders(headers);
32
- expect(result.strictTransportSecurity.valid).toBe(true);
33
- // Wait, checkHSTS: includes('max-age=') is true. includes('includeSubDomains') is false.
34
- // Issues will contain 'Missing includeSubDomains'.
35
- expect(result.strictTransportSecurity.issues).toContain('Missing includeSubDomains');
36
- });
37
-
38
- it('should validate invalid HSTS', () => {
39
- const headers = {
40
- 'strict-transport-security': 'invalid'
41
- };
42
- const result = analyzeHeaders(headers);
43
- expect(result.strictTransportSecurity.valid).toBe(false);
44
- });
45
- });
@@ -1,133 +0,0 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { calculateScore } from '../../src/audit/scoring.js';
3
- import { TransportDiagnostics, DnsDiagnostics, SecurityHeadersResult, PerformanceMetrics, AuditIssue } from '../../src/audit/types.js';
4
-
5
- describe('Scoring Engine', () => {
6
- const mockTransport: TransportDiagnostics = {
7
- tlsVersion: 'TLSv1.3',
8
- cipherSuite: 'TLS_AES_256_GCM_SHA384',
9
- alpnProtocol: 'h2',
10
- certificate: {
11
- issuer: 'Let\'s Encrypt',
12
- subject: 'example.com',
13
- validFrom: '2023-01-01',
14
- validTo: '2024-01-01',
15
- daysUntilExpiry: 60,
16
- isSelfSigned: false,
17
- isValidChain: true,
18
- fingerprint: 'SHA256:...'
19
- } as any,
20
- httpVersion: '2.0',
21
- compression: ['gzip'],
22
- keepAlive: true,
23
- transferEncoding: null,
24
- redirectCount: 0,
25
- redirects: [],
26
- serverHeader: 'nginx',
27
- headers: {}
28
- };
29
-
30
- const mockDns: DnsDiagnostics = {
31
- a: ['1.1.1.1', '1.0.0.1'],
32
- aaaa: ['2606:4700:4700::1111'],
33
- cname: [],
34
- reverse: [],
35
- ipCount: 3,
36
- ipv6Support: true,
37
- resolutionTime: 10
38
- };
39
-
40
- const mockHeaders: SecurityHeadersResult = {
41
- strictTransportSecurity: { present: true, valid: true, value: 'max-age=31536000' },
42
- contentSecurityPolicy: { present: true, valid: true, value: "default-src 'self'" },
43
- xFrameOptions: { present: true, valid: true, value: 'DENY' },
44
- xContentTypeOptions: { present: true, valid: true, value: 'nosniff' },
45
- referrerPolicy: { present: true, valid: true, value: 'strict-origin' },
46
- permissionsPolicy: { present: true, valid: true, value: 'geolocation=()' },
47
- details: {},
48
- score: 100
49
- };
50
-
51
- const mockPerformance: PerformanceMetrics = {
52
- dnsLookupTime: 10,
53
- tcpConnectTime: 20,
54
- tlsHandshakeTime: 30,
55
- ttfb: 100,
56
- totalTime: 200,
57
- htmlSize: 50000,
58
- headerSize: 500,
59
- redirectTime: 0
60
- };
61
-
62
- it('should give perfect score for perfect inputs', () => {
63
- const result = calculateScore(mockTransport, mockDns, mockHeaders, mockPerformance, []);
64
- expect(result.score).toBe(100);
65
- expect(result.grade).toBe('A');
66
- expect(result.issues).toHaveLength(0);
67
- });
68
-
69
- it('should penalize TLS < 1.2', () => {
70
- const badTransport = { ...mockTransport, tlsVersion: 'TLSv1.1' };
71
- const result = calculateScore(badTransport, mockDns, mockHeaders, mockPerformance, []);
72
- expect(result.score).toBeLessThan(100);
73
- expect(result.categoryScores.transport).toBeLessThan(30);
74
- expect(result.issues).toEqual(expect.arrayContaining([expect.objectContaining({ id: 'tls-old' })]));
75
- });
76
-
77
- it('should penalize missing HTTPS', () => {
78
- const badTransport = { ...mockTransport, tlsVersion: null, certificate: null };
79
- const result = calculateScore(badTransport, mockDns, mockHeaders, mockPerformance, []);
80
- expect(result.score).toBeLessThan(50); // Critical
81
- expect(result.grade).toBe('F');
82
- expect(result.issues).toEqual(expect.arrayContaining([expect.objectContaining({ id: 'no-https' })]));
83
- });
84
-
85
- it('should fail on expired cert', () => {
86
- const expiredTransport = {
87
- ...mockTransport,
88
- certificate: { ...mockTransport.certificate!, daysUntilExpiry: -5, validTo: '2023-01-01' }
89
- };
90
- const result = calculateScore(expiredTransport, mockDns, mockHeaders, mockPerformance, []);
91
- expect(result.grade).toBe('F');
92
- expect(result.score).toBeLessThanOrEqual(40);
93
- expect(result.issues).toEqual(expect.arrayContaining([expect.objectContaining({ id: 'cert-expired' })]));
94
- });
95
-
96
- it('should penalize missing security headers', () => {
97
- // If score is 50, it means we lost 50 points in headers category (internal score)
98
- // headers category is 20 points total. So we lose 10 points.
99
- const badHeaders = { ...mockHeaders, score: 50, strictTransportSecurity: { present: false, valid: false, value: null } };
100
- const result = calculateScore(mockTransport, mockDns, badHeaders, mockPerformance, []);
101
- expect(result.categoryScores.security).toBe(10);
102
- expect(result.score).toBe(90); // 100 - 10
103
- expect(result.issues).toEqual(expect.arrayContaining([expect.objectContaining({ id: 'hsts-missing' })]));
104
- });
105
-
106
- it('should penalize poor performance', () => {
107
- const badPerf = { ...mockPerformance, ttfb: 1000, htmlSize: 2000000 };
108
- const result = calculateScore(mockTransport, mockDns, mockHeaders, badPerf, []);
109
- // TTFB > 800: Lose 10 pts
110
- // HTML > 1MB: Lose 5 pts
111
- // Total perf score (30) -> 15.
112
- expect(result.categoryScores.performance).toBe(15);
113
- expect(result.score).toBe(85);
114
- expect(result.issues).toEqual(expect.arrayContaining([
115
- expect.objectContaining({ id: 'slow-ttfb' }),
116
- expect.objectContaining({ id: 'large-html' })
117
- ]));
118
- });
119
-
120
- it('should penalize infrastructure issues', () => {
121
- const badDns = { ...mockDns, ipv6Support: false, ipCount: 1 };
122
- const result = calculateScore(mockTransport, badDns, mockHeaders, mockPerformance, []);
123
- // No IPv6: Lose 10 pts
124
- // Single IP: Lose 10 pts
125
- // Infra score (20) -> 0.
126
- expect(result.categoryScores.infrastructure).toBe(0);
127
- expect(result.score).toBe(80);
128
- expect(result.issues).toEqual(expect.arrayContaining([
129
- expect.objectContaining({ id: 'no-ipv6' }),
130
- expect.objectContaining({ id: 'single-ip' })
131
- ]));
132
- });
133
- });
@@ -1,12 +0,0 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { auditUrl } from '../../src/audit/index.js';
3
-
4
- describe('Audit Security', () => {
5
- it('should block audits of internal IP addresses', async () => {
6
- await expect(auditUrl('http://127.0.0.1')).rejects.toThrow('Access to internal or private infrastructure is prohibited');
7
- });
8
-
9
- it('should block audits of link-local addresses', async () => {
10
- await expect(auditUrl('http://169.254.169.254')).rejects.toThrow('Access to internal or private infrastructure is prohibited');
11
- });
12
- });
@@ -1,112 +0,0 @@
1
- import { describe, it, expect, vi, afterEach } from 'vitest';
2
- import { analyzeTransport } from '../../src/audit/transport.js';
3
- import https from 'node:https';
4
- import http from 'node:http';
5
- import tls from 'node:tls';
6
- import { EventEmitter } from 'events';
7
-
8
- vi.mock('node:https');
9
- vi.mock('node:http');
10
-
11
- describe('Transport Diagnostics', () => {
12
- afterEach(() => {
13
- vi.clearAllMocks();
14
- });
15
-
16
- it('should analyze HTTPS transport', async () => {
17
- // Mock Response
18
- const mockRes = new EventEmitter() as any;
19
- mockRes.statusCode = 200;
20
- mockRes.statusMessage = 'OK';
21
- mockRes.headers = {
22
- 'content-encoding': 'gzip',
23
- 'server': 'nginx',
24
- 'connection': 'keep-alive'
25
- };
26
- mockRes.httpVersion = '1.1';
27
-
28
- const mockSocket = new EventEmitter();
29
- Object.setPrototypeOf(mockSocket, tls.TLSSocket.prototype);
30
- (mockSocket as any).getPeerCertificate = () => ({
31
- subject: { CN: 'example.com' },
32
- issuer: { CN: 'Let\'s Encrypt' },
33
- valid_from: 'Jan 1 2023',
34
- valid_to: 'Jan 1 2024',
35
- fingerprint: 'SHA256:...'
36
- });
37
- (mockSocket as any).getProtocol = () => 'TLSv1.3';
38
- (mockSocket as any).getCipher = () => ({ name: 'TLS_AES_...' });
39
- (mockSocket as any).alpnProtocol = 'h2';
40
- (mockSocket as any).authorized = true;
41
-
42
- mockRes.socket = mockSocket;
43
-
44
- // Mock Request
45
- const mockReq = new EventEmitter() as any;
46
- mockReq.end = vi.fn();
47
- mockReq.destroy = vi.fn();
48
-
49
- // Mock https.request
50
- vi.spyOn(https, 'request').mockImplementation((url, options, cb) => {
51
- if (cb) cb(mockRes);
52
- // Simulate socket events
53
- setTimeout(() => {
54
- mockReq.emit('socket', mockRes.socket);
55
- mockRes.socket.emit('lookup');
56
- mockRes.socket.emit('connect');
57
- mockRes.socket.emit('secureConnect');
58
- mockReq.emit('finish');
59
- // Response data
60
- mockRes.emit('data', Buffer.from('<html></html>'));
61
- mockRes.emit('end');
62
- }, 10);
63
- return mockReq;
64
- });
65
-
66
- const result = await analyzeTransport('https://example.com', 1000);
67
- expect(result.transport.tlsVersion).toBe('TLSv1.3');
68
- expect(result.transport.httpVersion).toBe('1.1');
69
- expect(result.performance.htmlSize).toBeGreaterThan(0);
70
- expect(result.transport.headers['server']).toBe('nginx');
71
- });
72
-
73
- it('should handle redirects', async () => {
74
- const req1 = new EventEmitter() as any; req1.end = vi.fn(); req1.destroy = vi.fn();
75
- const res1 = new EventEmitter() as any; res1.statusCode = 301; res1.headers = { location: 'https://example.com/' };
76
- res1.socket = new EventEmitter(); Object.setPrototypeOf(res1.socket, tls.TLSSocket.prototype);
77
-
78
- const req2 = new EventEmitter() as any; req2.end = vi.fn(); req2.destroy = vi.fn();
79
- const res2 = new EventEmitter() as any; res2.statusCode = 200; res2.headers = {};
80
- res2.socket = new EventEmitter(); Object.setPrototypeOf(res2.socket, tls.TLSSocket.prototype);
81
-
82
- // Setup res2 socket for TLS checks
83
- res2.socket.getPeerCertificate = () => ({});
84
- res2.socket.getProtocol = () => 'TLSv1.2';
85
- res2.socket.getCipher = () => ({ name: 'AES' });
86
-
87
- const requestSpy = vi.spyOn(https, 'request');
88
- requestSpy
89
- .mockImplementationOnce((url, options, cb) => {
90
- if (cb) cb(res1);
91
- setTimeout(() => {
92
- req1.emit('socket', res1.socket);
93
- res1.emit('data', Buffer.from('redirecting'));
94
- res1.emit('end');
95
- }, 10);
96
- return req1;
97
- })
98
- .mockImplementationOnce((url, options, cb) => {
99
- if (cb) cb(res2);
100
- setTimeout(() => {
101
- req2.emit('socket', res2.socket);
102
- res2.emit('data', Buffer.from('ok'));
103
- res2.emit('end');
104
- }, 10);
105
- return req2;
106
- });
107
-
108
- const result = await analyzeTransport('https://redirect.com', 1000);
109
- expect(result.transport.redirectCount).toBe(1);
110
- expect(result.transport.redirects[0].location).toBe('https://example.com/');
111
- });
112
- });
@@ -1,118 +0,0 @@
1
- import { describe, it, expect, beforeEach } from 'vitest';
2
- import { Graph } from '../src/graph/graph.js';
3
- import { detectContentClusters } from '../src/graph/cluster.js';
4
-
5
- describe('Content Clustering', () => {
6
- let graph: Graph;
7
-
8
- beforeEach(() => {
9
- graph = new Graph();
10
- });
11
-
12
- it('should group similar pages into a cluster', () => {
13
- // Mock simhashes for similar pages (Hamming distance 1)
14
- const h1 = 0b101010n;
15
- const h2 = 0b101011n;
16
- const h3 = 0b101001n;
17
-
18
- graph.addNode('https://example.com/p1', 0, 200);
19
- graph.addNode('https://example.com/p2', 0, 200);
20
- graph.addNode('https://example.com/p3', 0, 200);
21
-
22
- graph.updateNodeData('https://example.com/p1', { simhash: h1.toString() });
23
- graph.updateNodeData('https://example.com/p2', { simhash: h2.toString() });
24
- graph.updateNodeData('https://example.com/p3', { simhash: h3.toString() });
25
-
26
- const clusters = detectContentClusters(graph, 2, 2);
27
-
28
- expect(clusters.length).toBe(1);
29
- expect(clusters[0].count).toBe(3);
30
- expect(graph.nodes.get('https://example.com/p1')?.clusterId).toBe(1);
31
- });
32
-
33
- it('should separate dissimilar pages', () => {
34
- // Mock simhashes for very different pages
35
- const h1 = 0b1111111111n;
36
- const h2 = 0b0000000000n;
37
-
38
- graph.addNode('https://example.com/p1', 0, 200);
39
- graph.addNode('https://example.com/p2', 0, 200);
40
-
41
- graph.updateNodeData('https://example.com/p1', { simhash: h1.toString() });
42
- graph.updateNodeData('https://example.com/p2', { simhash: h2.toString() });
43
-
44
- const clusters = detectContentClusters(graph, 2, 2);
45
-
46
- expect(clusters.length).toBe(0); // None meet minSize 2
47
- });
48
-
49
- it('should respect minClusterSize', () => {
50
- const h1 = 0b1n;
51
- const h2 = 0b0n;
52
-
53
- graph.addNode('https://example.com/p1', 0, 200);
54
- graph.addNode('https://example.com/p2', 0, 200);
55
-
56
- graph.updateNodeData('https://example.com/p1', { simhash: h1.toString() });
57
- graph.updateNodeData('https://example.com/p2', { simhash: h2.toString() });
58
-
59
- const clusters = detectContentClusters(graph, 1, 3);
60
- expect(clusters.length).toBe(0);
61
- });
62
-
63
- it('should identify shared path prefixes (silos)', () => {
64
- graph.addNode('https://example.com/blog/seo-tips', 0, 200);
65
- graph.addNode('https://example.com/blog/link-building', 0, 200);
66
- graph.addNode('https://example.com/blog/technical-seo', 0, 200);
67
-
68
- const h = 0b111n;
69
- graph.updateNodeData('https://example.com/blog/seo-tips', { simhash: h.toString() });
70
- graph.updateNodeData('https://example.com/blog/link-building', { simhash: h.toString() });
71
- graph.updateNodeData('https://example.com/blog/technical-seo', { simhash: h.toString() });
72
-
73
- const clusters = detectContentClusters(graph, 0, 3);
74
- expect(clusters[0].sharedPathPrefix).toBe('/blog');
75
- });
76
-
77
- it('should be deterministic with unstable input order', () => {
78
- // We'll add nodes in different orders and check if cluster primary is same
79
- const h = 0b111n;
80
- graph.addNode('https://example.com/z', 0, 200);
81
- graph.addNode('https://example.com/a', 0, 200);
82
- graph.addNode('https://example.com/m', 0, 200);
83
-
84
- graph.updateNodeData('https://example.com/z', { simhash: h.toString(), pageRank: 10 });
85
- graph.updateNodeData('https://example.com/a', { simhash: h.toString(), pageRank: 10 });
86
- graph.updateNodeData('https://example.com/m', { simhash: h.toString(), pageRank: 10 });
87
-
88
- const clusters = detectContentClusters(graph, 0, 3);
89
- // a should be primary because it's shortest/lexicographic first since PageRanks are same
90
- expect(clusters[0].primaryUrl).toBe('https://example.com/a');
91
- });
92
-
93
- it('should use band optimization correctly (heuristic nature)', () => {
94
- // Create many nodes in 2 groups
95
- // Group 1: Matches in band 0
96
- // Group 2: Matches in band 1
97
- for (let i = 0; i < 5; i++) {
98
- const url = `https://example.com/g1/${i}`;
99
- graph.addNode(url, 0, 200);
100
- // Simhash that matches in first 16 bits (0xAAAA)
101
- const hash = BigInt(0xAAAA) | (BigInt(i) << 16n);
102
- graph.updateNodeData(url, { simhash: hash.toString() });
103
- }
104
-
105
- for (let i = 0; i < 5; i++) {
106
- const url = `https://example.com/g2/${i}`;
107
- graph.addNode(url, 0, 200);
108
- // Simhash that matches in second 16 bits (0xBBBB << 16)
109
- const hash = (BigInt(0xBBBB) << 16n) | BigInt(i);
110
- graph.updateNodeData(url, { simhash: hash.toString() });
111
- }
112
-
113
- const clusters = detectContentClusters(graph, 5, 3);
114
- expect(clusters.length).toBe(2);
115
- expect(clusters[0].count).toBe(5);
116
- expect(clusters[1].count).toBe(5);
117
- });
118
- });