@crawlith/core 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/analysis/analysis_list.html +35 -0
  3. package/dist/analysis/analysis_page.html +123 -0
  4. package/dist/analysis/analyze.d.ts +17 -3
  5. package/dist/analysis/analyze.js +192 -248
  6. package/dist/analysis/scoring.js +7 -1
  7. package/dist/analysis/templates.d.ts +2 -0
  8. package/dist/analysis/templates.js +7 -0
  9. package/dist/core/security/ipGuard.d.ts +11 -0
  10. package/dist/core/security/ipGuard.js +71 -3
  11. package/dist/crawler/crawl.d.ts +4 -22
  12. package/dist/crawler/crawl.js +4 -335
  13. package/dist/crawler/crawler.d.ts +75 -0
  14. package/dist/crawler/crawler.js +518 -0
  15. package/dist/crawler/extract.d.ts +4 -1
  16. package/dist/crawler/extract.js +7 -2
  17. package/dist/crawler/fetcher.d.ts +1 -0
  18. package/dist/crawler/fetcher.js +20 -5
  19. package/dist/crawler/metricsRunner.d.ts +3 -1
  20. package/dist/crawler/metricsRunner.js +55 -46
  21. package/dist/crawler/sitemap.d.ts +3 -0
  22. package/dist/crawler/sitemap.js +5 -1
  23. package/dist/db/graphLoader.js +32 -3
  24. package/dist/db/index.d.ts +3 -0
  25. package/dist/db/index.js +4 -0
  26. package/dist/db/repositories/EdgeRepository.d.ts +8 -0
  27. package/dist/db/repositories/EdgeRepository.js +13 -0
  28. package/dist/db/repositories/MetricsRepository.d.ts +3 -0
  29. package/dist/db/repositories/MetricsRepository.js +14 -1
  30. package/dist/db/repositories/PageRepository.d.ts +11 -0
  31. package/dist/db/repositories/PageRepository.js +112 -19
  32. package/dist/db/repositories/SiteRepository.d.ts +3 -0
  33. package/dist/db/repositories/SiteRepository.js +9 -0
  34. package/dist/db/repositories/SnapshotRepository.d.ts +2 -0
  35. package/dist/db/repositories/SnapshotRepository.js +23 -2
  36. package/dist/events.d.ts +48 -0
  37. package/dist/events.js +1 -0
  38. package/dist/graph/cluster.js +62 -14
  39. package/dist/graph/duplicate.js +242 -191
  40. package/dist/graph/graph.d.ts +16 -0
  41. package/dist/graph/graph.js +17 -4
  42. package/dist/graph/metrics.js +12 -0
  43. package/dist/graph/pagerank.js +2 -0
  44. package/dist/graph/simhash.d.ts +6 -0
  45. package/dist/graph/simhash.js +14 -0
  46. package/dist/index.d.ts +5 -2
  47. package/dist/index.js +5 -2
  48. package/dist/lock/hashKey.js +1 -1
  49. package/dist/lock/lockManager.d.ts +4 -1
  50. package/dist/lock/lockManager.js +23 -13
  51. package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
  52. package/dist/report/crawlExport.d.ts +3 -0
  53. package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
  54. package/dist/report/crawl_template.d.ts +1 -0
  55. package/dist/report/crawl_template.js +7 -0
  56. package/dist/report/html.js +15 -216
  57. package/dist/scoring/health.d.ts +50 -0
  58. package/dist/scoring/health.js +170 -0
  59. package/dist/scoring/hits.d.ts +1 -0
  60. package/dist/scoring/hits.js +64 -44
  61. package/dist/scoring/orphanSeverity.d.ts +5 -5
  62. package/package.json +3 -3
  63. package/scripts/copy-assets.js +37 -0
  64. package/src/analysis/analysis_list.html +35 -0
  65. package/src/analysis/analysis_page.html +123 -0
  66. package/src/analysis/analyze.ts +218 -261
  67. package/src/analysis/scoring.ts +8 -1
  68. package/src/analysis/templates.ts +9 -0
  69. package/src/core/security/ipGuard.ts +82 -3
  70. package/src/crawler/crawl.ts +6 -379
  71. package/src/crawler/crawler.ts +601 -0
  72. package/src/crawler/extract.ts +7 -2
  73. package/src/crawler/fetcher.ts +24 -6
  74. package/src/crawler/metricsRunner.ts +60 -47
  75. package/src/crawler/sitemap.ts +4 -1
  76. package/src/db/graphLoader.ts +33 -3
  77. package/src/db/index.ts +5 -0
  78. package/src/db/repositories/EdgeRepository.ts +14 -0
  79. package/src/db/repositories/MetricsRepository.ts +15 -1
  80. package/src/db/repositories/PageRepository.ts +119 -19
  81. package/src/db/repositories/SiteRepository.ts +11 -0
  82. package/src/db/repositories/SnapshotRepository.ts +28 -3
  83. package/src/events.ts +16 -0
  84. package/src/graph/cluster.ts +69 -15
  85. package/src/graph/duplicate.ts +249 -185
  86. package/src/graph/graph.ts +24 -4
  87. package/src/graph/metrics.ts +15 -0
  88. package/src/graph/pagerank.ts +1 -0
  89. package/src/graph/simhash.ts +15 -0
  90. package/src/index.ts +5 -2
  91. package/src/lock/hashKey.ts +1 -1
  92. package/src/lock/lockManager.ts +21 -13
  93. package/{dist/report/sitegraph_template.js → src/report/crawl.html} +330 -81
  94. package/src/report/{sitegraphExport.ts → crawlExport.ts} +3 -3
  95. package/src/report/crawl_template.ts +9 -0
  96. package/src/report/html.ts +17 -217
  97. package/src/scoring/health.ts +241 -0
  98. package/src/scoring/hits.ts +67 -45
  99. package/src/scoring/orphanSeverity.ts +8 -8
  100. package/tests/analysis.unit.test.ts +44 -0
  101. package/tests/analyze.integration.test.ts +88 -53
  102. package/tests/analyze_markdown.test.ts +98 -0
  103. package/tests/audit/audit.test.ts +101 -0
  104. package/tests/audit/scoring.test.ts +25 -25
  105. package/tests/audit/transport.test.ts +0 -1
  106. package/tests/clustering_risk.test.ts +118 -0
  107. package/tests/crawler.test.ts +19 -13
  108. package/tests/db/index.test.ts +134 -0
  109. package/tests/db/repositories.test.ts +115 -0
  110. package/tests/db_repos.test.ts +72 -0
  111. package/tests/duplicate.test.ts +2 -2
  112. package/tests/extract.test.ts +86 -0
  113. package/tests/fetcher.test.ts +5 -1
  114. package/tests/fetcher_safety.test.ts +9 -3
  115. package/tests/graph/graph.test.ts +100 -0
  116. package/tests/graphLoader.test.ts +124 -0
  117. package/tests/html_report.test.ts +52 -51
  118. package/tests/ipGuard.test.ts +73 -0
  119. package/tests/lock/lockManager.test.ts +77 -17
  120. package/tests/normalize.test.ts +6 -19
  121. package/tests/orphanSeverity.test.ts +9 -9
  122. package/tests/redirect_safety.test.ts +5 -1
  123. package/tests/renderAnalysisCsv.test.ts +183 -0
  124. package/tests/safety.test.ts +12 -0
  125. package/tests/scope.test.ts +18 -0
  126. package/tests/scoring.test.ts +25 -24
  127. package/tests/sitemap.test.ts +13 -1
  128. package/tests/ssrf_fix.test.ts +69 -0
  129. package/tests/visualization_data.test.ts +10 -10
  130. package/dist/report/sitegraphExport.d.ts +0 -3
  131. package/dist/report/sitegraph_template.d.ts +0 -1
@@ -1,7 +1,7 @@
1
1
  import { describe, expect, test } from 'vitest';
2
- import { annotateOrphans, calculateOrphanSeverity, mapImpactLevel, type SitegraphNode, type SitegraphEdge } from '../src/scoring/orphanSeverity.js';
2
+ import { annotateOrphans, calculateOrphanSeverity, mapImpactLevel, type CrawlNode, type CrawlEdge } from '../src/scoring/orphanSeverity.js';
3
3
 
4
- function baseNode(url: string, overrides: Partial<SitegraphNode> = {}): SitegraphNode {
4
+ function baseNode(url: string, overrides: Partial<CrawlNode> = {}): CrawlNode {
5
5
  return {
6
6
  url,
7
7
  depth: 1,
@@ -14,11 +14,11 @@ function baseNode(url: string, overrides: Partial<SitegraphNode> = {}): Sitegrap
14
14
 
15
15
  describe('orphan detection and severity scoring', () => {
16
16
  test('hard orphan detection and homepage exclusion', () => {
17
- const nodes: SitegraphNode[] = [
17
+ const nodes: CrawlNode[] = [
18
18
  baseNode('https://example.com/', { depth: 0, inLinks: 0 }),
19
19
  baseNode('https://example.com/orphan', { inLinks: 0 })
20
20
  ];
21
- const edges: SitegraphEdge[] = [];
21
+ const edges: CrawlEdge[] = [];
22
22
 
23
23
  const result = annotateOrphans(nodes, edges, {
24
24
  enabled: true,
@@ -34,7 +34,7 @@ describe('orphan detection and severity scoring', () => {
34
34
 
35
35
  test('near orphan threshold override', () => {
36
36
  const nodes = [baseNode('https://example.com/near', { inLinks: 2 })];
37
- const edges: SitegraphEdge[] = [];
37
+ const edges: CrawlEdge[] = [];
38
38
 
39
39
  const resultDefault = annotateOrphans(nodes, edges, {
40
40
  enabled: true,
@@ -54,14 +54,14 @@ describe('orphan detection and severity scoring', () => {
54
54
  });
55
55
 
56
56
  test('soft orphan detection only when enabled and inbound only from low-value sources', () => {
57
- const nodes: SitegraphNode[] = [
57
+ const nodes: CrawlNode[] = [
58
58
  baseNode('https://example.com/tag/seo', { pageType: 'tag', outLinks: 1 }),
59
59
  baseNode('https://example.com/list?page=2', { pageType: 'pagination', outLinks: 1 }),
60
60
  baseNode('https://example.com/target', { inLinks: 2 }),
61
61
  baseNode('https://example.com/normal', { outLinks: 1 })
62
62
  ];
63
63
 
64
- const edges: SitegraphEdge[] = [
64
+ const edges: CrawlEdge[] = [
65
65
  { source: 'https://example.com/tag/seo', target: 'https://example.com/target' },
66
66
  { source: 'https://example.com/list?page=2', target: 'https://example.com/target' }
67
67
  ];
@@ -129,14 +129,14 @@ describe('orphan detection and severity scoring', () => {
129
129
  });
130
130
 
131
131
  test('canonical consolidation, robots exclusion, and deterministic JSON output snapshot', () => {
132
- const nodes: SitegraphNode[] = [
132
+ const nodes: CrawlNode[] = [
133
133
  baseNode('https://example.com/canonical', { inLinks: 0 }),
134
134
  baseNode('https://example.com/variant?a=1', { canonicalUrl: 'https://example.com/canonical', inLinks: 1 }),
135
135
  baseNode('https://example.com/blocked', { inLinks: 0, robotsExcluded: true }),
136
136
  baseNode('https://example.com/redirect-target', { inLinks: 1 })
137
137
  ];
138
138
 
139
- const edges: SitegraphEdge[] = [
139
+ const edges: CrawlEdge[] = [
140
140
  { source: 'https://example.com/redirect-source', target: 'https://example.com/redirect-target' }
141
141
  ];
142
142
 
@@ -5,7 +5,11 @@ import { request } from 'undici';
5
5
 
6
6
  vi.mock('undici', () => ({
7
7
  request: vi.fn(),
8
- ProxyAgent: vi.fn().mockImplementation(() => ({ dispatcher: {} }))
8
+ ProxyAgent: vi.fn().mockImplementation(() => ({ dispatcher: {} })),
9
+ Agent: class {
10
+ dispatch = vi.fn();
11
+ },
12
+ Dispatcher: class {}
9
13
  }));
10
14
 
11
15
  describe('RedirectController', () => {
@@ -0,0 +1,183 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { renderAnalysisCsv, AnalysisResult } from '../src/analysis/analyze.js';
3
+
4
+ describe('renderAnalysisCsv', () => {
5
+ test('renders CSV with headers', () => {
6
+ const result: AnalysisResult = {
7
+ pages: [],
8
+ site_summary: {
9
+ pages_analyzed: 0,
10
+ avg_seo_score: 0,
11
+ thin_pages: 0,
12
+ duplicate_titles: 0,
13
+ site_score: 0
14
+ },
15
+ site_scores: {} as any,
16
+ active_modules: {
17
+ seo: true,
18
+ content: true,
19
+ accessibility: true
20
+ }
21
+ };
22
+
23
+ const csv = renderAnalysisCsv(result);
24
+ expect(csv).toContain('URL,SEO Score,Thin Score,HTTP Status,Title,Title Length,Meta Description,Desc Length,Word Count,Internal Links,External Links');
25
+ });
26
+
27
+ test('renders a single page correctly', () => {
28
+ const result: AnalysisResult = {
29
+ pages: [
30
+ {
31
+ url: 'https://example.com',
32
+ status: 200,
33
+ seoScore: 85,
34
+ thinScore: 10,
35
+ title: { value: 'Example Domain', length: 14, status: 'ok' },
36
+ metaDescription: { value: 'This is an example description.', length: 29, status: 'ok' },
37
+ content: { wordCount: 500 } as any,
38
+ links: { internalLinks: 5, externalLinks: 2 } as any,
39
+ h1: {} as any,
40
+ images: {} as any,
41
+ structuredData: {} as any,
42
+ meta: {}
43
+ }
44
+ ],
45
+ site_summary: {
46
+ pages_analyzed: 1,
47
+ avg_seo_score: 85,
48
+ thin_pages: 0,
49
+ duplicate_titles: 0,
50
+ site_score: 85
51
+ },
52
+ site_scores: {} as any,
53
+ active_modules: {
54
+ seo: true,
55
+ content: true,
56
+ accessibility: true
57
+ }
58
+ };
59
+
60
+ const csv = renderAnalysisCsv(result);
61
+ const lines = csv.split('\n');
62
+ expect(lines.length).toBe(2);
63
+ expect(lines[1]).toContain('https://example.com,85,10,200,"Example Domain",14,"This is an example description.",29,500,5,2');
64
+ });
65
+
66
+ test('escapes quotes in title and meta description', () => {
67
+ const result: AnalysisResult = {
68
+ pages: [
69
+ {
70
+ url: 'https://example.com/quote',
71
+ status: 200,
72
+ seoScore: 90,
73
+ thinScore: 5,
74
+ title: { value: 'Example "Quoted" Domain', length: 23, status: 'ok' },
75
+ metaDescription: { value: 'This description contains "quotes" inside.', length: 42, status: 'ok' },
76
+ content: { wordCount: 300 } as any,
77
+ links: { internalLinks: 3, externalLinks: 1 } as any,
78
+ h1: {} as any,
79
+ images: {} as any,
80
+ structuredData: {} as any,
81
+ meta: {}
82
+ }
83
+ ],
84
+ site_summary: {
85
+ pages_analyzed: 1,
86
+ avg_seo_score: 90,
87
+ thin_pages: 0,
88
+ duplicate_titles: 0,
89
+ site_score: 90
90
+ },
91
+ site_scores: {} as any,
92
+ active_modules: {
93
+ seo: true,
94
+ content: true,
95
+ accessibility: true
96
+ }
97
+ };
98
+
99
+ const csv = renderAnalysisCsv(result);
100
+ const lines = csv.split('\n');
101
+ // Expect double quotes to be escaped with double quotes: " -> ""
102
+ // And the whole field wrapped in quotes
103
+ expect(lines[1]).toContain('"Example ""Quoted"" Domain"');
104
+ expect(lines[1]).toContain('"This description contains ""quotes"" inside."');
105
+ });
106
+
107
+ test('handles Pending/Limit status (status: 0)', () => {
108
+ const result: AnalysisResult = {
109
+ pages: [
110
+ {
111
+ url: 'https://example.com/pending',
112
+ status: 0,
113
+ seoScore: 0,
114
+ thinScore: 0,
115
+ title: { value: null, length: 0, status: 'missing' },
116
+ metaDescription: { value: null, length: 0, status: 'missing' },
117
+ content: { wordCount: 0 } as any,
118
+ links: { internalLinks: 0, externalLinks: 0 } as any,
119
+ h1: {} as any,
120
+ images: {} as any,
121
+ structuredData: {} as any,
122
+ meta: {}
123
+ }
124
+ ],
125
+ site_summary: {
126
+ pages_analyzed: 1,
127
+ avg_seo_score: 0,
128
+ thin_pages: 0,
129
+ duplicate_titles: 0,
130
+ site_score: 0
131
+ },
132
+ site_scores: {} as any,
133
+ active_modules: {
134
+ seo: true,
135
+ content: true,
136
+ accessibility: true
137
+ }
138
+ };
139
+
140
+ const csv = renderAnalysisCsv(result);
141
+ const lines = csv.split('\n');
142
+ expect(lines[1]).toContain('Pending/Limit');
143
+ });
144
+
145
+ test('handles missing title and description gracefully', () => {
146
+ const result: AnalysisResult = {
147
+ pages: [
148
+ {
149
+ url: 'https://example.com/missing',
150
+ status: 404,
151
+ seoScore: 0,
152
+ thinScore: 0,
153
+ title: { value: undefined as any, length: 0, status: 'missing' },
154
+ metaDescription: { value: null as any, length: 0, status: 'missing' },
155
+ content: { wordCount: 0 } as any,
156
+ links: { internalLinks: 0, externalLinks: 0 } as any,
157
+ h1: {} as any,
158
+ images: {} as any,
159
+ structuredData: {} as any,
160
+ meta: {}
161
+ }
162
+ ],
163
+ site_summary: {
164
+ pages_analyzed: 1,
165
+ avg_seo_score: 0,
166
+ thin_pages: 0,
167
+ duplicate_titles: 0,
168
+ site_score: 0
169
+ },
170
+ site_scores: {} as any,
171
+ active_modules: {
172
+ seo: true,
173
+ content: true,
174
+ accessibility: true
175
+ }
176
+ };
177
+
178
+ const csv = renderAnalysisCsv(result);
179
+ const lines = csv.split('\n');
180
+ // Should produce empty quoted strings ""
181
+ expect(lines[1]).toContain(',"",0,"",0,0,0,0');
182
+ });
183
+ });
@@ -34,6 +34,18 @@ describe('IPGuard', () => {
34
34
  expect(IPGuard.isInternal('fe80::1')).toBe(true);
35
35
  });
36
36
 
37
+ it('should block IPv4-mapped IPv6 internal addresses', () => {
38
+ expect(IPGuard.isInternal('::ffff:127.0.0.1')).toBe(true);
39
+ expect(IPGuard.isInternal('::ffff:10.0.0.1')).toBe(true);
40
+ expect(IPGuard.isInternal('::ffff:192.168.1.1')).toBe(true);
41
+ expect(IPGuard.isInternal('::ffff:169.254.169.254')).toBe(true);
42
+ expect(IPGuard.isInternal('::ffff:7f00:0001')).toBe(true); // Hex 127.0.0.1
43
+ });
44
+
45
+ it('should allow IPv4-mapped IPv6 public addresses', () => {
46
+ expect(IPGuard.isInternal('::ffff:8.8.8.8')).toBe(false);
47
+ });
48
+
37
49
  it('should validate hostname by resolving IPs', async () => {
38
50
  const resolve4Spy = vi.mocked(dns.resolve4);
39
51
  const resolve6Spy = vi.mocked(dns.resolve6);
@@ -25,6 +25,13 @@ describe('DomainFilter', () => {
25
25
  const filter = new DomainFilter(['allowed.com']);
26
26
  expect(filter.isAllowed('other.com')).toBe(false);
27
27
  });
28
+
29
+ it('should fallback to raw string on invalid hostname', () => {
30
+ // '[' and 'http://denied-invalid-[' causes new URL() to throw
31
+ const filter = new DomainFilter(['['], ['denied-invalid-[']);
32
+ expect(filter.isAllowed('[')).toBe(true);
33
+ expect(filter.isAllowed('denied-invalid-[')).toBe(false);
34
+ });
28
35
  });
29
36
 
30
37
  describe('SubdomainPolicy', () => {
@@ -63,4 +70,15 @@ describe('ScopeManager', () => {
63
70
  expect(manager.isUrlEligible('https://other.com/')).toBe('allowed');
64
71
  expect(manager.isUrlEligible('https://google.com/')).toBe('blocked_by_domain_filter');
65
72
  });
73
+
74
+ it('should handle trailing dots in hostnames', () => {
75
+ const manager = new ScopeManager({
76
+ rootUrl: 'https://example.com',
77
+ allowedDomains: ['example.com.'],
78
+ includeSubdomains: false
79
+ });
80
+
81
+ expect(manager.isUrlEligible('https://example.com./')).toBe('allowed');
82
+ expect(manager.isUrlEligible('https://example.com/')).toBe('allowed');
83
+ });
66
84
  });
@@ -1,5 +1,5 @@
1
1
  import { expect, test } from 'vitest';
2
- import { scorePageSeo, aggregateSiteScore } from '../src/analysis/scoring.js';
2
+ import { scorePageSeo } from '../src/analysis/scoring.js';
3
3
  import { PageAnalysis } from '../src/analysis/analyze.js';
4
4
 
5
5
  const basePage: PageAnalysis = {
@@ -13,7 +13,8 @@ const basePage: PageAnalysis = {
13
13
  images: { totalImages: 2, missingAlt: 0, emptyAlt: 0 },
14
14
  links: { internalLinks: 5, externalLinks: 2, nofollowCount: 1, externalRatio: 2 / 7 },
15
15
  structuredData: { present: true, valid: true, types: ['Article'] },
16
- seoScore: 0
16
+ seoScore: 0,
17
+ meta: { noindex: false, nofollow: false }
17
18
  };
18
19
 
19
20
  test('page score stays in 0-100', () => {
@@ -34,26 +35,26 @@ test('page score stays in 0-100', () => {
34
35
  expect(scorePageSeo(badPage)).toBeLessThan(50);
35
36
  });
36
37
 
37
- test('aggregate site score includes existing metrics signals', () => {
38
- const score = aggregateSiteScore({
39
- totalPages: 2,
40
- totalEdges: 1,
41
- orphanPages: ['https://example.com/x'],
42
- nearOrphans: [],
43
- deepPages: [],
44
- topAuthorityPages: [{ url: 'a', authority: 1 }],
45
- averageOutDegree: 1,
46
- maxDepthFound: 1,
47
- crawlEfficiencyScore: 0.8,
48
- averageDepth: 1,
49
- structuralEntropy: 2,
50
- limitReached: false
51
- }, [
52
- { ...basePage, seoScore: 70 },
53
- { ...basePage, seoScore: 90, url: 'https://example.com/2' }
54
- ]);
38
+ // test('aggregate site score includes existing metrics signals', () => {
39
+ // const score = aggregateSiteScore({
40
+ // totalPages: 2,
41
+ // totalEdges: 1,
42
+ // orphanPages: ['https://example.com/x'],
43
+ // nearOrphans: [],
44
+ // deepPages: [],
45
+ // topAuthorityPages: [{ url: 'a', authority: 1 }],
46
+ // averageOutDegree: 1,
47
+ // maxDepthFound: 1,
48
+ // crawlEfficiencyScore: 0.8,
49
+ // averageDepth: 1,
50
+ // structuralEntropy: 2,
51
+ // limitReached: false
52
+ // }, [
53
+ // { ...basePage, seoScore: 70 },
54
+ // { ...basePage, seoScore: 90, url: 'https://example.com/2' }
55
+ // ]);
55
56
 
56
- expect(score.seoHealthScore).toBe(80);
57
- expect(score.overallScore).toBeGreaterThan(0);
58
- expect(score.overallScore).toBeLessThanOrEqual(100);
59
- });
57
+ // expect(score.seoHealthScore).toBe(80);
58
+ // expect(score.overallScore).toBeGreaterThan(0);
59
+ // expect(score.overallScore).toBeLessThanOrEqual(100);
60
+ // });
@@ -1,6 +1,7 @@
1
- import { test, expect, beforeEach } from 'vitest';
1
+ import { test, expect, beforeEach, vi } from 'vitest';
2
2
  import { Sitemap } from '../src/crawler/sitemap.js';
3
3
  import { MockAgent, setGlobalDispatcher } from 'undici';
4
+ import { EngineContext } from '../src/events.js';
4
5
 
5
6
  let mockAgent: MockAgent;
6
7
 
@@ -86,3 +87,14 @@ test('handles fetch errors gracefully', async () => {
86
87
  const urls = await sitemap.fetch('https://example.com/error.xml');
87
88
  expect(urls.length).toBe(0);
88
89
  });
90
+
91
+ test('emits warning on fetch error', async () => {
92
+ const client = mockAgent.get('https://example.com');
93
+ client.intercept({ path: '/error.xml', method: 'GET' }).replyWithError(new Error('Network error'));
94
+
95
+ const mockContext: EngineContext = { emit: vi.fn() };
96
+ const sitemap = new Sitemap(mockContext);
97
+ await sitemap.fetch('https://example.com/error.xml');
98
+
99
+ expect(mockContext.emit).toHaveBeenCalledWith(expect.objectContaining({ type: 'warn' }));
100
+ });
@@ -0,0 +1,69 @@
1
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
2
+ import { Fetcher } from '../src/crawler/fetcher.js';
3
+ import { request } from 'undici';
4
+ import { IPGuard } from '../src/core/security/ipGuard.js';
5
+
6
+ // Mock undici request to fail with EBLOCKED
7
+ vi.mock('undici', () => {
8
+ return {
9
+ request: vi.fn(),
10
+ Agent: class {
11
+ dispatch = vi.fn();
12
+ },
13
+ Dispatcher: class {}
14
+ };
15
+ });
16
+
17
+ // Mock IPGuard.validateHost to pass
18
+ vi.mock('../src/core/security/ipGuard.js', async () => {
19
+ const original = await vi.importActual('../src/core/security/ipGuard.js');
20
+ return {
21
+ ...original as any,
22
+ IPGuard: {
23
+ ...original.IPGuard,
24
+ validateHost: vi.fn().mockResolvedValue(true), // Pass step 1
25
+ getSecureDispatcher: vi.fn()
26
+ }
27
+ };
28
+ });
29
+
30
+ describe('SSRF Fix Reproduction', () => {
31
+ let fetcher: Fetcher;
32
+
33
+ beforeEach(() => {
34
+ vi.clearAllMocks();
35
+ // Setup default mock return for dispatcher
36
+ vi.mocked(IPGuard.getSecureDispatcher).mockReturnValue({} as any);
37
+ fetcher = new Fetcher({ rate: 100 });
38
+ });
39
+
40
+ it('should return blocked_internal_ip when secureDispatcher blocks', async () => {
41
+ const mockRequest = vi.mocked(request);
42
+ const mockGetSecureDispatcher = vi.mocked(IPGuard.getSecureDispatcher);
43
+ const mockDispatcher = { dispatch: vi.fn() } as any;
44
+ mockGetSecureDispatcher.mockReturnValue(mockDispatcher);
45
+
46
+ // Re-initialize fetcher so it calls getSecureDispatcher and gets our specific mock
47
+ fetcher = new Fetcher({ rate: 100 });
48
+
49
+ // Simulate secureDispatcher blocking via undici request throwing EBLOCKED
50
+ const blockedError = new Error('Blocked internal IP: 127.0.0.1');
51
+ (blockedError as any).code = 'EBLOCKED';
52
+
53
+ mockRequest.mockRejectedValueOnce(blockedError);
54
+
55
+ const res = await fetcher.fetch('http://example.com');
56
+
57
+ // Now we expect correct handling
58
+ expect(res.status).toBe('blocked_internal_ip');
59
+
60
+ // Verify that the secure dispatcher was indeed used
61
+ expect(mockGetSecureDispatcher).toHaveBeenCalled();
62
+ expect(mockRequest).toHaveBeenCalledWith(
63
+ expect.stringContaining('http://example.com'),
64
+ expect.objectContaining({
65
+ dispatcher: mockDispatcher
66
+ })
67
+ );
68
+ });
69
+ });
@@ -1,5 +1,5 @@
1
1
  import { describe, it, expect } from 'vitest';
2
- import { SITEGRAPH_HTML } from '../src/report/sitegraph_template.js';
2
+ import { Crawl_HTML } from '../src/report/crawl_template.js';
3
3
  import { Graph } from '../src/graph/graph.js';
4
4
  import { computePageRank } from '../src/graph/pagerank.js';
5
5
 
@@ -23,24 +23,24 @@ describe('Visualization Data & Template', () => {
23
23
  });
24
24
 
25
25
  it('should contain UI toggle buttons for Authority Mode', () => {
26
- expect(SITEGRAPH_HTML).toContain('id="btn-auth-pagerank"');
27
- expect(SITEGRAPH_HTML).toContain('id="btn-auth-structural"');
26
+ expect(Crawl_HTML).toContain('id="btn-auth-pagerank"');
27
+ expect(Crawl_HTML).toContain('id="btn-auth-structural"');
28
28
  });
29
29
 
30
30
  it('should contain setAuthorityMode function', () => {
31
31
  // Use regex to be flexible with whitespace
32
- expect(SITEGRAPH_HTML).toMatch(/function\s+setAuthorityMode\s*\(mode,\s*btn\)/);
33
- expect(SITEGRAPH_HTML).toContain('n.authority = mode === \'pagerank\' ? n.pageRankAuthority : n.structuralAuthority');
32
+ expect(Crawl_HTML).toMatch(/function\s+setAuthorityMode\s*\(mode,\s*btn\)/);
33
+ expect(Crawl_HTML).toContain('n.authority = mode === \'pagerank\' ? n.pageRankAuthority : n.structuralAuthority');
34
34
  });
35
35
 
36
36
  it('should contain logic to calculate pageRankAuthority from pageRankScore', () => {
37
- expect(SITEGRAPH_HTML).toContain('n.pageRankAuthority = n.pageRankScore / 100');
38
- expect(SITEGRAPH_HTML).toContain('n.structuralAuthority = Math.log(1 + n.inLinks)');
37
+ expect(Crawl_HTML).toContain('n.pageRankAuthority = n.pageRankScore / 100');
38
+ expect(Crawl_HTML).toContain('n.structuralAuthority = Math.log(1 + n.inLinks)');
39
39
  });
40
40
 
41
41
  it('should update details panel to show both metrics', () => {
42
- expect(SITEGRAPH_HTML).toContain('id="d-auth-container"');
43
- expect(SITEGRAPH_HTML).toContain('In-Degree: ${structVal}');
44
- expect(SITEGRAPH_HTML).toContain('PR: <strong>${prVal}</strong>');
42
+ expect(Crawl_HTML).toContain('id="d-auth-container"');
43
+ expect(Crawl_HTML).toContain('In-Degree: ${structVal}');
44
+ expect(Crawl_HTML).toContain('PR: <strong>${prVal}</strong>');
45
45
  });
46
46
  });
@@ -1,3 +0,0 @@
1
- export declare function renderSitegraphCsvNodes(graphData: any): string;
2
- export declare function renderSitegraphCsvEdges(graphData: any): string;
3
- export declare function renderSitegraphMarkdown(url: string, graphData: any, metrics: any, graph: any): string;
@@ -1 +0,0 @@
1
- export declare const SITEGRAPH_HTML = "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <title>Crawlith Site Graph</title>\n <style>\n :root {\n --bg-color: #121212;\n --text-color: #e0e0e0;\n --panel-bg: #1e1e1e;\n --border-color: #333;\n --accent-color: #4a90e2;\n --sidebar-width: 300px;\n }\n body { margin: 0; font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", Roboto, Helvetica, Arial, sans-serif; background: var(--bg-color); color: var(--text-color); height: 100vh; display: flex; flex-direction: column; overflow: hidden; }\n\n /* Layout */\n header { padding: 0 20px; background: var(--panel-bg); border-bottom: 1px solid var(--border-color); display: flex; justify-content: space-between; align-items: center; height: 60px; box-sizing: border-box; z-index: 10; }\n main { flex: 1; display: flex; overflow: hidden; position: relative; }\n #graph-container { flex: 1; position: relative; overflow: hidden; background: var(--bg-color); }\n #details-panel { width: var(--sidebar-width); background: var(--panel-bg); border-left: 1px solid var(--border-color); padding: 20px; overflow-y: auto; box-sizing: border-box; display: none; flex-direction: column; gap: 15px; }\n #details-panel.visible { display: flex; }\n footer { padding: 5px 20px; background: var(--panel-bg); border-top: 1px solid var(--border-color); font-size: 0.8rem; text-align: center; color: #666; height: 30px; display: flex; align-items: center; justify-content: center; }\n\n /* Header Components */\n .brand { font-weight: bold; font-size: 1.2rem; display: flex; align-items: center; gap: 10px; }\n .brand span { color: var(--accent-color); }\n #metrics-summary { font-size: 0.9rem; color: #aaa; display: flex; gap: 20px; }\n .metric { display: flex; flex-direction: column; align-items: center; line-height: 1.1; }\n .metric-value { font-weight: bold; color: var(--text-color); }\n .metric-label { font-size: 0.7rem; }\n\n #controls { display: flex; gap: 10px; align-items: center; }\n .btn-group { display: flex; background: #333; border-radius: 4px; overflow: hidden; }\n button { background: transparent; color: #aaa; border: none; padding: 6px 12px; cursor: pointer; font-size: 0.85rem; transition: all 0.2s; }\n button:hover { color: white; background: rgba(255,255,255,0.1); }\n button.active { background: var(--accent-color); color: white; }\n\n /* Search */\n #search-container { position: absolute; top: 15px; left: 15px; z-index: 5; }\n #search-input { background: rgba(30,30,30,0.9); border: 1px solid #444; color: white; padding: 8px 12px; border-radius: 20px; width: 200px; outline: none; transition: width 0.3s; }\n #search-input:focus { width: 280px; border-color: var(--accent-color); }\n\n /* Graph */\n svg { width: 100%; height: 100%; display: block; }\n .node { cursor: pointer; transition: stroke-width 0.1s; }\n .link { stroke: #555; stroke-opacity: 0.3; fill: none; pointer-events: none; }\n\n /* Interaction States */\n .node.highlight { stroke: #fff; stroke-width: 2px; }\n .link.highlight { stroke-opacity: 0.8; stroke: #999; }\n .node.faded { opacity: 0.1; }\n .link.faded { opacity: 0.05; }\n\n /* Details Panel Content */\n .detail-section { border-bottom: 1px solid #333; padding-bottom: 10px; }\n .detail-section:last-child { border-bottom: none; }\n .detail-label { font-size: 0.75rem; color: #888; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 4px; }\n .detail-value { font-size: 0.95rem; word-break: break-all; }\n .detail-list { list-style: none; padding: 0; margin: 0; max-height: 150px; overflow-y: auto; font-size: 0.85rem; }\n .detail-list li { padding: 4px 0; border-bottom: 1px solid #2a2a2a; }\n .detail-list a { color: var(--accent-color); text-decoration: none; }\n .detail-list a:hover { text-decoration: underline; }\n\n .status-badge { display: inline-block; padding: 2px 6px; border-radius: 3px; font-size: 0.75rem; font-weight: bold; margin-top: 5px; }\n .status-ok { background: #2e7d32; color: white; }\n .status-warn { background: #f9a825; color: black; }\n .status-error { background: #c62828; color: white; }\n\n /* Tooltip */\n #tooltip { position: absolute; background: rgba(20,20,20,0.95); color: white; padding: 10px; border-radius: 6px; pointer-events: none; font-size: 12px; z-index: 100; box-shadow: 0 4px 15px rgba(0,0,0,0.5); border: 1px solid #444; display: none; transform: translate(-50%, -100%); margin-top: -10px; white-space: nowrap; }\n\n /* Responsive Sidebar */\n @media (max-width: 768px) {\n #details-panel { position: absolute; right: 0; top: 0; bottom: 0; z-index: 20; box-shadow: -5px 0 15px rgba(0,0,0,0.5); transform: translateX(100%); transition: transform 0.3s ease; }\n #details-panel.visible { transform: translateX(0); }\n #metrics-summary { display: none; }\n }\n </style>\n</head>\n<body>\n <header>\n <div class=\"brand\"><span>Crawlith</span> SiteGraph</div>\n\n <div id=\"metrics-summary\">\n <div class=\"metric\"><span class=\"metric-value\" id=\"m-pages\">-</span><span class=\"metric-label\">Pages</span></div>\n <div class=\"metric\"><span class=\"metric-value\" id=\"m-depth\">-</span><span class=\"metric-label\">Max Depth</span></div>\n <div class=\"metric\"><span class=\"metric-value\" id=\"m-eff\">-</span><span class=\"metric-label\">Efficiency</span></div>\n <div class=\"metric\"><span class=\"metric-value\" id=\"m-orphan\">-</span><span class=\"metric-label\">Orphans</span></div>\n </div>\n\n <div id=\"controls\">\n <div class=\"btn-group\" style=\"margin-right: 15px;\">\n <button id=\"btn-auth-pagerank\" class=\"active\" title=\"PageRank Authority\">PageRank</button>\n <button id=\"btn-auth-structural\" title=\"Structural Authority (In-Degree)\">In-Degree</button>\n </div>\n <div class=\"btn-group\">\n <button id=\"btn-hierarchical\" class=\"active\">Hierarchical</button>\n <button id=\"btn-radial\">Radial</button>\n </div>\n </div>\n </header>\n\n <main>\n <div id=\"graph-container\">\n <div id=\"search-container\">\n <input type=\"text\" id=\"search-input\" placeholder=\"Search URL...\">\n </div>\n <svg id=\"graph\"></svg>\n <div id=\"tooltip\"></div>\n </div>\n\n <aside id=\"details-panel\">\n <div class=\"detail-section\">\n <div class=\"detail-label\">URL</div>\n <div class=\"detail-value\" id=\"d-url\">-</div>\n <div id=\"d-status\"></div>\n </div>\n <div class=\"detail-section\" style=\"display: flex; gap: 20px;\">\n <div>\n <div class=\"detail-label\">Depth</div>\n <div class=\"detail-value\" id=\"d-depth\">-</div>\n </div>\n <div>\n <div class=\"detail-label\">Authority</div>\n <div class=\"detail-value\" id=\"d-auth-container\">-</div>\n </div>\n </div>\n <div class=\"detail-section\">\n <div class=\"detail-label\">In-links (<span id=\"d-in-count\">0</span>)</div>\n <!-- List could be populated here if we had the reverse index, for now just count -->\n </div>\n <div class=\"detail-section\">\n <div class=\"detail-label\">Out-links (<span id=\"d-out-count\">0</span>)</div>\n <ul class=\"detail-list\" id=\"d-out-list\"></ul>\n </div>\n </aside>\n </main>\n\n <footer>\n Generated by Crawlith Crawler\n </footer>\n\n <!-- D3 from CDN -->\n <script src=\"https://d3js.org/d3.v7.min.js\"></script>\n\n <script>\n // --- State ---\n const state = {\n nodes: [],\n links: [],\n metrics: {},\n adjacency: new Map(), // url -> { in: [], out: [] }\n simulation: null,\n width: 0,\n height: 0,\n transform: d3.zoomIdentity,\n activeNode: null,\n mode: 'hierarchical', // 'hierarchical' | 'radial'\n maxDepth: 0,\n maxInLinks: 0,\n nodeSelection: null,\n linkSelection: null,\n zoom: null\n };\n\n // --- DOM Elements ---\n const svg = d3.select(\"#graph\");\n const container = svg.append(\"g\");\n const linkGroup = container.append(\"g\").attr(\"class\", \"links\");\n const nodeGroup = container.append(\"g\").attr(\"class\", \"nodes\");\n const tooltip = d3.select(\"#tooltip\");\n const detailsPanel = d3.select(\"#details-panel\");\n\n // --- Initialization ---\n // --- Initialization ---\n async function init() {\n try {\n let graphData, metricsData;\n\n // 1. Try to use injected data (for file:// usage)\n // @ts-ignore\n if (window.GRAPH_DATA) graphData = window.GRAPH_DATA;\n // @ts-ignore\n if (window.METRICS_DATA) metricsData = window.METRICS_DATA;\n\n // 2. Fallback to fetching JSON files (for web server usage)\n if (!graphData || !metricsData) {\n try {\n const [graphRes, metricsRes] = await Promise.all([\n fetch('graph.json'),\n fetch('metrics.json')\n ]);\n if (graphRes.ok && metricsRes.ok) {\n graphData = await graphRes.json();\n metricsData = await metricsRes.json();\n }\n } catch (e) {\n console.warn(\"Fetch failed, possibly due to CORS or missing files.\", e);\n }\n }\n\n if (!graphData || !metricsData) {\n throw new Error(\"No data available. Ensure graph.json exists or data is injected.\");\n }\n\n state.metrics = metricsData;\n processData(graphData);\n updateMetricsUI();\n\n // Setup UI\n setupResize();\n setupInteractions();\n setupSearch();\n\n // Start Simulation\n initSimulation();\n\n } catch (err) {\n console.error(err);\n alert(\"Error loading visualization data: \" + err.message);\n }\n }\n\n function processData(data) {\n // Create a map for fast lookup\n const nodeMap = new Map();\n\n data.nodes.forEach(n => {\n n.inLinks = n.inLinks || 0;\n n.outLinks = n.outLinks || 0;\n nodeMap.set(n.url, n);\n });\n\n // Filter valid links\n state.links = data.edges\n .map(e => ({ source: nodeMap.get(e.source), target: nodeMap.get(e.target) }))\n .filter(e => e.source && e.target);\n\n state.nodes = data.nodes;\n\n // Calculate Stats\n state.maxDepth = d3.max(state.nodes, d => d.depth) || 1;\n state.maxInLinks = d3.max(state.nodes, d => d.inLinks) || 1;\n\n // Calculate Authority & Enrich Nodes\n state.nodes.forEach(n => {\n // Structural Authority: log-scaled normalized 0-1 based on in-links\n n.structuralAuthority = Math.log(1 + n.inLinks) / Math.log(1 + state.maxInLinks);\n\n // PageRank Authority: normalized 0-1 from pageRankScore (0-100)\n if (typeof n.pageRankScore === 'number') {\n n.pageRankAuthority = n.pageRankScore / 100;\n } else {\n n.pageRankAuthority = n.structuralAuthority;\n }\n\n // Default authority to PageRank if available, else structural\n n.authority = n.pageRankAuthority;\n\n // Ensure x,y are initialized to avoid NaNs if D3 doesn't do it fast enough\n n.x = 0; n.y = 0;\n });\n\n // Build Adjacency Map\n state.nodes.forEach(n => state.adjacency.set(n.url, { in: [], out: [] }));\n state.links.forEach(l => {\n state.adjacency.get(l.source.url).out.push(l.target);\n state.adjacency.get(l.target.url).in.push(l.source);\n });\n }\n\n function updateMetricsUI() {\n document.getElementById('m-pages').textContent = state.metrics.totalPages;\n document.getElementById('m-depth').textContent = state.metrics.maxDepthFound;\n document.getElementById('m-eff').textContent = (state.metrics.crawlEfficiencyScore * 100).toFixed(1) + '%';\n document.getElementById('m-orphan').textContent = state.metrics.orphanPages.length;\n }\n\n // --- Simulation ---\n function initSimulation() {\n const { width, height } = getDimensions();\n state.width = width;\n state.height = height;\n\n // Safeguards\n const nodeCount = state.nodes.length;\n const enableCollision = nodeCount <= 1200;\n const alphaDecay = nodeCount > 1000 ? 0.05 : 0.02; // Faster decay for large graphs\n\n state.simulation = d3.forceSimulation(state.nodes)\n .alphaDecay(alphaDecay)\n .force(\"link\", d3.forceLink(state.links).id(d => d.url).strength(0.5)) // Reduced strength for flexibility\n .force(\"charge\", d3.forceManyBody().strength(nodeCount > 1000 ? -100 : -300))\n .force(\"center\", d3.forceCenter(width / 2, height / 2));\n\n if (enableCollision) {\n state.simulation.force(\"collide\", d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1));\n }\n\n // Apply Layout Mode\n applyLayoutMode(state.mode);\n\n // Rendering loop\n state.simulation.on(\"tick\", ticked);\n\n // Render initial SVG elements\n render();\n }\n\n function applyLayoutMode(mode) {\n state.mode = mode;\n const { width, height } = state;\n const centerY = height / 2;\n const centerX = width / 2;\n\n // Remove conflicting forces\n state.simulation.force(\"y\", null);\n state.simulation.force(\"radial\", null);\n\n if (mode === 'hierarchical') {\n const depthSpacing = height / (state.maxDepth + 2);\n // Hierarchical: Nodes pushed to Y levels based on depth\n state.simulation.force(\"y\", d3.forceY(d => {\n return (d.depth * depthSpacing) - (height/2) + 50; // Offset to start from top\n }).strength(1));\n // We rely on \"center\" force to keep X centered, but maybe add weak forceX?\n // Let's add weak forceX to prevent wide spread\n state.simulation.force(\"x\", d3.forceX(0).strength(0.05));\n state.simulation.force(\"center\", d3.forceCenter(width/2, height/2)); // Recenter\n\n } else if (mode === 'radial') {\n const maxRadius = Math.min(width, height) / 2 - 50;\n const ringSpacing = maxRadius / (state.maxDepth + 1);\n\n state.simulation.force(\"radial\", d3.forceRadial(\n d => d.depth * ringSpacing,\n width / 2,\n height / 2\n ).strength(0.8));\n\n state.simulation.force(\"x\", null); // Remove X constraint\n }\n\n state.simulation.alpha(1).restart();\n }\n\n function getNodeRadius(d) {\n // 5 + authority * 15\n return 5 + (d.authority * 15);\n }\n\n function getNodeColor(d) {\n // Depth-based sequential color (Blue -> Purple -> Pink)\n const t = d.depth / (state.maxDepth || 1);\n return d3.interpolateViridis(1 - t); // Invert Viridis for better contrast on dark\n }\n\n function render() {\n // Links\n state.linkSelection = linkGroup.selectAll(\"line\")\n .data(state.links)\n .join(\"line\")\n .attr(\"class\", \"link\")\n .attr(\"stroke-width\", 0.5);\n\n // Nodes\n state.nodeSelection = nodeGroup.selectAll(\"circle\")\n .data(state.nodes)\n .join(\"circle\")\n .attr(\"class\", \"node\")\n .attr(\"r\", d => getNodeRadius(d))\n .attr(\"fill\", d => getNodeColor(d))\n .attr(\"stroke\", d => d.status >= 400 ? \"#ff4444\" : null) // Red stroke for errors\n .on(\"mouseover\", (event, d) => {\n if (state.activeNode) return;\n highlightNode(d);\n showTooltip(event, d);\n })\n .on(\"mouseout\", () => {\n if (state.activeNode) return;\n resetHighlight();\n hideTooltip();\n })\n .on(\"click\", (event, d) => {\n event.stopPropagation();\n selectNode(d);\n })\n .call(d3.drag()\n .on(\"start\", dragstarted)\n .on(\"drag\", dragged)\n .on(\"end\", dragended));\n\n // Zoom\n state.zoom = d3.zoom()\n .scaleExtent([0.1, 4])\n .on(\"zoom\", (event) => {\n state.transform = event.transform;\n container.attr(\"transform\", event.transform);\n });\n\n svg.call(state.zoom)\n .call(state.zoom.transform, d3.zoomIdentity.translate(state.width/2, state.height/2).scale(0.8).translate(-state.width/2, -state.height/2)); // Initial zoom out\n }\n\n function ticked() {\n if (state.linkSelection) {\n state.linkSelection\n .attr(\"x1\", d => d.source.x)\n .attr(\"y1\", d => d.source.y)\n .attr(\"x2\", d => d.target.x)\n .attr(\"y2\", d => d.target.y);\n }\n\n if (state.nodeSelection) {\n state.nodeSelection\n .attr(\"cx\", d => d.x)\n .attr(\"cy\", d => d.y);\n }\n }\n\n // --- Interactions ---\n\n function setupInteractions() {\n // Background click to clear selection\n svg.on(\"click\", () => {\n state.activeNode = null;\n resetHighlight();\n detailsPanel.classed(\"visible\", false);\n });\n\n // Layout Toggle\n d3.select(\"#btn-hierarchical\").on(\"click\", function() {\n setMode('hierarchical', this);\n });\n d3.select(\"#btn-radial\").on(\"click\", function() {\n setMode('radial', this);\n });\n\n // Authority Toggle\n d3.select(\"#btn-auth-pagerank\").on(\"click\", function() {\n setAuthorityMode('pagerank', this);\n });\n d3.select(\"#btn-auth-structural\").on(\"click\", function() {\n setAuthorityMode('structural', this);\n });\n }\n\n function setAuthorityMode(mode, btn) {\n d3.select(\"#btn-auth-pagerank\").classed(\"active\", false);\n d3.select(\"#btn-auth-structural\").classed(\"active\", false);\n d3.select(btn).classed(\"active\", true);\n\n state.nodes.forEach(n => {\n n.authority = mode === 'pagerank' ? n.pageRankAuthority : n.structuralAuthority;\n });\n\n // Update Visuals\n nodeGroup.selectAll(\"circle\")\n .transition().duration(500)\n .attr(\"r\", d => getNodeRadius(d));\n\n // Update collision force if enabled\n if (state.simulation.force(\"collide\")) {\n state.simulation.force(\"collide\", d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1));\n state.simulation.alpha(0.3).restart();\n }\n }\n\n function setMode(mode, btn) {\n d3.selectAll(\"#controls button\").classed(\"active\", false);\n d3.select(btn).classed(\"active\", true);\n applyLayoutMode(mode);\n }\n\n function highlightNode(d) {\n const neighbors = new Set();\n const adj = state.adjacency.get(d.url);\n if (adj) {\n adj.in.forEach(n => neighbors.add(n.url));\n adj.out.forEach(n => neighbors.add(n.url));\n }\n neighbors.add(d.url);\n\n nodeGroup.selectAll(\"circle\").classed(\"faded\", n => !neighbors.has(n.url));\n nodeGroup.selectAll(\"circle\").classed(\"highlight\", n => n.url === d.url);\n\n linkGroup.selectAll(\"line\").classed(\"faded\", l =>\n l.source.url !== d.url && l.target.url !== d.url\n );\n linkGroup.selectAll(\"line\").classed(\"highlight\", l =>\n l.source.url === d.url || l.target.url === d.url\n );\n }\n\n function resetHighlight() {\n nodeGroup.selectAll(\"circle\").classed(\"faded\", false).classed(\"highlight\", false);\n linkGroup.selectAll(\"line\").classed(\"faded\", false).classed(\"highlight\", false);\n }\n\n function selectNode(d) {\n state.activeNode = d;\n highlightNode(d);\n showDetails(d);\n }\n\n function showTooltip(event, d) {\n // If we are transforming the container, we need to map coordinates correctly or just use pageX/Y\n tooltip.style(\"display\", \"block\")\n .html(`<strong>${new URL(d.url).pathname}</strong><br>Auth: ${(d.authority * 10).toFixed(1)}`)\n .style(\"left\", (event.pageX) + \"px\")\n .style(\"top\", (event.pageY - 10) + \"px\");\n }\n\n function hideTooltip() {\n tooltip.style(\"display\", \"none\");\n }\n\n function showDetails(d) {\n detailsPanel.classed(\"visible\", true);\n d3.select(\"#d-url\").text(d.url);\n d3.select(\"#d-depth\").text(d.depth);\n\n const authContainer = d3.select(\"#d-auth-container\");\n authContainer.html(\"\");\n const prVal = (d.pageRankAuthority * 100).toFixed(1);\n const structVal = d.structuralAuthority.toFixed(3);\n authContainer.append(\"div\").html(`PR: <strong>${prVal}</strong>`);\n authContainer.append(\"div\").style(\"color\", \"#888\").style(\"font-size\", \"0.8em\").text(`In-Degree: ${structVal}`);\n\n d3.select(\"#d-in-count\").text(d.inLinks);\n d3.select(\"#d-out-count\").text(d.outLinks);\n\n // Status badge\n const statusDiv = d3.select(\"#d-status\");\n statusDiv.html(\"\");\n let sClass = \"status-ok\";\n if (d.status >= 400) sClass = \"status-error\";\n else if (d.status >= 300) sClass = \"status-warn\";\n statusDiv.append(\"span\").attr(\"class\", \"status-badge \" + sClass).text(d.status);\n\n // Outlinks list (limit to 20)\n const list = d3.select(\"#d-out-list\");\n list.html(\"\");\n const adj = state.adjacency.get(d.url);\n if (adj && adj.out.length > 0) {\n adj.out.slice(0, 50).forEach(target => {\n list.append(\"li\").append(\"a\")\n .attr(\"href\", target.url)\n .attr(\"target\", \"_blank\")\n .text(new URL(target.url).pathname);\n });\n if (adj.out.length > 50) {\n list.append(\"li\").text(`...and ${adj.out.length - 50} more`);\n }\n } else {\n list.append(\"li\").text(\"No outgoing links\");\n }\n }\n\n // --- Search ---\n function setupSearch() {\n const input = document.getElementById('search-input');\n input.addEventListener('keydown', (e) => {\n if (e.key === 'Enter') {\n const val = input.value.trim().toLowerCase();\n if (!val) return;\n\n const found = state.nodes.find(n => n.url.toLowerCase().includes(val));\n if (found) {\n selectNode(found);\n // Center view on node\n const transform = d3.zoomIdentity\n .translate(state.width/2, state.height/2)\n .scale(2)\n .translate(-found.x, -found.y);\n\n svg.transition().duration(750).call(state.zoom.transform, transform);\n }\n }\n });\n }\n\n function setupResize() {\n window.addEventListener(\"resize\", () => {\n const { width, height } = getDimensions();\n state.width = width;\n state.height = height;\n state.simulation.force(\"center\", d3.forceCenter(width / 2, height / 2));\n if (state.mode === 'hierarchical') {\n // Re-evaluate Y force if needed, but usually center is enough\n }\n state.simulation.alpha(0.3).restart();\n });\n }\n\n function getDimensions() {\n const rect = document.getElementById(\"graph-container\").getBoundingClientRect();\n return { width: rect.width, height: rect.height };\n }\n\n // --- Dragging ---\n function dragstarted(event, d) {\n if (!event.active) state.simulation.alphaTarget(0.3).restart();\n d.fx = d.x;\n d.fy = d.y;\n }\n\n function dragged(event, d) {\n d.fx = event.x;\n d.fy = event.y;\n }\n\n function dragended(event, d) {\n if (!event.active) state.simulation.alphaTarget(0);\n d.fx = null;\n d.fy = null;\n }\n\n // Start\n if (document.readyState === 'loading') {\n document.addEventListener('DOMContentLoaded', init);\n } else {\n init();\n }\n </script>\n</body>\n</html>\n";