@crawlith/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/analysis/analyze.d.ts +70 -0
- package/dist/analysis/analyze.js +436 -0
- package/dist/analysis/content.d.ts +12 -0
- package/dist/analysis/content.js +33 -0
- package/dist/analysis/images.d.ts +6 -0
- package/dist/analysis/images.js +18 -0
- package/dist/analysis/links.d.ts +7 -0
- package/dist/analysis/links.js +30 -0
- package/dist/analysis/scoring.d.ts +9 -0
- package/dist/analysis/scoring.js +42 -0
- package/dist/analysis/seo.d.ts +15 -0
- package/dist/analysis/seo.js +64 -0
- package/dist/analysis/structuredData.d.ts +6 -0
- package/dist/analysis/structuredData.js +51 -0
- package/dist/audit/dns.d.ts +2 -0
- package/dist/audit/dns.js +42 -0
- package/dist/audit/headers.d.ts +2 -0
- package/dist/audit/headers.js +95 -0
- package/dist/audit/index.d.ts +2 -0
- package/dist/audit/index.js +50 -0
- package/dist/audit/scoring.d.ts +14 -0
- package/dist/audit/scoring.js +214 -0
- package/dist/audit/transport.d.ts +6 -0
- package/dist/audit/transport.js +207 -0
- package/dist/audit/types.d.ts +88 -0
- package/dist/audit/types.js +1 -0
- package/dist/core/network/proxyAdapter.d.ts +6 -0
- package/dist/core/network/proxyAdapter.js +19 -0
- package/dist/core/network/rateLimiter.d.ts +6 -0
- package/dist/core/network/rateLimiter.js +31 -0
- package/dist/core/network/redirectController.d.ts +13 -0
- package/dist/core/network/redirectController.js +41 -0
- package/dist/core/network/responseLimiter.d.ts +4 -0
- package/dist/core/network/responseLimiter.js +26 -0
- package/dist/core/network/retryPolicy.d.ts +10 -0
- package/dist/core/network/retryPolicy.js +41 -0
- package/dist/core/scope/domainFilter.d.ts +11 -0
- package/dist/core/scope/domainFilter.js +40 -0
- package/dist/core/scope/scopeManager.d.ts +14 -0
- package/dist/core/scope/scopeManager.js +39 -0
- package/dist/core/scope/subdomainPolicy.d.ts +6 -0
- package/dist/core/scope/subdomainPolicy.js +35 -0
- package/dist/core/security/ipGuard.d.ts +11 -0
- package/dist/core/security/ipGuard.js +84 -0
- package/dist/crawler/crawl.d.ts +22 -0
- package/dist/crawler/crawl.js +336 -0
- package/dist/crawler/extract.d.ts +5 -0
- package/dist/crawler/extract.js +33 -0
- package/dist/crawler/fetcher.d.ts +40 -0
- package/dist/crawler/fetcher.js +161 -0
- package/dist/crawler/metricsRunner.d.ts +1 -0
- package/dist/crawler/metricsRunner.js +108 -0
- package/dist/crawler/normalize.d.ts +7 -0
- package/dist/crawler/normalize.js +88 -0
- package/dist/crawler/parser.d.ts +22 -0
- package/dist/crawler/parser.js +158 -0
- package/dist/crawler/sitemap.d.ts +8 -0
- package/dist/crawler/sitemap.js +70 -0
- package/dist/crawler/trap.d.ts +24 -0
- package/dist/crawler/trap.js +78 -0
- package/dist/db/graphLoader.d.ts +2 -0
- package/dist/db/graphLoader.js +96 -0
- package/dist/db/index.d.ts +4 -0
- package/dist/db/index.js +61 -0
- package/dist/db/repositories/EdgeRepository.d.ts +16 -0
- package/dist/db/repositories/EdgeRepository.js +17 -0
- package/dist/db/repositories/MetricsRepository.d.ts +26 -0
- package/dist/db/repositories/MetricsRepository.js +27 -0
- package/dist/db/repositories/PageRepository.d.ts +47 -0
- package/dist/db/repositories/PageRepository.js +93 -0
- package/dist/db/repositories/SiteRepository.d.ts +15 -0
- package/dist/db/repositories/SiteRepository.js +22 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +22 -0
- package/dist/db/repositories/SnapshotRepository.js +55 -0
- package/dist/db/schema.d.ts +2 -0
- package/dist/db/schema.js +169 -0
- package/dist/diff/compare.d.ts +26 -0
- package/dist/diff/compare.js +64 -0
- package/dist/graph/cluster.d.ts +6 -0
- package/dist/graph/cluster.js +173 -0
- package/dist/graph/duplicate.d.ts +10 -0
- package/dist/graph/duplicate.js +251 -0
- package/dist/graph/graph.d.ts +103 -0
- package/dist/graph/graph.js +106 -0
- package/dist/graph/metrics.d.ts +29 -0
- package/dist/graph/metrics.js +74 -0
- package/dist/graph/pagerank.d.ts +12 -0
- package/dist/graph/pagerank.js +102 -0
- package/dist/graph/simhash.d.ts +17 -0
- package/dist/graph/simhash.js +56 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +30 -0
- package/dist/lock/hashKey.d.ts +1 -0
- package/dist/lock/hashKey.js +44 -0
- package/dist/lock/lockManager.d.ts +7 -0
- package/dist/lock/lockManager.js +112 -0
- package/dist/lock/pidCheck.d.ts +1 -0
- package/dist/lock/pidCheck.js +14 -0
- package/dist/report/html.d.ts +2 -0
- package/dist/report/html.js +223 -0
- package/dist/report/sitegraphExport.d.ts +3 -0
- package/dist/report/sitegraphExport.js +52 -0
- package/dist/report/sitegraph_template.d.ts +1 -0
- package/dist/report/sitegraph_template.js +630 -0
- package/dist/scoring/hits.d.ts +9 -0
- package/dist/scoring/hits.js +111 -0
- package/dist/scoring/orphanSeverity.d.ts +39 -0
- package/dist/scoring/orphanSeverity.js +125 -0
- package/dist/utils/version.d.ts +2 -0
- package/dist/utils/version.js +15 -0
- package/package.json +33 -0
- package/src/analysis/analyze.ts +548 -0
- package/src/analysis/content.ts +62 -0
- package/src/analysis/images.ts +28 -0
- package/src/analysis/links.ts +41 -0
- package/src/analysis/scoring.ts +59 -0
- package/src/analysis/seo.ts +82 -0
- package/src/analysis/structuredData.ts +62 -0
- package/src/audit/dns.ts +49 -0
- package/src/audit/headers.ts +98 -0
- package/src/audit/index.ts +66 -0
- package/src/audit/scoring.ts +232 -0
- package/src/audit/transport.ts +258 -0
- package/src/audit/types.ts +102 -0
- package/src/core/network/proxyAdapter.ts +21 -0
- package/src/core/network/rateLimiter.ts +39 -0
- package/src/core/network/redirectController.ts +47 -0
- package/src/core/network/responseLimiter.ts +34 -0
- package/src/core/network/retryPolicy.ts +57 -0
- package/src/core/scope/domainFilter.ts +45 -0
- package/src/core/scope/scopeManager.ts +52 -0
- package/src/core/scope/subdomainPolicy.ts +39 -0
- package/src/core/security/ipGuard.ts +92 -0
- package/src/crawler/crawl.ts +382 -0
- package/src/crawler/extract.ts +34 -0
- package/src/crawler/fetcher.ts +233 -0
- package/src/crawler/metricsRunner.ts +124 -0
- package/src/crawler/normalize.ts +108 -0
- package/src/crawler/parser.ts +190 -0
- package/src/crawler/sitemap.ts +73 -0
- package/src/crawler/trap.ts +96 -0
- package/src/db/graphLoader.ts +105 -0
- package/src/db/index.ts +70 -0
- package/src/db/repositories/EdgeRepository.ts +29 -0
- package/src/db/repositories/MetricsRepository.ts +49 -0
- package/src/db/repositories/PageRepository.ts +128 -0
- package/src/db/repositories/SiteRepository.ts +32 -0
- package/src/db/repositories/SnapshotRepository.ts +74 -0
- package/src/db/schema.ts +177 -0
- package/src/diff/compare.ts +84 -0
- package/src/graph/cluster.ts +192 -0
- package/src/graph/duplicate.ts +286 -0
- package/src/graph/graph.ts +172 -0
- package/src/graph/metrics.ts +110 -0
- package/src/graph/pagerank.ts +125 -0
- package/src/graph/simhash.ts +61 -0
- package/src/index.ts +30 -0
- package/src/lock/hashKey.ts +51 -0
- package/src/lock/lockManager.ts +124 -0
- package/src/lock/pidCheck.ts +13 -0
- package/src/report/html.ts +227 -0
- package/src/report/sitegraphExport.ts +58 -0
- package/src/report/sitegraph_template.ts +630 -0
- package/src/scoring/hits.ts +131 -0
- package/src/scoring/orphanSeverity.ts +176 -0
- package/src/utils/version.ts +18 -0
- package/tests/__snapshots__/orphanSeverity.test.ts.snap +49 -0
- package/tests/analysis.unit.test.ts +98 -0
- package/tests/analyze.integration.test.ts +98 -0
- package/tests/audit/dns.test.ts +31 -0
- package/tests/audit/headers.test.ts +45 -0
- package/tests/audit/scoring.test.ts +133 -0
- package/tests/audit/security.test.ts +12 -0
- package/tests/audit/transport.test.ts +112 -0
- package/tests/clustering.test.ts +118 -0
- package/tests/crawler.test.ts +358 -0
- package/tests/db.test.ts +159 -0
- package/tests/diff.test.ts +67 -0
- package/tests/duplicate.test.ts +110 -0
- package/tests/fetcher.test.ts +106 -0
- package/tests/fetcher_safety.test.ts +85 -0
- package/tests/fixtures/analyze-crawl.json +26 -0
- package/tests/hits.test.ts +134 -0
- package/tests/html_report.test.ts +58 -0
- package/tests/lock/lockManager.test.ts +138 -0
- package/tests/metrics.test.ts +196 -0
- package/tests/normalize.test.ts +101 -0
- package/tests/orphanSeverity.test.ts +160 -0
- package/tests/pagerank.test.ts +98 -0
- package/tests/parser.test.ts +117 -0
- package/tests/proxy_safety.test.ts +57 -0
- package/tests/redirect_safety.test.ts +73 -0
- package/tests/safety.test.ts +114 -0
- package/tests/scope.test.ts +66 -0
- package/tests/scoring.test.ts +59 -0
- package/tests/sitemap.test.ts +88 -0
- package/tests/soft404.test.ts +41 -0
- package/tests/trap.test.ts +39 -0
- package/tests/visualization_data.test.ts +46 -0
- package/tsconfig.json +11 -0
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Crawlith Site Graph</title>
|
|
7
|
+
<style>
|
|
8
|
+
:root {
|
|
9
|
+
--bg-color: #121212;
|
|
10
|
+
--text-color: #e0e0e0;
|
|
11
|
+
--panel-bg: #1e1e1e;
|
|
12
|
+
--border-color: #333;
|
|
13
|
+
--accent-color: #4a90e2;
|
|
14
|
+
--sidebar-width: 300px;
|
|
15
|
+
}
|
|
16
|
+
body { margin: 0; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; background: var(--bg-color); color: var(--text-color); height: 100vh; display: flex; flex-direction: column; overflow: hidden; }
|
|
17
|
+
|
|
18
|
+
/* Layout */
|
|
19
|
+
header { padding: 0 20px; background: var(--panel-bg); border-bottom: 1px solid var(--border-color); display: flex; justify-content: space-between; align-items: center; height: 60px; box-sizing: border-box; z-index: 10; }
|
|
20
|
+
main { flex: 1; display: flex; overflow: hidden; position: relative; }
|
|
21
|
+
#graph-container { flex: 1; position: relative; overflow: hidden; background: var(--bg-color); }
|
|
22
|
+
#details-panel { width: var(--sidebar-width); background: var(--panel-bg); border-left: 1px solid var(--border-color); padding: 20px; overflow-y: auto; box-sizing: border-box; display: none; flex-direction: column; gap: 15px; }
|
|
23
|
+
#details-panel.visible { display: flex; }
|
|
24
|
+
footer { padding: 5px 20px; background: var(--panel-bg); border-top: 1px solid var(--border-color); font-size: 0.8rem; text-align: center; color: #666; height: 30px; display: flex; align-items: center; justify-content: center; }
|
|
25
|
+
|
|
26
|
+
/* Header Components */
|
|
27
|
+
.brand { font-weight: bold; font-size: 1.2rem; display: flex; align-items: center; gap: 10px; }
|
|
28
|
+
.brand span { color: var(--accent-color); }
|
|
29
|
+
#metrics-summary { font-size: 0.9rem; color: #aaa; display: flex; gap: 20px; }
|
|
30
|
+
.metric { display: flex; flex-direction: column; align-items: center; line-height: 1.1; }
|
|
31
|
+
.metric-value { font-weight: bold; color: var(--text-color); }
|
|
32
|
+
.metric-label { font-size: 0.7rem; }
|
|
33
|
+
|
|
34
|
+
#controls { display: flex; gap: 10px; align-items: center; }
|
|
35
|
+
.btn-group { display: flex; background: #333; border-radius: 4px; overflow: hidden; }
|
|
36
|
+
button { background: transparent; color: #aaa; border: none; padding: 6px 12px; cursor: pointer; font-size: 0.85rem; transition: all 0.2s; }
|
|
37
|
+
button:hover { color: white; background: rgba(255,255,255,0.1); }
|
|
38
|
+
button.active { background: var(--accent-color); color: white; }
|
|
39
|
+
|
|
40
|
+
/* Search */
|
|
41
|
+
#search-container { position: absolute; top: 15px; left: 15px; z-index: 5; }
|
|
42
|
+
#search-input { background: rgba(30,30,30,0.9); border: 1px solid #444; color: white; padding: 8px 12px; border-radius: 20px; width: 200px; outline: none; transition: width 0.3s; }
|
|
43
|
+
#search-input:focus { width: 280px; border-color: var(--accent-color); }
|
|
44
|
+
|
|
45
|
+
/* Graph */
|
|
46
|
+
svg { width: 100%; height: 100%; display: block; }
|
|
47
|
+
.node { cursor: pointer; transition: stroke-width 0.1s; }
|
|
48
|
+
.link { stroke: #555; stroke-opacity: 0.3; fill: none; pointer-events: none; }
|
|
49
|
+
|
|
50
|
+
/* Interaction States */
|
|
51
|
+
.node.highlight { stroke: #fff; stroke-width: 2px; }
|
|
52
|
+
.link.highlight { stroke-opacity: 0.8; stroke: #999; }
|
|
53
|
+
.node.faded { opacity: 0.1; }
|
|
54
|
+
.link.faded { opacity: 0.05; }
|
|
55
|
+
|
|
56
|
+
/* Details Panel Content */
|
|
57
|
+
.detail-section { border-bottom: 1px solid #333; padding-bottom: 10px; }
|
|
58
|
+
.detail-section:last-child { border-bottom: none; }
|
|
59
|
+
.detail-label { font-size: 0.75rem; color: #888; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 4px; }
|
|
60
|
+
.detail-value { font-size: 0.95rem; word-break: break-all; }
|
|
61
|
+
.detail-list { list-style: none; padding: 0; margin: 0; max-height: 150px; overflow-y: auto; font-size: 0.85rem; }
|
|
62
|
+
.detail-list li { padding: 4px 0; border-bottom: 1px solid #2a2a2a; }
|
|
63
|
+
.detail-list a { color: var(--accent-color); text-decoration: none; }
|
|
64
|
+
.detail-list a:hover { text-decoration: underline; }
|
|
65
|
+
|
|
66
|
+
.status-badge { display: inline-block; padding: 2px 6px; border-radius: 3px; font-size: 0.75rem; font-weight: bold; margin-top: 5px; }
|
|
67
|
+
.status-ok { background: #2e7d32; color: white; }
|
|
68
|
+
.status-warn { background: #f9a825; color: black; }
|
|
69
|
+
.status-error { background: #c62828; color: white; }
|
|
70
|
+
|
|
71
|
+
/* Tooltip */
|
|
72
|
+
#tooltip { position: absolute; background: rgba(20,20,20,0.95); color: white; padding: 10px; border-radius: 6px; pointer-events: none; font-size: 12px; z-index: 100; box-shadow: 0 4px 15px rgba(0,0,0,0.5); border: 1px solid #444; display: none; transform: translate(-50%, -100%); margin-top: -10px; white-space: nowrap; }
|
|
73
|
+
|
|
74
|
+
/* Responsive Sidebar */
|
|
75
|
+
@media (max-width: 768px) {
|
|
76
|
+
#details-panel { position: absolute; right: 0; top: 0; bottom: 0; z-index: 20; box-shadow: -5px 0 15px rgba(0,0,0,0.5); transform: translateX(100%); transition: transform 0.3s ease; }
|
|
77
|
+
#details-panel.visible { transform: translateX(0); }
|
|
78
|
+
#metrics-summary { display: none; }
|
|
79
|
+
}
|
|
80
|
+
</style>
|
|
81
|
+
</head>
|
|
82
|
+
<body>
|
|
83
|
+
<header>
|
|
84
|
+
<div class="brand"><span>Crawlith</span> SiteGraph</div>
|
|
85
|
+
|
|
86
|
+
<div id="metrics-summary">
|
|
87
|
+
<div class="metric"><span class="metric-value" id="m-pages">-</span><span class="metric-label">Pages</span></div>
|
|
88
|
+
<div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span></div>
|
|
89
|
+
<div class="metric"><span class="metric-value" id="m-eff">-</span><span class="metric-label">Efficiency</span></div>
|
|
90
|
+
<div class="metric"><span class="metric-value" id="m-orphan">-</span><span class="metric-label">Orphans</span></div>
|
|
91
|
+
</div>
|
|
92
|
+
|
|
93
|
+
<div id="controls">
|
|
94
|
+
<div class="btn-group" style="margin-right: 15px;">
|
|
95
|
+
<button id="btn-auth-pagerank" class="active" title="PageRank Authority">PageRank</button>
|
|
96
|
+
<button id="btn-auth-structural" title="Structural Authority (In-Degree)">In-Degree</button>
|
|
97
|
+
</div>
|
|
98
|
+
<div class="btn-group">
|
|
99
|
+
<button id="btn-hierarchical" class="active">Hierarchical</button>
|
|
100
|
+
<button id="btn-radial">Radial</button>
|
|
101
|
+
</div>
|
|
102
|
+
</div>
|
|
103
|
+
</header>
|
|
104
|
+
|
|
105
|
+
<main>
|
|
106
|
+
<div id="graph-container">
|
|
107
|
+
<div id="search-container">
|
|
108
|
+
<input type="text" id="search-input" placeholder="Search URL...">
|
|
109
|
+
</div>
|
|
110
|
+
<svg id="graph"></svg>
|
|
111
|
+
<div id="tooltip"></div>
|
|
112
|
+
</div>
|
|
113
|
+
|
|
114
|
+
<aside id="details-panel">
|
|
115
|
+
<div class="detail-section">
|
|
116
|
+
<div class="detail-label">URL</div>
|
|
117
|
+
<div class="detail-value" id="d-url">-</div>
|
|
118
|
+
<div id="d-status"></div>
|
|
119
|
+
</div>
|
|
120
|
+
<div class="detail-section" style="display: flex; gap: 20px;">
|
|
121
|
+
<div>
|
|
122
|
+
<div class="detail-label">Depth</div>
|
|
123
|
+
<div class="detail-value" id="d-depth">-</div>
|
|
124
|
+
</div>
|
|
125
|
+
<div>
|
|
126
|
+
<div class="detail-label">Authority</div>
|
|
127
|
+
<div class="detail-value" id="d-auth-container">-</div>
|
|
128
|
+
</div>
|
|
129
|
+
</div>
|
|
130
|
+
<div class="detail-section">
|
|
131
|
+
<div class="detail-label">In-links (<span id="d-in-count">0</span>)</div>
|
|
132
|
+
<!-- List could be populated here if we had the reverse index, for now just count -->
|
|
133
|
+
</div>
|
|
134
|
+
<div class="detail-section">
|
|
135
|
+
<div class="detail-label">Out-links (<span id="d-out-count">0</span>)</div>
|
|
136
|
+
<ul class="detail-list" id="d-out-list"></ul>
|
|
137
|
+
</div>
|
|
138
|
+
</aside>
|
|
139
|
+
</main>
|
|
140
|
+
|
|
141
|
+
<footer>
|
|
142
|
+
Generated by Crawlith Crawler
|
|
143
|
+
</footer>
|
|
144
|
+
|
|
145
|
+
<!-- D3 from CDN -->
|
|
146
|
+
<script src="https://d3js.org/d3.v7.min.js"></script>
|
|
147
|
+
|
|
148
|
+
<script>
|
|
149
|
+
// --- State ---
|
|
150
|
+
// Single mutable store shared by every function in this script.
const state = {
    // Graph data, filled in by processData()
    nodes: [],
    links: [],
    metrics: {},
    adjacency: new Map(), // url -> { in: [], out: [] }

    // Force layout and viewport
    simulation: null,
    width: 0,
    height: 0,
    transform: d3.zoomIdentity,

    // Interaction
    activeNode: null,
    mode: 'hierarchical', // 'hierarchical' | 'radial'

    // Aggregates used for scaling
    maxDepth: 0,
    maxInLinks: 0,

    // D3 selections and zoom behavior, created by render()
    nodeSelection: null,
    linkSelection: null,
    zoom: null
};
|
|
167
|
+
|
|
168
|
+
// --- DOM Elements ---
|
|
169
|
+
// Root SVG and the single group that receives the zoom/pan transform.
const svg = d3.select("#graph");
const container = svg.append("g");

// Links are appended before nodes so circles paint on top of lines.
const linkGroup = container.append("g").attr("class", "links");
const nodeGroup = container.append("g").attr("class", "nodes");

// HTML overlays outside the SVG.
const tooltip = d3.select("#tooltip");
const detailsPanel = d3.select("#details-panel");
|
|
175
|
+
|
|
176
|
+
// --- Initialization ---
|
|
177
|
+
// Loads graph and metrics data, then builds the UI and starts the layout.
|
|
178
|
+
/**
 * Entry point: obtains graph and metrics data, then wires up the UI and
 * starts the force simulation.
 *
 * Data injected on window (GRAPH_DATA / METRICS_DATA) is used when present.
 * If either is missing, graph.json and metrics.json are fetched instead.
 * Any failure is logged and reported through alert().
 */
async function init() {
    try {
        // 1. Injected data (works when the page is opened from file://)
        let graphData = window.GRAPH_DATA ? window.GRAPH_DATA : undefined;
        let metricsData = window.METRICS_DATA ? window.METRICS_DATA : undefined;

        // 2. Otherwise fetch both JSON files (works when served over HTTP)
        const haveInjected = Boolean(graphData) && Boolean(metricsData);
        if (!haveInjected) {
            try {
                const responses = await Promise.all([
                    fetch('graph.json'),
                    fetch('metrics.json')
                ]);
                const graphRes = responses[0];
                const metricsRes = responses[1];
                // Only replace the data when both requests succeeded.
                if (graphRes.ok && metricsRes.ok) {
                    graphData = await graphRes.json();
                    metricsData = await metricsRes.json();
                }
            } catch (e) {
                console.warn("Fetch failed, possibly due to CORS or missing files.", e);
            }
        }

        if (!(graphData && metricsData)) {
            throw new Error("No data available. Ensure graph.json exists or data is injected.");
        }

        state.metrics = metricsData;
        processData(graphData);
        updateMetricsUI();

        // UI wiring
        setupResize();
        setupInteractions();
        setupSearch();

        // Layout
        initSimulation();
    } catch (err) {
        console.error(err);
        alert("Error loading visualization data: " + err.message);
    }
}
|
|
225
|
+
|
|
226
|
+
/**
 * Converts raw graph data into the structures kept in state.
 *
 * Mutates the node objects in data.nodes: defaults inLinks/outLinks to 0,
 * adds structuralAuthority, pageRankAuthority and authority, and resets
 * x/y to 0. Edges whose source or target url has no matching node are dropped.
 *
 * @param data Object with a nodes array (each with a url) and an edges array
 *             (each with source and target urls).
 */
function processData(data) {
    // Index nodes by url for edge resolution.
    const byUrl = new Map();
    for (const node of data.nodes) {
        node.inLinks = node.inLinks || 0;
        node.outLinks = node.outLinks || 0;
        byUrl.set(node.url, node);
    }

    // Resolve edge endpoints to node objects, keeping only fully resolved edges.
    const resolved = [];
    for (const edge of data.edges) {
        const source = byUrl.get(edge.source);
        const target = byUrl.get(edge.target);
        if (source && target) {
            resolved.push({ source: source, target: target });
        }
    }
    state.links = resolved;
    state.nodes = data.nodes;

    // Aggregates; fall back to 1 so later divisions never use 0.
    state.maxDepth = d3.max(state.nodes, d => d.depth) || 1;
    state.maxInLinks = d3.max(state.nodes, d => d.inLinks) || 1;

    const logMaxIn = Math.log(1 + state.maxInLinks);
    for (const node of state.nodes) {
        // Structural authority: log-scaled in-link count relative to the maximum.
        node.structuralAuthority = Math.log(1 + node.inLinks) / logMaxIn;

        // PageRank authority: pageRankScore / 100 when numeric, else structural.
        node.pageRankAuthority = typeof node.pageRankScore === 'number'
            ? node.pageRankScore / 100
            : node.structuralAuthority;

        // PageRank is the default sizing metric.
        node.authority = node.pageRankAuthority;

        // Give every node a defined starting position.
        node.x = 0;
        node.y = 0;
    }

    // Adjacency: url -> neighbouring node objects in each direction.
    for (const node of state.nodes) {
        state.adjacency.set(node.url, { in: [], out: [] });
    }
    for (const link of state.links) {
        state.adjacency.get(link.source.url).out.push(link.target);
        state.adjacency.get(link.target.url).in.push(link.source);
    }
}
|
|
273
|
+
|
|
274
|
+
/**
 * Writes the summary metrics from state.metrics into the header.
 *
 * Missing or malformed fields fall back to the "-" placeholder already used
 * in the markup, so an incomplete metrics object no longer throws (which
 * previously aborted init) or renders "NaN%".
 */
function updateMetricsUI() {
    const metrics = state.metrics || {};
    const PLACEHOLDER = '-';

    const setText = (id, value) => {
        const el = document.getElementById(id);
        if (el) el.textContent = value;
    };
    const orPlaceholder = value => (value === undefined || value === null ? PLACEHOLDER : value);

    // Efficiency is stored as a 0-1 fraction and displayed as a percentage.
    const rawEfficiency = metrics.crawlEfficiencyScore;
    const percent = rawEfficiency * 100;
    const efficiencyText = rawEfficiency != null && Number.isFinite(percent)
        ? percent.toFixed(1) + '%'
        : PLACEHOLDER;

    const orphanText = Array.isArray(metrics.orphanPages)
        ? metrics.orphanPages.length
        : PLACEHOLDER;

    setText('m-pages', orPlaceholder(metrics.totalPages));
    setText('m-depth', orPlaceholder(metrics.maxDepthFound));
    setText('m-eff', efficiencyText);
    setText('m-orphan', orphanText);
}
|
|
280
|
+
|
|
281
|
+
// --- Simulation ---
|
|
282
|
+
/**
 * Creates the D3 force simulation for state.nodes / state.links, applies the
 * current layout mode, hooks up the tick handler and draws the SVG elements.
 *
 * Graphs above 1000 nodes get faster alpha decay and weaker repulsion;
 * collision detection is only enabled up to 1200 nodes.
 */
function initSimulation() {
    const dims = getDimensions();
    state.width = dims.width;
    state.height = dims.height;

    // Size-based safeguards
    const total = state.nodes.length;
    const isLarge = total > 1000;
    const useCollision = total <= 1200;

    const linkForce = d3.forceLink(state.links).id(d => d.url).strength(0.5);
    const chargeForce = d3.forceManyBody().strength(isLarge ? -100 : -300);
    const centerForce = d3.forceCenter(dims.width / 2, dims.height / 2);

    state.simulation = d3.forceSimulation(state.nodes)
        .alphaDecay(isLarge ? 0.05 : 0.02)
        .force("link", linkForce)
        .force("charge", chargeForce)
        .force("center", centerForce);

    if (useCollision) {
        const collide = d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1);
        state.simulation.force("collide", collide);
    }

    // Layout-specific forces
    applyLayoutMode(state.mode);

    // Reposition SVG elements on every simulation tick
    state.simulation.on("tick", ticked);

    // Create the SVG elements
    render();
}
|
|
311
|
+
|
|
312
|
+
/**
 * Switches the simulation between layouts and reheats it.
 *
 * 'hierarchical' pulls each node to a Y level derived from its depth and adds
 * a weak pull toward x = 0; 'radial' pulls each node to a ring whose radius is
 * derived from its depth. Any other value only clears the layout forces.
 *
 * @param mode 'hierarchical' or 'radial'; stored in state.mode.
 */
function applyLayoutMode(mode) {
    state.mode = mode;
    const sim = state.simulation;
    const w = state.width;
    const h = state.height;

    // Drop the forces owned by the previous layout.
    sim.force("y", null);
    sim.force("radial", null);

    switch (mode) {
        case 'hierarchical': {
            const levelGap = h / (state.maxDepth + 2);
            // Target Y per depth level, shifted by half the height plus a 50px margin.
            const levelY = d => (d.depth * levelGap) - (h / 2) + 50;
            sim.force("y", d3.forceY(levelY).strength(1));
            // Weak horizontal pull so levels do not spread too wide.
            sim.force("x", d3.forceX(0).strength(0.05));
            sim.force("center", d3.forceCenter(w / 2, h / 2));
            break;
        }
        case 'radial': {
            const outerRadius = Math.min(w, h) / 2 - 50;
            const ringGap = outerRadius / (state.maxDepth + 1);
            const rings = d3.forceRadial(d => d.depth * ringGap, w / 2, h / 2).strength(0.8);
            sim.force("radial", rings);
            // The horizontal pull only applies to the hierarchical layout.
            sim.force("x", null);
            break;
        }
        default:
            break;
    }

    sim.alpha(1).restart();
}
|
|
348
|
+
|
|
349
|
+
/**
 * Circle radius for a node: 5px base plus up to 15px scaled by authority.
 *
 * Authority is clamped to the 0-1 range and non-numeric values count as 0,
 * so the result is always a finite radius between 5 and 20. This keeps bad
 * input data from producing a NaN or negative radius, which is invalid for an
 * SVG circle and for the collision force that reuses this value.
 *
 * @param d Node datum carrying a numeric authority field.
 * @returns Radius in pixels, in the range 5..20.
 */
function getNodeRadius(d) {
    const raw = Number(d.authority);
    const authority = Number.isFinite(raw) ? Math.min(1, Math.max(0, raw)) : 0;
    return 5 + (authority * 15);
}
|
|
353
|
+
|
|
354
|
+
/**
 * Fill color for a node, taken from the Viridis scale by relative depth.
 * The scale is inverted, so depth 0 maps to interpolateViridis(1) and the
 * deepest level maps to interpolateViridis(0).
 *
 * @param d Node datum with a numeric depth.
 * @returns A CSS color string produced by d3.interpolateViridis.
 */
function getNodeColor(d) {
    const deepest = state.maxDepth || 1;
    const fraction = d.depth / deepest;
    return d3.interpolateViridis(1 - fraction);
}
|
|
359
|
+
|
|
360
|
+
/**
 * Creates the SVG line and circle elements for the current links and nodes,
 * attaches hover / click / drag handlers to the circles, and installs the
 * zoom behavior with an initial 0.8x zoom around the viewport center.
 * The resulting selections and zoom behavior are stored on state.
 */
function render() {
    // --- Links ---
    const lines = linkGroup.selectAll("line")
        .data(state.links)
        .join("line");
    lines.attr("class", "link").attr("stroke-width", 0.5);
    state.linkSelection = lines;

    // --- Node event handlers ---
    // Hover effects are suppressed while a node is selected.
    const onEnter = (event, d) => {
        if (!state.activeNode) {
            highlightNode(d);
            showTooltip(event, d);
        }
    };
    const onLeave = () => {
        if (!state.activeNode) {
            resetHighlight();
            hideTooltip();
        }
    };
    const onClick = (event, d) => {
        // Keep the click from reaching the background handler on the svg.
        event.stopPropagation();
        selectNode(d);
    };
    const dragBehavior = d3.drag()
        .on("start", dragstarted)
        .on("drag", dragged)
        .on("end", dragended);

    // --- Nodes ---
    const circles = nodeGroup.selectAll("circle")
        .data(state.nodes)
        .join("circle");
    circles
        .attr("class", "node")
        .attr("r", d => getNodeRadius(d))
        .attr("fill", d => getNodeColor(d))
        .attr("stroke", d => (d.status >= 400 ? "#ff4444" : null)) // red outline for error statuses
        .on("mouseover", onEnter)
        .on("mouseout", onLeave)
        .on("click", onClick)
        .call(dragBehavior);
    state.nodeSelection = circles;

    // --- Zoom ---
    state.zoom = d3.zoom()
        .scaleExtent([0.1, 4])
        .on("zoom", (event) => {
            state.transform = event.transform;
            container.attr("transform", event.transform);
        });

    svg.call(state.zoom);

    // Initial zoom-out to 0.8x, anchored at the viewport center.
    const initialTransform = d3.zoomIdentity
        .translate(state.width / 2, state.height / 2)
        .scale(0.8)
        .translate(-state.width / 2, -state.height / 2);
    svg.call(state.zoom.transform, initialTransform);
}
|
|
406
|
+
|
|
407
|
+
/**
 * Simulation tick handler: copies the current node coordinates onto the
 * rendered lines and circles. Does nothing for selections that have not
 * been created yet.
 */
function ticked() {
    const lines = state.linkSelection;
    if (lines) {
        lines
            .attr("x1", l => l.source.x)
            .attr("y1", l => l.source.y)
            .attr("x2", l => l.target.x)
            .attr("y2", l => l.target.y);
    }

    const circles = state.nodeSelection;
    if (circles) {
        circles
            .attr("cx", n => n.x)
            .attr("cy", n => n.y);
    }
}
|
|
422
|
+
|
|
423
|
+
// --- Interactions ---
|
|
424
|
+
|
|
425
|
+
/**
 * Registers the global click handlers: background click to clear the
 * selection, plus the layout and authority toggle buttons.
 */
function setupInteractions() {
    // Clicking the empty background clears the selection and hides the panel.
    svg.on("click", () => {
        state.activeNode = null;
        resetHighlight();
        detailsPanel.classed("visible", false);
    });

    // Layout toggle buttons
    const layoutButtons = [
        ["#btn-hierarchical", 'hierarchical'],
        ["#btn-radial", 'radial']
    ];
    layoutButtons.forEach(([selector, layout]) => {
        // A regular function is needed so "this" is the clicked button.
        d3.select(selector).on("click", function() {
            setMode(layout, this);
        });
    });

    // Authority toggle buttons
    const authorityButtons = [
        ["#btn-auth-pagerank", 'pagerank'],
        ["#btn-auth-structural", 'structural']
    ];
    authorityButtons.forEach(([selector, metric]) => {
        d3.select(selector).on("click", function() {
            setAuthorityMode(metric, this);
        });
    });
}
|
|
449
|
+
|
|
450
|
+
/**
 * Switches the metric used to size nodes and updates the visuals.
 *
 * @param mode 'pagerank' uses pageRankAuthority; anything else uses
 *             structuralAuthority.
 * @param btn  The button element to mark as active.
 */
function setAuthorityMode(mode, btn) {
    // Move the active marker to the clicked button.
    ["#btn-auth-pagerank", "#btn-auth-structural"].forEach(selector => {
        d3.select(selector).classed("active", false);
    });
    d3.select(btn).classed("active", true);

    const usePageRank = mode === 'pagerank';
    for (const node of state.nodes) {
        node.authority = usePageRank ? node.pageRankAuthority : node.structuralAuthority;
    }

    // Animate circles to their new radius.
    nodeGroup.selectAll("circle")
        .transition()
        .duration(500)
        .attr("r", d => getNodeRadius(d));

    // Rebuild the collision force (only when one is installed) so it uses the new radii.
    const sim = state.simulation;
    if (sim.force("collide")) {
        const collide = d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1);
        sim.force("collide", collide);
        sim.alpha(0.3).restart();
    }
}
|
|
470
|
+
|
|
471
|
+
/**
 * Handles a click on one of the layout toggle buttons.
 *
 * Only the two layout buttons are deactivated. The previous selector
 * ("#controls button") also matched the PageRank / In-Degree buttons, so
 * changing the layout wrongly removed the active marker from the authority
 * toggle.
 *
 * @param mode Layout to apply: 'hierarchical' or 'radial'.
 * @param btn  The button element to mark as active.
 */
function setMode(mode, btn) {
    d3.selectAll("#btn-hierarchical, #btn-radial").classed("active", false);
    d3.select(btn).classed("active", true);
    applyLayoutMode(mode);
}
|
|
476
|
+
|
|
477
|
+
/**
 * Emphasises one node and its direct neighbourhood.
 *
 * Nodes that are neither the focus node nor linked to it are faded; the focus
 * node gets the highlight class. Links touching the focus node are
 * highlighted and all other links are faded.
 *
 * @param d Node datum to focus.
 */
function highlightNode(d) {
    const focusUrl = d.url;

    // Urls that stay fully visible: the node itself plus in/out neighbours.
    const visible = new Set();
    const entry = state.adjacency.get(focusUrl);
    if (entry) {
        for (const n of entry.in) visible.add(n.url);
        for (const n of entry.out) visible.add(n.url);
    }
    visible.add(focusUrl);

    const touchesFocus = l => l.source.url === focusUrl || l.target.url === focusUrl;

    nodeGroup.selectAll("circle")
        .classed("faded", n => !visible.has(n.url))
        .classed("highlight", n => n.url === focusUrl);

    linkGroup.selectAll("line")
        .classed("faded", l => !touchesFocus(l))
        .classed("highlight", l => touchesFocus(l));
}
|
|
496
|
+
|
|
497
|
+
/** Removes the faded and highlight classes from every circle and line. */
function resetHighlight() {
    const circles = nodeGroup.selectAll("circle");
    const lines = linkGroup.selectAll("line");
    [circles, lines].forEach(selection => {
        selection.classed("faded", false).classed("highlight", false);
    });
}
|
|
501
|
+
|
|
502
|
+
/**
 * Makes a node the active selection: records it in state, highlights its
 * neighbourhood and fills the details panel.
 *
 * @param d Node datum that was clicked.
 */
function selectNode(d) {
    // While activeNode is set, the hover handlers leave the highlight alone.
    state.activeNode = d;

    highlightNode(d);
    showDetails(d);
}
|
|
507
|
+
|
|
508
|
+
/**
 * Shows the hover tooltip for a node at the pointer position.
 *
 * The tooltip is absolutely positioned inside #graph-container, so pointer
 * coordinates are converted to that container's coordinate space. Using page
 * coordinates directly offset the tooltip by the header height.
 * Content is built from text nodes rather than an HTML string, and a url that
 * cannot be parsed is shown verbatim instead of throwing.
 *
 * @param event Pointer event from the hovered circle.
 * @param d     Node datum with url and authority.
 */
function showTooltip(event, d) {
    let label;
    try {
        label = new URL(d.url).pathname;
    } catch (e) {
        label = String(d.url);
    }

    // Offset of the positioning container relative to the viewport.
    const host = document.getElementById('graph-container');
    const box = host ? host.getBoundingClientRect() : { left: 0, top: 0 };

    tooltip.html("");
    tooltip.append("strong").text(label);
    tooltip.append("br");
    tooltip.append("span").text("Auth: " + (d.authority * 10).toFixed(1));

    tooltip.style("display", "block")
        .style("left", (event.clientX - box.left) + "px")
        .style("top", (event.clientY - box.top - 10) + "px");
}
|
|
515
|
+
|
|
516
|
+
/** Hides the hover tooltip by setting its display to none. */
function hideTooltip() {
    const hidden = "none";
    tooltip.style("display", hidden);
}
|
|
519
|
+
|
|
520
|
+
function showDetails(d) {
|
|
521
|
+
detailsPanel.classed("visible", true);
|
|
522
|
+
d3.select("#d-url").text(d.url);
|
|
523
|
+
d3.select("#d-depth").text(d.depth);
|
|
524
|
+
|
|
525
|
+
const authContainer = d3.select("#d-auth-container");
|
|
526
|
+
authContainer.html("");
|
|
527
|
+
const prVal = (d.pageRankAuthority * 100).toFixed(1);
|
|
528
|
+
const structVal = d.structuralAuthority.toFixed(3);
|
|
529
|
+
authContainer.append("div").html(\`PR: <strong>\${prVal}</strong>\`);
|
|
530
|
+
authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(\`In-Degree: \${structVal}\`);
|
|
531
|
+
|
|
532
|
+
d3.select("#d-in-count").text(d.inLinks);
|
|
533
|
+
d3.select("#d-out-count").text(d.outLinks);
|
|
534
|
+
|
|
535
|
+
// Status badge
|
|
536
|
+
const statusDiv = d3.select("#d-status");
|
|
537
|
+
statusDiv.html("");
|
|
538
|
+
let sClass = "status-ok";
|
|
539
|
+
if (d.status >= 400) sClass = "status-error";
|
|
540
|
+
else if (d.status >= 300) sClass = "status-warn";
|
|
541
|
+
statusDiv.append("span").attr("class", "status-badge " + sClass).text(d.status);
|
|
542
|
+
|
|
543
|
+
// Outlinks list (limit to 20)
|
|
544
|
+
const list = d3.select("#d-out-list");
|
|
545
|
+
list.html("");
|
|
546
|
+
const adj = state.adjacency.get(d.url);
|
|
547
|
+
if (adj && adj.out.length > 0) {
|
|
548
|
+
adj.out.slice(0, 50).forEach(target => {
|
|
549
|
+
list.append("li").append("a")
|
|
550
|
+
.attr("href", target.url)
|
|
551
|
+
.attr("target", "_blank")
|
|
552
|
+
.text(new URL(target.url).pathname);
|
|
553
|
+
});
|
|
554
|
+
if (adj.out.length > 50) {
|
|
555
|
+
list.append("li").text(\`...and \${adj.out.length - 50} more\`);
|
|
556
|
+
}
|
|
557
|
+
} else {
|
|
558
|
+
list.append("li").text("No outgoing links");
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
// --- Search ---
|
|
563
|
+
function setupSearch() {
|
|
564
|
+
const input = document.getElementById('search-input');
|
|
565
|
+
input.addEventListener('keydown', (e) => {
|
|
566
|
+
if (e.key === 'Enter') {
|
|
567
|
+
const val = input.value.trim().toLowerCase();
|
|
568
|
+
if (!val) return;
|
|
569
|
+
|
|
570
|
+
const found = state.nodes.find(n => n.url.toLowerCase().includes(val));
|
|
571
|
+
if (found) {
|
|
572
|
+
selectNode(found);
|
|
573
|
+
// Center view on node
|
|
574
|
+
const transform = d3.zoomIdentity
|
|
575
|
+
.translate(state.width/2, state.height/2)
|
|
576
|
+
.scale(2)
|
|
577
|
+
.translate(-found.x, -found.y);
|
|
578
|
+
|
|
579
|
+
svg.transition().duration(750).call(state.zoom.transform, transform);
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
});
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
function setupResize() {
|
|
586
|
+
window.addEventListener("resize", () => {
|
|
587
|
+
const { width, height } = getDimensions();
|
|
588
|
+
state.width = width;
|
|
589
|
+
state.height = height;
|
|
590
|
+
state.simulation.force("center", d3.forceCenter(width / 2, height / 2));
|
|
591
|
+
if (state.mode === 'hierarchical') {
|
|
592
|
+
// Re-evaluate Y force if needed, but usually center is enough
|
|
593
|
+
}
|
|
594
|
+
state.simulation.alpha(0.3).restart();
|
|
595
|
+
});
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
function getDimensions() {
|
|
599
|
+
const rect = document.getElementById("graph-container").getBoundingClientRect();
|
|
600
|
+
return { width: rect.width, height: rect.height };
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
// --- Dragging ---
|
|
604
|
+
function dragstarted(event, d) {
|
|
605
|
+
if (!event.active) state.simulation.alphaTarget(0.3).restart();
|
|
606
|
+
d.fx = d.x;
|
|
607
|
+
d.fy = d.y;
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
function dragged(event, d) {
|
|
611
|
+
d.fx = event.x;
|
|
612
|
+
d.fy = event.y;
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
function dragended(event, d) {
|
|
616
|
+
if (!event.active) state.simulation.alphaTarget(0);
|
|
617
|
+
d.fx = null;
|
|
618
|
+
d.fy = null;
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
// Start
|
|
622
|
+
// Run init right away unless the document is still being parsed; in that
// case wait for DOMContentLoaded.
if (document.readyState !== 'loading') {
  init();
} else {
  document.addEventListener('DOMContentLoaded', init);
}
|
|
627
|
+
</script>
|
|
628
|
+
</body>
|
|
629
|
+
</html>
|
|
630
|
+
`;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Graph } from '../graph/graph.js';
|
|
2
|
+
/**
 * Optional settings accepted by computeHITS.
 */
export interface HITSOptions {
    /**
     * Iteration count for the computation.
     * NOTE(review): presumably the number of hub/authority update rounds;
     * the default used when omitted is not visible here - confirm against
     * the implementation.
     */
    iterations?: number;
}
|
|
5
|
+
/**
 * Computes Hub and Authority scores with the HITS algorithm, using only the
 * internal link graph as input.
 *
 * NOTE(review): the function returns nothing, so the scores are presumably
 * written back onto the graph - confirm against the implementation.
 *
 * @param graph   Link graph to analyse.
 * @param options Optional tuning parameters; see HITSOptions.
 */
export declare function computeHITS(graph: Graph, options?: HITSOptions): void;
|