@crawlith/core 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +70 -0
- package/dist/analysis/analysis_list.html +35 -0
- package/dist/analysis/analysis_page.html +123 -0
- package/dist/analysis/analyze.d.ts +40 -5
- package/dist/analysis/analyze.js +395 -347
- package/dist/analysis/clustering.d.ts +23 -0
- package/dist/analysis/clustering.js +206 -0
- package/dist/analysis/content.d.ts +1 -1
- package/dist/analysis/content.js +11 -5
- package/dist/analysis/duplicate.d.ts +34 -0
- package/dist/analysis/duplicate.js +305 -0
- package/dist/analysis/heading.d.ts +116 -0
- package/dist/analysis/heading.js +356 -0
- package/dist/analysis/images.d.ts +1 -1
- package/dist/analysis/images.js +6 -5
- package/dist/analysis/links.d.ts +1 -1
- package/dist/analysis/links.js +8 -8
- package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
- package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
- package/dist/analysis/scoring.js +11 -2
- package/dist/analysis/seo.d.ts +8 -4
- package/dist/analysis/seo.js +41 -30
- package/dist/analysis/soft404.d.ts +17 -0
- package/dist/analysis/soft404.js +62 -0
- package/dist/analysis/structuredData.d.ts +1 -1
- package/dist/analysis/structuredData.js +5 -4
- package/dist/analysis/templates.d.ts +2 -0
- package/dist/analysis/templates.js +7 -0
- package/dist/application/index.d.ts +2 -0
- package/dist/application/index.js +2 -0
- package/dist/application/usecase.d.ts +3 -0
- package/dist/application/usecase.js +1 -0
- package/dist/application/usecases.d.ts +114 -0
- package/dist/application/usecases.js +201 -0
- package/dist/audit/index.js +1 -1
- package/dist/audit/transport.d.ts +1 -1
- package/dist/audit/transport.js +5 -4
- package/dist/audit/types.d.ts +1 -0
- package/dist/constants.d.ts +17 -0
- package/dist/constants.js +23 -0
- package/dist/core/scope/scopeManager.js +3 -0
- package/dist/core/security/ipGuard.d.ts +11 -0
- package/dist/core/security/ipGuard.js +71 -3
- package/dist/crawler/crawl.d.ts +4 -22
- package/dist/crawler/crawl.js +4 -335
- package/dist/crawler/crawler.d.ts +87 -0
- package/dist/crawler/crawler.js +683 -0
- package/dist/crawler/extract.d.ts +4 -1
- package/dist/crawler/extract.js +7 -2
- package/dist/crawler/fetcher.d.ts +2 -1
- package/dist/crawler/fetcher.js +26 -11
- package/dist/crawler/metricsRunner.d.ts +23 -1
- package/dist/crawler/metricsRunner.js +202 -72
- package/dist/crawler/normalize.d.ts +41 -0
- package/dist/crawler/normalize.js +119 -3
- package/dist/crawler/parser.d.ts +1 -3
- package/dist/crawler/parser.js +2 -49
- package/dist/crawler/resolver.d.ts +11 -0
- package/dist/crawler/resolver.js +67 -0
- package/dist/crawler/sitemap.d.ts +6 -0
- package/dist/crawler/sitemap.js +27 -17
- package/dist/crawler/trap.d.ts +5 -1
- package/dist/crawler/trap.js +23 -2
- package/dist/db/CrawlithDB.d.ts +110 -0
- package/dist/db/CrawlithDB.js +500 -0
- package/dist/db/graphLoader.js +42 -30
- package/dist/db/index.d.ts +11 -0
- package/dist/db/index.js +41 -29
- package/dist/db/migrations.d.ts +2 -0
- package/dist/db/{schema.js → migrations.js} +90 -43
- package/dist/db/pluginRegistry.d.ts +9 -0
- package/dist/db/pluginRegistry.js +19 -0
- package/dist/db/repositories/EdgeRepository.d.ts +13 -0
- package/dist/db/repositories/EdgeRepository.js +20 -0
- package/dist/db/repositories/MetricsRepository.d.ts +16 -8
- package/dist/db/repositories/MetricsRepository.js +28 -7
- package/dist/db/repositories/PageRepository.d.ts +15 -2
- package/dist/db/repositories/PageRepository.js +169 -25
- package/dist/db/repositories/SiteRepository.d.ts +9 -0
- package/dist/db/repositories/SiteRepository.js +13 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
- package/dist/db/repositories/SnapshotRepository.js +64 -5
- package/dist/db/reset.d.ts +9 -0
- package/dist/db/reset.js +32 -0
- package/dist/db/statements.d.ts +12 -0
- package/dist/db/statements.js +40 -0
- package/dist/diff/compare.d.ts +0 -5
- package/dist/diff/compare.js +0 -12
- package/dist/diff/service.d.ts +16 -0
- package/dist/diff/service.js +41 -0
- package/dist/domain/index.d.ts +4 -0
- package/dist/domain/index.js +4 -0
- package/dist/events.d.ts +56 -0
- package/dist/events.js +1 -0
- package/dist/graph/graph.d.ts +36 -42
- package/dist/graph/graph.js +26 -17
- package/dist/graph/hits.d.ts +23 -0
- package/dist/graph/hits.js +111 -0
- package/dist/graph/metrics.d.ts +0 -4
- package/dist/graph/metrics.js +25 -9
- package/dist/graph/pagerank.d.ts +17 -4
- package/dist/graph/pagerank.js +126 -91
- package/dist/graph/simhash.d.ts +6 -0
- package/dist/graph/simhash.js +14 -0
- package/dist/index.d.ts +29 -8
- package/dist/index.js +29 -8
- package/dist/lock/hashKey.js +1 -1
- package/dist/lock/lockManager.d.ts +5 -1
- package/dist/lock/lockManager.js +38 -13
- package/dist/plugin-system/plugin-cli.d.ts +10 -0
- package/dist/plugin-system/plugin-cli.js +31 -0
- package/dist/plugin-system/plugin-config.d.ts +16 -0
- package/dist/plugin-system/plugin-config.js +36 -0
- package/dist/plugin-system/plugin-loader.d.ts +17 -0
- package/dist/plugin-system/plugin-loader.js +122 -0
- package/dist/plugin-system/plugin-registry.d.ts +25 -0
- package/dist/plugin-system/plugin-registry.js +167 -0
- package/dist/plugin-system/plugin-types.d.ts +205 -0
- package/dist/plugin-system/plugin-types.js +1 -0
- package/dist/ports/index.d.ts +9 -0
- package/dist/ports/index.js +1 -0
- package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
- package/dist/report/crawlExport.d.ts +3 -0
- package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
- package/dist/report/crawl_template.d.ts +1 -0
- package/dist/report/crawl_template.js +7 -0
- package/dist/report/export.d.ts +3 -0
- package/dist/report/export.js +81 -0
- package/dist/report/html.js +15 -216
- package/dist/report/insight.d.ts +27 -0
- package/dist/report/insight.js +103 -0
- package/dist/scoring/health.d.ts +56 -0
- package/dist/scoring/health.js +213 -0
- package/dist/utils/chalk.d.ts +6 -0
- package/dist/utils/chalk.js +41 -0
- package/dist/utils/secureConfig.d.ts +23 -0
- package/dist/utils/secureConfig.js +128 -0
- package/package.json +12 -6
- package/CHANGELOG.md +0 -7
- package/dist/db/schema.d.ts +0 -2
- package/dist/graph/cluster.d.ts +0 -6
- package/dist/graph/cluster.js +0 -173
- package/dist/graph/duplicate.d.ts +0 -10
- package/dist/graph/duplicate.js +0 -251
- package/dist/report/sitegraphExport.d.ts +0 -3
- package/dist/report/sitegraph_template.d.ts +0 -1
- package/dist/report/sitegraph_template.js +0 -630
- package/dist/scoring/hits.d.ts +0 -9
- package/dist/scoring/hits.js +0 -111
- package/src/analysis/analyze.ts +0 -548
- package/src/analysis/content.ts +0 -62
- package/src/analysis/images.ts +0 -28
- package/src/analysis/links.ts +0 -41
- package/src/analysis/scoring.ts +0 -59
- package/src/analysis/seo.ts +0 -82
- package/src/analysis/structuredData.ts +0 -62
- package/src/audit/dns.ts +0 -49
- package/src/audit/headers.ts +0 -98
- package/src/audit/index.ts +0 -66
- package/src/audit/scoring.ts +0 -232
- package/src/audit/transport.ts +0 -258
- package/src/audit/types.ts +0 -102
- package/src/core/network/proxyAdapter.ts +0 -21
- package/src/core/network/rateLimiter.ts +0 -39
- package/src/core/network/redirectController.ts +0 -47
- package/src/core/network/responseLimiter.ts +0 -34
- package/src/core/network/retryPolicy.ts +0 -57
- package/src/core/scope/domainFilter.ts +0 -45
- package/src/core/scope/scopeManager.ts +0 -52
- package/src/core/scope/subdomainPolicy.ts +0 -39
- package/src/core/security/ipGuard.ts +0 -92
- package/src/crawler/crawl.ts +0 -382
- package/src/crawler/extract.ts +0 -34
- package/src/crawler/fetcher.ts +0 -233
- package/src/crawler/metricsRunner.ts +0 -124
- package/src/crawler/normalize.ts +0 -108
- package/src/crawler/parser.ts +0 -190
- package/src/crawler/sitemap.ts +0 -73
- package/src/crawler/trap.ts +0 -96
- package/src/db/graphLoader.ts +0 -105
- package/src/db/index.ts +0 -70
- package/src/db/repositories/EdgeRepository.ts +0 -29
- package/src/db/repositories/MetricsRepository.ts +0 -49
- package/src/db/repositories/PageRepository.ts +0 -128
- package/src/db/repositories/SiteRepository.ts +0 -32
- package/src/db/repositories/SnapshotRepository.ts +0 -74
- package/src/db/schema.ts +0 -177
- package/src/diff/compare.ts +0 -84
- package/src/graph/cluster.ts +0 -192
- package/src/graph/duplicate.ts +0 -286
- package/src/graph/graph.ts +0 -172
- package/src/graph/metrics.ts +0 -110
- package/src/graph/pagerank.ts +0 -125
- package/src/graph/simhash.ts +0 -61
- package/src/index.ts +0 -30
- package/src/lock/hashKey.ts +0 -51
- package/src/lock/lockManager.ts +0 -124
- package/src/lock/pidCheck.ts +0 -13
- package/src/report/html.ts +0 -227
- package/src/report/sitegraphExport.ts +0 -58
- package/src/scoring/hits.ts +0 -131
- package/src/scoring/orphanSeverity.ts +0 -176
- package/src/utils/version.ts +0 -18
- package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
- package/tests/analysis.unit.test.ts +0 -98
- package/tests/analyze.integration.test.ts +0 -98
- package/tests/audit/dns.test.ts +0 -31
- package/tests/audit/headers.test.ts +0 -45
- package/tests/audit/scoring.test.ts +0 -133
- package/tests/audit/security.test.ts +0 -12
- package/tests/audit/transport.test.ts +0 -112
- package/tests/clustering.test.ts +0 -118
- package/tests/crawler.test.ts +0 -358
- package/tests/db.test.ts +0 -159
- package/tests/diff.test.ts +0 -67
- package/tests/duplicate.test.ts +0 -110
- package/tests/fetcher.test.ts +0 -106
- package/tests/fetcher_safety.test.ts +0 -85
- package/tests/fixtures/analyze-crawl.json +0 -26
- package/tests/hits.test.ts +0 -134
- package/tests/html_report.test.ts +0 -58
- package/tests/lock/lockManager.test.ts +0 -138
- package/tests/metrics.test.ts +0 -196
- package/tests/normalize.test.ts +0 -101
- package/tests/orphanSeverity.test.ts +0 -160
- package/tests/pagerank.test.ts +0 -98
- package/tests/parser.test.ts +0 -117
- package/tests/proxy_safety.test.ts +0 -57
- package/tests/redirect_safety.test.ts +0 -73
- package/tests/safety.test.ts +0 -114
- package/tests/scope.test.ts +0 -66
- package/tests/scoring.test.ts +0 -59
- package/tests/sitemap.test.ts +0 -88
- package/tests/soft404.test.ts +0 -41
- package/tests/trap.test.ts +0 -39
- package/tests/visualization_data.test.ts +0 -46
- package/tsconfig.json +0 -11
package/dist/report/html.js
CHANGED
|
@@ -1,223 +1,22 @@
|
|
|
1
|
+
import { Crawl_HTML } from './crawl_template.js';
|
|
1
2
|
function safeJson(data) {
|
|
2
3
|
return JSON.stringify(data).replace(/</g, '\\u003c');
|
|
3
4
|
}
|
|
4
5
|
export function generateHtml(graphData, metrics) {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
background: white;
|
|
18
|
-
border: 1px solid #ccc;
|
|
19
|
-
padding: 10px;
|
|
20
|
-
pointer-events: none;
|
|
21
|
-
font-size: 12px;
|
|
22
|
-
box-shadow: 2px 2px 5px rgba(0,0,0,0.1);
|
|
23
|
-
display: none;
|
|
24
|
-
}
|
|
25
|
-
#metrics {
|
|
26
|
-
position: absolute;
|
|
27
|
-
top: 10px;
|
|
28
|
-
left: 10px;
|
|
29
|
-
background: rgba(255, 255, 255, 0.9);
|
|
30
|
-
padding: 15px;
|
|
31
|
-
border-radius: 5px;
|
|
32
|
-
box-shadow: 0 0 10px rgba(0,0,0,0.1);
|
|
33
|
-
max-width: 320px;
|
|
34
|
-
max-height: 90vh;
|
|
35
|
-
overflow-y: auto;
|
|
36
|
-
z-index: 100;
|
|
37
|
-
}
|
|
38
|
-
h1 { font-size: 18px; margin-top: 0; }
|
|
39
|
-
h2 { font-size: 14px; margin: 15px 0 5px; border-bottom: 1px solid #ddd; }
|
|
40
|
-
ul { padding-left: 20px; margin: 5px 0; }
|
|
41
|
-
.legend { margin-top: 10px; font-size: 11px; }
|
|
42
|
-
.legend-item { display: flex; align-items: center; margin-bottom: 3px; }
|
|
43
|
-
.dot { width: 8px; height: 8px; border-radius: 50%; margin-right: 5px; }
|
|
44
|
-
.stat-row { display: flex; justify-content: space-between; font-size: 13px; margin-bottom: 3px; }
|
|
45
|
-
.stat-label { color: #666; }
|
|
46
|
-
.stat-value { font-weight: bold; }
|
|
47
|
-
</style>
|
|
48
|
-
</head>
|
|
49
|
-
<body>
|
|
50
|
-
<div id="metrics">
|
|
51
|
-
<h1>Crawlith Site Graph</h1>
|
|
52
|
-
|
|
53
|
-
<div class="stat-row">
|
|
54
|
-
<span class="stat-label">Discovered Pages:</span>
|
|
55
|
-
<span class="stat-value">${metrics.totalPages}</span>
|
|
56
|
-
</div>
|
|
57
|
-
${metrics.sessionStats ? `
|
|
58
|
-
<div class="stat-row">
|
|
59
|
-
<span class="stat-label">Session Crawl:</span>
|
|
60
|
-
<span class="stat-value">${metrics.sessionStats.pagesFetched} pages</span>
|
|
61
|
-
</div>
|
|
62
|
-
${metrics.sessionStats.pagesCached > 0 ? `
|
|
63
|
-
<div class="stat-row" style="font-size: 11px; margin-top: -3px;">
|
|
64
|
-
<span class="stat-label" style="padding-left: 10px;">- Reuse Cached:</span>
|
|
65
|
-
<span class="stat-value">${metrics.sessionStats.pagesCached}</span>
|
|
66
|
-
</div>` : ''}
|
|
67
|
-
` : ''}
|
|
68
|
-
<div class="stat-row">
|
|
69
|
-
<span class="stat-label">Total Edges:</span>
|
|
70
|
-
<span class="stat-value">${metrics.totalEdges}</span>
|
|
71
|
-
</div>
|
|
72
|
-
<div class="stat-row">
|
|
73
|
-
<span class="stat-label">Max Depth:</span>
|
|
74
|
-
<span class="stat-value">${metrics.maxDepthFound}</span>
|
|
75
|
-
</div>
|
|
76
|
-
<div class="stat-row">
|
|
77
|
-
<span class="stat-label">Avg Out-Degree:</span>
|
|
78
|
-
<span class="stat-value">${metrics.averageOutDegree.toFixed(2)}</span>
|
|
79
|
-
</div>
|
|
80
|
-
|
|
81
|
-
<div class="legend">
|
|
82
|
-
<div class="legend-item"><div class="dot" style="background: red;"></div>Orphan (In-Links: 0)</div>
|
|
83
|
-
<div class="legend-item"><div class="dot" style="background: orange;"></div>Deep (Depth >= 4)</div>
|
|
84
|
-
<div class="legend-item"><div class="dot" style="background: blue;"></div>Normal</div>
|
|
85
|
-
</div>
|
|
86
|
-
|
|
87
|
-
${metrics.topAuthorityPages.length > 0 ? `
|
|
88
|
-
<h3>Top Authority</h3>
|
|
89
|
-
<ul>
|
|
90
|
-
${metrics.topAuthorityPages.map(p => `<li><a href="${p.url}" target="_blank">${new URL(p.url).pathname}</a> (${p.authority.toFixed(2)})</li>`).join('')}
|
|
91
|
-
</ul>
|
|
92
|
-
` : ''}
|
|
93
|
-
|
|
94
|
-
${metrics.orphanPages.length > 0 ? `
|
|
95
|
-
<h3>Orphan Pages (${metrics.orphanPages.length})</h3>
|
|
96
|
-
<details>
|
|
97
|
-
<summary>Show list</summary>
|
|
98
|
-
<ul>
|
|
99
|
-
${metrics.orphanPages.slice(0, 20).map(url => `<li><a href="${url}" target="_blank">${url}</a></li>`).join('')}
|
|
100
|
-
${metrics.orphanPages.length > 20 ? `<li>... and ${metrics.orphanPages.length - 20} more</li>` : ''}
|
|
101
|
-
</ul>
|
|
102
|
-
</details>
|
|
103
|
-
` : ''}
|
|
104
|
-
</div>
|
|
105
|
-
<div id="graph"></div>
|
|
106
|
-
<div class="tooltip" id="tooltip"></div>
|
|
107
|
-
|
|
108
|
-
<script src="https://d3js.org/d3.v7.min.js"></script>
|
|
109
|
-
<script>
|
|
110
|
-
// Make data available globally
|
|
6
|
+
// Strip heavy HTML content from nodes to keep the report lightweight
|
|
7
|
+
const vizGraphData = {
|
|
8
|
+
...graphData,
|
|
9
|
+
nodes: graphData.nodes ? graphData.nodes.map((n) => {
|
|
10
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
11
|
+
const { html, ...rest } = n;
|
|
12
|
+
return rest;
|
|
13
|
+
}) : []
|
|
14
|
+
};
|
|
15
|
+
const graphJson = safeJson(vizGraphData);
|
|
16
|
+
const metricsJson = safeJson(metrics);
|
|
17
|
+
return Crawl_HTML.replace('</body>', `<script>
|
|
111
18
|
window.GRAPH_DATA = ${graphJson};
|
|
112
|
-
|
|
113
|
-
const data = window.GRAPH_DATA;
|
|
114
|
-
const width = window.innerWidth;
|
|
115
|
-
const height = window.innerHeight;
|
|
116
|
-
|
|
117
|
-
const svg = d3.select("#graph").append("svg")
|
|
118
|
-
.attr("width", width)
|
|
119
|
-
.attr("height", height)
|
|
120
|
-
.call(d3.zoom().on("zoom", (event) => {
|
|
121
|
-
g.attr("transform", event.transform);
|
|
122
|
-
}));
|
|
123
|
-
|
|
124
|
-
const g = svg.append("g");
|
|
125
|
-
|
|
126
|
-
// Define arrow marker
|
|
127
|
-
svg.append("defs").selectAll("marker")
|
|
128
|
-
.data(["arrow"])
|
|
129
|
-
.enter().append("marker")
|
|
130
|
-
.attr("id", d => d)
|
|
131
|
-
.attr("viewBox", "0 -5 10 10")
|
|
132
|
-
.attr("refX", 15)
|
|
133
|
-
.attr("refY", 0)
|
|
134
|
-
.attr("markerWidth", 6)
|
|
135
|
-
.attr("markerHeight", 6)
|
|
136
|
-
.attr("orient", "auto")
|
|
137
|
-
.append("path")
|
|
138
|
-
.attr("d", "M0,-5L10,0L0,5")
|
|
139
|
-
.attr("fill", "#999");
|
|
140
|
-
|
|
141
|
-
const simulation = d3.forceSimulation(data.nodes)
|
|
142
|
-
.force("link", d3.forceLink(data.edges).id(d => d.url).distance(100))
|
|
143
|
-
.force("charge", d3.forceManyBody().strength(-300))
|
|
144
|
-
.force("center", d3.forceCenter(width / 2, height / 2))
|
|
145
|
-
.force("collide", d3.forceCollide().radius(d => Math.sqrt((d.inLinks || 0) + 1) * 5 + 2));
|
|
146
|
-
|
|
147
|
-
const link = g.append("g")
|
|
148
|
-
.attr("stroke", "#999")
|
|
149
|
-
.attr("stroke-opacity", 0.6)
|
|
150
|
-
.selectAll("line")
|
|
151
|
-
.data(data.edges)
|
|
152
|
-
.join("line")
|
|
153
|
-
.attr("stroke-width", 1)
|
|
154
|
-
.attr("marker-end", "url(#arrow)");
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
const node = g.append("g")
|
|
158
|
-
.attr("stroke", "#fff")
|
|
159
|
-
.attr("stroke-width", 1.5)
|
|
160
|
-
.selectAll("circle")
|
|
161
|
-
.data(data.nodes)
|
|
162
|
-
.join("circle")
|
|
163
|
-
.attr("r", d => Math.sqrt((d.inLinks || 0) + 1) * 3 + 2)
|
|
164
|
-
.attr("fill", d => {
|
|
165
|
-
if (d.inLinks === 0 && d.depth > 0) return "red";
|
|
166
|
-
if (d.depth >= 4) return "orange";
|
|
167
|
-
return "blue";
|
|
168
|
-
})
|
|
169
|
-
.call(d3.drag()
|
|
170
|
-
.on("start", dragstarted)
|
|
171
|
-
.on("drag", dragged)
|
|
172
|
-
.on("end", dragended));
|
|
173
|
-
|
|
174
|
-
const tooltip = d3.select("#tooltip");
|
|
175
|
-
|
|
176
|
-
node.on("mouseover", (event, d) => {
|
|
177
|
-
tooltip.style("display", "block")
|
|
178
|
-
.html(\`
|
|
179
|
-
<strong>URL:</strong> \${d.url}<br>
|
|
180
|
-
<strong>Depth:</strong> \${d.depth}<br>
|
|
181
|
-
<strong>In-Links:</strong> \${d.inLinks}<br>
|
|
182
|
-
<strong>Out-Links:</strong> \${d.outLinks}<br>
|
|
183
|
-
<strong>Status:</strong> \${d.status}
|
|
184
|
-
\`)
|
|
185
|
-
.style("left", (event.pageX + 10) + "px")
|
|
186
|
-
.style("top", (event.pageY - 10) + "px");
|
|
187
|
-
})
|
|
188
|
-
.on("mouseout", () => {
|
|
189
|
-
tooltip.style("display", "none");
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
simulation.on("tick", () => {
|
|
193
|
-
link
|
|
194
|
-
.attr("x1", d => d.source.x)
|
|
195
|
-
.attr("y1", d => d.source.y)
|
|
196
|
-
.attr("x2", d => d.target.x)
|
|
197
|
-
.attr("y2", d => d.target.y);
|
|
198
|
-
|
|
199
|
-
node
|
|
200
|
-
.attr("cx", d => d.x)
|
|
201
|
-
.attr("cy", d => d.y);
|
|
202
|
-
});
|
|
203
|
-
|
|
204
|
-
function dragstarted(event, d) {
|
|
205
|
-
if (!event.active) simulation.alphaTarget(0.3).restart();
|
|
206
|
-
d.fx = d.x;
|
|
207
|
-
d.fy = d.y;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
function dragged(event, d) {
|
|
211
|
-
d.fx = event.x;
|
|
212
|
-
d.fy = event.y;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
function dragended(event, d) {
|
|
216
|
-
if (!event.active) simulation.alphaTarget(0);
|
|
217
|
-
d.fx = null;
|
|
218
|
-
d.fy = null;
|
|
219
|
-
}
|
|
19
|
+
window.METRICS_DATA = ${metricsJson};
|
|
220
20
|
</script>
|
|
221
|
-
</body
|
|
222
|
-
</html>`;
|
|
21
|
+
</body>`);
|
|
223
22
|
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Graph, Metrics } from '@crawlith/core';
|
|
2
|
+
export interface CrawlInsightReport {
|
|
3
|
+
pages: number;
|
|
4
|
+
fetchedPages?: number;
|
|
5
|
+
summary: {
|
|
6
|
+
crawlDepth: number;
|
|
7
|
+
internalLinks: number;
|
|
8
|
+
externalLinks: number;
|
|
9
|
+
};
|
|
10
|
+
health?: {
|
|
11
|
+
score: number;
|
|
12
|
+
status: string;
|
|
13
|
+
weightedPenalties: any;
|
|
14
|
+
};
|
|
15
|
+
issues?: any;
|
|
16
|
+
topAuthorityPages: {
|
|
17
|
+
url: string;
|
|
18
|
+
score: number;
|
|
19
|
+
}[];
|
|
20
|
+
}
|
|
21
|
+
export declare function buildCrawlInsightReport(graph: Graph, metrics: Metrics, healthData?: {
|
|
22
|
+
health: any;
|
|
23
|
+
issues: any;
|
|
24
|
+
}): CrawlInsightReport;
|
|
25
|
+
export declare function renderInsightOutput(report: CrawlInsightReport, snapshotId: number): string;
|
|
26
|
+
export declare function renderScoreBreakdown(health: any): string;
|
|
27
|
+
export declare function hasCriticalIssues(report: CrawlInsightReport): boolean;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
export function buildCrawlInsightReport(graph, metrics, healthData) {
|
|
2
|
+
return {
|
|
3
|
+
pages: metrics.totalPages,
|
|
4
|
+
fetchedPages: metrics.sessionStats?.pagesFetched,
|
|
5
|
+
health: healthData?.health,
|
|
6
|
+
issues: healthData?.issues,
|
|
7
|
+
summary: {
|
|
8
|
+
crawlDepth: metrics.maxDepthFound,
|
|
9
|
+
internalLinks: metrics.totalEdges,
|
|
10
|
+
externalLinks: healthData?.issues?.externalLinks || 0
|
|
11
|
+
},
|
|
12
|
+
topAuthorityPages: metrics.topAuthorityPages.map(p => ({ url: p.url, score: p.authority }))
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
export function renderInsightOutput(report, snapshotId) {
|
|
16
|
+
const lines = [];
|
|
17
|
+
// Header
|
|
18
|
+
lines.push(`CRAWLITH — Crawl`);
|
|
19
|
+
lines.push('');
|
|
20
|
+
lines.push(`# ${snapshotId}`);
|
|
21
|
+
lines.push('');
|
|
22
|
+
if (report.fetchedPages !== undefined) {
|
|
23
|
+
if (report.fetchedPages === report.pages) {
|
|
24
|
+
lines.push(`${report.pages} pages crawled`);
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
lines.push(`${report.fetchedPages} pages fetched / ${report.pages} discovered`);
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
else {
|
|
31
|
+
lines.push(`${report.pages} pages crawled`);
|
|
32
|
+
}
|
|
33
|
+
lines.push('');
|
|
34
|
+
// Health Score if available
|
|
35
|
+
if (report.health) {
|
|
36
|
+
lines.push(`Score: ${report.health.score} (${report.health.status})`);
|
|
37
|
+
lines.push('');
|
|
38
|
+
}
|
|
39
|
+
// ===== Critical =====
|
|
40
|
+
if (report.issues) {
|
|
41
|
+
const critical = [];
|
|
42
|
+
const addLine = (arr, condition, text) => condition && arr.push(text);
|
|
43
|
+
addLine(critical, report.issues.orphanPages > 0, `${report.issues.orphanPages} orphan pages`);
|
|
44
|
+
addLine(critical, report.issues.redirectChains > 0, `${report.issues.redirectChains} redirect chains`);
|
|
45
|
+
addLine(critical, report.issues.brokenInternalLinks > 0, `${report.issues.brokenInternalLinks} broken internal links`);
|
|
46
|
+
addLine(critical, report.issues.duplicateClusters > 0, `${report.issues.duplicateClusters} near-duplicate clusters`);
|
|
47
|
+
addLine(critical, report.issues.canonicalConflicts > 0, `${report.issues.canonicalConflicts} canonical conflicts`);
|
|
48
|
+
addLine(critical, report.issues.accidentalNoindex > 0, `${report.issues.accidentalNoindex} pages accidentally noindexed`);
|
|
49
|
+
addLine(critical, report.issues.blockedByRobots > 0, `${report.issues.blockedByRobots} pages blocked by robots.txt`);
|
|
50
|
+
if (critical.length > 0) {
|
|
51
|
+
lines.push(`Critical`);
|
|
52
|
+
for (const c of critical)
|
|
53
|
+
lines.push(` • ${c}`);
|
|
54
|
+
lines.push('');
|
|
55
|
+
}
|
|
56
|
+
// ===== Warnings =====
|
|
57
|
+
const warnings = [];
|
|
58
|
+
addLine(warnings, report.issues.missingH1 > 0, `${report.issues.missingH1} pages missing H1`);
|
|
59
|
+
addLine(warnings, report.issues.thinContent > 0, `${report.issues.thinContent} thin content pages`);
|
|
60
|
+
addLine(warnings, report.issues.excessiveInternalLinkCount > 0, `${report.issues.excessiveInternalLinkCount} pages with excessive links`);
|
|
61
|
+
addLine(warnings, report.issues.imageAltMissing > 0, `${report.issues.imageAltMissing} pages missing image alt`);
|
|
62
|
+
if (warnings.length > 0) {
|
|
63
|
+
lines.push(`Warnings`);
|
|
64
|
+
for (const w of warnings)
|
|
65
|
+
lines.push(` • ${w}`);
|
|
66
|
+
lines.push('');
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
// ===== Structure =====
|
|
70
|
+
lines.push(`Structure`);
|
|
71
|
+
lines.push(` Depth Reached ${report.summary.crawlDepth}`);
|
|
72
|
+
lines.push(` Internal Links ${report.summary.internalLinks}`);
|
|
73
|
+
lines.push(` External Links ${report.summary.externalLinks}`);
|
|
74
|
+
lines.push('');
|
|
75
|
+
// ===== Authority =====
|
|
76
|
+
if (report.topAuthorityPages.length > 0) {
|
|
77
|
+
lines.push(`Top Authority`);
|
|
78
|
+
for (const page of report.topAuthorityPages.slice(0, 10)) {
|
|
79
|
+
lines.push(` ${page.url} ${page.score.toFixed(3)}`);
|
|
80
|
+
}
|
|
81
|
+
lines.push('');
|
|
82
|
+
}
|
|
83
|
+
return `${lines.join('\n')}\n`;
|
|
84
|
+
}
|
|
85
|
+
export function renderScoreBreakdown(health) {
|
|
86
|
+
return [
|
|
87
|
+
'Health Score Breakdown',
|
|
88
|
+
`weights: ${JSON.stringify(health.weights)}`,
|
|
89
|
+
`penalties: ${JSON.stringify(health.weightedPenalties)}`
|
|
90
|
+
].join('\n');
|
|
91
|
+
}
|
|
92
|
+
export function hasCriticalIssues(report) {
|
|
93
|
+
if (!report.issues)
|
|
94
|
+
return false;
|
|
95
|
+
const { issues } = report;
|
|
96
|
+
return (issues.orphanPages > 0 ||
|
|
97
|
+
issues.brokenInternalLinks > 0 ||
|
|
98
|
+
issues.redirectChains > 0 ||
|
|
99
|
+
issues.duplicateClusters > 0 ||
|
|
100
|
+
issues.canonicalConflicts > 0 ||
|
|
101
|
+
issues.accidentalNoindex > 0 ||
|
|
102
|
+
issues.blockedByRobots > 0);
|
|
103
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { Graph } from '../graph/graph.js';
|
|
2
|
+
export interface HealthScoreWeights {
|
|
3
|
+
orphans: number;
|
|
4
|
+
brokenLinks: number;
|
|
5
|
+
redirectChains: number;
|
|
6
|
+
duplicateClusters: number;
|
|
7
|
+
thinContent: number;
|
|
8
|
+
missingH1: number;
|
|
9
|
+
noindexMisuse: number;
|
|
10
|
+
canonicalConflicts: number;
|
|
11
|
+
lowInternalLinks: number;
|
|
12
|
+
excessiveLinks: number;
|
|
13
|
+
blockedByRobots: number;
|
|
14
|
+
crawlTraps: number;
|
|
15
|
+
}
|
|
16
|
+
export interface CrawlIssueCounts {
|
|
17
|
+
orphanPages: number;
|
|
18
|
+
brokenInternalLinks: number;
|
|
19
|
+
redirectChains: number;
|
|
20
|
+
duplicateClusters: number;
|
|
21
|
+
canonicalConflicts: number;
|
|
22
|
+
accidentalNoindex: number;
|
|
23
|
+
missingH1: number;
|
|
24
|
+
thinContent: number;
|
|
25
|
+
lowInternalLinkCount: number;
|
|
26
|
+
excessiveInternalLinkCount: number;
|
|
27
|
+
highExternalLinkRatio: number;
|
|
28
|
+
imageAltMissing: number;
|
|
29
|
+
strongPagesUnderLinking: number;
|
|
30
|
+
cannibalizationClusters: number;
|
|
31
|
+
nearAuthorityThreshold: number;
|
|
32
|
+
underlinkedHighAuthorityPages: number;
|
|
33
|
+
externalLinks: number;
|
|
34
|
+
blockedByRobots: number;
|
|
35
|
+
crawlTraps: number;
|
|
36
|
+
}
|
|
37
|
+
export interface HealthScoreBreakdown {
|
|
38
|
+
score: number;
|
|
39
|
+
status: string;
|
|
40
|
+
weightedPenalties: Record<string, number>;
|
|
41
|
+
weights: HealthScoreWeights;
|
|
42
|
+
}
|
|
43
|
+
export declare const THIN_CONTENT_THRESHOLD = 200;
|
|
44
|
+
export declare const LOW_INTERNAL_LINK_THRESHOLD = 2;
|
|
45
|
+
export declare const EXCESSIVE_INTERNAL_LINK_THRESHOLD = 150;
|
|
46
|
+
export declare const HIGH_EXTERNAL_LINK_RATIO_THRESHOLD = 0.6;
|
|
47
|
+
export declare const OPPORTUNITY_AUTHORITY_THRESHOLD = 0.8;
|
|
48
|
+
export declare const DEFAULT_HEALTH_WEIGHTS: HealthScoreWeights;
|
|
49
|
+
export declare class HealthService {
|
|
50
|
+
calculateHealthScore(totalPages: number, issues: Pick<CrawlIssueCounts, 'orphanPages' | 'brokenInternalLinks' | 'redirectChains' | 'duplicateClusters' | 'thinContent' | 'missingH1' | 'accidentalNoindex' | 'canonicalConflicts' | 'lowInternalLinkCount' | 'excessiveInternalLinkCount' | 'blockedByRobots' | 'crawlTraps'>, weights?: HealthScoreWeights): HealthScoreBreakdown;
|
|
51
|
+
collectCrawlIssues(graph: Graph, metrics: any, rootOrigin?: string): CrawlIssueCounts;
|
|
52
|
+
private clamp;
|
|
53
|
+
private healthStatusLabel;
|
|
54
|
+
}
|
|
55
|
+
export declare const calculateHealthScore: (totalPages: number, issues: Pick<CrawlIssueCounts, "orphanPages" | "brokenInternalLinks" | "redirectChains" | "duplicateClusters" | "thinContent" | "missingH1" | "accidentalNoindex" | "canonicalConflicts" | "lowInternalLinkCount" | "excessiveInternalLinkCount" | "blockedByRobots" | "crawlTraps">, weights?: HealthScoreWeights) => HealthScoreBreakdown;
|
|
56
|
+
export declare const healthStatusLabel: (score: number, hasCritical?: boolean) => "Needs Attention" | "Excellent" | "Good" | "Critical";
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { analyzeContent } from '../analysis/content.js';
|
|
2
|
+
import { analyzeH1 } from '../analysis/seo.js';
|
|
3
|
+
import { analyzeImageAlts } from '../analysis/images.js';
|
|
4
|
+
import { analyzeLinks } from '../analysis/links.js';
|
|
5
|
+
import { UrlUtil } from '../crawler/normalize.js';
|
|
6
|
+
export const THIN_CONTENT_THRESHOLD = 200;
|
|
7
|
+
export const LOW_INTERNAL_LINK_THRESHOLD = 2;
|
|
8
|
+
export const EXCESSIVE_INTERNAL_LINK_THRESHOLD = 150;
|
|
9
|
+
export const HIGH_EXTERNAL_LINK_RATIO_THRESHOLD = 0.6;
|
|
10
|
+
export const OPPORTUNITY_AUTHORITY_THRESHOLD = 0.8;
|
|
11
|
+
export const DEFAULT_HEALTH_WEIGHTS = {
|
|
12
|
+
orphans: 50,
|
|
13
|
+
brokenLinks: 100,
|
|
14
|
+
redirectChains: 20,
|
|
15
|
+
duplicateClusters: 25,
|
|
16
|
+
thinContent: 15,
|
|
17
|
+
missingH1: 10,
|
|
18
|
+
noindexMisuse: 20,
|
|
19
|
+
canonicalConflicts: 10,
|
|
20
|
+
lowInternalLinks: 10,
|
|
21
|
+
excessiveLinks: 5,
|
|
22
|
+
blockedByRobots: 100,
|
|
23
|
+
crawlTraps: 50
|
|
24
|
+
};
|
|
25
|
+
/**
 * Pairs each penalty bucket (the key used in the `weights` object and in the
 * returned `weightedPenalties`) with the issue counter that feeds it.
 * The order fixes both the key order of `weightedPenalties` and the order in
 * which penalties are summed.
 */
const HEALTH_PENALTY_SOURCES = [
    ['orphans', 'orphanPages'],
    ['brokenLinks', 'brokenInternalLinks'],
    ['redirectChains', 'redirectChains'],
    ['duplicateClusters', 'duplicateClusters'],
    ['thinContent', 'thinContent'],
    ['missingH1', 'missingH1'],
    ['noindexMisuse', 'accidentalNoindex'],
    ['canonicalConflicts', 'canonicalConflicts'],
    ['lowInternalLinks', 'lowInternalLinkCount'],
    ['excessiveLinks', 'excessiveInternalLinkCount'],
    ['blockedByRobots', 'blockedByRobots'],
    ['crawlTraps', 'crawlTraps']
];
/**
 * Issue counters that mark a crawl as having a critical problem as soon as
 * any one of them is greater than zero.
 */
const HEALTH_CRITICAL_ISSUE_KEYS = [
    'orphanPages',
    'brokenInternalLinks',
    'redirectChains',
    'duplicateClusters',
    'canonicalConflicts',
    'accidentalNoindex',
    'blockedByRobots'
];
/**
 * Computes a 0-100 site health score from crawl issue counts and gathers
 * those counts from a crawl graph.
 */
export class HealthService {
    /**
     * Turns issue counts into a weighted health score.
     *
     * Each bucket's penalty is `(count / pages) * weight`, clamped to
     * `[0, weight]`, so a single category can never cost more than its weight.
     *
     * @param {number} totalPages - Number of pages the counts relate to. Values
     *   below 1, or values that are not numeric, are treated as 1.
     * @param {object} issues - Issue counters keyed as returned by
     *   `collectCrawlIssues`; missing or falsy counters count as 0.
     * @param {object} [weights=DEFAULT_HEALTH_WEIGHTS] - Maximum penalty per bucket.
     * @returns {{score: number, status: string, weightedPenalties: object, weights: object}}
     *   `score` is rounded to one decimal place; `weights` is the object passed in.
     */
    calculateHealthScore(totalPages, issues, weights = DEFAULT_HEALTH_WEIGHTS) {
        // Math.max(undefined, 1) is NaN, which would poison every penalty and
        // the score; a plain comparison sends NaN down the "1 page" path.
        const pageCount = Number(totalPages);
        const safePages = pageCount > 1 ? pageCount : 1;
        const weightedPenalties = {};
        let totalPenalty = 0;
        for (const [bucket, issueKey] of HEALTH_PENALTY_SOURCES) {
            const cap = weights[bucket];
            const penalty = this.clamp(((issues[issueKey] || 0) / safePages) * cap, 0, cap);
            weightedPenalties[bucket] = penalty;
            totalPenalty += penalty;
        }
        const score = Number(this.clamp(100 - totalPenalty, 0, 100).toFixed(1));
        const hasCritical = HEALTH_CRITICAL_ISSUE_KEYS.some((issueKey) => (issues[issueKey] || 0) > 0);
        return {
            score,
            status: this.healthStatusLabel(score, hasCritical),
            weightedPenalties,
            weights
        };
    }
    /**
     * Walks the crawl graph and counts the issues that feed the health score
     * and related reporting.
     *
     * @param {object} graph - Must provide `getNodes()` and `nodes.get(url)`;
     *   `contentClusters` is used when present.
     * @param {object} metrics - `orphanPages` supplies the orphan count and
     *   `clusters` is the fallback when the graph has no `contentClusters`.
     * @param {string} [rootOrigin=''] - When set, relative node URLs are
     *   resolved against it before canonical and link analysis.
     * @returns {object} One counter per issue type.
     */
    collectCrawlIssues(graph, metrics, rootOrigin = '') {
        const nodes = graph.getNodes();
        let brokenInternalLinks = 0;
        let redirectChains = 0;
        let canonicalConflicts = 0;
        let accidentalNoindex = 0;
        let missingH1 = 0;
        let thinContent = 0;
        let highExternalLinkRatio = 0;
        let imageAltMissing = 0;
        let lowInternalLinkCount = 0;
        let excessiveInternalLinkCount = 0;
        let strongPagesUnderLinking = 0;
        let nearAuthorityThreshold = 0;
        let underlinkedHighAuthorityPages = 0;
        let externalLinks = 0;
        let blockedByRobots = 0;
        let crawlTraps = 0;
        for (const node of nodes) {
            if (!node.isInternal) {
                continue;
            }
            if (node.crawlStatus === 'blocked' || node.crawlStatus === 'blocked_by_robots') {
                blockedByRobots += 1;
            }
            if (node.crawlTrapFlag) {
                crawlTraps += 1;
            }
            // A page that itself failed counts once ...
            if (this.isConfirmedCrawlError(node)) {
                brokenInternalLinks += 1;
            }
            // ... and so does every recorded broken link whose target is a
            // known node that failed. Targets missing from the graph are ignored.
            if (node.brokenLinks) {
                const actualBreaks = node.brokenLinks.filter((url) => {
                    const target = graph.nodes.get(url);
                    return Boolean(target) && this.isConfirmedCrawlError(target);
                });
                brokenInternalLinks += actualBreaks.length;
            }
            if ((node.redirectChain?.length || 0) > 1) {
                redirectChains += 1;
            }
            const absoluteUrl = rootOrigin ? (node.url.startsWith('http') ? node.url : new URL(node.url, rootOrigin).toString()) : node.url;
            if (node.canonical && node.canonical !== node.url && node.canonical !== absoluteUrl) {
                // Only a trailing-slash difference is tolerated here; any other
                // difference (including protocol) is reported as a conflict.
                const normCanonical = node.canonical.replace(/\/$/, '');
                const normAbsolute = absoluteUrl.replace(/\/$/, '');
                if (normCanonical !== normAbsolute) {
                    canonicalConflicts += 1;
                }
            }
            if (node.noindex && node.status >= 200 && node.status < 300) {
                accidentalNoindex += 1;
            }
            if (node.inLinks === 1 && node.depth > 0) {
                lowInternalLinkCount += 1;
            }
            if (node.outLinks > EXCESSIVE_INTERNAL_LINK_THRESHOLD) {
                excessiveInternalLinkCount += 1;
            }
            // Everything below inspects the page markup, so pages without
            // stored HTML stop here.
            if (!node.html) {
                continue;
            }
            if (analyzeH1(node.html, '').count === 0) {
                missingH1 += 1;
            }
            // Use the stored word count when there is one; otherwise derive it
            // from the markup.
            const wordCount = node.wordCount ?? analyzeContent(node.html).wordCount;
            if (wordCount < THIN_CONTENT_THRESHOLD) {
                thinContent += 1;
            }
            const pageAbsUrl = rootOrigin ? UrlUtil.toAbsolute(node.url, rootOrigin) : node.url;
            const links = analyzeLinks(node.html, pageAbsUrl, rootOrigin || node.url);
            externalLinks += links.externalLinks;
            if (links.externalRatio > HIGH_EXTERNAL_LINK_RATIO_THRESHOLD) {
                highExternalLinkRatio += 1;
            }
            if (analyzeImageAlts(node.html).missingAlt > 0) {
                imageAltMissing += 1;
            }
        }
        const clusters = graph.contentClusters || metrics.clusters || [];
        const duplicateClusters = clusters.length;
        const cannibalizationClusters = clusters.filter((cluster) => cluster.risk === 'high' || cluster.type === 'near').length;
        // Second pass covers every node (internal or not) and uses a coarse
        // two-level authority estimate derived from the inbound link count.
        for (const node of nodes) {
            const authority = node.inLinks > 5 ? 0.8 : 0.2;
            if (authority >= OPPORTUNITY_AUTHORITY_THRESHOLD && node.outLinks < 3) {
                strongPagesUnderLinking += 1;
            }
            if (authority >= 0.65 && authority < OPPORTUNITY_AUTHORITY_THRESHOLD) {
                nearAuthorityThreshold += 1;
            }
            if (authority >= OPPORTUNITY_AUTHORITY_THRESHOLD && node.inLinks < LOW_INTERNAL_LINK_THRESHOLD) {
                underlinkedHighAuthorityPages += 1;
            }
        }
        return {
            orphanPages: metrics.orphanPages?.length || 0,
            brokenInternalLinks,
            redirectChains,
            duplicateClusters,
            canonicalConflicts,
            accidentalNoindex,
            missingH1,
            thinContent,
            lowInternalLinkCount,
            excessiveInternalLinkCount,
            highExternalLinkRatio,
            imageAltMissing,
            strongPagesUnderLinking,
            cannibalizationClusters,
            nearAuthorityThreshold,
            underlinkedHighAuthorityPages,
            externalLinks,
            blockedByRobots,
            crawlTraps
        };
    }
    /**
     * Tells whether a node represents a confirmed fetch failure: an HTTP
     * status of 400 or above, or status 0 combined with a failing crawl
     * status or a security error.
     *
     * @param {object} node - Graph node with `status`, `crawlStatus` and `securityError`.
     * @returns {boolean} True when the node is a confirmed error.
     */
    isConfirmedCrawlError(node) {
        if (node.status >= 400) {
            return true;
        }
        if (node.status !== 0) {
            return false;
        }
        return (node.crawlStatus === 'network_error' ||
            node.crawlStatus === 'failed_after_retries' ||
            Boolean(node.securityError) ||
            node.crawlStatus === 'fetched_error');
    }
    /**
     * Restricts `value` to the inclusive range `[min, max]`.
     *
     * @param {number} value
     * @param {number} min
     * @param {number} max
     * @returns {number}
     */
    clamp(value, min, max) {
        return Math.min(max, Math.max(min, value));
    }
    /**
     * Maps a score to its status label.
     *
     * @param {number} score - Health score.
     * @param {boolean} [hasCritical=false] - When true, a score of 75 or more
     *   is capped at 'Needs Attention' instead of 'Good' or 'Excellent'.
     * @returns {string} 'Excellent', 'Good', 'Needs Attention' or 'Critical'.
     */
    healthStatusLabel(score, hasCritical = false) {
        if (hasCritical && score >= 75) {
            return 'Needs Attention';
        }
        if (score >= 90) {
            return 'Excellent';
        }
        if (score >= 75) {
            return 'Good';
        }
        if (score >= 50) {
            return 'Needs Attention';
        }
        return 'Critical';
    }
}
|
|
201
|
+
// Single shared HealthService instance backing the module's function-style exports.
const service = new HealthService();
/**
 * Function-style entry point for the health score; forwards its arguments
 * unchanged to the shared HealthService instance.
 *
 * @param {number} totalPages - Number of pages the issue counts relate to.
 * @param {object} issues - Issue counters.
 * @param {object} [weights=DEFAULT_HEALTH_WEIGHTS] - Maximum penalty per bucket.
 * @returns {object} Whatever `HealthService#calculateHealthScore` returns.
 */
export const calculateHealthScore = (totalPages, issues, weights = DEFAULT_HEALTH_WEIGHTS) => {
    return service.calculateHealthScore(totalPages, issues, weights);
};
|
|
203
|
+
/**
 * Function-style entry point for the status label.
 *
 * Forwards to the shared `service` instance instead of repeating the
 * threshold ladder, so the label boundaries are defined in one place only
 * (`HealthService#healthStatusLabel`).
 *
 * @param {number} score - Health score.
 * @param {boolean} [hasCritical=false] - Whether a critical issue was found.
 * @returns {string} The status label chosen by `HealthService#healthStatusLabel`.
 */
export const healthStatusLabel = (score, hasCritical = false) => service.healthStatusLabel(score, hasCritical);
|