@crawlith/core 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/analysis/analysis_list.html +35 -0
- package/dist/analysis/analysis_page.html +123 -0
- package/dist/analysis/analyze.d.ts +17 -3
- package/dist/analysis/analyze.js +192 -248
- package/dist/analysis/scoring.js +7 -1
- package/dist/analysis/templates.d.ts +2 -0
- package/dist/analysis/templates.js +7 -0
- package/dist/core/security/ipGuard.d.ts +11 -0
- package/dist/core/security/ipGuard.js +71 -3
- package/dist/crawler/crawl.d.ts +4 -22
- package/dist/crawler/crawl.js +4 -335
- package/dist/crawler/crawler.d.ts +75 -0
- package/dist/crawler/crawler.js +518 -0
- package/dist/crawler/extract.d.ts +4 -1
- package/dist/crawler/extract.js +7 -2
- package/dist/crawler/fetcher.d.ts +1 -0
- package/dist/crawler/fetcher.js +20 -5
- package/dist/crawler/metricsRunner.d.ts +3 -1
- package/dist/crawler/metricsRunner.js +55 -46
- package/dist/crawler/sitemap.d.ts +3 -0
- package/dist/crawler/sitemap.js +5 -1
- package/dist/db/graphLoader.js +32 -3
- package/dist/db/index.d.ts +3 -0
- package/dist/db/index.js +4 -0
- package/dist/db/repositories/EdgeRepository.d.ts +8 -0
- package/dist/db/repositories/EdgeRepository.js +13 -0
- package/dist/db/repositories/MetricsRepository.d.ts +3 -0
- package/dist/db/repositories/MetricsRepository.js +14 -1
- package/dist/db/repositories/PageRepository.d.ts +11 -0
- package/dist/db/repositories/PageRepository.js +112 -19
- package/dist/db/repositories/SiteRepository.d.ts +3 -0
- package/dist/db/repositories/SiteRepository.js +9 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +2 -0
- package/dist/db/repositories/SnapshotRepository.js +23 -2
- package/dist/events.d.ts +48 -0
- package/dist/events.js +1 -0
- package/dist/graph/cluster.js +62 -14
- package/dist/graph/duplicate.js +242 -191
- package/dist/graph/graph.d.ts +16 -0
- package/dist/graph/graph.js +17 -4
- package/dist/graph/metrics.js +12 -0
- package/dist/graph/pagerank.js +2 -0
- package/dist/graph/simhash.d.ts +6 -0
- package/dist/graph/simhash.js +14 -0
- package/dist/index.d.ts +5 -2
- package/dist/index.js +5 -2
- package/dist/lock/hashKey.js +1 -1
- package/dist/lock/lockManager.d.ts +4 -1
- package/dist/lock/lockManager.js +23 -13
- package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
- package/dist/report/crawlExport.d.ts +3 -0
- package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
- package/dist/report/crawl_template.d.ts +1 -0
- package/dist/report/crawl_template.js +7 -0
- package/dist/report/html.js +15 -216
- package/dist/scoring/health.d.ts +50 -0
- package/dist/scoring/health.js +170 -0
- package/dist/scoring/hits.d.ts +1 -0
- package/dist/scoring/hits.js +64 -44
- package/dist/scoring/orphanSeverity.d.ts +5 -5
- package/package.json +3 -3
- package/scripts/copy-assets.js +37 -0
- package/src/analysis/analysis_list.html +35 -0
- package/src/analysis/analysis_page.html +123 -0
- package/src/analysis/analyze.ts +218 -261
- package/src/analysis/scoring.ts +8 -1
- package/src/analysis/templates.ts +9 -0
- package/src/core/security/ipGuard.ts +82 -3
- package/src/crawler/crawl.ts +6 -379
- package/src/crawler/crawler.ts +601 -0
- package/src/crawler/extract.ts +7 -2
- package/src/crawler/fetcher.ts +24 -6
- package/src/crawler/metricsRunner.ts +60 -47
- package/src/crawler/sitemap.ts +4 -1
- package/src/db/graphLoader.ts +33 -3
- package/src/db/index.ts +5 -0
- package/src/db/repositories/EdgeRepository.ts +14 -0
- package/src/db/repositories/MetricsRepository.ts +15 -1
- package/src/db/repositories/PageRepository.ts +119 -19
- package/src/db/repositories/SiteRepository.ts +11 -0
- package/src/db/repositories/SnapshotRepository.ts +28 -3
- package/src/events.ts +16 -0
- package/src/graph/cluster.ts +69 -15
- package/src/graph/duplicate.ts +249 -185
- package/src/graph/graph.ts +24 -4
- package/src/graph/metrics.ts +15 -0
- package/src/graph/pagerank.ts +1 -0
- package/src/graph/simhash.ts +15 -0
- package/src/index.ts +5 -2
- package/src/lock/hashKey.ts +1 -1
- package/src/lock/lockManager.ts +21 -13
- package/{dist/report/sitegraph_template.js → src/report/crawl.html} +330 -81
- package/src/report/{sitegraphExport.ts → crawlExport.ts} +3 -3
- package/src/report/crawl_template.ts +9 -0
- package/src/report/html.ts +17 -217
- package/src/scoring/health.ts +241 -0
- package/src/scoring/hits.ts +67 -45
- package/src/scoring/orphanSeverity.ts +8 -8
- package/tests/analysis.unit.test.ts +44 -0
- package/tests/analyze.integration.test.ts +88 -53
- package/tests/analyze_markdown.test.ts +98 -0
- package/tests/audit/audit.test.ts +101 -0
- package/tests/audit/scoring.test.ts +25 -25
- package/tests/audit/transport.test.ts +0 -1
- package/tests/clustering_risk.test.ts +118 -0
- package/tests/crawler.test.ts +19 -13
- package/tests/db/index.test.ts +134 -0
- package/tests/db/repositories.test.ts +115 -0
- package/tests/db_repos.test.ts +72 -0
- package/tests/duplicate.test.ts +2 -2
- package/tests/extract.test.ts +86 -0
- package/tests/fetcher.test.ts +5 -1
- package/tests/fetcher_safety.test.ts +9 -3
- package/tests/graph/graph.test.ts +100 -0
- package/tests/graphLoader.test.ts +124 -0
- package/tests/html_report.test.ts +52 -51
- package/tests/ipGuard.test.ts +73 -0
- package/tests/lock/lockManager.test.ts +77 -17
- package/tests/normalize.test.ts +6 -19
- package/tests/orphanSeverity.test.ts +9 -9
- package/tests/redirect_safety.test.ts +5 -1
- package/tests/renderAnalysisCsv.test.ts +183 -0
- package/tests/safety.test.ts +12 -0
- package/tests/scope.test.ts +18 -0
- package/tests/scoring.test.ts +25 -24
- package/tests/sitemap.test.ts +13 -1
- package/tests/ssrf_fix.test.ts +69 -0
- package/tests/visualization_data.test.ts +10 -10
- package/dist/report/sitegraphExport.d.ts +0 -3
- package/dist/report/sitegraph_template.d.ts +0 -1
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
<!DOCTYPE html>
|
|
2
2
|
<html lang="en">
|
|
3
|
+
|
|
3
4
|
<head>
|
|
4
5
|
<meta charset="UTF-8">
|
|
5
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
@@ -13,81 +14,329 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
13
14
|
--accent-color: #4a90e2;
|
|
14
15
|
--sidebar-width: 300px;
|
|
15
16
|
}
|
|
16
|
-
|
|
17
|
+
|
|
18
|
+
body {
|
|
19
|
+
margin: 0;
|
|
20
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
|
|
21
|
+
background: var(--bg-color);
|
|
22
|
+
color: var(--text-color);
|
|
23
|
+
height: 100vh;
|
|
24
|
+
display: flex;
|
|
25
|
+
flex-direction: column;
|
|
26
|
+
overflow: hidden;
|
|
27
|
+
}
|
|
17
28
|
|
|
18
29
|
/* Layout */
|
|
19
|
-
header {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
30
|
+
header {
|
|
31
|
+
padding: 0 20px;
|
|
32
|
+
background: var(--panel-bg);
|
|
33
|
+
border-bottom: 1px solid var(--border-color);
|
|
34
|
+
display: flex;
|
|
35
|
+
justify-content: space-between;
|
|
36
|
+
align-items: center;
|
|
37
|
+
height: 60px;
|
|
38
|
+
box-sizing: border-box;
|
|
39
|
+
z-index: 10;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
main {
|
|
43
|
+
flex: 1;
|
|
44
|
+
display: flex;
|
|
45
|
+
overflow: hidden;
|
|
46
|
+
position: relative;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
#graph-container {
|
|
50
|
+
flex: 1;
|
|
51
|
+
position: relative;
|
|
52
|
+
overflow: hidden;
|
|
53
|
+
background: var(--bg-color);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
#details-panel {
|
|
57
|
+
width: var(--sidebar-width);
|
|
58
|
+
background: var(--panel-bg);
|
|
59
|
+
border-left: 1px solid var(--border-color);
|
|
60
|
+
padding: 20px;
|
|
61
|
+
overflow-y: auto;
|
|
62
|
+
box-sizing: border-box;
|
|
63
|
+
display: none;
|
|
64
|
+
flex-direction: column;
|
|
65
|
+
gap: 15px;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
#details-panel.visible {
|
|
69
|
+
display: flex;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
footer {
|
|
73
|
+
padding: 5px 20px;
|
|
74
|
+
background: var(--panel-bg);
|
|
75
|
+
border-top: 1px solid var(--border-color);
|
|
76
|
+
font-size: 0.8rem;
|
|
77
|
+
text-align: center;
|
|
78
|
+
color: #666;
|
|
79
|
+
height: 30px;
|
|
80
|
+
display: flex;
|
|
81
|
+
align-items: center;
|
|
82
|
+
justify-content: center;
|
|
83
|
+
}
|
|
25
84
|
|
|
26
85
|
/* Header Components */
|
|
27
|
-
.brand {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
.
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
86
|
+
.brand {
|
|
87
|
+
font-weight: bold;
|
|
88
|
+
font-size: 1.2rem;
|
|
89
|
+
display: flex;
|
|
90
|
+
align-items: center;
|
|
91
|
+
gap: 10px;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
.brand span {
|
|
95
|
+
color: var(--accent-color);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
#metrics-summary {
|
|
99
|
+
font-size: 0.9rem;
|
|
100
|
+
color: #aaa;
|
|
101
|
+
display: flex;
|
|
102
|
+
gap: 20px;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
.metric {
|
|
106
|
+
display: flex;
|
|
107
|
+
flex-direction: column;
|
|
108
|
+
align-items: center;
|
|
109
|
+
line-height: 1.1;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
.metric-value {
|
|
113
|
+
font-weight: bold;
|
|
114
|
+
color: var(--text-color);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
.metric-label {
|
|
118
|
+
font-size: 0.7rem;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
#controls {
|
|
122
|
+
display: flex;
|
|
123
|
+
gap: 10px;
|
|
124
|
+
align-items: center;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
.btn-group {
|
|
128
|
+
display: flex;
|
|
129
|
+
background: #333;
|
|
130
|
+
border-radius: 4px;
|
|
131
|
+
overflow: hidden;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
button {
|
|
135
|
+
background: transparent;
|
|
136
|
+
color: #aaa;
|
|
137
|
+
border: none;
|
|
138
|
+
padding: 6px 12px;
|
|
139
|
+
cursor: pointer;
|
|
140
|
+
font-size: 0.85rem;
|
|
141
|
+
transition: all 0.2s;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
button:hover {
|
|
145
|
+
color: white;
|
|
146
|
+
background: rgba(255, 255, 255, 0.1);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
button.active {
|
|
150
|
+
background: var(--accent-color);
|
|
151
|
+
color: white;
|
|
152
|
+
}
|
|
39
153
|
|
|
40
154
|
/* Search */
|
|
41
|
-
#search-container {
|
|
42
|
-
|
|
43
|
-
|
|
155
|
+
#search-container {
|
|
156
|
+
position: absolute;
|
|
157
|
+
top: 15px;
|
|
158
|
+
left: 15px;
|
|
159
|
+
z-index: 5;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
#search-input {
|
|
163
|
+
background: rgba(30, 30, 30, 0.9);
|
|
164
|
+
border: 1px solid #444;
|
|
165
|
+
color: white;
|
|
166
|
+
padding: 8px 12px;
|
|
167
|
+
border-radius: 20px;
|
|
168
|
+
width: 200px;
|
|
169
|
+
outline: none;
|
|
170
|
+
transition: width 0.3s;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
#search-input:focus {
|
|
174
|
+
width: 280px;
|
|
175
|
+
border-color: var(--accent-color);
|
|
176
|
+
}
|
|
44
177
|
|
|
45
178
|
/* Graph */
|
|
46
|
-
svg {
|
|
47
|
-
|
|
48
|
-
|
|
179
|
+
svg {
|
|
180
|
+
width: 100%;
|
|
181
|
+
height: 100%;
|
|
182
|
+
display: block;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
.node {
|
|
186
|
+
cursor: pointer;
|
|
187
|
+
transition: stroke-width 0.1s;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
.link {
|
|
191
|
+
stroke: #555;
|
|
192
|
+
stroke-opacity: 0.3;
|
|
193
|
+
fill: none;
|
|
194
|
+
pointer-events: none;
|
|
195
|
+
}
|
|
49
196
|
|
|
50
197
|
/* Interaction States */
|
|
51
|
-
.node.highlight {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
198
|
+
.node.highlight {
|
|
199
|
+
stroke: #fff;
|
|
200
|
+
stroke-width: 2px;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
.link.highlight {
|
|
204
|
+
stroke-opacity: 0.8;
|
|
205
|
+
stroke: #999;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
.node.faded {
|
|
209
|
+
opacity: 0.1;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
.link.faded {
|
|
213
|
+
opacity: 0.05;
|
|
214
|
+
}
|
|
55
215
|
|
|
56
216
|
/* Details Panel Content */
|
|
57
|
-
.detail-section {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
.detail-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
.
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
217
|
+
.detail-section {
|
|
218
|
+
border-bottom: 1px solid #333;
|
|
219
|
+
padding-bottom: 10px;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
.detail-section:last-child {
|
|
223
|
+
border-bottom: none;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
.detail-label {
|
|
227
|
+
font-size: 0.75rem;
|
|
228
|
+
color: #888;
|
|
229
|
+
text-transform: uppercase;
|
|
230
|
+
letter-spacing: 0.5px;
|
|
231
|
+
margin-bottom: 4px;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
.detail-value {
|
|
235
|
+
font-size: 0.95rem;
|
|
236
|
+
word-break: break-all;
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
.detail-list {
|
|
240
|
+
list-style: none;
|
|
241
|
+
padding: 0;
|
|
242
|
+
margin: 0;
|
|
243
|
+
max-height: 150px;
|
|
244
|
+
overflow-y: auto;
|
|
245
|
+
font-size: 0.85rem;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
.detail-list li {
|
|
249
|
+
padding: 4px 0;
|
|
250
|
+
border-bottom: 1px solid #2a2a2a;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
.detail-list a {
|
|
254
|
+
color: var(--accent-color);
|
|
255
|
+
text-decoration: none;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
.detail-list a:hover {
|
|
259
|
+
text-decoration: underline;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
.status-badge {
|
|
263
|
+
display: inline-block;
|
|
264
|
+
padding: 2px 6px;
|
|
265
|
+
border-radius: 3px;
|
|
266
|
+
font-size: 0.75rem;
|
|
267
|
+
font-weight: bold;
|
|
268
|
+
margin-top: 5px;
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
.status-ok {
|
|
272
|
+
background: #2e7d32;
|
|
273
|
+
color: white;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
.status-warn {
|
|
277
|
+
background: #f9a825;
|
|
278
|
+
color: black;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
.status-error {
|
|
282
|
+
background: #c62828;
|
|
283
|
+
color: white;
|
|
284
|
+
}
|
|
70
285
|
|
|
71
286
|
/* Tooltip */
|
|
72
|
-
#tooltip {
|
|
287
|
+
#tooltip {
|
|
288
|
+
position: absolute;
|
|
289
|
+
background: rgba(20, 20, 20, 0.95);
|
|
290
|
+
color: white;
|
|
291
|
+
padding: 10px;
|
|
292
|
+
border-radius: 6px;
|
|
293
|
+
pointer-events: none;
|
|
294
|
+
font-size: 12px;
|
|
295
|
+
z-index: 100;
|
|
296
|
+
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.5);
|
|
297
|
+
border: 1px solid #444;
|
|
298
|
+
display: none;
|
|
299
|
+
transform: translate(-50%, -100%);
|
|
300
|
+
margin-top: -10px;
|
|
301
|
+
white-space: nowrap;
|
|
302
|
+
}
|
|
73
303
|
|
|
74
304
|
/* Responsive Sidebar */
|
|
75
305
|
@media (max-width: 768px) {
|
|
76
|
-
#details-panel {
|
|
77
|
-
|
|
78
|
-
|
|
306
|
+
#details-panel {
|
|
307
|
+
position: absolute;
|
|
308
|
+
right: 0;
|
|
309
|
+
top: 0;
|
|
310
|
+
bottom: 0;
|
|
311
|
+
z-index: 20;
|
|
312
|
+
box-shadow: -5px 0 15px rgba(0, 0, 0, 0.5);
|
|
313
|
+
transform: translateX(100%);
|
|
314
|
+
transition: transform 0.3s ease;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
#details-panel.visible {
|
|
318
|
+
transform: translateX(0);
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
#metrics-summary {
|
|
322
|
+
display: none;
|
|
323
|
+
}
|
|
79
324
|
}
|
|
80
325
|
</style>
|
|
81
326
|
</head>
|
|
327
|
+
|
|
82
328
|
<body>
|
|
83
329
|
<header>
|
|
84
|
-
<div class="brand"><span>Crawlith</span>
|
|
330
|
+
<div class="brand"><span>Crawlith</span> Crawl</div>
|
|
85
331
|
|
|
86
332
|
<div id="metrics-summary">
|
|
87
333
|
<div class="metric"><span class="metric-value" id="m-pages">-</span><span class="metric-label">Pages</span></div>
|
|
88
|
-
<div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span
|
|
89
|
-
|
|
90
|
-
<div class="metric"><span class="metric-value" id="m-
|
|
334
|
+
<div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span>
|
|
335
|
+
</div>
|
|
336
|
+
<div class="metric"><span class="metric-value" id="m-eff">-</span><span class="metric-label">Efficiency</span>
|
|
337
|
+
</div>
|
|
338
|
+
<div class="metric"><span class="metric-value" id="m-orphan">-</span><span class="metric-label">Orphans</span>
|
|
339
|
+
</div>
|
|
91
340
|
</div>
|
|
92
341
|
|
|
93
342
|
<div id="controls">
|
|
@@ -129,7 +378,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
129
378
|
</div>
|
|
130
379
|
<div class="detail-section">
|
|
131
380
|
<div class="detail-label">In-links (<span id="d-in-count">0</span>)</div>
|
|
132
|
-
|
|
381
|
+
<!-- List could be populated here if we had the reverse index, for now just count -->
|
|
133
382
|
</div>
|
|
134
383
|
<div class="detail-section">
|
|
135
384
|
<div class="detail-label">Out-links (<span id="d-out-count">0</span>)</div>
|
|
@@ -187,22 +436,22 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
187
436
|
|
|
188
437
|
// 2. Fallback to fetching JSON files (for web server usage)
|
|
189
438
|
if (!graphData || !metricsData) {
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
}
|
|
199
|
-
} catch (e) {
|
|
200
|
-
console.warn("Fetch failed, possibly due to CORS or missing files.", e);
|
|
439
|
+
try {
|
|
440
|
+
const [graphRes, metricsRes] = await Promise.all([
|
|
441
|
+
fetch('graph.json'),
|
|
442
|
+
fetch('metrics.json')
|
|
443
|
+
]);
|
|
444
|
+
if (graphRes.ok && metricsRes.ok) {
|
|
445
|
+
graphData = await graphRes.json();
|
|
446
|
+
metricsData = await metricsRes.json();
|
|
201
447
|
}
|
|
448
|
+
} catch (e) {
|
|
449
|
+
console.warn("Fetch failed, possibly due to CORS or missing files.", e);
|
|
450
|
+
}
|
|
202
451
|
}
|
|
203
452
|
|
|
204
453
|
if (!graphData || !metricsData) {
|
|
205
|
-
|
|
454
|
+
throw new Error("No data available. Ensure graph.json exists or data is injected.");
|
|
206
455
|
}
|
|
207
456
|
|
|
208
457
|
state.metrics = metricsData;
|
|
@@ -323,12 +572,12 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
323
572
|
const depthSpacing = height / (state.maxDepth + 2);
|
|
324
573
|
// Hierarchical: Nodes pushed to Y levels based on depth
|
|
325
574
|
state.simulation.force("y", d3.forceY(d => {
|
|
326
|
-
|
|
575
|
+
return (d.depth * depthSpacing) - (height / 2) + 50; // Offset to start from top
|
|
327
576
|
}).strength(1));
|
|
328
577
|
// We rely on "center" force to keep X centered, but maybe add weak forceX?
|
|
329
578
|
// Let's add weak forceX to prevent wide spread
|
|
330
579
|
state.simulation.force("x", d3.forceX(0).strength(0.05));
|
|
331
|
-
state.simulation.force("center", d3.forceCenter(width/2, height/2)); // Recenter
|
|
580
|
+
state.simulation.force("center", d3.forceCenter(width / 2, height / 2)); // Recenter
|
|
332
581
|
|
|
333
582
|
} else if (mode === 'radial') {
|
|
334
583
|
const maxRadius = Math.min(width, height) / 2 - 50;
|
|
@@ -401,7 +650,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
401
650
|
});
|
|
402
651
|
|
|
403
652
|
svg.call(state.zoom)
|
|
404
|
-
|
|
653
|
+
.call(state.zoom.transform, d3.zoomIdentity.translate(state.width / 2, state.height / 2).scale(0.8).translate(-state.width / 2, -state.height / 2)); // Initial zoom out
|
|
405
654
|
}
|
|
406
655
|
|
|
407
656
|
function ticked() {
|
|
@@ -431,18 +680,18 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
431
680
|
});
|
|
432
681
|
|
|
433
682
|
// Layout Toggle
|
|
434
|
-
d3.select("#btn-hierarchical").on("click", function() {
|
|
683
|
+
d3.select("#btn-hierarchical").on("click", function () {
|
|
435
684
|
setMode('hierarchical', this);
|
|
436
685
|
});
|
|
437
|
-
d3.select("#btn-radial").on("click", function() {
|
|
686
|
+
d3.select("#btn-radial").on("click", function () {
|
|
438
687
|
setMode('radial', this);
|
|
439
688
|
});
|
|
440
689
|
|
|
441
690
|
// Authority Toggle
|
|
442
|
-
d3.select("#btn-auth-pagerank").on("click", function() {
|
|
691
|
+
d3.select("#btn-auth-pagerank").on("click", function () {
|
|
443
692
|
setAuthorityMode('pagerank', this);
|
|
444
693
|
});
|
|
445
|
-
d3.select("#btn-auth-structural").on("click", function() {
|
|
694
|
+
d3.select("#btn-auth-structural").on("click", function () {
|
|
446
695
|
setAuthorityMode('structural', this);
|
|
447
696
|
});
|
|
448
697
|
}
|
|
@@ -508,7 +757,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
508
757
|
function showTooltip(event, d) {
|
|
509
758
|
// If we are transforming the container, we need to map coordinates correctly or just use pageX/Y
|
|
510
759
|
tooltip.style("display", "block")
|
|
511
|
-
.html(
|
|
760
|
+
.html(`<strong>${new URL(d.url).pathname}</strong><br>Auth: ${(d.authority * 10).toFixed(1)}`)
|
|
512
761
|
.style("left", (event.pageX) + "px")
|
|
513
762
|
.style("top", (event.pageY - 10) + "px");
|
|
514
763
|
}
|
|
@@ -526,8 +775,8 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
526
775
|
authContainer.html("");
|
|
527
776
|
const prVal = (d.pageRankAuthority * 100).toFixed(1);
|
|
528
777
|
const structVal = d.structuralAuthority.toFixed(3);
|
|
529
|
-
authContainer.append("div").html(
|
|
530
|
-
authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(
|
|
778
|
+
authContainer.append("div").html(`PR: <strong>${prVal}</strong>`);
|
|
779
|
+
authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(`In-Degree: ${structVal}`);
|
|
531
780
|
|
|
532
781
|
d3.select("#d-in-count").text(d.inLinks);
|
|
533
782
|
d3.select("#d-out-count").text(d.outLinks);
|
|
@@ -552,7 +801,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
552
801
|
.text(new URL(target.url).pathname);
|
|
553
802
|
});
|
|
554
803
|
if (adj.out.length > 50) {
|
|
555
|
-
|
|
804
|
+
list.append("li").text(`...and ${adj.out.length - 50} more`);
|
|
556
805
|
}
|
|
557
806
|
} else {
|
|
558
807
|
list.append("li").text("No outgoing links");
|
|
@@ -572,7 +821,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
572
821
|
selectNode(found);
|
|
573
822
|
// Center view on node
|
|
574
823
|
const transform = d3.zoomIdentity
|
|
575
|
-
.translate(state.width/2, state.height/2)
|
|
824
|
+
.translate(state.width / 2, state.height / 2)
|
|
576
825
|
.scale(2)
|
|
577
826
|
.translate(-found.x, -found.y);
|
|
578
827
|
|
|
@@ -589,7 +838,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
589
838
|
state.height = height;
|
|
590
839
|
state.simulation.force("center", d3.forceCenter(width / 2, height / 2));
|
|
591
840
|
if (state.mode === 'hierarchical') {
|
|
592
|
-
|
|
841
|
+
// Re-evaluate Y force if needed, but usually center is enough
|
|
593
842
|
}
|
|
594
843
|
state.simulation.alpha(0.3).restart();
|
|
595
844
|
});
|
|
@@ -620,11 +869,11 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
|
|
|
620
869
|
|
|
621
870
|
// Start
|
|
622
871
|
if (document.readyState === 'loading') {
|
|
623
|
-
|
|
872
|
+
document.addEventListener('DOMContentLoaded', init);
|
|
624
873
|
} else {
|
|
625
|
-
|
|
874
|
+
init();
|
|
626
875
|
}
|
|
627
876
|
</script>
|
|
628
877
|
</body>
|
|
629
|
-
|
|
630
|
-
|
|
878
|
+
|
|
879
|
+
</html>
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export function
|
|
1
|
+
export function renderCrawlCsvNodes(graphData: any): string {
|
|
2
2
|
const nodeHeaders = ['URL', 'Depth', 'Status', 'InboundLinks', 'OutboundLinks', 'PageRankScore'];
|
|
3
3
|
const nodeRows = graphData.nodes.map((n: any) => {
|
|
4
4
|
const outbound = graphData.edges.filter((e: any) => e.source === n.url).length;
|
|
@@ -9,13 +9,13 @@ export function renderSitegraphCsvNodes(graphData: any): string {
|
|
|
9
9
|
return [nodeHeaders.join(','), ...nodeRows].join('\n');
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
export function
|
|
12
|
+
export function renderCrawlCsvEdges(graphData: any): string {
|
|
13
13
|
const edgeHeaders = ['Source', 'Target', 'Weight'];
|
|
14
14
|
const edgeRows = graphData.edges.map((e: any) => [e.source, e.target, e.weight].join(','));
|
|
15
15
|
return [edgeHeaders.join(','), ...edgeRows].join('\n');
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
export function
|
|
18
|
+
export function renderCrawlMarkdown(url: string, graphData: any, metrics: any, graph: any): string {
|
|
19
19
|
const md = [
|
|
20
20
|
`# Crawlith Crawl Summary - ${url}`,
|
|
21
21
|
'',
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
|
|
5
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
+
const __dirname = path.dirname(__filename);
|
|
7
|
+
const templatePath = path.join(__dirname, 'crawl.html');
|
|
8
|
+
|
|
9
|
+
export const Crawl_HTML = fs.readFileSync(templatePath, 'utf-8');
|