@crawlith/core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analyze.d.ts +29 -8
  4. package/dist/analysis/analyze.js +325 -221
  5. package/dist/analysis/clustering.d.ts +23 -0
  6. package/dist/analysis/clustering.js +206 -0
  7. package/dist/analysis/content.d.ts +1 -1
  8. package/dist/analysis/content.js +11 -5
  9. package/dist/analysis/duplicate.d.ts +34 -0
  10. package/dist/analysis/duplicate.js +305 -0
  11. package/dist/analysis/heading.d.ts +116 -0
  12. package/dist/analysis/heading.js +356 -0
  13. package/dist/analysis/images.d.ts +1 -1
  14. package/dist/analysis/images.js +6 -5
  15. package/dist/analysis/links.d.ts +1 -1
  16. package/dist/analysis/links.js +8 -8
  17. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  18. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  19. package/dist/analysis/scoring.js +4 -1
  20. package/dist/analysis/seo.d.ts +8 -4
  21. package/dist/analysis/seo.js +41 -30
  22. package/dist/analysis/soft404.d.ts +17 -0
  23. package/dist/analysis/soft404.js +62 -0
  24. package/dist/analysis/structuredData.d.ts +1 -1
  25. package/dist/analysis/structuredData.js +5 -4
  26. package/dist/application/index.d.ts +2 -0
  27. package/dist/application/index.js +2 -0
  28. package/dist/application/usecase.d.ts +3 -0
  29. package/dist/application/usecase.js +1 -0
  30. package/dist/application/usecases.d.ts +114 -0
  31. package/dist/application/usecases.js +201 -0
  32. package/dist/audit/index.js +1 -1
  33. package/dist/audit/transport.d.ts +1 -1
  34. package/dist/audit/transport.js +5 -4
  35. package/dist/audit/types.d.ts +1 -0
  36. package/dist/constants.d.ts +17 -0
  37. package/dist/constants.js +23 -0
  38. package/dist/core/scope/scopeManager.js +3 -0
  39. package/dist/crawler/crawl.d.ts +2 -2
  40. package/dist/crawler/crawler.d.ts +17 -5
  41. package/dist/crawler/crawler.js +259 -94
  42. package/dist/crawler/fetcher.d.ts +1 -1
  43. package/dist/crawler/fetcher.js +6 -6
  44. package/dist/crawler/metricsRunner.d.ts +21 -1
  45. package/dist/crawler/metricsRunner.js +181 -60
  46. package/dist/crawler/normalize.d.ts +41 -0
  47. package/dist/crawler/normalize.js +119 -3
  48. package/dist/crawler/parser.d.ts +1 -3
  49. package/dist/crawler/parser.js +2 -49
  50. package/dist/crawler/resolver.d.ts +11 -0
  51. package/dist/crawler/resolver.js +67 -0
  52. package/dist/crawler/sitemap.d.ts +4 -1
  53. package/dist/crawler/sitemap.js +24 -18
  54. package/dist/crawler/trap.d.ts +5 -1
  55. package/dist/crawler/trap.js +23 -2
  56. package/dist/db/CrawlithDB.d.ts +110 -0
  57. package/dist/db/CrawlithDB.js +500 -0
  58. package/dist/db/graphLoader.js +15 -32
  59. package/dist/db/index.d.ts +9 -1
  60. package/dist/db/index.js +39 -31
  61. package/dist/db/migrations.d.ts +2 -0
  62. package/dist/db/{schema.js → migrations.js} +90 -43
  63. package/dist/db/pluginRegistry.d.ts +9 -0
  64. package/dist/db/pluginRegistry.js +19 -0
  65. package/dist/db/repositories/EdgeRepository.d.ts +5 -0
  66. package/dist/db/repositories/EdgeRepository.js +7 -0
  67. package/dist/db/repositories/MetricsRepository.d.ts +13 -8
  68. package/dist/db/repositories/MetricsRepository.js +14 -6
  69. package/dist/db/repositories/PageRepository.d.ts +5 -3
  70. package/dist/db/repositories/PageRepository.js +68 -17
  71. package/dist/db/repositories/SiteRepository.d.ts +6 -0
  72. package/dist/db/repositories/SiteRepository.js +4 -0
  73. package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
  74. package/dist/db/repositories/SnapshotRepository.js +48 -10
  75. package/dist/db/reset.d.ts +9 -0
  76. package/dist/db/reset.js +32 -0
  77. package/dist/db/statements.d.ts +12 -0
  78. package/dist/db/statements.js +40 -0
  79. package/dist/diff/compare.d.ts +0 -5
  80. package/dist/diff/compare.js +0 -12
  81. package/dist/diff/service.d.ts +16 -0
  82. package/dist/diff/service.js +41 -0
  83. package/dist/domain/index.d.ts +4 -0
  84. package/dist/domain/index.js +4 -0
  85. package/dist/events.d.ts +8 -0
  86. package/dist/graph/graph.d.ts +20 -42
  87. package/dist/graph/graph.js +12 -16
  88. package/dist/graph/hits.d.ts +23 -0
  89. package/dist/graph/hits.js +111 -0
  90. package/dist/graph/metrics.d.ts +0 -4
  91. package/dist/graph/metrics.js +19 -15
  92. package/dist/graph/pagerank.d.ts +17 -4
  93. package/dist/graph/pagerank.js +126 -93
  94. package/dist/index.d.ts +27 -9
  95. package/dist/index.js +27 -9
  96. package/dist/lock/lockManager.d.ts +1 -0
  97. package/dist/lock/lockManager.js +15 -0
  98. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  99. package/dist/plugin-system/plugin-cli.js +31 -0
  100. package/dist/plugin-system/plugin-config.d.ts +16 -0
  101. package/dist/plugin-system/plugin-config.js +36 -0
  102. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  103. package/dist/plugin-system/plugin-loader.js +122 -0
  104. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  105. package/dist/plugin-system/plugin-registry.js +167 -0
  106. package/dist/plugin-system/plugin-types.d.ts +205 -0
  107. package/dist/plugin-system/plugin-types.js +1 -0
  108. package/dist/ports/index.d.ts +9 -0
  109. package/dist/ports/index.js +1 -0
  110. package/dist/report/export.d.ts +3 -0
  111. package/dist/report/export.js +81 -0
  112. package/dist/report/insight.d.ts +27 -0
  113. package/dist/report/insight.js +103 -0
  114. package/dist/scoring/health.d.ts +17 -11
  115. package/dist/scoring/health.js +183 -140
  116. package/dist/utils/chalk.d.ts +6 -0
  117. package/dist/utils/chalk.js +41 -0
  118. package/dist/utils/secureConfig.d.ts +23 -0
  119. package/dist/utils/secureConfig.js +128 -0
  120. package/package.json +10 -4
  121. package/CHANGELOG.md +0 -13
  122. package/dist/db/schema.d.ts +0 -2
  123. package/dist/graph/cluster.d.ts +0 -6
  124. package/dist/graph/cluster.js +0 -221
  125. package/dist/graph/duplicate.d.ts +0 -10
  126. package/dist/graph/duplicate.js +0 -302
  127. package/dist/scoring/hits.d.ts +0 -10
  128. package/dist/scoring/hits.js +0 -131
  129. package/scripts/copy-assets.js +0 -37
  130. package/src/analysis/analysis_list.html +0 -35
  131. package/src/analysis/analysis_page.html +0 -123
  132. package/src/analysis/analyze.ts +0 -505
  133. package/src/analysis/content.ts +0 -62
  134. package/src/analysis/images.ts +0 -28
  135. package/src/analysis/links.ts +0 -41
  136. package/src/analysis/scoring.ts +0 -66
  137. package/src/analysis/seo.ts +0 -82
  138. package/src/analysis/structuredData.ts +0 -62
  139. package/src/analysis/templates.ts +0 -9
  140. package/src/audit/dns.ts +0 -49
  141. package/src/audit/headers.ts +0 -98
  142. package/src/audit/index.ts +0 -66
  143. package/src/audit/scoring.ts +0 -232
  144. package/src/audit/transport.ts +0 -258
  145. package/src/audit/types.ts +0 -102
  146. package/src/core/network/proxyAdapter.ts +0 -21
  147. package/src/core/network/rateLimiter.ts +0 -39
  148. package/src/core/network/redirectController.ts +0 -47
  149. package/src/core/network/responseLimiter.ts +0 -34
  150. package/src/core/network/retryPolicy.ts +0 -57
  151. package/src/core/scope/domainFilter.ts +0 -45
  152. package/src/core/scope/scopeManager.ts +0 -52
  153. package/src/core/scope/subdomainPolicy.ts +0 -39
  154. package/src/core/security/ipGuard.ts +0 -171
  155. package/src/crawler/crawl.ts +0 -9
  156. package/src/crawler/crawler.ts +0 -601
  157. package/src/crawler/extract.ts +0 -39
  158. package/src/crawler/fetcher.ts +0 -251
  159. package/src/crawler/metricsRunner.ts +0 -137
  160. package/src/crawler/normalize.ts +0 -108
  161. package/src/crawler/parser.ts +0 -190
  162. package/src/crawler/sitemap.ts +0 -76
  163. package/src/crawler/trap.ts +0 -96
  164. package/src/db/graphLoader.ts +0 -135
  165. package/src/db/index.ts +0 -75
  166. package/src/db/repositories/EdgeRepository.ts +0 -43
  167. package/src/db/repositories/MetricsRepository.ts +0 -63
  168. package/src/db/repositories/PageRepository.ts +0 -228
  169. package/src/db/repositories/SiteRepository.ts +0 -43
  170. package/src/db/repositories/SnapshotRepository.ts +0 -99
  171. package/src/db/schema.ts +0 -177
  172. package/src/diff/compare.ts +0 -84
  173. package/src/events.ts +0 -16
  174. package/src/graph/cluster.ts +0 -246
  175. package/src/graph/duplicate.ts +0 -350
  176. package/src/graph/graph.ts +0 -192
  177. package/src/graph/metrics.ts +0 -125
  178. package/src/graph/pagerank.ts +0 -126
  179. package/src/graph/simhash.ts +0 -76
  180. package/src/index.ts +0 -33
  181. package/src/lock/hashKey.ts +0 -51
  182. package/src/lock/lockManager.ts +0 -132
  183. package/src/lock/pidCheck.ts +0 -13
  184. package/src/report/crawl.html +0 -879
  185. package/src/report/crawlExport.ts +0 -58
  186. package/src/report/crawl_template.ts +0 -9
  187. package/src/report/html.ts +0 -27
  188. package/src/scoring/health.ts +0 -241
  189. package/src/scoring/hits.ts +0 -153
  190. package/src/scoring/orphanSeverity.ts +0 -176
  191. package/src/utils/version.ts +0 -18
  192. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  193. package/tests/analysis.unit.test.ts +0 -142
  194. package/tests/analyze.integration.test.ts +0 -133
  195. package/tests/analyze_markdown.test.ts +0 -98
  196. package/tests/audit/audit.test.ts +0 -101
  197. package/tests/audit/dns.test.ts +0 -31
  198. package/tests/audit/headers.test.ts +0 -45
  199. package/tests/audit/scoring.test.ts +0 -133
  200. package/tests/audit/security.test.ts +0 -12
  201. package/tests/audit/transport.test.ts +0 -111
  202. package/tests/clustering.test.ts +0 -118
  203. package/tests/clustering_risk.test.ts +0 -118
  204. package/tests/crawler.test.ts +0 -364
  205. package/tests/db/index.test.ts +0 -134
  206. package/tests/db/repositories.test.ts +0 -115
  207. package/tests/db.test.ts +0 -159
  208. package/tests/db_repos.test.ts +0 -72
  209. package/tests/diff.test.ts +0 -67
  210. package/tests/duplicate.test.ts +0 -110
  211. package/tests/extract.test.ts +0 -86
  212. package/tests/fetcher.test.ts +0 -110
  213. package/tests/fetcher_safety.test.ts +0 -91
  214. package/tests/fixtures/analyze-crawl.json +0 -26
  215. package/tests/graph/graph.test.ts +0 -100
  216. package/tests/graphLoader.test.ts +0 -124
  217. package/tests/hits.test.ts +0 -134
  218. package/tests/html_report.test.ts +0 -59
  219. package/tests/ipGuard.test.ts +0 -73
  220. package/tests/lock/lockManager.test.ts +0 -198
  221. package/tests/metrics.test.ts +0 -196
  222. package/tests/normalize.test.ts +0 -88
  223. package/tests/orphanSeverity.test.ts +0 -160
  224. package/tests/pagerank.test.ts +0 -98
  225. package/tests/parser.test.ts +0 -117
  226. package/tests/proxy_safety.test.ts +0 -57
  227. package/tests/redirect_safety.test.ts +0 -77
  228. package/tests/renderAnalysisCsv.test.ts +0 -183
  229. package/tests/safety.test.ts +0 -126
  230. package/tests/scope.test.ts +0 -84
  231. package/tests/scoring.test.ts +0 -60
  232. package/tests/sitemap.test.ts +0 -100
  233. package/tests/soft404.test.ts +0 -41
  234. package/tests/ssrf_fix.test.ts +0 -69
  235. package/tests/trap.test.ts +0 -39
  236. package/tests/visualization_data.test.ts +0 -46
  237. package/tsconfig.json +0 -11
@@ -1,879 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
-
4
- <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>Crawlith Site Graph</title>
8
- <style>
9
- :root {
10
- --bg-color: #121212;
11
- --text-color: #e0e0e0;
12
- --panel-bg: #1e1e1e;
13
- --border-color: #333;
14
- --accent-color: #4a90e2;
15
- --sidebar-width: 300px;
16
- }
17
-
18
- body {
19
- margin: 0;
20
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
21
- background: var(--bg-color);
22
- color: var(--text-color);
23
- height: 100vh;
24
- display: flex;
25
- flex-direction: column;
26
- overflow: hidden;
27
- }
28
-
29
- /* Layout */
30
- header {
31
- padding: 0 20px;
32
- background: var(--panel-bg);
33
- border-bottom: 1px solid var(--border-color);
34
- display: flex;
35
- justify-content: space-between;
36
- align-items: center;
37
- height: 60px;
38
- box-sizing: border-box;
39
- z-index: 10;
40
- }
41
-
42
- main {
43
- flex: 1;
44
- display: flex;
45
- overflow: hidden;
46
- position: relative;
47
- }
48
-
49
- #graph-container {
50
- flex: 1;
51
- position: relative;
52
- overflow: hidden;
53
- background: var(--bg-color);
54
- }
55
-
56
- #details-panel {
57
- width: var(--sidebar-width);
58
- background: var(--panel-bg);
59
- border-left: 1px solid var(--border-color);
60
- padding: 20px;
61
- overflow-y: auto;
62
- box-sizing: border-box;
63
- display: none;
64
- flex-direction: column;
65
- gap: 15px;
66
- }
67
-
68
- #details-panel.visible {
69
- display: flex;
70
- }
71
-
72
- footer {
73
- padding: 5px 20px;
74
- background: var(--panel-bg);
75
- border-top: 1px solid var(--border-color);
76
- font-size: 0.8rem;
77
- text-align: center;
78
- color: #666;
79
- height: 30px;
80
- display: flex;
81
- align-items: center;
82
- justify-content: center;
83
- }
84
-
85
- /* Header Components */
86
- .brand {
87
- font-weight: bold;
88
- font-size: 1.2rem;
89
- display: flex;
90
- align-items: center;
91
- gap: 10px;
92
- }
93
-
94
- .brand span {
95
- color: var(--accent-color);
96
- }
97
-
98
- #metrics-summary {
99
- font-size: 0.9rem;
100
- color: #aaa;
101
- display: flex;
102
- gap: 20px;
103
- }
104
-
105
- .metric {
106
- display: flex;
107
- flex-direction: column;
108
- align-items: center;
109
- line-height: 1.1;
110
- }
111
-
112
- .metric-value {
113
- font-weight: bold;
114
- color: var(--text-color);
115
- }
116
-
117
- .metric-label {
118
- font-size: 0.7rem;
119
- }
120
-
121
- #controls {
122
- display: flex;
123
- gap: 10px;
124
- align-items: center;
125
- }
126
-
127
- .btn-group {
128
- display: flex;
129
- background: #333;
130
- border-radius: 4px;
131
- overflow: hidden;
132
- }
133
-
134
- button {
135
- background: transparent;
136
- color: #aaa;
137
- border: none;
138
- padding: 6px 12px;
139
- cursor: pointer;
140
- font-size: 0.85rem;
141
- transition: all 0.2s;
142
- }
143
-
144
- button:hover {
145
- color: white;
146
- background: rgba(255, 255, 255, 0.1);
147
- }
148
-
149
- button.active {
150
- background: var(--accent-color);
151
- color: white;
152
- }
153
-
154
- /* Search */
155
- #search-container {
156
- position: absolute;
157
- top: 15px;
158
- left: 15px;
159
- z-index: 5;
160
- }
161
-
162
- #search-input {
163
- background: rgba(30, 30, 30, 0.9);
164
- border: 1px solid #444;
165
- color: white;
166
- padding: 8px 12px;
167
- border-radius: 20px;
168
- width: 200px;
169
- outline: none;
170
- transition: width 0.3s;
171
- }
172
-
173
- #search-input:focus {
174
- width: 280px;
175
- border-color: var(--accent-color);
176
- }
177
-
178
- /* Graph */
179
- svg {
180
- width: 100%;
181
- height: 100%;
182
- display: block;
183
- }
184
-
185
- .node {
186
- cursor: pointer;
187
- transition: stroke-width 0.1s;
188
- }
189
-
190
- .link {
191
- stroke: #555;
192
- stroke-opacity: 0.3;
193
- fill: none;
194
- pointer-events: none;
195
- }
196
-
197
- /* Interaction States */
198
- .node.highlight {
199
- stroke: #fff;
200
- stroke-width: 2px;
201
- }
202
-
203
- .link.highlight {
204
- stroke-opacity: 0.8;
205
- stroke: #999;
206
- }
207
-
208
- .node.faded {
209
- opacity: 0.1;
210
- }
211
-
212
- .link.faded {
213
- opacity: 0.05;
214
- }
215
-
216
- /* Details Panel Content */
217
- .detail-section {
218
- border-bottom: 1px solid #333;
219
- padding-bottom: 10px;
220
- }
221
-
222
- .detail-section:last-child {
223
- border-bottom: none;
224
- }
225
-
226
- .detail-label {
227
- font-size: 0.75rem;
228
- color: #888;
229
- text-transform: uppercase;
230
- letter-spacing: 0.5px;
231
- margin-bottom: 4px;
232
- }
233
-
234
- .detail-value {
235
- font-size: 0.95rem;
236
- word-break: break-all;
237
- }
238
-
239
- .detail-list {
240
- list-style: none;
241
- padding: 0;
242
- margin: 0;
243
- max-height: 150px;
244
- overflow-y: auto;
245
- font-size: 0.85rem;
246
- }
247
-
248
- .detail-list li {
249
- padding: 4px 0;
250
- border-bottom: 1px solid #2a2a2a;
251
- }
252
-
253
- .detail-list a {
254
- color: var(--accent-color);
255
- text-decoration: none;
256
- }
257
-
258
- .detail-list a:hover {
259
- text-decoration: underline;
260
- }
261
-
262
- .status-badge {
263
- display: inline-block;
264
- padding: 2px 6px;
265
- border-radius: 3px;
266
- font-size: 0.75rem;
267
- font-weight: bold;
268
- margin-top: 5px;
269
- }
270
-
271
- .status-ok {
272
- background: #2e7d32;
273
- color: white;
274
- }
275
-
276
- .status-warn {
277
- background: #f9a825;
278
- color: black;
279
- }
280
-
281
- .status-error {
282
- background: #c62828;
283
- color: white;
284
- }
285
-
286
- /* Tooltip */
287
- #tooltip {
288
- position: absolute;
289
- background: rgba(20, 20, 20, 0.95);
290
- color: white;
291
- padding: 10px;
292
- border-radius: 6px;
293
- pointer-events: none;
294
- font-size: 12px;
295
- z-index: 100;
296
- box-shadow: 0 4px 15px rgba(0, 0, 0, 0.5);
297
- border: 1px solid #444;
298
- display: none;
299
- transform: translate(-50%, -100%);
300
- margin-top: -10px;
301
- white-space: nowrap;
302
- }
303
-
304
- /* Responsive Sidebar */
305
- @media (max-width: 768px) {
306
- #details-panel {
307
- position: absolute;
308
- right: 0;
309
- top: 0;
310
- bottom: 0;
311
- z-index: 20;
312
- box-shadow: -5px 0 15px rgba(0, 0, 0, 0.5);
313
- transform: translateX(100%);
314
- transition: transform 0.3s ease;
315
- }
316
-
317
- #details-panel.visible {
318
- transform: translateX(0);
319
- }
320
-
321
- #metrics-summary {
322
- display: none;
323
- }
324
- }
325
- </style>
326
- </head>
327
-
328
- <body>
329
- <header>
330
- <div class="brand"><span>Crawlith</span> Crawl</div>
331
-
332
- <div id="metrics-summary">
333
- <div class="metric"><span class="metric-value" id="m-pages">-</span><span class="metric-label">Pages</span></div>
334
- <div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span>
335
- </div>
336
- <div class="metric"><span class="metric-value" id="m-eff">-</span><span class="metric-label">Efficiency</span>
337
- </div>
338
- <div class="metric"><span class="metric-value" id="m-orphan">-</span><span class="metric-label">Orphans</span>
339
- </div>
340
- </div>
341
-
342
- <div id="controls">
343
- <div class="btn-group" style="margin-right: 15px;">
344
- <button id="btn-auth-pagerank" class="active" title="PageRank Authority">PageRank</button>
345
- <button id="btn-auth-structural" title="Structural Authority (In-Degree)">In-Degree</button>
346
- </div>
347
- <div class="btn-group">
348
- <button id="btn-hierarchical" class="active">Hierarchical</button>
349
- <button id="btn-radial">Radial</button>
350
- </div>
351
- </div>
352
- </header>
353
-
354
- <main>
355
- <div id="graph-container">
356
- <div id="search-container">
357
- <input type="text" id="search-input" placeholder="Search URL...">
358
- </div>
359
- <svg id="graph"></svg>
360
- <div id="tooltip"></div>
361
- </div>
362
-
363
- <aside id="details-panel">
364
- <div class="detail-section">
365
- <div class="detail-label">URL</div>
366
- <div class="detail-value" id="d-url">-</div>
367
- <div id="d-status"></div>
368
- </div>
369
- <div class="detail-section" style="display: flex; gap: 20px;">
370
- <div>
371
- <div class="detail-label">Depth</div>
372
- <div class="detail-value" id="d-depth">-</div>
373
- </div>
374
- <div>
375
- <div class="detail-label">Authority</div>
376
- <div class="detail-value" id="d-auth-container">-</div>
377
- </div>
378
- </div>
379
- <div class="detail-section">
380
- <div class="detail-label">In-links (<span id="d-in-count">0</span>)</div>
381
- <!-- List could be populated here if we had the reverse index, for now just count -->
382
- </div>
383
- <div class="detail-section">
384
- <div class="detail-label">Out-links (<span id="d-out-count">0</span>)</div>
385
- <ul class="detail-list" id="d-out-list"></ul>
386
- </div>
387
- </aside>
388
- </main>
389
-
390
- <footer>
391
- Generated by Crawlith Crawler
392
- </footer>
393
-
394
- <!-- D3 from CDN -->
395
- <script src="https://d3js.org/d3.v7.min.js"></script>
396
-
397
- <script>
398
- // --- State ---
399
/**
 * Single mutable store shared by every function in this script.
 * Fields start empty and are filled in by init(), processData(),
 * initSimulation(), applyLayoutMode(), render() and selectNode().
 */
const state = {
    // Graph data, populated by processData().
    nodes: [],
    links: [],

    // Crawl-level metrics object, assigned in init().
    metrics: {},

    // url -> { in: [], out: [] }, built by processData().
    adjacency: new Map(),

    // D3 force simulation and the viewport size it was created for.
    simulation: null,
    width: 0,
    height: 0,

    // Latest zoom/pan transform reported by the zoom handler.
    transform: d3.zoomIdentity,

    // Node pinned by a click, or null when nothing is selected.
    activeNode: null,

    // Layout mode: 'hierarchical' | 'radial'.
    mode: 'hierarchical',

    // Maxima over all nodes, computed by processData().
    maxDepth: 0,
    maxInLinks: 0,

    // Selections and zoom behavior created by render().
    nodeSelection: null,
    linkSelection: null,
    zoom: null
};
416
-
417
- // --- DOM Elements ---
418
// D3 selections for the fixed page elements this script draws into.
const svg = d3.select("#graph");

// One <g> wraps everything drawn so the zoom handler can move it as a unit.
const container = svg.append("g");

// Appended in this order so the link layer precedes the node layer in the DOM.
const [linkGroup, nodeGroup] = ["links", "nodes"].map(
    (layerClass) => container.append("g").attr("class", layerClass)
);

const tooltip = d3.select("#tooltip");
const detailsPanel = d3.select("#details-panel");
424
-
425
- // --- Initialization ---
426
- // --- Initialization ---
427
/**
 * Loads the graph and metrics data, then boots the visualization.
 *
 * Data sources, in priority order:
 *   1. `window.GRAPH_DATA` / `window.METRICS_DATA` (injected; works on file://).
 *   2. `graph.json` / `metrics.json` fetched relative to the page.
 *
 * Each piece is resolved independently: an injected value is never discarded
 * just because the other one had to be fetched (or could not be).
 *
 * Any failure is logged to the console and reported through `alert`; the
 * function itself never rejects.
 *
 * @returns {Promise<void>}
 */
async function init() {
    // Fetches one JSON file; returns undefined (after warning) when unavailable.
    const loadJson = async (file) => {
        try {
            const res = await fetch(file);
            if (!res.ok) {
                console.warn(`Could not load ${file}: HTTP ${res.status}`);
                return undefined;
            }
            return await res.json();
        } catch (e) {
            console.warn("Fetch failed, possibly due to CORS or missing files.", e);
            return undefined;
        }
    };

    try {
        // 1. Try to use injected data (for file:// usage)
        // @ts-ignore
        let graphData = window.GRAPH_DATA;
        // @ts-ignore
        let metricsData = window.METRICS_DATA;

        // 2. Fallback to fetching JSON files (for web server usage),
        //    but only for the pieces that were not injected.
        if (!graphData || !metricsData) {
            [graphData, metricsData] = await Promise.all([
                graphData ? graphData : loadJson('graph.json'),
                metricsData ? metricsData : loadJson('metrics.json')
            ]);
        }

        if (!graphData || !metricsData) {
            throw new Error("No data available. Ensure graph.json exists or data is injected.");
        }

        state.metrics = metricsData;
        processData(graphData);
        updateMetricsUI();

        // Setup UI
        setupResize();
        setupInteractions();
        setupSearch();

        // Start Simulation
        initSimulation();

    } catch (err) {
        console.error(err);
        alert("Error loading visualization data: " + err.message);
    }
}
474
-
475
/**
 * Converts raw graph data into the structures stored on `state`.
 *
 * Writes `state.nodes`, `state.links`, `state.maxDepth`, `state.maxInLinks`
 * and rebuilds `state.adjacency`. Node objects are enriched in place with
 * `inLinks`/`outLinks` defaults, `structuralAuthority`, `pageRankAuthority`,
 * `authority`, and an initial `x`/`y`.
 *
 * @param {{nodes?: Array<Object>, edges?: Array<{source: string, target: string}>}} data
 *   Graph payload; a missing `nodes` or `edges` array is treated as empty.
 */
function processData(data) {
    const rawNodes = data && Array.isArray(data.nodes) ? data.nodes : [];
    const rawEdges = data && Array.isArray(data.edges) ? data.edges : [];

    // Create a map for fast lookup
    const nodeMap = new Map();
    for (const n of rawNodes) {
        n.inLinks = n.inLinks || 0;
        n.outLinks = n.outLinks || 0;
        nodeMap.set(n.url, n);
    }

    // Keep only edges whose two endpoints are both known nodes
    state.links = rawEdges
        .map((e) => ({ source: nodeMap.get(e.source), target: nodeMap.get(e.target) }))
        .filter((e) => e.source && e.target);

    state.nodes = rawNodes;

    // Calculate Stats (fall back to 1 so later divisions never hit zero)
    state.maxDepth = d3.max(state.nodes, (d) => d.depth) || 1;
    state.maxInLinks = d3.max(state.nodes, (d) => d.inLinks) || 1;

    // Calculate Authority & Enrich Nodes
    const inLinkScale = Math.log(1 + state.maxInLinks);
    for (const n of state.nodes) {
        // Structural Authority: log-scaled normalized 0-1 based on in-links
        n.structuralAuthority = Math.log(1 + n.inLinks) / inLinkScale;

        // PageRank Authority: normalized 0-1 from pageRankScore (0-100).
        // Number.isFinite also rejects NaN/Infinity, which would otherwise
        // propagate into the node radius.
        n.pageRankAuthority = Number.isFinite(n.pageRankScore)
            ? n.pageRankScore / 100
            : n.structuralAuthority;

        // Default authority to PageRank if available, else structural
        n.authority = n.pageRankAuthority;

        // Ensure x,y are initialized to avoid NaNs if D3 doesn't do it fast enough
        n.x = 0;
        n.y = 0;
    }

    // Build Adjacency Map (cleared first so a re-run leaves no stale URLs)
    state.adjacency.clear();
    for (const n of state.nodes) {
        state.adjacency.set(n.url, { in: [], out: [] });
    }
    for (const l of state.links) {
        state.adjacency.get(l.source.url).out.push(l.target);
        state.adjacency.get(l.target.url).in.push(l.source);
    }
}
522
-
523
/**
 * Writes the crawl metrics from `state.metrics` into the header counters
 * (`#m-pages`, `#m-depth`, `#m-eff`, `#m-orphan`).
 *
 * A metric that is absent or malformed is shown as "-" (the same placeholder
 * the markup starts with) instead of throwing or printing "NaN%".
 */
function updateMetricsUI() {
    const metrics = state.metrics || {};
    const PLACEHOLDER = '-';

    const setText = (id, value) => {
        document.getElementById(id).textContent = value;
    };
    const orPlaceholder = (value) => (value === undefined || value === null ? PLACEHOLDER : value);

    setText('m-pages', orPlaceholder(metrics.totalPages));
    setText('m-depth', orPlaceholder(metrics.maxDepthFound));
    setText(
        'm-eff',
        Number.isFinite(metrics.crawlEfficiencyScore)
            ? (metrics.crawlEfficiencyScore * 100).toFixed(1) + '%'
            : PLACEHOLDER
    );
    setText(
        'm-orphan',
        Array.isArray(metrics.orphanPages) ? metrics.orphanPages.length : PLACEHOLDER
    );
}
529
-
530
- // --- Simulation ---
531
/**
 * Creates the D3 force simulation for `state.nodes` / `state.links`,
 * stores it on `state.simulation`, applies the current layout mode,
 * wires the tick handler and draws the initial SVG elements.
 */
function initSimulation() {
    const dims = getDimensions();
    state.width = dims.width;
    state.height = dims.height;

    // Safeguards for large graphs: quicker cool-down, weaker repulsion.
    const total = state.nodes.length;
    const isLargeGraph = total > 1000;

    const sim = d3.forceSimulation(state.nodes);
    sim.alphaDecay(isLargeGraph ? 0.05 : 0.02);
    // Reduced link strength for flexibility
    sim.force("link", d3.forceLink(state.links).id(d => d.url).strength(0.5));
    sim.force("charge", d3.forceManyBody().strength(isLargeGraph ? -100 : -300));
    sim.force("center", d3.forceCenter(dims.width / 2, dims.height / 2));
    state.simulation = sim;

    // Collision is only enabled up to 1200 nodes.
    if (total <= 1200) {
        sim.force("collide", d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1));
    }

    // Apply Layout Mode
    applyLayoutMode(state.mode);

    // Rendering loop
    sim.on("tick", ticked);

    // Render initial SVG elements
    render();
}
560
-
561
/**
 * Switches the simulation between the 'hierarchical' and 'radial' layouts
 * and reheats it. Records the mode on `state.mode`. Any other mode value
 * only removes the "y"/"radial" forces and restarts the simulation.
 *
 * @param {string} mode - 'hierarchical' or 'radial'.
 */
function applyLayoutMode(mode) {
    state.mode = mode;

    const sim = state.simulation;
    const w = state.width;
    const h = state.height;

    // Remove conflicting forces
    sim.force("y", null);
    sim.force("radial", null);

    switch (mode) {
        case 'hierarchical': {
            // Nodes are pushed to Y levels based on depth, offset to start from the top.
            const depthSpacing = h / (state.maxDepth + 2);
            const levelY = d => (d.depth * depthSpacing) - (h / 2) + 50;
            sim.force("y", d3.forceY(levelY).strength(1));
            // Weak X pull to prevent a wide spread.
            sim.force("x", d3.forceX(0).strength(0.05));
            // Recenter
            sim.force("center", d3.forceCenter(w / 2, h / 2));
            break;
        }
        case 'radial': {
            // One ring per depth level around the viewport centre.
            const maxRadius = Math.min(w, h) / 2 - 50;
            const ringSpacing = maxRadius / (state.maxDepth + 1);
            const ringForce = d3.forceRadial(d => d.depth * ringSpacing, w / 2, h / 2);
            sim.force("radial", ringForce.strength(0.8));
            // Remove X constraint
            sim.force("x", null);
            break;
        }
        default:
            break;
    }

    sim.alpha(1).restart();
}
597
-
598
/**
 * Radius in pixels for a node circle: 5 plus 15 times its authority.
 *
 * @param {{authority?: number}} d - Node datum.
 * @returns {number} Always a finite radius; a missing or non-finite
 *   authority is treated as 0 so the circle never gets a NaN `r`.
 */
function getNodeRadius(d) {
    const authority = Number.isFinite(d.authority) ? d.authority : 0;
    return 5 + (authority * 15);
}
602
-
603
/**
 * Fill colour for a node, chosen from the Viridis ramp by relative depth.
 * The ramp is inverted (1 - ratio) for better contrast on the dark background.
 * NOTE(review): per the d3-scale-chromatic docs Viridis runs dark purple -> yellow,
 * so depth 0 should come out yellow; confirm visually.
 *
 * @param {{depth: number}} d - Node datum.
 * @returns {string} Colour string produced by `d3.interpolateViridis`.
 */
function getNodeColor(d) {
    const deepest = state.maxDepth || 1;
    const depthRatio = d.depth / deepest;
    return d3.interpolateViridis(1 - depthRatio);
}
608
-
609
/**
 * Binds `state.links` / `state.nodes` to SVG <line> / <circle> elements,
 * attaches hover, click and drag handlers to the circles, and installs the
 * zoom behavior on the SVG. Stores the resulting selections and the zoom
 * behavior on `state`.
 */
function render() {
    // Hover only previews a node while nothing is pinned by a click.
    const onEnter = (event, d) => {
        if (state.activeNode) return;
        highlightNode(d);
        showTooltip(event, d);
    };
    const onLeave = () => {
        if (state.activeNode) return;
        resetHighlight();
        hideTooltip();
    };
    // stopPropagation keeps the background click handler from clearing the selection.
    const onClick = (event, d) => {
        event.stopPropagation();
        selectNode(d);
    };

    // Links
    const lines = linkGroup.selectAll("line")
        .data(state.links)
        .join("line");
    lines.attr("class", "link").attr("stroke-width", 0.5);
    state.linkSelection = lines;

    // Nodes
    const circles = nodeGroup.selectAll("circle")
        .data(state.nodes)
        .join("circle");
    circles
        .attr("class", "node")
        .attr("r", d => getNodeRadius(d))
        .attr("fill", d => getNodeColor(d))
        // Red stroke for errors
        .attr("stroke", d => (d.status >= 400 ? "#ff4444" : null))
        .on("mouseover", onEnter)
        .on("mouseout", onLeave)
        .on("click", onClick)
        .call(d3.drag()
            .on("start", dragstarted)
            .on("drag", dragged)
            .on("end", dragended));
    state.nodeSelection = circles;

    // Zoom
    const zoomBehavior = d3.zoom().scaleExtent([0.1, 4]);
    zoomBehavior.on("zoom", (event) => {
        state.transform = event.transform;
        container.attr("transform", event.transform);
    });
    state.zoom = zoomBehavior;

    // Initial zoom out to 0.8x around the viewport centre.
    const cx = state.width / 2;
    const cy = state.height / 2;
    svg.call(zoomBehavior)
        .call(zoomBehavior.transform, d3.zoomIdentity.translate(cx, cy).scale(0.8).translate(-cx, -cy));
}
655
-
656
/**
 * Simulation tick handler: copies the current node coordinates onto the
 * rendered lines and circles. Does nothing for a selection that has not
 * been created yet.
 */
function ticked() {
    const { linkSelection, nodeSelection } = state;

    if (linkSelection) {
        linkSelection
            .attr("x1", link => link.source.x)
            .attr("y1", link => link.source.y)
            .attr("x2", link => link.target.x)
            .attr("y2", link => link.target.y);
    }

    if (nodeSelection) {
        nodeSelection
            .attr("cx", node => node.x)
            .attr("cy", node => node.y);
    }
}
671
-
672
- // --- Interactions ---
673
-
674
/**
 * Registers the click handlers for the SVG background and for the
 * layout / authority toggle buttons in the header.
 */
function setupInteractions() {
    // Background click to clear selection
    svg.on("click", () => {
        state.activeNode = null;
        resetHighlight();
        detailsPanel.classed("visible", false);
    });

    // Layout Toggle
    const layoutButtons = [
        ["#btn-hierarchical", 'hierarchical'],
        ["#btn-radial", 'radial']
    ];
    layoutButtons.forEach(([selector, layout]) => {
        // Regular function: `this` is the clicked button element.
        d3.select(selector).on("click", function () {
            setMode(layout, this);
        });
    });

    // Authority Toggle
    const authorityButtons = [
        ["#btn-auth-pagerank", 'pagerank'],
        ["#btn-auth-structural", 'structural']
    ];
    authorityButtons.forEach(([selector, authority]) => {
        d3.select(selector).on("click", function () {
            setAuthorityMode(authority, this);
        });
    });
}
698
-
699
/**
 * Chooses which authority metric drives node size and updates the visuals.
 *
 * @param {string} mode - 'pagerank' selects `pageRankAuthority`; any other
 *   value selects `structuralAuthority`.
 * @param {Element} btn - The button to mark as active.
 */
function setAuthorityMode(mode, btn) {
    ["#btn-auth-pagerank", "#btn-auth-structural"].forEach((selector) => {
        d3.select(selector).classed("active", false);
    });
    d3.select(btn).classed("active", true);

    const usePageRank = mode === 'pagerank';
    for (const node of state.nodes) {
        node.authority = usePageRank ? node.pageRankAuthority : node.structuralAuthority;
    }

    // Update Visuals
    nodeGroup.selectAll("circle")
        .transition().duration(500)
        .attr("r", d => getNodeRadius(d));

    // Update collision force if enabled
    const sim = state.simulation;
    if (sim.force("collide")) {
        sim.force("collide", d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1));
        sim.alpha(0.3).restart();
    }
}
719
-
720
/**
 * Activates a layout button and applies the matching layout.
 *
 * Only the two layout buttons are deactivated first. The authority toggle
 * buttons also live inside `#controls`, so clearing every button there
 * would wrongly drop their active state.
 *
 * @param {string} mode - Layout mode passed on to `applyLayoutMode`.
 * @param {Element} btn - The layout button to mark as active.
 */
function setMode(mode, btn) {
    d3.select("#btn-hierarchical").classed("active", false);
    d3.select("#btn-radial").classed("active", false);
    d3.select(btn).classed("active", true);
    applyLayoutMode(mode);
}
725
-
726
/**
 * Emphasises one node: fades every node that is neither `d` nor one of its
 * in/out neighbours, marks `d` itself as highlighted, and highlights the
 * links that touch `d` while fading the rest.
 *
 * @param {{url: string}} d - Node datum to emphasise.
 */
function highlightNode(d) {
    const entry = state.adjacency.get(d.url);
    const linkedNodes = entry ? [...entry.in, ...entry.out] : [];
    const neighbors = new Set(linkedNodes.map(n => n.url));
    neighbors.add(d.url);

    const touchesNode = l => l.source.url === d.url || l.target.url === d.url;

    nodeGroup.selectAll("circle")
        .classed("faded", n => !neighbors.has(n.url))
        .classed("highlight", n => n.url === d.url);

    linkGroup.selectAll("line")
        .classed("faded", l => !touchesNode(l))
        .classed("highlight", l => touchesNode(l));
}
745
-
746
- function resetHighlight() {
747
- nodeGroup.selectAll("circle").classed("faded", false).classed("highlight", false);
748
- linkGroup.selectAll("line").classed("faded", false).classed("highlight", false);
749
- }
750
-
751
- function selectNode(d) {
752
- state.activeNode = d;
753
- highlightNode(d);
754
- showDetails(d);
755
- }
756
-
757
- function showTooltip(event, d) {
758
- // If we are transforming the container, we need to map coordinates correctly or just use pageX/Y
759
- tooltip.style("display", "block")
760
- .html(`<strong>${new URL(d.url).pathname}</strong><br>Auth: ${(d.authority * 10).toFixed(1)}`)
761
- .style("left", (event.pageX) + "px")
762
- .style("top", (event.pageY - 10) + "px");
763
- }
764
-
765
- function hideTooltip() {
766
- tooltip.style("display", "none");
767
- }
768
-
769
- function showDetails(d) {
770
- detailsPanel.classed("visible", true);
771
- d3.select("#d-url").text(d.url);
772
- d3.select("#d-depth").text(d.depth);
773
-
774
- const authContainer = d3.select("#d-auth-container");
775
- authContainer.html("");
776
- const prVal = (d.pageRankAuthority * 100).toFixed(1);
777
- const structVal = d.structuralAuthority.toFixed(3);
778
- authContainer.append("div").html(`PR: <strong>${prVal}</strong>`);
779
- authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(`In-Degree: ${structVal}`);
780
-
781
- d3.select("#d-in-count").text(d.inLinks);
782
- d3.select("#d-out-count").text(d.outLinks);
783
-
784
- // Status badge
785
- const statusDiv = d3.select("#d-status");
786
- statusDiv.html("");
787
- let sClass = "status-ok";
788
- if (d.status >= 400) sClass = "status-error";
789
- else if (d.status >= 300) sClass = "status-warn";
790
- statusDiv.append("span").attr("class", "status-badge " + sClass).text(d.status);
791
-
792
- // Outlinks list (limit to 20)
793
- const list = d3.select("#d-out-list");
794
- list.html("");
795
- const adj = state.adjacency.get(d.url);
796
- if (adj && adj.out.length > 0) {
797
- adj.out.slice(0, 50).forEach(target => {
798
- list.append("li").append("a")
799
- .attr("href", target.url)
800
- .attr("target", "_blank")
801
- .text(new URL(target.url).pathname);
802
- });
803
- if (adj.out.length > 50) {
804
- list.append("li").text(`...and ${adj.out.length - 50} more`);
805
- }
806
- } else {
807
- list.append("li").text("No outgoing links");
808
- }
809
- }
810
-
811
- // --- Search ---
812
- function setupSearch() {
813
- const input = document.getElementById('search-input');
814
- input.addEventListener('keydown', (e) => {
815
- if (e.key === 'Enter') {
816
- const val = input.value.trim().toLowerCase();
817
- if (!val) return;
818
-
819
- const found = state.nodes.find(n => n.url.toLowerCase().includes(val));
820
- if (found) {
821
- selectNode(found);
822
- // Center view on node
823
- const transform = d3.zoomIdentity
824
- .translate(state.width / 2, state.height / 2)
825
- .scale(2)
826
- .translate(-found.x, -found.y);
827
-
828
- svg.transition().duration(750).call(state.zoom.transform, transform);
829
- }
830
- }
831
- });
832
- }
833
-
834
- function setupResize() {
835
- window.addEventListener("resize", () => {
836
- const { width, height } = getDimensions();
837
- state.width = width;
838
- state.height = height;
839
- state.simulation.force("center", d3.forceCenter(width / 2, height / 2));
840
- if (state.mode === 'hierarchical') {
841
- // Re-evaluate Y force if needed, but usually center is enough
842
- }
843
- state.simulation.alpha(0.3).restart();
844
- });
845
- }
846
-
847
- function getDimensions() {
848
- const rect = document.getElementById("graph-container").getBoundingClientRect();
849
- return { width: rect.width, height: rect.height };
850
- }
851
-
852
- // --- Dragging ---
853
- function dragstarted(event, d) {
854
- if (!event.active) state.simulation.alphaTarget(0.3).restart();
855
- d.fx = d.x;
856
- d.fy = d.y;
857
- }
858
-
859
- function dragged(event, d) {
860
- d.fx = event.x;
861
- d.fy = event.y;
862
- }
863
-
864
- function dragended(event, d) {
865
- if (!event.active) state.simulation.alphaTarget(0);
866
- d.fx = null;
867
- d.fy = null;
868
- }
869
-
870
- // Start
871
- if (document.readyState === 'loading') {
872
- document.addEventListener('DOMContentLoaded', init);
873
- } else {
874
- init();
875
- }
876
- </script>
877
- </body>
878
-
879
- </html>