@crawlith/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analysis_list.html +35 -0
  4. package/dist/analysis/analysis_page.html +123 -0
  5. package/dist/analysis/analyze.d.ts +40 -5
  6. package/dist/analysis/analyze.js +395 -347
  7. package/dist/analysis/clustering.d.ts +23 -0
  8. package/dist/analysis/clustering.js +206 -0
  9. package/dist/analysis/content.d.ts +1 -1
  10. package/dist/analysis/content.js +11 -5
  11. package/dist/analysis/duplicate.d.ts +34 -0
  12. package/dist/analysis/duplicate.js +305 -0
  13. package/dist/analysis/heading.d.ts +116 -0
  14. package/dist/analysis/heading.js +356 -0
  15. package/dist/analysis/images.d.ts +1 -1
  16. package/dist/analysis/images.js +6 -5
  17. package/dist/analysis/links.d.ts +1 -1
  18. package/dist/analysis/links.js +8 -8
  19. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  20. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  21. package/dist/analysis/scoring.js +11 -2
  22. package/dist/analysis/seo.d.ts +8 -4
  23. package/dist/analysis/seo.js +41 -30
  24. package/dist/analysis/soft404.d.ts +17 -0
  25. package/dist/analysis/soft404.js +62 -0
  26. package/dist/analysis/structuredData.d.ts +1 -1
  27. package/dist/analysis/structuredData.js +5 -4
  28. package/dist/analysis/templates.d.ts +2 -0
  29. package/dist/analysis/templates.js +7 -0
  30. package/dist/application/index.d.ts +2 -0
  31. package/dist/application/index.js +2 -0
  32. package/dist/application/usecase.d.ts +3 -0
  33. package/dist/application/usecase.js +1 -0
  34. package/dist/application/usecases.d.ts +114 -0
  35. package/dist/application/usecases.js +201 -0
  36. package/dist/audit/index.js +1 -1
  37. package/dist/audit/transport.d.ts +1 -1
  38. package/dist/audit/transport.js +5 -4
  39. package/dist/audit/types.d.ts +1 -0
  40. package/dist/constants.d.ts +17 -0
  41. package/dist/constants.js +23 -0
  42. package/dist/core/scope/scopeManager.js +3 -0
  43. package/dist/core/security/ipGuard.d.ts +11 -0
  44. package/dist/core/security/ipGuard.js +71 -3
  45. package/dist/crawler/crawl.d.ts +4 -22
  46. package/dist/crawler/crawl.js +4 -335
  47. package/dist/crawler/crawler.d.ts +87 -0
  48. package/dist/crawler/crawler.js +683 -0
  49. package/dist/crawler/extract.d.ts +4 -1
  50. package/dist/crawler/extract.js +7 -2
  51. package/dist/crawler/fetcher.d.ts +2 -1
  52. package/dist/crawler/fetcher.js +26 -11
  53. package/dist/crawler/metricsRunner.d.ts +23 -1
  54. package/dist/crawler/metricsRunner.js +202 -72
  55. package/dist/crawler/normalize.d.ts +41 -0
  56. package/dist/crawler/normalize.js +119 -3
  57. package/dist/crawler/parser.d.ts +1 -3
  58. package/dist/crawler/parser.js +2 -49
  59. package/dist/crawler/resolver.d.ts +11 -0
  60. package/dist/crawler/resolver.js +67 -0
  61. package/dist/crawler/sitemap.d.ts +6 -0
  62. package/dist/crawler/sitemap.js +27 -17
  63. package/dist/crawler/trap.d.ts +5 -1
  64. package/dist/crawler/trap.js +23 -2
  65. package/dist/db/CrawlithDB.d.ts +110 -0
  66. package/dist/db/CrawlithDB.js +500 -0
  67. package/dist/db/graphLoader.js +42 -30
  68. package/dist/db/index.d.ts +11 -0
  69. package/dist/db/index.js +41 -29
  70. package/dist/db/migrations.d.ts +2 -0
  71. package/dist/db/{schema.js → migrations.js} +90 -43
  72. package/dist/db/pluginRegistry.d.ts +9 -0
  73. package/dist/db/pluginRegistry.js +19 -0
  74. package/dist/db/repositories/EdgeRepository.d.ts +13 -0
  75. package/dist/db/repositories/EdgeRepository.js +20 -0
  76. package/dist/db/repositories/MetricsRepository.d.ts +16 -8
  77. package/dist/db/repositories/MetricsRepository.js +28 -7
  78. package/dist/db/repositories/PageRepository.d.ts +15 -2
  79. package/dist/db/repositories/PageRepository.js +169 -25
  80. package/dist/db/repositories/SiteRepository.d.ts +9 -0
  81. package/dist/db/repositories/SiteRepository.js +13 -0
  82. package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
  83. package/dist/db/repositories/SnapshotRepository.js +64 -5
  84. package/dist/db/reset.d.ts +9 -0
  85. package/dist/db/reset.js +32 -0
  86. package/dist/db/statements.d.ts +12 -0
  87. package/dist/db/statements.js +40 -0
  88. package/dist/diff/compare.d.ts +0 -5
  89. package/dist/diff/compare.js +0 -12
  90. package/dist/diff/service.d.ts +16 -0
  91. package/dist/diff/service.js +41 -0
  92. package/dist/domain/index.d.ts +4 -0
  93. package/dist/domain/index.js +4 -0
  94. package/dist/events.d.ts +56 -0
  95. package/dist/events.js +1 -0
  96. package/dist/graph/graph.d.ts +36 -42
  97. package/dist/graph/graph.js +26 -17
  98. package/dist/graph/hits.d.ts +23 -0
  99. package/dist/graph/hits.js +111 -0
  100. package/dist/graph/metrics.d.ts +0 -4
  101. package/dist/graph/metrics.js +25 -9
  102. package/dist/graph/pagerank.d.ts +17 -4
  103. package/dist/graph/pagerank.js +126 -91
  104. package/dist/graph/simhash.d.ts +6 -0
  105. package/dist/graph/simhash.js +14 -0
  106. package/dist/index.d.ts +29 -8
  107. package/dist/index.js +29 -8
  108. package/dist/lock/hashKey.js +1 -1
  109. package/dist/lock/lockManager.d.ts +5 -1
  110. package/dist/lock/lockManager.js +38 -13
  111. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  112. package/dist/plugin-system/plugin-cli.js +31 -0
  113. package/dist/plugin-system/plugin-config.d.ts +16 -0
  114. package/dist/plugin-system/plugin-config.js +36 -0
  115. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  116. package/dist/plugin-system/plugin-loader.js +122 -0
  117. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  118. package/dist/plugin-system/plugin-registry.js +167 -0
  119. package/dist/plugin-system/plugin-types.d.ts +205 -0
  120. package/dist/plugin-system/plugin-types.js +1 -0
  121. package/dist/ports/index.d.ts +9 -0
  122. package/dist/ports/index.js +1 -0
  123. package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
  124. package/dist/report/crawlExport.d.ts +3 -0
  125. package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
  126. package/dist/report/crawl_template.d.ts +1 -0
  127. package/dist/report/crawl_template.js +7 -0
  128. package/dist/report/export.d.ts +3 -0
  129. package/dist/report/export.js +81 -0
  130. package/dist/report/html.js +15 -216
  131. package/dist/report/insight.d.ts +27 -0
  132. package/dist/report/insight.js +103 -0
  133. package/dist/scoring/health.d.ts +56 -0
  134. package/dist/scoring/health.js +213 -0
  135. package/dist/utils/chalk.d.ts +6 -0
  136. package/dist/utils/chalk.js +41 -0
  137. package/dist/utils/secureConfig.d.ts +23 -0
  138. package/dist/utils/secureConfig.js +128 -0
  139. package/package.json +12 -6
  140. package/CHANGELOG.md +0 -7
  141. package/dist/db/schema.d.ts +0 -2
  142. package/dist/graph/cluster.d.ts +0 -6
  143. package/dist/graph/cluster.js +0 -173
  144. package/dist/graph/duplicate.d.ts +0 -10
  145. package/dist/graph/duplicate.js +0 -251
  146. package/dist/report/sitegraphExport.d.ts +0 -3
  147. package/dist/report/sitegraph_template.d.ts +0 -1
  148. package/dist/report/sitegraph_template.js +0 -630
  149. package/dist/scoring/hits.d.ts +0 -9
  150. package/dist/scoring/hits.js +0 -111
  151. package/src/analysis/analyze.ts +0 -548
  152. package/src/analysis/content.ts +0 -62
  153. package/src/analysis/images.ts +0 -28
  154. package/src/analysis/links.ts +0 -41
  155. package/src/analysis/scoring.ts +0 -59
  156. package/src/analysis/seo.ts +0 -82
  157. package/src/analysis/structuredData.ts +0 -62
  158. package/src/audit/dns.ts +0 -49
  159. package/src/audit/headers.ts +0 -98
  160. package/src/audit/index.ts +0 -66
  161. package/src/audit/scoring.ts +0 -232
  162. package/src/audit/transport.ts +0 -258
  163. package/src/audit/types.ts +0 -102
  164. package/src/core/network/proxyAdapter.ts +0 -21
  165. package/src/core/network/rateLimiter.ts +0 -39
  166. package/src/core/network/redirectController.ts +0 -47
  167. package/src/core/network/responseLimiter.ts +0 -34
  168. package/src/core/network/retryPolicy.ts +0 -57
  169. package/src/core/scope/domainFilter.ts +0 -45
  170. package/src/core/scope/scopeManager.ts +0 -52
  171. package/src/core/scope/subdomainPolicy.ts +0 -39
  172. package/src/core/security/ipGuard.ts +0 -92
  173. package/src/crawler/crawl.ts +0 -382
  174. package/src/crawler/extract.ts +0 -34
  175. package/src/crawler/fetcher.ts +0 -233
  176. package/src/crawler/metricsRunner.ts +0 -124
  177. package/src/crawler/normalize.ts +0 -108
  178. package/src/crawler/parser.ts +0 -190
  179. package/src/crawler/sitemap.ts +0 -73
  180. package/src/crawler/trap.ts +0 -96
  181. package/src/db/graphLoader.ts +0 -105
  182. package/src/db/index.ts +0 -70
  183. package/src/db/repositories/EdgeRepository.ts +0 -29
  184. package/src/db/repositories/MetricsRepository.ts +0 -49
  185. package/src/db/repositories/PageRepository.ts +0 -128
  186. package/src/db/repositories/SiteRepository.ts +0 -32
  187. package/src/db/repositories/SnapshotRepository.ts +0 -74
  188. package/src/db/schema.ts +0 -177
  189. package/src/diff/compare.ts +0 -84
  190. package/src/graph/cluster.ts +0 -192
  191. package/src/graph/duplicate.ts +0 -286
  192. package/src/graph/graph.ts +0 -172
  193. package/src/graph/metrics.ts +0 -110
  194. package/src/graph/pagerank.ts +0 -125
  195. package/src/graph/simhash.ts +0 -61
  196. package/src/index.ts +0 -30
  197. package/src/lock/hashKey.ts +0 -51
  198. package/src/lock/lockManager.ts +0 -124
  199. package/src/lock/pidCheck.ts +0 -13
  200. package/src/report/html.ts +0 -227
  201. package/src/report/sitegraphExport.ts +0 -58
  202. package/src/scoring/hits.ts +0 -131
  203. package/src/scoring/orphanSeverity.ts +0 -176
  204. package/src/utils/version.ts +0 -18
  205. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  206. package/tests/analysis.unit.test.ts +0 -98
  207. package/tests/analyze.integration.test.ts +0 -98
  208. package/tests/audit/dns.test.ts +0 -31
  209. package/tests/audit/headers.test.ts +0 -45
  210. package/tests/audit/scoring.test.ts +0 -133
  211. package/tests/audit/security.test.ts +0 -12
  212. package/tests/audit/transport.test.ts +0 -112
  213. package/tests/clustering.test.ts +0 -118
  214. package/tests/crawler.test.ts +0 -358
  215. package/tests/db.test.ts +0 -159
  216. package/tests/diff.test.ts +0 -67
  217. package/tests/duplicate.test.ts +0 -110
  218. package/tests/fetcher.test.ts +0 -106
  219. package/tests/fetcher_safety.test.ts +0 -85
  220. package/tests/fixtures/analyze-crawl.json +0 -26
  221. package/tests/hits.test.ts +0 -134
  222. package/tests/html_report.test.ts +0 -58
  223. package/tests/lock/lockManager.test.ts +0 -138
  224. package/tests/metrics.test.ts +0 -196
  225. package/tests/normalize.test.ts +0 -101
  226. package/tests/orphanSeverity.test.ts +0 -160
  227. package/tests/pagerank.test.ts +0 -98
  228. package/tests/parser.test.ts +0 -117
  229. package/tests/proxy_safety.test.ts +0 -57
  230. package/tests/redirect_safety.test.ts +0 -73
  231. package/tests/safety.test.ts +0 -114
  232. package/tests/scope.test.ts +0 -66
  233. package/tests/scoring.test.ts +0 -59
  234. package/tests/sitemap.test.ts +0 -88
  235. package/tests/soft404.test.ts +0 -41
  236. package/tests/trap.test.ts +0 -39
  237. package/tests/visualization_data.test.ts +0 -46
  238. package/tsconfig.json +0 -11
@@ -1,630 +0,0 @@
1
- export const SITEGRAPH_HTML = `<!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Crawlith Site Graph</title>
7
- <style>
8
- :root {
9
- --bg-color: #121212;
10
- --text-color: #e0e0e0;
11
- --panel-bg: #1e1e1e;
12
- --border-color: #333;
13
- --accent-color: #4a90e2;
14
- --sidebar-width: 300px;
15
- }
16
- body { margin: 0; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; background: var(--bg-color); color: var(--text-color); height: 100vh; display: flex; flex-direction: column; overflow: hidden; }
17
-
18
- /* Layout */
19
- header { padding: 0 20px; background: var(--panel-bg); border-bottom: 1px solid var(--border-color); display: flex; justify-content: space-between; align-items: center; height: 60px; box-sizing: border-box; z-index: 10; }
20
- main { flex: 1; display: flex; overflow: hidden; position: relative; }
21
- #graph-container { flex: 1; position: relative; overflow: hidden; background: var(--bg-color); }
22
- #details-panel { width: var(--sidebar-width); background: var(--panel-bg); border-left: 1px solid var(--border-color); padding: 20px; overflow-y: auto; box-sizing: border-box; display: none; flex-direction: column; gap: 15px; }
23
- #details-panel.visible { display: flex; }
24
- footer { padding: 5px 20px; background: var(--panel-bg); border-top: 1px solid var(--border-color); font-size: 0.8rem; text-align: center; color: #666; height: 30px; display: flex; align-items: center; justify-content: center; }
25
-
26
- /* Header Components */
27
- .brand { font-weight: bold; font-size: 1.2rem; display: flex; align-items: center; gap: 10px; }
28
- .brand span { color: var(--accent-color); }
29
- #metrics-summary { font-size: 0.9rem; color: #aaa; display: flex; gap: 20px; }
30
- .metric { display: flex; flex-direction: column; align-items: center; line-height: 1.1; }
31
- .metric-value { font-weight: bold; color: var(--text-color); }
32
- .metric-label { font-size: 0.7rem; }
33
-
34
- #controls { display: flex; gap: 10px; align-items: center; }
35
- .btn-group { display: flex; background: #333; border-radius: 4px; overflow: hidden; }
36
- button { background: transparent; color: #aaa; border: none; padding: 6px 12px; cursor: pointer; font-size: 0.85rem; transition: all 0.2s; }
37
- button:hover { color: white; background: rgba(255,255,255,0.1); }
38
- button.active { background: var(--accent-color); color: white; }
39
-
40
- /* Search */
41
- #search-container { position: absolute; top: 15px; left: 15px; z-index: 5; }
42
- #search-input { background: rgba(30,30,30,0.9); border: 1px solid #444; color: white; padding: 8px 12px; border-radius: 20px; width: 200px; outline: none; transition: width 0.3s; }
43
- #search-input:focus { width: 280px; border-color: var(--accent-color); }
44
-
45
- /* Graph */
46
- svg { width: 100%; height: 100%; display: block; }
47
- .node { cursor: pointer; transition: stroke-width 0.1s; }
48
- .link { stroke: #555; stroke-opacity: 0.3; fill: none; pointer-events: none; }
49
-
50
- /* Interaction States */
51
- .node.highlight { stroke: #fff; stroke-width: 2px; }
52
- .link.highlight { stroke-opacity: 0.8; stroke: #999; }
53
- .node.faded { opacity: 0.1; }
54
- .link.faded { opacity: 0.05; }
55
-
56
- /* Details Panel Content */
57
- .detail-section { border-bottom: 1px solid #333; padding-bottom: 10px; }
58
- .detail-section:last-child { border-bottom: none; }
59
- .detail-label { font-size: 0.75rem; color: #888; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 4px; }
60
- .detail-value { font-size: 0.95rem; word-break: break-all; }
61
- .detail-list { list-style: none; padding: 0; margin: 0; max-height: 150px; overflow-y: auto; font-size: 0.85rem; }
62
- .detail-list li { padding: 4px 0; border-bottom: 1px solid #2a2a2a; }
63
- .detail-list a { color: var(--accent-color); text-decoration: none; }
64
- .detail-list a:hover { text-decoration: underline; }
65
-
66
- .status-badge { display: inline-block; padding: 2px 6px; border-radius: 3px; font-size: 0.75rem; font-weight: bold; margin-top: 5px; }
67
- .status-ok { background: #2e7d32; color: white; }
68
- .status-warn { background: #f9a825; color: black; }
69
- .status-error { background: #c62828; color: white; }
70
-
71
- /* Tooltip */
72
- #tooltip { position: absolute; background: rgba(20,20,20,0.95); color: white; padding: 10px; border-radius: 6px; pointer-events: none; font-size: 12px; z-index: 100; box-shadow: 0 4px 15px rgba(0,0,0,0.5); border: 1px solid #444; display: none; transform: translate(-50%, -100%); margin-top: -10px; white-space: nowrap; }
73
-
74
- /* Responsive Sidebar */
75
- @media (max-width: 768px) {
76
- #details-panel { position: absolute; right: 0; top: 0; bottom: 0; z-index: 20; box-shadow: -5px 0 15px rgba(0,0,0,0.5); transform: translateX(100%); transition: transform 0.3s ease; }
77
- #details-panel.visible { transform: translateX(0); }
78
- #metrics-summary { display: none; }
79
- }
80
- </style>
81
- </head>
82
- <body>
83
- <header>
84
- <div class="brand"><span>Crawlith</span> SiteGraph</div>
85
-
86
- <div id="metrics-summary">
87
- <div class="metric"><span class="metric-value" id="m-pages">-</span><span class="metric-label">Pages</span></div>
88
- <div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span></div>
89
- <div class="metric"><span class="metric-value" id="m-eff">-</span><span class="metric-label">Efficiency</span></div>
90
- <div class="metric"><span class="metric-value" id="m-orphan">-</span><span class="metric-label">Orphans</span></div>
91
- </div>
92
-
93
- <div id="controls">
94
- <div class="btn-group" style="margin-right: 15px;">
95
- <button id="btn-auth-pagerank" class="active" title="PageRank Authority">PageRank</button>
96
- <button id="btn-auth-structural" title="Structural Authority (In-Degree)">In-Degree</button>
97
- </div>
98
- <div class="btn-group">
99
- <button id="btn-hierarchical" class="active">Hierarchical</button>
100
- <button id="btn-radial">Radial</button>
101
- </div>
102
- </div>
103
- </header>
104
-
105
- <main>
106
- <div id="graph-container">
107
- <div id="search-container">
108
- <input type="text" id="search-input" placeholder="Search URL...">
109
- </div>
110
- <svg id="graph"></svg>
111
- <div id="tooltip"></div>
112
- </div>
113
-
114
- <aside id="details-panel">
115
- <div class="detail-section">
116
- <div class="detail-label">URL</div>
117
- <div class="detail-value" id="d-url">-</div>
118
- <div id="d-status"></div>
119
- </div>
120
- <div class="detail-section" style="display: flex; gap: 20px;">
121
- <div>
122
- <div class="detail-label">Depth</div>
123
- <div class="detail-value" id="d-depth">-</div>
124
- </div>
125
- <div>
126
- <div class="detail-label">Authority</div>
127
- <div class="detail-value" id="d-auth-container">-</div>
128
- </div>
129
- </div>
130
- <div class="detail-section">
131
- <div class="detail-label">In-links (<span id="d-in-count">0</span>)</div>
132
- <!-- List could be populated here if we had the reverse index, for now just count -->
133
- </div>
134
- <div class="detail-section">
135
- <div class="detail-label">Out-links (<span id="d-out-count">0</span>)</div>
136
- <ul class="detail-list" id="d-out-list"></ul>
137
- </div>
138
- </aside>
139
- </main>
140
-
141
- <footer>
142
- Generated by Crawlith Crawler
143
- </footer>
144
-
145
- <!-- D3 from CDN -->
146
- <script src="https://d3js.org/d3.v7.min.js"></script>
147
-
148
- <script>
149
- // --- State ---
150
- const state = {
151
- nodes: [],
152
- links: [],
153
- metrics: {},
154
- adjacency: new Map(), // url -> { in: [], out: [] }
155
- simulation: null,
156
- width: 0,
157
- height: 0,
158
- transform: d3.zoomIdentity,
159
- activeNode: null,
160
- mode: 'hierarchical', // 'hierarchical' | 'radial'
161
- maxDepth: 0,
162
- maxInLinks: 0,
163
- nodeSelection: null,
164
- linkSelection: null,
165
- zoom: null
166
- };
167
-
168
- // --- DOM Elements ---
169
- const svg = d3.select("#graph");
170
- const container = svg.append("g");
171
- const linkGroup = container.append("g").attr("class", "links");
172
- const nodeGroup = container.append("g").attr("class", "nodes");
173
- const tooltip = d3.select("#tooltip");
174
- const detailsPanel = d3.select("#details-panel");
175
-
176
- // --- Initialization ---
177
- // --- Initialization ---
178
- async function init() {
179
- try {
180
- let graphData, metricsData;
181
-
182
- // 1. Try to use injected data (for file:// usage)
183
- // @ts-ignore
184
- if (window.GRAPH_DATA) graphData = window.GRAPH_DATA;
185
- // @ts-ignore
186
- if (window.METRICS_DATA) metricsData = window.METRICS_DATA;
187
-
188
- // 2. Fallback to fetching JSON files (for web server usage)
189
- if (!graphData || !metricsData) {
190
- try {
191
- const [graphRes, metricsRes] = await Promise.all([
192
- fetch('graph.json'),
193
- fetch('metrics.json')
194
- ]);
195
- if (graphRes.ok && metricsRes.ok) {
196
- graphData = await graphRes.json();
197
- metricsData = await metricsRes.json();
198
- }
199
- } catch (e) {
200
- console.warn("Fetch failed, possibly due to CORS or missing files.", e);
201
- }
202
- }
203
-
204
- if (!graphData || !metricsData) {
205
- throw new Error("No data available. Ensure graph.json exists or data is injected.");
206
- }
207
-
208
- state.metrics = metricsData;
209
- processData(graphData);
210
- updateMetricsUI();
211
-
212
- // Setup UI
213
- setupResize();
214
- setupInteractions();
215
- setupSearch();
216
-
217
- // Start Simulation
218
- initSimulation();
219
-
220
- } catch (err) {
221
- console.error(err);
222
- alert("Error loading visualization data: " + err.message);
223
- }
224
- }
225
-
226
- function processData(data) {
227
- // Create a map for fast lookup
228
- const nodeMap = new Map();
229
-
230
- data.nodes.forEach(n => {
231
- n.inLinks = n.inLinks || 0;
232
- n.outLinks = n.outLinks || 0;
233
- nodeMap.set(n.url, n);
234
- });
235
-
236
- // Filter valid links
237
- state.links = data.edges
238
- .map(e => ({ source: nodeMap.get(e.source), target: nodeMap.get(e.target) }))
239
- .filter(e => e.source && e.target);
240
-
241
- state.nodes = data.nodes;
242
-
243
- // Calculate Stats
244
- state.maxDepth = d3.max(state.nodes, d => d.depth) || 1;
245
- state.maxInLinks = d3.max(state.nodes, d => d.inLinks) || 1;
246
-
247
- // Calculate Authority & Enrich Nodes
248
- state.nodes.forEach(n => {
249
- // Structural Authority: log-scaled normalized 0-1 based on in-links
250
- n.structuralAuthority = Math.log(1 + n.inLinks) / Math.log(1 + state.maxInLinks);
251
-
252
- // PageRank Authority: normalized 0-1 from pageRankScore (0-100)
253
- if (typeof n.pageRankScore === 'number') {
254
- n.pageRankAuthority = n.pageRankScore / 100;
255
- } else {
256
- n.pageRankAuthority = n.structuralAuthority;
257
- }
258
-
259
- // Default authority to PageRank if available, else structural
260
- n.authority = n.pageRankAuthority;
261
-
262
- // Ensure x,y are initialized to avoid NaNs if D3 doesn't do it fast enough
263
- n.x = 0; n.y = 0;
264
- });
265
-
266
- // Build Adjacency Map
267
- state.nodes.forEach(n => state.adjacency.set(n.url, { in: [], out: [] }));
268
- state.links.forEach(l => {
269
- state.adjacency.get(l.source.url).out.push(l.target);
270
- state.adjacency.get(l.target.url).in.push(l.source);
271
- });
272
- }
273
-
274
- function updateMetricsUI() {
275
- document.getElementById('m-pages').textContent = state.metrics.totalPages;
276
- document.getElementById('m-depth').textContent = state.metrics.maxDepthFound;
277
- document.getElementById('m-eff').textContent = (state.metrics.crawlEfficiencyScore * 100).toFixed(1) + '%';
278
- document.getElementById('m-orphan').textContent = state.metrics.orphanPages.length;
279
- }
280
-
281
- // --- Simulation ---
282
- function initSimulation() {
283
- const { width, height } = getDimensions();
284
- state.width = width;
285
- state.height = height;
286
-
287
- // Safeguards
288
- const nodeCount = state.nodes.length;
289
- const enableCollision = nodeCount <= 1200;
290
- const alphaDecay = nodeCount > 1000 ? 0.05 : 0.02; // Faster decay for large graphs
291
-
292
- state.simulation = d3.forceSimulation(state.nodes)
293
- .alphaDecay(alphaDecay)
294
- .force("link", d3.forceLink(state.links).id(d => d.url).strength(0.5)) // Reduced strength for flexibility
295
- .force("charge", d3.forceManyBody().strength(nodeCount > 1000 ? -100 : -300))
296
- .force("center", d3.forceCenter(width / 2, height / 2));
297
-
298
- if (enableCollision) {
299
- state.simulation.force("collide", d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1));
300
- }
301
-
302
- // Apply Layout Mode
303
- applyLayoutMode(state.mode);
304
-
305
- // Rendering loop
306
- state.simulation.on("tick", ticked);
307
-
308
- // Render initial SVG elements
309
- render();
310
- }
311
-
312
- function applyLayoutMode(mode) {
313
- state.mode = mode;
314
- const { width, height } = state;
315
- const centerY = height / 2;
316
- const centerX = width / 2;
317
-
318
- // Remove conflicting forces
319
- state.simulation.force("y", null);
320
- state.simulation.force("radial", null);
321
-
322
- if (mode === 'hierarchical') {
323
- const depthSpacing = height / (state.maxDepth + 2);
324
- // Hierarchical: Nodes pushed to Y levels based on depth
325
- state.simulation.force("y", d3.forceY(d => {
326
- return (d.depth * depthSpacing) - (height/2) + 50; // Offset to start from top
327
- }).strength(1));
328
- // We rely on "center" force to keep X centered, but maybe add weak forceX?
329
- // Let's add weak forceX to prevent wide spread
330
- state.simulation.force("x", d3.forceX(0).strength(0.05));
331
- state.simulation.force("center", d3.forceCenter(width/2, height/2)); // Recenter
332
-
333
- } else if (mode === 'radial') {
334
- const maxRadius = Math.min(width, height) / 2 - 50;
335
- const ringSpacing = maxRadius / (state.maxDepth + 1);
336
-
337
- state.simulation.force("radial", d3.forceRadial(
338
- d => d.depth * ringSpacing,
339
- width / 2,
340
- height / 2
341
- ).strength(0.8));
342
-
343
- state.simulation.force("x", null); // Remove X constraint
344
- }
345
-
346
- state.simulation.alpha(1).restart();
347
- }
348
-
349
- function getNodeRadius(d) {
350
- // 5 + authority * 15
351
- return 5 + (d.authority * 15);
352
- }
353
-
354
- function getNodeColor(d) {
355
- // Depth-based sequential color (Blue -> Purple -> Pink)
356
- const t = d.depth / (state.maxDepth || 1);
357
- return d3.interpolateViridis(1 - t); // Invert Viridis for better contrast on dark
358
- }
359
-
360
- function render() {
361
- // Links
362
- state.linkSelection = linkGroup.selectAll("line")
363
- .data(state.links)
364
- .join("line")
365
- .attr("class", "link")
366
- .attr("stroke-width", 0.5);
367
-
368
- // Nodes
369
- state.nodeSelection = nodeGroup.selectAll("circle")
370
- .data(state.nodes)
371
- .join("circle")
372
- .attr("class", "node")
373
- .attr("r", d => getNodeRadius(d))
374
- .attr("fill", d => getNodeColor(d))
375
- .attr("stroke", d => d.status >= 400 ? "#ff4444" : null) // Red stroke for errors
376
- .on("mouseover", (event, d) => {
377
- if (state.activeNode) return;
378
- highlightNode(d);
379
- showTooltip(event, d);
380
- })
381
- .on("mouseout", () => {
382
- if (state.activeNode) return;
383
- resetHighlight();
384
- hideTooltip();
385
- })
386
- .on("click", (event, d) => {
387
- event.stopPropagation();
388
- selectNode(d);
389
- })
390
- .call(d3.drag()
391
- .on("start", dragstarted)
392
- .on("drag", dragged)
393
- .on("end", dragended));
394
-
395
- // Zoom
396
- state.zoom = d3.zoom()
397
- .scaleExtent([0.1, 4])
398
- .on("zoom", (event) => {
399
- state.transform = event.transform;
400
- container.attr("transform", event.transform);
401
- });
402
-
403
- svg.call(state.zoom)
404
- .call(state.zoom.transform, d3.zoomIdentity.translate(state.width/2, state.height/2).scale(0.8).translate(-state.width/2, -state.height/2)); // Initial zoom out
405
- }
406
-
407
- function ticked() {
408
- if (state.linkSelection) {
409
- state.linkSelection
410
- .attr("x1", d => d.source.x)
411
- .attr("y1", d => d.source.y)
412
- .attr("x2", d => d.target.x)
413
- .attr("y2", d => d.target.y);
414
- }
415
-
416
- if (state.nodeSelection) {
417
- state.nodeSelection
418
- .attr("cx", d => d.x)
419
- .attr("cy", d => d.y);
420
- }
421
- }
422
-
423
- // --- Interactions ---
424
-
425
- function setupInteractions() {
426
- // Background click to clear selection
427
- svg.on("click", () => {
428
- state.activeNode = null;
429
- resetHighlight();
430
- detailsPanel.classed("visible", false);
431
- });
432
-
433
- // Layout Toggle
434
- d3.select("#btn-hierarchical").on("click", function() {
435
- setMode('hierarchical', this);
436
- });
437
- d3.select("#btn-radial").on("click", function() {
438
- setMode('radial', this);
439
- });
440
-
441
- // Authority Toggle
442
- d3.select("#btn-auth-pagerank").on("click", function() {
443
- setAuthorityMode('pagerank', this);
444
- });
445
- d3.select("#btn-auth-structural").on("click", function() {
446
- setAuthorityMode('structural', this);
447
- });
448
- }
449
-
450
- function setAuthorityMode(mode, btn) {
451
- d3.select("#btn-auth-pagerank").classed("active", false);
452
- d3.select("#btn-auth-structural").classed("active", false);
453
- d3.select(btn).classed("active", true);
454
-
455
- state.nodes.forEach(n => {
456
- n.authority = mode === 'pagerank' ? n.pageRankAuthority : n.structuralAuthority;
457
- });
458
-
459
- // Update Visuals
460
- nodeGroup.selectAll("circle")
461
- .transition().duration(500)
462
- .attr("r", d => getNodeRadius(d));
463
-
464
- // Update collision force if enabled
465
- if (state.simulation.force("collide")) {
466
- state.simulation.force("collide", d3.forceCollide().radius(d => getNodeRadius(d) + 2).iterations(1));
467
- state.simulation.alpha(0.3).restart();
468
- }
469
- }
470
-
471
- function setMode(mode, btn) {
472
- d3.selectAll("#controls button").classed("active", false);
473
- d3.select(btn).classed("active", true);
474
- applyLayoutMode(mode);
475
- }
476
-
477
- function highlightNode(d) {
478
- const neighbors = new Set();
479
- const adj = state.adjacency.get(d.url);
480
- if (adj) {
481
- adj.in.forEach(n => neighbors.add(n.url));
482
- adj.out.forEach(n => neighbors.add(n.url));
483
- }
484
- neighbors.add(d.url);
485
-
486
- nodeGroup.selectAll("circle").classed("faded", n => !neighbors.has(n.url));
487
- nodeGroup.selectAll("circle").classed("highlight", n => n.url === d.url);
488
-
489
- linkGroup.selectAll("line").classed("faded", l =>
490
- l.source.url !== d.url && l.target.url !== d.url
491
- );
492
- linkGroup.selectAll("line").classed("highlight", l =>
493
- l.source.url === d.url || l.target.url === d.url
494
- );
495
- }
496
-
497
- function resetHighlight() {
498
- nodeGroup.selectAll("circle").classed("faded", false).classed("highlight", false);
499
- linkGroup.selectAll("line").classed("faded", false).classed("highlight", false);
500
- }
501
-
502
- function selectNode(d) {
503
- state.activeNode = d;
504
- highlightNode(d);
505
- showDetails(d);
506
- }
507
-
508
- function showTooltip(event, d) {
509
- // If we are transforming the container, we need to map coordinates correctly or just use pageX/Y
510
- tooltip.style("display", "block")
511
- .html(\`<strong>\${new URL(d.url).pathname}</strong><br>Auth: \${(d.authority * 10).toFixed(1)}\`)
512
- .style("left", (event.pageX) + "px")
513
- .style("top", (event.pageY - 10) + "px");
514
- }
515
-
516
- function hideTooltip() {
517
- tooltip.style("display", "none");
518
- }
519
-
520
- function showDetails(d) {
521
- detailsPanel.classed("visible", true);
522
- d3.select("#d-url").text(d.url);
523
- d3.select("#d-depth").text(d.depth);
524
-
525
- const authContainer = d3.select("#d-auth-container");
526
- authContainer.html("");
527
- const prVal = (d.pageRankAuthority * 100).toFixed(1);
528
- const structVal = d.structuralAuthority.toFixed(3);
529
- authContainer.append("div").html(\`PR: <strong>\${prVal}</strong>\`);
530
- authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(\`In-Degree: \${structVal}\`);
531
-
532
- d3.select("#d-in-count").text(d.inLinks);
533
- d3.select("#d-out-count").text(d.outLinks);
534
-
535
- // Status badge
536
- const statusDiv = d3.select("#d-status");
537
- statusDiv.html("");
538
- let sClass = "status-ok";
539
- if (d.status >= 400) sClass = "status-error";
540
- else if (d.status >= 300) sClass = "status-warn";
541
- statusDiv.append("span").attr("class", "status-badge " + sClass).text(d.status);
542
-
543
- // Outlinks list (limit to 20)
544
- const list = d3.select("#d-out-list");
545
- list.html("");
546
- const adj = state.adjacency.get(d.url);
547
- if (adj && adj.out.length > 0) {
548
- adj.out.slice(0, 50).forEach(target => {
549
- list.append("li").append("a")
550
- .attr("href", target.url)
551
- .attr("target", "_blank")
552
- .text(new URL(target.url).pathname);
553
- });
554
- if (adj.out.length > 50) {
555
- list.append("li").text(\`...and \${adj.out.length - 50} more\`);
556
- }
557
- } else {
558
- list.append("li").text("No outgoing links");
559
- }
560
- }
561
-
562
- // --- Search ---
563
- function setupSearch() {
564
- const input = document.getElementById('search-input');
565
- input.addEventListener('keydown', (e) => {
566
- if (e.key === 'Enter') {
567
- const val = input.value.trim().toLowerCase();
568
- if (!val) return;
569
-
570
- const found = state.nodes.find(n => n.url.toLowerCase().includes(val));
571
- if (found) {
572
- selectNode(found);
573
- // Center view on node
574
- const transform = d3.zoomIdentity
575
- .translate(state.width/2, state.height/2)
576
- .scale(2)
577
- .translate(-found.x, -found.y);
578
-
579
- svg.transition().duration(750).call(state.zoom.transform, transform);
580
- }
581
- }
582
- });
583
- }
584
-
585
- function setupResize() {
586
- window.addEventListener("resize", () => {
587
- const { width, height } = getDimensions();
588
- state.width = width;
589
- state.height = height;
590
- state.simulation.force("center", d3.forceCenter(width / 2, height / 2));
591
- if (state.mode === 'hierarchical') {
592
- // Re-evaluate Y force if needed, but usually center is enough
593
- }
594
- state.simulation.alpha(0.3).restart();
595
- });
596
- }
597
-
598
- function getDimensions() {
599
- const rect = document.getElementById("graph-container").getBoundingClientRect();
600
- return { width: rect.width, height: rect.height };
601
- }
602
-
603
- // --- Dragging ---
604
- function dragstarted(event, d) {
605
- if (!event.active) state.simulation.alphaTarget(0.3).restart();
606
- d.fx = d.x;
607
- d.fy = d.y;
608
- }
609
-
610
- function dragged(event, d) {
611
- d.fx = event.x;
612
- d.fy = event.y;
613
- }
614
-
615
- function dragended(event, d) {
616
- if (!event.active) state.simulation.alphaTarget(0);
617
- d.fx = null;
618
- d.fy = null;
619
- }
620
-
621
- // Start
622
- if (document.readyState === 'loading') {
623
- document.addEventListener('DOMContentLoaded', init);
624
- } else {
625
- init();
626
- }
627
- </script>
628
- </body>
629
- </html>
630
- `;
@@ -1,9 +0,0 @@
1
- import { Graph } from '../graph/graph.js';
2
- export interface HITSOptions {
3
- iterations?: number;
4
- }
5
- /**
6
- * Computes Hub and Authority scores using the HITS algorithm.
7
- * Operates purely on the internal link graph.
8
- */
9
- export declare function computeHITS(graph: Graph, options?: HITSOptions): void;