@crawlith/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +70 -0
  3. package/dist/analysis/analysis_list.html +35 -0
  4. package/dist/analysis/analysis_page.html +123 -0
  5. package/dist/analysis/analyze.d.ts +40 -5
  6. package/dist/analysis/analyze.js +395 -347
  7. package/dist/analysis/clustering.d.ts +23 -0
  8. package/dist/analysis/clustering.js +206 -0
  9. package/dist/analysis/content.d.ts +1 -1
  10. package/dist/analysis/content.js +11 -5
  11. package/dist/analysis/duplicate.d.ts +34 -0
  12. package/dist/analysis/duplicate.js +305 -0
  13. package/dist/analysis/heading.d.ts +116 -0
  14. package/dist/analysis/heading.js +356 -0
  15. package/dist/analysis/images.d.ts +1 -1
  16. package/dist/analysis/images.js +6 -5
  17. package/dist/analysis/links.d.ts +1 -1
  18. package/dist/analysis/links.js +8 -8
  19. package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
  20. package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
  21. package/dist/analysis/scoring.js +11 -2
  22. package/dist/analysis/seo.d.ts +8 -4
  23. package/dist/analysis/seo.js +41 -30
  24. package/dist/analysis/soft404.d.ts +17 -0
  25. package/dist/analysis/soft404.js +62 -0
  26. package/dist/analysis/structuredData.d.ts +1 -1
  27. package/dist/analysis/structuredData.js +5 -4
  28. package/dist/analysis/templates.d.ts +2 -0
  29. package/dist/analysis/templates.js +7 -0
  30. package/dist/application/index.d.ts +2 -0
  31. package/dist/application/index.js +2 -0
  32. package/dist/application/usecase.d.ts +3 -0
  33. package/dist/application/usecase.js +1 -0
  34. package/dist/application/usecases.d.ts +114 -0
  35. package/dist/application/usecases.js +201 -0
  36. package/dist/audit/index.js +1 -1
  37. package/dist/audit/transport.d.ts +1 -1
  38. package/dist/audit/transport.js +5 -4
  39. package/dist/audit/types.d.ts +1 -0
  40. package/dist/constants.d.ts +17 -0
  41. package/dist/constants.js +23 -0
  42. package/dist/core/scope/scopeManager.js +3 -0
  43. package/dist/core/security/ipGuard.d.ts +11 -0
  44. package/dist/core/security/ipGuard.js +71 -3
  45. package/dist/crawler/crawl.d.ts +4 -22
  46. package/dist/crawler/crawl.js +4 -335
  47. package/dist/crawler/crawler.d.ts +87 -0
  48. package/dist/crawler/crawler.js +683 -0
  49. package/dist/crawler/extract.d.ts +4 -1
  50. package/dist/crawler/extract.js +7 -2
  51. package/dist/crawler/fetcher.d.ts +2 -1
  52. package/dist/crawler/fetcher.js +26 -11
  53. package/dist/crawler/metricsRunner.d.ts +23 -1
  54. package/dist/crawler/metricsRunner.js +202 -72
  55. package/dist/crawler/normalize.d.ts +41 -0
  56. package/dist/crawler/normalize.js +119 -3
  57. package/dist/crawler/parser.d.ts +1 -3
  58. package/dist/crawler/parser.js +2 -49
  59. package/dist/crawler/resolver.d.ts +11 -0
  60. package/dist/crawler/resolver.js +67 -0
  61. package/dist/crawler/sitemap.d.ts +6 -0
  62. package/dist/crawler/sitemap.js +27 -17
  63. package/dist/crawler/trap.d.ts +5 -1
  64. package/dist/crawler/trap.js +23 -2
  65. package/dist/db/CrawlithDB.d.ts +110 -0
  66. package/dist/db/CrawlithDB.js +500 -0
  67. package/dist/db/graphLoader.js +42 -30
  68. package/dist/db/index.d.ts +11 -0
  69. package/dist/db/index.js +41 -29
  70. package/dist/db/migrations.d.ts +2 -0
  71. package/dist/db/{schema.js → migrations.js} +90 -43
  72. package/dist/db/pluginRegistry.d.ts +9 -0
  73. package/dist/db/pluginRegistry.js +19 -0
  74. package/dist/db/repositories/EdgeRepository.d.ts +13 -0
  75. package/dist/db/repositories/EdgeRepository.js +20 -0
  76. package/dist/db/repositories/MetricsRepository.d.ts +16 -8
  77. package/dist/db/repositories/MetricsRepository.js +28 -7
  78. package/dist/db/repositories/PageRepository.d.ts +15 -2
  79. package/dist/db/repositories/PageRepository.js +169 -25
  80. package/dist/db/repositories/SiteRepository.d.ts +9 -0
  81. package/dist/db/repositories/SiteRepository.js +13 -0
  82. package/dist/db/repositories/SnapshotRepository.d.ts +14 -5
  83. package/dist/db/repositories/SnapshotRepository.js +64 -5
  84. package/dist/db/reset.d.ts +9 -0
  85. package/dist/db/reset.js +32 -0
  86. package/dist/db/statements.d.ts +12 -0
  87. package/dist/db/statements.js +40 -0
  88. package/dist/diff/compare.d.ts +0 -5
  89. package/dist/diff/compare.js +0 -12
  90. package/dist/diff/service.d.ts +16 -0
  91. package/dist/diff/service.js +41 -0
  92. package/dist/domain/index.d.ts +4 -0
  93. package/dist/domain/index.js +4 -0
  94. package/dist/events.d.ts +56 -0
  95. package/dist/events.js +1 -0
  96. package/dist/graph/graph.d.ts +36 -42
  97. package/dist/graph/graph.js +26 -17
  98. package/dist/graph/hits.d.ts +23 -0
  99. package/dist/graph/hits.js +111 -0
  100. package/dist/graph/metrics.d.ts +0 -4
  101. package/dist/graph/metrics.js +25 -9
  102. package/dist/graph/pagerank.d.ts +17 -4
  103. package/dist/graph/pagerank.js +126 -91
  104. package/dist/graph/simhash.d.ts +6 -0
  105. package/dist/graph/simhash.js +14 -0
  106. package/dist/index.d.ts +29 -8
  107. package/dist/index.js +29 -8
  108. package/dist/lock/hashKey.js +1 -1
  109. package/dist/lock/lockManager.d.ts +5 -1
  110. package/dist/lock/lockManager.js +38 -13
  111. package/dist/plugin-system/plugin-cli.d.ts +10 -0
  112. package/dist/plugin-system/plugin-cli.js +31 -0
  113. package/dist/plugin-system/plugin-config.d.ts +16 -0
  114. package/dist/plugin-system/plugin-config.js +36 -0
  115. package/dist/plugin-system/plugin-loader.d.ts +17 -0
  116. package/dist/plugin-system/plugin-loader.js +122 -0
  117. package/dist/plugin-system/plugin-registry.d.ts +25 -0
  118. package/dist/plugin-system/plugin-registry.js +167 -0
  119. package/dist/plugin-system/plugin-types.d.ts +205 -0
  120. package/dist/plugin-system/plugin-types.js +1 -0
  121. package/dist/ports/index.d.ts +9 -0
  122. package/dist/ports/index.js +1 -0
  123. package/{src/report/sitegraph_template.ts → dist/report/crawl.html} +330 -81
  124. package/dist/report/crawlExport.d.ts +3 -0
  125. package/dist/report/{sitegraphExport.js → crawlExport.js} +3 -3
  126. package/dist/report/crawl_template.d.ts +1 -0
  127. package/dist/report/crawl_template.js +7 -0
  128. package/dist/report/export.d.ts +3 -0
  129. package/dist/report/export.js +81 -0
  130. package/dist/report/html.js +15 -216
  131. package/dist/report/insight.d.ts +27 -0
  132. package/dist/report/insight.js +103 -0
  133. package/dist/scoring/health.d.ts +56 -0
  134. package/dist/scoring/health.js +213 -0
  135. package/dist/utils/chalk.d.ts +6 -0
  136. package/dist/utils/chalk.js +41 -0
  137. package/dist/utils/secureConfig.d.ts +23 -0
  138. package/dist/utils/secureConfig.js +128 -0
  139. package/package.json +12 -6
  140. package/CHANGELOG.md +0 -7
  141. package/dist/db/schema.d.ts +0 -2
  142. package/dist/graph/cluster.d.ts +0 -6
  143. package/dist/graph/cluster.js +0 -173
  144. package/dist/graph/duplicate.d.ts +0 -10
  145. package/dist/graph/duplicate.js +0 -251
  146. package/dist/report/sitegraphExport.d.ts +0 -3
  147. package/dist/report/sitegraph_template.d.ts +0 -1
  148. package/dist/report/sitegraph_template.js +0 -630
  149. package/dist/scoring/hits.d.ts +0 -9
  150. package/dist/scoring/hits.js +0 -111
  151. package/src/analysis/analyze.ts +0 -548
  152. package/src/analysis/content.ts +0 -62
  153. package/src/analysis/images.ts +0 -28
  154. package/src/analysis/links.ts +0 -41
  155. package/src/analysis/scoring.ts +0 -59
  156. package/src/analysis/seo.ts +0 -82
  157. package/src/analysis/structuredData.ts +0 -62
  158. package/src/audit/dns.ts +0 -49
  159. package/src/audit/headers.ts +0 -98
  160. package/src/audit/index.ts +0 -66
  161. package/src/audit/scoring.ts +0 -232
  162. package/src/audit/transport.ts +0 -258
  163. package/src/audit/types.ts +0 -102
  164. package/src/core/network/proxyAdapter.ts +0 -21
  165. package/src/core/network/rateLimiter.ts +0 -39
  166. package/src/core/network/redirectController.ts +0 -47
  167. package/src/core/network/responseLimiter.ts +0 -34
  168. package/src/core/network/retryPolicy.ts +0 -57
  169. package/src/core/scope/domainFilter.ts +0 -45
  170. package/src/core/scope/scopeManager.ts +0 -52
  171. package/src/core/scope/subdomainPolicy.ts +0 -39
  172. package/src/core/security/ipGuard.ts +0 -92
  173. package/src/crawler/crawl.ts +0 -382
  174. package/src/crawler/extract.ts +0 -34
  175. package/src/crawler/fetcher.ts +0 -233
  176. package/src/crawler/metricsRunner.ts +0 -124
  177. package/src/crawler/normalize.ts +0 -108
  178. package/src/crawler/parser.ts +0 -190
  179. package/src/crawler/sitemap.ts +0 -73
  180. package/src/crawler/trap.ts +0 -96
  181. package/src/db/graphLoader.ts +0 -105
  182. package/src/db/index.ts +0 -70
  183. package/src/db/repositories/EdgeRepository.ts +0 -29
  184. package/src/db/repositories/MetricsRepository.ts +0 -49
  185. package/src/db/repositories/PageRepository.ts +0 -128
  186. package/src/db/repositories/SiteRepository.ts +0 -32
  187. package/src/db/repositories/SnapshotRepository.ts +0 -74
  188. package/src/db/schema.ts +0 -177
  189. package/src/diff/compare.ts +0 -84
  190. package/src/graph/cluster.ts +0 -192
  191. package/src/graph/duplicate.ts +0 -286
  192. package/src/graph/graph.ts +0 -172
  193. package/src/graph/metrics.ts +0 -110
  194. package/src/graph/pagerank.ts +0 -125
  195. package/src/graph/simhash.ts +0 -61
  196. package/src/index.ts +0 -30
  197. package/src/lock/hashKey.ts +0 -51
  198. package/src/lock/lockManager.ts +0 -124
  199. package/src/lock/pidCheck.ts +0 -13
  200. package/src/report/html.ts +0 -227
  201. package/src/report/sitegraphExport.ts +0 -58
  202. package/src/scoring/hits.ts +0 -131
  203. package/src/scoring/orphanSeverity.ts +0 -176
  204. package/src/utils/version.ts +0 -18
  205. package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
  206. package/tests/analysis.unit.test.ts +0 -98
  207. package/tests/analyze.integration.test.ts +0 -98
  208. package/tests/audit/dns.test.ts +0 -31
  209. package/tests/audit/headers.test.ts +0 -45
  210. package/tests/audit/scoring.test.ts +0 -133
  211. package/tests/audit/security.test.ts +0 -12
  212. package/tests/audit/transport.test.ts +0 -112
  213. package/tests/clustering.test.ts +0 -118
  214. package/tests/crawler.test.ts +0 -358
  215. package/tests/db.test.ts +0 -159
  216. package/tests/diff.test.ts +0 -67
  217. package/tests/duplicate.test.ts +0 -110
  218. package/tests/fetcher.test.ts +0 -106
  219. package/tests/fetcher_safety.test.ts +0 -85
  220. package/tests/fixtures/analyze-crawl.json +0 -26
  221. package/tests/hits.test.ts +0 -134
  222. package/tests/html_report.test.ts +0 -58
  223. package/tests/lock/lockManager.test.ts +0 -138
  224. package/tests/metrics.test.ts +0 -196
  225. package/tests/normalize.test.ts +0 -101
  226. package/tests/orphanSeverity.test.ts +0 -160
  227. package/tests/pagerank.test.ts +0 -98
  228. package/tests/parser.test.ts +0 -117
  229. package/tests/proxy_safety.test.ts +0 -57
  230. package/tests/redirect_safety.test.ts +0 -73
  231. package/tests/safety.test.ts +0 -114
  232. package/tests/scope.test.ts +0 -66
  233. package/tests/scoring.test.ts +0 -59
  234. package/tests/sitemap.test.ts +0 -88
  235. package/tests/soft404.test.ts +0 -41
  236. package/tests/trap.test.ts +0 -39
  237. package/tests/visualization_data.test.ts +0 -46
  238. package/tsconfig.json +0 -11
@@ -1,5 +1,6 @@
1
- export const SITEGRAPH_HTML = `<!DOCTYPE html>
1
+ <!DOCTYPE html>
2
2
  <html lang="en">
3
+
3
4
  <head>
4
5
  <meta charset="UTF-8">
5
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -13,81 +14,329 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
13
14
  --accent-color: #4a90e2;
14
15
  --sidebar-width: 300px;
15
16
  }
16
- body { margin: 0; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; background: var(--bg-color); color: var(--text-color); height: 100vh; display: flex; flex-direction: column; overflow: hidden; }
17
+
18
+ body {
19
+ margin: 0;
20
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
21
+ background: var(--bg-color);
22
+ color: var(--text-color);
23
+ height: 100vh;
24
+ display: flex;
25
+ flex-direction: column;
26
+ overflow: hidden;
27
+ }
17
28
 
18
29
  /* Layout */
19
- header { padding: 0 20px; background: var(--panel-bg); border-bottom: 1px solid var(--border-color); display: flex; justify-content: space-between; align-items: center; height: 60px; box-sizing: border-box; z-index: 10; }
20
- main { flex: 1; display: flex; overflow: hidden; position: relative; }
21
- #graph-container { flex: 1; position: relative; overflow: hidden; background: var(--bg-color); }
22
- #details-panel { width: var(--sidebar-width); background: var(--panel-bg); border-left: 1px solid var(--border-color); padding: 20px; overflow-y: auto; box-sizing: border-box; display: none; flex-direction: column; gap: 15px; }
23
- #details-panel.visible { display: flex; }
24
- footer { padding: 5px 20px; background: var(--panel-bg); border-top: 1px solid var(--border-color); font-size: 0.8rem; text-align: center; color: #666; height: 30px; display: flex; align-items: center; justify-content: center; }
30
+ header {
31
+ padding: 0 20px;
32
+ background: var(--panel-bg);
33
+ border-bottom: 1px solid var(--border-color);
34
+ display: flex;
35
+ justify-content: space-between;
36
+ align-items: center;
37
+ height: 60px;
38
+ box-sizing: border-box;
39
+ z-index: 10;
40
+ }
41
+
42
+ main {
43
+ flex: 1;
44
+ display: flex;
45
+ overflow: hidden;
46
+ position: relative;
47
+ }
48
+
49
+ #graph-container {
50
+ flex: 1;
51
+ position: relative;
52
+ overflow: hidden;
53
+ background: var(--bg-color);
54
+ }
55
+
56
+ #details-panel {
57
+ width: var(--sidebar-width);
58
+ background: var(--panel-bg);
59
+ border-left: 1px solid var(--border-color);
60
+ padding: 20px;
61
+ overflow-y: auto;
62
+ box-sizing: border-box;
63
+ display: none;
64
+ flex-direction: column;
65
+ gap: 15px;
66
+ }
67
+
68
+ #details-panel.visible {
69
+ display: flex;
70
+ }
71
+
72
+ footer {
73
+ padding: 5px 20px;
74
+ background: var(--panel-bg);
75
+ border-top: 1px solid var(--border-color);
76
+ font-size: 0.8rem;
77
+ text-align: center;
78
+ color: #666;
79
+ height: 30px;
80
+ display: flex;
81
+ align-items: center;
82
+ justify-content: center;
83
+ }
25
84
 
26
85
  /* Header Components */
27
- .brand { font-weight: bold; font-size: 1.2rem; display: flex; align-items: center; gap: 10px; }
28
- .brand span { color: var(--accent-color); }
29
- #metrics-summary { font-size: 0.9rem; color: #aaa; display: flex; gap: 20px; }
30
- .metric { display: flex; flex-direction: column; align-items: center; line-height: 1.1; }
31
- .metric-value { font-weight: bold; color: var(--text-color); }
32
- .metric-label { font-size: 0.7rem; }
33
-
34
- #controls { display: flex; gap: 10px; align-items: center; }
35
- .btn-group { display: flex; background: #333; border-radius: 4px; overflow: hidden; }
36
- button { background: transparent; color: #aaa; border: none; padding: 6px 12px; cursor: pointer; font-size: 0.85rem; transition: all 0.2s; }
37
- button:hover { color: white; background: rgba(255,255,255,0.1); }
38
- button.active { background: var(--accent-color); color: white; }
86
+ .brand {
87
+ font-weight: bold;
88
+ font-size: 1.2rem;
89
+ display: flex;
90
+ align-items: center;
91
+ gap: 10px;
92
+ }
93
+
94
+ .brand span {
95
+ color: var(--accent-color);
96
+ }
97
+
98
+ #metrics-summary {
99
+ font-size: 0.9rem;
100
+ color: #aaa;
101
+ display: flex;
102
+ gap: 20px;
103
+ }
104
+
105
+ .metric {
106
+ display: flex;
107
+ flex-direction: column;
108
+ align-items: center;
109
+ line-height: 1.1;
110
+ }
111
+
112
+ .metric-value {
113
+ font-weight: bold;
114
+ color: var(--text-color);
115
+ }
116
+
117
+ .metric-label {
118
+ font-size: 0.7rem;
119
+ }
120
+
121
+ #controls {
122
+ display: flex;
123
+ gap: 10px;
124
+ align-items: center;
125
+ }
126
+
127
+ .btn-group {
128
+ display: flex;
129
+ background: #333;
130
+ border-radius: 4px;
131
+ overflow: hidden;
132
+ }
133
+
134
+ button {
135
+ background: transparent;
136
+ color: #aaa;
137
+ border: none;
138
+ padding: 6px 12px;
139
+ cursor: pointer;
140
+ font-size: 0.85rem;
141
+ transition: all 0.2s;
142
+ }
143
+
144
+ button:hover {
145
+ color: white;
146
+ background: rgba(255, 255, 255, 0.1);
147
+ }
148
+
149
+ button.active {
150
+ background: var(--accent-color);
151
+ color: white;
152
+ }
39
153
 
40
154
  /* Search */
41
- #search-container { position: absolute; top: 15px; left: 15px; z-index: 5; }
42
- #search-input { background: rgba(30,30,30,0.9); border: 1px solid #444; color: white; padding: 8px 12px; border-radius: 20px; width: 200px; outline: none; transition: width 0.3s; }
43
- #search-input:focus { width: 280px; border-color: var(--accent-color); }
155
+ #search-container {
156
+ position: absolute;
157
+ top: 15px;
158
+ left: 15px;
159
+ z-index: 5;
160
+ }
161
+
162
+ #search-input {
163
+ background: rgba(30, 30, 30, 0.9);
164
+ border: 1px solid #444;
165
+ color: white;
166
+ padding: 8px 12px;
167
+ border-radius: 20px;
168
+ width: 200px;
169
+ outline: none;
170
+ transition: width 0.3s;
171
+ }
172
+
173
+ #search-input:focus {
174
+ width: 280px;
175
+ border-color: var(--accent-color);
176
+ }
44
177
 
45
178
  /* Graph */
46
- svg { width: 100%; height: 100%; display: block; }
47
- .node { cursor: pointer; transition: stroke-width 0.1s; }
48
- .link { stroke: #555; stroke-opacity: 0.3; fill: none; pointer-events: none; }
179
+ svg {
180
+ width: 100%;
181
+ height: 100%;
182
+ display: block;
183
+ }
184
+
185
+ .node {
186
+ cursor: pointer;
187
+ transition: stroke-width 0.1s;
188
+ }
189
+
190
+ .link {
191
+ stroke: #555;
192
+ stroke-opacity: 0.3;
193
+ fill: none;
194
+ pointer-events: none;
195
+ }
49
196
 
50
197
  /* Interaction States */
51
- .node.highlight { stroke: #fff; stroke-width: 2px; }
52
- .link.highlight { stroke-opacity: 0.8; stroke: #999; }
53
- .node.faded { opacity: 0.1; }
54
- .link.faded { opacity: 0.05; }
198
+ .node.highlight {
199
+ stroke: #fff;
200
+ stroke-width: 2px;
201
+ }
202
+
203
+ .link.highlight {
204
+ stroke-opacity: 0.8;
205
+ stroke: #999;
206
+ }
207
+
208
+ .node.faded {
209
+ opacity: 0.1;
210
+ }
211
+
212
+ .link.faded {
213
+ opacity: 0.05;
214
+ }
55
215
 
56
216
  /* Details Panel Content */
57
- .detail-section { border-bottom: 1px solid #333; padding-bottom: 10px; }
58
- .detail-section:last-child { border-bottom: none; }
59
- .detail-label { font-size: 0.75rem; color: #888; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 4px; }
60
- .detail-value { font-size: 0.95rem; word-break: break-all; }
61
- .detail-list { list-style: none; padding: 0; margin: 0; max-height: 150px; overflow-y: auto; font-size: 0.85rem; }
62
- .detail-list li { padding: 4px 0; border-bottom: 1px solid #2a2a2a; }
63
- .detail-list a { color: var(--accent-color); text-decoration: none; }
64
- .detail-list a:hover { text-decoration: underline; }
65
-
66
- .status-badge { display: inline-block; padding: 2px 6px; border-radius: 3px; font-size: 0.75rem; font-weight: bold; margin-top: 5px; }
67
- .status-ok { background: #2e7d32; color: white; }
68
- .status-warn { background: #f9a825; color: black; }
69
- .status-error { background: #c62828; color: white; }
217
+ .detail-section {
218
+ border-bottom: 1px solid #333;
219
+ padding-bottom: 10px;
220
+ }
221
+
222
+ .detail-section:last-child {
223
+ border-bottom: none;
224
+ }
225
+
226
+ .detail-label {
227
+ font-size: 0.75rem;
228
+ color: #888;
229
+ text-transform: uppercase;
230
+ letter-spacing: 0.5px;
231
+ margin-bottom: 4px;
232
+ }
233
+
234
+ .detail-value {
235
+ font-size: 0.95rem;
236
+ word-break: break-all;
237
+ }
238
+
239
+ .detail-list {
240
+ list-style: none;
241
+ padding: 0;
242
+ margin: 0;
243
+ max-height: 150px;
244
+ overflow-y: auto;
245
+ font-size: 0.85rem;
246
+ }
247
+
248
+ .detail-list li {
249
+ padding: 4px 0;
250
+ border-bottom: 1px solid #2a2a2a;
251
+ }
252
+
253
+ .detail-list a {
254
+ color: var(--accent-color);
255
+ text-decoration: none;
256
+ }
257
+
258
+ .detail-list a:hover {
259
+ text-decoration: underline;
260
+ }
261
+
262
+ .status-badge {
263
+ display: inline-block;
264
+ padding: 2px 6px;
265
+ border-radius: 3px;
266
+ font-size: 0.75rem;
267
+ font-weight: bold;
268
+ margin-top: 5px;
269
+ }
270
+
271
+ .status-ok {
272
+ background: #2e7d32;
273
+ color: white;
274
+ }
275
+
276
+ .status-warn {
277
+ background: #f9a825;
278
+ color: black;
279
+ }
280
+
281
+ .status-error {
282
+ background: #c62828;
283
+ color: white;
284
+ }
70
285
 
71
286
  /* Tooltip */
72
- #tooltip { position: absolute; background: rgba(20,20,20,0.95); color: white; padding: 10px; border-radius: 6px; pointer-events: none; font-size: 12px; z-index: 100; box-shadow: 0 4px 15px rgba(0,0,0,0.5); border: 1px solid #444; display: none; transform: translate(-50%, -100%); margin-top: -10px; white-space: nowrap; }
287
+ #tooltip {
288
+ position: absolute;
289
+ background: rgba(20, 20, 20, 0.95);
290
+ color: white;
291
+ padding: 10px;
292
+ border-radius: 6px;
293
+ pointer-events: none;
294
+ font-size: 12px;
295
+ z-index: 100;
296
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.5);
297
+ border: 1px solid #444;
298
+ display: none;
299
+ transform: translate(-50%, -100%);
300
+ margin-top: -10px;
301
+ white-space: nowrap;
302
+ }
73
303
 
74
304
  /* Responsive Sidebar */
75
305
  @media (max-width: 768px) {
76
- #details-panel { position: absolute; right: 0; top: 0; bottom: 0; z-index: 20; box-shadow: -5px 0 15px rgba(0,0,0,0.5); transform: translateX(100%); transition: transform 0.3s ease; }
77
- #details-panel.visible { transform: translateX(0); }
78
- #metrics-summary { display: none; }
306
+ #details-panel {
307
+ position: absolute;
308
+ right: 0;
309
+ top: 0;
310
+ bottom: 0;
311
+ z-index: 20;
312
+ box-shadow: -5px 0 15px rgba(0, 0, 0, 0.5);
313
+ transform: translateX(100%);
314
+ transition: transform 0.3s ease;
315
+ }
316
+
317
+ #details-panel.visible {
318
+ transform: translateX(0);
319
+ }
320
+
321
+ #metrics-summary {
322
+ display: none;
323
+ }
79
324
  }
80
325
  </style>
81
326
  </head>
327
+
82
328
  <body>
83
329
  <header>
84
- <div class="brand"><span>Crawlith</span> SiteGraph</div>
330
+ <div class="brand"><span>Crawlith</span> Crawl</div>
85
331
 
86
332
  <div id="metrics-summary">
87
333
  <div class="metric"><span class="metric-value" id="m-pages">-</span><span class="metric-label">Pages</span></div>
88
- <div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span></div>
89
- <div class="metric"><span class="metric-value" id="m-eff">-</span><span class="metric-label">Efficiency</span></div>
90
- <div class="metric"><span class="metric-value" id="m-orphan">-</span><span class="metric-label">Orphans</span></div>
334
+ <div class="metric"><span class="metric-value" id="m-depth">-</span><span class="metric-label">Max Depth</span>
335
+ </div>
336
+ <div class="metric"><span class="metric-value" id="m-eff">-</span><span class="metric-label">Efficiency</span>
337
+ </div>
338
+ <div class="metric"><span class="metric-value" id="m-orphan">-</span><span class="metric-label">Orphans</span>
339
+ </div>
91
340
  </div>
92
341
 
93
342
  <div id="controls">
@@ -129,7 +378,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
129
378
  </div>
130
379
  <div class="detail-section">
131
380
  <div class="detail-label">In-links (<span id="d-in-count">0</span>)</div>
132
- <!-- List could be populated here if we had the reverse index, for now just count -->
381
+ <!-- List could be populated here if we had the reverse index, for now just count -->
133
382
  </div>
134
383
  <div class="detail-section">
135
384
  <div class="detail-label">Out-links (<span id="d-out-count">0</span>)</div>
@@ -187,22 +436,22 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
187
436
 
188
437
  // 2. Fallback to fetching JSON files (for web server usage)
189
438
  if (!graphData || !metricsData) {
190
- try {
191
- const [graphRes, metricsRes] = await Promise.all([
192
- fetch('graph.json'),
193
- fetch('metrics.json')
194
- ]);
195
- if (graphRes.ok && metricsRes.ok) {
196
- graphData = await graphRes.json();
197
- metricsData = await metricsRes.json();
198
- }
199
- } catch (e) {
200
- console.warn("Fetch failed, possibly due to CORS or missing files.", e);
439
+ try {
440
+ const [graphRes, metricsRes] = await Promise.all([
441
+ fetch('graph.json'),
442
+ fetch('metrics.json')
443
+ ]);
444
+ if (graphRes.ok && metricsRes.ok) {
445
+ graphData = await graphRes.json();
446
+ metricsData = await metricsRes.json();
201
447
  }
448
+ } catch (e) {
449
+ console.warn("Fetch failed, possibly due to CORS or missing files.", e);
450
+ }
202
451
  }
203
452
 
204
453
  if (!graphData || !metricsData) {
205
- throw new Error("No data available. Ensure graph.json exists or data is injected.");
454
+ throw new Error("No data available. Ensure graph.json exists or data is injected.");
206
455
  }
207
456
 
208
457
  state.metrics = metricsData;
@@ -323,12 +572,12 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
323
572
  const depthSpacing = height / (state.maxDepth + 2);
324
573
  // Hierarchical: Nodes pushed to Y levels based on depth
325
574
  state.simulation.force("y", d3.forceY(d => {
326
- return (d.depth * depthSpacing) - (height/2) + 50; // Offset to start from top
575
+ return (d.depth * depthSpacing) - (height / 2) + 50; // Offset to start from top
327
576
  }).strength(1));
328
577
  // We rely on "center" force to keep X centered, but maybe add weak forceX?
329
578
  // Let's add weak forceX to prevent wide spread
330
579
  state.simulation.force("x", d3.forceX(0).strength(0.05));
331
- state.simulation.force("center", d3.forceCenter(width/2, height/2)); // Recenter
580
+ state.simulation.force("center", d3.forceCenter(width / 2, height / 2)); // Recenter
332
581
 
333
582
  } else if (mode === 'radial') {
334
583
  const maxRadius = Math.min(width, height) / 2 - 50;
@@ -401,7 +650,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
401
650
  });
402
651
 
403
652
  svg.call(state.zoom)
404
- .call(state.zoom.transform, d3.zoomIdentity.translate(state.width/2, state.height/2).scale(0.8).translate(-state.width/2, -state.height/2)); // Initial zoom out
653
+ .call(state.zoom.transform, d3.zoomIdentity.translate(state.width / 2, state.height / 2).scale(0.8).translate(-state.width / 2, -state.height / 2)); // Initial zoom out
405
654
  }
406
655
 
407
656
  function ticked() {
@@ -431,18 +680,18 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
431
680
  });
432
681
 
433
682
  // Layout Toggle
434
- d3.select("#btn-hierarchical").on("click", function() {
683
+ d3.select("#btn-hierarchical").on("click", function () {
435
684
  setMode('hierarchical', this);
436
685
  });
437
- d3.select("#btn-radial").on("click", function() {
686
+ d3.select("#btn-radial").on("click", function () {
438
687
  setMode('radial', this);
439
688
  });
440
689
 
441
690
  // Authority Toggle
442
- d3.select("#btn-auth-pagerank").on("click", function() {
691
+ d3.select("#btn-auth-pagerank").on("click", function () {
443
692
  setAuthorityMode('pagerank', this);
444
693
  });
445
- d3.select("#btn-auth-structural").on("click", function() {
694
+ d3.select("#btn-auth-structural").on("click", function () {
446
695
  setAuthorityMode('structural', this);
447
696
  });
448
697
  }
@@ -508,7 +757,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
508
757
  function showTooltip(event, d) {
509
758
  // If we are transforming the container, we need to map coordinates correctly or just use pageX/Y
510
759
  tooltip.style("display", "block")
511
- .html(\`<strong>\${new URL(d.url).pathname}</strong><br>Auth: \${(d.authority * 10).toFixed(1)}\`)
760
+ .html(`<strong>${new URL(d.url).pathname}</strong><br>Auth: ${(d.authority * 10).toFixed(1)}`)
512
761
  .style("left", (event.pageX) + "px")
513
762
  .style("top", (event.pageY - 10) + "px");
514
763
  }
@@ -526,8 +775,8 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
526
775
  authContainer.html("");
527
776
  const prVal = (d.pageRankAuthority * 100).toFixed(1);
528
777
  const structVal = d.structuralAuthority.toFixed(3);
529
- authContainer.append("div").html(\`PR: <strong>\${prVal}</strong>\`);
530
- authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(\`In-Degree: \${structVal}\`);
778
+ authContainer.append("div").html(`PR: <strong>${prVal}</strong>`);
779
+ authContainer.append("div").style("color", "#888").style("font-size", "0.8em").text(`In-Degree: ${structVal}`);
531
780
 
532
781
  d3.select("#d-in-count").text(d.inLinks);
533
782
  d3.select("#d-out-count").text(d.outLinks);
@@ -552,7 +801,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
552
801
  .text(new URL(target.url).pathname);
553
802
  });
554
803
  if (adj.out.length > 50) {
555
- list.append("li").text(\`...and \${adj.out.length - 50} more\`);
804
+ list.append("li").text(`...and ${adj.out.length - 50} more`);
556
805
  }
557
806
  } else {
558
807
  list.append("li").text("No outgoing links");
@@ -572,7 +821,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
572
821
  selectNode(found);
573
822
  // Center view on node
574
823
  const transform = d3.zoomIdentity
575
- .translate(state.width/2, state.height/2)
824
+ .translate(state.width / 2, state.height / 2)
576
825
  .scale(2)
577
826
  .translate(-found.x, -found.y);
578
827
 
@@ -589,7 +838,7 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
589
838
  state.height = height;
590
839
  state.simulation.force("center", d3.forceCenter(width / 2, height / 2));
591
840
  if (state.mode === 'hierarchical') {
592
- // Re-evaluate Y force if needed, but usually center is enough
841
+ // Re-evaluate Y force if needed, but usually center is enough
593
842
  }
594
843
  state.simulation.alpha(0.3).restart();
595
844
  });
@@ -620,11 +869,11 @@ export const SITEGRAPH_HTML = `<!DOCTYPE html>
620
869
 
621
870
  // Start
622
871
  if (document.readyState === 'loading') {
623
- document.addEventListener('DOMContentLoaded', init);
872
+ document.addEventListener('DOMContentLoaded', init);
624
873
  } else {
625
- init();
874
+ init();
626
875
  }
627
876
  </script>
628
877
  </body>
629
- </html>
630
- `;
878
+
879
+ </html>
@@ -0,0 +1,3 @@
1
+ export declare function renderCrawlCsvNodes(graphData: any): string;
2
+ export declare function renderCrawlCsvEdges(graphData: any): string;
3
+ export declare function renderCrawlMarkdown(url: string, graphData: any, metrics: any, graph: any): string;
@@ -1,4 +1,4 @@
1
- export function renderSitegraphCsvNodes(graphData) {
1
+ export function renderCrawlCsvNodes(graphData) {
2
2
  const nodeHeaders = ['URL', 'Depth', 'Status', 'InboundLinks', 'OutboundLinks', 'PageRankScore'];
3
3
  const nodeRows = graphData.nodes.map((n) => {
4
4
  const outbound = graphData.edges.filter((e) => e.source === n.url).length;
@@ -8,12 +8,12 @@ export function renderSitegraphCsvNodes(graphData) {
8
8
  });
9
9
  return [nodeHeaders.join(','), ...nodeRows].join('\n');
10
10
  }
11
- export function renderSitegraphCsvEdges(graphData) {
11
+ export function renderCrawlCsvEdges(graphData) {
12
12
  const edgeHeaders = ['Source', 'Target', 'Weight'];
13
13
  const edgeRows = graphData.edges.map((e) => [e.source, e.target, e.weight].join(','));
14
14
  return [edgeHeaders.join(','), ...edgeRows].join('\n');
15
15
  }
16
- export function renderSitegraphMarkdown(url, graphData, metrics, graph) {
16
+ export function renderCrawlMarkdown(url, graphData, metrics, graph) {
17
17
  const md = [
18
18
  `# Crawlith Crawl Summary - ${url}`,
19
19
  '',
@@ -0,0 +1 @@
1
+ export declare const Crawl_HTML: string;
@@ -0,0 +1,7 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
// Absolute path of the `crawl.html` file that sits in the same directory as
// this module. ES modules have no `__dirname`, so the directory is derived
// from `import.meta.url`.
const templatePath = path.join(path.dirname(fileURLToPath(import.meta.url)), 'crawl.html');

/**
 * Contents of `crawl.html`, read synchronously as UTF-8 when this module is
 * first evaluated. Evaluation throws if the file cannot be read.
 */
export const Crawl_HTML = fs.readFileSync(templatePath, 'utf-8');
@@ -0,0 +1,3 @@
1
+ export declare function parseExportFormats(exportOption: string | boolean | undefined): string[];
2
+ export declare function runCrawlExports(formats: string[], outputDir: string, url: string, graphData: any, metrics: any, graphObj: any, report?: any): Promise<void>;
3
+ export declare function runAnalysisExports(formats: string[], outputDir: string, result: any, isLive: boolean): Promise<void>;
@@ -0,0 +1,81 @@
1
+ import path from 'node:path';
2
+ import fs from 'node:fs/promises';
3
+ import chalk from '../utils/chalk.js';
4
+ import { generateHtml, } from './html.js';
5
+ import { renderCrawlMarkdown, renderCrawlCsvNodes, renderCrawlCsvEdges } from './crawlExport.js';
6
+ import { renderAnalysisHtml, renderAnalysisMarkdown, renderAnalysisCsv } from '../analysis/analyze.js';
7
/**
 * Normalizes an export option into a list of lower-cased format names.
 *
 * - `undefined`, `null` or `false` -> no exports (`[]`).
 * - `true` (flag given without a value) -> `['json']`.
 * - a string -> comma-separated names, trimmed, lower-cased, with empty
 *   entries dropped and duplicates removed (first occurrence wins).
 *
 * @param {string | boolean | undefined | null} exportOption - Raw option value.
 * @returns {string[]} Format names in the order they were first listed.
 */
export function parseExportFormats(exportOption) {
    // `== null` matches both undefined and null, so a null option no longer
    // reaches `.split` and throws.
    if (exportOption == null || exportOption === false) {
        return [];
    }
    if (exportOption === true) {
        return ['json'];
    }
    const formats = exportOption
        .split(',')
        .map((entry) => entry.trim().toLowerCase())
        .filter(Boolean);
    // A Set keeps insertion order, so this de-duplicates without reordering.
    return [...new Set(formats)];
}
14
/**
 * Writes the crawl results to `outputDir` in every requested format.
 *
 * Files produced per format:
 * - `json`      -> graph.json, metrics.json and, when `report` is given, report.json
 * - `html`      -> graph.html
 * - `visualize` -> crawl.html
 * - `csv`       -> nodes.csv, edges.csv
 * - `markdown`  -> summary.md, followed by one JSON section per entry of `report.plugins`
 *
 * The output directory is created (recursively) only when at least one
 * format is requested. A confirmation line is logged for each format written.
 *
 * @param {string[]} formats - Format names, e.g. from `parseExportFormats`.
 * @param {string} outputDir - Directory that receives the files.
 * @param {string} url - Crawled URL, passed to the markdown renderer.
 * @param {any} graphData - Graph data handed to the renderers and serialized for `json`.
 * @param {any} metrics - Metrics handed to the renderers and serialized for `json`.
 * @param {any} graphObj - Graph object passed to the markdown renderer.
 * @param {any} [report] - Optional report; its `plugins` map is appended to the markdown summary.
 * @returns {Promise<void>}
 */
export async function runCrawlExports(formats, outputDir, url, graphData, metrics, graphObj, report) {
    if (formats.length === 0) {
        return;
    }
    await fs.mkdir(outputDir, { recursive: true });

    if (formats.includes('json')) {
        await fs.writeFile(path.join(outputDir, 'graph.json'), JSON.stringify(graphData, null, 2));
        await fs.writeFile(path.join(outputDir, 'metrics.json'), JSON.stringify(metrics, null, 2));
        if (report) {
            await fs.writeFile(path.join(outputDir, 'report.json'), JSON.stringify(report, null, 2));
        }
        console.log(chalk.green(`JSON exports saved to ${outputDir} (graph.json, metrics.json${report ? ', report.json' : ''})`));
    }

    if (formats.includes('html')) {
        const html = generateHtml(graphData, metrics);
        await fs.writeFile(path.join(outputDir, 'graph.html'), html);
        console.log(chalk.green(`HTML report saved to ${path.join(outputDir, 'graph.html')}`));
    }

    if (formats.includes('visualize')) {
        // NOTE(review): this renders with the same generateHtml call as the
        // 'html' format; only the file name differs. Confirm that is intended.
        const CrawlHtml = generateHtml(graphData, metrics);
        await fs.writeFile(path.join(outputDir, 'crawl.html'), CrawlHtml);
        console.log(chalk.green(`Visualization saved to ${path.join(outputDir, 'crawl.html')}`));
    }

    if (formats.includes('csv')) {
        await fs.writeFile(path.join(outputDir, 'nodes.csv'), renderCrawlCsvNodes(graphData));
        await fs.writeFile(path.join(outputDir, 'edges.csv'), renderCrawlCsvEdges(graphData));
        console.log(chalk.green(`CSV exports saved to ${outputDir} (nodes.csv, edges.csv)`));
    }

    if (formats.includes('markdown')) {
        // Assemble the whole document in memory and write it once, so a failure
        // part-way through the plugin sections cannot leave a truncated summary
        // on disk and the file is not reopened for every plugin.
        const sections = [renderCrawlMarkdown(url, graphData, metrics, graphObj)];
        if (report && report.plugins) {
            for (const [pluginName, pluginData] of Object.entries(report.plugins)) {
                // Ensure Exporter remains generic without plugin-specific logical branches
                const serialized = JSON.stringify(pluginData, null, 2);
                sections.push(`\n## Plugin: ${pluginName}\n\n\`\`\`json\n${serialized}\n\`\`\`\n`);
            }
        }
        await fs.writeFile(path.join(outputDir, 'summary.md'), sections.join(''));
        console.log(chalk.green(`Markdown summary saved to ${path.join(outputDir, 'summary.md')}`));
    }
}
55
/**
 * Writes an analysis result to `outputDir` in every requested format.
 *
 * Files produced per format:
 * - `json`     -> analysis.json
 * - `html`     -> page.html when `isLive` is true, otherwise analysis.html
 * - `markdown` -> analysis.md
 * - `csv`      -> analysis.csv
 *
 * The output directory is created (recursively) only when at least one
 * format is requested. A confirmation line is logged for each file written.
 *
 * @param {string[]} formats - Format names, e.g. from `parseExportFormats`.
 * @param {string} outputDir - Directory that receives the files.
 * @param {any} result - Analysis result handed to the renderers and serialized for `json`.
 * @param {boolean} isLive - Only affects the HTML file name.
 * @returns {Promise<void>}
 */
export async function runAnalysisExports(formats, outputDir, result, isLive) {
    if (formats.length === 0) {
        return;
    }
    await fs.mkdir(outputDir, { recursive: true });

    if (formats.includes('json')) {
        const jsonPath = path.join(outputDir, 'analysis.json');
        await fs.writeFile(jsonPath, JSON.stringify(result, null, 2));
        console.log(chalk.green(`JSON export saved to ${jsonPath}`));
    }

    if (formats.includes('html')) {
        const htmlPath = path.join(outputDir, isLive ? 'page.html' : 'analysis.html');
        await fs.writeFile(htmlPath, renderAnalysisHtml(result), 'utf-8');
        console.log(chalk.green(`HTML report saved to ${htmlPath}`));
    }

    // NOTE(review): the markdown and csv names used to be chosen by ternaries on
    // `isLive` whose two branches were identical. The names are kept as they
    // were; confirm whether live mode was meant to use a different name, as the
    // HTML export does.
    if (formats.includes('markdown')) {
        const markdownPath = path.join(outputDir, 'analysis.md');
        await fs.writeFile(markdownPath, renderAnalysisMarkdown(result), 'utf-8');
        console.log(chalk.green(`Markdown report saved to ${markdownPath}`));
    }

    if (formats.includes('csv')) {
        const csvPath = path.join(outputDir, 'analysis.csv');
        await fs.writeFile(csvPath, renderAnalysisCsv(result), 'utf-8');
        console.log(chalk.green(`CSV export saved to ${csvPath}`));
    }
}