spec-up-t-healthcheck 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ /**
2
+ * @fileoverview Link checker health check module
3
+ *
4
+ * This module validates links in the generated HTML output file using the linkinator
5
+ * package. It checks all links (internal and external) for broken links, redirects,
6
+ * and other issues. The check requires the output_path from specs.json to locate
7
+ * the index.html file to scan.
8
+ *
9
+ * Key features:
10
+ * - Uses the well-tested linkinator package (113k+ weekly downloads)
11
+ * - Checks both internal and external links
12
+ * - Handles redirects gracefully
13
+ * - Provides detailed information about broken links
14
+ * - Categorizes issues by severity (broken, timeout, errors)
15
+ *
16
+ * @author spec-up-t-healthcheck
17
+ */
18
+
19
+ import { LinkChecker } from 'linkinator';
20
+ import { createHealthCheckResult, createErrorResult } from '../health-check-utils.js';
21
+ import path from 'path';
22
+
23
+ /**
24
+ * The identifier for this health check (exported so other modules can refer to the check by a stable id).
25
+ * @type {string}
26
+ */
27
+ export const CHECK_ID = 'link-checker';
28
+
29
+ /**
30
+ * Human-readable name for this health check; passed as the check name to every result this module creates.
31
+ * @type {string}
32
+ */
33
+ export const CHECK_NAME = 'Link checker';
34
+
35
+ /**
36
+ * Description of what this health check validates.
37
+ * @type {string}
38
+ */
39
+ export const CHECK_DESCRIPTION = 'Validates all links in the generated HTML output using linkinator';
40
+
41
+ /**
42
+ * Timeout for link checking in milliseconds; passed as the `timeout` option of the linkinator check.
43
+ * Set to 30 seconds (30000 ms) to handle slow external links.
44
+ * @type {number}
45
+ */
46
+ const LINK_CHECK_TIMEOUT = 30000;
47
+
48
+ /**
49
+ * Maximum number of concurrent link checks; passed as the `concurrency` option of the linkinator check.
50
+ * Prevents overwhelming servers with too many simultaneous requests.
51
+ * @type {number}
52
+ */
53
+ const MAX_CONCURRENCY = 25;
54
+
55
+ /**
56
+ * Values of a link result's `state` field that indicate a broken link (frozen, so the list cannot be mutated).
57
+ * @type {readonly string[]}
58
+ */
59
+ const BROKEN_STATES = Object.freeze(['BROKEN']);
60
+
61
+ /**
62
+ * Values of a link result's `state` field that indicate a skipped link (frozen, so the list cannot be mutated).
63
+ * @type {readonly string[]}
64
+ */
65
+ const SKIPPED_STATES = Object.freeze(['SKIPPED']);
66
+
/**
 * Validates all links in the generated HTML output file.
 *
 * This health check scans the index.html file in the output_path directory
 * specified in specs.json. It uses the linkinator package to check the links
 * found in that page and reports broken links and redirects.
 *
 * The check performs the following steps:
 * - Verifies specs.json exists and can be read and parsed as JSON
 * - Extracts output_path from the first entry of the "specs" array
 * - Checks that the output directory exists
 * - Verifies index.html exists in the output directory
 * - Scans the links in the HTML file (no recursive crawling)
 * - Reports broken links with details (URL, status code, failure text, parent page)
 * - Categorizes broken links by type (404, 5xx, timeouts, other)
 *
 * Every precondition failure yields a 'skip' result rather than an error, so a
 * repository that has not been rendered yet does not fail the health check.
 * Any unexpected exception is converted into an error result.
 *
 * @param {import('../providers.js').Provider} provider - The provider instance for file operations
 * @returns {Promise<import('../health-check-utils.js').HealthCheckResult>} The health check result with link validation details
 *
 * @example
 * ```javascript
 * const provider = createLocalProvider('/path/to/repo');
 * const result = await checkLinks(provider);
 * console.log(result.status); // 'pass', 'fail', 'warn' or 'skip'
 * console.log(result.details.brokenLinks); // Number of broken links
 * console.log(result.details.brokenLinkDetails); // Broken links grouped by category
 * ```
 */
export async function checkLinks(provider) {
  try {
    // Step 1: Read and parse specs.json
    const specsExists = await provider.fileExists('specs.json');
    if (!specsExists) {
      return createHealthCheckResult(
        CHECK_NAME,
        'skip',
        'specs.json not found - cannot determine output path',
        {
          suggestions: [
            'Create a specs.json file in your repository root',
            'Link checking requires specs.json to locate the generated HTML'
          ]
        }
      );
    }

    let specsData;
    try {
      const specsContent = await provider.readFile('specs.json');
      specsData = JSON.parse(specsContent);
    } catch (parseError) {
      return createHealthCheckResult(
        CHECK_NAME,
        'skip',
        'specs.json contains invalid JSON - cannot determine output path',
        {
          parseError: parseError.message
        }
      );
    }

    // Step 2: Extract output_path from specs.json
    // JSON.parse may legitimately return null (or a primitive), so guard before
    // dereferencing instead of letting a TypeError turn a "skip" into an error.
    const specs = specsData ? specsData.specs : undefined;
    if (!Array.isArray(specs) || specs.length === 0) {
      return createHealthCheckResult(
        CHECK_NAME,
        'skip',
        'specs.json does not contain valid specs array - cannot determine output path',
        {
          suggestions: ['Ensure specs.json has a valid "specs" array with at least one spec configuration']
        }
      );
    }

    // Only the first spec configuration is considered.
    const spec = specs[0];
    const outputPath = spec ? spec.output_path : undefined;

    if (!outputPath) {
      return createHealthCheckResult(
        CHECK_NAME,
        'skip',
        'output_path not specified in specs.json - cannot locate HTML file',
        {
          suggestions: ['Add "output_path" field to your spec configuration in specs.json']
        }
      );
    }

    // Step 3: Check if output directory exists
    const outputDirExists = await provider.directoryExists(outputPath);
    if (!outputDirExists) {
      return createHealthCheckResult(
        CHECK_NAME,
        'skip',
        `Output directory "${outputPath}" does not exist - run spec-up-t render first`,
        {
          outputPath,
          suggestions: [
            'Run "npm run render" or "npm run dev" to generate the HTML output',
            `The output directory "${outputPath}" will be created after rendering`
          ]
        }
      );
    }

    // Step 4: Check if index.html exists in output directory
    const indexPath = path.join(outputPath, 'index.html');
    const indexExists = await provider.fileExists(indexPath);

    if (!indexExists) {
      return createHealthCheckResult(
        CHECK_NAME,
        'skip',
        `index.html not found in "${outputPath}" - run spec-up-t render first`,
        {
          expectedPath: indexPath,
          suggestions: [
            'Run "npm run render" or "npm run dev" to generate index.html',
            'Ensure the rendering process completes successfully'
          ]
        }
      );
    }

    // Step 5: Perform link checking using linkinator
    // Linkinator is given the directory path (resolved against the provider's
    // base path); it is expected to serve it and pick up index.html itself.
    const outputDirPath = path.join(provider.getBasePath(), outputPath);

    const checker = new LinkChecker();

    // Results are collected from the 'link' events; the value resolved by
    // check() itself is not needed.
    const allLinks = [];
    const brokenLinks = [];
    const warnings = [];

    checker.on('link', (result) => {
      allLinks.push(result);

      const isBroken = BROKEN_STATES.includes(result.state);

      if (isBroken) {
        brokenLinks.push({
          url: result.url,
          status: result.status,
          // Prefer an explicit statusText when present, otherwise fall back to
          // the failure details so timeouts/network errors keep their message.
          statusText: result.statusText || describeFailure(result.failureDetails) || 'Unknown error',
          parent: result.parent || 'Unknown'
        });
      }

      // Track redirects (3xx that were not reported as broken) as warnings
      if (result.status >= 300 && result.status < 400 && !isBroken) {
        warnings.push({
          url: result.url,
          status: result.status,
          message: 'Link redirects',
          parent: result.parent || 'Unknown'
        });
      }
    });

    // Run the link check
    await checker.check({
      path: outputDirPath,
      recurse: false, // Only check links in this file, don't crawl
      timeout: LINK_CHECK_TIMEOUT,
      concurrency: MAX_CONCURRENCY,
      retry: true, // Retry on 429 (rate limit)
      retryErrors: false, // Don't retry on other errors to save time
    });

    // Step 6: Analyze results and create report
    const totalLinks = allLinks.length;
    const passedLinks = allLinks.filter(link => link.state === 'OK').length;
    const skippedLinks = allLinks.filter(link => SKIPPED_STATES.includes(link.state)).length;

    // Categorize broken links by status code
    const categorizedBroken = categorizeBrokenLinks(brokenLinks);

    // Determine status: broken links win over redirects, which win over "nothing to check"
    let status = 'pass';
    let message = `All ${passedLinks} links are valid`;

    if (brokenLinks.length > 0) {
      status = 'fail';
      // Create detailed message with first broken link
      const firstBroken = brokenLinks[0];
      message = `Found ${brokenLinks.length} broken link(s) out of ${totalLinks} total links. First broken: ${firstBroken.url} (${firstBroken.status || 'Error'})`;

      // If there are multiple broken links, add count
      if (brokenLinks.length > 1) {
        message += ` and ${brokenLinks.length - 1} more`;
      }
    } else if (warnings.length > 0) {
      status = 'warn';
      message = `All links are valid, but ${warnings.length} link(s) redirect`;
    } else if (totalLinks === 0) {
      status = 'warn';
      message = 'No links found to check in the HTML file';
    }

    return createHealthCheckResult(
      CHECK_NAME,
      status,
      message,
      {
        totalLinks,
        passedLinks,
        brokenLinks: brokenLinks.length,
        skippedLinks,
        redirects: warnings.length,
        outputPath,
        indexPath,
        brokenLinkDetails: categorizedBroken,
        redirectDetails: warnings.slice(0, 10), // Limit to first 10 redirects
        suggestions: brokenLinks.length > 0 ? generateSuggestions(categorizedBroken) : []
      }
    );

  } catch (error) {
    return createErrorResult(CHECK_NAME, error);
  }
}

/**
 * Extracts a human-readable failure description from a link result's failure details.
 *
 * NOTE(review): assumes linkinator attaches `failureDetails` as an array whose
 * entries are Error objects (with `message`) or HTTP responses (with
 * `statusText`) - confirm against the linkinator version in use. Anything else
 * is ignored, so an unexpected shape simply yields an empty string.
 *
 * @param {unknown} failureDetails - The `failureDetails` value of a link result
 * @returns {string} The first non-empty message/statusText found, or '' when none
 * @private
 */
function describeFailure(failureDetails) {
  if (!Array.isArray(failureDetails)) {
    return '';
  }

  for (const detail of failureDetails) {
    if (!detail) {
      continue;
    }
    if (typeof detail.message === 'string' && detail.message !== '') {
      return detail.message;
    }
    if (typeof detail.statusText === 'string' && detail.statusText !== '') {
      return detail.statusText;
    }
  }

  return '';
}
289
+
/**
 * Categorizes broken links by HTTP status code or error type.
 * This helps identify patterns in link failures (e.g., all 404s vs timeouts).
 *
 * Categories are checked in order and each link lands in exactly one:
 * 1. `notFound`    - status is exactly 404
 * 2. `serverError` - status is in the 5xx range (so a 504 stays here even if
 *                    its text mentions a timeout)
 * 3. `timeout`     - the failure text looks like a timeout ("timeout",
 *                    "timed out", "ETIMEDOUT", ...)
 * 4. `other`       - everything else
 *
 * @param {Array<Object>} brokenLinks - Array of broken link objects (`status`, `statusText`, ...)
 * @returns {Object} Categorized broken links: `{ notFound, serverError, timeout, other }`
 * @private
 */
function categorizeBrokenLinks(brokenLinks) {
  const categories = {
    notFound: [],     // 404
    serverError: [],  // 5xx
    timeout: [],      // Timeout errors
    other: []         // Other errors
  };

  for (const link of brokenLinks) {
    if (link.status === 404) {
      categories.notFound.push(link);
    } else if (link.status >= 500 && link.status < 600) {
      categories.serverError.push(link);
    } else if (isTimeoutFailure(link)) {
      categories.timeout.push(link);
    } else {
      categories.other.push(link);
    }
  }

  return categories;
}

/**
 * Tells whether a broken link's failure text describes a timeout.
 *
 * Matches "timeout", "time out", "timed out", "timed-out" and "timedout"
 * case-insensitively, which also covers error codes such as ETIMEDOUT and
 * ESOCKETTIMEDOUT. A missing or non-string `statusText` is never a timeout.
 *
 * @param {Object} link - Broken link object
 * @returns {boolean} True when the failure text indicates a timeout
 * @private
 */
function isTimeoutFailure(link) {
  if (typeof link.statusText !== 'string') {
    return false;
  }
  return /timed?[\s-]?out/.test(link.statusText.toLowerCase());
}
320
+
/**
 * Builds the list of remediation hints for a set of categorized broken links.
 *
 * Each non-empty category contributes two strings, in the fixed order
 * notFound, serverError, timeout, other: a headline containing the number of
 * affected links, followed by a piece of advice. Empty categories contribute
 * nothing.
 *
 * @param {Object} categorizedBroken - Categorized broken links (`notFound`, `serverError`, `timeout`, `other` arrays)
 * @returns {string[]} Array of suggestion strings
 * @private
 */
function generateSuggestions(categorizedBroken) {
  // One row per category: [number of links, headline builder, follow-up advice]
  const rows = [
    [
      categorizedBroken.notFound.length,
      (count) => `Fix ${count} broken link(s) with 404 Not Found errors`,
      'Check for typos in URLs or removed pages'
    ],
    [
      categorizedBroken.serverError.length,
      (count) => `${count} link(s) returned server errors (5xx)`,
      'These may be temporary - verify the linked servers are operational'
    ],
    [
      categorizedBroken.timeout.length,
      (count) => `${count} link(s) timed out`,
      'Check if these URLs are accessible and responding'
    ],
    [
      categorizedBroken.other.length,
      (count) => `${count} link(s) failed with other errors`,
      'Review the details to understand the specific issues'
    ]
  ];

  const hints = [];
  for (const [count, headline, advice] of rows) {
    if (count > 0) {
      hints.push(headline(count), advice);
    }
  }
  return hints;
}
@@ -24,7 +24,7 @@ export const CHECK_ID = 'package-json';
24
24
  * Human-readable name for this health check.
25
25
  * @type {string}
26
26
  */
27
- export const CHECK_NAME = 'Package.json Validation';
27
+ export const CHECK_NAME = 'package.json';
28
28
 
29
29
  /**
30
30
  * Description of what this health check validates.