@djangocfg/seo 2.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +192 -0
  2. package/dist/cli.d.ts +1 -0
  3. package/dist/cli.mjs +3780 -0
  4. package/dist/cli.mjs.map +1 -0
  5. package/dist/crawler/index.d.ts +88 -0
  6. package/dist/crawler/index.mjs +610 -0
  7. package/dist/crawler/index.mjs.map +1 -0
  8. package/dist/google-console/index.d.ts +95 -0
  9. package/dist/google-console/index.mjs +539 -0
  10. package/dist/google-console/index.mjs.map +1 -0
  11. package/dist/index.d.ts +285 -0
  12. package/dist/index.mjs +3236 -0
  13. package/dist/index.mjs.map +1 -0
  14. package/dist/link-checker/index.d.ts +76 -0
  15. package/dist/link-checker/index.mjs +326 -0
  16. package/dist/link-checker/index.mjs.map +1 -0
  17. package/dist/markdown-report-B3QdDzxE.d.ts +193 -0
  18. package/dist/reports/index.d.ts +24 -0
  19. package/dist/reports/index.mjs +836 -0
  20. package/dist/reports/index.mjs.map +1 -0
  21. package/dist/routes/index.d.ts +69 -0
  22. package/dist/routes/index.mjs +372 -0
  23. package/dist/routes/index.mjs.map +1 -0
  24. package/dist/scanner-Cz4Th2Pt.d.ts +60 -0
  25. package/dist/types/index.d.ts +144 -0
  26. package/dist/types/index.mjs +3 -0
  27. package/dist/types/index.mjs.map +1 -0
  28. package/package.json +114 -0
  29. package/src/analyzer.ts +256 -0
  30. package/src/cli/commands/audit.ts +260 -0
  31. package/src/cli/commands/content.ts +180 -0
  32. package/src/cli/commands/crawl.ts +32 -0
  33. package/src/cli/commands/index.ts +12 -0
  34. package/src/cli/commands/inspect.ts +60 -0
  35. package/src/cli/commands/links.ts +41 -0
  36. package/src/cli/commands/robots.ts +36 -0
  37. package/src/cli/commands/routes.ts +126 -0
  38. package/src/cli/commands/sitemap.ts +48 -0
  39. package/src/cli/index.ts +149 -0
  40. package/src/cli/types.ts +40 -0
  41. package/src/config.ts +207 -0
  42. package/src/content/index.ts +51 -0
  43. package/src/content/link-checker.ts +182 -0
  44. package/src/content/link-fixer.ts +188 -0
  45. package/src/content/scanner.ts +200 -0
  46. package/src/content/sitemap-generator.ts +321 -0
  47. package/src/content/types.ts +140 -0
  48. package/src/crawler/crawler.ts +425 -0
  49. package/src/crawler/index.ts +10 -0
  50. package/src/crawler/robots-parser.ts +171 -0
  51. package/src/crawler/sitemap-validator.ts +204 -0
  52. package/src/google-console/analyzer.ts +317 -0
  53. package/src/google-console/auth.ts +100 -0
  54. package/src/google-console/client.ts +281 -0
  55. package/src/google-console/index.ts +9 -0
  56. package/src/index.ts +144 -0
  57. package/src/link-checker/index.ts +461 -0
  58. package/src/reports/claude-context.ts +149 -0
  59. package/src/reports/generator.ts +244 -0
  60. package/src/reports/index.ts +27 -0
  61. package/src/reports/json-report.ts +320 -0
  62. package/src/reports/markdown-report.ts +246 -0
  63. package/src/reports/split-report.ts +252 -0
  64. package/src/routes/analyzer.ts +324 -0
  65. package/src/routes/index.ts +25 -0
  66. package/src/routes/scanner.ts +298 -0
  67. package/src/types/index.ts +222 -0
  68. package/src/utils/index.ts +154 -0
package/dist/index.mjs ADDED
@@ -0,0 +1,3236 @@
1
+ import consola2 from 'consola';
2
+ import { searchconsole } from '@googleapis/searchconsole';
3
+ import pLimit from 'p-limit';
4
+ import pRetry from 'p-retry';
5
+ import { JWT } from 'google-auth-library';
6
+ import fs, { existsSync, readdirSync, rmSync, mkdirSync, writeFileSync, readFileSync, statSync } from 'fs';
7
+ import { load } from 'cheerio';
8
+ import robotsParser from 'robots-parser';
9
+ import path, { join, dirname } from 'path';
10
+ import { mkdir, writeFile } from 'fs/promises';
11
+ import * as linkinator from 'linkinator';
12
+ import chalk from 'chalk';
13
+
14
+ // src/analyzer.ts
15
// OAuth scopes requested for the service-account JWT client:
// the read-only Search Console scope plus the full "webmasters" scope.
const SCOPES = [
  "https://www.googleapis.com/auth/webmasters.readonly",
  "https://www.googleapis.com/auth/webmasters",
];
19
/**
 * Resolve Google service-account credentials.
 *
 * Sources are tried in this order, and the first one available wins:
 *   1. `config.serviceAccountJson` (returned as-is, not copied or validated),
 *   2. the JSON file at `config.serviceAccountPath`,
 *   3. the `GOOGLE_SERVICE_ACCOUNT_JSON` environment variable,
 *   4. `./service_account.json` relative to the current working directory.
 *
 * @param {object} config - Options; may carry `serviceAccountJson` or `serviceAccountPath`.
 * @returns {object} The credentials object.
 * @throws {Error} If `serviceAccountPath` is given but the file does not exist,
 *   or if none of the sources is available.
 * @throws {SyntaxError} If a source holds malformed JSON. The message names the
 *   offending source and the original parse error is attached as `cause`.
 */
function loadCredentials(config) {
  // A bare "Unexpected token ..." does not tell the user which of the
  // possible sources is broken, so re-throw with the source named.
  const parseJson = (text, source) => {
    try {
      return JSON.parse(text);
    } catch (error) {
      throw new SyntaxError(`Invalid JSON in ${source}: ${error.message}`, { cause: error });
    }
  };
  const readJsonFile = (filePath) =>
    parseJson(readFileSync(filePath, "utf-8"), `service account file ${filePath}`);

  if (config.serviceAccountJson) {
    return config.serviceAccountJson;
  }

  if (config.serviceAccountPath) {
    // An explicitly configured path must exist; do not fall through silently.
    if (!existsSync(config.serviceAccountPath)) {
      throw new Error(`Service account file not found: ${config.serviceAccountPath}`);
    }
    return readJsonFile(config.serviceAccountPath);
  }

  const envJson = process.env.GOOGLE_SERVICE_ACCOUNT_JSON;
  if (envJson) {
    return parseJson(envJson, "GOOGLE_SERVICE_ACCOUNT_JSON env variable");
  }

  const defaultPath = "./service_account.json";
  if (existsSync(defaultPath)) {
    return readJsonFile(defaultPath);
  }

  throw new Error(
    "No service account credentials found. Provide serviceAccountPath, serviceAccountJson, or set GOOGLE_SERVICE_ACCOUNT_JSON env variable."
  );
}
43
/**
 * Build a JWT auth client from the resolved service-account credentials.
 *
 * The account email is also stored on the client as `_serviceAccountEmail`
 * so that later diagnostics can print it.
 *
 * @param {object} config - Passed straight through to `loadCredentials`.
 * @returns {JWT} Client configured with the credentials' email, key and `SCOPES`.
 */
function createAuthClient(config) {
  const { client_email: accountEmail, private_key: privateKey } = loadCredentials(config);
  const jwtClient = new JWT({ email: accountEmail, key: privateKey, scopes: SCOPES });
  jwtClient._serviceAccountEmail = accountEmail;
  return jwtClient;
}
53
/**
 * Check that the auth client can authorize, logging guidance either way.
 *
 * @param {object} auth - Client exposing `authorize()`; its `_serviceAccountEmail`
 *   (or `email`) is printed in the log output.
 * @param {string} [siteUrl] - Site URL used only to build a link to the GSC
 *   user-management page. An unparsable value is reported with a warning and
 *   never affects the result.
 * @returns {Promise<boolean>} `true` when `auth.authorize()` resolves, `false` when it rejects.
 */
async function verifyAuth(auth, siteUrl) {
  const email = auth._serviceAccountEmail || auth.email;

  // Only the authorize() call decides the outcome. Keeping the hint-building
  // code outside this try prevents a malformed siteUrl from being reported
  // as an authentication failure.
  try {
    await auth.authorize();
  } catch (error) {
    consola2.error("Authentication failed");
    consola2.info(`Service account email: ${email}`);
    consola2.info("Make sure this email is added to GSC with Full access");
    // Keep the underlying reason available at debug level instead of dropping it.
    consola2.debug("Authentication error:", error);
    return false;
  }

  consola2.success("Google Search Console authentication verified");
  consola2.info(`Service account: ${email}`);

  if (siteUrl) {
    let domain;
    try {
      domain = new URL(siteUrl).hostname;
    } catch {
      domain = undefined;
    }
    if (domain) {
      const gscUrl = `https://search.google.com/search-console/users?resource_id=sc-domain%3A${domain}`;
      consola2.info(`Ensure this email has Full access in GSC: ${gscUrl}`);
    } else {
      consola2.warn(`Could not build GSC access link: invalid site URL "${siteUrl}"`);
    }
  }
  return true;
}
72
+
73
+ // src/google-console/client.ts
74
/**
 * Thin wrapper around the Search Console API client: authentication check,
 * URL inspection (single and batch), search analytics and sitemap listing.
 */
var GoogleConsoleClient = class {
  auth;
  searchconsole;
  siteUrl;
  // Site identifier in the form sent to the GSC API (may be `sc-domain:xxx`).
  gscSiteUrl;
  // Max 2 concurrent requests (Cloudflare-friendly)
  limit = pLimit(2);
  // Delay between requests in ms
  requestDelay = 500;

  /**
   * @param {object} config - Must carry `siteUrl`; may carry `gscSiteUrl` plus
   *   the credential options consumed by `createAuthClient`.
   * @throws {TypeError} If `gscSiteUrl` is absent and `siteUrl` is not a valid URL.
   */
  constructor(config) {
    this.auth = createAuthClient(config);
    this.searchconsole = searchconsole({ version: "v1", auth: this.auth });
    this.siteUrl = config.siteUrl;
    if (config.gscSiteUrl) {
      this.gscSiteUrl = config.gscSiteUrl;
    } else {
      // No explicit property given: derive a domain property from the hostname.
      const domain = new URL(config.siteUrl).hostname;
      this.gscSiteUrl = `sc-domain:${domain}`;
    }
    consola2.debug(`GSC site URL: ${this.gscSiteUrl}`);
  }

  /**
   * Delay helper for rate limiting.
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>}
   */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Verify the client is authenticated.
   * @returns {Promise<boolean>}
   */
  async verify() {
    return verifyAuth(this.auth, this.siteUrl);
  }

  /**
   * List all sites in Search Console.
   * @returns {Promise<string[]>} Site URLs (an empty string where the API omitted one).
   * @throws Re-throws the API error after logging it.
   */
  async listSites() {
    try {
      const response = await this.searchconsole.sites.list();
      return response.data.siteEntry?.map((site) => site.siteUrl || "") || [];
    } catch (error) {
      consola2.error("Failed to list sites:", error);
      throw error;
    }
  }

  /**
   * Inspect a single URL, going through the concurrency limiter and retrying
   * failed attempts with backoff.
   *
   * @param {string} url - URL to inspect.
   * @returns {Promise<object>} Result mapped by `mapInspectionResult`.
   * @throws {Error} When all attempts fail or the response has no `indexStatusResult`.
   */
  async inspectUrl(url) {
    return this.limit(async () => {
      return pRetry(
        async () => {
          const response = await this.searchconsole.urlInspection.index.inspect({
            requestBody: {
              inspectionUrl: url,
              siteUrl: this.gscSiteUrl,
              languageCode: "en-US"
            }
          });
          const result = response.data.inspectionResult;
          if (!result?.indexStatusResult) {
            throw new Error(`No inspection result for URL: ${url}`);
          }
          return this.mapInspectionResult(url, result);
        },
        {
          retries: 2,
          minTimeout: 2e3,
          maxTimeout: 1e4,
          // Exponential backoff
          factor: 2,
          onFailedAttempt: (ctx) => {
            // Warn once, on the final attempt, rather than on every retry.
            if (ctx.retriesLeft === 0) {
              consola2.warn(`Failed: ${url}`);
            }
          }
        }
      );
    });
  }

  /**
   * Inspect multiple URLs one after another.
   * Stops early after too many consecutive errors (likely rate limiting).
   *
   * @param {string[]} urls - URLs to inspect.
   * @returns {Promise<object[]>} Successful results only; failures are logged and omitted.
   */
  async inspectUrls(urls) {
    consola2.info(`Inspecting ${urls.length} URLs...`);
    const results = [];
    const errors = [];
    let consecutiveErrors = 0;
    const maxConsecutiveErrors = 3;
    const lastIndex = urls.length - 1;
    for (const [index, url] of urls.entries()) {
      try {
        const result = await this.inspectUrl(url);
        results.push(result);
        consecutiveErrors = 0;
        // Pace successive requests, but do not sleep after the final URL:
        // nothing follows it, so the wait would only add latency.
        if (index < lastIndex) {
          await this.delay(this.requestDelay);
        }
      } catch (error) {
        const err = error;
        errors.push({ url, error: err });
        consecutiveErrors++;
        if (consecutiveErrors >= maxConsecutiveErrors) {
          console.log("");
          consola2.error(`Stopping after ${maxConsecutiveErrors} consecutive failures`);
          this.showRateLimitHelp();
          break;
        }
      }
    }
    // When the loop bailed out early, the "Stopping after ..." message already covers it.
    if (errors.length > 0 && consecutiveErrors < maxConsecutiveErrors) {
      consola2.warn(`Failed to inspect ${errors.length} URLs`);
    }
    if (results.length > 0) {
      consola2.success(`Successfully inspected ${results.length}/${urls.length} URLs`);
    } else if (errors.length > 0) {
      consola2.warn("No URLs were successfully inspected");
    }
    return results;
  }

  /**
   * Show help message for rate limiting issues.
   */
  showRateLimitHelp() {
    consola2.info("Possible causes:");
    consola2.info(" 1. Google API quota exceeded (2000 requests/day)");
    consola2.info(" 2. Cloudflare blocking Google's crawler");
    consola2.info(" 3. Service account not added to GSC");
    console.log("");
    consola2.info("Solutions:");
    consola2.info(" \u2022 Check GSC access: https://search.google.com/search-console/users");
    console.log("");
    consola2.info(" \u2022 Cloudflare WAF rule to allow Googlebot:");
    consola2.info(" 1. Dashboard \u2192 Security \u2192 WAF \u2192 Custom rules \u2192 Create rule");
    consola2.info(' 2. Name: "Allow Googlebot"');
    consola2.info(' 3. Field: "Known Bots" | Operator: "equals" | Value: "true"');
    consola2.info(' 4. Or click "Edit expression" and paste: (cf.client.bot)');
    consola2.info(" 5. Action: Skip \u2192 check all rules");
    consola2.info(" 6. Deploy");
    consola2.info(" Docs: https://developers.cloudflare.com/waf/custom-rules/use-cases/allow-traffic-from-verified-bots/");
    console.log("");
  }

  /**
   * Get search analytics data.
   *
   * @param {object} options - `startDate` and `endDate` are forwarded as given;
   *   `dimensions` defaults to `["page"]` and `rowLimit` to 1000.
   * @returns {Promise<object[]>} Rows from the API (empty array when none).
   * @throws Re-throws the API error after logging it.
   */
  async getSearchAnalytics(options) {
    try {
      const response = await this.searchconsole.searchanalytics.query({
        siteUrl: this.gscSiteUrl,
        requestBody: {
          startDate: options.startDate,
          endDate: options.endDate,
          dimensions: options.dimensions || ["page"],
          rowLimit: options.rowLimit || 1e3
        }
      });
      return response.data.rows || [];
    } catch (error) {
      consola2.error("Failed to get search analytics:", error);
      throw error;
    }
  }

  /**
   * Get list of sitemaps.
   * @returns {Promise<object[]>} Sitemap entries from the API (empty array when none).
   * @throws Re-throws the API error after logging it.
   */
  async getSitemaps() {
    try {
      const response = await this.searchconsole.sitemaps.list({
        siteUrl: this.gscSiteUrl
      });
      return response.data.sitemap || [];
    } catch (error) {
      consola2.error("Failed to get sitemaps:", error);
      throw error;
    }
  }

  /**
   * Map an API inspection result to this package's result shape. Missing enum
   * fields get `*_UNSPECIFIED` placeholders and missing optional values become
   * `undefined`.
   *
   * @param {string} url - The inspected URL.
   * @param {object} result - `inspectionResult` from the API; must contain `indexStatusResult`.
   * @returns {object} The normalized inspection result.
   */
  mapInspectionResult(url, result) {
    const indexStatus = result.indexStatusResult;
    return {
      url,
      inspectionResultLink: result.inspectionResultLink || void 0,
      indexStatusResult: {
        verdict: indexStatus.verdict || "VERDICT_UNSPECIFIED",
        coverageState: indexStatus.coverageState || "COVERAGE_STATE_UNSPECIFIED",
        indexingState: indexStatus.indexingState || "INDEXING_STATE_UNSPECIFIED",
        robotsTxtState: indexStatus.robotsTxtState || "ROBOTS_TXT_STATE_UNSPECIFIED",
        pageFetchState: indexStatus.pageFetchState || "PAGE_FETCH_STATE_UNSPECIFIED",
        lastCrawlTime: indexStatus.lastCrawlTime || void 0,
        crawledAs: indexStatus.crawledAs,
        googleCanonical: indexStatus.googleCanonical || void 0,
        userCanonical: indexStatus.userCanonical || void 0,
        sitemap: indexStatus.sitemap || void 0,
        referringUrls: indexStatus.referringUrls || void 0
      },
      mobileUsabilityResult: result.mobileUsabilityResult ? {
        verdict: result.mobileUsabilityResult.verdict || "VERDICT_UNSPECIFIED",
        issues: result.mobileUsabilityResult.issues?.map((issue) => ({
          issueType: issue.issueType || "UNKNOWN",
          message: issue.message || ""
        }))
      } : void 0,
      richResultsResult: result.richResultsResult ? {
        verdict: result.richResultsResult.verdict || "VERDICT_UNSPECIFIED",
        detectedItems: result.richResultsResult.detectedItems?.map((item) => ({
          richResultType: item.richResultType || "UNKNOWN",
          items: item.items?.map((i) => ({
            name: i.name || "",
            issues: i.issues?.map((issue) => ({
              issueMessage: issue.issueMessage || "",
              severity: issue.severity || "WARNING"
            }))
          }))
        }))
      } : void 0
    };
  }
};
293
+
294
+ // src/google-console/analyzer.ts
295
/**
 * Collect the issues of every inspection result into one list, ordered by
 * the rank that `severityOrder` assigns to each issue's severity.
 *
 * @param {Iterable<object>} results - URL inspection results.
 * @returns {object[]} All issues found by `analyzeUrlInspection`, sorted by severity rank.
 */
function analyzeInspectionResults(results) {
  const collected = [];
  for (const inspection of results) {
    for (const issue of analyzeUrlInspection(inspection)) {
      collected.push(issue);
    }
  }
  const bySeverity = (left, right) => severityOrder(left.severity) - severityOrder(right.severity);
  return collected.sort(bySeverity);
}
302
// Issue templates keyed by `indexStatusResult.coverageState`.
// `metadataKeys` lists the index-status fields copied into the issue metadata.
const GSC_COVERAGE_STATE_ISSUES = new Map([
  ["CRAWLED_CURRENTLY_NOT_INDEXED", {
    idPrefix: "crawled-not-indexed",
    category: "indexing",
    severity: "error",
    title: "Page crawled but not indexed",
    description: "Google crawled this page but decided not to index it. This often indicates low content quality or duplicate content.",
    recommendation: "Improve content quality, ensure uniqueness, add more valuable information, and check for duplicate content issues.",
    metadataKeys: ["coverageState"],
  }],
  ["DISCOVERED_CURRENTLY_NOT_INDEXED", {
    idPrefix: "discovered-not-indexed",
    category: "indexing",
    severity: "warning",
    title: "Page discovered but not crawled",
    description: "Google discovered this URL but has not crawled it yet. This may indicate crawl budget issues or low priority.",
    recommendation: "Improve internal linking to this page, submit URL through Google Search Console, or add to sitemap.",
    metadataKeys: ["coverageState"],
  }],
  ["DUPLICATE_WITHOUT_USER_SELECTED_CANONICAL", {
    idPrefix: "duplicate-no-canonical",
    category: "indexing",
    severity: "warning",
    title: "Duplicate page without canonical",
    description: "This page is considered a duplicate but no canonical URL has been specified. Google chose a canonical for you.",
    recommendation: "Add a canonical tag pointing to the preferred version of this page.",
    metadataKeys: ["coverageState", "googleCanonical"],
  }],
  ["DUPLICATE_GOOGLE_CHOSE_DIFFERENT_CANONICAL", {
    idPrefix: "canonical-mismatch",
    category: "indexing",
    severity: "warning",
    title: "Google chose different canonical",
    description: "You specified a canonical URL, but Google chose a different one. This may cause indexing issues.",
    recommendation: "Review canonical tags and ensure they point to the correct URL. Check for duplicate content.",
    metadataKeys: ["coverageState", "userCanonical", "googleCanonical"],
  }],
]);

// Issue templates keyed by `indexStatusResult.indexingState`.
const GSC_INDEXING_STATE_ISSUES = new Map([
  ["BLOCKED_BY_META_TAG", {
    idPrefix: "blocked-meta-noindex",
    category: "indexing",
    severity: "error",
    title: "Blocked by noindex meta tag",
    description: "This page has a noindex meta tag preventing it from being indexed.",
    recommendation: "Remove the noindex meta tag if you want this page to be indexed. If intentional, no action needed.",
    metadataKeys: ["indexingState"],
  }],
  ["BLOCKED_BY_HTTP_HEADER", {
    idPrefix: "blocked-http-header",
    category: "indexing",
    severity: "error",
    title: "Blocked by X-Robots-Tag header",
    description: "This page has a noindex directive in the X-Robots-Tag HTTP header.",
    recommendation: "Remove the X-Robots-Tag: noindex header if you want this page to be indexed.",
    metadataKeys: ["indexingState"],
  }],
  ["BLOCKED_BY_ROBOTS_TXT", {
    idPrefix: "blocked-robots-txt",
    category: "crawling",
    severity: "error",
    title: "Blocked by robots.txt",
    description: "This page is blocked from crawling by robots.txt rules.",
    recommendation: "Update robots.txt to allow crawling if you want this page to be indexed.",
    metadataKeys: ["indexingState"],
  }],
]);

// Shared by ACCESS_DENIED and ACCESS_FORBIDDEN, which are reported identically.
const GSC_ACCESS_DENIED_ISSUE = {
  idPrefix: "access-denied",
  category: "technical",
  severity: "error",
  title: "Access denied (401/403)",
  description: "Google cannot access this page due to authentication requirements.",
  recommendation: "Ensure the page is publicly accessible without authentication for Googlebot.",
  metadataKeys: ["pageFetchState"],
};

// Issue templates keyed by `indexStatusResult.pageFetchState`.
const GSC_PAGE_FETCH_STATE_ISSUES = new Map([
  ["SOFT_404", {
    idPrefix: "soft-404",
    category: "technical",
    severity: "error",
    title: "Soft 404 error",
    description: "This page returns a 200 status but Google detected it as a 404 page (empty or low-value content).",
    recommendation: "Either return a proper 404 status code or add meaningful content to this page.",
    metadataKeys: ["pageFetchState"],
  }],
  ["NOT_FOUND", {
    idPrefix: "404-error",
    category: "technical",
    severity: "error",
    title: "404 Not Found",
    description: "This page returns a 404 error.",
    recommendation: "Either restore the page content or set up a redirect to a relevant page.",
    metadataKeys: ["pageFetchState"],
  }],
  ["SERVER_ERROR", {
    idPrefix: "server-error",
    category: "technical",
    severity: "critical",
    title: "Server error (5xx)",
    description: "This page returns a server error when Google tries to crawl it.",
    recommendation: "Fix the server-side error. Check server logs for details.",
    metadataKeys: ["pageFetchState"],
  }],
  ["REDIRECT_ERROR", {
    idPrefix: "redirect-error",
    category: "technical",
    severity: "error",
    title: "Redirect error",
    description: "There is a redirect issue with this page (redirect loop, too many redirects, or invalid redirect).",
    recommendation: "Fix the redirect chain. Ensure redirects point to valid, accessible pages.",
    metadataKeys: ["pageFetchState"],
  }],
  ["ACCESS_DENIED", GSC_ACCESS_DENIED_ISSUE],
  ["ACCESS_FORBIDDEN", GSC_ACCESS_DENIED_ISSUE],
]);

// Turn one template into a concrete issue for `url`.
function buildGscStateIssue(template, url, indexStatus) {
  return {
    id: `${template.idPrefix}-${hash(url)}`,
    url,
    category: template.category,
    severity: template.severity,
    title: template.title,
    description: template.description,
    recommendation: template.recommendation,
    detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
    metadata: Object.fromEntries(template.metadataKeys.map((key) => [key, indexStatus[key]])),
  };
}

/**
 * Derive SEO issues from a single URL inspection result.
 *
 * Checks, in order: coverage state, indexing state, page-fetch state, then
 * failed mobile-usability and failed rich-results reports. Each of the three
 * state checks contributes at most one issue.
 *
 * @param {object} result - Inspection result with `url`, `indexStatusResult`
 *   and optional `mobileUsabilityResult` / `richResultsResult`.
 * @returns {object[]} Issues in detection order (empty when nothing matched).
 */
function analyzeUrlInspection(result) {
  const { indexStatusResult, mobileUsabilityResult, richResultsResult } = result;
  const found = [];

  const stateLookups = [
    [GSC_COVERAGE_STATE_ISSUES, indexStatusResult.coverageState],
    [GSC_INDEXING_STATE_ISSUES, indexStatusResult.indexingState],
    [GSC_PAGE_FETCH_STATE_ISSUES, indexStatusResult.pageFetchState],
  ];
  for (const [templates, state] of stateLookups) {
    const template = templates.get(state);
    if (template !== undefined) {
      found.push(buildGscStateIssue(template, result.url, indexStatusResult));
    }
  }

  const mobileFailed = mobileUsabilityResult?.verdict === "FAIL";
  if (mobileFailed && mobileUsabilityResult.issues) {
    for (const mobileIssue of mobileUsabilityResult.issues) {
      const { issueType } = mobileIssue;
      found.push({
        id: `mobile-${issueType}-${hash(result.url)}`,
        url: result.url,
        category: "mobile",
        severity: "warning",
        title: `Mobile usability: ${formatIssueType(issueType)}`,
        description: mobileIssue.message || "Mobile usability issue detected.",
        recommendation: getMobileRecommendation(issueType),
        detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
        metadata: { issueType },
      });
    }
  }

  const richFailed = richResultsResult?.verdict === "FAIL";
  if (richFailed && richResultsResult.detectedItems) {
    for (const detected of richResultsResult.detectedItems) {
      const { richResultType } = detected;
      for (const entry of detected.items || []) {
        for (const detail of entry.issues || []) {
          found.push({
            id: `rich-result-${richResultType}-${hash(result.url)}`,
            url: result.url,
            category: "structured-data",
            // Anything that is not explicitly an ERROR is reported as a warning.
            severity: detail.severity === "ERROR" ? "error" : "warning",
            title: `${richResultType}: ${entry.name}`,
            description: detail.issueMessage,
            recommendation: "Fix the structured data markup according to Google guidelines.",
            detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
            metadata: { richResultType },
          });
        }
      }
    }
  }

  return found;
}
511
/**
 * Rank a severity for sorting; a lower number means more severe.
 *
 * @param {string} severity - One of "critical", "error", "warning", "info".
 * @returns {number} 0-3 for the known severities, 4 for anything else, so that
 *   unrecognized values sort last instead of producing a NaN comparison.
 */
function severityOrder(severity) {
  const order = {
    critical: 0,
    error: 1,
    warning: 2,
    info: 3
  };
  // Own-property check: a plain `order[severity]` would hand back inherited
  // members (e.g. "toString") and `undefined` for unknown values, both of
  // which break `a - b` comparators.
  return Object.hasOwn(order, severity) ? order[severity] : 4;
}
520
/**
 * Short, deterministic, non-cryptographic hash of a string.
 *
 * Folds the UTF-16 code units with `h = h * 31 + unit` in 32-bit signed
 * arithmetic, then returns the absolute value in base 36.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Base-36 digest ("0" for the empty string).
 */
function hash(str) {
  let accumulator = 0;
  for (let index = 0; index < str.length; index += 1) {
    // Math.imul keeps the multiplication in 32 bits; `| 0` wraps the sum.
    accumulator = (Math.imul(accumulator, 31) + str.charCodeAt(index)) | 0;
  }
  return Math.abs(accumulator).toString(36);
}
529
/**
 * Turn an enum-style identifier into a human-readable title:
 * underscores become spaces, everything is lower-cased, and the first
 * character of every word is capitalized.
 *
 * @param {string} type - Identifier such as "TEXT_TOO_SMALL".
 * @returns {string} Title-cased label, e.g. "Text Too Small".
 */
function formatIssueType(type) {
  const spaced = type.replace(/_/g, " ");
  const lowered = spaced.toLowerCase();
  return lowered.replace(/\b\w/g, (firstChar) => firstChar.toUpperCase());
}
532
/**
 * Look up the remediation advice for a mobile-usability issue type.
 *
 * @param {string} issueType - Issue type identifier from the mobile-usability report.
 * @returns {string} Specific advice for known types, otherwise a generic recommendation.
 */
function getMobileRecommendation(issueType) {
  const recommendations = {
    MOBILE_FRIENDLY_RULE_USES_INCOMPATIBLE_PLUGINS: "Remove Flash or other incompatible plugins. Use HTML5 alternatives.",
    MOBILE_FRIENDLY_RULE_CONFIGURE_VIEWPORT: 'Add a viewport meta tag: <meta name="viewport" content="width=device-width, initial-scale=1">',
    MOBILE_FRIENDLY_RULE_CONTENT_NOT_SIZED_TO_VIEWPORT: "Ensure content width fits the viewport. Use responsive CSS.",
    MOBILE_FRIENDLY_RULE_TAP_TARGETS_TOO_SMALL: "Increase the size of touch targets (buttons, links) to at least 48x48 pixels.",
    MOBILE_FRIENDLY_RULE_TEXT_TOO_SMALL: "Use at least 16px font size for body text."
  };
  // Own-property check so that names inherited from Object.prototype
  // ("constructor", "toString", ...) never leak out as a "recommendation".
  if (Object.hasOwn(recommendations, issueType)) {
    return recommendations[issueType];
  }
  return "Fix the mobile usability issue according to Google guidelines.";
}
542
// Baseline crawler settings.
const DEFAULT_CONFIG = {
  maxPages: 100,
  maxDepth: 3,
  concurrency: 5,
  // Milliseconds.
  timeout: 30000,
  userAgent: "DjangoCFG-SEO-Crawler/1.0 (+https://djangocfg.com/bot)",
  respectRobotsTxt: true,
  includePatterns: [],
  excludePatterns: [
    // Path segments.
    "/api/",
    "/admin/",
    "/_next/",
    "/static/",
    // File extensions.
    ".pdf",
    ".jpg",
    ".png",
    ".gif",
    ".svg",
    ".css",
    ".js",
  ],
};
564
/**
 * Breadth-first crawler for a single site. Fetches pages starting from the
 * site URL, follows same-host links up to `maxDepth`, and records SEO-relevant
 * data plus warnings for each page.
 */
var SiteCrawler = class {
  config;
  baseUrl;
  // Normalized URLs that have already been claimed for crawling.
  visited = /* @__PURE__ */ new Set();
  // Pending `{ url, depth }` entries, in discovery order.
  queue = [];
  results = [];
  limit;

  /**
   * @param {string} siteUrl - Start URL; its hostname defines what counts as internal.
   * @param {object} [config] - Overrides merged on top of `DEFAULT_CONFIG`.
   * @throws {TypeError} If `siteUrl` is not a valid URL.
   */
  constructor(siteUrl, config) {
    this.config = { ...DEFAULT_CONFIG, ...config };
    this.baseUrl = new URL(siteUrl);
    this.limit = pLimit(this.config.concurrency);
  }

  /**
   * Start crawling the site.
   * @returns {Promise<object[]>} One result per crawled page, at most `maxPages` entries.
   */
  async crawl() {
    consola2.info(`Starting crawl of ${this.baseUrl.origin}`);
    consola2.info(`Config: maxPages=${this.config.maxPages}, maxDepth=${this.config.maxDepth}`);
    this.queue.push({ url: this.baseUrl.href, depth: 0 });
    while (this.queue.length > 0 && this.results.length < this.config.maxPages) {
      // Never start more pages than the budget still allows; a full-size
      // batch could otherwise push the result count past maxPages.
      const remaining = this.config.maxPages - this.results.length;
      const batchSize = Math.min(this.config.concurrency, remaining);
      const batch = this.queue.splice(0, batchSize);
      const promises = batch.map(
        ({ url, depth }) => this.limit(() => this.crawlPage(url, depth))
      );
      await Promise.all(promises);
    }
    consola2.success(`Crawl complete. Crawled ${this.results.length} pages.`);
    return this.results;
  }

  /**
   * Crawl a single page and append its result to `this.results`.
   * Already-visited and excluded URLs are skipped without producing a result.
   * Fetch failures are recorded in the result's `errors` rather than thrown.
   *
   * @param {string} url - Page URL.
   * @param {number} depth - Link depth of this page relative to the start URL.
   * @returns {Promise<void>}
   */
  async crawlPage(url, depth) {
    const normalizedUrl = this.normalizeUrl(url);
    if (this.visited.has(normalizedUrl)) return;
    if (this.shouldExclude(normalizedUrl)) return;
    this.visited.add(normalizedUrl);
    const startTime = Date.now();
    const result = {
      url: normalizedUrl,
      statusCode: 0,
      links: { internal: [], external: [] },
      images: [],
      loadTime: 0,
      errors: [],
      warnings: [],
      crawledAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.config.timeout);
    try {
      const response = await fetch(normalizedUrl, {
        headers: {
          "User-Agent": this.config.userAgent,
          Accept: "text/html,application/xhtml+xml"
        },
        signal: controller.signal,
        redirect: "follow"
      });
      // Time until response headers were available.
      result.ttfb = Date.now() - startTime;
      result.statusCode = response.status;
      result.contentType = response.headers.get("content-type") || void 0;
      result.contentLength = Number(response.headers.get("content-length")) || void 0;
      if (response.ok && result.contentType?.includes("text/html")) {
        const html = await response.text();
        this.parseHtml(html, result, normalizedUrl, depth);
      } else if (!response.ok) {
        result.errors.push(`HTTP ${response.status}: ${response.statusText}`);
      }
    } catch (error) {
      if (error instanceof Error) {
        if (error.name === "AbortError") {
          result.errors.push("Request timeout");
        } else {
          result.errors.push(error.message);
        }
      } else {
        // Non-Error throwables are still failures worth recording.
        result.errors.push(String(error));
      }
    } finally {
      // Clearing here (not right after fetch resolves) releases the timer on
      // failures too, and keeps the timeout in force while the body is read.
      clearTimeout(timeoutId);
    }
    result.loadTime = Date.now() - startTime;
    this.results.push(result);
    consola2.debug(`Crawled: ${normalizedUrl} (${result.statusCode}) - ${result.loadTime}ms`);
  }

  /**
   * Parse HTML and extract SEO-relevant data into `result`: title, meta
   * description, robots directives, canonical, headings, links and images.
   * Newly discovered internal links are queued while `depth < maxDepth`.
   *
   * @param {string} html - Page markup.
   * @param {object} result - Result object to fill in (mutated).
   * @param {string} pageUrl - URL of the page, used to resolve relative links.
   * @param {number} depth - Depth of the page being parsed.
   */
  parseHtml(html, result, pageUrl, depth) {
    const $ = load(html);
    result.title = $("title").first().text().trim() || void 0;
    if (!result.title) {
      result.warnings.push("Missing title tag");
    } else if (result.title.length > 60) {
      result.warnings.push(`Title too long (${result.title.length} chars, recommended: <60)`);
    }
    result.metaDescription = $('meta[name="description"]').attr("content")?.trim() || void 0;
    if (!result.metaDescription) {
      result.warnings.push("Missing meta description");
    } else if (result.metaDescription.length > 160) {
      result.warnings.push(
        `Meta description too long (${result.metaDescription.length} chars, recommended: <160)`
      );
    }
    result.metaRobots = $('meta[name="robots"]').attr("content")?.trim() || void 0;
    // An http-equiv X-Robots-Tag meta is merged into the same field.
    const xRobots = $('meta[http-equiv="X-Robots-Tag"]').attr("content")?.trim();
    if (xRobots) {
      result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobots}` : xRobots;
    }
    result.canonicalUrl = $('link[rel="canonical"]').attr("href")?.trim() || void 0;
    if (!result.canonicalUrl) {
      result.warnings.push("Missing canonical tag");
    }
    result.h1 = $("h1").map((_, el) => $(el).text().trim()).get();
    result.h2 = $("h2").map((_, el) => $(el).text().trim()).get();
    if (result.h1.length === 0) {
      result.warnings.push("Missing H1 tag");
    } else if (result.h1.length > 1) {
      result.warnings.push(`Multiple H1 tags (${result.h1.length})`);
    }
    $("a[href]").each((_, el) => {
      const href = $(el).attr("href");
      if (!href) return;
      try {
        const linkUrl = new URL(href, pageUrl);
        if (linkUrl.hostname === this.baseUrl.hostname) {
          const internalUrl = this.normalizeUrl(linkUrl.href);
          result.links.internal.push(internalUrl);
          if (depth < this.config.maxDepth && !this.visited.has(internalUrl)) {
            this.queue.push({ url: internalUrl, depth: depth + 1 });
          }
        } else {
          result.links.external.push(linkUrl.href);
        }
      } catch {
        // Deliberately ignored: an href that cannot be parsed as a URL is
        // simply not recorded as a link.
      }
    });
    $("img").each((_, el) => {
      const src = $(el).attr("src");
      const alt = $(el).attr("alt");
      if (src) {
        result.images.push({
          src,
          alt,
          // Whitespace-only alt text counts as missing.
          hasAlt: alt !== void 0 && alt.trim().length > 0
        });
      }
    });
    const imagesWithoutAlt = result.images.filter((img) => !img.hasAlt);
    if (imagesWithoutAlt.length > 0) {
      result.warnings.push(`${imagesWithoutAlt.length} images without alt text`);
    }
  }

  /**
   * Normalize URL for deduplication: resolve against the base URL, drop the
   * fragment, and strip a trailing slash from any path other than "/".
   *
   * @param {string} url - Absolute or relative URL.
   * @returns {string} Normalized absolute URL, or the input unchanged if it cannot be parsed.
   */
  normalizeUrl(url) {
    try {
      const parsed = new URL(url, this.baseUrl.href);
      parsed.hash = "";
      let pathname = parsed.pathname;
      if (pathname.endsWith("/") && pathname !== "/") {
        pathname = pathname.slice(0, -1);
      }
      parsed.pathname = pathname;
      return parsed.href;
    } catch {
      return url;
    }
  }

  /**
   * Check if URL should be excluded. Patterns are plain substrings: when
   * `includePatterns` is non-empty the URL must contain at least one of them,
   * and it must not contain any of `excludePatterns`.
   *
   * @param {string} url - Normalized URL.
   * @returns {boolean} `true` when the URL must be skipped.
   */
  shouldExclude(url) {
    if (this.config.includePatterns.length > 0) {
      const included = this.config.includePatterns.some(
        (pattern) => url.includes(pattern)
      );
      if (!included) return true;
    }
    return this.config.excludePatterns.some((pattern) => url.includes(pattern));
  }
};
745
/**
 * Convert raw crawl results into a flat list of SEO issues.
 *
 * For every result the following checks are applied: HTTP errors (>= 400),
 * missing title / meta description / H1 (only for HTTP 200 pages), multiple
 * H1 headings, images without alt text, slow load time (> 3000ms), slow TTFB
 * (> 800ms) and a `noindex` robots directive.
 *
 * @param {Array<object>} results - Crawl results. Each is read for `url`,
 *   `statusCode`, `crawledAt`, `title`, `metaDescription`, `h1`, `images`,
 *   `loadTime`, `ttfb` and `metaRobots`.
 * @returns {Array<object>} Issues in detection order. Each issue id is
 *   `<check>-<hash2(url)>`.
 */
function analyzeCrawlResults(results) {
  const issues = [];
  for (const result of results) {
    const { url, statusCode, crawledAt } = result;
    // The hash only depends on the URL, so compute it once per result.
    const urlHash = hash2(url);
    const isOk = statusCode === 200;
    const addIssue = (check, fields, metadata) => {
      const issue = { id: `${check}-${urlHash}`, url, ...fields, detectedAt: crawledAt };
      if (metadata !== void 0) {
        issue.metadata = metadata;
      }
      issues.push(issue);
    };

    if (statusCode >= 400) {
      addIssue(
        "http-error",
        {
          category: "technical",
          severity: statusCode >= 500 ? "critical" : "error",
          title: `HTTP ${statusCode} error`,
          description: `Page returns ${statusCode} status code.`,
          recommendation: statusCode === 404 ? "Either restore the content or set up a redirect." : "Fix the server error and ensure the page is accessible."
        },
        { statusCode }
      );
    }
    if (!result.title && isOk) {
      addIssue("missing-title", {
        category: "content",
        severity: "error",
        title: "Missing title tag",
        description: "This page does not have a title tag.",
        recommendation: "Add a unique, descriptive title tag (50-60 characters)."
      });
    }
    if (!result.metaDescription && isOk) {
      addIssue("missing-meta-desc", {
        category: "content",
        severity: "warning",
        title: "Missing meta description",
        description: "This page does not have a meta description.",
        recommendation: "Add a unique meta description (120-160 characters)."
      });
    }
    // An absent `h1` list is deliberately not reported; only an empty one is.
    if (result.h1 && result.h1.length === 0 && isOk) {
      addIssue("missing-h1", {
        category: "content",
        severity: "warning",
        title: "Missing H1 heading",
        description: "This page does not have an H1 heading.",
        recommendation: "Add a single H1 heading that describes the page content."
      });
    }
    if (result.h1 && result.h1.length > 1) {
      addIssue(
        "multiple-h1",
        {
          category: "content",
          severity: "warning",
          title: "Multiple H1 headings",
          description: `This page has ${result.h1.length} H1 headings.`,
          recommendation: "Use only one H1 heading per page."
        },
        { h1Count: result.h1.length }
      );
    }
    // Results without an `images` array must not abort the whole analysis.
    const imagesWithoutAlt = (result.images ?? []).filter((img) => !img.hasAlt);
    if (imagesWithoutAlt.length > 0) {
      addIssue(
        "images-no-alt",
        {
          category: "content",
          severity: "info",
          title: "Images without alt text",
          description: `${imagesWithoutAlt.length} images are missing alt text.`,
          recommendation: "Add descriptive alt text to all images for accessibility and SEO."
        },
        { count: imagesWithoutAlt.length }
      );
    }
    if (result.loadTime > 3e3) {
      addIssue(
        "slow-page",
        {
          category: "performance",
          severity: result.loadTime > 5e3 ? "error" : "warning",
          title: "Slow page load time",
          description: `Page took ${result.loadTime}ms to load.`,
          recommendation: "Optimize page load time. Target under 3 seconds."
        },
        { loadTime: result.loadTime }
      );
    }
    if (result.ttfb && result.ttfb > 800) {
      addIssue(
        "slow-ttfb",
        {
          category: "performance",
          severity: result.ttfb > 1500 ? "error" : "warning",
          title: "Slow Time to First Byte",
          description: `TTFB is ${result.ttfb}ms. Server responded slowly.`,
          recommendation: "Optimize server response. Target TTFB under 800ms. Consider CDN, caching, or server upgrades."
        },
        { ttfb: result.ttfb }
      );
    }
    if (result.metaRobots?.includes("noindex")) {
      addIssue(
        "noindex",
        {
          category: "indexing",
          severity: "info",
          title: "Page marked as noindex",
          description: "This page has a noindex directive.",
          recommendation: "Verify this is intentional. Remove noindex if the page should be indexed."
        },
        { metaRobots: result.metaRobots }
      );
    }
  }
  return issues;
}
866
/**
 * Short, deterministic, non-cryptographic hash of a string.
 *
 * Folds the UTF-16 code units with `acc * 31 + unit`, truncated to a signed
 * 32-bit integer at every step, and returns the absolute value in base 36.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Base-36 digest ("0" for the empty string).
 */
function hash2(str) {
  let acc = 0;
  let idx = 0;
  while (idx < str.length) {
    // (acc << 5) - acc is acc * 31; `| 0` wraps to signed 32 bits.
    acc = ((acc << 5) - acc + str.charCodeAt(idx)) | 0;
    idx += 1;
  }
  return Math.abs(acc).toString(36);
}
875
/**
 * Fetch and analyze a site's robots.txt.
 *
 * Collects declared sitemaps, every Allow/Disallow path (from all user-agent
 * groups), the last numeric Crawl-delay, and a list of issues:
 * missing file, no sitemap declared, "/" or "/sitemap.xml" blocked for
 * Googlebot, and the whole site blocked.
 *
 * "Entire site blocked" is only reported when `Disallow: /` belongs to a
 * group that targets all crawlers (`User-agent: *`); a full block aimed at
 * one specific bot is not a site-wide block. Trailing `#` comments are
 * stripped before a rule is interpreted.
 *
 * @param {string} siteUrl - Site origin or any URL on the site.
 * @returns {Promise<object>} Analysis with `exists`, `content`, `sitemaps`,
 *   `allowedPaths`, `disallowedPaths`, `crawlDelay` and `issues`.
 * @throws {TypeError} When `siteUrl` is not a valid URL (thrown by `URL`
 *   before any request is made).
 */
async function analyzeRobotsTxt(siteUrl) {
  const robotsUrl = new URL("/robots.txt", siteUrl).href;
  const analysis = {
    exists: false,
    sitemaps: [],
    allowedPaths: [],
    disallowedPaths: [],
    issues: []
  };
  const timestamp = () => new Date().toISOString();
  try {
    const response = await fetch(robotsUrl);
    if (!response.ok) {
      analysis.issues.push({
        id: "missing-robots-txt",
        url: robotsUrl,
        category: "technical",
        severity: "warning",
        title: "Missing robots.txt",
        description: `No robots.txt file found (HTTP ${response.status}).`,
        recommendation: "Create a robots.txt file to control crawler access.",
        detectedAt: timestamp()
      });
      return analysis;
    }
    analysis.exists = true;
    analysis.content = await response.text();
    const robots = robotsParser(robotsUrl, analysis.content);
    analysis.sitemaps = robots.getSitemaps();
    if (analysis.sitemaps.length === 0) {
      analysis.issues.push({
        id: "no-sitemap-in-robots",
        url: robotsUrl,
        category: "technical",
        severity: "info",
        title: "No sitemap in robots.txt",
        description: "No sitemap URL is declared in robots.txt.",
        recommendation: "Add a Sitemap directive pointing to your XML sitemap.",
        detectedAt: timestamp()
      });
    }

    // Agents of the group currently being read. Rules that appear before any
    // User-agent line are treated as applying to "*".
    let groupAgents = ["*"];
    // True while consecutive User-agent lines are still extending one group.
    let readingAgents = false;
    let blocksAllCrawlers = false;
    for (const rawLine of analysis.content.split("\n")) {
      const commentAt = rawLine.indexOf("#");
      const line = (commentAt === -1 ? rawLine : rawLine.slice(0, commentAt)).trim();
      const lower = line.toLowerCase();
      if (lower.startsWith("user-agent:")) {
        const agent = lower.slice("user-agent:".length).trim();
        if (readingAgents) {
          groupAgents.push(agent);
        } else {
          groupAgents = [agent];
        }
        readingAgents = true;
      } else if (lower.startsWith("disallow:")) {
        readingAgents = false;
        const path5 = line.slice("disallow:".length).trim();
        if (path5) {
          analysis.disallowedPaths.push(path5);
          if (path5 === "/" && groupAgents.includes("*")) {
            blocksAllCrawlers = true;
          }
        }
      } else if (lower.startsWith("allow:")) {
        readingAgents = false;
        const path5 = line.slice("allow:".length).trim();
        if (path5) {
          analysis.allowedPaths.push(path5);
        }
      } else if (lower.startsWith("crawl-delay:")) {
        readingAgents = false;
        const delay = Number.parseInt(lower.slice("crawl-delay:".length).trim(), 10);
        if (!Number.isNaN(delay)) {
          analysis.crawlDelay = delay;
        }
      }
    }

    const importantPaths = ["/", "/sitemap.xml"];
    for (const path5 of importantPaths) {
      if (!robots.isAllowed(new URL(path5, siteUrl).href, "Googlebot")) {
        analysis.issues.push({
          id: `blocked-important-path-${path5.replace(/\//g, "-")}`,
          url: siteUrl,
          category: "crawling",
          severity: "error",
          title: `Important path blocked: ${path5}`,
          description: `The path ${path5} is blocked in robots.txt.`,
          recommendation: `Ensure ${path5} is accessible to search engines.`,
          detectedAt: timestamp(),
          metadata: { path: path5 }
        });
      }
    }
    if (blocksAllCrawlers) {
      analysis.issues.push({
        id: "all-blocked",
        url: robotsUrl,
        category: "crawling",
        severity: "critical",
        title: "Entire site blocked",
        description: "robots.txt blocks access to the entire site (Disallow: /).",
        recommendation: "Remove or modify this rule if you want your site to be indexed.",
        detectedAt: timestamp()
      });
    }
    consola2.debug(`Analyzed robots.txt: ${analysis.disallowedPaths.length} disallow rules`);
  } catch (error) {
    // Best effort: a failed fetch/parse is reported as an issue, not thrown.
    consola2.error("Failed to fetch robots.txt:", error);
    analysis.issues.push({
      id: "robots-txt-error",
      url: robotsUrl,
      category: "technical",
      severity: "warning",
      title: "Failed to fetch robots.txt",
      description: `Error fetching robots.txt: ${error instanceof Error ? error.message : "Unknown error"}`,
      recommendation: "Ensure robots.txt is accessible.",
      detectedAt: timestamp()
    });
  }
  return analysis;
}
982
/**
 * Check whether robots.txt permits a crawler to fetch a URL.
 *
 * The check is permissive: a robots.txt that cannot be fetched, responds
 * with a non-OK status, or yields no verdict from the parser counts as
 * "allowed".
 *
 * @param {string} siteUrl - Site whose /robots.txt is consulted.
 * @param {string} url - URL to test.
 * @param {string} [userAgent="Googlebot"] - Crawler name to test for.
 * @returns {Promise<boolean>} `false` only when the parser explicitly denies
 *   the URL.
 */
async function isUrlAllowed(siteUrl, url, userAgent = "Googlebot") {
  const robotsUrl = new URL("/robots.txt", siteUrl).href;
  let verdict = true;
  try {
    const response = await fetch(robotsUrl);
    if (response.ok) {
      const rules = robotsParser(robotsUrl, await response.text());
      const decision = rules.isAllowed(url, userAgent);
      if (decision !== null && decision !== void 0) {
        verdict = decision;
      }
    }
  } catch {
    verdict = true;
  }
  return verdict;
}
994
/**
 * Download one sitemap and inspect it.
 *
 * Determines whether the document is a sitemap index or a URL sitemap,
 * collects the `<loc>` values (child sitemaps or page URLs), records the
 * first `<lastmod>` of a URL sitemap, and reports issues: not accessible,
 * not XML, empty, more than 50,000 URLs, larger than 50MB, or a failure
 * while fetching/parsing.
 *
 * @param {string} sitemapUrl - Absolute URL of the sitemap.
 * @returns {Promise<object>} Analysis with `url`, `exists`, `type`, `urls`,
 *   `childSitemaps`, `issues` and optionally `lastmod`. Errors are reported
 *   as issues rather than thrown.
 */
async function analyzeSitemap(sitemapUrl) {
  const analysis = {
    url: sitemapUrl,
    exists: false,
    type: "unknown",
    urls: [],
    childSitemaps: [],
    issues: []
  };
  // Every sitemap issue shares the same url/category and id suffix.
  const flag = (slug, severity, title, description, recommendation, metadata) => {
    const entry = {
      id: `${slug}-${hash3(sitemapUrl)}`,
      url: sitemapUrl,
      category: "technical",
      severity,
      title,
      description,
      recommendation,
      detectedAt: new Date().toISOString()
    };
    if (metadata !== void 0) {
      entry.metadata = metadata;
    }
    analysis.issues.push(entry);
  };

  try {
    const response = await fetch(sitemapUrl, {
      headers: {
        Accept: "application/xml, text/xml, */*"
      }
    });
    if (!response.ok) {
      flag(
        "sitemap-not-found",
        "error",
        "Sitemap not accessible",
        `Sitemap returned HTTP ${response.status}.`,
        "Ensure the sitemap URL is correct and accessible.",
        { statusCode: response.status }
      );
      return analysis;
    }
    analysis.exists = true;
    const content = await response.text();
    const contentType = response.headers.get("content-type") || "";
    const looksLikeXml = contentType.includes("xml") || content.trim().startsWith("<?xml");
    if (!looksLikeXml) {
      flag(
        "sitemap-not-xml",
        "warning",
        "Sitemap is not XML",
        "The sitemap does not have an XML content type.",
        "Ensure sitemap is served with Content-Type: application/xml.",
        { contentType }
      );
    }

    const $ = load(content, { xmlMode: true });
    // Push the trimmed, non-empty <loc> of every `selector` element.
    const collectLocs = (selector, bucket) => {
      $(selector).each((_, node) => {
        const loc = $("loc", node).text().trim();
        if (loc) {
          bucket.push(loc);
        }
      });
    };
    if ($("sitemapindex").length > 0) {
      analysis.type = "sitemap-index";
      collectLocs("sitemap", analysis.childSitemaps);
      consola2.debug(`Sitemap index contains ${analysis.childSitemaps.length} sitemaps`);
    } else {
      analysis.type = "sitemap";
      collectLocs("url", analysis.urls);
      const lastmod = $("url lastmod").first().text().trim();
      if (lastmod) {
        analysis.lastmod = lastmod;
      }
      consola2.debug(`Sitemap contains ${analysis.urls.length} URLs`);
    }

    const urlCount = analysis.urls.length;
    if (analysis.type === "sitemap" && urlCount === 0) {
      flag(
        "sitemap-empty",
        "warning",
        "Sitemap is empty",
        "The sitemap contains no URLs.",
        "Add URLs to your sitemap or remove it if not needed."
      );
    }
    if (urlCount > 5e4) {
      flag(
        "sitemap-too-large",
        "error",
        "Sitemap exceeds URL limit",
        `Sitemap contains ${urlCount} URLs. Maximum is 50,000.`,
        "Split the sitemap into multiple files using a sitemap index.",
        { urlCount }
      );
    }
    // Size of the downloaded text in bytes, expressed in MiB.
    const sizeInMB = new Blob([content]).size / (1024 * 1024);
    if (sizeInMB > 50) {
      flag(
        "sitemap-too-large-size",
        "error",
        "Sitemap exceeds size limit",
        `Sitemap is ${sizeInMB.toFixed(2)}MB. Maximum is 50MB.`,
        "Split the sitemap or compress it.",
        { sizeMB: sizeInMB }
      );
    }
  } catch (error) {
    consola2.error("Failed to analyze sitemap:", error);
    flag(
      "sitemap-error",
      "error",
      "Failed to parse sitemap",
      `Error: ${error instanceof Error ? error.message : "Unknown error"}`,
      "Check sitemap validity using Google Search Console."
    );
  }
  return analysis;
}
1118
/**
 * Analyze a sitemap and, for sitemap indexes, every sitemap it references.
 *
 * Sitemaps are visited one at a time in depth-first pre-order. Each URL is
 * analyzed at most once, and anything nested deeper than `maxDepth` levels
 * below the starting sitemap is skipped.
 *
 * @param {string} sitemapUrl - Root sitemap (or sitemap index) URL.
 * @param {number} [maxDepth=3] - Deepest nesting level that is followed.
 * @returns {Promise<Array<object>>} One `analyzeSitemap` result per visited
 *   sitemap, in visiting order.
 */
async function analyzeAllSitemaps(sitemapUrl, maxDepth = 3) {
  const collected = [];
  const seen = new Set();
  // Explicit stack instead of recursion; the top of the stack is visited next.
  const pending = [{ url: sitemapUrl, depth: 0 }];
  while (pending.length > 0) {
    const { url, depth } = pending.pop();
    if (depth > maxDepth || seen.has(url)) {
      continue;
    }
    seen.add(url);
    const analysis = await analyzeSitemap(url);
    collected.push(analysis);
    // Push children in reverse so they are popped in document order.
    const children = [...analysis.childSitemaps];
    for (let i = children.length - 1; i >= 0; i -= 1) {
      pending.push({ url: children[i], depth: depth + 1 });
    }
  }
  return collected;
}
1133
/**
 * Short, deterministic, non-cryptographic hash used to build issue ids.
 *
 * Each UTF-16 code unit is folded in as `acc * 31 + unit`, wrapped to a
 * signed 32-bit integer; the absolute value is returned in base 36.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Base-36 digest ("0" for the empty string).
 */
function hash3(str) {
  // split("") yields UTF-16 code units, matching an index-based charCodeAt walk.
  const folded = str
    .split("")
    .reduce((acc, unit) => ((acc << 5) - acc + unit.charCodeAt(0)) | 0, 0);
  return Math.abs(folded).toString(36);
}
1142
+
1143
+ // src/reports/json-report.ts
1144
/**
 * Assemble the JSON SEO report for a site.
 *
 * The summary and recommendations are computed from the complete issue list
 * so counts stay accurate; the `issues` array itself is capped per issue
 * title and then sorted. Raw inspection/crawl data is included only when
 * `options.includeRawData` is set, and then limited to the first 100 entries.
 *
 * @param {string} siteUrl - Analyzed site.
 * @param {object} data - `{ issues, urlInspections?, crawlResults? }`.
 * @param {object} [options] - `maxUrlsPerIssue` (default 10) and
 *   `includeRawData`.
 * @returns {object} The report object.
 */
function generateJsonReport(siteUrl, data, options = {}) {
  const issues = data.issues;
  const urlInspections = data.urlInspections === void 0 ? [] : data.urlInspections;
  const crawlResults = data.crawlResults === void 0 ? [] : data.crawlResults;
  const perIssueCap = options.maxUrlsPerIssue ?? 10;
  const cappedIssues = limitIssuesByTitle(issues, perIssueCap);
  const rawSample = (entries) => (options.includeRawData ? entries.slice(0, 100) : []);
  return {
    id: generateReportId(),
    siteUrl,
    generatedAt: new Date().toISOString(),
    // Summary uses the uncapped list for accurate counts.
    summary: generateSummary(issues, urlInspections, crawlResults),
    issues: sortIssues(cappedIssues),
    urlInspections: rawSample(urlInspections),
    crawlResults: rawSample(crawlResults),
    recommendations: generateRecommendations(issues, perIssueCap)
  };
}
1161
/**
 * Cap the number of issues kept for each distinct issue title.
 *
 * Issues are grouped by `title` (groups keep first-seen order), each group
 * is ordered from most to least severe, and only the first `maxUrls`
 * entries of each group are kept.
 *
 * @param {Array<object>} issues - Issues with `title` and `severity`.
 * @param {number} maxUrls - Maximum issues retained per title.
 * @returns {Array<object>} The retained issues, group by group.
 */
function limitIssuesByTitle(issues, maxUrls) {
  const severityRank = { critical: 0, error: 1, warning: 2, info: 3 };
  const groups = new Map();
  issues.forEach((issue) => {
    const bucket = groups.get(issue.title);
    if (bucket) {
      bucket.push(issue);
    } else {
      groups.set(issue.title, [issue]);
    }
  });
  return Array.from(groups.values()).flatMap((bucket) =>
    bucket
      .sort((left, right) => severityRank[left.severity] - severityRank[right.severity])
      .slice(0, maxUrls)
  );
}
1178
/**
 * Build the headline numbers of a report.
 *
 * `totalUrls` is the largest of: number of URL inspections, number of crawl
 * results, number of distinct URLs mentioned by issues. Indexed / not-indexed
 * counts come from each inspection's `indexStatusResult.coverageState`.
 *
 * @param {Array<object>} issues - All detected issues.
 * @param {Array<object>} urlInspections - URL inspection results.
 * @param {Array<object>} crawlResults - Crawl results.
 * @returns {object} `{ totalUrls, indexedUrls, notIndexedUrls,
 *   issuesByCategory, issuesBySeverity, healthScore }`.
 */
function generateSummary(issues, urlInspections, crawlResults) {
  const distinctIssueUrls = new Set(issues.map((i) => i.url)).size;
  const totalUrls = Math.max(urlInspections.length, crawlResults.length, distinctIssueUrls);

  const notIndexedStates = new Set([
    "NOT_INDEXED",
    "CRAWLED_CURRENTLY_NOT_INDEXED",
    "DISCOVERED_CURRENTLY_NOT_INDEXED"
  ]);
  let indexedUrls = 0;
  for (const inspection of urlInspections) {
    if (inspection.indexStatusResult.coverageState === "SUBMITTED_AND_INDEXED") {
      indexedUrls += 1;
    }
  }
  let notIndexedUrls = 0;
  for (const inspection of urlInspections) {
    if (notIndexedStates.has(inspection.indexStatusResult.coverageState)) {
      notIndexedUrls += 1;
    }
  }

  // Count issues per value of one of their fields.
  const tallyBy = (field) => {
    const counts = {};
    for (const issue of issues) {
      const bucket = issue[field];
      counts[bucket] = (counts[bucket] || 0) + 1;
    }
    return counts;
  };
  const issuesByCategory = tallyBy("category");
  const issuesBySeverity = tallyBy("severity");
  const healthScore = calculateHealthScore(issues, totalUrls);

  return {
    totalUrls,
    indexedUrls,
    notIndexedUrls,
    issuesByCategory,
    issuesBySeverity,
    healthScore
  };
}
1214
/**
 * Compute a 0-100 health score from the detected issues.
 *
 * Every issue adds a penalty by severity (critical 10, error 5, warning 2,
 * info 0.5). The total is compared with a budget of 20 points per URL, and
 * the consumed share of that budget is subtracted from 100.
 *
 * Issues with an unrecognized severity contribute no penalty. Without this
 * guard a single such issue would turn the whole score into `NaN`.
 *
 * @param {Array<object>} issues - Issues with a `severity` field.
 * @param {number} totalUrls - Number of URLs the issues relate to.
 * @returns {number} Integer score between 0 and 100; 100 when `totalUrls`
 *   is 0.
 */
function calculateHealthScore(issues, totalUrls) {
  if (totalUrls === 0) return 100;
  const severityWeights = {
    critical: 10,
    error: 5,
    warning: 2,
    info: 0.5
  };
  const totalPenalty = issues.reduce(
    (sum, issue) => sum + (severityWeights[issue.severity] ?? 0),
    0
  );
  const maxPenalty = totalUrls * 20;
  // Clamp so that heavy penalties bottom out at a score of 0.
  const penaltyRatio = Math.min(totalPenalty / maxPenalty, 1);
  return Math.round((1 - penaltyRatio) * 100);
}
1230
/**
 * Turn issues into prioritized recommendations, one per category + title.
 *
 * Priority is 1 (critical), 2 (error), 3 (warning) or 4 (anything else) and
 * is taken from the most severe issue in the group. A group can mix
 * severities (for example "Slow page load time" is raised as either error or
 * warning), so using only the first issue would make the priority depend on
 * input order. Impact is "high" for priority 1-2, "medium" for 3 and "low"
 * otherwise. Title, description and action item come from the group's first
 * issue.
 *
 * @param {Array<object>} issues - All detected issues.
 * @param {number} [maxUrls=10] - Maximum affected URLs listed per
 *   recommendation; when more exist the description notes how many are shown.
 * @returns {Array<object>} Recommendations sorted by ascending priority.
 */
function generateRecommendations(issues, maxUrls = 10) {
  const priorityBySeverity = { critical: 1, error: 2, warning: 3 };
  const lowestPriority = 4;
  const priorityOf = (issue) => priorityBySeverity[issue.severity] ?? lowestPriority;

  const issueGroups = new Map();
  for (const issue of issues) {
    const key = `${issue.category}:${issue.title}`;
    const bucket = issueGroups.get(key);
    if (bucket) {
      bucket.push(issue);
    } else {
      issueGroups.set(key, [issue]);
    }
  }

  const recommendations = [];
  for (const groupedIssues of issueGroups.values()) {
    const [firstIssue] = groupedIssues;
    if (!firstIssue) continue;
    // Most severe issue in the group decides the priority.
    const priority = groupedIssues.reduce(
      (best, issue) => Math.min(best, priorityOf(issue)),
      lowestPriority
    );
    const impact = priority <= 2 ? "high" : priority === 3 ? "medium" : "low";
    const allUrls = groupedIssues.map((i) => i.url);
    const totalCount = allUrls.length;
    recommendations.push({
      priority,
      category: firstIssue.category,
      title: firstIssue.title,
      description: totalCount > maxUrls ? `${firstIssue.description} (showing ${maxUrls} of ${totalCount} URLs)` : firstIssue.description,
      affectedUrls: allUrls.slice(0, maxUrls),
      estimatedImpact: impact,
      actionItems: [firstIssue.recommendation]
    });
  }
  return recommendations.sort((a, b) => a.priority - b.priority);
}
1261
/**
 * Return a sorted copy of the issues: most severe first
 * (critical, error, warning, info), ties broken by category name.
 *
 * @param {Array<object>} issues - Issues with `severity` and `category`.
 * @returns {Array<object>} New array; the input is left untouched.
 */
function sortIssues(issues) {
  const rank = { critical: 0, error: 1, warning: 2, info: 3 };
  const bySeverityThenCategory = (left, right) => {
    const delta = rank[left.severity] - rank[right.severity];
    return delta !== 0 ? delta : left.category.localeCompare(right.category);
  };
  return issues.slice().sort(bySeverityThenCategory);
}
1274
/**
 * Create a report id of the form `seo-report-<time>-<random>`.
 *
 * `<time>` is the current epoch milliseconds in base 36 and `<random>` is up
 * to six base-36 characters taken from `Math.random()`. Intended for
 * labelling reports, not for security purposes.
 *
 * @returns {string} The generated id.
 */
function generateReportId() {
  const parts = [
    "seo-report",
    Date.now().toString(36),
    Math.random().toString(36).substring(2, 8)
  ];
  return parts.join("-");
}
1279
/**
 * Serialize a report to a JSON string.
 *
 * @param {object} report - Report to serialize.
 * @param {boolean} [pretty=true] - Indent with two spaces when truthy,
 *   otherwise emit compact JSON.
 * @returns {string} JSON text.
 */
function exportJsonReport(report, pretty = true) {
  if (pretty) {
    return JSON.stringify(report, null, 2);
  }
  return JSON.stringify(report, null, 0);
}
1282
/**
 * JSON Schema (draft-07) describing the report object: top-level identity
 * fields, the `summary` block, the `issues` list and the `recommendations`
 * list. Built from small local builders; the resulting object and its key
 * order are fixed.
 */
var AI_REPORT_SCHEMA = (() => {
  const text = (description) => description === void 0 ? { type: "string" } : { type: "string", description };
  const count = () => ({ type: "number" });
  const textList = () => ({ type: "array", items: text() });
  const oneOf = (...values) => ({ type: "string", enum: values });

  const summary = {
    type: "object",
    description: "Quick overview of SEO health",
    properties: {
      totalUrls: count(),
      indexedUrls: count(),
      notIndexedUrls: count(),
      healthScore: {
        type: "number",
        minimum: 0,
        maximum: 100,
        description: "0-100 score, higher is better"
      }
    }
  };

  const issue = {
    type: "object",
    properties: {
      severity: oneOf("critical", "error", "warning", "info"),
      category: oneOf(
        "indexing",
        "crawling",
        "content",
        "technical",
        "mobile",
        "performance",
        "structured-data",
        "security"
      ),
      title: text(),
      description: text(),
      recommendation: text(),
      url: text()
    }
  };

  const recommendation = {
    type: "object",
    properties: {
      priority: { type: "number", minimum: 1, maximum: 5 },
      title: text(),
      affectedUrls: textList(),
      actionItems: textList()
    }
  };

  return {
    $schema: "http://json-schema.org/draft-07/schema#",
    title: "SEO Report",
    description: "AI-friendly SEO analysis report with issues and recommendations",
    type: "object",
    properties: {
      id: text("Unique report identifier"),
      siteUrl: text("Analyzed site URL"),
      generatedAt: { type: "string", format: "date-time" },
      summary,
      issues: {
        type: "array",
        description: "List of detected SEO issues sorted by severity",
        items: issue
      },
      recommendations: {
        type: "array",
        description: "Prioritized action items",
        items: recommendation
      }
    }
  };
})();
1351
+
1352
+ // src/reports/markdown-report.ts
1353
/**
 * Render a report as a Markdown document.
 *
 * Sections, in order: header, summary table, issue counts by severity and by
 * category (largest first), prioritized recommendations (optionally with a
 * collapsible list of affected URLs), optionally every issue grouped by
 * category, and a footer.
 *
 * @param {object} report - Report with `siteUrl`, `generatedAt`, `id`,
 *   `summary`, `recommendations` and `issues`.
 * @param {object} [options]
 * @param {boolean} [options.includeRawIssues=true] - Append "All Issues".
 * @param {boolean} [options.includeUrls=true] - List affected URLs.
 * @param {number} [options.maxUrlsPerIssue=10] - URLs listed per
 *   recommendation before an "... and N more" line is added.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function generateMarkdownReport(report, options = {}) {
  const { includeRawIssues = true, includeUrls = true, maxUrlsPerIssue = 10 } = options;
  const out = [];
  const emit = (...rows) => {
    out.push(...rows);
  };

  emit(
    `# SEO Analysis Report`,
    "",
    `**Site:** ${report.siteUrl}`,
    `**Generated:** ${new Date(report.generatedAt).toLocaleString()}`,
    `**Report ID:** ${report.id}`,
    ""
  );

  emit(
    "## Summary",
    "",
    `| Metric | Value |`,
    `|--------|-------|`,
    `| Health Score | ${getHealthScoreEmoji(report.summary.healthScore)} **${report.summary.healthScore}/100** |`,
    `| Total URLs | ${report.summary.totalUrls} |`,
    `| Indexed URLs | ${report.summary.indexedUrls} |`,
    `| Not Indexed | ${report.summary.notIndexedUrls} |`,
    ""
  );

  emit("### Issues by Severity", "");
  for (const level of ["critical", "error", "warning", "info"]) {
    const total = report.summary.issuesBySeverity[level] || 0;
    emit(`- ${getSeverityEmoji(level)} **${capitalize(level)}:** ${total}`);
  }
  emit("");

  emit("### Issues by Category", "");
  const rankedCategories = Object.entries(report.summary.issuesByCategory).sort(
    (left, right) => right[1] - left[1]
  );
  for (const [category, total] of rankedCategories) {
    emit(`- ${getCategoryEmoji(category)} **${formatCategory(category)}:** ${total}`);
  }
  emit("");

  emit("## Prioritized Recommendations", "");
  for (const rec of report.recommendations) {
    emit(
      `### ${getPriorityEmoji(rec.priority)} Priority ${rec.priority}: ${rec.title}`,
      "",
      `**Category:** ${formatCategory(rec.category)}`,
      `**Impact:** ${capitalize(rec.estimatedImpact)}`,
      `**Affected URLs:** ${rec.affectedUrls.length}`,
      "",
      `${rec.description}`,
      "",
      "**Action Items:**"
    );
    for (const action of rec.actionItems) {
      emit(`- ${action}`);
    }
    emit("");
    if (includeUrls && rec.affectedUrls.length > 0) {
      emit("<details>", `<summary>Affected URLs (${rec.affectedUrls.length})</summary>`, "");
      for (const url of rec.affectedUrls.slice(0, maxUrlsPerIssue)) {
        emit(`- ${url}`);
      }
      if (rec.affectedUrls.length > maxUrlsPerIssue) {
        emit(`- ... and ${rec.affectedUrls.length - maxUrlsPerIssue} more`);
      }
      emit("</details>", "");
    }
    emit("---", "");
  }

  if (includeRawIssues) {
    emit("## All Issues", "");
    const grouped = groupBy(report.issues, "category");
    for (const [category, categoryIssues] of Object.entries(grouped)) {
      emit(`### ${getCategoryEmoji(category)} ${formatCategory(category)}`, "");
      for (const issue of categoryIssues) {
        emit(
          `#### ${getSeverityEmoji(issue.severity)} ${issue.title}`,
          "",
          `**URL:** \`${issue.url}\``,
          `**Severity:** ${capitalize(issue.severity)}`,
          "",
          // Pushed as-is (not interpolated) so join() renders a missing value as "".
          issue.description,
          "",
          `**Recommendation:** ${issue.recommendation}`,
          ""
        );
      }
    }
  }

  emit(
    "---",
    "",
    "*Report generated by [@djangocfg/seo](https://djangocfg.com)*",
    "",
    "> This report is designed to be processed by AI assistants for automated SEO improvements."
  );
  return out.join("\n");
}
1450
/**
 * Render a compact Markdown digest of a report: context numbers, the first
 * five recommendations with their first action item, and issue counts per
 * category (largest first).
 *
 * @param {object} report - Report with `siteUrl`, `summary` and
 *   `recommendations`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function generateAiSummary(report) {
  const { issuesBySeverity, issuesByCategory } = report.summary;
  const out = [
    "# SEO Report Summary for AI Processing",
    "",
    "## Context",
    `Site: ${report.siteUrl}`,
    `Health Score: ${report.summary.healthScore}/100`,
    `Critical Issues: ${issuesBySeverity.critical || 0}`,
    `Errors: ${issuesBySeverity.error || 0}`,
    `Warnings: ${issuesBySeverity.warning || 0}`,
    "",
    "## Top Priority Actions",
    ""
  ];

  report.recommendations.slice(0, 5).forEach((rec, position) => {
    if (!rec) return;
    out.push(`${position + 1}. **${rec.title}** (${rec.affectedUrls.length} URLs)`);
    out.push(` - ${rec.actionItems[0]}`);
  });

  out.push("", "## Issue Categories", "");
  const ranked = Object.entries(issuesByCategory).sort((left, right) => right[1] - left[1]);
  for (const [category, total] of ranked) {
    out.push(`- ${formatCategory(category)}: ${total} issues`);
  }
  return out.join("\n");
}
1479
/**
 * Map an issue severity to its colored-circle emoji.
 *
 * Unknown severities fall back to the clipboard emoji, the same fallback
 * the category and priority emoji helpers use, so a report never shows the
 * literal text "undefined".
 *
 * @param {string} severity - "critical", "error", "warning" or "info".
 * @returns {string} Emoji for the severity.
 */
function getSeverityEmoji(severity) {
  const emojis = {
    critical: "\u{1F534}",
    error: "\u{1F7E0}",
    warning: "\u{1F7E1}",
    info: "\u{1F535}"
  };
  return emojis[severity] || "\u{1F4CB}";
}
1488
/**
 * Map an issue category to its emoji.
 *
 * @param {string} category - Category key such as "content" or
 *   "structured-data".
 * @returns {string} Emoji for the category, or the clipboard emoji when the
 *   category is not in the table.
 */
function getCategoryEmoji(category) {
  const fallbackIcon = "\u{1F4CB}";
  const iconByCategory = {
    indexing: "\u{1F4D1}",
    crawling: "\u{1F577}\uFE0F",
    content: "\u{1F4DD}",
    technical: "\u2699\uFE0F",
    mobile: "\u{1F4F1}",
    performance: "\u26A1",
    "structured-data": "\u{1F3F7}\uFE0F",
    security: "\u{1F512}"
  };
  const icon = iconByCategory[category];
  return icon ? icon : fallbackIcon;
}
1501
/**
 * Map a recommendation priority (1 = most urgent … 5) to its emoji.
 *
 * @param {number} priority - Priority level.
 * @returns {string} Emoji for the level, or the clipboard emoji for any
 *   other value.
 */
function getPriorityEmoji(priority) {
  const fallbackIcon = "\u{1F4CB}";
  const iconByPriority = {
    1: "\u{1F6A8}",
    2: "\u26A0\uFE0F",
    3: "\u{1F4CC}",
    4: "\u{1F4A1}",
    5: "\u2139\uFE0F"
  };
  const icon = iconByPriority[priority];
  return icon ? icon : fallbackIcon;
}
1511
/**
 * Pick a traffic-light emoji for a health score:
 * green from 90, yellow from 70, orange from 50, red below that.
 *
 * @param {number} score - Health score.
 * @returns {string} Emoji for the score band.
 */
function getHealthScoreEmoji(score) {
  // Bands ordered from highest floor to lowest; first match wins.
  const bands = [
    [90, "\u{1F7E2}"],
    [70, "\u{1F7E1}"],
    [50, "\u{1F7E0}"]
  ];
  for (const [floor, icon] of bands) {
    if (score >= floor) {
      return icon;
    }
  }
  return "\u{1F534}";
}
1517
/**
 * Upper-case the first character of a string and leave the rest unchanged.
 *
 * @param {string} str - Input text.
 * @returns {string} Text with its first character upper-cased ("" stays "").
 */
function capitalize(str) {
  const head = str.charAt(0);
  const tail = str.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
1520
/**
 * Turn a dash-separated category key into a display label, e.g.
 * "structured-data" becomes "Structured Data".
 *
 * @param {string} category - Category key.
 * @returns {string} Space-separated words, each passed through `capitalize`.
 */
function formatCategory(category) {
  const words = [];
  for (const piece of category.split("-")) {
    words.push(capitalize(piece));
  }
  return words.join(" ");
}
1523
/**
 * Group array items by the string form of one of their properties.
 *
 * @param {Array<object>} array - Items to group.
 * @param {string} key - Property whose value (converted with `String`)
 *   names the group.
 * @returns {Object<string, Array<object>>} Groups in first-seen order, each
 *   holding its items in input order.
 */
function groupBy(array, key) {
  const buckets = {};
  array.forEach((item) => {
    const bucketName = String(item[key]);
    const bucket = buckets[bucketName] || (buckets[bucketName] = []);
    bucket.push(item);
  });
  return buckets;
}
1536
// Upper bound on the estimated line count of one split-report chunk.
var MAX_LINES = 1000;
1537
/**
 * Write a report to disk as several Markdown files: one (or more, when
 * chunked) per issue category plus an index file.
 *
 * File names are `seo-<hostname with dashes>-<timestamp>-<category>[-<part>].md`
 * and `...-index.md`. Unless disabled, existing `seo-*.md` files in the
 * output directory are removed first; the directory is created when missing.
 *
 * @param {object} report - Report with `siteUrl` and `issues`.
 * @param {object} options
 * @param {string} options.outputDir - Target directory.
 * @param {boolean} [options.clearOutputDir=true] - Remove earlier
 *   `seo-*.md` files before writing.
 * @returns {{indexFile: string, categoryFiles: string[], totalFiles: number}}
 *   Names (not paths) of the files written.
 */
function generateSplitReports(report, options) {
  const { outputDir, clearOutputDir = true } = options;

  if (clearOutputDir && existsSync(outputDir)) {
    readdirSync(outputDir)
      .filter((name) => name.startsWith("seo-") && name.endsWith(".md"))
      .forEach((name) => {
        rmSync(join(outputDir, name), { force: true });
      });
  }
  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true });
  }

  // ISO time with ":" and "." replaced, cut after the seconds.
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const siteName = new URL(report.siteUrl).hostname.replace(/\./g, "-");
  const prefix = `seo-${siteName}-${timestamp}`;

  const categoryFiles = [];
  const issuesByCategory = groupIssuesByCategory(report.issues);
  for (const category of Object.keys(issuesByCategory)) {
    const categoryIssues = issuesByCategory[category] || [];
    if (categoryIssues.length === 0) {
      continue;
    }
    const chunks = splitIntoChunks(categoryIssues);
    const isMultiPart = chunks.length > 1;
    for (let index = 0; index < chunks.length; index += 1) {
      const chunk = chunks[index];
      if (!chunk) {
        continue;
      }
      const partNumber = index + 1;
      const filename = `${prefix}-${category}${isMultiPart ? `-${partNumber}` : ""}.md`;
      const body = generateCategoryFile(report.siteUrl, category, chunk, {
        part: isMultiPart ? partNumber : void 0,
        totalParts: isMultiPart ? chunks.length : void 0
      });
      writeFileSync(join(outputDir, filename), body, "utf-8");
      categoryFiles.push(filename);
    }
  }

  const indexFilename = `${prefix}-index.md`;
  writeFileSync(join(outputDir, indexFilename), generateIndexFile(report, categoryFiles), "utf-8");
  return {
    indexFile: indexFilename,
    categoryFiles,
    totalFiles: categoryFiles.length + 1
  };
}
1584
/**
 * Render the index file of a split report: site, score and date, a table of
 * non-zero issue counts by severity, the first ten recommendations with
 * their first action item, and links to the category files.
 *
 * @param {object} report - Report with `siteUrl`, `summary`, `generatedAt`
 *   and `recommendations`.
 * @param {string[]} categoryFiles - File names to link, relative to the
 *   index file.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function generateIndexFile(report, categoryFiles) {
  const out = [
    "# SEO Report Index",
    "",
    `Site: ${report.siteUrl}`,
    `Score: ${report.summary.healthScore}/100`,
    `Date: ${report.generatedAt.slice(0, 10)}`,
    "",
    "## Issues",
    "",
    "| Severity | Count |",
    "|----------|-------|"
  ];

  ["critical", "error", "warning", "info"].forEach((level) => {
    const total = report.summary.issuesBySeverity[level] || 0;
    if (total > 0) {
      out.push(`| ${level} | ${total} |`);
    }
  });

  out.push("", "## Actions", "");
  report.recommendations.slice(0, 10).forEach((rec, position) => {
    if (!rec) return;
    out.push(`${position + 1}. **${rec.title}** (${rec.affectedUrls.length})`);
    out.push(` ${rec.actionItems[0]}`);
  });

  out.push("", "## Files", "");
  for (const file of categoryFiles) {
    out.push(`- [${file}](./${file})`);
  }
  return out.join("\n");
}
1621
/**
 * Render one category file of a split report.
 *
 * Issues are grouped by title (first-seen order). Each group shows the
 * severity and recommendation of its first issue, the group size and at most
 * 20 URLs, followed by a "+N more" line when the group is larger.
 *
 * @param {string} siteUrl - Site the report belongs to.
 * @param {string} category - Category key, formatted for the heading via formatCategory2.
 * @param {object[]} issues - Issues to include in this file.
 * @param {{part?: number, totalParts?: number}} opts - Part numbering for multi-file categories.
 * @returns {string} Markdown document, lines joined with "\n".
 */
function generateCategoryFile(siteUrl, category, issues, opts) {
  const partSuffix = opts.part ? ` (Part ${opts.part}/${opts.totalParts})` : "";
  const out = [
    `# ${formatCategory2(category)}${partSuffix}`,
    "",
    `Site: ${siteUrl}`,
    `Issues: ${issues.length}`,
    ""
  ];

  const grouped = /* @__PURE__ */ new Map();
  for (const entry of issues) {
    const bucket = grouped.get(entry.title) || [];
    bucket.push(entry);
    grouped.set(entry.title, bucket);
  }

  grouped.forEach((members, heading) => {
    const [sample] = members;
    if (!sample) return;
    out.push(
      `## ${heading}`,
      "",
      `Severity: ${sample.severity}`,
      `Count: ${members.length}`,
      "",
      `> ${sample.recommendation}`,
      "",
      "URLs:"
    );
    for (const member of members.slice(0, 20)) {
      out.push(`- ${member.url}`);
    }
    const hidden = members.length - 20;
    if (hidden > 0) {
      out.push(`- ... +${hidden} more`);
    }
    out.push("");
  });

  return out.join("\n");
}
1656
/**
 * Split a category's issues into chunks whose rendered category file should
 * stay within MAX_LINES.
 *
 * Issues are grouped by title and groups are never split across chunks, so a
 * single oversized group still ends up alone in its own chunk.
 *
 * The line estimate mirrors what generateCategoryFile emits per group:
 * 9 fixed lines (heading, severity, count, recommendation, "URLs:" and the
 * blank separators), up to 20 URL lines, and one extra "... +N more" line
 * when the group has more than 20 issues. The previous estimate of
 * 8 + urls undercounted every group, so chunks with many groups could exceed
 * the budget.
 *
 * @param {object[]} issues - Issues of one category.
 * @param {string} category - Unused; kept for interface compatibility.
 * @returns {object[][]} Chunks of issues; `[[]]` when there are no issues.
 */
function splitIntoChunks(issues, category) {
  const HEADER_RESERVE = 10; // conservative allowance for the file header
  const GROUP_FIXED_LINES = 9;
  const MAX_URLS_PER_GROUP = 20;

  const byTitle = /* @__PURE__ */ new Map();
  for (const issue of issues) {
    const group = byTitle.get(issue.title) || [];
    group.push(issue);
    byTitle.set(issue.title, group);
  }

  const chunks = [];
  let currentChunk = [];
  let currentLines = HEADER_RESERVE;
  for (const [, groupIssues] of byTitle) {
    const urlCount = Math.min(MAX_URLS_PER_GROUP, groupIssues.length);
    const overflowLine = groupIssues.length > MAX_URLS_PER_GROUP ? 1 : 0;
    const groupLines = GROUP_FIXED_LINES + urlCount + overflowLine;
    if (currentLines + groupLines > MAX_LINES && currentChunk.length > 0) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentLines = HEADER_RESERVE;
    }
    currentChunk.push(...groupIssues);
    currentLines += groupLines;
  }
  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }
  return chunks.length > 0 ? chunks : [[]];
}
1682
/**
 * Bucket issues by their `category` property.
 *
 * @param {Iterable<object>} issues - Issues carrying a `category` key.
 * @returns {Object<string, object[]>} Category -> issues, in input order.
 */
function groupIssuesByCategory(issues) {
  const buckets = {};
  for (const item of issues) {
    const key = item.category;
    const bucket = buckets[key] || (buckets[key] = []);
    bucket.push(item);
  }
  return buckets;
}
1692
/**
 * Turn a dash-separated category key into a title: "core-web" -> "Core Web".
 *
 * @param {string} category - Dash-separated key.
 * @returns {string} Words capitalised on their first character, joined by spaces.
 */
function formatCategory2(category) {
  const words = [];
  for (const word of category.split("-")) {
    const head = word.charAt(0).toUpperCase();
    words.push(`${head}${word.slice(1)}`);
  }
  return words.join(" ");
}
1695
+
1696
+ // src/reports/claude-context.ts
1697
/**
 * Build the CLAUDE.md context document that accompanies the reports.
 *
 * The first part is static usage documentation for the toolkit; the
 * "Current Audit" part is filled in from the given report (site, score,
 * date, non-zero severity counts and the first five recommendations).
 *
 * @param {object} report - Report with `siteUrl`, `generatedAt`, `summary` and `recommendations`.
 * @returns {string} Markdown document, lines joined with "\n".
 */
function generateClaudeContext(report) {
  const doc = [
    "# @djangocfg/seo",
    "",
    "SEO audit toolkit. Generates AI-optimized split reports (max 1000 lines each).",
    "",
    "## Commands",
    "",
    "```bash",
    "# Audit (HTTP-based, crawls live site)",
    "pnpm seo:audit # Full audit (split reports)",
    "pnpm seo:audit --env dev # Audit local dev",
    "pnpm seo:audit --format all # All formats",
    "",
    "# Content (file-based, scans MDX/content/)",
    "pnpm exec djangocfg-seo content check # Check MDX links",
    "pnpm exec djangocfg-seo content fix # Show fixable links",
    "pnpm exec djangocfg-seo content fix --fix # Apply fixes",
    "pnpm exec djangocfg-seo content sitemap # Generate sitemap.ts",
    "```",
    "",
    "## Options",
    "",
    "- `--env, -e` - prod (default) or dev",
    "- `--site, -s` - Site URL (overrides env)",
    "- `--output, -o` - Output directory",
    "- `--format, -f` - split (default), json, markdown, ai-summary, all",
    "- `--max-pages` - Max pages (default: 100)",
    "- `--service-account` - Google service account JSON path",
    "- `--content-dir` - Content directory (default: content/)",
    "- `--base-path` - Base URL path for docs (default: /docs)",
    "",
    "## Reports",
    "",
    "- `seo-*-index.md` - Summary + links to categories",
    "- `seo-*-technical.md` - Broken links, sitemap issues",
    "- `seo-*-content.md` - H1, meta, title issues",
    "- `seo-*-performance.md` - Load time, TTFB issues",
    "- `seo-ai-summary-*.md` - Quick overview",
    "",
    "## Issue Severity",
    "",
    "- **critical** - Blocks indexing (fix immediately)",
    "- **error** - SEO problems (high priority)",
    "- **warning** - Recommendations (medium priority)",
    "- **info** - Best practices (low priority)",
    "",
    "## Issue Categories",
    "",
    "- **technical** - Broken links, sitemap, robots.txt",
    "- **content** - Missing H1, meta description, title",
    "- **indexing** - Not indexed, crawl errors from GSC",
    "- **performance** - Slow load time (>3s), high TTFB (>800ms)",
    "",
    "## Routes Scanner",
    "",
    "Scans Next.js App Router `app/` directory. Handles:",
    "- Route groups `(group)` - ignored in URL",
    "- Dynamic `[slug]` - shown as `:slug`",
    "- Catch-all `[...slug]` - shown as `:...slug`",
    "- Parallel `@folder` - skipped",
    "- Private `_folder` - skipped",
    "",
    "## Link Guidelines",
    "",
    "### Nextra/MDX Projects (content/)",
    "",
    "For non-index files (e.g., `overview.mdx`):",
    "- **Sibling file**: `../sibling` (one level up)",
    "- **Other section**: `/docs/full/path` (absolute)",
    "- **AVOID**: `./sibling` (browser adds filename to path!)",
    "- **AVOID**: `../../deep/path` (hard to maintain)",
    "",
    "For index files (e.g., `index.mdx`):",
    "- **Child file**: `./child` works correctly",
    "- **Sibling folder**: `../sibling/` or absolute",
    "",
    "### Next.js App Router Projects",
    "",
    "Use declarative routes from `_routes/`:",
    "```typescript",
    'import { routes } from "@/app/_routes";',
    "<Link href={routes.dashboard.machines}>Machines</Link>",
    "```",
    "",
    "Benefits: type-safe, refactor-friendly, centralized.",
    "",
    "---",
    "",
    "## Current Audit",
    "",
    `Site: ${report.siteUrl}`,
    `Score: ${report.summary.healthScore}/100`,
    `Date: ${report.generatedAt.slice(0, 10)}`,
    "",
    "### Issues",
    ""
  ];

  // Only severities with a positive count are listed.
  const { critical = 0, error = 0, warning = 0, info = 0 } = report.summary.issuesBySeverity;
  const severityRows = [
    ["Critical", critical],
    ["Error", error],
    ["Warning", warning],
    ["Info", info]
  ];
  for (const [label, amount] of severityRows) {
    if (amount > 0) doc.push(`- ${label}: ${amount}`);
  }

  doc.push("", "### Top Actions", "");
  report.recommendations.slice(0, 5).forEach((rec, position) => {
    if (!rec) return;
    doc.push(`${position + 1}. **${rec.title}** (${rec.affectedUrls.length} URLs)`);
  });

  doc.push(
    "",
    "### Report Files",
    "",
    "See split reports in this directory:",
    "- `seo-*-index.md` - Start here",
    "- `seo-*-technical.md` - Technical issues",
    "- `seo-*-content.md` - Content issues",
    "- `seo-*-performance.md` - Performance issues",
    ""
  );
  return doc.join("\n");
}
1819
+
1820
+ // src/reports/generator.ts
1821
/**
 * Generate the SEO report for a site and write it to disk in the requested
 * formats. A CLAUDE.md context file is always written next to the reports.
 *
 * @param {string} siteUrl - Absolute site URL; its hostname is used in file names.
 * @param {object} data - Collected audit data passed to generateJsonReport.
 * @param {object} options
 * @param {string} options.outputDir - Directory to write into (created if missing).
 * @param {string[]} options.formats - Any of "json", "markdown", "ai-summary", "split".
 * @param {boolean} [options.includeRawData=false] - Forwarded to generateJsonReport.
 * @param {boolean} [options.timestamp=true] - Append a timestamp to file names.
 * @param {boolean} [options.clearOutputDir=true] - Remove existing `seo-*` entries first.
 * @param {number} [options.maxUrlsPerIssue=10] - Forwarded to generateJsonReport.
 * @returns {Promise<{report: object, files: object}>} The report and the paths written.
 * @throws {TypeError} When `siteUrl` is not a valid absolute URL.
 */
async function generateAndSaveReports(siteUrl, data, options) {
  const {
    outputDir,
    formats,
    includeRawData = false,
    timestamp = true,
    clearOutputDir = true,
    maxUrlsPerIssue = 10
  } = options;
  if (clearOutputDir && existsSync(outputDir)) {
    try {
      const files = readdirSync(outputDir);
      for (const file of files) {
        if (file.startsWith("seo-")) {
          rmSync(join(outputDir, file), { force: true });
        }
      }
    } catch (error) {
      // Clearing is best-effort: stale files must not block a new report,
      // but the failure is reported instead of being swallowed silently.
      consola2.warn(`Could not clear previous reports in ${outputDir}:`, error);
    }
  }
  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true });
  }
  const report = generateJsonReport(siteUrl, data, { includeRawData, maxUrlsPerIssue });
  const result = {
    report,
    files: {}
  };
  // ISO timestamp with ":" and "." replaced so it is safe in file names, cut to seconds.
  const ts = timestamp ? `-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").slice(0, 19)}` : "";
  const siteName = new URL(siteUrl).hostname.replace(/\./g, "-");
  if (formats.includes("json")) {
    const filename = `seo-report-${siteName}${ts}.json`;
    const filepath = join(outputDir, filename);
    const content = exportJsonReport(report, true);
    writeFileSync(filepath, content, "utf-8");
    result.files.json = filepath;
    consola2.success(`JSON report saved: ${filepath}`);
  }
  if (formats.includes("markdown")) {
    const filename = `seo-report-${siteName}${ts}.md`;
    const filepath = join(outputDir, filename);
    const content = generateMarkdownReport(report, {
      includeRawIssues: true,
      includeUrls: true
    });
    writeFileSync(filepath, content, "utf-8");
    result.files.markdown = filepath;
    consola2.success(`Markdown report saved: ${filepath}`);
  }
  if (formats.includes("ai-summary")) {
    const filename = `seo-ai-summary-${siteName}${ts}.md`;
    const filepath = join(outputDir, filename);
    const content = generateAiSummary(report);
    writeFileSync(filepath, content, "utf-8");
    result.files.aiSummary = filepath;
    consola2.success(`AI summary saved: ${filepath}`);
  }
  if (formats.includes("split")) {
    const splitResult = generateSplitReports(report, {
      outputDir,
      clearOutputDir: false
      // Already cleared above
    });
    result.files.split = {
      index: join(outputDir, splitResult.indexFile),
      categories: splitResult.categoryFiles.map((f) => join(outputDir, f))
    };
    consola2.success(`Split reports saved: ${splitResult.totalFiles} files (index + ${splitResult.categoryFiles.length} categories)`);
  }
  const claudeContent = generateClaudeContext(report);
  const claudeFilepath = join(outputDir, "CLAUDE.md");
  writeFileSync(claudeFilepath, claudeContent, "utf-8");
  consola2.success(`AI context saved: ${claudeFilepath}`);
  return result;
}
1896
/**
 * Print a short console summary of a report: a box with the headline
 * numbers, one line per non-zero severity (critical, error, warning) and the
 * first three recommendations.
 *
 * @param {object} report - Report with `siteUrl`, `summary`, `issues` and `recommendations`.
 * @returns {void}
 */
function printReportSummary(report) {
  const { summary } = report;
  const headline = [
    `SEO Report: ${report.siteUrl}`,
    `Health Score: ${summary.healthScore}/100`,
    `Total URLs: ${summary.totalUrls}`,
    `Indexed: ${summary.indexedUrls} | Not Indexed: ${summary.notIndexedUrls}`,
    `Issues: ${report.issues.length}`
  ];
  consola2.box(headline.join("\n"));

  const counts = summary.issuesBySeverity;
  if (counts.critical) consola2.error(`Critical issues: ${counts.critical}`);
  if (counts.error) consola2.warn(`Errors: ${counts.error}`);
  if (counts.warning) consola2.info(`Warnings: ${counts.warning}`);

  consola2.log("");
  consola2.info("Top recommendations:");
  report.recommendations.slice(0, 3).forEach((item) => {
    consola2.log(` ${item.priority}. ${item.title} (${item.affectedUrls.length} URLs)`);
  });
}
1919
/**
 * Combine several reports into one.
 *
 * URL counters are summed, the health score is the rounded mean, issue /
 * inspection / crawl lists are concatenated, per-category and per-severity
 * counts are added up, and recommendations sharing `category:title` are
 * merged by concatenating their affected URLs (the first occurrence supplies
 * the other fields). Merged recommendations are sorted by ascending priority.
 *
 * @param {object[]} reports - Reports to merge; must not be empty.
 * @returns {object} A new merged report; input recommendations are not mutated.
 * @throws {Error} When `reports` is empty.
 */
function mergeReports(reports) {
  if (reports.length === 0) {
    throw new Error("Cannot merge empty reports array");
  }

  const total = (pick) => reports.reduce((acc, r) => acc + pick(r), 0);
  const addCounts = (target, source) => {
    for (const [key, amount] of Object.entries(source)) {
      target[key] = (target[key] || 0) + amount;
    }
  };

  const merged = {
    id: `merged-${Date.now().toString(36)}`,
    siteUrl: reports.map((r) => r.siteUrl).join(", "),
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    summary: {
      totalUrls: total((r) => r.summary.totalUrls),
      indexedUrls: total((r) => r.summary.indexedUrls),
      notIndexedUrls: total((r) => r.summary.notIndexedUrls),
      issuesByCategory: {},
      issuesBySeverity: {},
      healthScore: Math.round(total((r) => r.summary.healthScore) / reports.length)
    },
    issues: reports.flatMap((r) => r.issues),
    urlInspections: reports.flatMap((r) => r.urlInspections),
    crawlResults: reports.flatMap((r) => r.crawlResults),
    recommendations: []
  };

  for (const { summary } of reports) {
    addCounts(merged.summary.issuesByCategory, summary.issuesByCategory);
    addCounts(merged.summary.issuesBySeverity, summary.issuesBySeverity);
  }

  const combined = /* @__PURE__ */ new Map();
  for (const { recommendations } of reports) {
    for (const rec of recommendations) {
      const key = `${rec.category}:${rec.title}`;
      const seen = combined.get(key);
      if (combined.has(key)) {
        seen.affectedUrls.push(...rec.affectedUrls);
      } else {
        // Copy the URL list so the source report is left untouched.
        combined.set(key, { ...rec, affectedUrls: [...rec.affectedUrls] });
      }
    }
  }
  merged.recommendations = [...combined.values()].sort((a, b) => a.priority - b.priority);
  return merged;
}
1969
+
1970
+ // src/analyzer.ts
1971
/**
 * High-level entry point that runs the individual analyses (robots.txt,
 * sitemaps, crawler, Google Search Console) against one site and combines
 * the findings into a report.
 */
var SeoAnalyzer = class {
  config;
  siteUrl;
  gscClient;
  /**
   * @param {object} config - Analyzer configuration.
   *   `siteUrl` (or `googleConsole.siteUrl`) is required; `googleConsole`
   *   enables Search Console inspection; `crawler` and `reports` hold defaults.
   * @throws {Error} When no site URL can be determined.
   */
  constructor(config) {
    this.config = config;
    this.siteUrl = config.siteUrl || config.googleConsole?.siteUrl || "";
    if (!this.siteUrl) {
      throw new Error("siteUrl is required");
    }
    if (config.googleConsole) {
      this.gscClient = new GoogleConsoleClient({
        ...config.googleConsole,
        siteUrl: this.siteUrl
      });
    }
  }
  /**
   * Run full SEO analysis
   *
   * Each stage is isolated in its own try/catch: a failing stage is logged
   * and the remaining stages still run.
   *
   * @param {object} [options]
   * @param {boolean} [options.includeGoogleConsole=true]
   * @param {boolean} [options.includeCrawler=true]
   * @param {boolean} [options.includeRobotsTxt=true]
   * @param {boolean} [options.includeSitemap=true]
   * @param {number} [options.crawlerMaxPages] - Defaults to `config.crawler.maxPages` or 100.
   * @param {number} [options.crawlerMaxDepth] - Defaults to `config.crawler.maxDepth` or 3.
   * @param {string[]} [options.urlsToInspect] - URLs for Search Console; defaults to
   *   the first 50 crawled pages that returned status 200.
   * @returns {Promise<{report: object, issues: object[], urlInspections: object[], crawlResults: object[]}>}
   */
  async analyze(options = {}) {
    const {
      includeGoogleConsole = true,
      includeCrawler = true,
      includeRobotsTxt = true,
      includeSitemap = true,
      crawlerMaxPages = this.config.crawler?.maxPages || 100,
      crawlerMaxDepth = this.config.crawler?.maxDepth || 3,
      urlsToInspect
    } = options;
    consola2.start(`Starting SEO analysis for ${this.siteUrl}`);
    const allIssues = [];
    const allInspections = [];
    const allCrawlResults = [];
    if (includeRobotsTxt) {
      consola2.info("Analyzing robots.txt...");
      try {
        const robotsAnalysis = await analyzeRobotsTxt(this.siteUrl);
        allIssues.push(...robotsAnalysis.issues);
        consola2.success(`robots.txt: ${robotsAnalysis.exists ? "found" : "not found"}`);
      } catch (error) {
        consola2.warn("Failed to analyze robots.txt:", error);
      }
    }
    if (includeSitemap) {
      consola2.info("Analyzing sitemaps...");
      try {
        const sitemapUrl = new URL("/sitemap.xml", this.siteUrl).href;
        const sitemapAnalyses = await analyzeAllSitemaps(sitemapUrl);
        let totalUrls = 0;
        for (const analysis of sitemapAnalyses) {
          allIssues.push(...analysis.issues);
          totalUrls += analysis.urls.length;
        }
        consola2.success(`Sitemaps: ${sitemapAnalyses.length} found, ${totalUrls} URLs`);
      } catch (error) {
        consola2.warn("Failed to analyze sitemaps:", error);
      }
    }
    if (includeCrawler) {
      consola2.info("Crawling site...");
      try {
        // Configured crawler settings are the base; the resolved limits are
        // applied last so per-call overrides (e.g. from healthCheck) are not
        // clobbered by `config.crawler.maxPages` / `maxDepth`.
        const crawler = new SiteCrawler(this.siteUrl, {
          ...this.config.crawler,
          maxPages: crawlerMaxPages,
          maxDepth: crawlerMaxDepth
        });
        const crawlResults = await crawler.crawl();
        const crawlIssues = analyzeCrawlResults(crawlResults);
        allCrawlResults.push(...crawlResults);
        allIssues.push(...crawlIssues);
        consola2.success(`Crawled ${crawlResults.length} pages, found ${crawlIssues.length} issues`);
      } catch (error) {
        consola2.error("Failed to crawl site:", error);
      }
    }
    if (includeGoogleConsole && this.gscClient) {
      consola2.info("Inspecting URLs via Google Search Console...");
      try {
        const isAuth = await this.gscClient.verify();
        if (isAuth) {
          const urls = urlsToInspect || allCrawlResults.filter((r) => r.statusCode === 200).slice(0, 50).map((r) => r.url);
          if (urls.length > 0) {
            const inspections = await this.gscClient.inspectUrls(urls);
            const gscIssues = analyzeInspectionResults(inspections);
            allInspections.push(...inspections);
            allIssues.push(...gscIssues);
            consola2.success(`Inspected ${inspections.length} URLs, found ${gscIssues.length} issues`);
          }
        }
      } catch (error) {
        consola2.warn("Google Search Console inspection failed:", error);
      }
    }
    const report = generateJsonReport(
      this.siteUrl,
      {
        issues: allIssues,
        urlInspections: allInspections,
        crawlResults: allCrawlResults
      },
      { includeRawData: true }
    );
    consola2.success(`Analysis complete. Health score: ${report.summary.healthScore}/100`);
    return {
      report,
      issues: allIssues,
      urlInspections: allInspections,
      crawlResults: allCrawlResults
    };
  }
  /**
   * Save report to files
   *
   * Runs a full analysis, writes the reports and prints a console summary.
   *
   * @param {string} [outputDir] - Defaults to `config.reports.outputDir` or "./seo-reports".
   * @returns {Promise<void>}
   */
  async saveReport(outputDir) {
    const result = await this.analyze();
    await generateAndSaveReports(
      this.siteUrl,
      {
        issues: result.issues,
        urlInspections: result.urlInspections,
        crawlResults: result.crawlResults
      },
      {
        outputDir: outputDir || this.config.reports?.outputDir || "./seo-reports",
        formats: this.config.reports?.formats || ["json", "markdown"],
        includeRawData: true
      }
    );
    printReportSummary(result.report);
  }
  /**
   * Quick health check
   *
   * Skips Search Console and limits the crawl to 20 pages at depth 2.
   *
   * @returns {Promise<{healthScore: number, criticalIssues: number, errors: number, warnings: number}>}
   */
  async healthCheck() {
    const result = await this.analyze({
      includeGoogleConsole: false,
      crawlerMaxPages: 20,
      crawlerMaxDepth: 2
    });
    return {
      healthScore: result.report.summary.healthScore,
      criticalIssues: result.report.summary.issuesBySeverity.critical || 0,
      errors: result.report.summary.issuesBySeverity.error || 0,
      warnings: result.report.summary.issuesBySeverity.warning || 0
    };
  }
  /**
   * Inspect specific URLs
   *
   * @param {string[]} urls - URLs to inspect through Search Console.
   * @returns {Promise<object[]>} Whatever the Search Console client returns.
   * @throws {Error} When Google Console is not configured.
   */
  async inspectUrls(urls) {
    if (!this.gscClient) {
      throw new Error("Google Console is not configured");
    }
    return this.gscClient.inspectUrls(urls);
  }
  /**
   * Get Google Console client
   *
   * @returns {object|undefined} The client, or undefined when not configured.
   */
  getGoogleConsoleClient() {
    return this.gscClient;
  }
};
2134
// Default regular-expression source for links the link checker skips.
// Domain entries are anchored to a host boundary (start of string, "/" or
// ".") and their dots are escaped, so "x.com" no longer matches unrelated
// hosts such as "netflix.com" and "github.com" no longer matches "mygithub.com".
var DEFAULT_SKIP_PATTERN = [
  "(?:^|[/.])github\\.com",
  "(?:^|[/.])twitter\\.com",
  "(?:^|[/.])linkedin\\.com",
  "(?:^|[/.])x\\.com",
  "127\\.0\\.0\\.1",
  "localhost:[0-9]+",
  "api\\.localhost",
  "demo\\.localhost",
  "cdn-cgi",
  // Cloudflare email protection
  "mailto:",
  // Email links
  "tel:",
  // Phone links
  "javascript:"
  // JavaScript links
].join("|");
2152
/**
 * Resolve the site URL to check.
 *
 * Precedence: `options.url`, then the environment variables
 * NEXT_PUBLIC_SITE_URL, SITE_URL and BASE_URL (first non-empty wins).
 *
 * @param {{url?: string}} options
 * @returns {string} The resolved URL.
 * @throws {Error} When neither the option nor any environment variable is set.
 */
function getSiteUrl(options) {
  const { NEXT_PUBLIC_SITE_URL, SITE_URL, BASE_URL } = process.env;
  const resolved = options.url || NEXT_PUBLIC_SITE_URL || SITE_URL || BASE_URL;
  if (!resolved) {
    throw new Error(
      "URL is required. Provide it via options.url or set NEXT_PUBLIC_SITE_URL environment variable."
    );
  }
  return resolved;
}
2164
/**
 * Tell whether a link points to a different host than the base URL.
 *
 * The link is resolved against `baseUrl`, so relative links ("/about",
 * "page.html") are classified as internal instead of failing to parse and
 * being reported as external.
 *
 * @param {string} linkUrl - Absolute or relative link.
 * @param {string} baseUrl - Absolute URL of the site being checked.
 * @returns {boolean} True when the hostnames differ or either URL cannot be parsed.
 */
function isExternalUrl(linkUrl, baseUrl) {
  try {
    const base = new URL(baseUrl);
    const link = new URL(linkUrl, base);
    return link.hostname !== base.hostname;
  } catch {
    // Unparseable input is treated as external (the conservative choice).
    return true;
  }
}
2173
/**
 * Crawl a site with linkinator and report broken links.
 *
 * A link counts as broken when its status is outside 200-399 or linkinator
 * marks it BROKEN. External links without a status (reported as "TIMEOUT")
 * are ignored. `success` reflects internal links only.
 *
 * If the check itself throws, the error is logged (timeouts get a softer
 * message), a result is still built from whatever was collected, and
 * `success` additionally requires that at least one link was checked.
 *
 * @param {object} options
 * @param {string} [options.url] - Site URL; falls back to environment variables (see getSiteUrl).
 * @param {number} [options.timeout=60000] - Per-request timeout in ms.
 * @param {string} [options.skipPattern=DEFAULT_SKIP_PATTERN] - RegExp source of links to skip.
 * @param {boolean} [options.showOnlyBroken=true] - Suppress the summary when nothing is broken.
 * @param {number} [options.concurrency=50]
 * @param {string} [options.outputFile] - When set, the result is saved there.
 * @param {string} [options.reportFormat="text"] - "json", "markdown" or "text".
 * @param {boolean} [options.verbose=true] - Print the start banner.
 * @returns {Promise<object>} Result with `success`, `broken`, `total`, `errors`,
 *   `internalErrors`, `externalErrors`, `url`, `timestamp` and `duration`.
 */
async function checkLinks(options) {
  const url = getSiteUrl(options);
  const {
    timeout = 6e4,
    skipPattern = DEFAULT_SKIP_PATTERN,
    showOnlyBroken = true,
    concurrency = 50,
    outputFile,
    reportFormat = "text",
    verbose = true
  } = options;
  const startTime = Date.now();
  if (verbose) {
    console.log(chalk.cyan(`\n\u{1F50D} Starting link check for: ${chalk.bold(url)}`));
    console.log(chalk.dim(` Timeout: ${timeout}ms | Concurrency: ${concurrency}`));
    console.log("");
  }
  const skipRegex = new RegExp(skipPattern);
  const checkOptions = {
    path: url,
    recurse: true,
    timeout,
    concurrency,
    linksToSkip: (link) => Promise.resolve(skipRegex.test(link))
  };

  const broken = [];
  const internalErrors = [];
  const externalErrors = [];
  let total = 0;

  // Snapshot of the collected state; key order matters for the JSON report.
  const buildResult = (success) => {
    const duration = Date.now() - startTime;
    return {
      success,
      broken: broken.length,
      total,
      errors: broken,
      internalErrors,
      externalErrors,
      url,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      duration
    };
  };
  const persist = async (result) => {
    await saveReport(result, outputFile, reportFormat);
    console.log(chalk.green(`\n\u{1F4C4} Report saved to: ${chalk.cyan(outputFile)}`));
  };
  const printInternal = () => {
    console.log(chalk.red(`\u274C Found ${internalErrors.length} broken internal links:`));
    for (const { url: linkUrl, status, reason } of internalErrors.slice(0, 20)) {
      console.log(` [${status}] ${linkUrl}${reason ? ` (${reason})` : ""}`);
    }
    if (internalErrors.length > 20) {
      console.log(chalk.dim(` ... and ${internalErrors.length - 20} more`));
    }
  };
  const printExternal = () => {
    console.log("");
    console.log(chalk.yellow(`\u26A0\uFE0F Found ${externalErrors.length} broken external links:`));
    for (const { url: linkUrl, status } of externalErrors.slice(0, 10)) {
      console.log(` [${status}] ${linkUrl}`);
    }
    if (externalErrors.length > 10) {
      console.log(chalk.dim(` ... and ${externalErrors.length - 10} more`));
    }
  };

  try {
    const results = await linkinator.check(checkOptions);
    for (const entry of results.links) {
      total++;
      const status = entry.status || 0;
      const isExternal = isExternalUrl(entry.url, url);
      const healthy = status >= 200 && status < 400 && entry.state !== "BROKEN";
      if (healthy) continue;
      const statusValue = status || "TIMEOUT";
      // External hosts that never answered are not counted against the site.
      if (statusValue === "TIMEOUT" && isExternal) continue;
      const brokenLink = {
        url: entry.url,
        status: statusValue,
        reason: entry.state === "BROKEN" ? "BROKEN" : void 0,
        isExternal,
        sourceUrl: entry.parent
      };
      broken.push(brokenLink);
      (isExternal ? externalErrors : internalErrors).push(brokenLink);
    }

    const success = internalErrors.length === 0;
    if (!showOnlyBroken || broken.length > 0) {
      if (success && externalErrors.length === 0) {
        console.log(`\u2705 All links are valid!`);
        console.log(` Checked ${total} links.`);
      } else {
        if (internalErrors.length > 0) printInternal();
        if (externalErrors.length > 0) printExternal();
      }
    }

    const result = buildResult(success);
    if (outputFile) {
      await persist(result);
    }
    return result;
  } catch (error) {
    const isError = error instanceof Error;
    const errorMessage = isError ? error.message : String(error);
    const errorName = isError ? error.name : "UnknownError";
    const looksLikeTimeout = ["timeout", "TimeoutError", "aborted"].some((needle) => errorMessage.includes(needle)) || errorName === "TimeoutError";
    if (looksLikeTimeout) {
      console.warn(chalk.yellow(`\u26A0\uFE0F Some links timed out after ${timeout}ms`));
      console.warn(chalk.dim(` This is normal for slow or protected URLs.`));
      if (total > 0) {
        console.warn(chalk.dim(` Checked ${total} links before timeout.`));
      }
      if (broken.length > 0) {
        console.log(chalk.red(`\n\u274C Found ${broken.length} broken links:`));
        for (const { url: url2, status, reason } of broken) {
          const statusColor = typeof status === "number" && status >= 500 ? chalk.red : chalk.yellow;
          console.log(
            ` ${statusColor(`[${status}]`)} ${chalk.cyan(url2)}${reason ? chalk.dim(` (${reason})`) : ""}`
          );
        }
      }
    } else {
      console.error(chalk.red(`\u274C Error checking links: ${errorMessage}`));
    }

    const result = buildResult(internalErrors.length === 0 && total > 0);
    if (outputFile) {
      try {
        await persist(result);
      } catch (saveError) {
        console.warn(
          chalk.yellow(
            `\n\u26A0\uFE0F Failed to save report: ${saveError instanceof Error ? saveError.message : String(saveError)}`
          )
        );
      }
    }
    return result;
  }
}
2327
/**
 * Convert a link-check result into SEO issues (category "technical").
 *
 * Internal broken links become "critical" when the status is a number >= 500
 * and "error" otherwise; external broken links are always "warning".
 * Internal issues come first, then external ones.
 *
 * @param {object} result - Link-check result with `internalErrors`, `externalErrors`, `timestamp` and `url`.
 * @returns {object[]} One issue per broken link.
 */
function linkResultsToSeoIssues(result) {
  const metadataFor = (entry, isExternal) => ({
    status: entry.status,
    reason: entry.reason,
    sourceUrl: entry.sourceUrl || result.url,
    isExternal
  });

  const internal = [];
  for (const entry of result.internalErrors) {
    const serverSide = typeof entry.status === "number" && entry.status >= 500;
    internal.push({
      id: `broken-internal-link-${hash4(entry.url)}`,
      url: entry.url,
      category: "technical",
      severity: serverSide ? "critical" : "error",
      title: `Broken internal link: ${entry.status}`,
      description: `Internal link returned ${entry.status} status${entry.reason ? ` (${entry.reason})` : ""}.`,
      recommendation: "Fix the internal link. This affects user experience and SEO.",
      detectedAt: result.timestamp,
      metadata: metadataFor(entry, false)
    });
  }

  const external = [];
  for (const entry of result.externalErrors) {
    external.push({
      id: `broken-external-link-${hash4(entry.url)}`,
      url: entry.url,
      category: "technical",
      severity: "warning",
      title: `Broken external link: ${entry.status}`,
      description: `External link returned ${entry.status} status.`,
      recommendation: "Consider removing or updating the external link.",
      detectedAt: result.timestamp,
      metadata: metadataFor(entry, true)
    });
  }

  return [...internal, ...external];
}
2367
/**
 * Serialise a link-check result and write it to `filePath`.
 *
 * The parent directory is created first unless it is the current directory.
 * "json" writes pretty-printed JSON, "markdown" a markdown report; any other
 * format (including "text") writes the plain-text report.
 *
 * @param {object} result - Link-check result.
 * @param {string} filePath - Destination file.
 * @param {string} format - "json", "markdown" or "text".
 * @returns {Promise<void>}
 */
async function saveReport(result, filePath, format) {
  const targetDir = dirname(filePath);
  if (targetDir !== ".") {
    await mkdir(targetDir, { recursive: true });
  }

  let body;
  if (format === "json") {
    body = JSON.stringify(result, null, 2);
  } else if (format === "markdown") {
    body = generateMarkdownReport2(result);
  } else {
    body = generateTextReport(result);
  }
  await writeFile(filePath, body, "utf-8");
}
2387
/**
 * Render a link-check result as markdown.
 *
 * The duration line is emitted only for a truthy `duration`; the
 * "Broken Links" table only when `errors` is non-empty.
 *
 * @param {object} result - Link-check result.
 * @returns {string} Markdown document, lines joined with "\n".
 */
function generateMarkdownReport2(result) {
  const out = [
    "# Link Check Report",
    "",
    `**URL:** ${result.url}`,
    `**Timestamp:** ${result.timestamp}`
  ];
  if (result.duration) {
    out.push(`**Duration:** ${(result.duration / 1e3).toFixed(2)}s`);
  }
  const verdict = result.success ? "\u2705 All links valid" : "\u274C Broken links found";
  out.push(
    "",
    `**Status:** ${verdict}`,
    `**Total links:** ${result.total}`,
    `**Broken links:** ${result.broken}`,
    ""
  );
  if (result.errors.length > 0) {
    out.push("## Broken Links", "", "| Status | URL | Reason |", "|--------|-----|--------|");
    for (const entry of result.errors) {
      out.push(`| ${entry.status} | ${entry.url} | ${entry.reason || "-"} |`);
    }
    out.push("");
  }
  return out.join("\n");
}
2415
/**
 * Render a link-check result as plain text.
 *
 * The duration line is emitted only for a truthy `duration`; the
 * "Broken Links" section only when `errors` is non-empty.
 *
 * @param {object} result - Link-check result.
 * @returns {string} Text report, lines joined with "\n".
 */
function generateTextReport(result) {
  const out = [
    "Link Check Report",
    "=".repeat(50),
    `URL: ${result.url}`,
    `Timestamp: ${result.timestamp}`
  ];
  if (result.duration) {
    out.push(`Duration: ${(result.duration / 1e3).toFixed(2)}s`);
  }
  const verdict = result.success ? "\u2705 All links valid" : "\u274C Broken links found";
  out.push(
    "",
    `Status: ${verdict}`,
    `Total links: ${result.total}`,
    `Broken links: ${result.broken}`,
    ""
  );
  if (result.errors.length > 0) {
    out.push("Broken Links:", "-".repeat(50));
    for (const entry of result.errors) {
      const suffix = entry.reason ? ` (${entry.reason})` : "";
      out.push(`[${entry.status}] ${entry.url}${suffix}`);
    }
    out.push("");
  }
  return out.join("\n");
}
2441
/**
 * Short, deterministic, non-cryptographic hash of a string.
 *
 * Folds the UTF-16 code units with `h = h * 31 + unit`, wrapped to a signed
 * 32-bit integer on every step, and returns the absolute value in base 36.
 *
 * @param {string} str - Input string.
 * @returns {string} Base-36 digest ("0" for the empty string).
 */
function hash4(str) {
  let acc = 0;
  let index = 0;
  while (index < str.length) {
    // (acc << 5) - acc is acc * 31; Math.imul and "| 0" keep it in int32 range.
    acc = (Math.imul(31, acc) + str.charCodeAt(index)) | 0;
    index += 1;
  }
  return Math.abs(acc).toString(36);
}
2450
/**
 * Read a list of URLs from a text file, one per line.
 *
 * Lines are trimmed; blank lines and lines starting with "#" are dropped.
 *
 * @param {string} filePath - Path of the file to read (UTF-8).
 * @returns {string[]} The remaining lines, in file order.
 * @throws {Error} When the file does not exist.
 */
function loadUrlsFromFile(filePath) {
  if (!existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }
  const urls = [];
  for (const rawLine of readFileSync(filePath, "utf-8").split("\n")) {
    const entry = rawLine.trim();
    if (entry && !entry.startsWith("#")) {
      urls.push(entry);
    }
  }
  return urls;
}
2457
/**
 * Normalise a URL for comparison: resolve it against `baseUrl`, drop the
 * fragment and strip a single trailing slash from any path other than "/".
 *
 * @param {string} url - Absolute or relative URL.
 * @param {string} [baseUrl] - Base used to resolve relative URLs.
 * @returns {string} The normalised href, or `url` unchanged when it cannot be parsed.
 */
function normalizeUrl(url, baseUrl) {
  let target;
  try {
    target = new URL(url, baseUrl);
  } catch {
    return url;
  }
  target.hash = "";
  const { pathname } = target;
  if (pathname !== "/" && pathname.endsWith("/")) {
    target.pathname = pathname.slice(0, -1);
  }
  return target.href;
}
2469
/**
 * Check whether two absolute URLs share the same hostname (port and protocol
 * are ignored).
 *
 * @param {string} url - Absolute URL to test.
 * @param {string} baseUrl - Absolute URL to compare against.
 * @returns {boolean} True on identical hostnames; false otherwise or when either URL cannot be parsed.
 */
function isSameDomain(url, baseUrl) {
  let candidate;
  let reference;
  try {
    candidate = new URL(url);
    reference = new URL(baseUrl);
  } catch {
    return false;
  }
  return candidate.hostname === reference.hostname;
}
2478
/**
 * Format a byte count with a binary unit (1 KB = 1024 B), rounded to at most
 * two decimals: 1536 -> "1.5 KB".
 *
 * The unit is found by repeated division instead of a logarithm, so exact
 * powers of 1024 cannot land in the wrong unit through floating-point error.
 * The unit index is clamped to the table, so values below 1 stay in "B" and
 * values beyond the largest unit are expressed in that unit instead of
 * producing "undefined".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Human-readable size.
 */
function formatBytes(bytes) {
  if (bytes === 0) return "0 B";
  const units = ["B", "KB", "MB", "GB", "TB", "PB"];
  const k = 1024;
  let magnitude = Math.abs(bytes);
  let i = 0;
  while (magnitude >= k && i < units.length - 1) {
    magnitude /= k;
    i += 1;
  }
  return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${units[i]}`;
}
2485
/**
 * Formats a duration in milliseconds as "Nms", "N.Ns" or "Nm Ns".
 *
 * Seconds are rounded before the value is split into minutes and seconds,
 * so the result never shows "60s" (e.g. 119600 ms is "2m 0s", not "1m 60s").
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(ms) {
  if (ms < 1e3) return `${ms}ms`;
  if (ms < 6e4) {
    const seconds = (ms / 1e3).toFixed(1);
    // Values just under a minute round up to "60.0"; report those as minutes.
    if (seconds !== "60.0") return `${seconds}s`;
  }
  const totalSeconds = Math.round(ms / 1e3);
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}
2490
/**
 * Splits an array into consecutive slices of at most `size` elements.
 *
 * @param {Array} array - Source array (not modified).
 * @param {number} size - Maximum slice length; a positive integer or `Infinity`.
 * @returns {Array[]} The slices in order; empty when `array` is empty.
 * @throws {RangeError} When `size` is not a positive integer (or `Infinity`).
 *   A zero or negative size would otherwise never advance the loop.
 */
function chunk(array, size) {
  const isValidSize = size === Infinity || (Number.isInteger(size) && size > 0);
  if (!isValidSize) {
    throw new RangeError(`chunk size must be a positive integer, got ${size}`);
  }
  const chunks = [];
  for (let i = 0; i < array.length; i += size) {
    chunks.push(array.slice(i, i + size));
  }
  return chunks;
}
2497
/**
 * Returns a promise that resolves after a `setTimeout` of `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
2500
/**
 * Calls `fn` until it succeeds or the retry budget is used up, waiting with
 * exponential back-off between attempts.
 *
 * @param {() => any} fn - Operation to run; its result is awaited.
 * @param {object} [options]
 * @param {number} [options.retries=3] - Extra attempts after the first one.
 * @param {number} [options.minTimeout=1000] - Delay before the first retry (ms).
 * @param {number} [options.maxTimeout=30000] - Upper bound for the delay (ms).
 * @param {number} [options.factor=2] - Multiplier applied to the delay after each wait.
 * @returns {Promise<any>} The first successful result of `fn`.
 * @throws The error from the last failed attempt.
 */
async function retry(fn, options = {}) {
  const { retries = 3, minTimeout = 1e3, maxTimeout = 3e4, factor = 2 } = options;
  let delay = minTimeout;
  let failure;
  let attempt = 0;
  while (attempt <= retries) {
    try {
      return await fn();
    } catch (error) {
      failure = error;
    }
    // No wait after the final attempt.
    if (attempt < retries) {
      await sleep(delay);
      delay = Math.min(delay * factor, maxTimeout);
    }
    attempt += 1;
  }
  throw failure;
}
2517
/**
 * Classifies a project directory by the folders it contains.
 *
 * @param {string} cwd - Project root.
 * @returns {"nextra"|"nextjs"|"unknown"} "nextra" when `<cwd>/content` exists
 *   and `hasMetaFiles` finds a meta file in it; otherwise "nextjs" when
 *   `<cwd>/app` exists; otherwise "unknown".
 */
function detectProjectType(cwd) {
  const contentDir = path.join(cwd, "content");
  if (fs.existsSync(contentDir) && hasMetaFiles(contentDir)) {
    return "nextra";
  }
  return fs.existsSync(path.join(cwd, "app")) ? "nextjs" : "unknown";
}
2530
/**
 * Recursively checks whether a directory tree contains a `_meta.ts` or
 * `_meta.tsx` entry. Sub-directories whose name starts with "." are skipped.
 *
 * @param {string} dir - Directory to search.
 * @returns {boolean} `true` as soon as a meta file is found; `false` when
 *   none exists or `dir` itself is missing.
 */
function hasMetaFiles(dir) {
  if (!fs.existsSync(dir)) return false;
  return fs.readdirSync(dir, { withFileTypes: true }).some((entry) => {
    const { name } = entry;
    if (name === "_meta.ts" || name === "_meta.tsx") return true;
    const isVisibleDir = entry.isDirectory() && !name.startsWith(".");
    return isVisibleDir && hasMetaFiles(path.join(dir, name));
  });
}
2546
/**
 * Collects basic facts about a project: its detected type and the content
 * and page files found under the configured directories.
 *
 * @param {string} cwd - Project root.
 * @param {object} [config]
 * @param {string} [config.contentDir="content"] - Content folder, relative to `cwd`.
 * @param {string} [config.appDir="app"] - App folder, relative to `cwd`.
 * @param {string[]} [config.extensions=[".mdx", ".md"]] - Content file extensions.
 * @returns {{projectType: string, hasContent: boolean, hasApp: boolean, mdxFiles: string[], pageFiles: string[]}}
 */
function scanProject(cwd, config) {
  const resolveDir = (override, fallback) => path.join(cwd, override || fallback);
  const contentDir = resolveDir(config?.contentDir, "content");
  const appDir = resolveDir(config?.appDir, "app");
  const extensions = config?.extensions || [".mdx", ".md"];
  const hasContent = fs.existsSync(contentDir);
  const hasApp = fs.existsSync(appDir);
  return {
    // Detection is based on `cwd` alone; it does not receive `config`.
    projectType: detectProjectType(cwd),
    hasContent,
    hasApp,
    mdxFiles: hasContent ? getAllFiles(contentDir, extensions) : [],
    pageFiles: hasApp ? getPageFiles(appDir) : []
  };
}
2563
/**
 * Recursively collects files whose lower-cased extension is listed in
 * `extensions`. Directories named `node_modules` or starting with "." are
 * not descended into.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} extensions - Accepted extensions, including the dot.
 * @param {string[]} [files=[]] - Accumulator that matches are appended to.
 * @returns {string[]} The same `files` array, with full paths appended.
 */
function getAllFiles(dir, extensions, files = []) {
  if (!fs.existsSync(dir)) return files;
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      const skipped = entry.name.startsWith(".") || entry.name === "node_modules";
      if (!skipped) getAllFiles(fullPath, extensions, files);
      continue;
    }
    if (extensions.includes(path.extname(entry.name).toLowerCase())) {
      files.push(fullPath);
    }
  }
  return files;
}
2581
/**
 * Lists every `.mdx` and `.md` file below `contentDir` via `getAllFiles`.
 *
 * @param {string} contentDir - Directory to walk.
 * @returns {string[]} Full paths of the matching files.
 */
function getAllMdxFiles(contentDir) {
  const markdownExtensions = [".mdx", ".md"];
  return getAllFiles(contentDir, markdownExtensions);
}
2584
/**
 * Recursively collects page files below an app directory.
 *
 * Entries whose name starts with "_" or "." are ignored (files and folders
 * alike). All four page file names accepted by the route scanner in this
 * module (`page.tsx`, `page.ts`, `page.jsx`, `page.js`) are recognised, so
 * JavaScript-only projects are not reported as having no pages.
 *
 * @param {string} appDir - Root of the app directory.
 * @returns {string[]} Full paths of the page files found.
 */
function getPageFiles(appDir) {
  // Kept local so this function has no load-order dependency on other constants.
  const pageFileNames = new Set(["page.tsx", "page.ts", "page.jsx", "page.js"]);
  const pages = [];
  function scan(dir) {
    if (!fs.existsSync(dir)) return;
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      if (entry.name.startsWith("_") || entry.name.startsWith(".")) continue;
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        scan(fullPath);
      } else if (pageFileNames.has(entry.name)) {
        pages.push(fullPath);
      }
    }
  }
  scan(appDir);
  return pages;
}
2602
/**
 * Describes a content file relative to the content root.
 *
 * @param {string} filePath - Full path of the file.
 * @param {string} contentDir - Content root the path is made relative to.
 * @returns {{fullPath: string, relativePath: string, isIndex: boolean, folder: string, name: string}}
 *   `folder` is the relative directory ("" at the root), `name` the file name
 *   without extension, and `isIndex` is true when that name is "index".
 */
function getFileInfo(filePath, contentDir) {
  const relativePath = path.relative(contentDir, filePath);
  const { dir, name } = path.parse(relativePath);
  return {
    fullPath: filePath,
    relativePath,
    isIndex: name === "index",
    folder: dir || "",
    name
  };
}
2615
/**
 * Checks whether a docs path maps to a content file.
 *
 * After removing one leading and one trailing slash, the path is looked up
 * as `<path>.mdx`, `<path>.md`, `<path>/index.mdx` and `<path>/index.md`
 * under `contentDir`.
 *
 * @param {string} docsPath - Path to look up.
 * @param {string} contentDir - Content root.
 * @returns {boolean} `true` when any candidate file exists.
 */
function pathExists(docsPath, contentDir) {
  const cleanPath = docsPath.replace(/\/$/, "").replace(/^\//, "");
  const suffixes = [".mdx", ".md"];
  const asFile = suffixes.map((ext) => path.join(contentDir, cleanPath + ext));
  const asIndex = suffixes.map((ext) => path.join(contentDir, cleanPath, "index" + ext));
  for (const candidate of [...asFile, ...asIndex]) {
    if (fs.existsSync(candidate)) return true;
  }
  return false;
}
2625
/**
 * Regular expressions used to find internal links in content files.
 * Capture group 1 of every pattern is the link target without its anchor;
 * `type` tells the resolver how the target relates to the source file.
 * All patterns are global, so callers must reset `lastIndex` before reuse.
 */
var LINK_PATTERNS = [
  // Absolute links: [text](/docs/path)
  { regex: /\]\(\/docs\/([^)#\s"]+)/g, type: "absolute" },
  { regex: /href="\/docs\/([^"#]+)"/g, type: "absolute" },
  { regex: /to="\/docs\/([^"#]+)"/g, type: "absolute" },
  // Dot-slash relative: [text](./path)
  { regex: /\]\(\.\/([^)#\s"]+)(?:#[^)]*)?\)/g, type: "dotslash" },
  { regex: /href="\.\/([^"#]+)"/g, type: "dotslash" },
  // Parent relative: [text](../path)
  { regex: /\]\(\.\.\/([^)#\s"]+)(?:#[^)]*)?\)/g, type: "parent" },
  { regex: /href="\.\.\/([^"#]+)"/g, type: "parent" },
  // Simple relative (no prefix): [text](path)
  // The lookahead rejects any target that starts with a URI scheme
  // (`mailto:`, `tel:`, `https:` ...) rather than only the literal prefix
  // "http", so external links are not treated as relative content paths and
  // relative names such as "http-guide" are still checked.
  { regex: /\]\((?!\/|#|\.|\[|[a-zA-Z][a-zA-Z\d+.-]*:)([a-zA-Z][^)#\s"]*)(?:#[^)]*)?\)/g, type: "simple" },
  { regex: /href="(?!\/|#|\.|[a-zA-Z][a-zA-Z\d+.-]*:)([a-zA-Z][^"#]*)"/g, type: "simple" }
];
2640
/**
 * Tells whether a link points at an asset, judged by its extension.
 *
 * @param {string} linkPath - Link target.
 * @param {string[]} assetExtensions - Extensions to treat as assets; compared
 *   against the lower-cased link, so they are expected in lower case.
 * @returns {boolean} `true` when the lower-cased link ends with any extension.
 */
function isAssetLink(linkPath, assetExtensions) {
  for (const ext of assetExtensions) {
    if (linkPath.toLowerCase().endsWith(ext)) return true;
  }
  return false;
}
2643
/**
 * Turns a link found in a content file into a path relative to the content
 * root.
 *
 * - "absolute": the link is returned unchanged.
 * - "dotslash" / "simple": resolved below the source folder for index files,
 *   and below `<folder>/<file name>` for other files.
 * - "parent": resolved one folder above the source folder for index files,
 *   and in the source folder itself for other files.
 * - any other type: the link is returned unchanged.
 *
 * NOTE(review): non-index files are treated as if they were a folder of
 * their own — presumably mirroring how pages are served; confirm.
 *
 * @param {string} fromFilePath - File that contains the link.
 * @param {string} linkPath - Link target as captured from the file.
 * @param {string} linkType - One of the `LINK_PATTERNS` types.
 * @param {string} contentDir - Content root.
 * @returns {string} The resolved path.
 */
function resolveLink(fromFilePath, linkPath, linkType, contentDir) {
  if (linkType === "absolute") return linkPath;
  const { isIndex, folder: sourceFolder, name: fileName } = getFileInfo(fromFilePath, contentDir);
  const folderParts = sourceFolder ? sourceFolder.split("/") : [];
  const below = (parts) => (parts.length ? `${parts.join("/")}/${linkPath}` : linkPath);
  switch (linkType) {
    case "dotslash":
    case "simple":
      return below(isIndex ? folderParts : [...folderParts, fileName]);
    case "parent":
      return below(isIndex ? folderParts.slice(0, -1) : folderParts);
    default:
      return linkPath;
  }
}
2667
/**
 * Finds all internal links in a content file.
 *
 * Every `LINK_PATTERNS` entry is run over the file; asset links are
 * skipped and the rest are resolved with `resolveLink`.
 *
 * @param {string} filePath - File to scan.
 * @param {string} contentDir - Content root.
 * @param {string[]} assetExtensions - Extensions treated as assets.
 * @returns {{raw: string, resolved: string, type: string, line: number}[]}
 *   One record per link, grouped by pattern; `line` is 1-based.
 */
function extractLinks(filePath, contentDir, assetExtensions) {
  const content = fs.readFileSync(filePath, "utf-8");
  // 1-based line number of a character offset.
  const lineOf = (offset) => content.substring(0, offset).split("\n").length;
  const links = [];
  for (const { regex, type } of LINK_PATTERNS) {
    // The patterns are shared and global: always start from the beginning.
    regex.lastIndex = 0;
    for (const match of content.matchAll(regex)) {
      const raw = match[1];
      if (!raw || isAssetLink(raw, assetExtensions)) continue;
      links.push({
        raw,
        resolved: resolveLink(filePath, raw, type, contentDir),
        type,
        line: lineOf(match.index)
      });
    }
  }
  return links;
}
2688
/**
 * Checks every internal link of every markdown file below `contentDir`.
 *
 * Each distinct resolved target is looked up once with `pathExists`; links
 * whose target is missing are reported.
 *
 * @param {string} contentDir - Content root.
 * @param {object} [config]
 * @param {string[]} [config.assetExtensions] - Extensions to ignore as assets.
 * @param {string} [config.basePath="/docs"] - Prefix used when reporting links.
 * @returns {{filesChecked: number, uniqueLinks: number, brokenLinks: object[], success: boolean}}
 */
function checkContentLinks(contentDir, config) {
  const assetExtensions = config?.assetExtensions || [
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".webp",
    ".ico",
    ".pdf",
    ".zip",
    ".tar",
    ".gz"
  ];
  const basePath = config?.basePath || "/docs";
  const files = getAllMdxFiles(contentDir);
  const existsByTarget = /* @__PURE__ */ new Map();
  // Memoised existence check keyed by resolved target.
  const targetExists = (target) => {
    if (!existsByTarget.has(target)) {
      existsByTarget.set(target, pathExists(target, contentDir));
    }
    return existsByTarget.get(target);
  };
  const brokenLinks = [];
  for (const file of files) {
    const links = extractLinks(file, contentDir, assetExtensions);
    const relativePath = path.relative(contentDir, file);
    for (const link of links) {
      if (targetExists(link.resolved)) continue;
      brokenLinks.push({
        file: relativePath,
        link: `${basePath}/${link.resolved}`,
        type: link.type,
        raw: link.raw,
        line: link.line
      });
    }
  }
  return {
    filesChecked: files.length,
    uniqueLinks: existsByTarget.size,
    brokenLinks,
    success: brokenLinks.length === 0
  };
}
2731
/**
 * Groups broken-link records by the file they were found in.
 *
 * @param {{file: string}[]} brokenLinks - Records to group.
 * @returns {Map<string, object[]>} Map from file to its records; keys follow
 *   first appearance and records keep their input order.
 */
function groupBrokenLinksByFile(brokenLinks) {
  const grouped = /* @__PURE__ */ new Map();
  for (const record of brokenLinks) {
    if (!grouped.has(record.file)) {
      grouped.set(record.file, []);
    }
    grouped.get(record.file).push(record);
  }
  return grouped;
}
2740
/**
 * Tells whether a link points at an asset, judged by its extension
 * (case-insensitive on the link side).
 *
 * @param {string} linkPath - Link target.
 * @param {string[]} assetExtensions - Lower-case extensions treated as assets.
 * @returns {boolean} `true` when the link ends with one of the extensions.
 */
function isAssetLink2(linkPath, assetExtensions) {
  const matchIndex = assetExtensions.findIndex((ext) => linkPath.toLowerCase().endsWith(ext));
  return matchIndex !== -1;
}
2743
/**
 * Computes the relative link that replaces an absolute docs link.
 *
 * Targets below the source folder (or any target when the source is at the
 * content root) become `./rest` for index files and `../rest` for other
 * files. Otherwise the link climbs to the shared ancestor; index files may
 * climb at most one level and other files at most two, beyond which `null`
 * is returned.
 *
 * @param {string} sourceFile - File that contains the link.
 * @param {string} targetDocsPath - Target path relative to the docs root
 *   (a leading slash is ignored).
 * @param {string} contentDir - Content root.
 * @returns {string|null} The relative link, or `null` when it would need too
 *   many `../` steps.
 */
function calculateRelativePath(sourceFile, targetDocsPath, contentDir) {
  const { isIndex, folder: sourceFolder } = getFileInfo(sourceFile, contentDir);
  const targetPath = targetDocsPath.replace(/^\//, "");

  const isBelowSource = sourceFolder === "" || targetPath.startsWith(sourceFolder + "/");
  if (isBelowSource) {
    const remainder = sourceFolder ? targetPath.slice(sourceFolder.length + 1) : targetPath;
    return (isIndex ? "./" : "../") + remainder;
  }

  const sourceParts = sourceFolder ? sourceFolder.split("/") : [];
  const targetParts = targetPath.split("/");
  const limit = Math.min(sourceParts.length, targetParts.length);
  let shared = 0;
  while (shared < limit && sourceParts[shared] === targetParts[shared]) {
    shared += 1;
  }

  // Non-index files need one extra step up and are allowed one more.
  const extraStep = isIndex ? 0 : 1;
  const upsNeeded = sourceParts.length - shared + extraStep;
  if (upsNeeded > 1 + extraStep) return null;
  return "../".repeat(upsNeeded) + targetParts.slice(shared).join("/");
}
2783
/**
 * Finds absolute `/docs/...` links in a file that can be rewritten as
 * relative links.
 *
 * Only markdown links and `href` attributes without an anchor are
 * considered. Asset links, targets that do not exist and targets for which
 * `calculateRelativePath` yields nothing are skipped.
 *
 * @param {string} filePath - File to inspect.
 * @param {string} contentDir - Content root.
 * @param {string[]} assetExtensions - Extensions treated as assets.
 * @returns {{from: string, to: string, line: number}[]} Proposed replacements
 *   with the 1-based line of each occurrence.
 */
function processFile(filePath, contentDir, assetExtensions) {
  const content = fs.readFileSync(filePath, "utf-8");
  const patterns = [
    /(\]\()\/docs\/([^)#\s"]+)(\))/g,
    /(href=")\/docs\/([^"#]+)(")/g
  ];
  const fixes = [];
  for (const regex of patterns) {
    for (const match of content.matchAll(regex)) {
      const targetPath = match[2];
      if (!targetPath) continue;
      if (isAssetLink2(targetPath, assetExtensions)) continue;
      if (!pathExists(targetPath, contentDir)) continue;
      const relativePath = calculateRelativePath(filePath, targetPath, contentDir);
      if (!relativePath) continue;
      fixes.push({
        from: `/docs/${targetPath}`,
        to: relativePath,
        line: content.substring(0, match.index).split("\n").length
      });
    }
  }
  return fixes;
}
2810
/**
 * Rewrites links in a file according to a list of fixes.
 *
 * A fix is applied only where `from` is the complete target of a markdown
 * link (`](from)`) or of an `href="from"` attribute. These are the contexts
 * in which `processFile` detects fixes. A plain substring replacement would
 * also rewrite external URLs that contain the same path and longer paths
 * that merely share the prefix.
 *
 * @param {string} filePath - File to rewrite in place.
 * @param {{from: string, to: string}[]} fixes - Replacements to apply.
 * @returns {void}
 */
function applyFixes(filePath, fixes) {
  let content = fs.readFileSync(filePath, "utf-8");
  for (const { from, to } of fixes) {
    // split/join replaces every occurrence without interpreting `$` in `to`.
    content = content
      .split(`](${from})`)
      .join(`](${to})`)
      .split(`href="${from}"`)
      .join(`href="${to}"`);
  }
  fs.writeFileSync(filePath, content, "utf-8");
}
2817
/**
 * Finds, and optionally applies, absolute-to-relative link fixes for every
 * markdown file below `contentDir`.
 *
 * @param {string} contentDir - Content root.
 * @param {object} [options]
 * @param {boolean} [options.apply=false] - Write the fixes to disk when true.
 * @param {object} [options.config] - May provide `assetExtensions`.
 * @returns {{totalChanges: number, fileChanges: {file: string, fullPath: string, fixes: object[]}[], applied: boolean}}
 */
function fixContentLinks(contentDir, options = {}) {
  const { apply = false, config } = options;
  const assetExtensions = config?.assetExtensions || [
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".webp",
    ".ico",
    ".pdf"
  ];
  const fileChanges = [];
  for (const file of getAllMdxFiles(contentDir)) {
    const fixes = processFile(file, contentDir, assetExtensions);
    if (fixes.length === 0) continue;
    fileChanges.push({
      file: path.relative(contentDir, file),
      fullPath: file,
      fixes
    });
    if (apply) {
      applyFixes(file, fixes);
    }
  }
  const totalChanges = fileChanges.reduce((sum, change) => sum + change.fixes.length, 0);
  return {
    totalChanges,
    fileChanges,
    applied: apply
  };
}
2853
// File names that define a page, for every supported source extension.
var PAGE_FILES = ["tsx", "ts", "jsx", "js"].map((ext) => `page.${ext}`);
// File names that define a route handler (reported with type "api").
var ROUTE_FILES = ["tsx", "ts", "jsx", "js"].map((ext) => `route.${ext}`);
// Base names (without extension) reported only when special files are requested.
var SPECIAL_FILES = [
  "layout",
  "loading",
  "error",
  "not-found",
  "template"
];
2856
/**
 * Locates the app directory of a project.
 *
 * @param {string} [startDir=process.cwd()] - Project root to look in.
 * @returns {string|null} `<startDir>/app` if it is a directory, otherwise
 *   `<startDir>/src/app` if that is one, otherwise `null`.
 */
function findAppDir(startDir = process.cwd()) {
  for (const parts of [["app"], ["src", "app"]]) {
    const dir = join(startDir, ...parts);
    if (existsSync(dir) && statSync(dir).isDirectory()) {
      return dir;
    }
  }
  return null;
}
2868
/**
 * Scans an app directory and returns the routes it defines.
 *
 * @param {object} [options]
 * @param {string} [options.appDir] - Directory to scan; defaults to the
 *   result of `findAppDir()` or "./app".
 * @param {boolean} [options.includeApi=true] - Include route handlers.
 * @param {boolean} [options.includeSpecial=false] - Include special files.
 * @returns {{routes: object[], staticRoutes: object[], dynamicRoutes: object[], apiRoutes: object[], appDir: string}}
 *   `staticRoutes` and `dynamicRoutes` contain only routes of type "page".
 * @throws {Error} When the app directory does not exist.
 */
function scanRoutes(options = {}) {
  const { includeApi = true, includeSpecial = false } = options;
  // Only look for the app directory when the caller did not supply one.
  const appDir = options.appDir === undefined ? findAppDir() || "./app" : options.appDir;
  if (!existsSync(appDir)) {
    throw new Error(`App directory not found: ${appDir}`);
  }
  const routes = [];
  scanDirectory(appDir, "", routes, { includeApi, includeSpecial });
  const pages = routes.filter((route) => route.type === "page");
  return {
    routes,
    staticRoutes: pages.filter((route) => !route.isDynamic),
    dynamicRoutes: pages.filter((route) => route.isDynamic),
    apiRoutes: routes.filter((route) => route.type === "api"),
    appDir
  };
}
2887
/**
 * Recursively walks an app directory and appends the routes it finds.
 *
 * Directory handling:
 * - names starting with "_" or "." are skipped;
 * - `(name)` folders and `@name` folders are descended into without adding
 *   a URL segment;
 * - other names starting with "(" are skipped (presumably intercepting-route
 *   folders such as `(.)name` — confirm);
 * - anything else adds its name as a URL segment.
 *
 * Unreadable directories and entries that cannot be stat'ed are ignored.
 *
 * @param {string} dir - Directory being scanned.
 * @param {string} routePath - URL path accumulated so far ("" at the root).
 * @param {object[]} routes - Output array; route records are pushed onto it.
 * @param {{includeApi: boolean, includeSpecial: boolean}} options - What to collect.
 * @returns {void}
 */
function scanDirectory(dir, routePath, routes, options) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return;
  }
  for (const entry of entries) {
    const fullPath = join(dir, entry);
    let stat;
    try {
      stat = statSync(fullPath);
    } catch {
      continue;
    }

    if (stat.isDirectory()) {
      if (entry.startsWith("_") || entry.startsWith(".")) continue;
      const isRouteGroup = entry.startsWith("(") && entry.endsWith(")");
      const isSlot = entry.startsWith("@");
      if (isRouteGroup || isSlot) {
        // Transparent folder: children keep the current route path.
        scanDirectory(fullPath, routePath, routes, options);
        continue;
      }
      // Route groups were handled above, so this is "(" without a closing ")".
      if (entry.startsWith("(")) continue;
      const { urlSegment } = processSegment(entry);
      scanDirectory(fullPath, `${routePath}/${urlSegment}`, routes, options);
      continue;
    }

    if (!stat.isFile()) continue;
    const types = [];
    if (PAGE_FILES.includes(entry)) {
      types.push("page");
    }
    if (options.includeApi && ROUTE_FILES.includes(entry)) {
      types.push("api");
    }
    if (options.includeSpecial) {
      const baseName = entry.replace(/\.(tsx?|jsx?|js)$/, "");
      if (SPECIAL_FILES.includes(baseName)) {
        types.push(baseName);
      }
    }
    for (const type of types) {
      routes.push(createRouteInfo(routePath || "/", dir, type));
    }
  }
}
2937
/**
 * Classifies a single path segment.
 *
 * Recognised dynamic forms, checked from most to least specific:
 * `[[...name]]` (optional catch-all), `[...name]` (catch-all) and `[name]`.
 *
 * @param {string} segment - Folder name to classify.
 * @returns {{urlSegment: string, isDynamic: boolean, paramName?: string, isCatchAll: boolean, isOptionalCatchAll: boolean}}
 *   `urlSegment` is always the segment itself; `paramName` is present only
 *   for dynamic segments.
 */
function processSegment(segment) {
  const dynamicForms = [
    { open: "[[...", close: "]]", isCatchAll: true, isOptionalCatchAll: true },
    { open: "[...", close: "]", isCatchAll: true, isOptionalCatchAll: false },
    { open: "[", close: "]", isCatchAll: false, isOptionalCatchAll: false }
  ];
  for (const form of dynamicForms) {
    if (!segment.startsWith(form.open) || !segment.endsWith(form.close)) continue;
    return {
      urlSegment: segment,
      isDynamic: true,
      paramName: segment.slice(form.open.length, -form.close.length),
      isCatchAll: form.isCatchAll,
      isOptionalCatchAll: form.isOptionalCatchAll
    };
  }
  return {
    urlSegment: segment,
    isDynamic: false,
    isCatchAll: false,
    isOptionalCatchAll: false
  };
}
2975
/**
 * Builds the route record for a route path.
 *
 * @param {string} path5 - URL path of the route (segments may be dynamic).
 * @param {string} filePath - Location the route was found at; the first
 *   parenthesised part of it, if any, is reported as `routeGroup`.
 * @param {string} type - Route type, e.g. "page" or "api".
 * @returns {{path: string, filePath: string, type: string, isDynamic: boolean, dynamicSegments: string[], isCatchAll: boolean, isOptionalCatchAll: boolean, routeGroup: (string|undefined)}}
 */
function createRouteInfo(path5, filePath, type) {
  const dynamicInfos = path5
    .split("/")
    .filter(Boolean)
    .map((segment) => processSegment(segment))
    .filter((info) => info.isDynamic);
  const dynamicSegments = dynamicInfos
    .filter((info) => info.paramName)
    .map((info) => info.paramName);
  const groupMatch = filePath.match(/\(([^)]+)\)/);
  return {
    path: path5 || "/",
    filePath,
    type,
    isDynamic: dynamicInfos.length > 0,
    dynamicSegments,
    isCatchAll: dynamicInfos.some((info) => info.isCatchAll),
    isOptionalCatchAll: dynamicInfos.some((info) => info.isOptionalCatchAll),
    routeGroup: groupMatch ? groupMatch[1] : void 0
  };
}
3008
/**
 * Fills the dynamic segments of a route path with concrete values.
 *
 * Each parameter is matched against the placeholder form actually present
 * in the path (`[[...name]]`, `[...name]` or `[name]`). The route-level
 * catch-all flags are not used for this, because a route can mix plain and
 * catch-all segments (e.g. `/blog/[category]/[...slug]`).
 *
 * @param {{path: string, dynamicSegments: string[]}} route - Route record.
 * @param {Object<string, string|string[]>} [params] - Values by parameter
 *   name; arrays are joined with "/". Missing or empty values are rendered
 *   as `{name}`.
 * @returns {string} The path with placeholders substituted.
 */
function routeToUrl(route, params) {
  let url = route.path;
  for (const segment of route.dynamicSegments) {
    const supplied = params?.[segment];
    const value = Array.isArray(supplied) ? supplied.join("/") : (supplied || `{${segment}}`);
    // Most specific form first: "[...x]" is a substring of "[[...x]]".
    const placeholder = [`[[...${segment}]]`, `[...${segment}]`, `[${segment}]`]
      .find((candidate) => url.includes(candidate));
    if (placeholder) {
      // A replacer function keeps "$" sequences in the value literal.
      url = url.replace(placeholder, () => String(value));
    }
  }
  return url;
}
3022
/**
 * Builds absolute URLs for the static routes of a scan result.
 *
 * @param {{staticRoutes: {path: string}[]}} scanResult - Result of a route scan.
 * @param {string} baseUrl - Base each route path is resolved against.
 * @returns {string[]} One href per static route, in route order.
 */
function getStaticUrls(scanResult, baseUrl) {
  const urls = [];
  for (const { path: routePath } of scanResult.staticRoutes) {
    urls.push(new URL(routePath, baseUrl).href);
  }
  return urls;
}
3028
+
3029
+ // src/content/sitemap-generator.ts
3030
/**
 * Loads the `_meta.ts` mapping of a content directory.
 *
 * The file is first loaded with a dynamic `import()`. If that fails, its
 * text is scanned for `key: "value"` pairs instead. The fallback accepts
 * single-quoted, double-quoted and bare identifier keys, and a value may
 * contain the quote character it is not delimited by (e.g. "What's new").
 * Only string values are picked up this way; entries whose value is an
 * object are not reconstructed.
 *
 * @param {string} dir - Directory that may contain `_meta.ts`.
 * @returns {Promise<object>} The default export of the meta file, the
 *   key/value pairs found by the fallback, or `{}` when nothing can be read.
 */
async function getMeta(dir) {
  const metaPath = path.join(dir, "_meta.ts");
  if (!fs.existsSync(metaPath)) return {};
  try {
    const meta = await import(metaPath);
    return meta.default || {};
  } catch {
    // Best effort: the import can fail for many reasons, so fall back to text scanning.
    try {
      const content = fs.readFileSync(metaPath, "utf-8");
      const entryPattern = /(?:'([^']+)'|"([^"]+)"|([A-Za-z_$][\w$]*))\s*:\s*(?:'([^']+)'|"([^"]+)")/g;
      const result = {};
      for (const match of content.matchAll(entryPattern)) {
        const key = match[1] ?? match[2] ?? match[3];
        const value = match[4] ?? match[5];
        if (key && value) {
          result[key] = value;
        }
      }
      return result;
    } catch {
      return {};
    }
  }
}
3054
/**
 * Looks up the display title for a key in a meta mapping.
 *
 * @param {string} key - Entry name.
 * @param {object} meta - Mapping from key to a title string or to an object
 *   with a `title` property.
 * @returns {string} The string value, the stringified `title` property, or
 *   `key` itself when neither is available.
 */
function getTitleFromMeta(key, meta) {
  const value = meta[key];
  if (typeof value === "string" && value) return value;
  const hasTitle = Boolean(value) && typeof value === "object" && "title" in value;
  return hasTitle ? String(value.title) : key;
}
3063
/**
 * Builds the sitemap tree for a content directory.
 *
 * Entries listed in the directory's meta mapping come first, in meta order
 * and with their meta title. Remaining entries follow, titled by name,
 * excluding names starting with "_" or ".". Directories become items with
 * optional `children`; `.md`/`.mdx` files other than `index` become leaves.
 *
 * When a directory and a markdown file share a name (`guide/` and
 * `guide.mdx`), the directory is kept so its children are not lost; both
 * map to the same item path.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [baseUrl="/docs"] - URL prefix for the items of this directory.
 * @returns {Promise<{title: string, path: string, children?: object[]}[]>}
 *   Items of this directory; empty when `dir` does not exist.
 */
async function scanContent(dir, baseUrl = "/docs") {
  const items = [];
  if (!fs.existsSync(dir)) return items;
  const meta = await getMeta(dir);
  const entries = fs.readdirSync(dir, { withFileTypes: true });

  const fileMap = /* @__PURE__ */ new Map();
  for (const entry of entries) {
    const key = entry.name.replace(/\.mdx?$/, "");
    const existing = fileMap.get(key);
    // A same-named file must not displace a directory already registered.
    if (existing?.isDirectory() && !entry.isDirectory()) continue;
    fileMap.set(key, entry);
  }

  // Appends the sitemap item for one entry, recursing into directories.
  const addItem = async (key, entry, title) => {
    const itemPath = path.join(baseUrl, key).replace(/\\/g, "/");
    if (entry.isDirectory()) {
      const children = await scanContent(path.join(dir, entry.name), itemPath);
      items.push({
        title,
        path: itemPath,
        children: children.length > 0 ? children : void 0
      });
      return;
    }
    const isMarkdown = entry.isFile() && (entry.name.endsWith(".md") || entry.name.endsWith(".mdx"));
    if (isMarkdown && entry.name !== "index.mdx" && entry.name !== "index.md") {
      items.push({ title, path: itemPath });
    }
  };

  for (const key of Object.keys(meta)) {
    const entry = fileMap.get(key);
    if (!entry) continue;
    fileMap.delete(key);
    await addItem(key, entry, getTitleFromMeta(key, meta));
  }
  for (const [key, entry] of fileMap.entries()) {
    if (key.startsWith("_") || key.startsWith(".")) continue;
    await addItem(key, entry, key);
  }
  return items;
}
3118
/**
 * Derives a display title from the last segment of a route path.
 *
 * @param {string} routePath - Route path such as "/blog/my-post".
 * @returns {string} "Home" for a path without segments; the segment itself
 *   when it starts with "["; otherwise the segment split on "-" with each
 *   word capitalised and joined by spaces.
 */
function routeToTitle(routePath) {
  const parts = routePath.split("/").filter(Boolean);
  const segment = parts[parts.length - 1] || "Home";
  if (segment.startsWith("[")) return segment;
  const words = [];
  for (const word of segment.split("-")) {
    words.push(word.charAt(0).toUpperCase() + word.slice(1));
  }
  return words.join(" ");
}
3123
/**
 * Arranges static page routes into a tree of sitemap items.
 *
 * Only routes of type "page" that are not dynamic are used. Routes are
 * processed from shallowest to deepest, and every created node is
 * registered by its path. A page that is also the parent of deeper pages
 * (`/blog` and `/blog/post`) therefore becomes one item with children
 * rather than two sibling entries, and a path seen more than once yields
 * a single item. Missing intermediate levels get a placeholder parent
 * titled from their path.
 *
 * @param {{path: string, type: string, isDynamic: boolean}[]} routes - Route records.
 * @returns {{title: string, path: string, children?: object[]}[]} Top-level items.
 */
function routesToSitemapItems(routes) {
  const nodesByPath = /* @__PURE__ */ new Map();
  const items = [];
  const sortedRoutes = routes
    .filter((r) => r.type === "page" && !r.isDynamic)
    .sort((a, b) => a.path.split("/").length - b.path.split("/").length);

  for (const route of sortedRoutes) {
    const segments = route.path.split("/").filter(Boolean);
    if (segments.length === 0) {
      if (!nodesByPath.has("/")) {
        const home = { title: "Home", path: "/" };
        nodesByPath.set("/", home);
        items.push(home);
      }
      continue;
    }

    let currentPath = "";
    let parentChildren = items;
    for (const segment of segments.slice(0, -1)) {
      currentPath += "/" + segment;
      let parent = nodesByPath.get(currentPath);
      if (!parent) {
        parent = {
          title: routeToTitle(currentPath),
          path: currentPath,
          children: []
        };
        nodesByPath.set(currentPath, parent);
        parentChildren.push(parent);
      }
      // Leaf pages start without `children`; add the array on first use.
      parentChildren = parent.children || (parent.children = []);
    }

    const leafKey = currentPath + "/" + segments[segments.length - 1];
    if (nodesByPath.has(leafKey)) continue;
    const item = {
      title: routeToTitle(route.path),
      path: route.path
    };
    nodesByPath.set(leafKey, item);
    parentChildren.push(item);
  }
  return items;
}
3160
/**
 * Collects the sitemap data of a project.
 *
 * Docs items are scanned only when the project is detected as "nextra" and
 * the content directory exists. App items come from the app directory's
 * routes (route handlers excluded); a failing route scan yields no app items.
 *
 * @param {string} cwd - Project root.
 * @param {object} [config]
 * @param {string} [config.contentDir="content"] - Content folder, relative to `cwd`.
 * @param {string} [config.appDir="app"] - App folder, relative to `cwd`.
 * @param {string} [config.basePath="/docs"] - URL prefix for docs items.
 * @returns {Promise<{app: object[], docs: object[]}>}
 */
async function generateSitemapData(cwd, config) {
  const projectType = detectProjectType(cwd);
  const contentDir = path.join(cwd, config?.contentDir || "content");
  const appDir = path.join(cwd, config?.appDir || "app");
  const basePath = config?.basePath || "/docs";

  const shouldScanDocs = projectType === "nextra" && fs.existsSync(contentDir);
  const docs = shouldScanDocs ? await scanContent(contentDir, basePath) : [];

  let app = [];
  if (fs.existsSync(appDir)) {
    try {
      const { routes } = scanRoutes({ appDir, includeApi: false });
      app = routesToSitemapItems(routes);
    } catch {
      // Best effort: a failing route scan leaves the app section empty.
      app = [];
    }
  }
  return { app, docs };
}
3183
/**
 * Renders sitemap data as the source text of a TypeScript module that
 * declares the `SitemapItem` interface and exports a `sitemap` constant.
 *
 * @param {{app: object[], docs: object[]}} data - Sitemap data to embed.
 * @returns {string} Module source; begins with an empty line and ends with a newline.
 */
function generateTsContent(data) {
  const serialized = JSON.stringify(data, null, 2);
  const lines = [
    "",
    "// This file is auto-generated by @djangocfg/seo",
    "// Do not edit manually",
    "",
    "export interface SitemapItem {",
    "title: string;",
    "path: string;",
    "children?: SitemapItem[];",
    "}",
    "",
    `export const sitemap: { app: SitemapItem[], docs: SitemapItem[] } = ${serialized};`,
    ""
  ];
  return lines.join("\n");
}
3197
/**
 * Generates the sitemap module for a project and writes it to disk.
 *
 * @param {string} cwd - Project root.
 * @param {object} [options]
 * @param {string} [options.output="app/_core/sitemap.ts"] - Output file, relative to `cwd`.
 * @param {object} [options.config] - Passed on to `generateSitemapData`.
 * @returns {Promise<{outputPath: string, data: object}>} Where the file was
 *   written and the data it contains.
 */
async function generateSitemap(cwd, options = {}) {
  const outputPath = path.join(cwd, options.output || "app/_core/sitemap.ts");
  const data = await generateSitemapData(cwd, options.config);
  const outputDir = path.dirname(outputPath);
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }
  fs.writeFileSync(outputPath, generateTsContent(data));
  return { outputPath, data };
}
3209
/**
 * Lists the paths of all sitemap items, parents before their children
 * (depth-first, in input order).
 *
 * @param {{path: string, children?: object[]}[]} items - Top-level items.
 * @returns {string[]} Every item path in the tree.
 */
function flattenSitemap(items) {
  const paths = [];
  // Explicit stack; entries are pushed reversed so they pop in input order.
  const pending = [...items].reverse();
  while (pending.length > 0) {
    const node = pending.pop();
    paths.push(node.path);
    if (node.children) {
      pending.push(...[...node.children].reverse());
    }
  }
  return paths;
}
3224
/**
 * Counts the items in each section of the sitemap data, nested items included.
 *
 * @param {{app: object[], docs: object[]}} data - Sitemap data.
 * @returns {{app: number, docs: number, total: number}} Item counts per
 *   section and their sum.
 */
function countSitemapItems(data) {
  const app = flattenSitemap(data.app).length;
  const docs = flattenSitemap(data.docs).length;
  return { app, docs, total: app + docs };
}
3233
+
3234
+ export { AI_REPORT_SCHEMA, GoogleConsoleClient, SeoAnalyzer, SiteCrawler, analyzeAllSitemaps, analyzeCrawlResults, analyzeInspectionResults, analyzeRobotsTxt, analyzeSitemap, checkContentLinks, checkLinks, chunk, countSitemapItems, createAuthClient, detectProjectType, findAppDir, fixContentLinks, flattenSitemap, formatBytes, formatDuration, generateAiSummary, generateAndSaveReports, generateJsonReport, generateMarkdownReport, generateSitemap, generateSitemapData, getStaticUrls, groupBrokenLinksByFile, isSameDomain, isUrlAllowed, linkResultsToSeoIssues, loadUrlsFromFile, mergeReports, normalizeUrl, printReportSummary, retry, routeToUrl, scanProject, scanRoutes, sleep, verifyAuth };
3235
+ //# sourceMappingURL=index.mjs.map
3236
+ //# sourceMappingURL=index.mjs.map