@djangocfg/seo 2.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +192 -0
  2. package/dist/cli.d.ts +1 -0
  3. package/dist/cli.mjs +3780 -0
  4. package/dist/cli.mjs.map +1 -0
  5. package/dist/crawler/index.d.ts +88 -0
  6. package/dist/crawler/index.mjs +610 -0
  7. package/dist/crawler/index.mjs.map +1 -0
  8. package/dist/google-console/index.d.ts +95 -0
  9. package/dist/google-console/index.mjs +539 -0
  10. package/dist/google-console/index.mjs.map +1 -0
  11. package/dist/index.d.ts +285 -0
  12. package/dist/index.mjs +3236 -0
  13. package/dist/index.mjs.map +1 -0
  14. package/dist/link-checker/index.d.ts +76 -0
  15. package/dist/link-checker/index.mjs +326 -0
  16. package/dist/link-checker/index.mjs.map +1 -0
  17. package/dist/markdown-report-B3QdDzxE.d.ts +193 -0
  18. package/dist/reports/index.d.ts +24 -0
  19. package/dist/reports/index.mjs +836 -0
  20. package/dist/reports/index.mjs.map +1 -0
  21. package/dist/routes/index.d.ts +69 -0
  22. package/dist/routes/index.mjs +372 -0
  23. package/dist/routes/index.mjs.map +1 -0
  24. package/dist/scanner-Cz4Th2Pt.d.ts +60 -0
  25. package/dist/types/index.d.ts +144 -0
  26. package/dist/types/index.mjs +3 -0
  27. package/dist/types/index.mjs.map +1 -0
  28. package/package.json +114 -0
  29. package/src/analyzer.ts +256 -0
  30. package/src/cli/commands/audit.ts +260 -0
  31. package/src/cli/commands/content.ts +180 -0
  32. package/src/cli/commands/crawl.ts +32 -0
  33. package/src/cli/commands/index.ts +12 -0
  34. package/src/cli/commands/inspect.ts +60 -0
  35. package/src/cli/commands/links.ts +41 -0
  36. package/src/cli/commands/robots.ts +36 -0
  37. package/src/cli/commands/routes.ts +126 -0
  38. package/src/cli/commands/sitemap.ts +48 -0
  39. package/src/cli/index.ts +149 -0
  40. package/src/cli/types.ts +40 -0
  41. package/src/config.ts +207 -0
  42. package/src/content/index.ts +51 -0
  43. package/src/content/link-checker.ts +182 -0
  44. package/src/content/link-fixer.ts +188 -0
  45. package/src/content/scanner.ts +200 -0
  46. package/src/content/sitemap-generator.ts +321 -0
  47. package/src/content/types.ts +140 -0
  48. package/src/crawler/crawler.ts +425 -0
  49. package/src/crawler/index.ts +10 -0
  50. package/src/crawler/robots-parser.ts +171 -0
  51. package/src/crawler/sitemap-validator.ts +204 -0
  52. package/src/google-console/analyzer.ts +317 -0
  53. package/src/google-console/auth.ts +100 -0
  54. package/src/google-console/client.ts +281 -0
  55. package/src/google-console/index.ts +9 -0
  56. package/src/index.ts +144 -0
  57. package/src/link-checker/index.ts +461 -0
  58. package/src/reports/claude-context.ts +149 -0
  59. package/src/reports/generator.ts +244 -0
  60. package/src/reports/index.ts +27 -0
  61. package/src/reports/json-report.ts +320 -0
  62. package/src/reports/markdown-report.ts +246 -0
  63. package/src/reports/split-report.ts +252 -0
  64. package/src/routes/analyzer.ts +324 -0
  65. package/src/routes/index.ts +25 -0
  66. package/src/routes/scanner.ts +298 -0
  67. package/src/types/index.ts +222 -0
  68. package/src/utils/index.ts +154 -0
@@ -0,0 +1,281 @@
1
+ /**
2
+ * @djangocfg/seo - Google Search Console Client
3
+ * Main client for interacting with Google Search Console API
4
+ */
5
+
6
+ import { searchconsole, type searchconsole_v1 } from '@googleapis/searchconsole';
7
+ import type { JWT } from 'google-auth-library';
8
+ import consola from 'consola';
9
+ import pLimit from 'p-limit';
10
+ import pRetry from 'p-retry';
11
+ import { createAuthClient, verifyAuth } from './auth.js';
12
+ import type {
13
+ GoogleConsoleConfig,
14
+ UrlInspectionResult,
15
+ CoverageState,
16
+ IndexingState,
17
+ IndexingVerdict,
18
+ RobotsTxtState,
19
+ PageFetchState,
20
+ } from '../types/index.js';
21
+
22
/**
 * Client for the Google Search Console API.
 *
 * Wraps URL inspection, search analytics and sitemap listing behind a small
 * surface. URL inspections are funnelled through a concurrency limiter and
 * retried with exponential backoff; batch inspection additionally spaces
 * requests out and bails early when failures pile up.
 */
export class GoogleConsoleClient {
  /** JWT auth client produced by `createAuthClient` from the supplied config. */
  private readonly auth: JWT;
  /** Search Console v1 API handle bound to `auth`. */
  private readonly searchconsole: searchconsole_v1.Searchconsole;
  /** Site URL exactly as given in the config. */
  private readonly siteUrl: string;
  /** Property identifier sent to the GSC API (may be `sc-domain:xxx`). */
  private readonly gscSiteUrl: string;
  /** Max 2 concurrent requests (Cloudflare-friendly). */
  private readonly limit = pLimit(2);
  /** Pause between consecutive batch requests, in milliseconds. */
  private readonly requestDelay = 500;

  /**
   * @param config - Google Console configuration. `siteUrl` is always read;
   *   `gscSiteUrl`, when set, is used verbatim as the GSC property identifier.
   * @throws TypeError from `new URL` when `gscSiteUrl` is not set and
   *   `siteUrl` is not a parseable absolute URL.
   */
  constructor(config: GoogleConsoleConfig) {
    this.auth = createAuthClient(config);
    this.searchconsole = searchconsole({ version: 'v1', auth: this.auth });
    this.siteUrl = config.siteUrl;

    // Support both URL prefix and domain property formats.
    // An explicit gscSiteUrl wins; otherwise fall back to the domain property
    // format derived from the hostname of siteUrl.
    if (config.gscSiteUrl) {
      this.gscSiteUrl = config.gscSiteUrl;
    } else {
      const domain = new URL(config.siteUrl).hostname;
      this.gscSiteUrl = `sc-domain:${domain}`;
    }

    consola.debug(`GSC site URL: ${this.gscSiteUrl}`);
  }

  /**
   * Resolve after `ms` milliseconds. Used to pace batch requests.
   */
  private delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Verify the client is authenticated by delegating to `verifyAuth`.
   *
   * NOTE(review): this passes the raw `siteUrl`, not `gscSiteUrl` — confirm
   * that is what `verifyAuth` expects for domain properties.
   *
   * @returns Whatever `verifyAuth` resolves to.
   */
  async verify(): Promise<boolean> {
    return verifyAuth(this.auth, this.siteUrl);
  }

  /**
   * List all sites in Search Console visible to the authenticated account.
   *
   * @returns The `siteUrl` of every entry (empty string when an entry has
   *   none), or an empty array when the response carries no entries.
   * @throws Re-throws the underlying API error after logging it.
   */
  async listSites(): Promise<string[]> {
    try {
      const response = await this.searchconsole.sites.list();
      return response.data.siteEntry?.map((site) => site.siteUrl || '') ?? [];
    } catch (error) {
      consola.error('Failed to list sites:', error);
      throw error;
    }
  }

  /**
   * Inspect a single URL.
   *
   * The call runs inside the shared concurrency limiter and is retried up to
   * 2 times (2s minimum, 10s maximum timeout, factor 2). A response without
   * an `indexStatusResult` is treated as a failure and retried as well.
   *
   * @param url - URL to inspect.
   * @returns The inspection result mapped to the package's own types.
   * @throws The last error once all attempts are exhausted.
   */
  async inspectUrl(url: string): Promise<UrlInspectionResult> {
    return this.limit(async () => {
      return pRetry(
        async () => {
          const response = await this.searchconsole.urlInspection.index.inspect({
            requestBody: {
              inspectionUrl: url,
              siteUrl: this.gscSiteUrl,
              languageCode: 'en-US',
            },
          });

          const result = response.data.inspectionResult;

          if (!result?.indexStatusResult) {
            throw new Error(`No inspection result for URL: ${url}`);
          }

          return this.mapInspectionResult(url, result);
        },
        {
          retries: 2,
          minTimeout: 2000,
          maxTimeout: 10000,
          factor: 2, // Exponential backoff
          onFailedAttempt: (ctx) => {
            // Only log on final failure to reduce noise
            if (ctx.retriesLeft === 0) {
              consola.warn(`Failed: ${url}`);
            }
          },
        }
      );
    });
  }

  /**
   * Inspect multiple URLs one after another.
   *
   * Requests are separated by `requestDelay` regardless of whether the
   * previous one succeeded, and the batch stops early after 3 consecutive
   * failures (likely rate limiting or an auth issue).
   *
   * @param urls - URLs to inspect, processed in order.
   * @returns Results for the URLs that were inspected successfully; failed
   *   URLs are omitted, so the array may be shorter than `urls`.
   */
  async inspectUrls(urls: string[]): Promise<UrlInspectionResult[]> {
    consola.info(`Inspecting ${urls.length} URLs...`);

    const results: UrlInspectionResult[] = [];
    const errors: Array<{ url: string; error: Error }> = [];
    let consecutiveErrors = 0;
    const maxConsecutiveErrors = 3; // Stop after 3 consecutive failures
    let isFirstRequest = true;

    // Process URLs sequentially with delay to avoid rate limiting
    for (const url of urls) {
      // Pause *between* requests: never before the first one or after the
      // last one, and also after a failure, when backing off matters most.
      if (!isFirstRequest) {
        await this.delay(this.requestDelay);
      }
      isFirstRequest = false;

      try {
        results.push(await this.inspectUrl(url));
        consecutiveErrors = 0; // Reset on success
      } catch (error: unknown) {
        // Anything can be thrown; normalise so the record really holds an Error.
        const err = error instanceof Error ? error : new Error(String(error));
        errors.push({ url, error: err });
        // inspectUrl only warns "Failed: <url>"; keep the reason reachable.
        consola.debug(`Inspection failed for ${url}: ${err.message}`);
        consecutiveErrors++;

        // Early exit on consecutive errors (likely rate limiting or auth issue)
        if (consecutiveErrors >= maxConsecutiveErrors) {
          console.log('');
          consola.error(`Stopping after ${maxConsecutiveErrors} consecutive failures`);
          this.showRateLimitHelp();
          break;
        }
      }
    }

    // The early-exit path already reported the failure; avoid a second warning.
    if (errors.length > 0 && consecutiveErrors < maxConsecutiveErrors) {
      consola.warn(`Failed to inspect ${errors.length} URLs`);
    }

    if (results.length > 0) {
      consola.success(`Successfully inspected ${results.length}/${urls.length} URLs`);
    } else if (errors.length > 0) {
      consola.warn('No URLs were successfully inspected');
    }

    return results;
  }

  /**
   * Print troubleshooting hints for rate limiting / access issues.
   */
  private showRateLimitHelp(): void {
    consola.info('Possible causes:');
    consola.info(' 1. Google API quota exceeded (2000 requests/day)');
    consola.info(' 2. Cloudflare blocking Google\'s crawler');
    consola.info(' 3. Service account not added to GSC');
    console.log('');
    consola.info('Solutions:');
    consola.info(' • Check GSC access: https://search.google.com/search-console/users');
    console.log('');
    consola.info(' • Cloudflare WAF rule to allow Googlebot:');
    consola.info(' 1. Dashboard → Security → WAF → Custom rules → Create rule');
    consola.info(' 2. Name: "Allow Googlebot"');
    consola.info(' 3. Field: "Known Bots" | Operator: "equals" | Value: "true"');
    consola.info(' 4. Or click "Edit expression" and paste: (cf.client.bot)');
    consola.info(' 5. Action: Skip → check all rules');
    consola.info(' 6. Deploy');
    consola.info(' Docs: https://developers.cloudflare.com/waf/custom-rules/use-cases/allow-traffic-from-verified-bots/');
    console.log('');
  }

  /**
   * Get search analytics data for the configured property.
   *
   * @param options - Query window plus optional `dimensions` (defaults to
   *   `['page']`) and `rowLimit` (defaults to 1000; a falsy value such as 0
   *   also falls back to 1000).
   * @returns The rows from the response, or an empty array when there are none.
   * @throws Re-throws the underlying API error after logging it.
   */
  async getSearchAnalytics(
    options: {
      startDate: string;
      endDate: string;
      dimensions?: ('query' | 'page' | 'country' | 'device' | 'date')[];
      rowLimit?: number;
    }
  ): Promise<searchconsole_v1.Schema$ApiDataRow[]> {
    try {
      const response = await this.searchconsole.searchanalytics.query({
        siteUrl: this.gscSiteUrl,
        requestBody: {
          startDate: options.startDate,
          endDate: options.endDate,
          dimensions: options.dimensions || ['page'],
          rowLimit: options.rowLimit || 1000,
        },
      });

      return response.data.rows || [];
    } catch (error) {
      consola.error('Failed to get search analytics:', error);
      throw error;
    }
  }

  /**
   * Get the list of sitemaps registered for the configured property.
   *
   * @returns The sitemap entries, or an empty array when there are none.
   * @throws Re-throws the underlying API error after logging it.
   */
  async getSitemaps(): Promise<searchconsole_v1.Schema$WmxSitemap[]> {
    try {
      const response = await this.searchconsole.sitemaps.list({
        siteUrl: this.gscSiteUrl,
      });

      return response.data.sitemap || [];
    } catch (error) {
      consola.error('Failed to get sitemaps:', error);
      throw error;
    }
  }

  /**
   * Map an API inspection result to the package's own types.
   *
   * Missing or empty enum-like fields fall back to their `*_UNSPECIFIED`
   * value; missing optional fields are normalised to `undefined`.
   *
   * @param url - The inspected URL, copied onto the result.
   * @param result - Raw inspection result from the API.
   * @throws Error when `result` carries no `indexStatusResult`.
   */
  private mapInspectionResult(
    url: string,
    result: searchconsole_v1.Schema$UrlInspectionResult
  ): UrlInspectionResult {
    const indexStatus = result.indexStatusResult;
    // Guard instead of asserting non-null so a misuse fails loudly here.
    if (!indexStatus) {
      throw new Error(`No inspection result for URL: ${url}`);
    }

    return {
      url,
      inspectionResultLink: result.inspectionResultLink || undefined,
      indexStatusResult: {
        verdict: (indexStatus.verdict as IndexingVerdict) || 'VERDICT_UNSPECIFIED',
        coverageState: (indexStatus.coverageState as CoverageState) || 'COVERAGE_STATE_UNSPECIFIED',
        indexingState: (indexStatus.indexingState as IndexingState) || 'INDEXING_STATE_UNSPECIFIED',
        robotsTxtState: (indexStatus.robotsTxtState as RobotsTxtState) || 'ROBOTS_TXT_STATE_UNSPECIFIED',
        pageFetchState: (indexStatus.pageFetchState as PageFetchState) || 'PAGE_FETCH_STATE_UNSPECIFIED',
        lastCrawlTime: indexStatus.lastCrawlTime || undefined,
        // Normalise null/empty to undefined like every sibling field does.
        crawledAs: (indexStatus.crawledAs as 'DESKTOP' | 'MOBILE' | null | undefined) || undefined,
        googleCanonical: indexStatus.googleCanonical || undefined,
        userCanonical: indexStatus.userCanonical || undefined,
        sitemap: indexStatus.sitemap || undefined,
        referringUrls: indexStatus.referringUrls || undefined,
      },
      mobileUsabilityResult: result.mobileUsabilityResult
        ? {
            verdict: (result.mobileUsabilityResult.verdict as IndexingVerdict) || 'VERDICT_UNSPECIFIED',
            issues: result.mobileUsabilityResult.issues?.map((issue) => ({
              issueType: issue.issueType || 'UNKNOWN',
              message: issue.message || '',
            })),
          }
        : undefined,
      richResultsResult: result.richResultsResult
        ? {
            verdict: (result.richResultsResult.verdict as IndexingVerdict) || 'VERDICT_UNSPECIFIED',
            detectedItems: result.richResultsResult.detectedItems?.map((item) => ({
              richResultType: item.richResultType || 'UNKNOWN',
              items: item.items?.map((i) => ({
                name: i.name || '',
                issues: i.issues?.map((issue) => ({
                  issueMessage: issue.issueMessage || '',
                  severity: (issue.severity as 'ERROR' | 'WARNING') || 'WARNING',
                })),
              })),
            })),
          }
        : undefined,
    };
  }
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * @djangocfg/seo - Google Console Module
3
+ * Integration with Google Search Console API
4
+ */
5
+
6
+ export { GoogleConsoleClient } from './client.js';
7
+ export { createAuthClient, verifyAuth, loadCredentials } from './auth.js';
8
+ export { analyzeInspectionResults } from './analyzer.js';
9
+ export type { ServiceAccountCredentials } from './auth.js';
package/src/index.ts ADDED
@@ -0,0 +1,144 @@
1
+ /**
2
+ * @djangocfg/seo
3
+ * SEO analytics and indexing diagnostics module
4
+ *
5
+ * Features:
6
+ * - Google Search Console integration (URL Inspection API)
7
+ * - Site crawler with SEO analysis
8
+ * - AI-ready reports (JSON + Markdown)
9
+ * - robots.txt and sitemap validation
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * import { SeoAnalyzer } from '@djangocfg/seo';
14
+ *
15
+ * const analyzer = new SeoAnalyzer({
16
+ * siteUrl: 'https://example.com',
17
+ * googleConsole: {
18
+ * serviceAccountPath: './service_account.json',
19
+ * },
20
+ * });
21
+ *
22
+ * const report = await analyzer.analyze();
23
+ * await analyzer.saveReport('./reports');
24
+ * ```
25
+ */
26
+
27
+ // Main Analyzer Class
28
+ export { SeoAnalyzer } from './analyzer.js';
29
+
30
+ // Google Console
31
+ export {
32
+ GoogleConsoleClient,
33
+ createAuthClient,
34
+ verifyAuth,
35
+ analyzeInspectionResults,
36
+ } from './google-console/index.js';
37
+
38
+ // Crawler
39
+ export {
40
+ SiteCrawler,
41
+ analyzeCrawlResults,
42
+ analyzeRobotsTxt,
43
+ isUrlAllowed,
44
+ analyzeSitemap,
45
+ analyzeAllSitemaps,
46
+ } from './crawler/index.js';
47
+
48
+ // Link Checker
49
+ export {
50
+ checkLinks,
51
+ linkResultsToSeoIssues,
52
+ } from './link-checker/index.js';
53
+ export type {
54
+ CheckLinksOptions,
55
+ CheckLinksResult,
56
+ } from './link-checker/index.js';
57
+
58
+ // Reports
59
+ export {
60
+ generateAndSaveReports,
61
+ generateJsonReport,
62
+ generateMarkdownReport,
63
+ generateAiSummary,
64
+ printReportSummary,
65
+ mergeReports,
66
+ AI_REPORT_SCHEMA,
67
+ } from './reports/index.js';
68
+
69
+ // Types
70
+ export type {
71
+ // Core types
72
+ SeoIssue,
73
+ SeoReport,
74
+ ReportSummary,
75
+ Recommendation,
76
+ IssueSeverity,
77
+ IssueCategory,
78
+
79
+ // Google Console types
80
+ GoogleConsoleConfig,
81
+ UrlInspectionResult,
82
+ IndexingVerdict,
83
+ CoverageState,
84
+ IndexingState,
85
+ RobotsTxtState,
86
+ PageFetchState,
87
+
88
+ // Crawler types
89
+ CrawlResult,
90
+ CrawlerConfig,
91
+
92
+ // Config types
93
+ SeoModuleConfig,
94
+ } from './types/index.js';
95
+
96
+ // Utils
97
+ export {
98
+ loadUrlsFromFile,
99
+ normalizeUrl,
100
+ isSameDomain,
101
+ formatBytes,
102
+ formatDuration,
103
+ chunk,
104
+ sleep,
105
+ retry,
106
+ } from './utils/index.js';
107
+
108
+ // Content (MDX/Nextra tools)
109
+ export {
110
+ checkContentLinks,
111
+ fixContentLinks,
112
+ generateSitemap,
113
+ generateSitemapData,
114
+ flattenSitemap,
115
+ countSitemapItems,
116
+ detectProjectType,
117
+ scanProject,
118
+ groupBrokenLinksByFile,
119
+ } from './content/index.js';
120
+
121
+ export type {
122
+ ContentConfig,
123
+ SitemapConfig,
124
+ SitemapItem,
125
+ SitemapData,
126
+ BrokenLink,
127
+ LinkCheckResult,
128
+ FixLinksResult,
129
+ ContentScanResult,
130
+ } from './content/index.js';
131
+
132
+ // Routes (App Router scanner)
133
+ export {
134
+ scanRoutes,
135
+ findAppDir,
136
+ routeToUrl,
137
+ getStaticUrls,
138
+ } from './routes/scanner.js';
139
+
140
+ export type {
141
+ RouteInfo,
142
+ ScanResult,
143
+ ScanOptions,
144
+ } from './routes/scanner.js';