@djangocfg/seo 2.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/README.md +192 -0
  2. package/dist/cli.d.ts +1 -0
  3. package/dist/cli.mjs +3780 -0
  4. package/dist/cli.mjs.map +1 -0
  5. package/dist/crawler/index.d.ts +88 -0
  6. package/dist/crawler/index.mjs +610 -0
  7. package/dist/crawler/index.mjs.map +1 -0
  8. package/dist/google-console/index.d.ts +95 -0
  9. package/dist/google-console/index.mjs +539 -0
  10. package/dist/google-console/index.mjs.map +1 -0
  11. package/dist/index.d.ts +285 -0
  12. package/dist/index.mjs +3236 -0
  13. package/dist/index.mjs.map +1 -0
  14. package/dist/link-checker/index.d.ts +76 -0
  15. package/dist/link-checker/index.mjs +326 -0
  16. package/dist/link-checker/index.mjs.map +1 -0
  17. package/dist/markdown-report-B3QdDzxE.d.ts +193 -0
  18. package/dist/reports/index.d.ts +24 -0
  19. package/dist/reports/index.mjs +836 -0
  20. package/dist/reports/index.mjs.map +1 -0
  21. package/dist/routes/index.d.ts +69 -0
  22. package/dist/routes/index.mjs +372 -0
  23. package/dist/routes/index.mjs.map +1 -0
  24. package/dist/scanner-Cz4Th2Pt.d.ts +60 -0
  25. package/dist/types/index.d.ts +144 -0
  26. package/dist/types/index.mjs +3 -0
  27. package/dist/types/index.mjs.map +1 -0
  28. package/package.json +114 -0
  29. package/src/analyzer.ts +256 -0
  30. package/src/cli/commands/audit.ts +260 -0
  31. package/src/cli/commands/content.ts +180 -0
  32. package/src/cli/commands/crawl.ts +32 -0
  33. package/src/cli/commands/index.ts +12 -0
  34. package/src/cli/commands/inspect.ts +60 -0
  35. package/src/cli/commands/links.ts +41 -0
  36. package/src/cli/commands/robots.ts +36 -0
  37. package/src/cli/commands/routes.ts +126 -0
  38. package/src/cli/commands/sitemap.ts +48 -0
  39. package/src/cli/index.ts +149 -0
  40. package/src/cli/types.ts +40 -0
  41. package/src/config.ts +207 -0
  42. package/src/content/index.ts +51 -0
  43. package/src/content/link-checker.ts +182 -0
  44. package/src/content/link-fixer.ts +188 -0
  45. package/src/content/scanner.ts +200 -0
  46. package/src/content/sitemap-generator.ts +321 -0
  47. package/src/content/types.ts +140 -0
  48. package/src/crawler/crawler.ts +425 -0
  49. package/src/crawler/index.ts +10 -0
  50. package/src/crawler/robots-parser.ts +171 -0
  51. package/src/crawler/sitemap-validator.ts +204 -0
  52. package/src/google-console/analyzer.ts +317 -0
  53. package/src/google-console/auth.ts +100 -0
  54. package/src/google-console/client.ts +281 -0
  55. package/src/google-console/index.ts +9 -0
  56. package/src/index.ts +144 -0
  57. package/src/link-checker/index.ts +461 -0
  58. package/src/reports/claude-context.ts +149 -0
  59. package/src/reports/generator.ts +244 -0
  60. package/src/reports/index.ts +27 -0
  61. package/src/reports/json-report.ts +320 -0
  62. package/src/reports/markdown-report.ts +246 -0
  63. package/src/reports/split-report.ts +252 -0
  64. package/src/routes/analyzer.ts +324 -0
  65. package/src/routes/index.ts +25 -0
  66. package/src/routes/scanner.ts +298 -0
  67. package/src/types/index.ts +222 -0
  68. package/src/utils/index.ts +154 -0
@@ -0,0 +1,222 @@
1
+ /**
2
+ * @djangocfg/seo - Types
3
+ * Shared types for SEO module
4
+ */
5
+
6
+ // ============================================
7
+ // Google Search Console Types
8
+ // ============================================
9
+
10
+ export type IndexingVerdict =
11
+ | 'PASS'
12
+ | 'PARTIAL'
13
+ | 'FAIL'
14
+ | 'NEUTRAL'
15
+ | 'VERDICT_UNSPECIFIED';
16
+
17
+ export type CoverageState =
18
+ | 'SUBMITTED_AND_INDEXED'
19
+ | 'DUPLICATE_WITHOUT_USER_SELECTED_CANONICAL'
20
+ | 'DUPLICATE_GOOGLE_CHOSE_DIFFERENT_CANONICAL'
21
+ | 'NOT_INDEXED'
22
+ | 'URL_NOT_FOUND'
23
+ | 'CRAWLED_CURRENTLY_NOT_INDEXED'
24
+ | 'DISCOVERED_CURRENTLY_NOT_INDEXED'
25
+ | 'BLOCKED_DUE_TO_UNAUTHORIZED_REQUEST'
26
+ | 'BLOCKED_BY_ROBOTS_TXT'
27
+ | 'INDEXED_NOT_SUBMITTED_IN_SITEMAP'
28
+ | 'COVERAGE_STATE_UNSPECIFIED';
29
+
30
+ export type IndexingState =
31
+ | 'INDEXING_ALLOWED'
32
+ | 'BLOCKED_BY_META_TAG'
33
+ | 'BLOCKED_BY_HTTP_HEADER'
34
+ | 'BLOCKED_BY_ROBOTS_TXT'
35
+ | 'INDEXING_STATE_UNSPECIFIED';
36
+
37
+ export type RobotsTxtState =
38
+ | 'ALLOWED'
39
+ | 'DISALLOWED'
40
+ | 'ROBOTS_TXT_STATE_UNSPECIFIED';
41
+
42
+ export type PageFetchState =
43
+ | 'SUCCESSFUL'
44
+ | 'SOFT_404'
45
+ | 'BLOCKED_ROBOTS_TXT'
46
+ | 'NOT_FOUND'
47
+ | 'ACCESS_DENIED'
48
+ | 'SERVER_ERROR'
49
+ | 'REDIRECT_ERROR'
50
+ | 'ACCESS_FORBIDDEN'
51
+ | 'BLOCKED_4XX'
52
+ | 'INTERNAL_CRAWL_ERROR'
53
+ | 'INVALID_URL'
54
+ | 'PAGE_FETCH_STATE_UNSPECIFIED';
55
+
56
+ export interface UrlInspectionResult {
57
+ url: string;
58
+ inspectionResultLink?: string;
59
+ indexStatusResult: {
60
+ verdict: IndexingVerdict;
61
+ coverageState: CoverageState;
62
+ indexingState: IndexingState;
63
+ robotsTxtState: RobotsTxtState;
64
+ pageFetchState: PageFetchState;
65
+ lastCrawlTime?: string;
66
+ crawledAs?: 'DESKTOP' | 'MOBILE';
67
+ googleCanonical?: string;
68
+ userCanonical?: string;
69
+ sitemap?: string[];
70
+ referringUrls?: string[];
71
+ };
72
+ mobileUsabilityResult?: {
73
+ verdict: IndexingVerdict;
74
+ issues?: Array<{
75
+ issueType: string;
76
+ message: string;
77
+ }>;
78
+ };
79
+ richResultsResult?: {
80
+ verdict: IndexingVerdict;
81
+ detectedItems?: Array<{
82
+ richResultType: string;
83
+ items?: Array<{
84
+ name: string;
85
+ issues?: Array<{
86
+ issueMessage: string;
87
+ severity: 'ERROR' | 'WARNING';
88
+ }>;
89
+ }>;
90
+ }>;
91
+ };
92
+ }
93
+
94
+ // ============================================
95
+ // SEO Issue Types
96
+ // ============================================
97
+
98
+ export type IssueSeverity = 'critical' | 'error' | 'warning' | 'info';
99
+
100
+ export type IssueCategory =
101
+ | 'indexing'
102
+ | 'crawling'
103
+ | 'content'
104
+ | 'technical'
105
+ | 'mobile'
106
+ | 'performance'
107
+ | 'structured-data'
108
+ | 'security';
109
+
110
+ export interface SeoIssue {
111
+ id: string;
112
+ url: string;
113
+ category: IssueCategory;
114
+ severity: IssueSeverity;
115
+ title: string;
116
+ description: string;
117
+ recommendation: string;
118
+ detectedAt: string;
119
+ metadata?: Record<string, unknown>;
120
+ }
121
+
122
+ // ============================================
123
+ // Crawler Types
124
+ // ============================================
125
+
126
+ export interface CrawlResult {
127
+ url: string;
128
+ statusCode: number;
129
+ contentType?: string;
130
+ title?: string;
131
+ metaDescription?: string;
132
+ metaRobots?: string;
133
+ canonicalUrl?: string;
134
+ h1?: string[];
135
+ h2?: string[];
136
+ links: {
137
+ internal: string[];
138
+ external: string[];
139
+ };
140
+ images: Array<{
141
+ src: string;
142
+ alt?: string;
143
+ hasAlt: boolean;
144
+ }>;
145
+ loadTime: number;
146
+ /** Time to first byte (ms) */
147
+ ttfb?: number;
148
+ contentLength?: number;
149
+ errors: string[];
150
+ warnings: string[];
151
+ crawledAt: string;
152
+ }
153
+
154
+ export interface CrawlerConfig {
155
+ maxPages?: number;
156
+ maxDepth?: number;
157
+ concurrency?: number;
158
+ timeout?: number;
159
+ userAgent?: string;
160
+ respectRobotsTxt?: boolean;
161
+ includePatterns?: string[];
162
+ excludePatterns?: string[];
163
+ }
164
+
165
+ // ============================================
166
+ // Report Types
167
+ // ============================================
168
+
169
+ export interface SeoReport {
170
+ id: string;
171
+ siteUrl: string;
172
+ generatedAt: string;
173
+ summary: ReportSummary;
174
+ issues: SeoIssue[];
175
+ urlInspections: UrlInspectionResult[];
176
+ crawlResults: CrawlResult[];
177
+ recommendations: Recommendation[];
178
+ }
179
+
180
+ export interface ReportSummary {
181
+ totalUrls: number;
182
+ indexedUrls: number;
183
+ notIndexedUrls: number;
184
+ issuesByCategory: Record<IssueCategory, number>;
185
+ issuesBySeverity: Record<IssueSeverity, number>;
186
+ healthScore: number; // 0-100
187
+ }
188
+
189
+ export interface Recommendation {
190
+ priority: 1 | 2 | 3 | 4 | 5;
191
+ category: IssueCategory;
192
+ title: string;
193
+ description: string;
194
+ affectedUrls: string[];
195
+ estimatedImpact: 'high' | 'medium' | 'low';
196
+ actionItems: string[];
197
+ }
198
+
199
+ // ============================================
200
+ // Configuration Types
201
+ // ============================================
202
+
203
+ export interface GoogleConsoleConfig {
204
+ serviceAccountPath?: string;
205
+ serviceAccountJson?: {
206
+ client_email: string;
207
+ private_key: string;
208
+ project_id?: string;
209
+ };
210
+ siteUrl: string;
211
+ /** GSC property format: 'sc-domain:example.com' or 'https://example.com'. Auto-detected if not provided. */
212
+ gscSiteUrl?: string;
213
+ }
214
+
215
+ export interface SeoModuleConfig {
216
+ googleConsole?: GoogleConsoleConfig;
217
+ crawler?: CrawlerConfig;
218
+ reports?: {
219
+ outputDir: string;
220
+ formats: ('json' | 'markdown')[];
221
+ };
222
+ }
@@ -0,0 +1,154 @@
1
+ /**
2
+ * @djangocfg/seo - Utilities
3
+ * Shared utility functions
4
+ */
5
+
6
+ import { readFileSync, existsSync } from 'node:fs';
7
+
8
+ /**
9
+ * Load URLs from a file (one URL per line)
10
+ */
11
+ export function loadUrlsFromFile(filePath: string): string[] {
12
+ if (!existsSync(filePath)) {
13
+ throw new Error(`File not found: ${filePath}`);
14
+ }
15
+
16
+ const content = readFileSync(filePath, 'utf-8');
17
+ return content
18
+ .split('\n')
19
+ .map((line) => line.trim())
20
+ .filter((line) => line && !line.startsWith('#'));
21
+ }
22
+
23
+ /**
24
+ * Extract URLs from sitemap XML content
25
+ */
26
+ export function extractUrlsFromSitemap(xmlContent: string): string[] {
27
+ const urls: string[] = [];
28
+ const locRegex = /<loc>([^<]+)<\/loc>/g;
29
+ let match;
30
+
31
+ while ((match = locRegex.exec(xmlContent)) !== null) {
32
+ if (match[1]) {
33
+ urls.push(match[1].trim());
34
+ }
35
+ }
36
+
37
+ return urls;
38
+ }
39
+
40
+ /**
41
+ * Normalize URL for comparison
42
+ */
43
+ export function normalizeUrl(url: string, baseUrl?: string): string {
44
+ try {
45
+ const parsed = new URL(url, baseUrl);
46
+ parsed.hash = '';
47
+ // Remove trailing slash
48
+ if (parsed.pathname.endsWith('/') && parsed.pathname !== '/') {
49
+ parsed.pathname = parsed.pathname.slice(0, -1);
50
+ }
51
+ return parsed.href;
52
+ } catch {
53
+ return url;
54
+ }
55
+ }
56
+
57
+ /**
58
+ * Check if URL belongs to the same domain
59
+ */
60
+ export function isSameDomain(url: string, baseUrl: string): boolean {
61
+ try {
62
+ const urlHost = new URL(url).hostname;
63
+ const baseHost = new URL(baseUrl).hostname;
64
+ return urlHost === baseHost;
65
+ } catch {
66
+ return false;
67
+ }
68
+ }
69
+
70
+ /**
71
+ * Format bytes to human readable string
72
+ */
73
+ export function formatBytes(bytes: number): string {
74
+ if (bytes === 0) return '0 B';
75
+
76
+ const units = ['B', 'KB', 'MB', 'GB'];
77
+ const k = 1024;
78
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
79
+
80
+ return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${units[i]}`;
81
+ }
82
+
83
+ /**
84
+ * Format milliseconds to human readable string
85
+ */
86
+ export function formatDuration(ms: number): string {
87
+ if (ms < 1000) return `${ms}ms`;
88
+ if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
89
+ return `${Math.floor(ms / 60000)}m ${Math.round((ms % 60000) / 1000)}s`;
90
+ }
91
+
92
+ /**
93
+ * Simple hash function for generating IDs
94
+ */
95
+ export function hash(str: string): string {
96
+ let h = 0;
97
+ for (let i = 0; i < str.length; i++) {
98
+ const char = str.charCodeAt(i);
99
+ h = (h << 5) - h + char;
100
+ h = h & h;
101
+ }
102
+ return Math.abs(h).toString(36);
103
+ }
104
+
105
+ /**
106
+ * Chunk array into smaller arrays
107
+ */
108
+ export function chunk<T>(array: T[], size: number): T[][] {
109
+ const chunks: T[][] = [];
110
+ for (let i = 0; i < array.length; i += size) {
111
+ chunks.push(array.slice(i, i + size));
112
+ }
113
+ return chunks;
114
+ }
115
+
116
+ /**
117
+ * Sleep for a given number of milliseconds
118
+ */
119
+ export function sleep(ms: number): Promise<void> {
120
+ return new Promise((resolve) => setTimeout(resolve, ms));
121
+ }
122
+
123
+ /**
124
+ * Retry a function with exponential backoff
125
+ */
126
+ export async function retry<T>(
127
+ fn: () => Promise<T>,
128
+ options: {
129
+ retries?: number;
130
+ minTimeout?: number;
131
+ maxTimeout?: number;
132
+ factor?: number;
133
+ } = {}
134
+ ): Promise<T> {
135
+ const { retries = 3, minTimeout = 1000, maxTimeout = 30000, factor = 2 } = options;
136
+
137
+ let lastError: Error | undefined;
138
+ let timeout = minTimeout;
139
+
140
+ for (let attempt = 0; attempt <= retries; attempt++) {
141
+ try {
142
+ return await fn();
143
+ } catch (error) {
144
+ lastError = error as Error;
145
+
146
+ if (attempt < retries) {
147
+ await sleep(timeout);
148
+ timeout = Math.min(timeout * factor, maxTimeout);
149
+ }
150
+ }
151
+ }
152
+
153
+ throw lastError;
154
+ }