@djangocfg/seo 2.1.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +192 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.mjs +3780 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/crawler/index.d.ts +88 -0
- package/dist/crawler/index.mjs +610 -0
- package/dist/crawler/index.mjs.map +1 -0
- package/dist/google-console/index.d.ts +95 -0
- package/dist/google-console/index.mjs +539 -0
- package/dist/google-console/index.mjs.map +1 -0
- package/dist/index.d.ts +285 -0
- package/dist/index.mjs +3236 -0
- package/dist/index.mjs.map +1 -0
- package/dist/link-checker/index.d.ts +76 -0
- package/dist/link-checker/index.mjs +326 -0
- package/dist/link-checker/index.mjs.map +1 -0
- package/dist/markdown-report-B3QdDzxE.d.ts +193 -0
- package/dist/reports/index.d.ts +24 -0
- package/dist/reports/index.mjs +836 -0
- package/dist/reports/index.mjs.map +1 -0
- package/dist/routes/index.d.ts +69 -0
- package/dist/routes/index.mjs +372 -0
- package/dist/routes/index.mjs.map +1 -0
- package/dist/scanner-Cz4Th2Pt.d.ts +60 -0
- package/dist/types/index.d.ts +144 -0
- package/dist/types/index.mjs +3 -0
- package/dist/types/index.mjs.map +1 -0
- package/package.json +114 -0
- package/src/analyzer.ts +256 -0
- package/src/cli/commands/audit.ts +260 -0
- package/src/cli/commands/content.ts +180 -0
- package/src/cli/commands/crawl.ts +32 -0
- package/src/cli/commands/index.ts +12 -0
- package/src/cli/commands/inspect.ts +60 -0
- package/src/cli/commands/links.ts +41 -0
- package/src/cli/commands/robots.ts +36 -0
- package/src/cli/commands/routes.ts +126 -0
- package/src/cli/commands/sitemap.ts +48 -0
- package/src/cli/index.ts +149 -0
- package/src/cli/types.ts +40 -0
- package/src/config.ts +207 -0
- package/src/content/index.ts +51 -0
- package/src/content/link-checker.ts +182 -0
- package/src/content/link-fixer.ts +188 -0
- package/src/content/scanner.ts +200 -0
- package/src/content/sitemap-generator.ts +321 -0
- package/src/content/types.ts +140 -0
- package/src/crawler/crawler.ts +425 -0
- package/src/crawler/index.ts +10 -0
- package/src/crawler/robots-parser.ts +171 -0
- package/src/crawler/sitemap-validator.ts +204 -0
- package/src/google-console/analyzer.ts +317 -0
- package/src/google-console/auth.ts +100 -0
- package/src/google-console/client.ts +281 -0
- package/src/google-console/index.ts +9 -0
- package/src/index.ts +144 -0
- package/src/link-checker/index.ts +461 -0
- package/src/reports/claude-context.ts +149 -0
- package/src/reports/generator.ts +244 -0
- package/src/reports/index.ts +27 -0
- package/src/reports/json-report.ts +320 -0
- package/src/reports/markdown-report.ts +246 -0
- package/src/reports/split-report.ts +252 -0
- package/src/routes/analyzer.ts +324 -0
- package/src/routes/index.ts +25 -0
- package/src/routes/scanner.ts +298 -0
- package/src/types/index.ts +222 -0
- package/src/utils/index.ts +154 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @djangocfg/seo - Types
|
|
3
|
+
* Shared types for SEO module
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// ============================================
|
|
7
|
+
// Google Search Console Types
|
|
8
|
+
// ============================================
|
|
9
|
+
|
|
10
|
+
/** Verdict values used by the `verdict` fields of a URL inspection result. */
export type IndexingVerdict = 'PASS' | 'PARTIAL' | 'FAIL' | 'NEUTRAL' | 'VERDICT_UNSPECIFIED';
|
|
16
|
+
|
|
17
|
+
/** Values accepted for `indexStatusResult.coverageState` in a URL inspection result. */
export type CoverageState =
  'SUBMITTED_AND_INDEXED' |
  'DUPLICATE_WITHOUT_USER_SELECTED_CANONICAL' |
  'DUPLICATE_GOOGLE_CHOSE_DIFFERENT_CANONICAL' |
  'NOT_INDEXED' |
  'URL_NOT_FOUND' |
  'CRAWLED_CURRENTLY_NOT_INDEXED' |
  'DISCOVERED_CURRENTLY_NOT_INDEXED' |
  'BLOCKED_DUE_TO_UNAUTHORIZED_REQUEST' |
  'BLOCKED_BY_ROBOTS_TXT' |
  'INDEXED_NOT_SUBMITTED_IN_SITEMAP' |
  'COVERAGE_STATE_UNSPECIFIED';
|
|
29
|
+
|
|
30
|
+
/** Values accepted for `indexStatusResult.indexingState` in a URL inspection result. */
export type IndexingState =
  'INDEXING_ALLOWED' |
  'BLOCKED_BY_META_TAG' |
  'BLOCKED_BY_HTTP_HEADER' |
  'BLOCKED_BY_ROBOTS_TXT' |
  'INDEXING_STATE_UNSPECIFIED';
|
|
36
|
+
|
|
37
|
+
/** Values accepted for `indexStatusResult.robotsTxtState` in a URL inspection result. */
export type RobotsTxtState = 'ALLOWED' | 'DISALLOWED' | 'ROBOTS_TXT_STATE_UNSPECIFIED';
|
|
41
|
+
|
|
42
|
+
/** Values accepted for `indexStatusResult.pageFetchState` in a URL inspection result. */
export type PageFetchState =
  'SUCCESSFUL' |
  'SOFT_404' |
  'BLOCKED_ROBOTS_TXT' |
  'NOT_FOUND' |
  'ACCESS_DENIED' |
  'SERVER_ERROR' |
  'REDIRECT_ERROR' |
  'ACCESS_FORBIDDEN' |
  'BLOCKED_4XX' |
  'INTERNAL_CRAWL_ERROR' |
  'INVALID_URL' |
  'PAGE_FETCH_STATE_UNSPECIFIED';
|
|
55
|
+
|
|
56
|
+
/**
 * Inspection result for a single URL: a mandatory index-status section plus
 * optional mobile-usability and rich-results sections.
 */
export interface UrlInspectionResult {
  url: string;
  inspectionResultLink?: string;

  /** Index status section; the only section that is always present. */
  indexStatusResult: {
    verdict: IndexingVerdict;
    coverageState: CoverageState;
    indexingState: IndexingState;
    robotsTxtState: RobotsTxtState;
    pageFetchState: PageFetchState;
    lastCrawlTime?: string;
    crawledAs?: 'DESKTOP' | 'MOBILE';
    googleCanonical?: string;
    userCanonical?: string;
    sitemap?: Array<string>;
    referringUrls?: Array<string>;
  };

  /** Optional mobile-usability section. */
  mobileUsabilityResult?: {
    verdict: IndexingVerdict;
    issues?: { issueType: string; message: string }[];
  };

  /** Optional rich-results section; detected items are grouped by rich result type. */
  richResultsResult?: {
    verdict: IndexingVerdict;
    detectedItems?: {
      richResultType: string;
      items?: {
        name: string;
        issues?: { issueMessage: string; severity: 'ERROR' | 'WARNING' }[];
      }[];
    }[];
  };
}
|
|
93
|
+
|
|
94
|
+
// ============================================
|
|
95
|
+
// SEO Issue Types
|
|
96
|
+
// ============================================
|
|
97
|
+
|
|
98
|
+
/** Severity labels that can be attached to an SEO issue. */
export type IssueSeverity =
  | 'critical'
  | 'error'
  | 'warning'
  | 'info';
|
|
99
|
+
|
|
100
|
+
/** Categories used to classify SEO issues and recommendations. */
export type IssueCategory =
  'indexing' |
  'crawling' |
  'content' |
  'technical' |
  'mobile' |
  'performance' |
  'structured-data' |
  'security';
|
|
109
|
+
|
|
110
|
+
/** A single SEO issue tied to a URL, classified by category and severity. */
export interface SeoIssue {
  id: string;
  url: string;

  // Classification
  category: IssueCategory;
  severity: IssueSeverity;

  // Human-readable details
  title: string;
  description: string;
  recommendation: string;

  detectedAt: string;
  /** Optional free-form extra data attached to the issue. */
  metadata?: Record<string, unknown>;
}
|
|
121
|
+
|
|
122
|
+
// ============================================
|
|
123
|
+
// Crawler Types
|
|
124
|
+
// ============================================
|
|
125
|
+
|
|
126
|
+
/** Shape of one crawl result entry (one entry per crawled URL). */
export interface CrawlResult {
  url: string;
  statusCode: number;
  contentType?: string;

  // Head / meta information
  title?: string;
  metaDescription?: string;
  metaRobots?: string;
  canonicalUrl?: string;

  // Heading texts
  h1?: Array<string>;
  h2?: Array<string>;

  /** Links found on the page, split into internal and external. */
  links: { internal: Array<string>; external: Array<string> };
  /** Images found on the page, with their alt-text status. */
  images: { src: string; alt?: string; hasAlt: boolean }[];

  loadTime: number;
  /** Time to first byte (ms) */
  ttfb?: number;
  contentLength?: number;

  errors: Array<string>;
  warnings: Array<string>;
  crawledAt: string;
}
|
|
153
|
+
|
|
154
|
+
/** Crawler options; every field is optional. */
export interface CrawlerConfig {
  // Crawl limits
  maxPages?: number;
  maxDepth?: number;
  concurrency?: number;
  timeout?: number;

  // Request behaviour
  userAgent?: string;
  respectRobotsTxt?: boolean;

  // URL filtering
  includePatterns?: Array<string>;
  excludePatterns?: Array<string>;
}
|
|
164
|
+
|
|
165
|
+
// ============================================
|
|
166
|
+
// Report Types
|
|
167
|
+
// ============================================
|
|
168
|
+
|
|
169
|
+
/**
 * A complete SEO report for one site: summary figures plus the issues,
 * URL inspections, crawl results and recommendations it contains.
 */
export interface SeoReport {
  id: string;
  siteUrl: string;
  generatedAt: string;

  summary: ReportSummary;

  issues: Array<SeoIssue>;
  urlInspections: Array<UrlInspectionResult>;
  crawlResults: Array<CrawlResult>;
  recommendations: Array<Recommendation>;
}
|
|
179
|
+
|
|
180
|
+
/** Aggregate figures shown at the top of an SEO report. */
export interface ReportSummary {
  totalUrls: number;
  indexedUrls: number;
  notIndexedUrls: number;

  /** Issue count for every category (all keys are required). */
  issuesByCategory: { [Category in IssueCategory]: number };
  /** Issue count for every severity (all keys are required). */
  issuesBySeverity: { [Severity in IssueSeverity]: number };

  /** Overall score in the range 0-100. */
  healthScore: number;
}
|
|
188
|
+
|
|
189
|
+
/** A recommended action, with the URLs it affects and concrete action items. */
export interface Recommendation {
  /** Priority level, an integer from 1 to 5. */
  priority: 1 | 2 | 3 | 4 | 5;
  category: IssueCategory;

  title: string;
  description: string;

  affectedUrls: Array<string>;
  estimatedImpact: 'high' | 'medium' | 'low';
  actionItems: Array<string>;
}
|
|
198
|
+
|
|
199
|
+
// ============================================
|
|
200
|
+
// Configuration Types
|
|
201
|
+
// ============================================
|
|
202
|
+
|
|
203
|
+
/**
 * Google Search Console settings. Service-account credentials can be given
 * as a file path or as an inline object; both are optional at the type level.
 */
export interface GoogleConsoleConfig {
  /** Path to a service-account file. */
  serviceAccountPath?: string;
  /** Inline service-account credentials. */
  serviceAccountJson?: { client_email: string; private_key: string; project_id?: string };

  siteUrl: string;
  /** GSC property format: 'sc-domain:example.com' or 'https://example.com'. Auto-detected if not provided. */
  gscSiteUrl?: string;
}
|
|
214
|
+
|
|
215
|
+
/** Top-level module configuration; each section is optional. */
export interface SeoModuleConfig {
  googleConsole?: GoogleConsoleConfig;
  crawler?: CrawlerConfig;
  /** Report output directory and the formats to produce. */
  reports?: { outputDir: string; formats: Array<'json' | 'markdown'> };
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @djangocfg/seo - Utilities
|
|
3
|
+
* Shared utility functions
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
7
|
+
|
|
8
|
+
/**
 * Read a plain-text URL list from disk (one URL per line).
 *
 * The content is split on `\n`; each line is trimmed, and empty lines and
 * lines starting with `#` are dropped.
 *
 * @param filePath - Path of the file to read (decoded as UTF-8).
 * @returns The remaining lines, in file order.
 * @throws Error when `filePath` does not exist.
 */
export function loadUrlsFromFile(filePath: string): string[] {
  if (!existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  const urls: string[] = [];
  for (const rawLine of readFileSync(filePath, 'utf-8').split('\n')) {
    const entry = rawLine.trim();
    // Skip blank lines and `#` comment lines.
    if (entry === '' || entry.startsWith('#')) {
      continue;
    }
    urls.push(entry);
  }
  return urls;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Extract URLs from sitemap XML content.
 *
 * Collects the text of every `<loc>` element, in document order. Values may
 * be plain text or wrapped in a CDATA section. Plain-text values have the
 * five predefined XML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`)
 * and numeric character references decoded, so a sitemap entry such as
 * `?a=1&amp;b=2` is returned as the real URL `?a=1&b=2`. CDATA content is
 * taken verbatim. Values that are empty after trimming are skipped.
 *
 * This is a regex scan, not a full XML parse.
 *
 * @param xmlContent - Raw sitemap XML.
 * @returns The extracted, trimmed URLs.
 */
export function extractUrlsFromSitemap(xmlContent: string): string[] {
  const urls: string[] = [];
  // Group 1: CDATA payload. Group 2: plain (entity-escaped) text.
  const locRegex = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+))\s*<\/loc>/g;
  const namedEntities: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  // Single-pass replacement so that `&amp;lt;` decodes to `&lt;`, not `<`.
  const decodeEntities = (text: string): string =>
    text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
      if (ref.charAt(0) !== '#') {
        return namedEntities[ref];
      }
      const codePoint =
        ref.charAt(1) === 'x' ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      // Leave out-of-range references untouched instead of throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });

  let match: RegExpExecArray | null;
  while ((match = locRegex.exec(xmlContent)) !== null) {
    const cdata = match[1];
    const text = match[2];
    const raw = cdata !== undefined ? cdata : decodeEntities(text !== undefined ? text : '');
    const url = raw.trim();
    if (url) {
      urls.push(url);
    }
  }

  return urls;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Normalize a URL so that equivalent URLs compare equal.
 *
 * The fragment is removed and one trailing slash is stripped from the path
 * (the root path `/` is kept). Input that cannot be parsed is returned
 * unchanged.
 *
 * @param url - Absolute URL, or a relative one when `baseUrl` is given.
 * @param baseUrl - Optional base used to resolve `url`.
 * @returns The normalized `href`, or `url` itself when parsing fails.
 */
export function normalizeUrl(url: string, baseUrl?: string): string {
  try {
    const target = new URL(url, baseUrl);
    target.hash = '';

    const { pathname } = target;
    const hasRemovableSlash = pathname !== '/' && pathname.endsWith('/');
    if (hasRemovableSlash) {
      target.pathname = pathname.slice(0, -1);
    }

    return target.href;
  } catch {
    // Not a parseable URL: hand the input back untouched.
    return url;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Tell whether two absolute URLs share the same hostname.
 *
 * Only `hostname` is compared; scheme, port and path are ignored.
 *
 * @param url - URL to test.
 * @param baseUrl - URL to compare against.
 * @returns `true` when both hostnames are equal; `false` otherwise, including
 *   when either argument cannot be parsed as a URL.
 */
export function isSameDomain(url: string, baseUrl: string): boolean {
  try {
    const [host, baseHost] = [url, baseUrl].map((value) => new URL(value).hostname);
    return host === baseHost;
  } catch {
    return false;
  }
}
|
|
69
|
+
|
|
70
|
+
/**
 * Format a byte count as a human-readable string using 1024-based units.
 *
 * The value is scaled to the largest unit that keeps it at or above 1
 * (capped at the last unit, `PB`) and printed with at most two decimals,
 * without trailing zeros. Values below 1 are printed in `B`, negative
 * values keep their sign, and non-finite values are printed as-is with `B`.
 *
 * @param bytes - Number of bytes.
 * @returns For example `'0 B'`, `'1.5 KB'`, `'1 TB'`.
 */
export function formatBytes(bytes: number): string {
  if (bytes === 0) return '0 B';
  if (!Number.isFinite(bytes)) return `${bytes} B`;

  const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const k = 1024;

  // Repeated division by 1024 is exact in binary floating point, unlike a
  // log ratio, and the bound keeps the index inside `units`.
  let value = Math.abs(bytes);
  let unitIndex = 0;
  while (value >= k && unitIndex < units.length - 1) {
    value /= k;
    unitIndex += 1;
  }

  const sign = bytes < 0 ? '-' : '';
  return `${sign}${parseFloat(value.toFixed(2))} ${units[unitIndex]}`;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Format a duration in milliseconds as a human-readable string.
 *
 * - below 1000 ms: `'<ms>ms'`
 * - below one minute: seconds with one decimal, e.g. `'1.5s'`
 * - otherwise: whole minutes and seconds, e.g. `'2m 5s'`
 *
 * Rounding carries into the next unit, so the result never shows `60.0s`
 * or `Xm 60s`.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
export function formatDuration(ms: number): string {
  if (ms < 1000) return `${ms}ms`;

  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // Values just under a minute round up to "60.0"; show them as minutes.
    if (seconds !== '60.0') return `${seconds}s`;
  }

  // Round the total first so that 60 seconds becomes an extra minute.
  const totalSeconds = Math.round(ms / 1000);
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Small non-cryptographic string hash for generating IDs.
 *
 * Folds each UTF-16 code unit into a 32-bit signed accumulator
 * (`acc * 31 + codeUnit`, wrapped to 32 bits) and returns the absolute
 * value in base 36.
 *
 * @param str - Text to hash.
 * @returns Base-36 digest; `'0'` for the empty string.
 */
export function hash(str: string): string {
  let acc = 0;
  let idx = 0;
  const len = str.length;
  while (idx < len) {
    // Same as `(acc << 5) - acc + codeUnit`, truncated to 32 bits.
    acc = (Math.imul(acc, 31) + str.charCodeAt(idx)) | 0;
    idx += 1;
  }
  return Math.abs(acc).toString(36);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Split an array into consecutive chunks of at most `size` elements.
 *
 * The last chunk may be shorter. The input array is not modified. A
 * fractional `size` is rounded down to a whole number of elements.
 *
 * @param array - Items to split.
 * @param size - Maximum chunk length; must be at least 1.
 * @returns The chunks in original order; an empty array for empty input.
 * @throws RangeError when `size` is `NaN` or less than 1, which would
 *   otherwise never advance through the array.
 */
export function chunk<T>(array: T[], size: number): T[][] {
  // `!(size >= 1)` also rejects NaN.
  if (!(size >= 1)) {
    throw new RangeError(`chunk size must be at least 1, received ${size}`);
  }

  // Whole-number step so chunks never overlap.
  const step = Math.floor(size);
  const chunks: T[][] = [];
  for (let i = 0; i < array.length; i += step) {
    chunks.push(array.slice(i, i + step));
  }
  return chunks;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Return a promise that resolves after a `setTimeout` of `ms` milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
122
|
+
|
|
123
|
+
/**
 * Run an async function, retrying with exponential backoff when it rejects.
 *
 * `fn` is always called at least once. After a failure the function waits
 * (via `sleep`) and tries again, up to `retries` extra times. The wait starts
 * at `minTimeout`, is multiplied by `factor` after every failure, and never
 * exceeds `maxTimeout`. No wait is added after the final failed attempt.
 *
 * @param fn - Operation to run.
 * @param options - Backoff settings:
 *   `retries` (default 3) is the number of retries after the first attempt;
 *   negative or `NaN` values are treated as 0.
 *   `minTimeout` (default 1000) is the first delay in ms.
 *   `maxTimeout` (default 30000) is the upper bound for any delay in ms.
 *   `factor` (default 2) is the delay multiplier.
 * @returns The value of the first successful `fn()` call.
 * @throws Whatever the last failed `fn()` call rejected with, unchanged.
 */
export async function retry<T>(
  fn: () => Promise<T>,
  options: {
    retries?: number;
    minTimeout?: number;
    maxTimeout?: number;
    factor?: number;
  } = {}
): Promise<T> {
  const { retries = 3, minTimeout = 1000, maxTimeout = 30000, factor = 2 } = options;

  // Guarantee one attempt: a negative or NaN `retries` would otherwise skip
  // `fn` entirely and throw `undefined`.
  const maxRetries = retries > 0 ? Math.floor(retries) : 0;
  // Even the first delay must respect the configured ceiling.
  let timeout = Math.min(minTimeout, maxTimeout);

  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt >= maxRetries) {
        // Out of retries: surface the original rejection as-is.
        throw error;
      }
      await sleep(timeout);
      timeout = Math.min(timeout * factor, maxTimeout);
    }
  }
}
|