@houtini/seo-crawler-mcp 2.1.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/houtini-logo.jpg +0 -0
- package/package.json +9 -1
- package/server.json +2 -2
- package/.github/workflows/ci.yml +0 -59
- package/src/analyzers/QueryLoader.ts +0 -175
- package/src/analyzers/queries/README.md +0 -228
- package/src/analyzers/queries/content/duplicate-h1.sql +0 -18
- package/src/analyzers/queries/content/duplicate-meta-descriptions.sql +0 -18
- package/src/analyzers/queries/content/duplicate-titles.sql +0 -19
- package/src/analyzers/queries/content/missing-h1.sql +0 -18
- package/src/analyzers/queries/content/missing-meta-descriptions.sql +0 -19
- package/src/analyzers/queries/content/multiple-h1.sql +0 -17
- package/src/analyzers/queries/content/thin-content.sql +0 -18
- package/src/analyzers/queries/critical/404-errors.sql +0 -14
- package/src/analyzers/queries/critical/broken-internal-links.sql +0 -22
- package/src/analyzers/queries/critical/missing-titles.sql +0 -17
- package/src/analyzers/queries/critical/server-errors.sql +0 -15
- package/src/analyzers/queries/opportunities/high-external-links.sql +0 -18
- package/src/analyzers/queries/opportunities/meta-description-length.sql +0 -27
- package/src/analyzers/queries/opportunities/missing-images.sql +0 -18
- package/src/analyzers/queries/opportunities/no-outbound-links.sql +0 -18
- package/src/analyzers/queries/opportunities/title-equals-h1.sql +0 -21
- package/src/analyzers/queries/opportunities/title-length.sql +0 -27
- package/src/analyzers/queries/opportunities/uncrawled-internal-links.sql +0 -20
- package/src/analyzers/queries/security/missing-csp.sql +0 -16
- package/src/analyzers/queries/security/missing-hsts.sql +0 -17
- package/src/analyzers/queries/security/missing-referrer-policy.sql +0 -16
- package/src/analyzers/queries/security/missing-x-frame-options.sql +0 -16
- package/src/analyzers/queries/security/protocol-relative-links.sql +0 -16
- package/src/analyzers/queries/security/unsafe-external-links.sql +0 -17
- package/src/analyzers/queries/technical/canonical-issues.sql +0 -20
- package/src/analyzers/queries/technical/heading-hierarchy-issues.sql +0 -19
- package/src/analyzers/queries/technical/non-https.sql +0 -16
- package/src/analyzers/queries/technical/orphan-pages.sql +0 -21
- package/src/analyzers/queries/technical/redirects.sql +0 -15
- package/src/cli.ts +0 -228
- package/src/core/ContentExtractor.ts +0 -480
- package/src/core/CrawlDatabase.ts +0 -736
- package/src/core/CrawlOrchestrator.ts +0 -346
- package/src/core/CrawlStorage.ts +0 -148
- package/src/core/LinkExtractor.ts +0 -119
- package/src/core/UrlManager.ts +0 -110
- package/src/formatters/structured-report-format.ts +0 -254
- package/src/index.ts +0 -261
- package/src/schema/index.ts +0 -176
- package/src/tools/analyze-seo.ts +0 -184
- package/src/tools/list-queries.ts +0 -70
- package/src/tools/query-seo-data.ts +0 -77
- package/src/tools/run-seo-audit.ts +0 -91
- package/src/types/index.ts +0 -179
- package/src/utils/debug.ts +0 -12
- package/tsconfig.json +0 -26
package/houtini-logo.jpg
ADDED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@houtini/seo-crawler-mcp",
|
|
3
|
-
"version": "2.1.
|
|
3
|
+
"version": "2.1.3",
|
|
4
4
|
"mcpName": "io.github.houtini-ai/seo-crawler-mcp",
|
|
5
5
|
"description": "Crawl and analyse websites for SEO errors and issues using Crawlee with SQLite storage",
|
|
6
6
|
"type": "module",
|
|
@@ -23,6 +23,14 @@
|
|
|
23
23
|
"audit",
|
|
24
24
|
"model-context-protocol"
|
|
25
25
|
],
|
|
26
|
+
"files": [
|
|
27
|
+
"build",
|
|
28
|
+
"README.md",
|
|
29
|
+
"LICENSE",
|
|
30
|
+
"NOTICE",
|
|
31
|
+
"houtini-logo.jpg",
|
|
32
|
+
"server.json"
|
|
33
|
+
],
|
|
26
34
|
"engines": {
|
|
27
35
|
"node": ">=18.0.0"
|
|
28
36
|
},
|
package/server.json
CHANGED
|
@@ -6,12 +6,12 @@
|
|
|
6
6
|
"url": "https://github.com/houtini-ai/seo-crawler-mcp",
|
|
7
7
|
"source": "github"
|
|
8
8
|
},
|
|
9
|
-
"version": "2.
|
|
9
|
+
"version": "2.1.3",
|
|
10
10
|
"packages": [
|
|
11
11
|
{
|
|
12
12
|
"registryType": "npm",
|
|
13
13
|
"identifier": "@houtini/seo-crawler-mcp",
|
|
14
|
-
"version": "2.
|
|
14
|
+
"version": "2.1.3",
|
|
15
15
|
"transport": {
|
|
16
16
|
"type": "stdio"
|
|
17
17
|
},
|
package/.github/workflows/ci.yml
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
name: CI
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches: [ main ]
|
|
6
|
-
pull_request:
|
|
7
|
-
branches: [ main ]
|
|
8
|
-
|
|
9
|
-
jobs:
|
|
10
|
-
build:
|
|
11
|
-
runs-on: ubuntu-latest
|
|
12
|
-
|
|
13
|
-
strategy:
|
|
14
|
-
matrix:
|
|
15
|
-
node-version: [18.x, 20.x, 22.x]
|
|
16
|
-
|
|
17
|
-
steps:
|
|
18
|
-
- name: Checkout repository
|
|
19
|
-
uses: actions/checkout@v4
|
|
20
|
-
|
|
21
|
-
- name: Setup Node.js ${{ matrix.node-version }}
|
|
22
|
-
uses: actions/setup-node@v4
|
|
23
|
-
with:
|
|
24
|
-
node-version: ${{ matrix.node-version }}
|
|
25
|
-
cache: 'npm'
|
|
26
|
-
|
|
27
|
-
- name: Install dependencies
|
|
28
|
-
run: npm ci
|
|
29
|
-
|
|
30
|
-
- name: Run TypeScript build
|
|
31
|
-
run: npm run build
|
|
32
|
-
|
|
33
|
-
- name: Run tests
|
|
34
|
-
run: npm test
|
|
35
|
-
continue-on-error: true
|
|
36
|
-
|
|
37
|
-
- name: Check for TypeScript errors
|
|
38
|
-
run: npx tsc --noEmit
|
|
39
|
-
|
|
40
|
-
lint:
|
|
41
|
-
runs-on: ubuntu-latest
|
|
42
|
-
|
|
43
|
-
steps:
|
|
44
|
-
- name: Checkout repository
|
|
45
|
-
uses: actions/checkout@v4
|
|
46
|
-
|
|
47
|
-
- name: Setup Node.js
|
|
48
|
-
uses: actions/setup-node@v4
|
|
49
|
-
with:
|
|
50
|
-
node-version: '20.x'
|
|
51
|
-
cache: 'npm'
|
|
52
|
-
|
|
53
|
-
- name: Install dependencies
|
|
54
|
-
run: npm ci
|
|
55
|
-
|
|
56
|
-
- name: Check formatting
|
|
57
|
-
run: |
|
|
58
|
-
echo "Checking for common code issues..."
|
|
59
|
-
npx tsc --noEmit || echo "TypeScript check completed"
|
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { fileURLToPath } from 'url';
|
|
4
|
-
|
|
5
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
-
const __dirname = path.dirname(__filename);
|
|
7
|
-
|
|
8
|
-
export interface QueryMetadata {
|
|
9
|
-
name: string;
|
|
10
|
-
category: 'critical' | 'content' | 'technical' | 'security' | 'opportunities';
|
|
11
|
-
priority: 'CRITICAL' | 'HIGH' | 'MEDIUM' | 'LOW';
|
|
12
|
-
description: string;
|
|
13
|
-
impact: string;
|
|
14
|
-
fix: string;
|
|
15
|
-
sql: string;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
function findQueriesPath(): string {
|
|
19
|
-
// Try multiple possible locations
|
|
20
|
-
const candidates = [
|
|
21
|
-
// When running from compiled build/analyzers/QueryLoader.js
|
|
22
|
-
path.join(__dirname, 'queries'),
|
|
23
|
-
// When running from src/analyzers/QueryLoader.ts
|
|
24
|
-
path.join(__dirname, '..', '..', 'src', 'analyzers', 'queries'),
|
|
25
|
-
// When running from project root
|
|
26
|
-
path.join(process.cwd(), 'src', 'analyzers', 'queries'),
|
|
27
|
-
// When running tests from project root
|
|
28
|
-
path.join(process.cwd(), 'build', 'analyzers', 'queries'),
|
|
29
|
-
];
|
|
30
|
-
|
|
31
|
-
for (const candidate of candidates) {
|
|
32
|
-
if (fs.existsSync(candidate)) {
|
|
33
|
-
return candidate;
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
throw new Error(`Could not find queries directory. Tried: ${candidates.join(', ')}`);
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
export class QueryLoader {
|
|
41
|
-
private queriesPath: string;
|
|
42
|
-
private queries: Map<string, QueryMetadata>;
|
|
43
|
-
|
|
44
|
-
constructor() {
|
|
45
|
-
this.queriesPath = findQueriesPath();
|
|
46
|
-
this.queries = new Map();
|
|
47
|
-
this.loadAllQueries();
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
private loadAllQueries(): void {
|
|
51
|
-
const categories = ['critical', 'content', 'technical', 'security', 'opportunities'];
|
|
52
|
-
|
|
53
|
-
for (const category of categories) {
|
|
54
|
-
const categoryPath = path.join(this.queriesPath, category);
|
|
55
|
-
|
|
56
|
-
if (!fs.existsSync(categoryPath)) {
|
|
57
|
-
continue;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
const files = fs.readdirSync(categoryPath).filter(f => f.endsWith('.sql'));
|
|
61
|
-
|
|
62
|
-
for (const file of files) {
|
|
63
|
-
const filePath = path.join(categoryPath, file);
|
|
64
|
-
const queryName = file.replace('.sql', '');
|
|
65
|
-
const metadata = this.parseQueryFile(filePath, queryName, category as any);
|
|
66
|
-
|
|
67
|
-
this.queries.set(queryName, metadata);
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
private parseQueryFile(
|
|
73
|
-
filePath: string,
|
|
74
|
-
name: string,
|
|
75
|
-
category: QueryMetadata['category']
|
|
76
|
-
): QueryMetadata {
|
|
77
|
-
const content = fs.readFileSync(filePath, 'utf-8');
|
|
78
|
-
const lines = content.split('\n');
|
|
79
|
-
|
|
80
|
-
let description = '';
|
|
81
|
-
let priority: QueryMetadata['priority'] = 'MEDIUM';
|
|
82
|
-
let impact = '';
|
|
83
|
-
let fix = '';
|
|
84
|
-
const sqlLines: string[] = [];
|
|
85
|
-
let inSQL = false;
|
|
86
|
-
|
|
87
|
-
for (const line of lines) {
|
|
88
|
-
const trimmed = line.trim();
|
|
89
|
-
|
|
90
|
-
if (trimmed.startsWith('--')) {
|
|
91
|
-
const comment = trimmed.substring(2).trim();
|
|
92
|
-
|
|
93
|
-
if (comment.startsWith('Priority:')) {
|
|
94
|
-
priority = comment.replace('Priority:', '').trim() as QueryMetadata['priority'];
|
|
95
|
-
} else if (comment.startsWith('Impact:')) {
|
|
96
|
-
impact = comment.replace('Impact:', '').trim();
|
|
97
|
-
} else if (comment.startsWith('Fix:')) {
|
|
98
|
-
fix = comment.replace('Fix:', '').trim();
|
|
99
|
-
} else if (comment && !comment.startsWith('Category:')) {
|
|
100
|
-
if (!description) {
|
|
101
|
-
description = comment;
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
} else if (trimmed.startsWith('SELECT')) {
|
|
105
|
-
inSQL = true;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
if (inSQL) {
|
|
109
|
-
sqlLines.push(line);
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
return {
|
|
114
|
-
name,
|
|
115
|
-
category,
|
|
116
|
-
priority,
|
|
117
|
-
description,
|
|
118
|
-
impact,
|
|
119
|
-
fix,
|
|
120
|
-
sql: sqlLines.join('\n').trim()
|
|
121
|
-
};
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
getQuery(name: string): QueryMetadata | undefined {
|
|
125
|
-
return this.queries.get(name);
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
getAllQueries(): QueryMetadata[] {
|
|
129
|
-
return Array.from(this.queries.values());
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
getQueriesByCategory(category: QueryMetadata['category']): QueryMetadata[] {
|
|
133
|
-
return this.getAllQueries().filter(q => q.category === category);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
getQueriesByPriority(priority: QueryMetadata['priority']): QueryMetadata[] {
|
|
137
|
-
return this.getAllQueries().filter(q => q.priority === priority);
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
getCriticalQueries(): QueryMetadata[] {
|
|
141
|
-
return this.getQueriesByPriority('CRITICAL');
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
getHighPriorityQueries(): QueryMetadata[] {
|
|
145
|
-
return this.getQueriesByPriority('HIGH');
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
listQueryNames(): string[] {
|
|
149
|
-
return Array.from(this.queries.keys()).sort();
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
getQueryStats(): {
|
|
153
|
-
total: number;
|
|
154
|
-
byCategory: Record<string, number>;
|
|
155
|
-
byPriority: Record<string, number>;
|
|
156
|
-
} {
|
|
157
|
-
const queries = this.getAllQueries();
|
|
158
|
-
|
|
159
|
-
const byCategory: Record<string, number> = {};
|
|
160
|
-
const byPriority: Record<string, number> = {};
|
|
161
|
-
|
|
162
|
-
for (const query of queries) {
|
|
163
|
-
byCategory[query.category] = (byCategory[query.category] || 0) + 1;
|
|
164
|
-
byPriority[query.priority] = (byPriority[query.priority] || 0) + 1;
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
return {
|
|
168
|
-
total: queries.length,
|
|
169
|
-
byCategory,
|
|
170
|
-
byPriority
|
|
171
|
-
};
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
export const queryLoader = new QueryLoader();
|
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
# SEO Analysis Query Library
|
|
2
|
-
|
|
3
|
-
**Version:** 1.0.0
|
|
4
|
-
**Last Updated:** 2026-02-01
|
|
5
|
-
**Coverage:** 25 detectable SEO issues
|
|
6
|
-
|
|
7
|
-
## Query Organization
|
|
8
|
-
|
|
9
|
-
All queries follow a standard format:
|
|
10
|
-
- SQL comments describing the issue
|
|
11
|
-
- Priority level (CRITICAL, HIGH, MEDIUM, LOW)
|
|
12
|
-
- Category classification
|
|
13
|
-
- Optimized SELECT statements with ORDER BY and LIMIT
|
|
14
|
-
- Results limited to 100 rows for performance
|
|
15
|
-
|
|
16
|
-
## Critical Issues (4 queries)
|
|
17
|
-
|
|
18
|
-
**Indexability issues that must be fixed immediately**
|
|
19
|
-
|
|
20
|
-
1. **missing-titles.sql** - Pages without title tags
|
|
21
|
-
- Priority: CRITICAL
|
|
22
|
-
- Impact: Major indexability problem
|
|
23
|
-
- Fix: Add unique, descriptive title tags
|
|
24
|
-
|
|
25
|
-
2. **broken-internal-links.sql** - Internal links to 404/5xx pages
|
|
26
|
-
- Priority: CRITICAL
|
|
27
|
-
- Impact: Poor user experience, crawl budget waste
|
|
28
|
-
- Fix: Update or remove broken links
|
|
29
|
-
|
|
30
|
-
3. **server-errors.sql** - Pages returning 5xx errors
|
|
31
|
-
- Priority: CRITICAL
|
|
32
|
-
- Impact: Prevents indexing
|
|
33
|
-
- Fix: Debug server issues immediately
|
|
34
|
-
|
|
35
|
-
4. **404-errors.sql** - Pages not found
|
|
36
|
-
- Priority: CRITICAL
|
|
37
|
-
- Impact: Dead ends for users and crawlers
|
|
38
|
-
- Fix: Redirect to relevant pages or restore content
|
|
39
|
-
|
|
40
|
-
## Content Quality Issues (7 queries)
|
|
41
|
-
|
|
42
|
-
**Problems with page content and metadata**
|
|
43
|
-
|
|
44
|
-
5. **duplicate-titles.sql** - Multiple pages with same title
|
|
45
|
-
- Priority: HIGH
|
|
46
|
-
- Impact: Cannibalization, poor CTR
|
|
47
|
-
- Fix: Create unique titles for each page
|
|
48
|
-
|
|
49
|
-
6. **duplicate-meta-descriptions.sql** - Duplicate meta descriptions
|
|
50
|
-
- Priority: MEDIUM
|
|
51
|
-
- Impact: Reduced CTR, missed opportunities
|
|
52
|
-
- Fix: Write unique descriptions
|
|
53
|
-
|
|
54
|
-
7. **missing-meta-descriptions.sql** - Pages without descriptions
|
|
55
|
-
- Priority: MEDIUM
|
|
56
|
-
- Impact: Search engines auto-generate poor snippets
|
|
57
|
-
- Fix: Add compelling meta descriptions
|
|
58
|
-
|
|
59
|
-
8. **thin-content.sql** - Pages with < 300 words
|
|
60
|
-
- Priority: MEDIUM
|
|
61
|
-
- Impact: Low quality signals
|
|
62
|
-
- Fix: Expand content or consolidate pages
|
|
63
|
-
|
|
64
|
-
9. **missing-h1.sql** - Pages without H1 tags
|
|
65
|
-
- Priority: HIGH
|
|
66
|
-
- Impact: Unclear page topic
|
|
67
|
-
- Fix: Add descriptive H1 tags
|
|
68
|
-
|
|
69
|
-
10. **multiple-h1.sql** - Pages with multiple H1 tags
|
|
70
|
-
- Priority: MEDIUM
|
|
71
|
-
- Impact: Diluted topical focus
|
|
72
|
-
- Fix: Use single H1 per page
|
|
73
|
-
|
|
74
|
-
11. **duplicate-h1.sql** - Multiple pages with same H1
|
|
75
|
-
- Priority: MEDIUM
|
|
76
|
-
- Impact: Content cannibalization
|
|
77
|
-
- Fix: Differentiate H1 tags
|
|
78
|
-
|
|
79
|
-
## Technical SEO Issues (5 queries)
|
|
80
|
-
|
|
81
|
-
**Infrastructure and architecture problems**
|
|
82
|
-
|
|
83
|
-
12. **redirects.sql** - Pages with 3xx redirect status
|
|
84
|
-
- Priority: MEDIUM
|
|
85
|
-
- Impact: Crawl budget waste, slow page speed
|
|
86
|
-
- Fix: Update links to final destinations
|
|
87
|
-
|
|
88
|
-
13. **orphan-pages.sql** - Pages with no internal links
|
|
89
|
-
- Priority: MEDIUM
|
|
90
|
-
- Impact: Difficult to discover and crawl
|
|
91
|
-
- Fix: Add internal links from related pages
|
|
92
|
-
|
|
93
|
-
14. **canonical-issues.sql** - Canonical URL differs from actual URL
|
|
94
|
-
- Priority: MEDIUM
|
|
95
|
-
- Impact: Duplicate content confusion
|
|
96
|
-
- Fix: Review canonical implementation
|
|
97
|
-
|
|
98
|
-
15. **non-https.sql** - Pages not using HTTPS
|
|
99
|
-
- Priority: MEDIUM
|
|
100
|
-
- Impact: Security warnings, ranking penalty
|
|
101
|
-
- Fix: Migrate to HTTPS
|
|
102
|
-
|
|
103
|
-
16. **heading-hierarchy-issues.sql** - Non-sequential headings
|
|
104
|
-
- Priority: MEDIUM
|
|
105
|
-
- Impact: Poor document structure
|
|
106
|
-
- Fix: Correct heading order (h1 → h2 → h3)
|
|
107
|
-
|
|
108
|
-
## Security Issues (6 queries)
|
|
109
|
-
|
|
110
|
-
**Security header and link security problems**
|
|
111
|
-
|
|
112
|
-
17. **missing-hsts.sql** - No Strict-Transport-Security header
|
|
113
|
-
- Priority: HIGH
|
|
114
|
-
- Impact: HTTPS downgrade attacks possible
|
|
115
|
-
- Fix: Add HSTS header to server config
|
|
116
|
-
|
|
117
|
-
18. **missing-csp.sql** - No Content-Security-Policy header
|
|
118
|
-
- Priority: MEDIUM
|
|
119
|
-
- Impact: XSS vulnerability
|
|
120
|
-
- Fix: Implement CSP header
|
|
121
|
-
|
|
122
|
-
19. **missing-x-frame-options.sql** - No X-Frame-Options header
|
|
123
|
-
- Priority: MEDIUM
|
|
124
|
-
- Impact: Clickjacking vulnerability
|
|
125
|
-
- Fix: Add X-Frame-Options: DENY
|
|
126
|
-
|
|
127
|
-
20. **missing-referrer-policy.sql** - No Referrer-Policy header
|
|
128
|
-
- Priority: LOW
|
|
129
|
-
- Impact: Privacy leaks
|
|
130
|
-
- Fix: Add Referrer-Policy header
|
|
131
|
-
|
|
132
|
-
21. **unsafe-external-links.sql** - target="_blank" without rel="noopener"
|
|
133
|
-
- Priority: MEDIUM
|
|
134
|
-
- Impact: Tabnabbing vulnerability
|
|
135
|
-
- Fix: Add rel="noopener noreferrer"
|
|
136
|
-
|
|
137
|
-
22. **protocol-relative-links.sql** - Links using //example.com format
|
|
138
|
-
- Priority: LOW
|
|
139
|
-
- Impact: Mixed content warnings
|
|
140
|
-
- Fix: Use absolute HTTPS URLs
|
|
141
|
-
|
|
142
|
-
## Optimization Opportunities (6 queries)
|
|
143
|
-
|
|
144
|
-
**Enhancement opportunities for better SEO**
|
|
145
|
-
|
|
146
|
-
23. **title-length.sql** - Titles too short (< 30) or too long (> 60)
|
|
147
|
-
- Priority: MEDIUM
|
|
148
|
-
- Impact: Truncated or poor SERP display
|
|
149
|
-
- Fix: Optimize title length to 30-60 characters
|
|
150
|
-
|
|
151
|
-
24. **meta-description-length.sql** - Descriptions too short/long
|
|
152
|
-
- Priority: LOW
|
|
153
|
-
- Impact: Suboptimal SERP snippets
|
|
154
|
-
- Fix: Optimize to 120-160 characters
|
|
155
|
-
|
|
156
|
-
25. **title-equals-h1.sql** - Title and H1 are identical
|
|
157
|
-
- Priority: LOW
|
|
158
|
-
- Impact: Missed keyword opportunity
|
|
159
|
-
- Fix: Differentiate title and H1 slightly
|
|
160
|
-
|
|
161
|
-
26. **no-outbound-links.sql** - Pages with no links
|
|
162
|
-
- Priority: LOW
|
|
163
|
-
- Impact: Poor user experience, low PageRank flow
|
|
164
|
-
- Fix: Add relevant internal/external links
|
|
165
|
-
|
|
166
|
-
27. **high-external-links.sql** - Pages with > 20 external links
|
|
167
|
-
- Priority: LOW
|
|
168
|
-
- Impact: Excessive PageRank dilution
|
|
169
|
-
- Fix: Review and reduce external links
|
|
170
|
-
|
|
171
|
-
28. **missing-images.sql** - Content pages without images
|
|
172
|
-
- Priority: LOW
|
|
173
|
-
- Impact: Poor engagement, no image search visibility
|
|
174
|
-
- Fix: Add relevant images with alt text
|
|
175
|
-
|
|
176
|
-
## Query Performance
|
|
177
|
-
|
|
178
|
-
All queries are optimized for SQLite with:
|
|
179
|
-
- Indexed columns (url, status_code, depth)
|
|
180
|
-
- LIMIT clauses to prevent excessive results
|
|
181
|
-
- Strategic WHERE clauses to filter early
|
|
182
|
-
- Simple JOINs where necessary
|
|
183
|
-
|
|
184
|
-
**Expected Performance:**
|
|
185
|
-
- Simple queries: < 10ms
|
|
186
|
-
- Complex queries (duplicates, orphans): < 100ms
|
|
187
|
-
- Join queries (broken links): < 200ms
|
|
188
|
-
|
|
189
|
-
## Usage Patterns
|
|
190
|
-
|
|
191
|
-
### Direct SQL Execution
|
|
192
|
-
```typescript
|
|
193
|
-
import Database from 'better-sqlite3';
|
|
194
|
-
import fs from 'fs';
|
|
195
|
-
|
|
196
|
-
const db = new Database('./crawl-data.db');
|
|
197
|
-
const query = fs.readFileSync('./queries/critical/missing-titles.sql', 'utf-8');
|
|
198
|
-
const results = db.prepare(query).all();
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
### Programmatic Analysis
|
|
202
|
-
```typescript
|
|
203
|
-
import { SQLAnalyzer } from './SQLAnalyzer.js';
|
|
204
|
-
|
|
205
|
-
const analyzer = new SQLAnalyzer(crawlId);
|
|
206
|
-
const report = await analyzer.generateReport();
|
|
207
|
-
// Returns structured SEOAnalysisReport
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
### MCP Tool Integration
|
|
211
|
-
```bash
|
|
212
|
-
seo-crawler-mcp:analyze_seo crawlId="431841d4"
|
|
213
|
-
# Returns JSON report with all 25 issues checked
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
## Future Enhancements
|
|
217
|
-
|
|
218
|
-
**Not yet implemented (requires additional data capture):**
|
|
219
|
-
- Core Web Vitals analysis (requires Playwright)
|
|
220
|
-
- Robots.txt validation (requires separate parser)
|
|
221
|
-
- Readability scoring (requires text analysis library)
|
|
222
|
-
- Mobile rendering issues (requires device emulation)
|
|
223
|
-
|
|
224
|
-
---
|
|
225
|
-
|
|
226
|
-
**Query Coverage: 25 Production-Ready SEO Checks**
|
|
227
|
-
**Status: Production Ready**
|
|
228
|
-
**Next Step: Build SQLAnalyzer.ts class**
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
-- Duplicate H1 Tags (MEDIUM)
|
|
2
|
-
-- Multiple pages sharing the same H1 tag
|
|
3
|
-
-- Priority: MEDIUM
|
|
4
|
-
-- Category: content
|
|
5
|
-
|
|
6
|
-
SELECT
|
|
7
|
-
h1,
|
|
8
|
-
COUNT(*) as page_count,
|
|
9
|
-
GROUP_CONCAT(url, '|||') as urls,
|
|
10
|
-
MIN(title) as example_title
|
|
11
|
-
FROM pages
|
|
12
|
-
WHERE h1 IS NOT NULL
|
|
13
|
-
AND TRIM(h1) != ''
|
|
14
|
-
AND status_code = 200
|
|
15
|
-
GROUP BY h1
|
|
16
|
-
HAVING COUNT(*) > 1
|
|
17
|
-
ORDER BY page_count DESC
|
|
18
|
-
LIMIT 50;
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
-- Duplicate Meta Descriptions (MEDIUM)
|
|
2
|
-
-- Multiple pages sharing the same meta description
|
|
3
|
-
-- Priority: MEDIUM
|
|
4
|
-
-- Category: content
|
|
5
|
-
|
|
6
|
-
SELECT
|
|
7
|
-
meta_description,
|
|
8
|
-
COUNT(*) as page_count,
|
|
9
|
-
GROUP_CONCAT(url, '|||') as urls,
|
|
10
|
-
MIN(title) as example_title
|
|
11
|
-
FROM pages
|
|
12
|
-
WHERE meta_description IS NOT NULL
|
|
13
|
-
AND TRIM(meta_description) != ''
|
|
14
|
-
AND status_code = 200
|
|
15
|
-
GROUP BY meta_description
|
|
16
|
-
HAVING COUNT(*) > 1
|
|
17
|
-
ORDER BY page_count DESC
|
|
18
|
-
LIMIT 50;
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
-- Duplicate Title Tags (HIGH)
|
|
2
|
-
-- Multiple pages sharing the same title tag
|
|
3
|
-
-- Priority: HIGH
|
|
4
|
-
-- Category: content
|
|
5
|
-
-- Impact: Title duplication causes keyword cannibalization where pages compete against each other. Reduces click-through rates as users can't distinguish between pages in search results.
|
|
6
|
-
-- Fix: Create unique, descriptive title tags for each page that accurately reflect the page's specific content and target different keyword variations.
|
|
7
|
-
|
|
8
|
-
SELECT
|
|
9
|
-
title,
|
|
10
|
-
GROUP_CONCAT(url, ', ') as duplicate_urls,
|
|
11
|
-
COUNT(*) as count
|
|
12
|
-
FROM pages
|
|
13
|
-
WHERE title IS NOT NULL
|
|
14
|
-
AND title != ''
|
|
15
|
-
AND status_code = 200
|
|
16
|
-
GROUP BY title
|
|
17
|
-
HAVING count > 1
|
|
18
|
-
ORDER BY count DESC
|
|
19
|
-
LIMIT 100;
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
-- Missing H1 Tags (HIGH)
|
|
2
|
-
-- Pages without H1 headings
|
|
3
|
-
-- Priority: HIGH
|
|
4
|
-
-- Category: content
|
|
5
|
-
-- Impact: H1 tags are critical for SEO and accessibility. Their absence makes it unclear to search engines and users what the page is about. Impacts topic clarity and ranking potential.
|
|
6
|
-
-- Fix: Ensure every important page has exactly one descriptive H1 tag that clearly indicates the main topic. Include primary keywords naturally without keyword stuffing.
|
|
7
|
-
|
|
8
|
-
SELECT
|
|
9
|
-
url,
|
|
10
|
-
title,
|
|
11
|
-
word_count,
|
|
12
|
-
depth
|
|
13
|
-
FROM pages
|
|
14
|
-
WHERE (h1 IS NULL OR h1 = '' OR TRIM(h1) = '')
|
|
15
|
-
AND status_code = 200
|
|
16
|
-
AND content_type LIKE '%text/html%'
|
|
17
|
-
ORDER BY word_count DESC
|
|
18
|
-
LIMIT 100;
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
-- Missing Meta Descriptions (MEDIUM)
|
|
2
|
-
-- Pages without meta descriptions
|
|
3
|
-
-- Priority: MEDIUM
|
|
4
|
-
-- Category: content
|
|
5
|
-
-- Impact: Search engines auto-generate snippets which are often poor quality and don't entice clicks. Missed opportunity to control your SERP messaging and improve click-through rates.
|
|
6
|
-
-- Fix: Write unique, compelling meta descriptions (120-160 characters) for key pages. Focus on benefits and include a call-to-action. Avoid duplicating title tag content.
|
|
7
|
-
|
|
8
|
-
SELECT
|
|
9
|
-
url,
|
|
10
|
-
title,
|
|
11
|
-
word_count,
|
|
12
|
-
depth
|
|
13
|
-
FROM pages
|
|
14
|
-
WHERE (meta_description IS NULL OR meta_description = '' OR TRIM(meta_description) = '')
|
|
15
|
-
AND status_code = 200
|
|
16
|
-
AND content_type LIKE '%text/html%'
|
|
17
|
-
AND depth <= 5
|
|
18
|
-
ORDER BY word_count DESC
|
|
19
|
-
LIMIT 100;
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
-- Multiple H1 Tags (MEDIUM)
|
|
2
|
-
-- Pages with more than one H1 tag (can confuse search engines)
|
|
3
|
-
-- Priority: MEDIUM
|
|
4
|
-
-- Category: content
|
|
5
|
-
|
|
6
|
-
SELECT
|
|
7
|
-
url,
|
|
8
|
-
title,
|
|
9
|
-
heading_count_h1,
|
|
10
|
-
h1,
|
|
11
|
-
status_code,
|
|
12
|
-
depth
|
|
13
|
-
FROM pages
|
|
14
|
-
WHERE heading_count_h1 > 1
|
|
15
|
-
AND status_code = 200
|
|
16
|
-
ORDER BY heading_count_h1 DESC, depth ASC
|
|
17
|
-
LIMIT 100;
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
-- Thin Content (MEDIUM)
|
|
2
|
-
-- Pages with insufficient content (< 300 words)
|
|
3
|
-
-- Priority: MEDIUM
|
|
4
|
-
-- Category: content
|
|
5
|
-
|
|
6
|
-
SELECT
|
|
7
|
-
url,
|
|
8
|
-
title,
|
|
9
|
-
word_count,
|
|
10
|
-
status_code,
|
|
11
|
-
depth,
|
|
12
|
-
internal_links
|
|
13
|
-
FROM pages
|
|
14
|
-
WHERE word_count < 300
|
|
15
|
-
AND status_code = 200
|
|
16
|
-
AND content_type LIKE '%text/html%'
|
|
17
|
-
ORDER BY word_count ASC
|
|
18
|
-
LIMIT 100;
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
-- Broken Internal Links (CRITICAL)
|
|
2
|
-
-- Internal links pointing to pages that actually returned HTTP errors (404, 500, etc.)
|
|
3
|
-
-- Priority: CRITICAL
|
|
4
|
-
-- Category: critical
|
|
5
|
-
-- Impact: Poor user experience, wasted crawl budget, and loss of link equity. Users encounter dead ends and search engines waste resources crawling broken links.
|
|
6
|
-
-- Fix: View URLs that link to errors using the 'inlinks' tab and export them in bulk. Update broken links to point to correct URLs or remove them. Consider implementing 301 redirects for permanently moved content.
|
|
7
|
-
-- Note: This query only reports links to pages that were crawled AND returned errors. Un-crawled links are reported separately in the opportunities category.
|
|
8
|
-
|
|
9
|
-
SELECT
|
|
10
|
-
l.source_url,
|
|
11
|
-
l.target_url,
|
|
12
|
-
l.anchor_text,
|
|
13
|
-
l.placement,
|
|
14
|
-
p.status_code,
|
|
15
|
-
COUNT(*) as occurrences
|
|
16
|
-
FROM links l
|
|
17
|
-
INNER JOIN pages p ON l.target_url = p.url -- Only check pages we actually crawled
|
|
18
|
-
WHERE l.is_internal = 1
|
|
19
|
-
AND p.status_code >= 400 -- Only actual HTTP errors (404, 500, etc.)
|
|
20
|
-
GROUP BY l.target_url, l.source_url, p.status_code
|
|
21
|
-
ORDER BY occurrences DESC
|
|
22
|
-
LIMIT 100;
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
-- Missing Title Tags (CRITICAL)
|
|
2
|
-
-- Pages without title tags - catastrophic for SEO
|
|
3
|
-
-- Priority: CRITICAL
|
|
4
|
-
-- Category: critical
|
|
5
|
-
-- Impact: Pages are essentially invisible to search engines. Title tags are the most important on-page SEO element and their absence prevents proper indexing and ranking.
|
|
6
|
-
-- Fix: Add unique, descriptive title tags (50-60 characters) to all pages immediately. Include primary keywords and brand name where appropriate.
|
|
7
|
-
|
|
8
|
-
SELECT
|
|
9
|
-
url,
|
|
10
|
-
word_count,
|
|
11
|
-
heading_count_h1
|
|
12
|
-
FROM pages
|
|
13
|
-
WHERE (title IS NULL OR title = '' OR TRIM(title) = '')
|
|
14
|
-
AND status_code = 200
|
|
15
|
-
AND depth <= 5
|
|
16
|
-
ORDER BY word_count DESC
|
|
17
|
-
LIMIT 100;
|