@houtini/seo-crawler-mcp 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +59 -0
- package/LICENSE +190 -0
- package/NOTICE +8 -0
- package/README.md +694 -0
- package/build/analyzers/QueryLoader.d.ts +30 -0
- package/build/analyzers/QueryLoader.d.ts.map +1 -0
- package/build/analyzers/QueryLoader.js +126 -0
- package/build/analyzers/QueryLoader.js.map +1 -0
- package/build/cli.d.ts +3 -0
- package/build/cli.d.ts.map +1 -0
- package/build/cli.js +190 -0
- package/build/cli.js.map +1 -0
- package/build/core/ContentExtractor.d.ts +30 -0
- package/build/core/ContentExtractor.d.ts.map +1 -0
- package/build/core/ContentExtractor.js +362 -0
- package/build/core/ContentExtractor.js.map +1 -0
- package/build/core/CrawlDatabase.d.ts +25 -0
- package/build/core/CrawlDatabase.d.ts.map +1 -0
- package/build/core/CrawlDatabase.js +603 -0
- package/build/core/CrawlDatabase.js.map +1 -0
- package/build/core/CrawlOrchestrator.d.ts +27 -0
- package/build/core/CrawlOrchestrator.d.ts.map +1 -0
- package/build/core/CrawlOrchestrator.js +279 -0
- package/build/core/CrawlOrchestrator.js.map +1 -0
- package/build/core/CrawlStorage.d.ts +33 -0
- package/build/core/CrawlStorage.d.ts.map +1 -0
- package/build/core/CrawlStorage.js +94 -0
- package/build/core/CrawlStorage.js.map +1 -0
- package/build/core/LinkExtractor.d.ts +14 -0
- package/build/core/LinkExtractor.d.ts.map +1 -0
- package/build/core/LinkExtractor.js +91 -0
- package/build/core/LinkExtractor.js.map +1 -0
- package/build/core/UrlManager.d.ts +21 -0
- package/build/core/UrlManager.d.ts.map +1 -0
- package/build/core/UrlManager.js +87 -0
- package/build/core/UrlManager.js.map +1 -0
- package/build/formatters/structured-report-format.d.ts +48 -0
- package/build/formatters/structured-report-format.d.ts.map +1 -0
- package/build/formatters/structured-report-format.js +145 -0
- package/build/formatters/structured-report-format.js.map +1 -0
- package/build/index.d.ts +3 -0
- package/build/index.d.ts.map +1 -0
- package/build/index.js +214 -0
- package/build/index.js.map +1 -0
- package/build/schema/index.d.ts +627 -0
- package/build/schema/index.d.ts.map +1 -0
- package/build/schema/index.js +159 -0
- package/build/schema/index.js.map +1 -0
- package/build/tools/analyze-seo.d.ts +44 -0
- package/build/tools/analyze-seo.d.ts.map +1 -0
- package/build/tools/analyze-seo.js +110 -0
- package/build/tools/analyze-seo.js.map +1 -0
- package/build/tools/list-queries.d.ts +28 -0
- package/build/tools/list-queries.d.ts.map +1 -0
- package/build/tools/list-queries.js +30 -0
- package/build/tools/list-queries.js.map +1 -0
- package/build/tools/query-seo-data.d.ts +15 -0
- package/build/tools/query-seo-data.d.ts.map +1 -0
- package/build/tools/query-seo-data.js +43 -0
- package/build/tools/query-seo-data.js.map +1 -0
- package/build/tools/run-seo-audit.d.ts +3 -0
- package/build/tools/run-seo-audit.d.ts.map +1 -0
- package/build/tools/run-seo-audit.js +54 -0
- package/build/tools/run-seo-audit.js.map +1 -0
- package/build/types/index.d.ts +158 -0
- package/build/types/index.d.ts.map +1 -0
- package/build/types/index.js +2 -0
- package/build/types/index.js.map +1 -0
- package/build/utils/debug.d.ts +2 -0
- package/build/utils/debug.d.ts.map +1 -0
- package/build/utils/debug.js +7 -0
- package/build/utils/debug.js.map +1 -0
- package/package.json +49 -0
- package/server.json +31 -0
- package/src/analyzers/QueryLoader.ts +175 -0
- package/src/analyzers/queries/README.md +228 -0
- package/src/analyzers/queries/content/duplicate-h1.sql +18 -0
- package/src/analyzers/queries/content/duplicate-meta-descriptions.sql +18 -0
- package/src/analyzers/queries/content/duplicate-titles.sql +19 -0
- package/src/analyzers/queries/content/missing-h1.sql +18 -0
- package/src/analyzers/queries/content/missing-meta-descriptions.sql +19 -0
- package/src/analyzers/queries/content/multiple-h1.sql +17 -0
- package/src/analyzers/queries/content/thin-content.sql +18 -0
- package/src/analyzers/queries/critical/404-errors.sql +14 -0
- package/src/analyzers/queries/critical/broken-internal-links.sql +20 -0
- package/src/analyzers/queries/critical/missing-titles.sql +17 -0
- package/src/analyzers/queries/critical/server-errors.sql +15 -0
- package/src/analyzers/queries/opportunities/high-external-links.sql +18 -0
- package/src/analyzers/queries/opportunities/meta-description-length.sql +27 -0
- package/src/analyzers/queries/opportunities/missing-images.sql +18 -0
- package/src/analyzers/queries/opportunities/no-outbound-links.sql +18 -0
- package/src/analyzers/queries/opportunities/title-equals-h1.sql +21 -0
- package/src/analyzers/queries/opportunities/title-length.sql +27 -0
- package/src/analyzers/queries/security/missing-csp.sql +16 -0
- package/src/analyzers/queries/security/missing-hsts.sql +17 -0
- package/src/analyzers/queries/security/missing-referrer-policy.sql +16 -0
- package/src/analyzers/queries/security/missing-x-frame-options.sql +16 -0
- package/src/analyzers/queries/security/protocol-relative-links.sql +16 -0
- package/src/analyzers/queries/security/unsafe-external-links.sql +17 -0
- package/src/analyzers/queries/technical/canonical-issues.sql +20 -0
- package/src/analyzers/queries/technical/heading-hierarchy-issues.sql +19 -0
- package/src/analyzers/queries/technical/non-https.sql +16 -0
- package/src/analyzers/queries/technical/orphan-pages.sql +21 -0
- package/src/analyzers/queries/technical/redirects.sql +15 -0
- package/src/cli.ts +224 -0
- package/src/core/ContentExtractor.ts +480 -0
- package/src/core/CrawlDatabase.ts +736 -0
- package/src/core/CrawlOrchestrator.ts +346 -0
- package/src/core/CrawlStorage.ts +148 -0
- package/src/core/LinkExtractor.ts +123 -0
- package/src/core/UrlManager.ts +114 -0
- package/src/formatters/structured-report-format.ts +254 -0
- package/src/index.ts +259 -0
- package/src/schema/index.ts +176 -0
- package/src/tools/analyze-seo.ts +184 -0
- package/src/tools/list-queries.ts +70 -0
- package/src/tools/query-seo-data.ts +77 -0
- package/src/tools/run-seo-audit.ts +83 -0
- package/src/types/index.ts +179 -0
- package/src/utils/debug.ts +12 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
export interface CrawlConfig {
|
|
2
|
+
crawlId: string;
|
|
3
|
+
startUrl: string;
|
|
4
|
+
maxDepth: number;
|
|
5
|
+
maxPages: number;
|
|
6
|
+
userAgent: 'chrome' | 'googlebot';
|
|
7
|
+
crawlExternal: boolean;
|
|
8
|
+
respectRobots: boolean;
|
|
9
|
+
concurrency: number;
|
|
10
|
+
delay: number;
|
|
11
|
+
timeout: number;
|
|
12
|
+
includeExtensions: string[];
|
|
13
|
+
excludeExtensions: string[];
|
|
14
|
+
includePatterns: string[];
|
|
15
|
+
excludePatterns: string[];
|
|
16
|
+
outputPath: string;
|
|
17
|
+
createdAt: string;
|
|
18
|
+
}
|
|
19
|
+
export interface CrawlMetadata {
|
|
20
|
+
crawlId: string;
|
|
21
|
+
status: 'queued' | 'running' | 'paused' | 'completed' | 'failed';
|
|
22
|
+
startedAt: string | null;
|
|
23
|
+
completedAt: string | null;
|
|
24
|
+
duration: number | null;
|
|
25
|
+
stats: {
|
|
26
|
+
discovered: number;
|
|
27
|
+
crawled: number;
|
|
28
|
+
failed: number;
|
|
29
|
+
skipped: number;
|
|
30
|
+
depth: number;
|
|
31
|
+
speed: number;
|
|
32
|
+
};
|
|
33
|
+
errors: CrawlError[];
|
|
34
|
+
}
|
|
35
|
+
export interface CrawlError {
|
|
36
|
+
url: string;
|
|
37
|
+
errorType: 'timeout' | 'dns' | 'connection' | 'ssl' | 'auth' | 'not_found' | 'rate_limit' | 'server_error' | 'network' | 'parse' | 'unknown';
|
|
38
|
+
message: string;
|
|
39
|
+
timestamp: string;
|
|
40
|
+
}
|
|
41
|
+
export interface PageData {
|
|
42
|
+
url: string;
|
|
43
|
+
crawlId: string;
|
|
44
|
+
statusCode: number;
|
|
45
|
+
contentType: string;
|
|
46
|
+
responseTime: number;
|
|
47
|
+
size: number;
|
|
48
|
+
redirects: Redirect[];
|
|
49
|
+
depth: number;
|
|
50
|
+
isInternal: boolean;
|
|
51
|
+
linkedFrom: string[];
|
|
52
|
+
title: string;
|
|
53
|
+
metaDescription: string;
|
|
54
|
+
h1: string;
|
|
55
|
+
h2: string[];
|
|
56
|
+
h3: string[];
|
|
57
|
+
wordCount: number;
|
|
58
|
+
lang: string;
|
|
59
|
+
charset: string;
|
|
60
|
+
metaTags: Record<string, string>;
|
|
61
|
+
viewport: string;
|
|
62
|
+
robots: string;
|
|
63
|
+
author: string;
|
|
64
|
+
keywords: string;
|
|
65
|
+
generator: string;
|
|
66
|
+
themeColor: string;
|
|
67
|
+
canonicalUrl: string;
|
|
68
|
+
jsonLd: any[];
|
|
69
|
+
schemaOrg: SchemaItem[];
|
|
70
|
+
ogTags: Record<string, string>;
|
|
71
|
+
twitterTags: Record<string, string>;
|
|
72
|
+
images: ImageData[];
|
|
73
|
+
internalLinks: number;
|
|
74
|
+
externalLinks: number;
|
|
75
|
+
hreflang: HreflangLink[];
|
|
76
|
+
securityHeaders: {
|
|
77
|
+
contentSecurityPolicy: string | null;
|
|
78
|
+
strictTransportSecurity: string | null;
|
|
79
|
+
xFrameOptions: string | null;
|
|
80
|
+
referrerPolicy: string | null;
|
|
81
|
+
};
|
|
82
|
+
headingCounts: {
|
|
83
|
+
h1: number;
|
|
84
|
+
h2: number;
|
|
85
|
+
h3: number;
|
|
86
|
+
h4: number;
|
|
87
|
+
h5: number;
|
|
88
|
+
h6: number;
|
|
89
|
+
};
|
|
90
|
+
headingHierarchy: string[];
|
|
91
|
+
headingSequentialErrors: string[];
|
|
92
|
+
linkMetrics: {
|
|
93
|
+
externalTargetBlankCount: number;
|
|
94
|
+
externalTargetBlankNoRelCount: number;
|
|
95
|
+
protocolRelativeLinksCount: number;
|
|
96
|
+
};
|
|
97
|
+
analytics: {
|
|
98
|
+
googleAnalytics: boolean;
|
|
99
|
+
gtag: boolean;
|
|
100
|
+
ga4Id: string;
|
|
101
|
+
gtmId: string;
|
|
102
|
+
facebookPixel: boolean;
|
|
103
|
+
hotjar: boolean;
|
|
104
|
+
mixpanel: boolean;
|
|
105
|
+
};
|
|
106
|
+
crawledAt: string;
|
|
107
|
+
error: string | null;
|
|
108
|
+
}
|
|
109
|
+
export interface Redirect {
|
|
110
|
+
from: string;
|
|
111
|
+
to: string;
|
|
112
|
+
statusCode: number;
|
|
113
|
+
}
|
|
114
|
+
export interface SchemaItem {
|
|
115
|
+
type: string;
|
|
116
|
+
properties: Record<string, string>;
|
|
117
|
+
}
|
|
118
|
+
export interface ImageData {
|
|
119
|
+
src: string;
|
|
120
|
+
alt: string;
|
|
121
|
+
width: number | null;
|
|
122
|
+
height: number | null;
|
|
123
|
+
}
|
|
124
|
+
export interface HreflangLink {
|
|
125
|
+
lang: string;
|
|
126
|
+
url: string;
|
|
127
|
+
}
|
|
128
|
+
export interface LinkData {
|
|
129
|
+
crawlId: string;
|
|
130
|
+
sourceUrl: string;
|
|
131
|
+
targetUrl: string;
|
|
132
|
+
anchorText: string;
|
|
133
|
+
isInternal: boolean;
|
|
134
|
+
targetDomain: string;
|
|
135
|
+
targetStatus: number | null;
|
|
136
|
+
placement: 'navigation' | 'footer' | 'body';
|
|
137
|
+
discoveredAt: string;
|
|
138
|
+
}
|
|
139
|
+
export interface RunSeoAuditInput {
|
|
140
|
+
url: string;
|
|
141
|
+
maxPages?: number;
|
|
142
|
+
depth?: number;
|
|
143
|
+
userAgent?: 'chrome' | 'googlebot';
|
|
144
|
+
}
|
|
145
|
+
export interface RunSeoAuditOutput {
|
|
146
|
+
crawlId: string;
|
|
147
|
+
outputPath: string;
|
|
148
|
+
status: string;
|
|
149
|
+
stats: {
|
|
150
|
+
discovered: number;
|
|
151
|
+
crawled: number;
|
|
152
|
+
failed: number;
|
|
153
|
+
skipped: number;
|
|
154
|
+
depth: number;
|
|
155
|
+
speed: number;
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,QAAQ,GAAG,WAAW,CAAC;IAClC,aAAa,EAAE,OAAO,CAAC;IACvB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAC;IACjE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,KAAK,EAAE;QACL,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,MAAM,EAAE,UAAU,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,SAAS,GAAG,KAAK,GAAG,YAAY,GAAG,KAAK,GAAG,MAAM,GAAG,WAAW,GAAG,YAAY,GAAG,cAAc,GAAG,SAAS,GAAG,OAAO,GAAG,SAAS,CAAC;IAC7I,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,QAAQ;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,EAAE,EAAE,MAAM,EAAE,CAAC;IACb,EAAE,EAAE,MAAM,EAAE,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,GAA
G,EAAE,CAAC;IACd,SAAS,EAAE,UAAU,EAAE,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,YAAY,EAAE,CAAC;IAGzB,eAAe,EAAE;QACf,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;QACrC,uBAAuB,EAAE,MAAM,GAAG,IAAI,CAAC;QACvC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;QAC7B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;KAC/B,CAAC;IAGF,aAAa,EAAE;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC;IACF,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAGlC,WAAW,EAAE;QACX,wBAAwB,EAAE,MAAM,CAAC;QACjC,6BAA6B,EAAE,MAAM,CAAC;QACtC,0BAA0B,EAAE,MAAM,CAAC;KACpC,CAAC;IAEF,SAAS,EAAE;QACT,eAAe,EAAE,OAAO,CAAC;QACzB,IAAI,EAAE,OAAO,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,OAAO,CAAC;QACvB,MAAM,EAAE,OAAO,CAAC;QAChB,QAAQ,EAAE,OAAO,CAAC;KACnB,CAAC;IACF,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACtB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,OAAO,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC5C,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,QAAQ,GAAG,WAAW,CAAC;CACpC;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;
IACnB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE;QACL,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"debug.d.ts","sourceRoot":"","sources":["../../src/utils/debug.ts"],"names":[],"mappings":"AAOA,wBAAgB,KAAK,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,GAAG,IAAI,CAI1C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"debug.js","sourceRoot":"","sources":["../../src/utils/debug.ts"],"names":[],"mappings":"AAKA,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,MAAM,CAAC;AAEnD,MAAM,UAAU,KAAK,CAAC,GAAG,IAAW;IAClC,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,CAAC;IACpC,CAAC;AACH,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@houtini/seo-crawler-mcp",
|
|
3
|
+
"version": "2.0.1",
|
|
4
|
+
"mcpName": "io.github.houtini-ai/seo-crawler-mcp",
|
|
5
|
+
"description": "Crawl and analyse websites for SEO errors and issues using Crawlee with SQLite storage",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"main": "build/index.js",
|
|
8
|
+
"bin": {
|
|
9
|
+
"seo-crawler-mcp": "build/cli.js"
|
|
10
|
+
},
|
|
11
|
+
"author": "Richard Baxter <hello@houtini.com>",
|
|
12
|
+
"license": "Apache-2.0",
|
|
13
|
+
"homepage": "https://github.com/houtini-ai/seo-crawler-mcp",
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "https://github.com/houtini-ai/seo-crawler-mcp.git"
|
|
17
|
+
},
|
|
18
|
+
"keywords": [
|
|
19
|
+
"mcp",
|
|
20
|
+
"seo",
|
|
21
|
+
"crawler",
|
|
22
|
+
"crawlee",
|
|
23
|
+
"audit",
|
|
24
|
+
"model-context-protocol"
|
|
25
|
+
],
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=18.0.0"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsc",
|
|
31
|
+
"dev": "tsc --watch",
|
|
32
|
+
"start": "node build/index.js",
|
|
33
|
+
"test": "npm run build && echo 'Build successful - all checks passed'",
|
|
34
|
+
"clean": "rm -rf build"
|
|
35
|
+
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"@crawlee/memory-storage": "^3.7.0",
|
|
38
|
+
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
39
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
40
|
+
"better-sqlite3": "^12.6.2",
|
|
41
|
+
"cheerio": "^1.0.0-rc.12",
|
|
42
|
+
"crawlee": "^3.7.0",
|
|
43
|
+
"zod": "^3.25.1"
|
|
44
|
+
},
|
|
45
|
+
"devDependencies": {
|
|
46
|
+
"@types/node": "^20.11.0",
|
|
47
|
+
"typescript": "^5.3.3"
|
|
48
|
+
}
|
|
49
|
+
}
|
package/server.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "v0.1.0",
|
|
3
|
+
"name": "io.github.houtini-ai/seo-crawler-mcp",
|
|
4
|
+
"description": "Crawl and analyse websites for SEO errors and issues using Crawlee with SQLite storage",
|
|
5
|
+
"version": "2.0.1",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"sourceUrl": "https://github.com/houtini-ai/seo-crawler-mcp",
|
|
8
|
+
"packages": [
|
|
9
|
+
{
|
|
10
|
+
"type": "npm",
|
|
11
|
+
"name": "@houtini/seo-crawler-mcp",
|
|
12
|
+
"version": "2.0.1"
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"vendor": {
|
|
16
|
+
"name": "Houtini",
|
|
17
|
+
"url": "https://houtini.ai"
|
|
18
|
+
},
|
|
19
|
+
"environmentVariables": [
|
|
20
|
+
{
|
|
21
|
+
"name": "OUTPUT_DIR",
|
|
22
|
+
"description": "Directory where crawl results are saved",
|
|
23
|
+
"required": true
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"name": "DEBUG",
|
|
27
|
+
"description": "Enable verbose debug logging (set to 'true' to enable)",
|
|
28
|
+
"required": false
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
+
const __dirname = path.dirname(__filename);
|
|
7
|
+
|
|
8
|
+
/** Category a query belongs to; matches the sub-directory its `.sql` file is read from. */
type QueryCategory = 'critical' | 'content' | 'technical' | 'security' | 'opportunities';

/** Priority label a query file can declare in a `-- Priority:` comment. */
type QueryPriority = 'CRITICAL' | 'HIGH' | 'MEDIUM' | 'LOW';

/**
 * Metadata and SQL text for a single analysis query loaded from a `.sql` file.
 */
export interface QueryMetadata {
  /** Query identifier: the file name without its `.sql` extension. */
  name: string;
  /** Category directory the file was found in. */
  category: QueryCategory;
  /** Declared priority; the loader falls back to `MEDIUM` when the file has no `-- Priority:` comment. */
  priority: QueryPriority;
  /** First free-form comment line of the file (empty string when there is none). */
  description: string;
  /** Text of the `-- Impact:` comment (empty string when absent). */
  impact: string;
  /** Text of the `-- Fix:` comment (empty string when absent). */
  fix: string;
  /** SQL statement text extracted from the file. */
  sql: string;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Locates the directory that holds the categorised `.sql` query files.
 *
 * Candidates are probed in order and the first one that is an existing
 * directory wins.
 *
 * @returns Path of the first candidate that exists and is a directory.
 * @throws Error listing every probed path when none of them qualifies.
 */
function findQueriesPath(): string {
  const candidates = [
    // Queries sitting next to this module (e.g. running from src/analyzers,
    // or a build step that copied the .sql files beside the compiled output)
    path.join(__dirname, 'queries'),
    // Two levels up, then src/analyzers/queries: the package's src tree when
    // this module runs from build/analyzers
    path.join(__dirname, '..', '..', 'src', 'analyzers', 'queries'),
    // Source tree relative to the current working directory
    path.join(process.cwd(), 'src', 'analyzers', 'queries'),
    // Build tree relative to the current working directory
    path.join(process.cwd(), 'build', 'analyzers', 'queries'),
  ];

  // A plain file that happens to be called "queries" must not be accepted:
  // the loader joins category sub-directories onto the returned path.
  const isDirectory = (candidate: string): boolean => {
    try {
      return fs.statSync(candidate).isDirectory();
    } catch {
      // Missing or unreadable path: treat it as "not here" and keep probing.
      return false;
    }
  };

  const found = candidates.find(isDirectory);
  if (found !== undefined) {
    return found;
  }

  throw new Error(`Could not find queries directory. Tried: ${candidates.join(', ')}`);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Loads the SEO analysis queries stored as `.sql` files and serves them by
 * name, category or priority.
 *
 * Every file is read synchronously in the constructor; afterwards all lookups
 * are answered from the in-memory map.
 */
export class QueryLoader {
  /** Category sub-directories scanned under the queries directory. */
  private static readonly CATEGORIES: ReadonlyArray<QueryMetadata['category']> = [
    'critical',
    'content',
    'technical',
    'security',
    'opportunities',
  ];

  /** Priority labels accepted from a `-- Priority:` comment. */
  private static readonly PRIORITIES: ReadonlyArray<QueryMetadata['priority']> = [
    'CRITICAL',
    'HIGH',
    'MEDIUM',
    'LOW',
  ];

  private queriesPath: string;
  private queries: Map<string, QueryMetadata>;

  /**
   * Resolves the queries directory and loads every query file found in it.
   *
   * @throws Error when the queries directory cannot be located or a file cannot be read.
   */
  constructor() {
    this.queriesPath = findQueriesPath();
    this.queries = new Map();
    this.loadAllQueries();
  }

  /**
   * Reads every `.sql` file in each known category directory into the map.
   * Category directories that do not exist are skipped. Entries are keyed by
   * file base name, so a later file with the same name replaces an earlier one.
   */
  private loadAllQueries(): void {
    for (const category of QueryLoader.CATEGORIES) {
      const categoryPath = path.join(this.queriesPath, category);

      if (!fs.existsSync(categoryPath)) {
        continue;
      }

      const files = fs.readdirSync(categoryPath).filter(f => f.endsWith('.sql'));

      for (const file of files) {
        const filePath = path.join(categoryPath, file);
        // basename() strips only the trailing extension; a plain
        // replace('.sql', '') would cut the first occurrence anywhere in the name.
        const queryName = path.basename(file, '.sql');

        this.queries.set(queryName, this.parseQueryFile(filePath, queryName, category));
      }
    }
  }

  /**
   * Parses one query file into its metadata and SQL text.
   *
   * `--` comment lines supply the metadata: `Priority:`, `Impact:` and `Fix:`
   * set the matching fields, `Category:` is ignored (the directory decides the
   * category), and the first remaining non-empty comment becomes the
   * description. The SQL text is everything from the first line that starts
   * with `SELECT` or `WITH` (any letter case) to the end of the file.
   *
   * @param filePath Path of the `.sql` file to read.
   * @param name     Query name to store in the result.
   * @param category Category to store in the result.
   * @returns The parsed metadata; `sql` is an empty string when no statement start was found.
   */
  private parseQueryFile(
    filePath: string,
    name: string,
    category: QueryMetadata['category']
  ): QueryMetadata {
    const content = fs.readFileSync(filePath, 'utf-8');
    // Accept both LF and CRLF so stored SQL lines carry no stray '\r'.
    const lines = content.split(/\r?\n/);

    let description = '';
    let priority: QueryMetadata['priority'] = 'MEDIUM';
    let impact = '';
    let fix = '';
    const sqlLines: string[] = [];
    let inSQL = false;

    for (const line of lines) {
      const trimmed = line.trim();

      if (trimmed.startsWith('--')) {
        const comment = trimmed.substring(2).trim();

        if (comment.startsWith('Priority:')) {
          // Only accept a known label; anything else keeps the current value
          // instead of leaking an invalid string into the typed field.
          const label = comment.slice('Priority:'.length).trim().toUpperCase();
          const known = QueryLoader.PRIORITIES.find(p => p === label);
          if (known !== undefined) {
            priority = known;
          }
        } else if (comment.startsWith('Impact:')) {
          impact = comment.slice('Impact:'.length).trim();
        } else if (comment.startsWith('Fix:')) {
          fix = comment.slice('Fix:'.length).trim();
        } else if (comment && !comment.startsWith('Category:')) {
          if (!description) {
            description = comment;
          }
        }
      } else if (!inSQL && /^(?:SELECT|WITH)\b/i.test(trimmed)) {
        // Statement start: case-insensitive, and CTE queries begin with WITH.
        inSQL = true;
      }

      // Once the statement has started, every line (comments included) is kept verbatim.
      if (inSQL) {
        sqlLines.push(line);
      }
    }

    return {
      name,
      category,
      priority,
      description,
      impact,
      fix,
      sql: sqlLines.join('\n').trim()
    };
  }

  /** Returns the query registered under `name`, or `undefined` when there is none. */
  getQuery(name: string): QueryMetadata | undefined {
    return this.queries.get(name);
  }

  /** Returns every loaded query, in the order the files were loaded. */
  getAllQueries(): QueryMetadata[] {
    return Array.from(this.queries.values());
  }

  /** Returns the loaded queries whose category equals `category`. */
  getQueriesByCategory(category: QueryMetadata['category']): QueryMetadata[] {
    return this.getAllQueries().filter(q => q.category === category);
  }

  /** Returns the loaded queries whose priority equals `priority`. */
  getQueriesByPriority(priority: QueryMetadata['priority']): QueryMetadata[] {
    return this.getAllQueries().filter(q => q.priority === priority);
  }

  /** Returns the queries with priority `CRITICAL`. */
  getCriticalQueries(): QueryMetadata[] {
    return this.getQueriesByPriority('CRITICAL');
  }

  /** Returns the queries with priority `HIGH`. */
  getHighPriorityQueries(): QueryMetadata[] {
    return this.getQueriesByPriority('HIGH');
  }

  /** Returns the names of all loaded queries, sorted. */
  listQueryNames(): string[] {
    return Array.from(this.queries.keys()).sort();
  }

  /**
   * Counts the loaded queries.
   *
   * @returns The total number of queries plus per-category and per-priority
   *          tallies; a category or priority with no queries has no key.
   */
  getQueryStats(): {
    total: number;
    byCategory: Record<string, number>;
    byPriority: Record<string, number>;
  } {
    const queries = this.getAllQueries();

    const byCategory: Record<string, number> = {};
    const byPriority: Record<string, number> = {};

    for (const query of queries) {
      byCategory[query.category] = (byCategory[query.category] || 0) + 1;
      byPriority[query.priority] = (byPriority[query.priority] || 0) + 1;
    }

    return {
      total: queries.length,
      byCategory,
      byPriority
    };
  }
}
|
|
174
|
+
|
|
175
|
+
// Shared loader instance. Its constructor resolves the queries directory and
// parses every query file, so this work happens when the module is first imported.
export const queryLoader = new QueryLoader();
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# SEO Analysis Query Library
|
|
2
|
+
|
|
3
|
+
**Version:** 1.0.0
|
|
4
|
+
**Last Updated:** 2026-02-01
|
|
5
|
+
**Coverage:** 28 detectable SEO issues
|
|
6
|
+
|
|
7
|
+
## Query Organization
|
|
8
|
+
|
|
9
|
+
All queries follow a standard format:
|
|
10
|
+
- SQL comments describing the issue
|
|
11
|
+
- Priority level (CRITICAL, HIGH, MEDIUM, LOW)
|
|
12
|
+
- Category classification
|
|
13
|
+
- Optimized SELECT statements with ORDER BY and LIMIT
|
|
14
|
+
- Results capped with a LIMIT clause (at most 100 rows) for performance
|
|
15
|
+
|
|
16
|
+
## Critical Issues (4 queries)
|
|
17
|
+
|
|
18
|
+
**Indexability issues that must be fixed immediately**
|
|
19
|
+
|
|
20
|
+
1. **missing-titles.sql** - Pages without title tags
|
|
21
|
+
- Priority: CRITICAL
|
|
22
|
+
- Impact: Major indexability problem
|
|
23
|
+
- Fix: Add unique, descriptive title tags
|
|
24
|
+
|
|
25
|
+
2. **broken-internal-links.sql** - Internal links to 404/5xx pages
|
|
26
|
+
- Priority: CRITICAL
|
|
27
|
+
- Impact: Poor user experience, crawl budget waste
|
|
28
|
+
- Fix: Update or remove broken links
|
|
29
|
+
|
|
30
|
+
3. **server-errors.sql** - Pages returning 5xx errors
|
|
31
|
+
- Priority: CRITICAL
|
|
32
|
+
- Impact: Prevents indexing
|
|
33
|
+
- Fix: Debug server issues immediately
|
|
34
|
+
|
|
35
|
+
4. **404-errors.sql** - Pages not found
|
|
36
|
+
- Priority: CRITICAL
|
|
37
|
+
- Impact: Dead ends for users and crawlers
|
|
38
|
+
- Fix: Redirect to relevant pages or restore content
|
|
39
|
+
|
|
40
|
+
## Content Quality Issues (7 queries)
|
|
41
|
+
|
|
42
|
+
**Problems with page content and metadata**
|
|
43
|
+
|
|
44
|
+
5. **duplicate-titles.sql** - Multiple pages with same title
|
|
45
|
+
- Priority: HIGH
|
|
46
|
+
- Impact: Cannibalization, poor CTR
|
|
47
|
+
- Fix: Create unique titles for each page
|
|
48
|
+
|
|
49
|
+
6. **duplicate-meta-descriptions.sql** - Duplicate meta descriptions
|
|
50
|
+
- Priority: MEDIUM
|
|
51
|
+
- Impact: Reduced CTR, missed opportunities
|
|
52
|
+
- Fix: Write unique descriptions
|
|
53
|
+
|
|
54
|
+
7. **missing-meta-descriptions.sql** - Pages without descriptions
|
|
55
|
+
- Priority: MEDIUM
|
|
56
|
+
- Impact: Search engines auto-generate poor snippets
|
|
57
|
+
- Fix: Add compelling meta descriptions
|
|
58
|
+
|
|
59
|
+
8. **thin-content.sql** - Pages with < 300 words
|
|
60
|
+
- Priority: MEDIUM
|
|
61
|
+
- Impact: Low quality signals
|
|
62
|
+
- Fix: Expand content or consolidate pages
|
|
63
|
+
|
|
64
|
+
9. **missing-h1.sql** - Pages without H1 tags
|
|
65
|
+
- Priority: HIGH
|
|
66
|
+
- Impact: Unclear page topic
|
|
67
|
+
- Fix: Add descriptive H1 tags
|
|
68
|
+
|
|
69
|
+
10. **multiple-h1.sql** - Pages with multiple H1 tags
|
|
70
|
+
- Priority: MEDIUM
|
|
71
|
+
- Impact: Diluted topical focus
|
|
72
|
+
- Fix: Use single H1 per page
|
|
73
|
+
|
|
74
|
+
11. **duplicate-h1.sql** - Multiple pages with same H1
|
|
75
|
+
- Priority: MEDIUM
|
|
76
|
+
- Impact: Content cannibalization
|
|
77
|
+
- Fix: Differentiate H1 tags
|
|
78
|
+
|
|
79
|
+
## Technical SEO Issues (5 queries)
|
|
80
|
+
|
|
81
|
+
**Infrastructure and architecture problems**
|
|
82
|
+
|
|
83
|
+
12. **redirects.sql** - Pages with 3xx redirect status
|
|
84
|
+
- Priority: MEDIUM
|
|
85
|
+
- Impact: Crawl budget waste, slow page speed
|
|
86
|
+
- Fix: Update links to final destinations
|
|
87
|
+
|
|
88
|
+
13. **orphan-pages.sql** - Pages with no inbound internal links
|
|
89
|
+
- Priority: MEDIUM
|
|
90
|
+
- Impact: Difficult to discover and crawl
|
|
91
|
+
- Fix: Add internal links from related pages
|
|
92
|
+
|
|
93
|
+
14. **canonical-issues.sql** - Canonical URL differs from actual URL
|
|
94
|
+
- Priority: MEDIUM
|
|
95
|
+
- Impact: Duplicate content confusion
|
|
96
|
+
- Fix: Review canonical implementation
|
|
97
|
+
|
|
98
|
+
15. **non-https.sql** - Pages not using HTTPS
|
|
99
|
+
- Priority: MEDIUM
|
|
100
|
+
- Impact: Security warnings, ranking penalty
|
|
101
|
+
- Fix: Migrate to HTTPS
|
|
102
|
+
|
|
103
|
+
16. **heading-hierarchy-issues.sql** - Non-sequential headings
|
|
104
|
+
- Priority: MEDIUM
|
|
105
|
+
- Impact: Poor document structure
|
|
106
|
+
- Fix: Correct heading order (h1 → h2 → h3)
|
|
107
|
+
|
|
108
|
+
## Security Issues (6 queries)
|
|
109
|
+
|
|
110
|
+
**Security header and link security problems**
|
|
111
|
+
|
|
112
|
+
17. **missing-hsts.sql** - No Strict-Transport-Security header
|
|
113
|
+
- Priority: HIGH
|
|
114
|
+
- Impact: HTTPS downgrade attacks possible
|
|
115
|
+
- Fix: Add HSTS header to server config
|
|
116
|
+
|
|
117
|
+
18. **missing-csp.sql** - No Content-Security-Policy header
|
|
118
|
+
- Priority: MEDIUM
|
|
119
|
+
- Impact: XSS vulnerability
|
|
120
|
+
- Fix: Implement CSP header
|
|
121
|
+
|
|
122
|
+
19. **missing-x-frame-options.sql** - No X-Frame-Options header
|
|
123
|
+
- Priority: MEDIUM
|
|
124
|
+
- Impact: Clickjacking vulnerability
|
|
125
|
+
- Fix: Add X-Frame-Options: DENY
|
|
126
|
+
|
|
127
|
+
20. **missing-referrer-policy.sql** - No Referrer-Policy header
|
|
128
|
+
- Priority: LOW
|
|
129
|
+
- Impact: Privacy leaks
|
|
130
|
+
- Fix: Add Referrer-Policy header
|
|
131
|
+
|
|
132
|
+
21. **unsafe-external-links.sql** - target="_blank" without rel="noopener"
|
|
133
|
+
- Priority: MEDIUM
|
|
134
|
+
- Impact: Tabnabbing vulnerability
|
|
135
|
+
- Fix: Add rel="noopener noreferrer"
|
|
136
|
+
|
|
137
|
+
22. **protocol-relative-links.sql** - Links using //example.com format
|
|
138
|
+
- Priority: LOW
|
|
139
|
+
- Impact: Mixed content warnings
|
|
140
|
+
- Fix: Use absolute HTTPS URLs
|
|
141
|
+
|
|
142
|
+
## Optimization Opportunities (6 queries)
|
|
143
|
+
|
|
144
|
+
**Enhancement opportunities for better SEO**
|
|
145
|
+
|
|
146
|
+
23. **title-length.sql** - Titles too short (< 30) or too long (> 60)
|
|
147
|
+
- Priority: MEDIUM
|
|
148
|
+
- Impact: Truncated or poor SERP display
|
|
149
|
+
- Fix: Optimize title length to 30-60 characters
|
|
150
|
+
|
|
151
|
+
24. **meta-description-length.sql** - Descriptions too short/long
|
|
152
|
+
- Priority: LOW
|
|
153
|
+
- Impact: Suboptimal SERP snippets
|
|
154
|
+
- Fix: Optimize to 120-160 characters
|
|
155
|
+
|
|
156
|
+
25. **title-equals-h1.sql** - Title and H1 are identical
|
|
157
|
+
- Priority: LOW
|
|
158
|
+
- Impact: Missed keyword opportunity
|
|
159
|
+
- Fix: Differentiate title and H1 slightly
|
|
160
|
+
|
|
161
|
+
26. **no-outbound-links.sql** - Pages with no outbound links
|
|
162
|
+
- Priority: LOW
|
|
163
|
+
- Impact: Poor user experience, low PageRank flow
|
|
164
|
+
- Fix: Add relevant internal/external links
|
|
165
|
+
|
|
166
|
+
27. **high-external-links.sql** - Pages with > 20 external links
|
|
167
|
+
- Priority: LOW
|
|
168
|
+
- Impact: Excessive PageRank dilution
|
|
169
|
+
- Fix: Review and reduce external links
|
|
170
|
+
|
|
171
|
+
28. **missing-images.sql** - Content pages without images
|
|
172
|
+
- Priority: LOW
|
|
173
|
+
- Impact: Poor engagement, no image search visibility
|
|
174
|
+
- Fix: Add relevant images with alt text
|
|
175
|
+
|
|
176
|
+
## Query Performance
|
|
177
|
+
|
|
178
|
+
All queries are optimized for SQLite with:
|
|
179
|
+
- Indexed columns (url, status_code, depth)
|
|
180
|
+
- LIMIT clauses to prevent excessive results
|
|
181
|
+
- Strategic WHERE clauses to filter early
|
|
182
|
+
- Simple JOINs where necessary
|
|
183
|
+
|
|
184
|
+
**Expected Performance:**
|
|
185
|
+
- Simple queries: < 10ms
|
|
186
|
+
- Complex queries (duplicates, orphans): < 100ms
|
|
187
|
+
- Join queries (broken links): < 200ms
|
|
188
|
+
|
|
189
|
+
## Usage Patterns
|
|
190
|
+
|
|
191
|
+
### Direct SQL Execution
|
|
192
|
+
```typescript
|
|
193
|
+
import Database from 'better-sqlite3';
|
|
194
|
+
import fs from 'fs';
|
|
195
|
+
|
|
196
|
+
const db = new Database('./crawl-data.db');
|
|
197
|
+
const query = fs.readFileSync('./queries/critical/missing-titles.sql', 'utf-8');
|
|
198
|
+
const results = db.prepare(query).all();
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Programmatic Analysis
|
|
202
|
+
```typescript
|
|
203
|
+
import { SQLAnalyzer } from './SQLAnalyzer.js';
|
|
204
|
+
|
|
205
|
+
const analyzer = new SQLAnalyzer(crawlId);
|
|
206
|
+
const report = await analyzer.generateReport();
|
|
207
|
+
// Returns structured SEOAnalysisReport
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### MCP Tool Integration
|
|
211
|
+
```bash
|
|
212
|
+
seo-crawler-mcp:analyze_seo crawlId="431841d4"
|
|
213
|
+
# Returns JSON report with all 28 issues checked
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## Future Enhancements
|
|
217
|
+
|
|
218
|
+
**Not yet implemented (requires additional data capture):**
|
|
219
|
+
- Core Web Vitals analysis (requires Playwright)
|
|
220
|
+
- Robots.txt validation (requires separate parser)
|
|
221
|
+
- Readability scoring (requires text analysis library)
|
|
222
|
+
- Mobile rendering issues (requires device emulation)
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
**Query Coverage: 28 Production-Ready SEO Checks**
|
|
227
|
+
**Status: Production Ready**
|
|
228
|
+
**Next Step: Build SQLAnalyzer.ts class**
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
-- Duplicate H1 Tags (MEDIUM)
-- Multiple pages sharing the same H1 tag
-- Priority: MEDIUM
-- Category: content
-- Ties on page_count are broken by h1 so the LIMIT cut-off is deterministic

SELECT
  h1,
  COUNT(*) as page_count,
  GROUP_CONCAT(url, '|||') as urls,
  MIN(title) as example_title
FROM pages
WHERE h1 IS NOT NULL
  AND TRIM(h1) != ''
  AND status_code = 200
GROUP BY h1
HAVING COUNT(*) > 1
ORDER BY page_count DESC, h1 ASC
LIMIT 50;
|