@houtini/seo-crawler-mcp 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cli.js +36 -36
- package/build/core/LinkExtractor.d.ts.map +1 -1
- package/build/core/LinkExtractor.js +0 -3
- package/build/core/LinkExtractor.js.map +1 -1
- package/build/core/UrlManager.d.ts.map +1 -1
- package/build/core/UrlManager.js +0 -3
- package/build/core/UrlManager.js.map +1 -1
- package/package.json +1 -1
- package/src/analyzers/queries/critical/broken-internal-links.sql +6 -4
- package/src/analyzers/queries/opportunities/uncrawled-internal-links.sql +20 -0
- package/src/core/LinkExtractor.ts +0 -4
- package/src/core/UrlManager.ts +0 -4
- package/CROSS_PLATFORM_FIX.md +0 -102
- package/ISSUE_hardcoded_output_path.md +0 -69
- package/README_UPDATE.md +0 -66
- package/commit-msg.txt +0 -9
package/build/cli.js
CHANGED
|
@@ -12,42 +12,42 @@ async function runCli() {
|
|
|
12
12
|
const { analyzeSeo } = await import('./tools/analyze-seo.js');
|
|
13
13
|
const { listQueries } = await import('./tools/list-queries.js');
|
|
14
14
|
if (args[0] === '--help' || args[0] === '-h') {
|
|
15
|
-
console.log(`
|
|
16
|
-
SEO Crawler MCP - CLI Mode
|
|
17
|
-
|
|
18
|
-
USAGE:
|
|
19
|
-
seo-crawler-mcp crawl <url> [options] Run a crawl
|
|
20
|
-
seo-crawler-mcp analyze <path> [options] Analyze a crawl
|
|
21
|
-
seo-crawler-mcp queries [options] List available queries
|
|
22
|
-
|
|
23
|
-
CRAWL OPTIONS:
|
|
24
|
-
--max-pages=<number> Maximum pages to crawl (default: 1000)
|
|
25
|
-
--depth=<number> Maximum crawl depth (default: 3)
|
|
26
|
-
--user-agent=<chrome|googlebot> User agent (default: chrome)
|
|
27
|
-
|
|
28
|
-
ANALYZE OPTIONS:
|
|
29
|
-
--format=<structured|summary|detailed> Output format (default: structured)
|
|
30
|
-
--category=<category> Filter by category (critical, content, technical, security, opportunities)
|
|
31
|
-
--max-examples=<number> Max example URLs per issue (default: 10)
|
|
32
|
-
|
|
33
|
-
EXAMPLES:
|
|
34
|
-
# Run a crawl
|
|
35
|
-
seo-crawler-mcp crawl https://example.com --max-pages=500 --depth=5
|
|
36
|
-
|
|
37
|
-
# Analyze a crawl
|
|
38
|
-
seo-crawler-mcp analyze C:/seo-audits/example.com_2026-02-01_abc123
|
|
39
|
-
|
|
40
|
-
# List all queries
|
|
41
|
-
seo-crawler-mcp queries
|
|
42
|
-
|
|
43
|
-
# List security queries
|
|
44
|
-
seo-crawler-mcp queries --category=security
|
|
45
|
-
|
|
46
|
-
WORKFLOW:
|
|
47
|
-
1. Run crawl from terminal (for large sites or background processing)
|
|
48
|
-
2. Get the output path from crawl results
|
|
49
|
-
3. In Claude Desktop: "Analyze the crawl at <output-path>"
|
|
50
|
-
4. Claude uses MCP tools to query the SQLite database
|
|
15
|
+
console.log(`
|
|
16
|
+
SEO Crawler MCP - CLI Mode
|
|
17
|
+
|
|
18
|
+
USAGE:
|
|
19
|
+
seo-crawler-mcp crawl <url> [options] Run a crawl
|
|
20
|
+
seo-crawler-mcp analyze <path> [options] Analyze a crawl
|
|
21
|
+
seo-crawler-mcp queries [options] List available queries
|
|
22
|
+
|
|
23
|
+
CRAWL OPTIONS:
|
|
24
|
+
--max-pages=<number> Maximum pages to crawl (default: 1000)
|
|
25
|
+
--depth=<number> Maximum crawl depth (default: 3)
|
|
26
|
+
--user-agent=<chrome|googlebot> User agent (default: chrome)
|
|
27
|
+
|
|
28
|
+
ANALYZE OPTIONS:
|
|
29
|
+
--format=<structured|summary|detailed> Output format (default: structured)
|
|
30
|
+
--category=<category> Filter by category (critical, content, technical, security, opportunities)
|
|
31
|
+
--max-examples=<number> Max example URLs per issue (default: 10)
|
|
32
|
+
|
|
33
|
+
EXAMPLES:
|
|
34
|
+
# Run a crawl
|
|
35
|
+
seo-crawler-mcp crawl https://example.com --max-pages=500 --depth=5
|
|
36
|
+
|
|
37
|
+
# Analyze a crawl
|
|
38
|
+
seo-crawler-mcp analyze C:/seo-audits/example.com_2026-02-01_abc123
|
|
39
|
+
|
|
40
|
+
# List all queries
|
|
41
|
+
seo-crawler-mcp queries
|
|
42
|
+
|
|
43
|
+
# List security queries
|
|
44
|
+
seo-crawler-mcp queries --category=security
|
|
45
|
+
|
|
46
|
+
WORKFLOW:
|
|
47
|
+
1. Run crawl from terminal (for large sites or background processing)
|
|
48
|
+
2. Get the output path from crawl results
|
|
49
|
+
3. In Claude Desktop: "Analyze the crawl at <output-path>"
|
|
50
|
+
4. Claude uses MCP tools to query the SQLite database
|
|
51
51
|
`);
|
|
52
52
|
process.exit(0);
|
|
53
53
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LinkExtractor.d.ts","sourceRoot":"","sources":["../../src/core/LinkExtractor.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAC1C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAGlD,qBAAa,aAAa;IACxB,OAAO,CAAC,UAAU,CAAS;gBAEf,UAAU,EAAE,MAAM;IAI9B,OAAO,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,QAAQ,EAAE;IAgCtE,OAAO,CAAC,cAAc;IAKtB,OAAO,CAAC,iBAAiB;IAKzB,OAAO,CAAC,QAAQ;
|
|
1
|
+
{"version":3,"file":"LinkExtractor.d.ts","sourceRoot":"","sources":["../../src/core/LinkExtractor.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAC1C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAGlD,qBAAa,aAAa;IACxB,OAAO,CAAC,UAAU,CAAS;gBAEf,UAAU,EAAE,MAAM;IAI9B,OAAO,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,QAAQ,EAAE;IAgCtE,OAAO,CAAC,cAAc;IAKtB,OAAO,CAAC,iBAAiB;IAKzB,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,eAAe;IASvB,OAAO,CAAC,UAAU;IASlB,OAAO,CAAC,eAAe;CA0BxB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LinkExtractor.js","sourceRoot":"","sources":["../../src/core/LinkExtractor.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpD,MAAM,OAAO,aAAa;IAChB,UAAU,CAAS;IAE3B,YAAY,UAAkB;QAC5B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,CAAC,CAAa,EAAE,SAAiB,EAAE,OAAe;QACvD,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvC,OAAO;YACT,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBAE1C,MAAM,QAAQ,GAAa;oBACzB,OAAO;oBACP,SAAS;oBACT,SAAS;oBACT,UAAU,EAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACzC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;oBACtC,YAAY,EAAE,QAAQ,CAAC,QAAQ;oBAC/B,YAAY,EAAE,IAAI;oBAClB,SAAS,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACtC,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACvC,CAAC;gBAEF,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC7C,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;QAC7D,OAAO,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAC9D,CAAC;IAEO,iBAAiB,CAAC,GAAQ;QAChC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,WAAW,CAAC;IAC3C,CAAC;IAEO,QAAQ,CAAC,GAAQ;QACvB,IAAI,KAAK,GAAG,GAAG,GAAG,CAAC,QAAQ,KAAK,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAE9D,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;YACf,KAAK,IAAI,GAAG,CAAC,MAAM,CAAC;QACtB,CAAC;QAED,
|
|
1
|
+
{"version":3,"file":"LinkExtractor.js","sourceRoot":"","sources":["../../src/core/LinkExtractor.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpD,MAAM,OAAO,aAAa;IAChB,UAAU,CAAS;IAE3B,YAAY,UAAkB;QAC5B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,CAAC,CAAa,EAAE,SAAiB,EAAE,OAAe;QACvD,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvC,OAAO;YACT,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBAE1C,MAAM,QAAQ,GAAa;oBACzB,OAAO;oBACP,SAAS;oBACT,SAAS;oBACT,UAAU,EAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACzC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;oBACtC,YAAY,EAAE,QAAQ,CAAC,QAAQ;oBAC/B,YAAY,EAAE,IAAI;oBAClB,SAAS,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACtC,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACvC,CAAC;gBAEF,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC7C,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;QAC7D,OAAO,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAC9D,CAAC;IAEO,iBAAiB,CAAC,GAAQ;QAChC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,WAAW,CAAC;IAC3C,CAAC;IAEO,QAAQ,CAAC,GAAQ;QACvB,IAAI,KAAK,GAAG,GAAG,GAAG,CAAC,QAAQ,KAAK,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAE9D,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;YACf,KAAK,IAAI,GAAG,CAAC,MAAM,CAAC;QACtB,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,eAAe,CAAC,MAAc;QACpC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,MAAM,EAAE,CAAC;YACrE,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CA
AC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACrD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,UAAU,CAAC,GAAW;QAC5B,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAC9D,OAAO,SAAS,KAAK,IAAI,CAAC,UAAU,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,GAAQ;QAC9B,IAAI,OAAO,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QAE3B,OAAO,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,CAAC;YACvD,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAC3D,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAEnD,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAChF,OAAO,QAAQ,CAAC;YAClB,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAC9C,OAAO,YAAY,CAAC;YACtB,CAAC;YAED,MAAM,WAAW,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;YAC9C,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;gBACnF,OAAO,YAAY,CAAC;YACtB,CAAC;YAED,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC7B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"UrlManager.d.ts","sourceRoot":"","sources":["../../src/core/UrlManager.ts"],"names":[],"mappings":"AAWA,qBAAa,UAAU;IACrB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,UAAU,CAAkC;IACpD,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,cAAc,CAAuC;gBAEjD,UAAU,EAAE,MAAM;IAI9B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;
|
|
1
|
+
{"version":3,"file":"UrlManager.d.ts","sourceRoot":"","sources":["../../src/core/UrlManager.ts"],"names":[],"mappings":"AAWA,qBAAa,UAAU;IACrB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,UAAU,CAAkC;IACpD,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,cAAc,CAAuC;gBAEjD,UAAU,EAAE,MAAM;IAI9B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAgBjC,OAAO,CAAC,eAAe;IASvB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAShC,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI;IAgBnE,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAK9B,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI/B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAIlC,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAMrC,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAI7B,kBAAkB,IAAI,MAAM;IAI5B,eAAe,IAAI,MAAM;IAIzB,WAAW,IAAI,MAAM;IAIrB,gBAAgB,IAAI,MAAM,EAAE;CAG7B"}
|
package/build/core/UrlManager.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"UrlManager.js","sourceRoot":"","sources":["../../src/core/UrlManager.ts"],"names":[],"mappings":"AAWA,MAAM,OAAO,UAAU;IACb,UAAU,CAAS;IACnB,UAAU,GAAwB,IAAI,GAAG,EAAE,CAAC;IAC5C,OAAO,GAAgB,IAAI,GAAG,EAAE,CAAC;IACjC,cAAc,GAA6B,IAAI,GAAG,EAAE,CAAC;IAE7D,YAAY,UAAkB;QAC5B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC;IAED,YAAY,CAAC,GAAW;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAE5B,IAAI,KAAK,GAAG,GAAG,MAAM,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YAEvE,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClB,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC;YACzB,CAAC;YAED,
|
|
1
|
+
{"version":3,"file":"UrlManager.js","sourceRoot":"","sources":["../../src/core/UrlManager.ts"],"names":[],"mappings":"AAWA,MAAM,OAAO,UAAU;IACb,UAAU,CAAS;IACnB,UAAU,GAAwB,IAAI,GAAG,EAAE,CAAC;IAC5C,OAAO,GAAgB,IAAI,GAAG,EAAE,CAAC;IACjC,cAAc,GAA6B,IAAI,GAAG,EAAE,CAAC;IAE7D,YAAY,UAAkB;QAC5B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC;IAED,YAAY,CAAC,GAAW;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAE5B,IAAI,KAAK,GAAG,GAAG,MAAM,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YAEvE,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClB,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC;YACzB,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,MAAc;QACpC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,MAAM,EAAE,CAAC;YACrE,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACrD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,UAAU,CAAC,GAAW;QACpB,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAC9D,OAAO,SAAS,KAAK,IAAI,CAAC,UAAU,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,aAAa,CAAC,GAAW,EAAE,KAAa,EAAE,SAAkB;QAC1D,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QAE1C,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACrC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QACzC,CAAC;QAED,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,gBAAgB,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACtD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;YACjD,CAAC;YACD,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAE,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAC7D,CAAC;IACH,CAAC;IAED,WAAW,CAAC,GAAW;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC/B,CAAC;IAED,SAAS,CAAC,GAAW;QACnB,OAAO,I
AAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,YAAY,CAAC,GAAW;QACtB,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,cAAc,CAAC,GAAW;QACxB,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACpD,OAAO,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5C,CAAC;IAED,QAAQ,CAAC,GAAW;QAClB,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;IAC1D,CAAC;IAED,kBAAkB;QAChB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;IAC9B,CAAC;IAED,eAAe;QACb,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC9D,CAAC;IAED,gBAAgB;QACd,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAClF,CAAC;CACF"}
|
package/package.json
CHANGED
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
-- Broken Internal Links (CRITICAL)
|
|
2
|
-
-- Internal links pointing to pages that
|
|
2
|
+
-- Internal links pointing to pages that actually returned HTTP errors (404, 500, etc.)
|
|
3
3
|
-- Priority: CRITICAL
|
|
4
4
|
-- Category: critical
|
|
5
5
|
-- Impact: Poor user experience, wasted crawl budget, and loss of link equity. Users encounter dead ends and search engines waste resources crawling broken links.
|
|
6
6
|
-- Fix: View URLs that link to errors using the 'inlinks' tab and export them in bulk. Update broken links to point to correct URLs or remove them. Consider implementing 301 redirects for permanently moved content.
|
|
7
|
+
-- Note: This query only reports links to pages that were crawled AND returned errors. Un-crawled links are reported separately in the opportunities category.
|
|
7
8
|
|
|
8
9
|
SELECT
|
|
9
10
|
l.source_url,
|
|
10
11
|
l.target_url,
|
|
11
12
|
l.anchor_text,
|
|
12
13
|
l.placement,
|
|
14
|
+
p.status_code,
|
|
13
15
|
COUNT(*) as occurrences
|
|
14
16
|
FROM links l
|
|
15
|
-
|
|
17
|
+
INNER JOIN pages p ON l.target_url = p.url -- Only check pages we actually crawled
|
|
16
18
|
WHERE l.is_internal = 1
|
|
17
|
-
AND
|
|
18
|
-
GROUP BY l.target_url, l.source_url
|
|
19
|
+
AND p.status_code >= 400 -- Only actual HTTP errors (404, 500, etc.)
|
|
20
|
+
GROUP BY l.target_url, l.source_url, p.status_code
|
|
19
21
|
ORDER BY occurrences DESC
|
|
20
22
|
LIMIT 100;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
-- Un-crawled Internal Links (OPPORTUNITIES)
|
|
2
|
+
-- Internal links discovered but not crawled (likely due to depth or page limits)
|
|
3
|
+
-- Priority: LOW
|
|
4
|
+
-- Category: opportunities
|
|
5
|
+
-- Impact: These links were discovered during the crawl but not visited, usually because the crawler reached its depth limit or page count limit. Not necessarily broken - just outside the crawl scope.
|
|
6
|
+
-- Fix: Review these URLs to determine if they should be included in future crawls. Consider increasing maxPages or maxDiscoveryDepth if important sections of your site are being missed.
|
|
7
|
+
|
|
8
|
+
SELECT
|
|
9
|
+
l.source_url,
|
|
10
|
+
l.target_url,
|
|
11
|
+
l.anchor_text,
|
|
12
|
+
l.placement,
|
|
13
|
+
COUNT(*) as occurrences
|
|
14
|
+
FROM links l
|
|
15
|
+
LEFT JOIN pages p ON l.target_url = p.url
|
|
16
|
+
WHERE l.is_internal = 1
|
|
17
|
+
AND p.url IS NULL -- Link discovered but page never crawled
|
|
18
|
+
GROUP BY l.target_url, l.source_url
|
|
19
|
+
ORDER BY occurrences DESC
|
|
20
|
+
LIMIT 100;
|
package/src/core/UrlManager.ts
CHANGED
package/CROSS_PLATFORM_FIX.md
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
# Cross-Platform Output Path Fix - v2.1.0
|
|
2
|
-
|
|
3
|
-
## Summary
|
|
4
|
-
|
|
5
|
-
Fixed hardcoded Windows-only output path that prevented the MCP server from working on macOS and Linux.
|
|
6
|
-
|
|
7
|
-
## Changes Made
|
|
8
|
-
|
|
9
|
-
### 1. `src/tools/run-seo-audit.ts`
|
|
10
|
-
**Before:**
|
|
11
|
-
```typescript
|
|
12
|
-
const outputPath = `C:\\seo-audits\\${folderName}`;
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
**After:**
|
|
16
|
-
```typescript
|
|
17
|
-
import path from 'path';
|
|
18
|
-
import os from 'os';
|
|
19
|
-
|
|
20
|
-
// Cross-platform output directory resolution
|
|
21
|
-
// Priority: OUTPUT_DIR env var > home directory fallback
|
|
22
|
-
const baseDir = process.env.OUTPUT_DIR || path.join(os.homedir(), 'seo-audits');
|
|
23
|
-
const outputPath = path.join(baseDir, folderName);
|
|
24
|
-
|
|
25
|
-
debug('[MCP] Output path resolved:', outputPath);
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
**What this fixes:**
|
|
29
|
-
- Uses `OUTPUT_DIR` environment variable if set
|
|
30
|
-
- Falls back to user's home directory (`~/seo-audits`) on all platforms
|
|
31
|
-
- Uses Node.js `path.join()` for platform-agnostic path construction
|
|
32
|
-
- Uses `os.homedir()` to get correct home directory on any OS
|
|
33
|
-
|
|
34
|
-
### 2. `package.json`
|
|
35
|
-
- Bumped version from `2.0.2` to `2.1.0`
|
|
36
|
-
|
|
37
|
-
### 3. `src/index.ts`
|
|
38
|
-
- Updated `SERVER_VERSION` from `'2.0.1'` to `'2.1.0'`
|
|
39
|
-
- Updated startup message to include Phase 5 confirmation
|
|
40
|
-
|
|
41
|
-
### 4. `CHANGELOG.md`
|
|
42
|
-
- Added comprehensive v2.1.0 release notes
|
|
43
|
-
- Documented the cross-platform fix with examples
|
|
44
|
-
|
|
45
|
-
### 5. `README_UPDATE.md` (Created)
|
|
46
|
-
- New documentation section explaining cross-platform configuration
|
|
47
|
-
- Examples for Windows, macOS, and Linux
|
|
48
|
-
- Explains default behaviour when `OUTPUT_DIR` is not set
|
|
49
|
-
|
|
50
|
-
## Platform-Specific Defaults
|
|
51
|
-
|
|
52
|
-
**When `OUTPUT_DIR` is NOT set:**
|
|
53
|
-
- Windows: `C:\Users\YourName\seo-audits`
|
|
54
|
-
- macOS: `/Users/YourName/seo-audits`
|
|
55
|
-
- Linux: `/home/YourName/seo-audits`
|
|
56
|
-
|
|
57
|
-
**When `OUTPUT_DIR` IS set:**
|
|
58
|
-
- Uses the specified path on any platform
|
|
59
|
-
- Windows example: `C:\custom\path` (escape the backslashes as `C:\\custom\\path` when the value is written inside a JSON config file)
|
|
60
|
-
- Unix example: `/var/www/seo-audits`
|
|
61
|
-
|
|
62
|
-
## Testing
|
|
63
|
-
|
|
64
|
-
Build completed successfully:
|
|
65
|
-
```
|
|
66
|
-
> @houtini/seo-crawler-mcp@2.1.0 build
|
|
67
|
-
> tsc
|
|
68
|
-
|
|
69
|
-
✅ Process completed with exit code 0
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
## Next Steps
|
|
73
|
-
|
|
74
|
-
1. Update README.md with the new environment variable documentation from `README_UPDATE.md`
|
|
75
|
-
2. Test on Windows to ensure existing config still works
|
|
76
|
-
3. Test on macOS/Linux to verify cross-platform compatibility
|
|
77
|
-
4. Commit and tag as v2.1.0
|
|
78
|
-
5. Publish to npm
|
|
79
|
-
|
|
80
|
-
## Implementation Notes
|
|
81
|
-
|
|
82
|
-
- Uses standard Node.js modules (`path`, `os`)
|
|
83
|
-
- No breaking changes to existing configurations
|
|
84
|
-
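- Backwards compatible - existing Windows users are unaffected as long as they set `OUTPUT_DIR` (e.g. to `C:\seo-audits`); if it is not set, output now defaults to the `seo-audits` folder in the user's home directory instead of the old hardcoded `C:\seo-audits`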
|
|
85
|
-
- More flexible - users can now specify any output directory
|
|
86
|
-
- Better default behaviour - uses home directory instead of hardcoded path
|
|
87
|
-
|
|
88
|
-
## Files Modified
|
|
89
|
-
|
|
90
|
-
- `src/tools/run-seo-audit.ts` - Core fix
|
|
91
|
-
- `package.json` - Version bump
|
|
92
|
-
- `src/index.ts` - Version constant update
|
|
93
|
-
- `CHANGELOG.md` - Release notes
|
|
94
|
-
- `README_UPDATE.md` - New documentation (created)
|
|
95
|
-
- `ISSUE_hardcoded_output_path.md` - Issue documentation (already existed)
|
|
96
|
-
|
|
97
|
-
## Resolves
|
|
98
|
-
|
|
99
|
-
- Cross-platform compatibility (Windows, macOS, Linux)
|
|
100
|
-
- User-configurable output directories
|
|
101
|
-
- Sensible defaults using home directory
|
|
102
|
-
- Environment variable support as documented
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
# Hardcoded Windows-only output path breaks cross-platform compatibility
|
|
2
|
-
|
|
3
|
-
## Description
|
|
4
|
-
|
|
5
|
-
The output path for SEO audit crawls is currently hardcoded to `C:\\seo-audits` in `src/tools/run-seo-audit.ts` (line 31). This causes several issues:
|
|
6
|
-
|
|
7
|
-
1. **Platform-specific**: Only works on Windows, will fail on macOS and Linux
|
|
8
|
-
2. **Not configurable**: Users cannot specify their own output directory
|
|
9
|
-
3. **No environment variable support**: Can't be overridden via configuration
|
|
10
|
-
|
|
11
|
-
## Current Code
|
|
12
|
-
|
|
13
|
-
```typescript
|
|
14
|
-
const outputPath = `C:\\seo-audits\\${folderName}`;
|
|
15
|
-
```
|
|
16
|
-
|
|
17
|
-
## Proposed Solution
|
|
18
|
-
|
|
19
|
-
Make the output directory cross-platform and configurable:
|
|
20
|
-
|
|
21
|
-
```typescript
|
|
22
|
-
import path from 'path';
|
|
23
|
-
import os from 'os';
|
|
24
|
-
|
|
25
|
-
// Option 1: Environment variable with sensible fallback
|
|
26
|
-
const baseDir = process.env.SEO_AUDIT_DIR || path.join(os.homedir(), 'seo-audits');
|
|
27
|
-
const outputPath = path.join(baseDir, folderName);
|
|
28
|
-
|
|
29
|
-
// Option 2: Add optional parameter to MCP tool
|
|
30
|
-
const outputPath = validated.outputDir
|
|
31
|
-
? path.join(validated.outputDir, folderName)
|
|
32
|
-
: path.join(os.homedir(), 'seo-audits', folderName);
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
## Benefits
|
|
36
|
-
|
|
37
|
-
- **Cross-platform**: Works on Windows, macOS, and Linux automatically
|
|
38
|
-
- Windows: `C:\Users\username\seo-audits`
|
|
39
|
-
- macOS: `/Users/username/seo-audits`
|
|
40
|
-
- Linux: `/home/username/seo-audits`
|
|
41
|
-
- **User configurable**: Via environment variable or tool parameter
|
|
42
|
-
- **Professional**: Better production-ready behavior
|
|
43
|
-
|
|
44
|
-
## Implementation Options
|
|
45
|
-
|
|
46
|
-
**Option A (Minimal)**: Use `os.homedir()` with `path.join()`
|
|
47
|
-
- Pros: Simple, cross-platform, no config needed
|
|
48
|
-
- Cons: Still fixed to home directory
|
|
49
|
-
|
|
50
|
-
**Option B (Environment Variable)**: Add `SEO_AUDIT_DIR` env var support
|
|
51
|
-
- Pros: User configurable without code changes
|
|
52
|
-
- Cons: Requires documentation update
|
|
53
|
-
|
|
54
|
-
**Option C (Tool Parameter)**: Add `outputDir` parameter to MCP tool
|
|
55
|
-
- Pros: Most flexible, can be set per-call
|
|
56
|
-
- Cons: Breaking change to API, requires schema update
|
|
57
|
-
|
|
58
|
-
## Recommendation
|
|
59
|
-
|
|
60
|
-
Implement Option B with fallback to Option A:
|
|
61
|
-
1. Check for `SEO_AUDIT_DIR` environment variable
|
|
62
|
-
2. Fall back to `path.join(os.homedir(), 'seo-audits')`
|
|
63
|
-
3. Document the environment variable in README
|
|
64
|
-
|
|
65
|
-
This provides flexibility while maintaining backward compatibility (just changes the default location to be cross-platform).
|
|
66
|
-
|
|
67
|
-
## Additional Context
|
|
68
|
-
|
|
69
|
-
Discovered during testing on Windows when comparing CLI vs MCP tool usage. The hardcoded path works on Windows for now, but it breaks on macOS and Linux and gives users no way to configure the output location.
|
package/README_UPDATE.md
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
# Installation Section Update
|
|
2
|
-
|
|
3
|
-
Replace the "Environment Variables" section in README.md with this improved version:
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
**Environment Variables:**
|
|
8
|
-
|
|
9
|
-
- `OUTPUT_DIR`: Directory where crawl results are saved
|
|
10
|
-
- **If not set**: Defaults to `~/seo-audits` (user's home directory)
|
|
11
|
-
- Windows: `C:\Users\YourName\seo-audits`
|
|
12
|
-
- macOS: `/Users/YourName/seo-audits`
|
|
13
|
-
- Linux: `/home/YourName/seo-audits`
|
|
14
|
-
- **If set**: Uses the specified path (cross-platform)
|
|
15
|
-
- Windows example: `C:\custom\path` (written as `C:\\custom\\path` inside the JSON config, as in the example below)
|
|
16
|
-
- Unix example: `/var/www/seo-audits`
|
|
17
|
-
|
|
18
|
-
- `DEBUG`: Set to `"true"` to enable verbose debug logging (optional, default: `"false"`)
|
|
19
|
-
|
|
20
|
-
**Cross-Platform Configuration Examples:**
|
|
21
|
-
|
|
22
|
-
Windows:
|
|
23
|
-
```json
|
|
24
|
-
{
|
|
25
|
-
"mcpServers": {
|
|
26
|
-
"seo-crawler-mcp": {
|
|
27
|
-
"command": "npx",
|
|
28
|
-
"args": ["-y", "@houtini/seo-crawler-mcp"],
|
|
29
|
-
"env": {
|
|
30
|
-
"OUTPUT_DIR": "C:\\seo-audits"
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
macOS/Linux:
|
|
38
|
-
```json
|
|
39
|
-
{
|
|
40
|
-
"mcpServers": {
|
|
41
|
-
"seo-crawler-mcp": {
|
|
42
|
-
"command": "npx",
|
|
43
|
-
"args": ["-y", "@houtini/seo-crawler-mcp"],
|
|
44
|
-
"env": {
|
|
45
|
-
"OUTPUT_DIR": "/Users/yourname/seo-audits"
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
Using default (home directory):
|
|
53
|
-
```json
|
|
54
|
-
{
|
|
55
|
-
"mcpServers": {
|
|
56
|
-
"seo-crawler-mcp": {
|
|
57
|
-
"command": "npx",
|
|
58
|
-
"args": ["-y", "@houtini/seo-crawler-mcp"]
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
---
|
|
65
|
-
|
|
66
|
-
This should replace lines 92-95 in the current README.md
|
package/commit-msg.txt
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
fix: cross-platform output path support (v2.1.0)
|
|
2
|
-
|
|
3
|
-
- Remove hardcoded Windows path C:\seo-audits
|
|
4
|
-
- Use OUTPUT_DIR environment variable with cross-platform fallback
|
|
5
|
-
- Default to ~/seo-audits using os.homedir() and path.join()
|
|
6
|
-
- Works on Windows, macOS, and Linux
|
|
7
|
-
- Adds debug logging for resolved output path
|
|
8
|
-
|
|
9
|
-
Fixes macOS and Linux compatibility issues
|