geo-ai-search-optimization 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/action.yml +130 -0
- package/package.json +15 -3
- package/src/auto-fix.js +349 -0
- package/src/batch-full-page-audit.js +151 -0
- package/src/citability.js +311 -0
- package/src/citation-check.js +1 -1
- package/src/cli-site-ops-commands.js +391 -2
- package/src/compare.js +175 -0
- package/src/config.js +105 -0
- package/src/crawlers.js +286 -0
- package/src/diagnose.js +221 -0
- package/src/eeat.js +251 -0
- package/src/freshness.js +281 -0
- package/src/full-audit.js +269 -0
- package/src/full-page-audit.js +273 -0
- package/src/heading-structure.js +287 -0
- package/src/index.d.ts +492 -0
- package/src/index.js +24 -0
- package/src/internal-links.js +298 -0
- package/src/page-audit.js +1 -1
- package/src/page-snapshot.js +198 -0
- package/src/pdf-report.js +205 -0
- package/src/platform-ready.js +238 -0
- package/src/plugins.js +126 -0
- package/src/readability.js +252 -0
- package/src/security.js +249 -0
- package/src/sitemap.js +323 -0
- package/src/social-meta.js +293 -0
- package/src/topics.js +275 -0
- package/src/url-onboarding.js +1 -1
- package/src/validate-llms.js +307 -0
- package/src/validate-schema.js +306 -0
package/src/config.js
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
// Config file names probed in each directory by findConfigFile, in this order (first accessible one wins).
const CONFIG_FILE_NAMES = [".georc.json", "geo.config.json"];
|
|
5
|
+
|
|
6
|
+
/**
 * Search for a config file starting at `startDir` and walking up through parent directories.
 *
 * At most 10 directory levels are inspected (the start directory counts as the first).
 * In each directory the names in CONFIG_FILE_NAMES are tried in order.
 *
 * @param {string} [startDir] - Directory to start from; falls back to "." when falsy.
 * @returns {Promise<string|null>} Absolute path of the first accessible config file, or null.
 */
async function findConfigFile(startDir) {
  // fs.access rejects when the path is not accessible; treat any rejection as "not here".
  const isAccessible = async (file) => {
    try {
      await fs.access(file);
      return true;
    } catch {
      return false;
    }
  };

  let current = path.resolve(startDir || ".");
  let levelsLeft = 10;

  while (levelsLeft > 0) {
    levelsLeft -= 1;

    for (const fileName of CONFIG_FILE_NAMES) {
      const fullPath = path.join(current, fileName);
      if (await isAccessible(fullPath)) {
        return fullPath;
      }
    }

    const up = path.dirname(current);
    // dirname of the filesystem root is the root itself: nothing further to climb.
    if (up === current) {
      return null;
    }
    current = up;
  }

  return null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Load the JSON configuration file.
 *
 * The file is taken from `options.configPath` when given; otherwise it is looked up
 * with findConfigFile starting at `options.startDir` (default ".").
 *
 * @param {object} [options]
 * @param {string} [options.configPath] - Explicit path to the config file.
 * @param {string} [options.startDir] - Directory where the upward search starts.
 * @returns {Promise<object>} The parsed config plus `_source` (file path) and `_found: true`,
 *   or `{ _source: null, _found: false }` when no config file was located.
 * @throws {Error} When the file cannot be read or is not valid JSON. The underlying
 *   error is kept on `error.cause` so callers can still inspect its code and stack.
 */
export async function loadConfig(options = {}) {
  const configPath = options.configPath || await findConfigFile(options.startDir || ".");

  if (!configPath) {
    return { _source: null, _found: false };
  }

  try {
    const content = await fs.readFile(configPath, "utf8");
    const config = JSON.parse(content);
    // The bookkeeping keys are spread last so they win over same-named keys in the file.
    return { ...config, _source: configPath, _found: true };
  } catch (err) {
    throw new Error(`Failed to read config at ${configPath}: ${err.message}`, { cause: err });
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Write a starter `.georc.json` into the target directory.
 *
 * @param {object} [options]
 * @param {string} [options.targetDir] - Directory to write into (default "."); created if missing.
 * @param {boolean} [options.overwrite] - Replace an existing `.georc.json` instead of failing.
 * @param {string} [options.siteName] - Stored as `site.name` (null when falsy).
 * @param {string} [options.siteUrl] - Stored as `site.url` (null when falsy).
 * @param {number} [options.minScore] - Stored as `audit.minScore` and `ci.minScore`;
 *   defaults to 40. An explicit 0 is kept.
 * @returns {Promise<{outputPath: string, config: object}>} The written path and the config object.
 * @throws {Error} When the file already exists and `options.overwrite` is not set.
 */
export async function initConfig(options = {}) {
  const targetDir = path.resolve(options.targetDir || ".");
  const outputPath = path.join(targetDir, ".georc.json");

  if (!options.overwrite) {
    // Decide existence from whether fs.access succeeds, not from the error text:
    // an ENOENT message embeds the path, which may itself contain "already exists".
    let alreadyThere = true;
    try {
      await fs.access(outputPath);
    } catch {
      alreadyThere = false;
    }
    if (alreadyThere) {
      throw new Error(`Config already exists at ${outputPath}. Use --overwrite to replace.`);
    }
  }

  // Keep an explicit 0 threshold; every other falsy value still falls back to 40.
  const minScore = options.minScore === 0 ? 0 : options.minScore || 40;

  const config = {
    $schema: "https://geo-ai-search-optimization.dev/schema/georc.json",
    site: {
      name: options.siteName || null,
      url: options.siteUrl || null
    },
    audit: {
      minScore,
      maxFileSize: 1_000_000,
      maxExamples: 5
    },
    ci: {
      minScore,
      failOnRegression: false,
      baselineDir: ".geo-data"
    },
    crawlers: {
      strategy: "open"
    },
    output: {
      format: "markdown",
      dataDir: ".geo-data"
    },
    plugins: []
  };

  await fs.mkdir(targetDir, { recursive: true });
  await fs.writeFile(outputPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");

  return { outputPath, config };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Combine config-file values with CLI options; CLI options take precedence.
 *
 * A config value is copied onto the result only when the config was found
 * (`config._found`) and the corresponding option is not already set. Two rules are in use:
 * - site / output / crawlers fields: config value must be truthy, option must be falsy;
 * - audit fields: config value must not be `undefined`, option must be `undefined`;
 * - ci.failOnRegression: config value must be truthy, option must be `undefined`.
 *
 * @param {object} config - Result of loadConfig (must carry `_found`).
 * @param {object} cliOptions - Options parsed from the command line.
 * @returns {object} A new object; neither input is modified.
 */
export function mergeConfigWithOptions(config, cliOptions) {
  const result = { ...cliOptions };

  if (!config._found) {
    return result;
  }

  const isTruthy = (value) => Boolean(value);
  const isDefined = (value) => value !== undefined;
  const isFalsy = (value) => !value;
  const isUndefined = (value) => value === undefined;

  // [config section, config key, option name, "config provides it" test, "option is unset" test]
  const mappings = [
    ["site", "url", "siteUrl", isTruthy, isFalsy],
    ["site", "name", "siteName", isTruthy, isFalsy],
    ["audit", "minScore", "minScore", isDefined, isUndefined],
    ["audit", "maxFileSize", "maxFileSize", isDefined, isUndefined],
    ["audit", "maxExamples", "maxExamples", isDefined, isUndefined],
    ["ci", "failOnRegression", "failOnRegression", isTruthy, isUndefined],
    ["output", "format", "format", isTruthy, isFalsy],
    ["output", "dataDir", "dataDir", isTruthy, isFalsy],
    ["crawlers", "strategy", "strategy", isTruthy, isFalsy]
  ];

  for (const [section, key, optionName, provided, unset] of mappings) {
    const fromConfig = config[section]?.[key];
    if (provided(fromConfig) && unset(result[optionName])) {
      result[optionName] = fromConfig;
    }
  }

  return result;
}
|
package/src/crawlers.js
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { writeScanOutput } from "./scan.js";
|
|
4
|
+
|
|
5
|
+
// Registry of AI-related crawler user-agent tokens this module reports on.
// Each tuple is [name, owner, engine, purpose] and is expanded into an object with those keys.
const KNOWN_AI_CRAWLERS = [
  ["GPTBot", "OpenAI", "ChatGPT", "Training & search"],
  ["ChatGPT-User", "OpenAI", "ChatGPT", "Live browsing"],
  ["OAI-SearchBot", "OpenAI", "ChatGPT Search", "Search results"],
  ["Google-Extended", "Google", "Gemini / Bard", "Training"],
  ["Googlebot", "Google", "Google AI Overviews", "Indexing & AIO"],
  ["anthropic-ai", "Anthropic", "Claude", "Training"],
  ["ClaudeBot", "Anthropic", "Claude", "Web access"],
  ["PerplexityBot", "Perplexity", "Perplexity", "Search results"],
  ["Bytespider", "ByteDance", "Doubao / TikTok", "Training"],
  ["CCBot", "Common Crawl", "Multiple LLMs", "Training data"],
  ["cohere-ai", "Cohere", "Cohere", "Training"],
  ["meta-externalagent", "Meta", "Meta AI", "Training"],
  ["Applebot-Extended", "Apple", "Apple Intelligence", "Training"],
  ["FacebookBot", "Meta", "Meta AI", "Training & search"],
  ["Amazonbot", "Amazon", "Alexa / Rufus", "Search & shopping"]
].map(([name, owner, engine, purpose]) => ({ name, owner, engine, purpose }));
|
|
22
|
+
|
|
23
|
+
/**
 * Parse robots.txt text into one block per `User-agent` line.
 *
 * Handling notes:
 * - Everything from `#` to end of line is a comment and is dropped, so
 *   `Disallow: / # all` yields the path "/".
 * - Consecutive `User-agent` lines form one group: the rules that follow are
 *   recorded on every agent in that group, not just the last one.
 * - Any allow / disallow / crawl-delay line ends the current run of `User-agent` lines,
 *   even when it carries no usable value (e.g. an empty `Disallow:`); such lines
 *   are not recorded as rules.
 * - Rule lines before the first `User-agent` line and unrecognised lines are ignored.
 *
 * @param {string} content - Raw robots.txt text.
 * @returns {Array<{userAgent: string, rules: Array<object>}>} Blocks in file order. Rules are
 *   `{ type: "allow" | "disallow", path }` or `{ type: "crawl-delay", value }` (integer).
 */
function parseRobotsTxt(content) {
  const blocks = [];
  let group = []; // blocks that receive the rules currently being read
  let groupHasRules = false; // true once a rule line has followed the group's User-agent lines

  for (const rawLine of content.split("\n")) {
    const hashAt = rawLine.indexOf("#");
    const line = (hashAt === -1 ? rawLine : rawLine.slice(0, hashAt)).trim();
    if (!line) continue;

    const uaMatch = line.match(/^user-agent:\s*(.+)$/i);
    if (uaMatch) {
      // A User-agent line after rules starts a new group; otherwise it joins the current one.
      if (groupHasRules) {
        group = [];
        groupHasRules = false;
      }
      const block = { userAgent: uaMatch[1].trim(), rules: [] };
      group.push(block);
      blocks.push(block);
      continue;
    }

    if (group.length === 0) continue;

    const directive = line.match(/^(allow|disallow|crawl-delay):\s*(.*)$/i);
    if (!directive) continue;

    groupHasRules = true;
    const kind = directive[1].toLowerCase();
    const value = directive[2].trim();

    let rule = null;
    if (kind === "crawl-delay") {
      if (/^\d+$/.test(value)) {
        rule = { type: "crawl-delay", value: Number.parseInt(value, 10) };
      }
    } else if (value) {
      rule = { type: kind, path: value };
    }
    if (!rule) continue;

    // Each block gets its own copy so later mutation of one block cannot leak into another.
    for (const block of group) {
      block.rules.push({ ...rule });
    }
  }

  return blocks;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Classify site-wide access for every crawler in KNOWN_AI_CRAWLERS.
 *
 * For each crawler the block whose user agent equals the crawler name (case-insensitive)
 * is used; when there is none, the `*` block is used; when neither exists the crawler is
 * reported as allowed by default. Only root rules (`/`) decide the status:
 * - `Disallow: /` without `Allow: /` → "blocked"
 * - `Disallow: /` together with `Allow: /` → "partial"
 * - otherwise → "allowed"
 *
 * An empty `Disallow` path places no restriction in robots.txt, so it is never counted
 * as a site-wide block. Path-specific disallows do not change the status; they are
 * listed in `blockedPaths`.
 *
 * @param {Array<{userAgent: string, rules: Array<object>}>} blocks - Output of parseRobotsTxt.
 * @returns {Array<object>} One entry per known crawler:
 *   `{ crawler, owner, engine, purpose, status, reason, matchedAgent, blockedPaths }`.
 */
function analyzeCrawlerAccess(blocks) {
  // The wildcard block does not depend on the crawler, so look it up once.
  const wildcardBlock = blocks.find((b) => b.userAgent === "*");
  const hasRootRule = (rules, type) => rules.some((r) => r.type === type && r.path === "/");

  return KNOWN_AI_CRAWLERS.map((crawler) => {
    const wantedName = crawler.name.toLowerCase();
    const specificBlock = blocks.find((b) => b.userAgent.toLowerCase() === wantedName);
    const activeBlock = specificBlock || wildcardBlock;

    let status = "allowed";
    let reason = "No specific rules found; defaults to allowed.";
    let matchedAgent = null;

    if (activeBlock) {
      matchedAgent = specificBlock ? specificBlock.userAgent : "*";
      const rootDisallowed = hasRootRule(activeBlock.rules, "disallow");
      const rootAllowed = hasRootRule(activeBlock.rules, "allow");

      if (rootDisallowed && rootAllowed) {
        status = "partial";
        reason = `Mixed rules for User-agent: ${matchedAgent}`;
      } else if (rootDisallowed) {
        status = "blocked";
        reason = specificBlock
          ? `Explicitly blocked via User-agent: ${matchedAgent}`
          : "Blocked by wildcard User-agent: * Disallow: /";
      } else if (specificBlock) {
        reason = `Explicitly allowed via User-agent: ${matchedAgent}`;
      }
      // Wildcard block without a root disallow: the default "allowed" reason stands.
    }

    const blockedPaths = activeBlock
      ? activeBlock.rules
        .filter((r) => r.type === "disallow" && r.path !== "/" && r.path !== "")
        .map((r) => r.path)
      : [];

    return {
      crawler: crawler.name,
      owner: crawler.owner,
      engine: crawler.engine,
      purpose: crawler.purpose,
      status,
      reason,
      matchedAgent,
      blockedPaths
    };
  });
}
|
|
123
|
+
|
|
124
|
+
/**
 * Turn per-crawler access results into a 0-100 score.
 *
 * Each crawler can earn 10 points ("allowed"), 5 ("partial") or 0 (anything else).
 * Crawlers named in the key-engine list count double. The score is the earned share
 * of the maximum, rounded to the nearest integer.
 *
 * @param {Array<{crawler: string, status: string}>} crawlerResults
 * @returns {number} 100 for an empty list, otherwise the weighted percentage.
 */
function computeCrawlerScore(crawlerResults) {
  if (crawlerResults.length === 0) {
    return 100;
  }

  const keyEngines = ["GPTBot", "ChatGPT-User", "Googlebot", "PerplexityBot", "ClaudeBot"];

  let earned = 0;
  let possible = 0;

  // Single pass: accumulate the earned points and the achievable maximum side by side.
  for (const entry of crawlerResults) {
    const weight = keyEngines.includes(entry.crawler) ? 2 : 1;
    possible += 10 * weight;

    if (entry.status === "allowed") {
      earned += 10 * weight;
    } else if (entry.status === "partial") {
      earned += 5 * weight;
    }
  }

  return Math.round((earned / possible) * 100);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Build a suggested robots.txt section covering every crawler in KNOWN_AI_CRAWLERS.
 *
 * Strategies:
 * - "open" (default): `Allow: /` for every crawler.
 * - "selective": `Allow: /` for crawlers on the allow list (case-insensitive), `Disallow: /` otherwise.
 * - "block-all": `Disallow: /` for every crawler.
 * Any other strategy produces only the header comment lines.
 *
 * @param {Array<object>} crawlerResults - Accepted for interface compatibility; not read here.
 * @param {object} [options]
 * @param {string} [options.strategy] - One of the strategies above.
 * @param {string[]} [options.allow] - Allow list for "selective"; a built-in list is used when absent.
 * @returns {string} The generated text, lines joined with "\n".
 */
function generateOptimalRobotsTxt(crawlerResults, options = {}) {
  const strategy = options.strategy || "open";
  const output = [
    "# AI Crawler Rules",
    `# Strategy: ${strategy}`,
    "# Generated by geo-ai-search-optimization",
    ""
  ];

  // Each known strategy contributes a heading comment and a per-crawler directive chooser.
  let heading = null;
  let directiveFor = null;

  switch (strategy) {
    case "open":
      heading = "# Allow all AI crawlers for maximum AI visibility";
      directiveFor = () => "Allow: /";
      break;
    case "selective": {
      const allowList = options.allow || ["GPTBot", "ChatGPT-User", "Googlebot", "PerplexityBot", "ClaudeBot"];
      heading = "# Allow key AI crawlers, block training-only bots";
      directiveFor = (crawlerName) => {
        const wanted = crawlerName.toLowerCase();
        return allowList.some((entry) => entry.toLowerCase() === wanted) ? "Allow: /" : "Disallow: /";
      };
      break;
    }
    case "block-all":
      heading = "# Block all known AI crawlers";
      directiveFor = () => "Disallow: /";
      break;
    default:
      break;
  }

  if (directiveFor !== null) {
    output.push(heading);
    for (const { name } of KNOWN_AI_CRAWLERS) {
      const directive = directiveFor(name);
      output.push(`User-agent: ${name}`, directive, "");
    }
  }

  return output.join("\n");
}
|
|
178
|
+
|
|
179
|
+
/**
 * Download `/robots.txt` from the scheme and host of the given URL.
 *
 * The request follows redirects, sends this tool's user-agent string and is aborted
 * after 10 seconds. Errors raised by `new URL` or `fetch` are not caught here.
 *
 * @param {string} url - Any URL on the target site.
 * @returns {Promise<{found: boolean, url: string, status: number, content: string}>}
 *   `found` is false (and `content` is "") when the response status is not OK.
 */
async function fetchRobotsTxt(url) {
  const { protocol, host } = new URL(url);
  const robotsUrl = `${protocol}//${host}/robots.txt`;

  const requestInit = {
    redirect: "follow",
    headers: { "user-agent": "geo-ai-search-optimization/2.2.0" },
    signal: AbortSignal.timeout(10_000)
  };
  const response = await fetch(robotsUrl, requestInit);

  const found = Boolean(response.ok);
  // Only read the body when the request succeeded.
  const content = found ? await response.text() : "";

  return { found, url: robotsUrl, status: response.status, content };
}
|
|
193
|
+
|
|
194
|
+
/**
 * Analyse AI-crawler access for a site or a local robots.txt file.
 *
 * @param {string} input - An http(s) URL (its site's /robots.txt is fetched) or a path to a
 *   robots.txt file on disk.
 * @param {object} [options] - Passed through to generateOptimalRobotsTxt (e.g. `strategy`, `allow`).
 * @returns {Promise<object>} A report with `kind: "geo-crawlers"`, `source`, `found`, `totalBlocks`,
 *   `crawlers`, `score`, `blocked`, `partial`, `summary` and `recommendation`. When the remote
 *   robots.txt request is not OK the report has `found: false`, carries the HTTP `status`,
 *   and lists every known crawler as allowed.
 * @throws Propagates fetch errors (URL input) and file read errors (path input).
 */
export async function analyzeCrawlers(input, options = {}) {
  let robotsContent = "";
  let source = "";

  if (/^https?:\/\//i.test(input)) {
    const result = await fetchRobotsTxt(input);
    robotsContent = result.content;
    source = result.url;
    if (!result.found) {
      return {
        kind: "geo-crawlers",
        source,
        found: false,
        status: result.status,
        totalBlocks: 0,
        // Entries use the same `crawler` key as analyzeCrawlerAccess results so that
        // renderCrawlersMarkdown can print the name; `name` is kept via the spread.
        crawlers: KNOWN_AI_CRAWLERS.map((c) => ({
          ...c,
          crawler: c.name,
          status: "allowed",
          reason: "No robots.txt found; all crawlers allowed by default.",
          matchedAgent: null,
          blockedPaths: []
        })),
        score: 100,
        // Same shape as the "found" report, so consumers need no special casing.
        blocked: [],
        partial: [],
        summary: "No robots.txt found. All AI crawlers have unrestricted access by default.",
        recommendation: generateOptimalRobotsTxt([], options)
      };
    }
  } else {
    const filePath = path.resolve(input);
    robotsContent = await fs.readFile(filePath, "utf8");
    source = filePath;
  }

  const blocks = parseRobotsTxt(robotsContent);
  const crawlers = analyzeCrawlerAccess(blocks);
  const score = computeCrawlerScore(crawlers);
  const blocked = crawlers.filter((c) => c.status === "blocked");
  const partial = crawlers.filter((c) => c.status === "partial");

  let summary;
  if (blocked.length === 0 && partial.length === 0) {
    summary = "All known AI crawlers have full access. Maximum AI visibility.";
  } else if (blocked.length > 0) {
    summary = `${blocked.length} AI crawler(s) blocked: ${blocked.map((c) => c.crawler).join(", ")}. This may reduce visibility in ${blocked.map((c) => c.engine).join(", ")}.`;
  } else {
    summary = `${partial.length} AI crawler(s) have partial access. Some content may not be indexed.`;
  }

  return {
    kind: "geo-crawlers",
    source,
    found: true,
    totalBlocks: blocks.length,
    crawlers,
    score,
    blocked: blocked.map((c) => c.crawler),
    partial: partial.map((c) => c.crawler),
    summary,
    recommendation: generateOptimalRobotsTxt(crawlers, options)
  };
}
|
|
251
|
+
|
|
252
|
+
/**
 * Render a crawler-access report as Markdown.
 *
 * Output sections: a header with source / found / score / summary, a status table with one
 * row per crawler, an optional "Blocked Crawlers" list (only when `report.blocked` is
 * non-empty), and the recommended robots.txt text in a fenced block.
 *
 * @param {object} report - Report as returned by analyzeCrawlers.
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function renderCrawlersMarkdown(report) {
  const iconFor = (status) => {
    if (status === "allowed") return "✅";
    if (status === "blocked") return "🚫";
    return "⚠️";
  };

  const header = [
    "# AI Crawler Access Analysis",
    "",
    `- Source: \`${report.source}\``,
    `- robots.txt found: \`${report.found}\``,
    `- AI Visibility Score: \`${report.score}/100\``,
    `- Summary: ${report.summary}`,
    "",
    "## Crawler Status Matrix",
    "",
    "| Crawler | Owner | Engine | Status | Reason |",
    "|---------|-------|--------|--------|--------|"
  ];

  const tableRows = report.crawlers.map(
    (entry) => `| ${entry.crawler} | ${entry.owner} | ${entry.engine} | ${iconFor(entry.status)} ${entry.status} | ${entry.reason} |`
  );

  const blockedSection = [];
  if (report.blocked && report.blocked.length > 0) {
    blockedSection.push("", "## Blocked Crawlers", "");
    for (const blockedName of report.blocked) {
      // Look the full entry up by name to get its engine and reason.
      const entry = report.crawlers.find((candidate) => candidate.crawler === blockedName);
      blockedSection.push(`- **${blockedName}** (${entry.engine}): ${entry.reason}`);
    }
  }

  const footer = ["", "## Recommended robots.txt AI Section", "", "```", report.recommendation.trim(), "```", ""];

  return [...header, ...tableRows, ...blockedSection, ...footer].join("\n");
}
|
|
283
|
+
|
|
284
|
+
/**
 * Write rendered crawler-analysis output by delegating to `writeScanOutput` from ./scan.js.
 *
 * @param {string} outputPath - Destination forwarded unchanged to writeScanOutput.
 * @param {string} content - Content forwarded unchanged to writeScanOutput.
 * @returns {Promise<*>} Whatever writeScanOutput resolves to.
 */
export async function writeCrawlersOutput(outputPath, content) {
  const outcome = await writeScanOutput(outputPath, content);
  return outcome;
}
|
package/src/diagnose.js
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { writeScanOutput } from "./scan.js";
|
|
4
|
+
import { fullPageAudit } from "./full-page-audit.js";
|
|
5
|
+
import { fullAudit } from "./full-audit.js";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Smart diagnose: auto-detects input type and runs the right analyses.
|
|
9
|
+
* - URL → full-page-audit (12 dimensions)
|
|
10
|
+
* - Directory → full-audit (project + infra)
|
|
11
|
+
* - HTML/MD file → full-page-audit (12 dimensions)
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * Classify the diagnose input.
 *
 * @param {string} input - A URL or a filesystem path.
 * @returns {Promise<"url"|"directory"|"file"|"unknown">} "url" when the input starts with
 *   http:// or https:// (case-insensitive); otherwise "directory" or "file" according to
 *   `fs.stat`; "unknown" when stat fails or the entry is neither.
 */
async function detectInputType(input) {
  const looksLikeUrl = /^https?:\/\//i.test(input);
  if (looksLikeUrl) {
    return "url";
  }

  const absolutePath = path.resolve(input);

  let stats;
  try {
    stats = await fs.stat(absolutePath);
  } catch {
    // Path does not exist or cannot be inspected.
    return "unknown";
  }

  if (stats.isDirectory()) {
    return "directory";
  }
  return stats.isFile() ? "file" : "unknown";
}
|
|
28
|
+
|
|
29
|
+
/**
 * Produce the one-line summary shown at the top of a diagnosis.
 *
 * @param {string} type - Input type; "directory" selects the project wording, anything else the page wording.
 * @param {object} result - For projects: reads `overallScore` and `actionPlan`.
 *   For pages: reads `compositeScore` and `dimensions` (each with a numeric `score`).
 * @returns {string} Summary sentence. Page summaries list the dimensions scoring below 40.
 */
function buildDiagnosisSummary(type, result) {
  if (type === "directory") {
    const actionCount = result.actionPlan?.length || 0;
    return `Project diagnosis complete. GEO Score: ${result.overallScore}/100. ${actionCount} action item(s).`;
  }

  const weakNames = [];
  for (const [name, dimension] of Object.entries(result.dimensions)) {
    if (dimension.score < 40) {
      weakNames.push(name);
    }
  }

  const verdict = weakNames.length === 0
    ? "All dimensions healthy."
    : `Weak: ${weakNames.join(", ")}.`;

  return `Page diagnosis complete. Composite: ${result.compositeScore}/100. ${verdict}`;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Derive the prioritized action list for a diagnosis.
 *
 * Projects ("directory"): the first 10 entries of `result.actionPlan`, with `owner`
 * defaulting to "Engineering" and `source` to "project-audit".
 *
 * Pages: dimensions are visited weakest first; dimensions scoring 80 or more are skipped.
 * Up to two recommendations per dimension are taken from `result.details[dim].recommendations`,
 * and the overall list is capped at 15. Priority is P0 below 30, P1 below 50, otherwise P2.
 *
 * @param {string} type - Input type.
 * @param {object} result - Audit result.
 * @returns {Array<{priority: string, action: string, owner: string, source: string}>}
 */
function buildPrioritizedActions(type, result) {
  if (type === "directory") {
    return (result.actionPlan || []).slice(0, 10).map((task) => ({
      priority: task.priority,
      action: task.action,
      owner: task.owner || "Engineering",
      source: task.source || "project-audit"
    }));
  }

  const priorityFor = (score) => {
    if (score < 30) return "P0";
    if (score < 50) return "P1";
    return "P2";
  };

  const ownerFor = (dimension) => {
    if (dimension === "security") return "Engineering";
    if (dimension === "eeat" || dimension === "citability") return "Content";
    return "SEO";
  };

  // Recommendations may be plain strings or objects carrying `rec` / `action`.
  const describe = (rec) => {
    if (typeof rec === "string") return rec;
    return rec.rec || rec.action || JSON.stringify(rec);
  };

  const weakestFirst = Object.entries(result.dimensions)
    .sort(([, left], [, right]) => left.score - right.score);

  const collected = weakestFirst
    .filter(([, data]) => !(data.score >= 80))
    .flatMap(([dimension, data]) => {
      const recommendations = result.details?.[dimension]?.recommendations || [];
      return recommendations.slice(0, 2).map((rec) => ({
        priority: priorityFor(data.score),
        action: describe(rec),
        owner: ownerFor(dimension),
        source: dimension
      }));
    });

  return collected.slice(0, 15);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Pick up to five quick-win suggestions based on low scores.
 *
 * Pages read `result.dimensions[<name>].score`; projects ("directory") read
 * `result.scores[<name>]`. A suggestion is emitted when the score is below its threshold;
 * missing scores produce no suggestion.
 *
 * @param {string} type - Input type.
 * @param {object} result - Audit result.
 * @returns {string[]} Suggestions in table order, at most five.
 */
function buildQuickWins(type, result) {
  // [score key, threshold, suggestion]
  const pageChecks = [
    ["socialMeta", 50, "Add Open Graph and Twitter Card meta tags for better social sharing and AI discoverability."],
    ["headingStructure", 40, "Add question-style H2 headings (e.g., 'What is...?', 'How to...?') for AI answer extraction."],
    ["freshness", 40, "Add datePublished and dateModified to your JSON-LD structured data."],
    ["schema", 40, "Add JSON-LD structured data. Run: geo-ai-search-optimization auto-fix <url>"],
    ["readability", 40, "Simplify sentence structure. Target 15-20 word average sentence length."]
  ];
  const projectChecks = [
    ["crawlers", 50, "Review robots.txt AI crawler rules. Run: geo-ai-search-optimization crawlers <url>"],
    ["llmsValidation", 50, "Create or fix llms.txt. Run: geo-ai-search-optimization init-llms"],
    ["base", 40, "Fix foundational GEO signals. Run: geo-ai-search-optimization audit <path>"]
  ];

  let suggestions;
  if (type !== "directory") {
    const dims = result.dimensions;
    suggestions = pageChecks
      .filter(([key, threshold]) => dims[key]?.score < threshold)
      .map(([, , tip]) => tip);
  } else {
    suggestions = projectChecks
      .filter(([key, threshold]) => result.scores?.[key] < threshold)
      .map(([, , tip]) => tip);
  }

  return suggestions.slice(0, 5);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Run the analysis that fits the input and wrap it in a diagnosis report.
 *
 * Directories are analysed with fullAudit ("project" analysis, score = `overallScore`);
 * URLs and files are analysed with fullPageAudit ("page" analysis, score = `compositeScore`).
 *
 * @param {string} input - URL, file path or directory path.
 * @param {object} [options] - Forwarded unchanged to the underlying audit function.
 * @returns {Promise<object>} Report with `kind: "geo-diagnose"`, `input`, `inputType`,
 *   `analysisType`, `score`, `scoreLabel`, `summary`, `quickWins`, `actions`,
 *   `nextCommands` and the full underlying `result`.
 * @throws {Error} When the input is neither a URL nor an existing file or directory.
 */
export async function diagnose(input, options = {}) {
  const type = await detectInputType(input);
  if (type === "unknown") {
    throw new Error(`Cannot diagnose: "${input}" is not a valid URL, file, or directory.`);
  }

  const isProject = type === "directory";
  const result = isProject
    ? await fullAudit(input, options)
    : await fullPageAudit(input, options);
  const analysisType = isProject ? "project" : "page";

  const summary = buildDiagnosisSummary(type, result);
  const actions = buildPrioritizedActions(type, result);
  const quickWins = buildQuickWins(type, result);
  const score = isProject ? result.overallScore : result.compositeScore;

  let scoreLabel = "Critical";
  if (score >= 80) {
    scoreLabel = "Strong";
  } else if (score >= 60) {
    scoreLabel = "Moderate";
  } else if (score >= 40) {
    scoreLabel = "Weak";
  }

  const nextCommands = buildNextCommands(type, result);

  return {
    kind: "geo-diagnose",
    input,
    inputType: type,
    analysisType,
    score,
    scoreLabel,
    summary,
    quickWins,
    actions,
    nextCommands,
    result // full underlying result for JSON consumers
  };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Suggest follow-up CLI commands for a diagnosis.
 *
 * @param {string} type - Input type; "directory" selects the project commands, anything else the page commands.
 * @param {object} result - Accepted for interface symmetry with the other builders; not read here.
 * @returns {Array<{cmd: string, args: string, reason: string}>} Type-specific commands followed
 *   by the benchmark suggestion, which is always included.
 */
function buildNextCommands(type, result) {
  const pageCommands = [
    { cmd: "auto-fix", args: "<url>", reason: "Generate ready-to-use fix code" },
    { cmd: "full-page-audit", args: "<url> --save", reason: "Save snapshot for trend tracking" },
    { cmd: "page-trend", args: "<url>", reason: "View score history" }
  ];
  const projectCommands = [
    { cmd: "full-audit", args: "<path> --sample-urls <url1,url2>", reason: "Add page sampling for deeper analysis" },
    { cmd: "trend", args: "", reason: "View project score trend" }
  ];
  const benchmark = { cmd: "benchmark", args: "<url> --competitors <url1,url2>", reason: "Compare against competitors" };

  const specific = type !== "directory" ? pageCommands : projectCommands;
  return [...specific, benchmark];
}
|
|
160
|
+
|
|
161
|
+
/**
 * Render a diagnosis report as Markdown.
 *
 * Sections, in order: header (input, type, score, summary); "Dimension Scores" for page
 * analyses that carry `result.dimensions` (weakest first); then "Quick Wins",
 * "Prioritized Actions" and "Suggested Next Commands", each omitted when its list is empty.
 *
 * @param {object} report - Report as returned by diagnose.
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function renderDiagnoseMarkdown(report) {
  const out = [
    "# GEO Diagnosis",
    "",
    `- Input: \`${report.input}\``,
    `- Type: \`${report.inputType}\` → \`${report.analysisType}\` analysis`,
    `- **Score: \`${report.score}/100\` (${report.scoreLabel})**`,
    `- Summary: ${report.summary}`,
    ""
  ];

  // Appends "## heading", a blank line, one formatted bullet per item and a trailing
  // blank line; does nothing for an empty list.
  const appendList = (heading, items, format) => {
    if (items.length > 0) {
      out.push(heading, "");
      for (const item of items) {
        out.push(format(item));
      }
      out.push("");
    }
  };

  if (report.analysisType === "page" && report.result.dimensions) {
    const displayNames = {
      base: "Base", citability: "Citability", eeat: "E-E-A-T", readability: "Readability",
      headingStructure: "Headings", internalLinks: "Links", socialMeta: "Social",
      platformReady: "Platforms", schema: "Schema", freshness: "Freshness",
      security: "Security", topics: "Topics"
    };
    const healthIcon = (score) => {
      if (score >= 70) return "🟢";
      if (score >= 40) return "🟡";
      return "🔴";
    };

    out.push("## Dimension Scores", "");
    const weakestFirst = Object.entries(report.result.dimensions)
      .sort(([, left], [, right]) => left.score - right.score);
    for (const [key, dimension] of weakestFirst) {
      // Unknown dimension keys are shown as-is.
      out.push(`- ${healthIcon(dimension.score)} **${displayNames[key] || key}**: ${dimension.score}/100`);
    }
    out.push("");
  }

  appendList("## Quick Wins", report.quickWins, (win) => `- ⚡ ${win}`);

  appendList("## Prioritized Actions", report.actions, (action) => {
    let badge = "🟡";
    if (action.priority === "P0") {
      badge = "🔴";
    } else if (action.priority === "P1") {
      badge = "🟠";
    }
    return `- ${badge} **${action.priority}** [${action.source}] ${action.action}`;
  });

  appendList(
    "## Suggested Next Commands",
    report.nextCommands,
    (command) => `- \`geo-ai-search-optimization ${command.cmd} ${command.args}\` — ${command.reason}`
  );

  return out.join("\n");
}
|
|
218
|
+
|
|
219
|
+
/**
 * Write rendered diagnosis output by delegating to `writeScanOutput` from ./scan.js.
 *
 * @param {string} outputPath - Destination forwarded unchanged to writeScanOutput.
 * @param {string} content - Content forwarded unchanged to writeScanOutput.
 * @returns {Promise<*>} Whatever writeScanOutput resolves to.
 */
export async function writeDiagnoseOutput(outputPath, content) {
  const outcome = await writeScanOutput(outputPath, content);
  return outcome;
}
|