aeo.js 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +303 -6
- package/dist/angular.d.mts +29 -0
- package/dist/angular.d.ts +29 -0
- package/dist/angular.js +1314 -0
- package/dist/angular.js.map +1 -0
- package/dist/angular.mjs +1310 -0
- package/dist/angular.mjs.map +1 -0
- package/dist/astro.d.mts +15 -0
- package/dist/astro.d.ts +15 -0
- package/dist/astro.js +1421 -0
- package/dist/astro.js.map +1 -0
- package/dist/astro.mjs +1414 -0
- package/dist/astro.mjs.map +1 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +1880 -2
- package/dist/cli.js.map +1 -0
- package/dist/cli.mjs +1878 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/index.d.mts +191 -0
- package/dist/index.d.ts +191 -1
- package/dist/index.js +1829 -1
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +1801 -1
- package/dist/index.mjs.map +1 -0
- package/dist/next.d.mts +18 -0
- package/dist/next.d.ts +18 -0
- package/dist/next.js +1302 -0
- package/dist/next.js.map +1 -0
- package/dist/next.mjs +1295 -0
- package/dist/next.mjs.map +1 -0
- package/dist/nuxt.d.mts +13 -0
- package/dist/nuxt.d.ts +13 -0
- package/dist/nuxt.js +1344 -0
- package/dist/nuxt.js.map +1 -0
- package/dist/nuxt.mjs +1337 -0
- package/dist/nuxt.mjs.map +1 -0
- package/dist/react.d.mts +10 -0
- package/dist/react.d.ts +10 -0
- package/dist/react.js +1023 -0
- package/dist/react.js.map +1 -0
- package/dist/react.mjs +1020 -0
- package/dist/react.mjs.map +1 -0
- package/dist/types-Cn_Qbkmg.d.mts +166 -0
- package/dist/types-Cn_Qbkmg.d.ts +166 -0
- package/dist/vite.d.mts +5 -0
- package/dist/vite.d.ts +5 -0
- package/dist/vite.js +1370 -0
- package/dist/vite.js.map +1 -0
- package/dist/vite.mjs +1366 -0
- package/dist/vite.mjs.map +1 -0
- package/dist/vue.d.mts +19 -0
- package/dist/vue.d.ts +19 -0
- package/dist/vue.js +1078 -0
- package/dist/vue.js.map +1 -0
- package/dist/vue.mjs +1072 -0
- package/dist/vue.mjs.map +1 -0
- package/dist/webpack.d.mts +11 -0
- package/dist/webpack.d.ts +11 -0
- package/dist/webpack.js +1179 -0
- package/dist/webpack.js.map +1 -0
- package/dist/webpack.mjs +1173 -0
- package/dist/webpack.mjs.map +1 -0
- package/dist/widget.d.mts +37 -0
- package/dist/widget.d.ts +37 -0
- package/dist/widget.js +1004 -0
- package/dist/widget.js.map +1 -0
- package/dist/widget.mjs +1001 -0
- package/dist/widget.mjs.map +1 -0
- package/package.json +110 -10
package/dist/index.mjs
CHANGED
|
@@ -1 +1,1801 @@
|
|
|
1
|
-
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync, readFileSync, readdirSync, statSync, copyFileSync } from 'fs';
|
|
2
|
+
import { join, dirname, extname, relative } from 'path';
|
|
3
|
+
import 'minimatch';
|
|
4
|
+
import { createHash } from 'crypto';
|
|
5
|
+
|
|
6
|
+
// src/core/utils.ts
|
|
7
|
+
/**
 * Inspect a configuration object and describe anything that looks wrong.
 *
 * Nothing is thrown and the config is not modified; the caller decides what
 * to do with the messages.
 *
 * @param {object} config - Configuration to check (reads `url`, `title`, `robots.crawlDelay`).
 * @returns {string[]} Human-readable warnings, empty when nothing was flagged.
 */
function validateConfig(config) {
  const { url, title, robots } = config;
  const crawlDelay = robots == null ? undefined : robots.crawlDelay;

  // Each rule is a [triggered, message] pair, listed in reporting order.
  const rules = [
    [
      Boolean(url) && !/^https?:\/\//.test(url),
      `url "${url}" should start with http:// or https://`,
    ],
    [
      url === "https://example.com",
      'url is set to the default "https://example.com" \u2014 set your actual site URL',
    ],
    [
      !title,
      'title is not set \u2014 your generated files will use "My Site"',
    ],
    [
      // A delay of 0 (or a missing one) is falsy and therefore never flagged.
      Boolean(crawlDelay) && crawlDelay < 0,
      "robots.crawlDelay should be a positive number",
    ],
  ];

  return rules.filter(([triggered]) => triggered).map(([, message]) => message);
}
|
|
24
|
+
/**
 * Fill in every optional setting so the generators can read a complete config.
 *
 * `contentDir` and `outDir` fall back to what `detectFramework()` reports for
 * the current working directory. Boolean switches are on unless explicitly set
 * to `false`. Trailing slashes are removed from a string `url`, because the
 * generators append paths as `${url}/...` and would otherwise emit `//`.
 *
 * @param {object} [config={}] - Partial user configuration.
 * @returns {object} Configuration with all sections populated.
 */
function resolveConfig(config = {}) {
  const frameworkInfo = detectFramework();

  // Optional sections: treat a missing one as empty so lookups stay flat.
  const generators = config.generators || {};
  const robots = config.robots || {};
  const schema = config.schema || {};
  const organization = schema.organization || {};
  const og = config.og || {};
  const widget = config.widget || {};
  const theme = widget.theme || {};

  const onUnlessFalse = (value) => value !== false;

  const title = config.title || "My Site";
  const rawUrl = config.url || "https://example.com";
  // Keep the raw value if stripping would leave nothing (e.g. url === "/").
  const url = typeof rawUrl === "string" ? rawUrl.replace(/\/+$/, "") || rawUrl : rawUrl;

  return {
    title,
    description: config.description || "",
    url,
    contentDir: config.contentDir || frameworkInfo.contentDir,
    outDir: config.outDir || frameworkInfo.outDir,
    pages: config.pages || [],
    generators: {
      robotsTxt: onUnlessFalse(generators.robotsTxt),
      llmsTxt: onUnlessFalse(generators.llmsTxt),
      llmsFullTxt: onUnlessFalse(generators.llmsFullTxt),
      rawMarkdown: onUnlessFalse(generators.rawMarkdown),
      manifest: onUnlessFalse(generators.manifest),
      sitemap: onUnlessFalse(generators.sitemap),
      aiIndex: onUnlessFalse(generators.aiIndex),
      schema: onUnlessFalse(generators.schema)
    },
    robots: {
      allow: robots.allow || ["/"],
      disallow: robots.disallow || [],
      crawlDelay: robots.crawlDelay || 0,
      sitemap: robots.sitemap || ""
    },
    schema: {
      enabled: onUnlessFalse(schema.enabled),
      organization: {
        name: organization.name || title,
        url: organization.url || url,
        logo: organization.logo || "",
        sameAs: organization.sameAs || []
      },
      defaultType: schema.defaultType || "WebPage"
    },
    og: {
      enabled: onUnlessFalse(og.enabled),
      image: og.image || "",
      twitterHandle: og.twitterHandle || "",
      type: og.type || "website"
    },
    widget: {
      enabled: onUnlessFalse(widget.enabled),
      position: widget.position || "bottom-right",
      theme: {
        background: theme.background || "rgba(18, 18, 24, 0.9)",
        text: theme.text || "#C0C0C5",
        accent: theme.accent || "#E8E8EA",
        badge: theme.badge || "#4ADE80"
      },
      humanLabel: widget.humanLabel || "Human",
      aiLabel: widget.aiLabel || "AI",
      showBadge: onUnlessFalse(widget.showBadge)
    }
  };
}
|
|
81
|
+
/**
 * Split a Markdown document into its leading `---` frontmatter and its body.
 *
 * This is a minimal `key: value` reader, not a YAML parser: every value is a
 * string, only the first `:` on a line separates key from value, and nested
 * structures are not interpreted.
 *
 * @param {string} content - Raw file contents.
 * @returns {{ frontmatter: Object<string, string>, content: string }}
 *   Parsed keys plus the text after the closing `---`. When no frontmatter
 *   block is found, `frontmatter` is empty and `content` is returned unchanged.
 */
function parseFrontmatter(content) {
  // The closing fence may be followed by a newline and the body, or may be
  // the very end of the file (no trailing newline).
  const match = content.match(/^---\s*\n([\s\S]*?)\n---(?:\s*\n([\s\S]*)|\s*$)/);
  if (!match) {
    return { frontmatter: {}, content };
  }

  const frontmatter = {};
  for (const line of match[1].split("\n")) {
    const colon = line.indexOf(":");
    if (colon === -1) continue;
    const key = line.slice(0, colon).trim();
    if (!key) continue;

    const value = line.slice(colon + 1).trim();
    // Unwrap only a matching pair of quotes; a lone quote is part of the value.
    const first = value[0];
    const isQuoted =
      value.length >= 2 &&
      (first === '"' || first === "'") &&
      value[value.length - 1] === first;
    frontmatter[key] = isQuoted ? value.slice(1, -1) : value;
  }

  return { frontmatter, content: match[2] === undefined ? "" : match[2] };
}
|
|
99
|
+
/**
 * Demote ATX headings (`#` … `######`) by `levels`, capping at level 6.
 *
 * Lines inside fenced code blocks (``` or ~~~) are left untouched so that
 * shell comments and similar `# ...` lines in samples are not rewritten.
 * The whitespace after the hashes is preserved, and a bare `#` line is not
 * treated as a heading.
 *
 * @param {string} content - Markdown text.
 * @param {number} [levels=1] - How many levels to add to each heading.
 * @returns {string} The text with adjusted headings.
 */
function bumpHeadings(content, levels = 1) {
  let openFence = null; // marker of the fence we are inside, or null

  const adjusted = content.split("\n").map((line) => {
    const fence = /^ {0,3}(`{3,}|~{3,})/.exec(line);
    if (fence) {
      const marker = fence[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.slice(fence[0].length).trim() === ""
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no info string.
        openFence = null;
      }
      return line;
    }
    if (openFence !== null) return line;

    return line.replace(/^#{1,6}(?=[ \t])/, (hashes) => {
      // Clamp to 1..6 so a negative `levels` cannot produce an invalid repeat count.
      const level = Math.max(1, Math.min(hashes.length + levels, 6));
      return "#".repeat(level);
    });
  });

  return adjusted.join("\n");
}
|
|
105
|
+
/**
 * Derive a title from Markdown content.
 *
 * Preference order: the first `# ` heading, then the first `## ` heading,
 * then the first non-blank line truncated to 100 characters.
 *
 * @param {string} content - Markdown text (frontmatter already removed).
 * @returns {string} The title, or `""` when the content has no text.
 */
function extractTitle(content) {
  // [ \t]+ rather than \s+ keeps the heading text on the same line as its hashes.
  const h1 = content.match(/^#[ \t]+(.+)$/m);
  if (h1 && h1[1].trim()) return h1[1].trim();

  const h2 = content.match(/^##[ \t]+(.+)$/m);
  if (h2 && h2[1].trim()) return h2[1].trim();

  // Skip leading blank lines so documents that start with a newline still get a title.
  const firstLine = content.split("\n").find((line) => line.trim() !== "") || "";
  return firstLine.trim().slice(0, 100);
}
|
|
113
|
+
/**
 * Read and parse `<projectRoot>/package.json`.
 *
 * This is best-effort: a missing file, unreadable file, invalid JSON, or JSON
 * that is not a plain object all yield `{}` so callers can read properties
 * without further checks.
 *
 * @param {string} [projectRoot=process.cwd()] - Directory containing package.json.
 * @returns {object} The parsed manifest, or `{}`.
 */
function readPackageJson(projectRoot = process.cwd()) {
  const packageJsonPath = join(projectRoot, "package.json");
  if (!existsSync(packageJsonPath)) {
    return {};
  }
  try {
    const parsed = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
    // JSON.parse accepts `null`, arrays and primitives; none is a usable manifest.
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Deliberately silent: framework detection falls back to defaults.
    return {};
  }
}
|
|
125
|
+
|
|
126
|
+
// src/core/detect.ts
|
|
127
|
+
/**
 * Guess the project's framework from its package.json dependencies and return
 * the default content and output directories used for it.
 *
 * Both `dependencies` and `devDependencies` are consulted. Rules are tried in
 * order and the first match wins, so a project depending on both `next` and
 * `vite` is reported as "next".
 *
 * @param {string} [projectRoot=process.cwd()] - Directory containing package.json.
 * @returns {{ framework: string, contentDir: string, outDir: string }}
 */
function detectFramework(projectRoot = process.cwd()) {
  const { dependencies, devDependencies } = readPackageJson(projectRoot);
  const installed = { ...dependencies, ...devDependencies };

  // [packages that identify the framework, framework, contentDir, outDir]
  const rules = [
    [["next"], "next", "app", "out"],
    [["nuxt", "@nuxt/kit"], "nuxt", "content", ".output/public"],
    [["astro", "@astrojs/astro"], "astro", "src/content", "dist"],
    [["@remix-run/dev"], "remix", "app", "build/client"],
    [["@sveltejs/kit"], "sveltekit", "src", "build"],
    [["@angular/core"], "angular", "src", "dist"],
    [["@docusaurus/core"], "docusaurus", "docs", "build"],
    [["vite"], "vite", "src", "dist"],
  ];

  for (const [packages, framework, contentDir, outDir] of rules) {
    if (packages.some((name) => installed[name])) {
      return { framework, contentDir, outDir };
    }
  }

  return { framework: "unknown", contentDir: "src", outDir: "dist" };
}
|
|
195
|
+
|
|
196
|
+
// src/core/robots.ts
|
|
197
|
+
// User-agent tokens that generateRobotsTxt emits an explicit "Allow: /" group
// for, in this order. Despite the name, the list also contains agents whose
// names suggest SEO tools, regional search engines and link-preview fetchers.
// NOTE(review): "SemrushBot" and "SEMrushBot" differ only in case, and
// "Bingbot" is also written by generateRobotsTxt's fixed header — confirm
// whether the repeated groups are intended.
var AI_CRAWLERS = [
  "GPTBot", "OAI-SearchBot", "ChatGPT-User",
  "ClaudeBot", "Claude-Web", "anthropic-ai",
  "PerplexityBot", "Google-Extended", "Gemini-Deep-Research",
  "Bingbot", "FacebookBot", "meta-externalagent",
  "Amazonbot", "Applebot", "DeepSeekBot",
  "Bytespider", "cohere-ai", "CCBot",
  "DiffBot", "YouBot", "FirecrawlAgent",
  "Crawl4AI", "BraveBot",

  "SemrushBot", "AhrefsBot", "MJ12bot",
  "DotBot", "DataForSeoBot", "Screaming Frog SEO Spider",
  "SEOkicks", "SEMrushBot", "BLEXBot",

  "Yandex", "Baiduspider", "Sogou", "Exabot",

  "facebookexternalhit", "LinkedInBot", "WhatsApp",
  "Slackbot", "TwitterBot", "TelegramBot",
  "Discordbot", "PinterestBot", "TumblrBot",
  "ViberBot", "SkypeUriPreview", "redditbot",
  "Snapchat", "TikTok"
];
|
|
249
|
+
/**
 * Build the text of a robots.txt that allows every listed crawler.
 *
 * Layout: fixed Googlebot/Bingbot groups, one "Allow: /" group per entry in
 * AI_CRAWLERS, a catch-all group, an optional Sitemap line, and comment lines
 * pointing at the generated AEO files.
 *
 * NOTE(review): `config.robots` (allow / disallow / crawlDelay / sitemap) is
 * not read here — confirm whether it is meant to influence this output.
 *
 * @param {object} config - Reads `config.url` only.
 * @returns {string} robots.txt contents joined with "\n".
 */
function generateRobotsTxt(config) {
  // When no url is configured, fall back to site-relative paths in the
  // comment block instead of printing the literal text "undefined".
  const baseUrl = config.url || "";

  const lines = [
    "# robots.txt generated by aeo.js",
    "# Allow AI crawlers to index this site",
    "",
    "# Traditional search engines",
    "User-agent: Googlebot",
    "Allow: /",
    "",
    "User-agent: Bingbot",
    "Allow: /",
    "",
    "# AI crawlers and answer engines"
  ];

  for (const crawler of AI_CRAWLERS) {
    lines.push(`User-agent: ${crawler}`, "Allow: /", "");
  }

  lines.push("# Default for all other bots", "User-agent: *", "Allow: /", "");

  if (config.url) {
    lines.push(`Sitemap: ${config.url}/sitemap.xml`);
  }

  lines.push(
    "",
    "# AEO (Answer Engine Optimization) files",
    "# These help LLMs understand your content better",
    `# ${baseUrl}/llms.txt`,
    `# ${baseUrl}/llms-full.txt`,
    `# ${baseUrl}/docs.json`,
    `# ${baseUrl}/ai-index.json`
  );

  return lines.join("\n");
}
|
|
284
|
+
/**
 * Recursively gather `.md` / `.mdx` files under `dir` with parsed metadata.
 *
 * Directories whose name starts with "." and `node_modules` are skipped.
 * Problems are reported with `console.warn` and never thrown; an entry that
 * cannot be read (for example a dangling symlink) is skipped without
 * discarding its siblings.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [base=dir] - Root that returned paths are relative to.
 * @returns {Array<{path: string, content: string, title: string, description: (string|undefined), frontmatter: object}>}
 *   `path` always uses "/" separators so it can be embedded in URLs.
 */
function collectMarkdownFiles(dir, base = dir) {
  const files = [];

  let names;
  try {
    names = readdirSync(dir);
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return files;
  }

  for (const name of names) {
    const fullPath = join(dir, name);
    try {
      const stat = statSync(fullPath);
      if (stat.isDirectory()) {
        if (!name.startsWith(".") && name !== "node_modules") {
          files.push(...collectMarkdownFiles(fullPath, base));
        }
        continue;
      }

      const extension = extname(name);
      if (!stat.isFile() || (extension !== ".md" && extension !== ".mdx")) continue;

      const { frontmatter, content: mainContent } = parseFrontmatter(readFileSync(fullPath, "utf-8"));
      files.push({
        // path.relative yields "\" separators on Windows; normalize for URL use.
        path: relative(base, fullPath).replace(/\\/g, "/"),
        content: mainContent,
        title: frontmatter.title || extractTitle(mainContent),
        description: frontmatter.description,
        frontmatter
      });
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }

  return files;
}
|
|
311
|
+
/**
 * Build the contents of `llms.txt`: a Markdown overview of the site.
 *
 * Sections: title and description, an "About" blurb, configured pages,
 * Markdown files found under `config.contentDir` grouped by their top-level
 * directory (files directly in the content root are listed under
 * "Main Documentation"), quick links, and guidance for LLM consumers.
 *
 * @param {object} config - Reads `title`, `description`, `url`, `pages`, `contentDir`.
 * @returns {string} File contents joined with "\n".
 */
function generateLlmsTxt(config) {
  const lines = [`# ${config.title}`, ""];

  if (config.description) {
    lines.push(`> ${config.description}`, "");
  }

  lines.push(
    "## About",
    "",
    "This file provides a structured overview of the documentation and content available on this site,",
    "optimized for consumption by Large Language Models (LLMs) and AI assistants.",
    ""
  );

  if (config.pages && config.pages.length > 0) {
    lines.push("## Pages", "");
    for (const page of config.pages) {
      const url = `${config.url}${page.pathname === "/" ? "" : page.pathname}`;
      const title = page.title || page.pathname;
      lines.push(`- [${title}](${url})`);
      if (page.description) {
        lines.push(` ${page.description}`);
      }
    }
    lines.push("");
  }

  const markdownFiles = collectMarkdownFiles(config.contentDir);
  if (markdownFiles.length > 0) {
    lines.push("## Documentation", "");

    // The empty string cannot be a directory name, so it is a safe key for
    // files that sit directly in the content root. A Map is used so that
    // directory names such as "constructor" cannot collide with inherited
    // object properties.
    const ROOT_GROUP = "";
    const grouped = new Map();
    for (const file of markdownFiles) {
      const urlPath = file.path.replace(/\\/g, "/");
      const slash = urlPath.indexOf("/");
      const group = slash === -1 ? ROOT_GROUP : urlPath.slice(0, slash);
      if (!grouped.has(group)) grouped.set(group, []);
      grouped.get(group).push({ file, urlPath });
    }

    for (const [group, members] of grouped) {
      lines.push(`### ${group === ROOT_GROUP ? "Main Documentation" : group}`, "");
      for (const { file, urlPath } of members) {
        const url = `${config.url}/${urlPath.replace(/\.mdx?$/, "")}`;
        lines.push(`- [${file.title}](${url})`);
        if (file.description) {
          lines.push(` ${file.description}`);
        }
      }
      lines.push("");
    }
  }

  lines.push(
    "## Quick Links",
    "",
    `- Full Documentation: ${config.url}/llms-full.txt`,
    `- Documentation Manifest: ${config.url}/docs.json`,
    `- AI-Optimized Index: ${config.url}/ai-index.json`,
    `- Sitemap: ${config.url}/sitemap.xml`,
    "",
    "## For LLMs",
    "",
    "To get the complete documentation in a single file, request:",
    `${config.url}/llms-full.txt`,
    "",
    "For structured access to individual pages with metadata:",
    `${config.url}/docs.json`,
    "",
    "For RAG (Retrieval Augmented Generation) systems:",
    `${config.url}/ai-index.json`,
    "",
    "---",
    "Generated by aeo.js - Answer Engine Optimization for the modern web",
    "Learn more at https://aeojs.org"
  );

  return lines.join("\n");
}
|
|
384
|
+
/**
 * Walk `dir` in sorted order and render every `.md` / `.mdx` file as one
 * self-contained text section.
 *
 * Each section starts with a `---` rule, a `# <title>` line (frontmatter title
 * or the relative path), a `Source:` line and, when present, the frontmatter
 * description as a blockquote. The file's own headings are pushed down one
 * level so they nest under that generated title.
 *
 * Dot-directories and `node_modules` are skipped. If anything in a directory
 * fails to read, a warning is logged and the sections gathered so far for
 * that directory are returned.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [base=dir] - Root used for the displayed relative paths.
 * @returns {string[]} One string per file.
 */
function collectAndConcatenateMarkdown(dir, base = dir) {
  const sections = [];
  const isMarkdown = (name) => {
    const extension = extname(name);
    return extension === ".md" || extension === ".mdx";
  };

  try {
    for (const name of readdirSync(dir).sort()) {
      const target = join(dir, name);
      const info = statSync(target);

      if (info.isDirectory() && !name.startsWith(".") && name !== "node_modules") {
        sections.push(...collectAndConcatenateMarkdown(target, base));
        continue;
      }
      if (!info.isFile() || !isMarkdown(name)) continue;

      const parsed = parseFrontmatter(readFileSync(target, "utf-8"));
      const meta = parsed.frontmatter;
      const shownPath = relative(base, target);

      const parts = ["---", "", `# ${meta.title || shownPath}`, "", `Source: ${shownPath}`, ""];
      if (meta.description) {
        parts.push(`> ${meta.description}`, "");
      }
      parts.push(bumpHeadings(parsed.content, 1), "");

      sections.push(parts.join("\n"));
    }
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }

  return sections;
}
|
|
423
|
+
/**
 * Build the contents of `llms-full.txt`: all known content in a single file.
 *
 * After a fixed preamble, one section is written per configured page and then
 * one per Markdown file under `config.contentDir`. When neither source
 * produces anything, a single placeholder section describing the site is
 * written instead. A fixed footer closes the file.
 *
 * @param {object} config - Reads `title`, `description`, `url`, `pages`, `contentDir`.
 * @returns {string} File contents joined with "\n".
 */
function generateLlmsFullTxt(config) {
  const out = [
    `# ${config.title} - Complete Documentation`,
    "",
    `This file contains all documentation concatenated into a single file for easy consumption by LLMs.`,
    ""
  ];
  if (config.description) {
    out.push(`> ${config.description}`, "");
  }
  out.push(
    "## Table of Contents",
    "",
    "This document includes all content from this project.",
    "Each section is separated by a horizontal rule (---) for easy parsing.",
    ""
  );

  let wroteSection = false;

  const pages = config.pages && config.pages.length > 0 ? config.pages : [];
  for (const page of pages) {
    const suffix = page.pathname === "/" ? "" : page.pathname;
    out.push("---", "", `# ${page.title || page.pathname}`, "", `URL: ${config.url}${suffix}`, "");
    if (page.description) {
      out.push(`> ${page.description}`, "");
    }
    if (page.content) {
      out.push(page.content, "");
    }
    wroteSection = true;
  }

  const fileSections = collectAndConcatenateMarkdown(config.contentDir);
  if (fileSections.length > 0) {
    out.push(...fileSections);
    wroteSection = true;
  }

  if (!wroteSection) {
    // Nothing to concatenate: describe the site itself so the file is not empty.
    out.push("---", "", `# ${config.title}`, "", `URL: ${config.url}`, "");
    if (config.description) {
      out.push(config.description, "");
    }
  }

  out.push(
    "---",
    "",
    "## About This Document",
    "",
    "This concatenated documentation file is generated automatically by aeo.js",
    "to make it easier for AI systems to understand the complete context of this project.",
    "",
    `For a structured index, see: ${config.url}/llms.txt`,
    `For individual files, see: ${config.url}/docs.json`,
    "",
    "Generated by aeo.js - https://aeojs.org"
  );

  return out.join("\n");
}
|
|
494
|
+
/**
 * Create `path` as a directory, including any missing parent directories.
 *
 * @param {string} path - Directory to create.
 * @returns {void}
 */
function ensureDir(path) {
  const options = { recursive: true };
  mkdirSync(path, options);
}
|
|
497
|
+
/**
 * Mirror every `.md` file under `config.contentDir` into `config.outDir`,
 * keeping the relative directory layout.
 *
 * Only the `.md` extension is copied here (`.mdx` is not). Dot-directories
 * and `node_modules` are not descended into. Failures are logged with
 * `console.warn`; a failed copy skips that file, while a failure reading a
 * directory entry stops the walk of that directory.
 *
 * @param {object} config - Reads `contentDir` and `outDir`.
 * @returns {Array<{source: string, destination: string}>} Files actually copied.
 */
function copyMarkdownFiles(config) {
  const copiedFiles = [];
  const isSkippedDir = (name) => name.startsWith(".") || name === "node_modules";

  const walk = (dir) => {
    try {
      for (const name of readdirSync(dir)) {
        const source = join(dir, name);
        const info = statSync(source);

        if (info.isDirectory()) {
          if (!isSkippedDir(name)) walk(source);
          continue;
        }
        if (!info.isFile() || extname(name) !== ".md") continue;

        const destination = join(config.outDir, relative(config.contentDir, source));
        ensureDir(dirname(destination));
        try {
          copyFileSync(source, destination);
          copiedFiles.push({ source, destination });
        } catch (error) {
          console.warn(`Warning: Could not copy ${source}:`, error);
        }
      }
    } catch (error) {
      console.warn(`Warning: Could not read directory ${dir}:`, error);
    }
  };

  walk(config.contentDir);
  return copiedFiles;
}
|
|
529
|
+
/**
 * Write one Markdown file per configured page that has `content`.
 *
 * The root page ("/") becomes `index.md`; any other pathname becomes
 * `<pathname without leading/trailing slash>.md` under `config.outDir`.
 * Each file gets a small frontmatter block (title, description, url, source,
 * generated_by), then the title, description and page content.
 *
 * A page whose file cannot be written is reported with `console.warn` and
 * skipped; the remaining pages are still processed.
 *
 * @param {object} config - Reads `pages`, `title`, `url`, `outDir`.
 * @returns {Array<{pathname: string, destination: string}>} Files written.
 */
function generatePageMarkdownFiles(config) {
  const generated = [];

  for (const page of config.pages || []) {
    if (!page.content) continue;

    const isRoot = page.pathname === "/";
    const pageTitle = page.title || (isRoot ? config.title : undefined);
    const filename = isRoot
      ? "index.md"
      : `${page.pathname.replace(/^\//, "").replace(/\/$/, "")}.md`;
    const destPath = join(config.outDir, filename);
    const pageUrl = isRoot ? config.url : `${config.url.replace(/\/$/, "")}${page.pathname}`;

    const lines = ["---"];
    if (pageTitle) lines.push(`title: "${pageTitle}"`);
    if (page.description) lines.push(`description: "${page.description}"`);
    lines.push(`url: ${pageUrl}`, `source: ${pageUrl}`, `generated_by: aeo.js`, "---", "");
    if (pageTitle) lines.push(`# ${pageTitle}`, "");
    if (page.description) lines.push(`${page.description}`, "");
    lines.push(page.content);

    try {
      ensureDir(dirname(destPath));
      writeFileSync(destPath, lines.join("\n"), "utf-8");
      generated.push({ pathname: page.pathname, destination: destPath });
    } catch (error) {
      // Best-effort per page, but make the failure visible.
      console.warn(`Warning: Could not write ${destPath}:`, error);
    }
  }

  return generated;
}
|
|
571
|
+
/**
 * Recursively build manifest entries for the `.md` / `.mdx` files under `dir`.
 *
 * Dot-directories and `node_modules` are skipped. Problems are reported with
 * `console.warn`; an unreadable entry is skipped without discarding its
 * siblings.
 *
 * @param {string} dir - Directory to scan.
 * @param {object} config - Reads `config.url` as the URL prefix.
 * @param {string} [base=dir] - Root that URL paths are relative to.
 * @returns {Array<{url: string, title: string, description: (string|undefined), lastModified: string}>}
 *   `lastModified` is the file's mtime as an ISO-8601 string.
 */
function collectManifestEntries(dir, config, base = dir) {
  const entries = [];

  let names;
  try {
    names = readdirSync(dir);
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return entries;
  }

  for (const name of names) {
    const fullPath = join(dir, name);
    try {
      const stat = statSync(fullPath);
      if (stat.isDirectory()) {
        if (!name.startsWith(".") && name !== "node_modules") {
          entries.push(...collectManifestEntries(fullPath, config, base));
        }
        continue;
      }

      const extension = extname(name);
      if (!stat.isFile() || (extension !== ".md" && extension !== ".mdx")) continue;

      const { frontmatter, content: mainContent } = parseFrontmatter(readFileSync(fullPath, "utf-8"));
      // path.relative yields "\" separators on Windows; URLs need "/".
      const urlPath = relative(base, fullPath).replace(/\\/g, "/").replace(/\.mdx?$/, "");
      entries.push({
        url: `${config.url}/${urlPath}`,
        title: frontmatter.title || extractTitle(mainContent),
        description: frontmatter.description,
        lastModified: stat.mtime.toISOString()
      });
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }

  return entries;
}
|
|
598
|
+
/**
 * Build the documentation manifest as pretty-printed JSON.
 *
 * Documents come from the configured pages followed by the Markdown files
 * under `config.contentDir`, and are sorted by URL.
 *
 * @param {object} config - Reads `title`, `description`, `url`, `pages`, `contentDir`.
 * @returns {string} JSON text indented with two spaces.
 */
function generateManifest(config) {
  const documents = [];

  const pages = config.pages && config.pages.length > 0 ? config.pages : [];
  for (const page of pages) {
    const suffix = page.pathname === "/" ? "" : page.pathname;
    documents.push({
      url: `${config.url}${suffix}`,
      title: page.title || page.pathname,
      description: page.description
    });
  }

  for (const fileEntry of collectManifestEntries(config.contentDir, config)) {
    documents.push(fileEntry);
  }

  const generated = (/* @__PURE__ */ new Date()).toISOString();
  documents.sort((left, right) => left.url.localeCompare(right.url));

  const site = {
    title: config.title,
    description: config.description,
    url: config.url
  };
  const metadata = {
    totalDocuments: documents.length,
    generator: "aeo.js",
    generatorUrl: "https://aeojs.org"
  };

  return JSON.stringify({ version: "1.0", generated, site, documents, metadata }, null, 2);
}
|
|
627
|
+
/**
 * Recursively derive a URL for every `.md`, `.mdx` and `.html` file under `dir`.
 *
 * The URL is `config.url` + "/" + the file's path relative to `base`, without
 * its extension. Dot-directories and `node_modules` are skipped. Problems are
 * reported with `console.warn`; an unreadable entry (for example a dangling
 * symlink) is skipped without discarding its siblings.
 *
 * @param {string} dir - Directory to scan.
 * @param {object} config - Reads `config.url` as the URL prefix.
 * @param {string} [base=dir] - Root that URL paths are relative to.
 * @returns {string[]} URLs in directory-walk order (not sorted or de-duplicated).
 */
function collectUrls(dir, config, base = dir) {
  const urls = [];

  let names;
  try {
    names = readdirSync(dir);
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return urls;
  }

  for (const name of names) {
    const fullPath = join(dir, name);
    try {
      const stat = statSync(fullPath);
      if (stat.isDirectory()) {
        if (!name.startsWith(".") && name !== "node_modules") {
          urls.push(...collectUrls(fullPath, config, base));
        }
        continue;
      }

      const extension = extname(name);
      const isPage = extension === ".md" || extension === ".mdx" || extension === ".html";
      if (!stat.isFile() || !isPage) continue;

      // path.relative yields "\" separators on Windows; URLs need "/".
      const urlPath = relative(base, fullPath).replace(/\\/g, "/").replace(/\.(md|mdx|html)$/, "");
      urls.push(`${config.url}/${urlPath}`);
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }

  return urls;
}
|
|
647
|
+
/**
 * Escape the five characters that have special meaning in XML text and
 * attribute values: `&`, `<`, `>`, `"` and `'`.
 *
 * @param {string} str - Text to embed in XML (coerced with `String`).
 * @returns {string} The text with those characters replaced by entities.
 */
function escapeXml(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;"
  };
  // A single pass means the "&" in an inserted entity is never re-escaped.
  return String(str).replace(/[&<>"']/g, (character) => entities[character]);
}
|
|
650
|
+
/**
 * Build `sitemap.xml` from the configured pages, the files found under
 * `config.contentDir`, and the site root.
 *
 * URLs are de-duplicated and sorted. Every entry carries today's date as
 * `lastmod`, a `weekly` change frequency and a priority of `0.8`.
 *
 * @param {object} config - Reads `url`, `pages`, `contentDir`.
 * @returns {string} XML document joined with "\n".
 */
function generateSitemap(config) {
  const collected = [];

  const pages = config.pages && config.pages.length > 0 ? config.pages : [];
  for (const page of pages) {
    const suffix = page.pathname === "/" ? "" : page.pathname;
    collected.push(`${config.url}${suffix}`);
  }
  if (config.contentDir) {
    collected.push(...collectUrls(config.contentDir, config));
  }
  collected.push(config.url);

  const body = [...new Set(collected)].sort().flatMap((url) => [
    " <url>",
    ` <loc>${escapeXml(url)}</loc>`,
    ` <lastmod>${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}</lastmod>`,
    " <changefreq>weekly</changefreq>",
    " <priority>0.8</priority>",
    " </url>"
  ]);

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ...body,
    "</urlset>"
  ].join("\n");
}
|
|
677
|
+
/**
 * Pick up to ten of the most frequent words in `content`.
 *
 * The text is lower-cased, every character outside `a-z`, `0-9` and
 * whitespace becomes a separator, and words of three characters or fewer are
 * ignored. Ties keep first-occurrence order.
 *
 * @param {string} content - Text to analyse.
 * @returns {string[]} Keywords, most frequent first.
 */
function extractKeywords(content) {
  const words = content
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .split(/\s+/)
    .filter((word) => word.length > 3);

  // A Map, not a plain object: words such as "constructor" would otherwise
  // read an inherited property and corrupt the count.
  const counts = new Map();
  for (const word of words) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }

  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([word]) => word);
}
|
|
685
|
+
/**
 * Split text into chunks of whole paragraphs (separated by a blank line).
 *
 * Paragraphs are appended to the current chunk until adding the next one
 * would exceed `maxLength`; the chunk is then closed and a new one started.
 * `maxLength` is a soft limit: a single paragraph longer than it is kept
 * whole. Chunks are trimmed and empty chunks are never emitted.
 *
 * @param {string} content - Text to split.
 * @param {number} [maxLength=2000] - Target maximum chunk length in characters.
 * @returns {string[]} Non-empty chunks in document order.
 */
function chunkContent(content, maxLength = 2e3) {
  const chunks = [];
  let currentChunk = "";

  const flush = () => {
    const trimmed = currentChunk.trim();
    // A buffer holding only blank paragraphs must not become an empty chunk.
    if (trimmed) chunks.push(trimmed);
    currentChunk = "";
  };

  for (const paragraph of content.split("\n\n")) {
    if (currentChunk.length > 0 && currentChunk.length + paragraph.length > maxLength) {
      flush();
    }
    currentChunk += paragraph + "\n\n";
  }
  flush();

  return chunks;
}
|
|
701
|
+
/**
 * Recursively walks `dir` and builds AI-index entries for every `.md` / `.mdx`
 * file found. Hidden directories and `node_modules` are not descended into.
 *
 * Each file is split with `chunkContent`; every chunk becomes one entry whose
 * id is the first 16 hex characters of sha256(`${url}-${chunkIndex}`).
 *
 * @param {string} dir - Directory to scan.
 * @param {object} config - Configuration; only `config.url` is read here.
 * @param {string} [base=dir] - Root that entry URLs and `sourcePath` are relative to.
 * @returns {object[]} Collected entries; empty when `dir` cannot be read.
 */
function collectAIIndexEntries(dir, config, base = dir) {
  const entries = [];
  try {
    const files = readdirSync(dir);
    for (const file of files) {
      const fullPath = join(dir, file);
      const stat = statSync(fullPath);
      if (stat.isDirectory() && !file.startsWith(".") && file !== "node_modules") {
        entries.push(...collectAIIndexEntries(fullPath, config, base));
      } else if (stat.isFile() && (extname(file) === ".md" || extname(file) === ".mdx")) {
        const content = readFileSync(fullPath, "utf-8");
        const { frontmatter, content: mainContent } = parseFrontmatter(content);
        const relativePath = relative(base, fullPath);
        // `relative` returns platform separators; URLs always use forward slashes.
        const urlPath = relativePath.replace(/\.mdx?$/, "").replace(/\\/g, "/");
        // Strip a trailing slash from the site URL so the join never yields "//".
        const siteUrl = String(config.url).replace(/\/$/, "");
        const url = `${siteUrl}/${urlPath}`;
        const chunks = chunkContent(mainContent);
        const title = frontmatter.title || extractTitle(mainContent);
        const keywords = extractKeywords(mainContent);
        chunks.forEach((chunk, index) => {
          const id = createHash("sha256").update(`${url}-${index}`).digest("hex").slice(0, 16);
          entries.push({
            id,
            url,
            title: chunks.length > 1 ? `${title} (Part ${index + 1})` : title,
            content: chunk,
            description: frontmatter.description,
            keywords,
            metadata: {
              ...frontmatter,
              chunkIndex: index,
              totalChunks: chunks.length,
              sourcePath: relativePath
            }
          });
        });
      }
    }
  } catch (error) {
    // Best effort: an unreadable directory or file is reported, not fatal.
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
  return entries;
}
|
|
743
|
+
/**
 * Builds the `ai-index.json` document as a pretty-printed JSON string.
 *
 * Pages from `config.pages` are indexed first: pages with content are split
 * into chunks, pages without content get a single entry built from their
 * description or title. Markdown found under `config.contentDir` is appended
 * when a content directory is configured. Entries are sorted by id.
 *
 * @param {object} config - Configuration (`url`, `title`, `description`, `pages`, `contentDir`).
 * @returns {string} JSON text of the index.
 */
function generateAIIndex(config) {
  const entries = [];
  if (config.pages && config.pages.length > 0) {
    for (const page of config.pages) {
      // Avoid "//" when config.url ends with a slash; the home page keeps config.url as is.
      const url = page.pathname === "/" ? `${config.url}` : `${String(config.url).replace(/\/$/, "")}${page.pathname}`;
      const title = page.title || page.pathname;
      const content = page.content || "";
      if (content) {
        const chunks = chunkContent(content);
        const keywords = extractKeywords(content);
        chunks.forEach((chunk, index2) => {
          const id = createHash("sha256").update(`${url}-${index2}`).digest("hex").slice(0, 16);
          entries.push({
            id,
            url,
            title: chunks.length > 1 ? `${title} (Part ${index2 + 1})` : title,
            content: chunk,
            description: page.description,
            keywords,
            metadata: {
              chunkIndex: index2,
              totalChunks: chunks.length,
              sourcePath: page.pathname
            }
          });
        });
      } else {
        const id = createHash("sha256").update(url).digest("hex").slice(0, 16);
        entries.push({
          id,
          url,
          title,
          content: page.description || title,
          description: page.description,
          keywords: []
        });
      }
    }
  }
  // Only scan the file system when a content directory is configured;
  // scanning `undefined` just produced a spurious "Could not read directory" warning.
  if (config.contentDir) {
    entries.push(...collectAIIndexEntries(config.contentDir, config));
  }
  const index = {
    version: "1.0",
    generated: (/* @__PURE__ */ new Date()).toISOString(),
    site: {
      title: config.title,
      description: config.description,
      url: config.url
    },
    entries: entries.sort((a, b) => a.id.localeCompare(b.id)),
    metadata: {
      totalEntries: entries.length,
      generator: "aeo.js",
      generatorUrl: "https://aeojs.org",
      embedding: {
        recommended: "text-embedding-ada-002",
        dimensions: 1536
      }
    }
  };
  return JSON.stringify(index, null, 2);
}
|
|
804
|
+
|
|
805
|
+
// src/core/schema.ts
|
|
806
|
+
/**
 * Serialises the site and page schema objects produced by
 * `generateSchemaObjects` as JSON indented with two spaces.
 *
 * @param {object} config - Configuration passed through unchanged.
 * @returns {string} JSON text.
 */
function generateSchema(config) {
  return JSON.stringify(generateSchemaObjects(config), null, 2);
}
|
|
810
|
+
/**
 * Collects the site-wide schemas plus, for each page in `config.pages` that
 * yields at least one schema, that page's schemas keyed by its pathname.
 *
 * @param {object} config - Configuration with a `pages` array.
 * @returns {{site: object[], pages: Object<string, object[]>}} Schema objects.
 */
function generateSchemaObjects(config) {
  const site = generateSiteSchemas(config);
  const pages = {};
  config.pages.forEach((page) => {
    const perPage = generatePageSchemas(page, config);
    if (perPage.length > 0) {
      pages[page.pathname] = perPage;
    }
  });
  return { site, pages };
}
|
|
821
|
+
/**
 * Builds the site-level schema.org objects: always a `WebSite`, plus an
 * `Organization` when the organization has a name or at least one `sameAs` link.
 *
 * @param {object} config - Reads `title`, `description`, `url` and `schema.organization`.
 * @returns {object[]} One or two schema objects.
 */
function generateSiteSchemas(config) {
  const website = {
    "@context": "https://schema.org",
    "@type": "WebSite",
    name: config.title,
    // An empty description is emitted as undefined so JSON serialisation drops it.
    description: config.description || void 0,
    url: config.url
  };
  const { organization } = config.schema;
  const describesOrganization = organization.name || organization.sameAs.length > 0;
  if (!describesOrganization) {
    return [website];
  }
  const organizationSchema = {
    "@context": "https://schema.org",
    "@type": "Organization",
    name: organization.name,
    url: organization.url
  };
  if (organization.logo) {
    organizationSchema.logo = organization.logo;
  }
  if (organization.sameAs.length > 0) {
    organizationSchema.sameAs = organization.sameAs;
  }
  return [website, organizationSchema];
}
|
|
844
|
+
/**
 * Builds the schema.org objects for a single page, in this order:
 * an optional `FAQPage` (when question headings are detected in the content),
 * the page schema of type `config.schema.defaultType`, and an optional
 * `BreadcrumbList` for non-root pages with more than one crumb.
 *
 * @param {object} page - Page with `pathname` and optional `title`, `description`, `content`.
 * @param {object} config - Reads `url`, `title` and `schema`.
 * @returns {object[]} Schema objects for the page.
 */
function generatePageSchemas(page, config) {
  const context = "https://schema.org";
  const result = [];
  const isRoot = page.pathname === "/";
  const pageUrl = isRoot ? config.url : `${config.url.replace(/\/$/, "")}${page.pathname}`;

  const faqItems = detectFaqPatterns(page.content || "");
  if (faqItems.length > 0) {
    const mainEntity = faqItems.map((item) => ({
      "@type": "Question",
      name: item.question,
      acceptedAnswer: {
        "@type": "Answer",
        text: item.answer
      }
    }));
    result.push({ "@context": context, "@type": "FAQPage", mainEntity });
  }

  const pageType = config.schema.defaultType;
  const pageSchema = {
    "@context": context,
    "@type": pageType,
    name: page.title || config.title,
    url: pageUrl
  };
  if (page.description) {
    pageSchema.description = page.description;
  }
  if (pageType === "Article") {
    pageSchema.headline = page.title || config.title;
    pageSchema.author = {
      "@type": "Organization",
      name: config.schema.organization.name
    };
  }
  result.push(pageSchema);

  if (isRoot) {
    return result;
  }
  const breadcrumbs = generateBreadcrumbs(page.pathname, config);
  if (breadcrumbs.length > 1) {
    const itemListElement = breadcrumbs.map((crumb, position) => ({
      "@type": "ListItem",
      position: position + 1,
      name: crumb.name,
      item: crumb.url
    }));
    result.push({ "@context": context, "@type": "BreadcrumbList", itemListElement });
  }
  return result;
}
|
|
895
|
+
/**
 * Produces the breadcrumb trail for `pathname`: a "Home" crumb followed by
 * one crumb per non-empty path segment. A segment's name is the segment with
 * its first character upper-cased and the hyphens after that first character
 * replaced by spaces.
 *
 * @param {string} pathname - Path such as "/docs/getting-started".
 * @param {object} config - Reads `url`; one trailing slash is removed.
 * @returns {{name: string, url: string}[]} Crumbs from the root downwards.
 */
function generateBreadcrumbs(pathname, config) {
  const origin = config.url.replace(/\/$/, "");
  const segments = pathname.split("/").filter(Boolean);
  const toLabel = (segment) => segment.charAt(0).toUpperCase() + segment.slice(1).replace(/-/g, " ");
  const trail = segments.map((segment, depth) => ({
    name: toLabel(segment),
    url: `${origin}/${segments.slice(0, depth + 1).join("/")}`
  }));
  return [{ name: "Home", url: `${origin}/` }, ...trail];
}
|
|
911
|
+
/**
 * Finds Markdown headings that end with a question mark and pairs each with
 * the first paragraph after it.
 *
 * The answer is the run of non-blank lines following the heading (leading
 * blank lines are skipped); it stops at the first blank line after some text
 * or at the next heading. Answers are joined with spaces and cut to 500
 * characters. Question headings without an answer are ignored.
 *
 * @param {string} content - Markdown text.
 * @returns {{question: string, answer: string}[]} Detected question/answer pairs.
 */
function detectFaqPatterns(content) {
  const rows = content.split("\n").map((row) => row.trim());
  const found = [];
  rows.forEach((row, at) => {
    const heading = row.match(/^#{1,6}\s+(.+\?)\s*$/);
    if (!heading) {
      return;
    }
    const answer = [];
    for (const candidate of rows.slice(at + 1)) {
      if (candidate === "") {
        if (answer.length > 0) break;
        continue;
      }
      if (/^#{1,6}\s/.test(candidate)) break;
      answer.push(candidate);
    }
    if (answer.length === 0) {
      return;
    }
    found.push({
      question: heading[1],
      answer: answer.join(" ").slice(0, 500)
    });
  });
  return found;
}
|
|
938
|
+
/**
 * Renders each schema object as a `<script type="application/ld+json">`
 * element, one per line. An empty list yields an empty string.
 *
 * Every "<" in the serialised JSON is written as the JSON escape `\u003c`.
 * Schema text can come from page content (for example FAQ answers); without
 * the escape a literal "</script>" inside a value would close the element
 * early and let the rest be parsed as HTML. The escaped form parses back to
 * the identical value.
 *
 * @param {object[]} schemas - Schema objects to embed.
 * @returns {string} HTML markup, or "" when `schemas` is empty.
 */
function generateJsonLdScript(schemas) {
  return schemas
    .map((schema) => {
      const json = String(JSON.stringify(schema)).replace(/</g, "\\u003c");
      return `<script type="application/ld+json">${json}</script>`;
    })
    .join("\n");
}
|
|
945
|
+
/**
 * Runs every enabled generator and writes its output into `config.outDir`.
 *
 * Accepted call shapes:
 *  - `(rootDir: string, partialConfig?)`: the string is the output directory
 *    unless `partialConfig.outDir` is set; the result goes through `resolveConfig`.
 *  - `(resolvedConfig)`: an object whose `generators.robotsTxt` is a boolean
 *    is used as is.
 *  - `(partialConfig)`: anything else is passed to `resolveConfig`.
 *
 * A generator that throws does not stop the run: its message is recorded in
 * `errors`, prefixed with a label, and the remaining generators still execute.
 *
 * @param {string|object} configOrRoot - Output directory or configuration.
 * @param {object} [maybeConfig] - Partial configuration used with a string first argument.
 * @returns {Promise<{files: string[], errors: string[]}>} Produced file names and error messages.
 */
async function generateAEOFiles(configOrRoot, maybeConfig) {
  let config;
  if (typeof configOrRoot === "string") {
    const outDirOverride = (maybeConfig == null ? void 0 : maybeConfig.outDir) || configOrRoot;
    config = resolveConfig({ ...maybeConfig, outDir: outDirOverride });
  } else {
    const looksResolved = Boolean(configOrRoot) && typeof configOrRoot === "object" && "generators" in configOrRoot && configOrRoot.generators != null && typeof configOrRoot.generators.robotsTxt === "boolean";
    config = looksResolved ? configOrRoot : resolveConfig(configOrRoot);
  }

  const outDir = config.outDir;
  const files = [];
  const errors = [];
  if (!existsSync(outDir)) {
    mkdirSync(outDir, { recursive: true });
  }

  // Runs one step; a failure is recorded under `label` instead of propagating.
  const attempt = (label, step) => {
    try {
      step();
    } catch (e) {
      errors.push(`${label}: ${e.message}`);
    }
  };
  // Renders a single output file and records its name once it has been written.
  const writeOutput = (fileName, render) => attempt(fileName, () => {
    const content = render();
    writeFileSync(join(outDir, fileName), content, "utf-8");
    files.push(fileName);
  });
  // For generators that write their own files and report where they went.
  const recordDestinations = (label, produce) => attempt(label, () => {
    for (const item of produce()) {
      files.push(item.destination);
    }
  });

  if (config.generators.robotsTxt) {
    writeOutput("robots.txt", () => generateRobotsTxt(config));
  }
  if (config.generators.llmsTxt) {
    writeOutput("llms.txt", () => generateLlmsTxt(config));
  }
  if (config.generators.llmsFullTxt) {
    writeOutput("llms-full.txt", () => generateLlmsFullTxt(config));
  }
  if (config.generators.rawMarkdown) {
    recordDestinations("page-markdown", () => generatePageMarkdownFiles(config));
    recordDestinations("raw-markdown", () => copyMarkdownFiles(config));
  }
  if (config.generators.manifest) {
    writeOutput("docs.json", () => generateManifest(config));
  }
  if (config.generators.sitemap) {
    writeOutput("sitemap.xml", () => generateSitemap(config));
  }
  if (config.generators.aiIndex) {
    writeOutput("ai-index.json", () => generateAIIndex(config));
  }
  if (config.generators.schema && config.schema.enabled) {
    writeOutput("schema.json", () => generateSchema(config));
  }
  return { files, errors };
}
|
|
1044
|
+
|
|
1045
|
+
// src/core/html-extract.ts
|
|
1046
|
+
/**
 * Converts an HTML document into rough Markdown text, capped at 8000 characters.
 *
 * Pipeline, in order:
 *  1. drop `<script>`, `<style>` and `<svg>` blocks;
 *  2. keep only the contents of `<main>` when present, otherwise drop
 *     `<nav>`, `<header>` and `<footer>`;
 *  3. rewrite links, headings (h1 and h2 both become "##"), bold, italics,
 *     list items, block quotes, rules, line breaks and paragraphs;
 *  4. strip every remaining tag and decode the common character entities;
 *  5. remove emoji / variation selectors and normalise whitespace.
 *
 * Tag names are matched with a trailing word boundary so that `<i` does not
 * match `<img>`, `<b` does not match `<body>` / `<br>`, and `<li` does not
 * match `<link>`.
 *
 * @param {string} html - HTML source.
 * @returns {string} Markdown-like text, trimmed, at most 8000 characters.
 */
function extractTextFromHtml(html) {
  let text = html;
  text = text.replace(/<script[\s\S]*?<\/script>/gi, "");
  text = text.replace(/<style[\s\S]*?<\/style>/gi, "");
  text = text.replace(/<svg[\s\S]*?<\/svg>/gi, "");
  const mainMatch = text.match(/<main[^>]*>([\s\S]*)<\/main>/i);
  if (mainMatch) {
    text = mainMatch[1];
  } else {
    text = text.replace(/<nav[\s\S]*?<\/nav>/gi, "");
    text = text.replace(/<header[\s\S]*?<\/header>/gi, "");
    text = text.replace(/<footer[\s\S]*?<\/footer>/gi, "");
  }
  text = text.replace(/<a\b[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, url, inner) => {
    // A link wrapping block content (a card) is flattened to one line of at most 120 characters.
    if (/<(?:h[1-6]|div|p|section)[^>]*>/i.test(inner)) {
      const cleanInner = inner.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
      return `\n[${cleanInner.slice(0, 120).trim()}](${url})\n`;
    }
    return `[${inner}](${url})`;
  });
  for (let level = 1; level <= 6; level++) {
    const heading = new RegExp(`<h${level}[^>]*>([\\s\\S]*?)<\\/h${level}>`, "gi");
    // h1 is demoted to "##"; the page title is emitted as the only "#" elsewhere.
    text = text.replace(heading, `\n\n${"#".repeat(Math.max(2, level))} $1\n\n`);
  }
  text = text.replace(/<a\b[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, "[$2]($1)");
  text = text.replace(/<(?:strong|b)\b[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, "**$1**");
  text = text.replace(/<(?:em|i)\b[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, "*$1*");
  text = text.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, "\n- $1");
  text = text.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, "\n\n> $1\n\n");
  text = text.replace(/<hr[^>]*\/?>/gi, "\n\n---\n\n");
  text = text.replace(/<br[^>]*\/?>/gi, "\n");
  text = text.replace(/<\/p>/gi, "\n\n");
  text = text.replace(/<p\b[^>]*>/gi, "");
  text = text.replace(/<\/?(?:div|section|article|header|main|aside|figure|figcaption|table|thead|tbody|tr|td|th|ul|ol|dl|dt|dd)[^>]*>/gi, "\n");
  text = text.replace(/<[^>]+>/g, "");
  // Decode entities only after all tags are gone. "&amp;" goes last so that
  // "&amp;lt;" becomes the literal text "&lt;" instead of being decoded twice.
  text = text
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;|&apos;/g, "'")
    .replace(/&nbsp;|\u00a0/g, " ")
    .replace(/&copy;|\u00a9/g, "(c)")
    .replace(/&amp;/g, "&");
  text = text.replace(/[\u{1F1E0}-\u{1FAFF}\u{2600}-\u{27BF}\u{FE00}-\u{FE0F}\u{200D}\u{20E3}]/gu, "");
  text = text.split("\n").map((l) => l.replace(/\s+/g, " ").trim()).join("\n");
  text = text.replace(/\n{3,}/g, "\n\n");
  // Pull link text that started or ended on its own line back inside the brackets.
  text = text.replace(/\[[\s\n]+/g, "[").replace(/[\s\n]+\]/g, "]");
  // Re-attach heading markers to text that ended up on the following line, then drop empty headings.
  text = text.replace(/(#{2,6})\s*\n+\s*/g, "$1 ");
  text = text.replace(/^#{2,6}\s*$/gm, "");
  text = text.replace(/\n{3,}/g, "\n\n");
  return text.trim().slice(0, 8e3);
}
|
|
1095
|
+
/**
 * Reads the text of the first attribute-less `<title>` element and returns
 * the part before the first "|" (trimmed). When that part is empty the whole
 * title text is returned instead.
 *
 * @param {string} html - HTML source.
 * @returns {string|undefined} The title, or undefined when no `<title>` matches.
 */
function extractTitle2(html) {
  const found = html.match(/<title>([^<]*)<\/title>/i);
  if (!found) {
    return void 0;
  }
  const fullTitle = found[1];
  const beforePipe = fullTitle.split("|")[0].trim();
  return beforePipe || fullTitle;
}
|
|
1101
|
+
/**
 * Extracts the `content` of the first `<meta name="description">` tag.
 *
 * The value is read up to the quote character that opened it, so an
 * apostrophe inside a double-quoted value (or the reverse) no longer cuts the
 * description short. Both attribute orders are accepted: `name` immediately
 * followed by `content`, or `content` immediately followed by `name`.
 * Character entities in the value are returned as written.
 *
 * @param {string} html - HTML source.
 * @returns {string|undefined} The description, or undefined when none is found.
 */
function extractDescription(html) {
  const nameFirst = html.match(/<meta\s+name=(["'])description\1\s+content=(["'])((?:(?!\2)[\s\S])*)\2/i);
  if (nameFirst) {
    return nameFirst[3];
  }
  const contentFirst = html.match(/<meta\s+content=(["'])((?:(?!\1)[\s\S])*)\1\s+name=(["'])description\3/i);
  return contentFirst ? contentFirst[2] : void 0;
}
|
|
1105
|
+
/**
 * Parses every `<script type="application/ld+json">` block in `html`
 * (the `type` attribute must come first in the tag).
 *
 * @param {string} html - HTML source.
 * @returns {Array} Parsed JSON values in document order; blocks that are not valid JSON are skipped.
 */
function extractJsonLd(html) {
  const pattern = /<script\s+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
  const documents = [];
  for (let hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
    try {
      documents.push(JSON.parse(hit[1]));
    } catch {
      // Malformed JSON-LD is ignored on purpose.
    }
  }
  return documents;
}
|
|
1118
|
+
/**
 * Renders an HTML page as a Markdown document with YAML front matter
 * (`title`, `description`, `url`, `source`, `generated_by`), followed by the
 * title as an H1, the description, and the extracted body text.
 *
 * Title and description are written as JSON string literals, which are also
 * valid YAML double-quoted scalars: quotes, backslashes and control
 * characters in the text are escaped instead of producing broken front
 * matter. For text without such characters the output is unchanged.
 *
 * @param {string} html - HTML source of the page.
 * @param {string} pagePath - Path of the page; "/" denotes the site root.
 * @param {object} config - Reads `url`.
 * @returns {string} Markdown document.
 */
function htmlToMarkdown(html, pagePath, config) {
  const rawTitle = extractTitle2(html);
  const description = extractDescription(html);
  const textContent = extractTextFromHtml(html);
  const pageUrl = pagePath === "/" ? config.url : `${config.url.replace(/\/$/, "")}${pagePath}`;
  const lines = [];
  lines.push("---");
  if (rawTitle) lines.push(`title: ${JSON.stringify(rawTitle)}`);
  if (description) lines.push(`description: ${JSON.stringify(description)}`);
  lines.push(`url: ${pageUrl}`);
  lines.push(`source: ${pageUrl}`);
  lines.push(`generated_by: aeo.js`);
  lines.push("---", "");
  if (rawTitle) lines.push(`# ${rawTitle}`, "");
  if (description) lines.push(`${description}`, "");
  if (textContent) lines.push(textContent);
  return lines.join("\n");
}
|
|
1136
|
+
|
|
1137
|
+
// src/core/opengraph.ts
|
|
1138
|
+
/**
 * Builds the Open Graph and Twitter card meta tag descriptors for a page.
 *
 * Page title and description fall back to the site-wide values. Description,
 * image and Twitter handle tags are emitted only when a value is available;
 * the card type is "summary_large_image" when an image is configured and
 * "summary" otherwise.
 *
 * @param {object} page - Page with `pathname` and optional `title`, `description`.
 * @param {object} config - Reads `url`, `title`, `description` and `og`.
 * @returns {object[]} Tag descriptors: `{property, content}` for og:* tags, `{name, content}` for twitter:* tags.
 */
function generateOGTags(page, config) {
  const { og } = config;
  const isRoot = page.pathname === "/";
  const pageUrl = isRoot ? config.url : `${config.url.replace(/\/$/, "")}${page.pathname}`;
  const title = page.title || config.title;
  const description = page.description || config.description;
  const candidates = [
    { property: "og:type", content: og.type },
    { property: "og:title", content: title },
    description ? { property: "og:description", content: description } : null,
    { property: "og:url", content: pageUrl },
    { property: "og:site_name", content: config.title },
    og.image ? { property: "og:image", content: og.image } : null,
    { name: "twitter:card", content: og.image ? "summary_large_image" : "summary" },
    { name: "twitter:title", content: title },
    description ? { name: "twitter:description", content: description } : null,
    og.twitterHandle ? { name: "twitter:site", content: og.twitterHandle } : null,
    og.image ? { name: "twitter:image", content: og.image } : null
  ];
  return candidates.filter((tag) => tag !== null);
}
|
|
1156
|
+
/**
 * Renders the tags from `generateOGTags` as `<meta>` elements, one per line.
 * Tags carrying a `property` use the `property` attribute, all others use
 * `name`; the content value is passed through `escapeAttr`.
 *
 * @param {object} page - Page descriptor forwarded to `generateOGTags`.
 * @param {object} config - Configuration forwarded to `generateOGTags`.
 * @returns {string} Meta tag markup.
 */
function generateOGTagsHtml(page, config) {
  const rendered = [];
  for (const tag of generateOGTags(page, config)) {
    const identity = tag.property ? `property="${tag.property}"` : `name="${tag.name}"`;
    rendered.push(`<meta ${identity} content="${escapeAttr(tag.content)}" />`);
  }
  return rendered.join("\n ");
}
|
|
1163
|
+
/**
 * Escapes a string for use inside a double-quoted HTML attribute value.
 * "&" is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {string} str - Raw attribute value.
 * @returns {string} Value with `&`, `"`, `<` and `>` replaced by character entities.
 */
function escapeAttr(str) {
  return str
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}
|
|
1166
|
+
|
|
1167
|
+
// src/core/audit.ts
|
|
1168
|
+
/**
 * Runs the five audit categories (AI access, content structure, schema
 * presence, meta quality, citability) and sums their scores.
 *
 * @param {object} config - Resolved configuration to audit.
 * @returns {{score: number, categories: object[], issues: object[], suggestions: string[]}} Audit report.
 */
function auditSite(config) {
  const issues = [];
  const suggestions = [];
  // The category functions append to `issues` / `suggestions`; call order fixes the report order.
  const categories = [
    auditAIAccess(config, issues),
    auditContentStructure(config, issues, suggestions),
    auditSchemaPresence(config, issues, suggestions),
    auditMetaQuality(config, issues, suggestions),
    auditCitability(config, issues, suggestions)
  ];
  let score = 0;
  for (const category of categories) {
    score += category.score;
  }
  return { score, categories, issues, suggestions };
}
|
|
1180
|
+
/**
 * Audits whether AI crawlers can reach the site: four generator switches
 * (robots.txt, llms.txt, sitemap.xml, ai-index.json) and the absence of a
 * blanket robots disallow rule. Each passing check is worth 4 points.
 *
 * @param {object} config - Reads `generators` and `robots.disallow`.
 * @param {object[]} issues - Shared list; one issue is appended per failed check.
 * @returns {{name: string, score: number, maxScore: number, checks: object[]}} Category result.
 */
function auditAIAccess(config, issues) {
  const checks = [];
  // Registers a check and, when it fails, the matching issue.
  const record = (label, passed, severity, message, fix) => {
    checks.push({ label, passed, points: passed ? 4 : 0 });
    if (!passed) {
      issues.push({ category: "AI Access", severity, message, fix });
    }
  };

  record(
    "robots.txt generation enabled",
    config.generators.robotsTxt,
    "error",
    "robots.txt generation is disabled",
    "Set generators.robotsTxt: true"
  );
  record(
    "llms.txt generation enabled",
    config.generators.llmsTxt,
    "error",
    "llms.txt generation is disabled \u2014 AI crawlers won't find your content summary",
    "Set generators.llmsTxt: true"
  );
  record(
    "sitemap.xml generation enabled",
    config.generators.sitemap,
    "warning",
    "sitemap.xml generation is disabled",
    "Set generators.sitemap: true"
  );
  record(
    "ai-index.json generation enabled",
    config.generators.aiIndex,
    "warning",
    "ai-index.json generation is disabled",
    "Set generators.aiIndex: true"
  );

  const blanketDisallow = config.robots.disallow.includes("/") || config.robots.disallow.includes("/*");
  record(
    "No blanket disallow rules blocking AI crawlers",
    !blanketDisallow,
    "error",
    "robots.txt has blanket disallow (/ or /*) which blocks AI crawlers",
    "Remove overly broad disallow rules from robots.disallow"
  );

  let score = 0;
  for (const check of checks) {
    score += check.points;
  }
  return { name: "AI Access", score, maxScore: 20, checks };
}
|
|
1216
|
+
/**
 * Audits how well the configured pages are structured for extraction.
 * Five checks worth 4 points each: pages exist, some page has more than 50
 * characters of content, at least half of those pages (minimum one) contain
 * a heading, every such page averages at most 200 words per paragraph, and
 * the site has at least three pages.
 *
 * @param {object} config - Reads `pages`.
 * @param {object[]} issues - Shared list of issues to append to.
 * @param {string[]} suggestions - Shared list of suggestions to append to.
 * @returns {{name: string, score: number, maxScore: number, checks: object[]}} Category result.
 */
function auditContentStructure(config, issues, suggestions) {
  const category = "Content Structure";
  const checks = [];
  const addCheck = (label, passed) => {
    checks.push({ label, passed, points: passed ? 4 : 0 });
  };
  const { pages } = config;

  const hasPages = pages.length > 0;
  addCheck("Pages are defined", hasPages);
  if (!hasPages) {
    issues.push({ category, severity: "error", message: "No pages defined \u2014 AEO files will have no content", fix: "Add pages to your config or ensure your framework plugin scans pages" });
  }

  const substantialPages = pages.filter((page) => page.content && page.content.length > 50);
  const hasContent = substantialPages.length > 0;
  addCheck("Pages have extractable content", hasContent);
  if (hasPages && !hasContent) {
    issues.push({ category, severity: "warning", message: "No pages have substantial text content (>50 chars)", fix: "Ensure pages have meaningful text content for AI extraction" });
  }

  // Either a Markdown heading at the start of a line or an HTML heading tag.
  const headingPattern = /^#{1,6}\s|<h[1-6]/m;
  const pagesWithHeadings = substantialPages.filter((page) => headingPattern.test(page.content || ""));
  const requiredWithHeadings = Math.max(1, Math.floor(substantialPages.length * 0.5));
  const hasHeadings = pagesWithHeadings.length >= requiredWithHeadings;
  addCheck("Content uses heading hierarchy", hasHeadings);
  if (substantialPages.length > 0 && !hasHeadings) {
    suggestions.push("Add structured headings (H1-H6) to improve AI content extraction");
  }

  // Only paragraphs longer than 20 characters count; a page without any passes.
  const paragraphsAreShort = (page) => {
    const paragraphs = (page.content || "").split(/\n\n+/).filter((para) => para.trim().length > 20);
    if (paragraphs.length === 0) {
      return true;
    }
    let wordTotal = 0;
    for (const para of paragraphs) {
      wordTotal += para.split(/\s+/).length;
    }
    return wordTotal / paragraphs.length <= 200;
  };
  const goodParagraphLength = substantialPages.every(paragraphsAreShort);
  addCheck("Paragraphs are reasonable length (<200 words avg)", goodParagraphLength);
  if (!goodParagraphLength) {
    suggestions.push("Break up long paragraphs (>200 words) for better AI extraction \u2014 aim for 100-167 words");
  }

  const multiplePages = pages.length >= 3;
  addCheck("Site has 3+ pages for comprehensive coverage", multiplePages);
  if (hasPages && !multiplePages) {
    suggestions.push("Add more pages to improve site coverage for AI crawlers");
  }

  let score = 0;
  for (const check of checks) {
    score += check.points;
  }
  return { name: category, score, maxScore: 20, checks };
}
|
|
1259
|
+
/**
 * Audits the schema.org configuration. Five checks worth 4 points each:
 * schema generation enabled, organization name set (and not the default
 * "My Site"), organization logo set, at least one `sameAs` profile, and a
 * site URL other than the default "https://example.com".
 *
 * @param {object} config - Reads `schema`, `generators.schema` and `url`.
 * @param {object[]} issues - Shared list of issues to append to.
 * @param {string[]} suggestions - Shared list of suggestions to append to.
 * @returns {{name: string, score: number, maxScore: number, checks: object[]}} Category result.
 */
function auditSchemaPresence(config, issues, suggestions) {
  const category = "Schema Presence";
  const checks = [];
  const addCheck = (label, passed) => {
    checks.push({ label, passed, points: passed ? 4 : 0 });
  };
  const reportIssue = (severity, message, fix) => {
    issues.push({ category, severity, message, fix });
  };

  const schemaEnabled = config.schema.enabled && config.generators.schema;
  addCheck("Schema.org/JSON-LD generation enabled", schemaEnabled);
  if (!schemaEnabled) {
    reportIssue("error", "Schema.org generation is disabled", "Set schema.enabled: true and generators.schema: true");
  }

  const organizationName = config.schema.organization.name;
  const hasOrgName = organizationName !== "" && organizationName !== "My Site";
  addCheck("Organization name configured", hasOrgName);
  if (!hasOrgName) {
    reportIssue("warning", "Organization name is not configured (using default)", "Set schema.organization.name to your actual organization name");
  }

  const hasLogo = Boolean(config.schema.organization.logo);
  addCheck("Organization logo URL set", hasLogo);
  if (!hasLogo) {
    suggestions.push("Add schema.organization.logo for richer search results and AI knowledge");
  }

  const hasSameAs = config.schema.organization.sameAs.length > 0;
  addCheck("Social profiles linked (sameAs)", hasSameAs);
  if (!hasSameAs) {
    reportIssue("warning", "No social profiles (sameAs) \u2014 critical for GEO/E-E-A-T signals", "Add schema.organization.sameAs with social profile URLs");
  }

  const hasRealUrl = config.url !== "https://example.com" && config.url !== "";
  addCheck("Site URL is configured (not default)", hasRealUrl);
  if (!hasRealUrl) {
    reportIssue("error", "Site URL is still the default (https://example.com)", "Set url to your actual site URL");
  }

  let score = 0;
  for (const check of checks) {
    score += check.points;
  }
  return { name: category, score, maxScore: 20, checks };
}
|
|
1293
|
+
/**
 * Audits site and page metadata. Five checks worth 4 points each: site title
 * length 10-70, site description length 50-200, Open Graph enabled, at least
 * 80% of pages titled, and at least 50% of pages described. With no pages the
 * two coverage checks fail but produce no issue or suggestion.
 *
 * @param {object} config - Reads `title`, `description`, `og.enabled` and `pages`.
 * @param {object[]} issues - Shared list of issues to append to.
 * @param {string[]} suggestions - Shared list of suggestions to append to.
 * @returns {{name: string, score: number, maxScore: number, checks: object[]}} Category result.
 */
function auditMetaQuality(config, issues, suggestions) {
  const category = "Meta Quality";
  const checks = [];
  const addCheck = (label, passed) => {
    checks.push({ label, passed, points: passed ? 4 : 0 });
  };
  const reportIssue = (severity, message, fix) => {
    issues.push({ category, severity, message, fix });
  };

  const titleLen = config.title.length;
  addCheck("Site title is 10-70 characters", titleLen >= 10 && titleLen <= 70);
  if (titleLen < 10) {
    reportIssue("warning", `Site title is too short (${titleLen} chars)`, "Use a descriptive title between 10-70 characters");
  } else if (titleLen > 70) {
    suggestions.push(`Site title is long (${titleLen} chars) \u2014 consider shortening to under 70`);
  }

  const descLen = config.description.length;
  addCheck("Site description is 50-200 characters", descLen >= 50 && descLen <= 200);
  if (descLen === 0) {
    reportIssue("error", "No site description configured", "Add a description (50-200 characters)");
  } else if (descLen < 50) {
    reportIssue("warning", `Description is too short (${descLen} chars)`, "Expand description to 50-200 characters");
  }

  const ogEnabled = config.og.enabled;
  addCheck("Open Graph meta tags enabled", ogEnabled);
  if (!ogEnabled) {
    reportIssue("warning", "OG meta tags are disabled \u2014 social sharing and AI previews will be limited", "Set og.enabled: true");
  }

  const pageCount = config.pages.length;
  const hasPages = pageCount > 0;

  const titledPages = config.pages.filter((page) => page.title && page.title.length > 0);
  const titleCoverage = hasPages ? titledPages.length / pageCount : 0;
  const goodTitleCoverage = titleCoverage >= 0.8;
  addCheck("80%+ of pages have titles", goodTitleCoverage);
  if (hasPages && !goodTitleCoverage) {
    reportIssue("warning", `Only ${titledPages.length}/${pageCount} pages have titles`, "Add titles to all pages");
  }

  const describedPages = config.pages.filter((page) => page.description && page.description.length > 0);
  const descCoverage = hasPages ? describedPages.length / pageCount : 0;
  const goodDescCoverage = descCoverage >= 0.5;
  addCheck("50%+ of pages have descriptions", goodDescCoverage);
  if (hasPages && !goodDescCoverage) {
    suggestions.push(`Only ${describedPages.length}/${pageCount} pages have descriptions \u2014 add per-page descriptions`);
  }

  let score = 0;
  for (const check of checks) {
    score += check.points;
  }
  return { name: category, score, maxScore: 20, checks };
}
|
|
1337
|
+
/**
 * Audits the concatenated page content for citability signals and returns
 * the "Citability" category result.
 *
 * Five checks are evaluated, each worth 4 points (20 max): a direct-answer
 * sentence, a statistic, a question heading, a list item, and a minimum of
 * 500 words overall. A failed pattern check appends a message to
 * `suggestions`; a failed word-count check appends a warning to `issues`.
 *
 * @param {{ pages: Array<{ content?: string }> }} config - Only `pages[].content` is read.
 * @param {Array<object>} issues - Mutated; receives the word-count warning.
 * @param {Array<string>} suggestions - Mutated; receives one entry per failed pattern check.
 * @returns {{ name: string, score: number, maxScore: number, checks: Array<{ label: string, passed: boolean, points: number }> }}
 */
function auditCitability(config, issues, suggestions) {
  const POINTS_PER_CHECK = 4;
  const corpus = config.pages.map((page) => page.content || "").join("\n\n");
  const checks = [];

  // Records a check result and hands the outcome back to the caller.
  const record = (label, passed) => {
    checks.push({ label, passed, points: passed ? POINTS_PER_CHECK : 0 });
    return passed;
  };

  // Evaluated in this order so suggestions are appended in a stable sequence.
  const patternChecks = [
    {
      label: "Content has direct answer patterns",
      pattern: /^[A-Z][^.!?]{20,150}[.!?]/m,
      suggestion: "Add self-contained factual statements that AI can cite directly"
    },
    {
      label: "Content includes statistics/factual claims",
      pattern: /\d+%|\$[\d,.]+|\d{4}|\d+\s*(million|billion|thousand|users|customers|downloads)/i,
      suggestion: "Include concrete statistics, numbers, and dates \u2014 AI favors content with factual claims"
    },
    {
      label: "Content has FAQ patterns (question headings)",
      pattern: /^#{1,6}\s+.+\?\s*$/m,
      suggestion: "Add FAQ sections with question headings \u2014 these generate FAQPage schema automatically"
    },
    {
      label: "Content uses lists for structured information",
      pattern: /^[\s]*[-*+]\s|^\d+\.\s/m,
      suggestion: "Use bullet or numbered lists for key information \u2014 improves AI extraction"
    }
  ];
  for (const { label, pattern, suggestion } of patternChecks) {
    if (!record(label, pattern.test(corpus))) {
      suggestions.push(suggestion);
    }
  }

  // Empty tokens (from leading/trailing whitespace) are not words.
  const wordCount = corpus.split(/\s+/).filter((token) => token.length > 0).length;
  if (!record("Total content is substantial (500+ words)", wordCount >= 500)) {
    issues.push({
      category: "Citability",
      severity: "warning",
      message: `Total content is only ${wordCount} words`,
      fix: "Add more content \u2014 aim for at least 500 words across all pages"
    });
  }

  let score = 0;
  for (const check of checks) {
    score += check.points;
  }
  return { name: "Citability", score, maxScore: 20, checks };
}
|
|
1377
|
+
/**
 * Renders an audit result as a plain-text report.
 *
 * The report contains the overall score with its grade, one section per
 * category (score line, bar, and a +/- line per check), then optional
 * "Issues:" and "Suggestions:" sections when those arrays are non-empty.
 *
 * @param {{ score: number, categories: Array<{ name: string, score: number, maxScore: number, checks: Array<{ label: string, passed: boolean }> }>, issues: Array<{ category: string, severity: string, message: string, fix?: string }>, suggestions: Array<string> }} result
 * @returns {string} Report lines joined with "\n".
 */
function formatAuditReport(result) {
  // String.prototype.repeat throws a RangeError for negative or infinite
  // counts, so a score outside 0..maxScore must be clamped before drawing.
  const toCount = (value) => {
    const whole = Math.floor(Number(value));
    return Number.isFinite(whole) && whole > 0 ? whole : 0;
  };
  const renderBar = (score, maxScore) => {
    const width = toCount(maxScore);
    const filled = Math.min(width, toCount(score));
    return "\u2588".repeat(filled) + "\u2591".repeat(width - filled);
  };
  const severityIcon = (severity) => {
    if (severity === "error") return "!";
    return severity === "warning" ? "~" : "i";
  };

  const lines = [
    `GEO Readiness Score: ${result.score}/100 (${getGrade(result.score)})`,
    "\u2550".repeat(50),
    ""
  ];

  for (const cat of result.categories) {
    lines.push(`${cat.name}: ${cat.score}/${cat.maxScore}`);
    lines.push(` ${renderBar(cat.score, cat.maxScore)}`);
    for (const check of cat.checks) {
      lines.push(` ${check.passed ? "+" : "-"} ${check.label}`);
    }
    lines.push("");
  }

  if (result.issues.length > 0) {
    lines.push("Issues:");
    for (const issue of result.issues) {
      lines.push(` ${severityIcon(issue.severity)} [${issue.category}] ${issue.message}`);
      if (issue.fix) lines.push(` Fix: ${issue.fix}`);
    }
    lines.push("");
  }

  if (result.suggestions.length > 0) {
    lines.push("Suggestions:");
    for (const suggestion of result.suggestions) {
      lines.push(` * ${suggestion}`);
    }
    lines.push("");
  }

  return lines.join("\n");
}
|
|
1410
|
+
/**
 * Maps a numeric score to its grade label.
 *
 * @param {number} score
 * @returns {string} "Excellent" (>= 90), "Good" (>= 75), "Fair" (>= 50),
 *   "Needs Work" (>= 25), otherwise "Poor".
 */
function getGrade(score) {
  // Ordered from the highest threshold down; the first match wins.
  const bands = [
    [90, "Excellent"],
    [75, "Good"],
    [50, "Fair"],
    [25, "Needs Work"]
  ];
  const band = bands.find(([minimum]) => score >= minimum);
  return band ? band[1] : "Poor";
}
|
|
1417
|
+
|
|
1418
|
+
// src/core/citability.ts
|
|
1419
|
+
/**
 * Scores a single page across the four citability dimensions.
 *
 * Each dimension scorer receives the page content and the shared `hints`
 * array, which the scorers append to. The page score is the sum of the
 * dimension scores.
 *
 * @param {{ pathname: string, content?: string }} page
 * @returns {{ pathname: string, score: number, dimensions: Array<object>, hints: Array<object> }}
 */
function scorePageCitability(page) {
  const text = page.content || "";
  const hints = [];
  // Scorer order determines both dimension order and hint order.
  const scorers = [
    scoreAnswerBlocks,
    scoreSelfContainment,
    scoreStatisticalDensity,
    scoreStructureQuality
  ];
  const dimensions = scorers.map((scorer) => scorer(text, hints));

  let score = 0;
  for (const dimension of dimensions) {
    score += dimension.score;
  }
  return { pathname: page.pathname, score, dimensions, hints };
}
|
|
1435
|
+
/**
 * Scores every configured page and averages the results.
 *
 * @param {{ pages: Array<object> }} config
 * @returns {{ averageScore: number, pages: Array<object> }} `averageScore` is
 *   the rounded mean of the page scores, or 0 when there are no pages.
 */
function scoreSiteCitability(config) {
  const pages = [];
  let total = 0;
  for (const page of config.pages) {
    const scored = scorePageCitability(page);
    total += scored.score;
    pages.push(scored);
  }
  const averageScore = pages.length === 0 ? 0 : Math.round(total / pages.length);
  return { averageScore, pages };
}
|
|
1440
|
+
/**
 * Scores how many paragraphs read as standalone "answer" blocks (25 max).
 *
 * A non-heading paragraph counts as an answer when it has 20-200 words,
 * starts with a capitalised word, and does not open with a pronoun or
 * demonstrative (This, That, It, They, ...). Appends a warning hint when no
 * answer paragraph is found, then a suggestion hint for each non-heading
 * paragraph longer than 200 words.
 *
 * @param {string} content - Page content.
 * @param {Array<object>} hints - Mutated; receives warning/suggestion hints.
 * @returns {{ name: string, score: number, maxScore: number, details: string }}
 */
function scoreAnswerBlocks(content, hints) {
  const buildResult = (score, details) => ({ name: "Answer Blocks", score, maxScore: 25, details });
  if (content.trim() === "") {
    return buildResult(0, "No content");
  }

  const subjectLead = /^[A-Z][a-z]/;
  const pronounLead = /^(This|That|These|Those|It|They|We|He|She|I)\b/;

  // Headings (paragraphs starting with "#") never count as prose.
  const prose = splitParagraphs(content)
    .filter((paragraph) => !paragraph.text.startsWith("#"))
    .map((paragraph) => ({
      text: paragraph.text,
      line: paragraph.line,
      words: paragraph.text.split(/\s+/).length
    }));

  const answerCount = prose.filter(
    ({ text, words }) => words >= 20 && words <= 200 && subjectLead.test(text) && !pronounLead.test(text)
  ).length;
  const answerRatio = answerCount / Math.max(prose.length, 1);

  let score = 0;
  if (answerCount >= 3 && answerRatio >= 0.5) {
    score = 25;
  } else if (answerCount >= 2 && answerRatio >= 0.3) {
    score = 20;
  } else if (answerCount >= 1) {
    score = 12;
  }

  if (answerCount === 0) {
    hints.push({ type: "warning", message: "No direct answer paragraphs found \u2014 add self-contained factual paragraphs that start with a clear subject" });
  }
  prose
    .filter(({ words }) => words > 200)
    .forEach(({ words, line }) => {
      hints.push({ type: "suggestion", message: `Paragraph at line ${line} is ${words} words \u2014 split for better AI extraction (aim for <200)`, line });
    });

  return buildResult(score, `${answerCount} answer-quality paragraphs found`);
}
|
|
1473
|
+
/**
 * Scores how many substantial paragraphs can be read without surrounding
 * context (25 max).
 *
 * Only non-heading paragraphs with at least 10 words are considered. A
 * paragraph is treated as context-dependent when it matches any of the
 * reference/connective patterns below. Appends one suggestion hint when at
 * least one such paragraph exists.
 *
 * @param {string} content - Page content.
 * @param {Array<object>} hints - Mutated; may receive a suggestion hint.
 * @returns {{ name: string, score: number, maxScore: number, details: string }}
 */
function scoreSelfContainment(content, hints) {
  const buildResult = (score, details) => ({ name: "Self-Containment", score, maxScore: 25, details });
  if (content.trim() === "") {
    return buildResult(0, "No content");
  }

  const candidates = splitParagraphs(content).filter(({ text }) => {
    if (text.startsWith("#")) return false;
    return text.split(/\s+/).length >= 10;
  });
  if (candidates.length === 0) {
    return buildResult(5, "No substantial paragraphs");
  }

  const contextPatterns = [
    /^(As mentioned|As noted|As described|As shown|As stated|As discussed)\b/i,
    /^(However|Furthermore|Moreover|Additionally|In addition|Nevertheless)\b/,
    /\b(above|below|previously|the following|as follows|see also)\b/i,
    /^(This|That|These|Those|It) (is|was|are|were|has|have|will|can|should|means)\b/
  ];
  const contextDependentCount = candidates.filter(({ text }) =>
    contextPatterns.some((pattern) => pattern.test(text))
  ).length;
  const selfContainedRatio = 1 - contextDependentCount / candidates.length;

  // Highest threshold first; anything under 0.4 gets the floor score.
  const tiers = [
    [0.8, 25],
    [0.6, 18],
    [0.4, 12]
  ];
  const tier = tiers.find(([minimum]) => selfContainedRatio >= minimum);
  const score = tier ? tier[1] : 5;

  if (contextDependentCount > 0) {
    hints.push({ type: "suggestion", message: `${contextDependentCount} paragraph(s) depend on surrounding context \u2014 rephrase to be self-contained for better AI citation` });
  }
  return buildResult(score, `${Math.round(selfContainedRatio * 100)}% of paragraphs are self-contained`);
}
|
|
1505
|
+
/**
 * Scores the density of statistical claims in the content (25 max).
 *
 * Counts matches of six patterns (percentages, currencies, years,
 * quantities, metrics, comparisons) and expresses them per 100 words. A
 * phrase that matches more than one pattern is counted once per pattern.
 * Appends a suggestion hint when nothing matches.
 *
 * @param {string} content - Page content.
 * @param {Array<object>} hints - Mutated; may receive a suggestion hint.
 * @returns {{ name: string, score: number, maxScore: number, details: string }}
 */
function scoreStatisticalDensity(content, hints) {
  if (!content.trim()) {
    return { name: "Statistical Density", score: 0, maxScore: 25, details: "No content" };
  }
  const patterns = {
    percentages: /\d+(\.\d+)?%/g,
    currencies: /\$[\d,.]+|\€[\d,.]+|£[\d,.]+/g,
    years: /\b(19|20)\d{2}\b/g,
    quantities: /\b\d+[\s,]*(million|billion|thousand|hundred|K|M|B)\b/gi,
    metrics: /\b\d+(\.\d+)?\s*(users|customers|downloads|visitors|employees|countries|cities|hours|minutes|seconds|pages|files|requests)\b/gi,
    comparisons: /\b\d+x\b|\b\d+(\.\d+)?%\s*(faster|slower|more|less|better|worse|increase|decrease|growth|reduction)\b/gi
  };
  let totalMatches = 0;
  for (const pattern of Object.values(patterns)) {
    const matches = content.match(pattern);
    if (matches) totalMatches += matches.length;
  }
  // Splitting untrimmed text on whitespace yields empty tokens at either end;
  // drop them so surrounding blank lines do not inflate the word count and
  // deflate the density.
  const words = content.split(/\s+/).filter((token) => token.length > 0).length;
  const density = words > 0 ? totalMatches / (words / 100) : 0;
  let score;
  if (density >= 3) score = 25;
  else if (density >= 1.5) score = 20;
  else if (density >= 0.5) score = 12;
  else if (totalMatches > 0) score = 6;
  else score = 0;
  if (totalMatches === 0) {
    hints.push({ type: "suggestion", message: "No statistics or factual claims found \u2014 AI favors content with concrete numbers, percentages, and dates" });
  }
  return { name: "Statistical Density", score, maxScore: 25, details: `${totalMatches} statistical claims (${density.toFixed(1)} per 100 words)` };
}
|
|
1535
|
+
/**
 * Scores structural signals in the content (25 max): headings (7), list
 * items (6 for three or more, 3 for fewer), average paragraph length
 * (6 up to 167 words, 3 up to 250), and a summary heading (6) or a short
 * first paragraph (3).
 *
 * Appends a hint for each missing signal: headings, lists, and
 * summary/short intro.
 *
 * @param {string} content - Page content.
 * @param {Array<object>} hints - Mutated; receives warning/suggestion hints.
 * @returns {{ name: string, score: number, maxScore: number, details: string }}
 */
function scoreStructureQuality(content, hints) {
  if (content.trim().length === 0) {
    return { name: "Structure Quality", score: 0, maxScore: 25, details: "No content" };
  }

  const details = [];
  let points = 0;
  const award = (amount, detail) => {
    points += amount;
    details.push(detail);
  };

  const headingCount = (content.match(/^#{1,6}\s.+$/gm) || []).length;
  if (headingCount === 0) {
    hints.push({ type: "warning", message: "No headings found \u2014 add H1-H6 headings to structure content for AI extraction" });
  } else {
    award(7, `${headingCount} headings`);
  }

  const listItemCount = (content.match(/^[\s]*[-*+]\s.+$|^\d+\.\s.+$/gm) || []).length;
  if (listItemCount === 0) {
    hints.push({ type: "suggestion", message: "No lists found \u2014 use bullet or numbered lists for key information" });
  } else {
    award(listItemCount >= 3 ? 6 : 3, `${listItemCount} list items`);
  }

  // Word counts of the non-heading paragraphs that have at least 10 words.
  const wordCounts = splitParagraphs(content)
    .filter((paragraph) => !paragraph.text.startsWith("#"))
    .map((paragraph) => paragraph.text.split(/\s+/).length)
    .filter((count) => count >= 10);
  const avgWords = wordCounts.length === 0
    ? 0
    : wordCounts.reduce((sum, count) => sum + count, 0) / wordCounts.length;
  if (avgWords > 0) {
    if (avgWords <= 167) {
      award(6, `avg ${Math.round(avgWords)} words/paragraph`);
    } else if (avgWords <= 250) {
      award(3, `avg ${Math.round(avgWords)} words/paragraph (aim for <167)`);
    }
  }

  const hasSummary = /^(#{1,3}\s*)?(summary|tl;?dr|overview|introduction|key takeaways|in brief)\s*$/im.test(content);
  const hasShortIntro = wordCounts.length > 0 && wordCounts[0] <= 60;
  if (hasSummary) {
    award(6, "has summary section");
  } else if (hasShortIntro) {
    award(3, "has short intro");
  } else {
    hints.push({ type: "suggestion", message: "Add a summary or TL;DR section at the top \u2014 helps AI extract the key message" });
  }

  return {
    name: "Structure Quality",
    score: Math.min(points, 25),
    maxScore: 25,
    details: details.join(", ") || "minimal structure"
  };
}
|
|
1581
|
+
/**
 * Splits content into paragraphs separated by blank lines.
 *
 * Each line is trimmed; consecutive non-blank lines are joined with a single
 * space. `line` is the 1-based number of the paragraph's first line.
 *
 * @param {string} content
 * @returns {Array<{ text: string, line: number }>}
 */
function splitParagraphs(content) {
  const paragraphs = [];
  let buffer = [];
  let startLine = 1;

  // Emits the buffered lines as one paragraph, if any are pending.
  const flush = () => {
    if (buffer.length === 0) return;
    paragraphs.push({ text: buffer.join(" ").trim(), line: startLine });
    buffer = [];
  };

  content.split("\n").forEach((rawLine, index) => {
    const line = rawLine.trim();
    if (line === "") {
      flush();
      return;
    }
    if (buffer.length === 0) {
      startLine = index + 1;
    }
    buffer.push(line);
  });
  flush();

  return paragraphs;
}
|
|
1603
|
+
/**
 * Renders one page's citability result as plain text.
 *
 * Output is a header line, a rule, two lines per dimension (score with bar,
 * then details), and an optional "Hints:" section listing each hint with its
 * icon and, when present, its line number.
 *
 * @param {{ pathname: string, score: number, dimensions: Array<{ name: string, score: number, maxScore: number, details: string }>, hints: Array<{ type: string, message: string, line?: number }> }} result
 * @returns {string} Lines joined with "\n".
 */
function formatPageCitability(result) {
  // String.prototype.repeat throws a RangeError for negative or infinite
  // counts, so a score outside 0..maxScore must be clamped before drawing.
  const toCount = (value) => {
    const whole = Math.floor(Number(value));
    return Number.isFinite(whole) && whole > 0 ? whole : 0;
  };
  const renderBar = (score, maxScore) => {
    const width = toCount(maxScore);
    const filled = Math.min(width, toCount(score));
    return "\u2588".repeat(filled) + "\u2591".repeat(width - filled);
  };

  const lines = [];
  lines.push(`Page: ${result.pathname} \u2014 Score: ${result.score}/100`);
  lines.push("\u2500".repeat(40));
  for (const dim of result.dimensions) {
    lines.push(` ${dim.name}: ${dim.score}/${dim.maxScore} ${renderBar(dim.score, dim.maxScore)}`);
    lines.push(` ${dim.details}`);
  }
  if (result.hints.length > 0) {
    lines.push("");
    lines.push(" Hints:");
    for (const hint of result.hints) {
      const icon = hint.type === "error" ? "!" : hint.type === "warning" ? "~" : "*";
      const loc = hint.line ? ` (line ${hint.line})` : "";
      lines.push(` ${icon} ${hint.message}${loc}`);
    }
  }
  return lines.join("\n");
}
|
|
1623
|
+
|
|
1624
|
+
// src/core/platform-hints.ts
|
|
1625
|
+
/**
 * Builds the per-platform hint list in a fixed order: ChatGPT, Perplexity,
 * Google AI Overviews, Bing Copilot.
 *
 * @param {object} audit - Audit result passed to every platform helper.
 * @param {object} [citability] - Site citability result; not passed to the Google helper.
 * @returns {Array<{ platform: string, status: string, tips: Array<string> }>}
 */
function generatePlatformHints(audit, citability) {
  return [
    chatgptHints(audit, citability),
    perplexityHints(audit, citability),
    googleAIHints(audit),
    bingCopilotHints(audit, citability)
  ];
}
|
|
1633
|
+
/**
 * Collects optimization tips for ChatGPT / SearchGPT from the audit and
 * citability results.
 *
 * Status is "good" for at most one tip, "needs-work" for two or three, and
 * "critical" otherwise. When no tip applies, a single "well-optimized"
 * message is emitted.
 *
 * @param {{ categories: Array<{ name: string, score: number, checks: Array<{ label: string, passed: boolean }> }> }} audit
 * @param {{ averageScore: number }} [citability]
 * @returns {{ platform: string, status: string, tips: Array<string> }}
 */
function chatgptHints(audit, citability) {
  const categoryNamed = (name) => audit.categories.find((category) => category.name === name);
  // True only when the category exists and has a passing check whose label contains `fragment`.
  const checkPassed = (category, fragment) => {
    if (category == null) return false;
    const match = category.checks.find((check) => check.label.includes(fragment));
    return Boolean(match && match.passed);
  };

  const aiAccess = categoryNamed("AI Access");
  const schema = categoryNamed("Schema Presence");
  const structure = categoryNamed("Content Structure");
  const tips = [];

  if (!checkPassed(aiAccess, "llms.txt")) {
    tips.push("Enable llms.txt \u2014 ChatGPT uses this to understand your site content");
  }
  if (!checkPassed(aiAccess, "blanket disallow")) {
    tips.push("Allow GPTBot in robots.txt \u2014 blanket disallow blocks ChatGPT crawling");
  }
  if (schema && schema.score < 12) {
    tips.push("Add Article or WebPage schema \u2014 helps ChatGPT extract structured content");
  }
  if (citability && citability.averageScore < 50) {
    tips.push("Improve answer blocks \u2014 ChatGPT citations favor self-contained factual paragraphs");
  }
  if (structure && structure.score < 12) {
    tips.push("Add more structured content \u2014 headings and paragraphs help SearchGPT parse your pages");
  }
  if (tips.length === 0) {
    tips.push("Your site is well-optimized for ChatGPT/SearchGPT");
  }

  let status = "critical";
  if (tips.length <= 1) {
    status = "good";
  } else if (tips.length <= 3) {
    status = "needs-work";
  }
  return { platform: "ChatGPT / SearchGPT", status, tips };
}
|
|
1662
|
+
/**
 * Collects optimization tips for Perplexity from the audit and citability
 * results.
 *
 * Status is "good" for at most one tip, "needs-work" for two or three, and
 * "critical" otherwise. When no tip applies, a single "well-optimized"
 * message is emitted.
 *
 * @param {{ categories: Array<{ name: string, score: number, checks: Array<{ label: string, passed: boolean }> }> }} audit
 * @param {{ averageScore: number }} [citability]
 * @returns {{ platform: string, status: string, tips: Array<string> }}
 */
function perplexityHints(audit, citability) {
  const categoryNamed = (name) => audit.categories.find((category) => category.name === name);
  const meta = categoryNamed("Meta Quality");
  const citabilityCategory = categoryNamed("Citability");
  const tips = [];

  if (citabilityCategory && citabilityCategory.score < 12) {
    tips.push("Add statistical claims and concrete data \u2014 Perplexity favors cite-able facts");
  }
  if (meta && meta.score < 12) {
    tips.push("Improve page titles and descriptions \u2014 Perplexity uses meta tags for source attribution");
  }

  // A missing category or missing FAQ check counts as not passed.
  let faqPassed = false;
  if (citabilityCategory != null) {
    const faqCheck = citabilityCategory.checks.find((check) => check.label.includes("FAQ"));
    faqPassed = Boolean(faqCheck && faqCheck.passed);
  }
  if (!faqPassed) {
    tips.push("Add FAQ sections with question headings \u2014 Perplexity surfaces Q&A content prominently");
  }

  if (citability && citability.averageScore < 40) {
    tips.push("Improve self-contained paragraphs \u2014 Perplexity extracts and cites individual passages");
  }
  if (tips.length === 0) {
    tips.push("Your site is well-optimized for Perplexity");
  }

  let status = "critical";
  if (tips.length <= 1) {
    status = "good";
  } else if (tips.length <= 3) {
    status = "needs-work";
  }
  return { platform: "Perplexity", status, tips };
}
|
|
1686
|
+
/**
 * Collects optimization tips for Google AI Overviews from the "Schema
 * Presence" audit category.
 *
 * Four checks are looked up by label fragment; each one that is missing or
 * not passed yields a tip. Status is "good" for at most one tip,
 * "needs-work" for two or three, and "critical" otherwise.
 *
 * @param {{ categories: Array<{ name: string, checks: Array<{ label: string, passed: boolean }> }> }} audit
 * @param {object} [citability] - Accepted for signature parity; not read.
 * @returns {{ platform: string, status: string, tips: Array<string> }}
 */
function googleAIHints(audit, citability) {
  const schema = audit.categories.find((category) => category.name === "Schema Presence");

  // Label fragment -> tip emitted when that check is absent or failing.
  const requirements = [
    ["Organization", "Configure Organization name \u2014 Google AI uses this for E-E-A-T authority signals"],
    ["sameAs", "Add sameAs social profiles \u2014 critical for Google Knowledge Panel and E-E-A-T"],
    ["logo", "Add Organization logo \u2014 enhances Google search presence and AI Overviews"],
    ["JSON-LD", "Enable Schema.org/JSON-LD \u2014 Google AI Overviews heavily rely on structured data"]
  ];

  const tips = [];
  for (const [fragment, tip] of requirements) {
    let passed = false;
    if (schema != null) {
      const check = schema.checks.find((candidate) => candidate.label.includes(fragment));
      passed = Boolean(check && check.passed);
    }
    if (!passed) {
      tips.push(tip);
    }
  }
  if (tips.length === 0) {
    tips.push("Your site is well-optimized for Google AI Overviews");
  }

  let status = "critical";
  if (tips.length <= 1) {
    status = "good";
  } else if (tips.length <= 3) {
    status = "needs-work";
  }
  return { platform: "Google AI Overviews", status, tips };
}
|
|
1712
|
+
/**
 * Collects optimization tips for Bing Copilot from the audit and citability
 * results.
 *
 * Status is "good" for at most one tip, "needs-work" for two or three, and
 * "critical" otherwise. When no tip applies, a single "well-optimized"
 * message is emitted.
 *
 * @param {{ categories: Array<{ name: string, checks: Array<{ label: string, passed: boolean }> }> }} audit
 * @param {{ averageScore: number }} [citability]
 * @returns {{ platform: string, status: string, tips: Array<string> }}
 */
function bingCopilotHints(audit, citability) {
  const categoryNamed = (name) => audit.categories.find((category) => category.name === name);
  // True only when the category exists and has a passing check whose label contains `fragment`.
  const checkPassed = (category, fragment) => {
    if (category == null) return false;
    const match = category.checks.find((check) => check.label.includes(fragment));
    return Boolean(match && match.passed);
  };

  const aiAccess = categoryNamed("AI Access");
  const meta = categoryNamed("Meta Quality");
  const tips = [];

  if (!checkPassed(aiAccess, "blanket disallow")) {
    tips.push("Remove blanket disallow \u2014 Bing Copilot requires Bingbot access");
  }
  if (!checkPassed(aiAccess, "sitemap")) {
    tips.push("Enable sitemap.xml \u2014 Bing Copilot relies on sitemaps for content discovery");
  }
  if (!checkPassed(meta, "Open Graph")) {
    tips.push("Enable OG meta tags \u2014 Bing Copilot uses OG metadata for rich answers");
  }
  if (citability && citability.averageScore < 40) {
    tips.push("Improve content structure \u2014 Bing Copilot favors well-organized content with clear answers");
  }
  if (tips.length === 0) {
    tips.push("Your site is well-optimized for Bing Copilot");
  }

  let status = "critical";
  if (tips.length <= 1) {
    status = "good";
  } else if (tips.length <= 3) {
    status = "needs-work";
  }
  return { platform: "Bing Copilot", status, tips };
}
|
|
1738
|
+
|
|
1739
|
+
// src/core/report.ts
|
|
1740
|
+
/**
 * Runs the site audit, citability scoring, and platform hint generation,
 * and bundles the results with a timestamp and a short site summary.
 *
 * @param {{ title: string, url: string, pages: Array<object> }} config
 * @returns {{ generatedAt: string, site: { title: string, url: string, pageCount: number }, audit: object, citability: object, platformHints: Array<object> }}
 *   `generatedAt` is an ISO 8601 timestamp taken when the report is built.
 */
function generateReport(config) {
  const audit = auditSite(config);
  const citability = scoreSiteCitability(config);
  const platformHints = generatePlatformHints(audit, citability);

  const generatedAt = new Date().toISOString();
  const site = {
    title: config.title,
    url: config.url,
    pageCount: config.pages.length
  };
  return { generatedAt, site, audit, citability, platformHints };
}
|
|
1756
|
+
/**
 * Renders a full report as Markdown: site header, the audit report, per-page
 * citability sections, and one subsection per platform with its tips.
 *
 * @param {{ generatedAt: string, site: { title: string, url: string, pageCount: number }, audit: object, citability: { averageScore: number, pages: Array<object> }, platformHints: Array<{ platform: string, status: string, tips: Array<string> }> }} report
 * @returns {string} Markdown lines joined with "\n".
 */
function formatReportMarkdown(report) {
  const statusIcon = (status) => {
    if (status === "good") return "+";
    return status === "needs-work" ? "~" : "!";
  };

  const lines = [
    `# AEO/GEO Report \u2014 ${report.site.title}`,
    "",
    `Generated: ${report.generatedAt}`,
    `URL: ${report.site.url}`,
    `Pages: ${report.site.pageCount}`,
    "",
    "## GEO Readiness Score",
    "",
    formatAuditReport(report.audit),
    "",
    "## AI Citability",
    "",
    `Average Citability Score: ${report.citability.averageScore}/100`,
    ""
  ];

  report.citability.pages.forEach((page) => {
    lines.push(formatPageCitability(page), "");
  });

  lines.push("## Platform Optimization", "");
  report.platformHints.forEach((hint) => {
    lines.push(`### ${statusIcon(hint.status)} ${hint.platform} (${hint.status})`, "");
    hint.tips.forEach((tip) => {
      lines.push(`- ${tip}`);
    });
    lines.push("");
  });

  return lines.join("\n");
}
|
|
1789
|
+
/**
 * Serializes a report as pretty-printed JSON with two-space indentation.
 *
 * @param {object} report
 * @returns {string}
 */
function formatReportJson(report) {
  const INDENT_WIDTH = 2;
  const serialized = JSON.stringify(report, null, INDENT_WIDTH);
  return serialized;
}
|
|
1792
|
+
|
|
1793
|
+
// src/index.ts
|
|
1794
|
+
/** Package version string exported from this module. */
var VERSION = "0.0.3";

/**
 * Identity helper for declaring a configuration object: returns the value it
 * is given, unchanged.
 *
 * @template T
 * @param {T} config
 * @returns {T} The same `config` reference.
 */
function defineConfig(config) {
  const declared = config;
  return declared;
}
|
|
1798
|
+
|
|
1799
|
+
export { VERSION, auditSite, defineConfig, detectFramework, extractDescription, extractJsonLd, extractTextFromHtml, extractTitle2 as extractTitle, formatAuditReport, formatPageCitability, formatReportJson, formatReportMarkdown, generateAEOFiles, generateAEOFiles as generateAll, generateJsonLdScript, generateOGTags, generateOGTagsHtml, generatePageSchemas, generatePlatformHints, generateReport, generateSchema, generateSchemaObjects, htmlToMarkdown, resolveConfig, scorePageCitability, scoreSiteCitability, validateConfig };
|
|
1800
|
+
//# sourceMappingURL=index.mjs.map
|
|
1801
|
+
//# sourceMappingURL=index.mjs.map
|