aeo.js 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -6
- package/dist/astro.d.mts +9 -0
- package/dist/astro.d.ts +9 -0
- package/dist/astro.js +1179 -0
- package/dist/astro.js.map +1 -0
- package/dist/astro.mjs +1172 -0
- package/dist/astro.mjs.map +1 -0
- package/dist/index.d.mts +17 -0
- package/dist/index.d.ts +17 -1
- package/dist/index.js +876 -1
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +869 -1
- package/dist/index.mjs.map +1 -0
- package/dist/next.d.mts +33 -0
- package/dist/next.d.ts +33 -0
- package/dist/next.js +1113 -0
- package/dist/next.js.map +1 -0
- package/dist/next.mjs +1106 -0
- package/dist/next.mjs.map +1 -0
- package/dist/react.d.mts +10 -0
- package/dist/react.d.ts +10 -0
- package/dist/react.js +1023 -0
- package/dist/react.js.map +1 -0
- package/dist/react.mjs +1020 -0
- package/dist/react.mjs.map +1 -0
- package/dist/types-BTY-v-7i.d.mts +132 -0
- package/dist/types-BTY-v-7i.d.ts +132 -0
- package/dist/webpack.d.mts +11 -0
- package/dist/webpack.d.ts +11 -0
- package/dist/webpack.js +1019 -0
- package/dist/webpack.js.map +1 -0
- package/dist/webpack.mjs +1013 -0
- package/dist/webpack.mjs.map +1 -0
- package/dist/widget.d.mts +37 -0
- package/dist/widget.d.ts +37 -0
- package/dist/widget.js +1004 -0
- package/dist/widget.js.map +1 -0
- package/dist/widget.mjs +1001 -0
- package/dist/widget.mjs.map +1 -0
- package/package.json +64 -10
- package/dist/cli.js +0 -2
package/dist/astro.js
ADDED
|
@@ -0,0 +1,1179 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
var fs = require('fs');
|
|
6
|
+
var path = require('path');
|
|
7
|
+
require('minimatch');
|
|
8
|
+
var crypto = require('crypto');
|
|
9
|
+
|
|
10
|
+
// src/core/robots.ts
|
|
11
|
+
// User-agent tokens that receive an explicit "Allow: /" group in the generated
// robots.txt. Order is preserved in the output. Entries must be unique when
// compared case-insensitively, because robots.txt matches user-agent tokens
// without regard to case and a duplicate only produces a redundant group.
var AI_CRAWLERS = [
  // AI assistants, answer engines and LLM data crawlers
  "GPTBot",
  "OAI-SearchBot",
  "ChatGPT-User",
  "ClaudeBot",
  "Claude-Web",
  "anthropic-ai",
  "PerplexityBot",
  "Google-Extended",
  "Gemini-Deep-Research",
  "Bingbot",
  "FacebookBot",
  "meta-externalagent",
  "Amazonbot",
  "Applebot",
  "DeepSeekBot",
  "Bytespider",
  "cohere-ai",
  "CCBot",
  "DiffBot",
  "YouBot",
  "FirecrawlAgent",
  "Crawl4AI",
  "BraveBot",
  // SEO tooling and regional search engines
  "SemrushBot",
  "AhrefsBot",
  "MJ12bot",
  "DotBot",
  "DataForSeoBot",
  "Screaming Frog SEO Spider",
  "SEOkicks",
  "BLEXBot",
  "Yandex",
  "Baiduspider",
  "Sogou",
  "Exabot",
  // Link-preview fetchers of social and messaging platforms
  "facebookexternalhit",
  "LinkedInBot",
  "WhatsApp",
  "Slackbot",
  "TwitterBot",
  "TelegramBot",
  "Discordbot",
  "PinterestBot",
  "TumblrBot",
  "ViberBot",
  "SkypeUriPreview",
  "redditbot",
  "Snapchat",
  "TikTok"
];
|
|
63
|
+
/**
 * Build the text of a robots.txt that explicitly allows Googlebot, Bingbot and
 * every crawler listed in AI_CRAWLERS, followed by a catch-all "Allow: /".
 *
 * @param {{ url?: string }} config - Only `config.url` is read. When present it
 *   is used for the Sitemap directive and for the commented AEO file list.
 * @returns {string} The robots.txt body, lines joined with "\n".
 */
function generateRobotsTxt(config) {
  // Drop trailing slashes so joined paths never contain "//".
  const baseUrl = config.url ? String(config.url).replace(/\/+$/, "") : "";
  const lines = [
    "# robots.txt generated by aeo.js",
    "# Allow AI crawlers to index this site",
    "",
    "# Traditional search engines",
    "User-agent: Googlebot",
    "Allow: /",
    "",
    "User-agent: Bingbot",
    "Allow: /",
    "",
    "# AI crawlers and answer engines"
  ];
  // Googlebot and Bingbot already have a group above; user-agent tokens are
  // matched case-insensitively, so track lowercase names to avoid repeats.
  const emitted = new Set(["googlebot", "bingbot"]);
  for (const crawler of AI_CRAWLERS) {
    const key = crawler.toLowerCase();
    if (emitted.has(key)) continue;
    emitted.add(key);
    lines.push(`User-agent: ${crawler}`, "Allow: /", "");
  }
  lines.push("# Default for all other bots", "User-agent: *", "Allow: /", "");
  if (baseUrl) {
    lines.push(`Sitemap: ${baseUrl}/sitemap.xml`);
  }
  lines.push(
    "",
    "# AEO (Answer Engine Optimization) files",
    "# These help LLMs understand your content better"
  );
  // Without a site URL the files are listed as root-relative paths instead of
  // printing the literal text "undefined".
  for (const file of ["llms.txt", "llms-full.txt", "docs.json", "ai-index.json"]) {
    lines.push(`# ${baseUrl}/${file}`);
  }
  return lines.join("\n");
}
|
|
98
|
+
|
|
99
|
+
// src/core/detect.ts
|
|
100
|
+
/**
 * Guess the web framework of a project from the packages named in its
 * package.json (dependencies and devDependencies combined).
 *
 * Rules are checked top to bottom and the first match wins, so a project that
 * lists both "next" and "vite" is reported as "next".
 *
 * @param {string} [projectRoot=process.cwd()] - Directory holding package.json.
 * @returns {{ framework: string, contentDir: string, outDir: string }}
 *   The detected framework with its default content and output directories;
 *   framework is "unknown" (src/dist) when nothing matches.
 */
function detectFramework(projectRoot = process.cwd()) {
  const manifest = readPackageJson(projectRoot);
  const installed = { ...manifest.dependencies, ...manifest.devDependencies };
  // [package names that identify the framework, framework, contentDir, outDir]
  const rules = [
    [["next"], "next", "app", "out"],
    [["nuxt", "@nuxt/kit"], "nuxt", "content", ".output/public"],
    [["astro", "@astrojs/astro"], "astro", "src/content", "dist"],
    [["@remix-run/dev"], "remix", "app", "build/client"],
    [["@sveltejs/kit"], "sveltekit", "src", "build"],
    [["@angular/core"], "angular", "src", "dist"],
    [["@docusaurus/core"], "docusaurus", "docs", "build"],
    [["vite"], "vite", "src", "dist"]
  ];
  for (const [packages, framework, contentDir, outDir] of rules) {
    if (packages.some((name) => installed[name])) {
      return { framework, contentDir, outDir };
    }
  }
  return { framework: "unknown", contentDir: "src", outDir: "dist" };
}
|
|
168
|
+
/**
 * Expand a partial user configuration into a complete one by filling in
 * defaults. Directory defaults come from detectFramework() (run against the
 * current working directory).
 *
 * Boolean switches (generators.*, widget.enabled, widget.showBadge) are on
 * unless explicitly set to `false`; every other option falls back to its
 * default whenever the supplied value is falsy.
 *
 * @param {object} [config={}] - Partial configuration.
 * @returns {object} Fully populated configuration.
 */
function resolveConfig(config = {}) {
  const detected = detectFramework();
  const generators = config.generators || {};
  const robots = config.robots || {};
  const widget = config.widget || {};
  const theme = widget.theme || {};
  const unlessDisabled = (flag) => flag !== false;
  return {
    title: config.title || "My Site",
    description: config.description || "",
    url: config.url || "https://example.com",
    contentDir: config.contentDir || detected.contentDir,
    outDir: config.outDir || detected.outDir,
    pages: config.pages || [],
    generators: {
      robotsTxt: unlessDisabled(generators.robotsTxt),
      llmsTxt: unlessDisabled(generators.llmsTxt),
      llmsFullTxt: unlessDisabled(generators.llmsFullTxt),
      rawMarkdown: unlessDisabled(generators.rawMarkdown),
      manifest: unlessDisabled(generators.manifest),
      sitemap: unlessDisabled(generators.sitemap),
      aiIndex: unlessDisabled(generators.aiIndex)
    },
    robots: {
      allow: robots.allow || ["/"],
      disallow: robots.disallow || [],
      crawlDelay: robots.crawlDelay || 0,
      sitemap: robots.sitemap || ""
    },
    widget: {
      enabled: unlessDisabled(widget.enabled),
      position: widget.position || "bottom-right",
      theme: {
        background: theme.background || "rgba(18, 18, 24, 0.9)",
        text: theme.text || "#C0C0C5",
        accent: theme.accent || "#E8E8EA",
        badge: theme.badge || "#4ADE80"
      },
      humanLabel: widget.humanLabel || "Human",
      aiLabel: widget.aiLabel || "AI",
      showBadge: unlessDisabled(widget.showBadge)
    }
  };
}
|
|
208
|
+
/**
 * Split a Markdown document into its leading `---` frontmatter block and body.
 *
 * This is a deliberately small parser, not a YAML implementation: each line is
 * read as `key: value` (split on the first colon), values are kept as strings,
 * and nested structures are not interpreted.
 *
 * @param {string} content - Raw file contents.
 * @returns {{ frontmatter: Object<string, string>, content: string }}
 *   Parsed keys and the body after the closing `---`. When no frontmatter
 *   block is found, `frontmatter` is empty and `content` is returned unchanged.
 */
function parseFrontmatter(content) {
  // The body group is optional so a file consisting only of frontmatter (the
  // closing `---` is the last line, with no trailing newline) is recognised.
  const match = content.match(/^---\s*\n([\s\S]*?)\n---\s*(?:\n([\s\S]*))?$/);
  if (!match) {
    return { frontmatter: {}, content };
  }
  // Remove one pair of surrounding quotes only when both ends carry the same
  // quote character; a stray quote inside the value must be preserved.
  const unquote = (value) => {
    const first = value[0];
    const quoted = value.length >= 2 && (first === '"' || first === "'") && value[value.length - 1] === first;
    return quoted ? value.slice(1, -1) : value;
  };
  const frontmatter = {};
  for (const line of match[1].split("\n")) {
    // YAML comment lines are not data.
    if (line.trim().startsWith("#")) continue;
    const [key, ...valueParts] = line.split(":");
    if (key && valueParts.length > 0) {
      // Re-join so values that contain ":" themselves (URLs, times) survive.
      frontmatter[key.trim()] = unquote(valueParts.join(":").trim());
    }
  }
  return { frontmatter, content: match[2] === undefined ? "" : match[2] };
}
|
|
226
|
+
/**
 * Demote ATX Markdown headings (`#` … `######`) by `levels`, capping at h6.
 *
 * Lines inside fenced code blocks (``` or ~~~) are left untouched so that
 * shell or Python comments such as `# install` are not turned into headings,
 * and a heading marker only counts when it is followed by a space or tab on
 * the same line (a bare `#` line never absorbs the following line).
 *
 * @param {string} content - Markdown text.
 * @param {number} [levels=1] - How many levels to add to each heading.
 * @returns {string} Markdown with adjusted heading markers.
 */
function bumpHeadings(content, levels = 1) {
  let openFence = null;
  const rewritten = content.split("\n").map((line) => {
    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1];
      if (openFence === null) {
        openFence = marker;
      } else if (marker[0] === openFence[0] && marker.length >= openFence.length) {
        // A fence closes only on the same character with at least the same length.
        openFence = null;
      }
      return line;
    }
    if (openFence !== null) {
      return line;
    }
    return line.replace(/^(#{1,6})(?=[ \t])/, (hashes) => {
      // Clamp to 1..6 so an unusual `levels` value cannot yield an invalid marker.
      const newLevel = Math.max(1, Math.min(hashes.length + levels, 6));
      return "#".repeat(newLevel);
    });
  });
  return rewritten.join("\n");
}
|
|
232
|
+
/**
 * Derive a title from Markdown text.
 *
 * Preference order: the first level-1 heading anywhere in the text, then the
 * first level-2 heading, then the first line truncated to 100 characters.
 *
 * @param {string} content - Markdown text.
 * @returns {string} The chosen title (may be empty for empty input).
 */
function extractTitle(content) {
  const headingPatterns = [/^#\s+(.+)$/m, /^##\s+(.+)$/m];
  for (const pattern of headingPatterns) {
    const found = pattern.exec(content);
    if (found) {
      return found[1];
    }
  }
  const newlineAt = content.indexOf("\n");
  const firstLine = newlineAt === -1 ? content : content.slice(0, newlineAt);
  return firstLine.slice(0, 100);
}
|
|
240
|
+
/**
 * Read and parse `<projectRoot>/package.json`.
 *
 * This is best-effort: a missing, unreadable or malformed file yields `{}`
 * instead of throwing. A file whose JSON is valid but not an object (`null`,
 * an array, a number, …) also yields `{}`, so callers can always read
 * properties from the result.
 *
 * @param {string} [projectRoot=process.cwd()] - Directory to look in.
 * @returns {object} Parsed package.json, or `{}`.
 */
function readPackageJson(projectRoot = process.cwd()) {
  const packageJsonPath = path.join(projectRoot, "package.json");
  let parsed;
  try {
    // Strip a UTF-8 byte-order mark, which JSON.parse would reject.
    const text = fs.readFileSync(packageJsonPath, "utf-8").replace(/^\uFEFF/, "");
    parsed = JSON.parse(text);
  } catch {
    return {};
  }
  const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isRecord ? parsed : {};
}
|
|
252
|
+
|
|
253
|
+
// src/core/llms-txt.ts
|
|
254
|
+
/**
 * Recursively gather every `.md` / `.mdx` file under `dir`.
 *
 * Hidden directories (leading ".") and `node_modules` are skipped. Entries are
 * visited in sorted order so the result is deterministic. A failure on one
 * entry (for example a dangling symlink) is logged and does not stop the rest
 * of the directory from being processed.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [base=dir] - Root used to compute each file's relative path.
 * @returns {Array<{path: string, content: string, title: string,
 *   description: (string|undefined), frontmatter: object}>}
 *   One record per file; `path` is relative to `base` and always uses "/".
 */
function collectMarkdownFiles(dir, base = dir) {
  const files = [];
  let entries;
  try {
    entries = fs.readdirSync(dir).sort();
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return files;
  }
  for (const entry of entries) {
    const fullPath = path.join(dir, entry);
    try {
      const stat = fs.statSync(fullPath);
      if (stat.isDirectory()) {
        if (!entry.startsWith(".") && entry !== "node_modules") {
          files.push(...collectMarkdownFiles(fullPath, base));
        }
        continue;
      }
      const extension = path.extname(entry);
      if (!stat.isFile() || (extension !== ".md" && extension !== ".mdx")) {
        continue;
      }
      const raw = fs.readFileSync(fullPath, "utf-8");
      const { frontmatter, content: mainContent } = parseFrontmatter(raw);
      // Consumers build URLs from this path, so normalise Windows separators.
      const relativePath = path.relative(base, fullPath).split(path.sep).join("/");
      files.push({
        path: relativePath,
        content: mainContent,
        title: frontmatter.title || extractTitle(mainContent),
        description: frontmatter.description,
        frontmatter
      });
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }
  return files;
}
|
|
281
|
+
/**
 * Build the contents of `llms.txt`: a Markdown overview of the site listing
 * the configured pages, the Markdown documents found under
 * `config.contentDir` (grouped by top-level directory), and links to the other
 * generated AEO files.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{pathname: string, title?: string,
 *   description?: string}> }} config - Resolved configuration.
 * @returns {string} The llms.txt body, lines joined with "\n".
 */
function generateLlmsTxt(config) {
  // Drop trailing slashes so joined paths never contain "//".
  const baseUrl = String(config.url || "").replace(/\/+$/, "");
  const lines = [`# ${config.title}`, ""];
  if (config.description) {
    lines.push(`> ${config.description}`, "");
  }
  lines.push(
    "## About",
    "",
    "This file provides a structured overview of the documentation and content available on this site,",
    "optimized for consumption by Large Language Models (LLMs) and AI assistants.",
    ""
  );
  if (config.pages && config.pages.length > 0) {
    lines.push("## Pages", "");
    for (const page of config.pages) {
      const url = `${baseUrl}${page.pathname === "/" ? "" : page.pathname}`;
      lines.push(`- [${page.title || page.pathname}](${url})`);
      if (page.description) {
        lines.push(` ${page.description}`);
      }
    }
    lines.push("");
  }
  const markdownFiles = collectMarkdownFiles(config.contentDir);
  if (markdownFiles.length > 0) {
    lines.push("## Documentation", "");
    // The empty string can never be a directory name, so it safely marks files
    // that sit directly in the content root.
    const ROOT_GROUP = "";
    // A Map (not a plain object) so directories named like Object.prototype
    // members ("constructor", "__proto__") are ordinary keys.
    const grouped = new Map();
    for (const file of markdownFiles) {
      const urlPath = file.path.split(path.sep).join("/");
      const slashAt = urlPath.indexOf("/");
      const group = slashAt === -1 ? ROOT_GROUP : urlPath.slice(0, slashAt);
      if (!grouped.has(group)) grouped.set(group, []);
      grouped.get(group).push({ file, urlPath });
    }
    for (const [group, members] of grouped) {
      lines.push(`### ${group === ROOT_GROUP ? "Main Documentation" : group}`, "");
      for (const { file, urlPath } of members) {
        lines.push(`- [${file.title}](${baseUrl}/${urlPath.replace(/\.mdx?$/, "")})`);
        if (file.description) {
          lines.push(` ${file.description}`);
        }
      }
      lines.push("");
    }
  }
  lines.push(
    "## Quick Links",
    "",
    `- Full Documentation: ${baseUrl}/llms-full.txt`,
    `- Documentation Manifest: ${baseUrl}/docs.json`,
    `- AI-Optimized Index: ${baseUrl}/ai-index.json`,
    `- Sitemap: ${baseUrl}/sitemap.xml`,
    "",
    "## For LLMs",
    "",
    "To get the complete documentation in a single file, request:",
    `${baseUrl}/llms-full.txt`,
    "",
    "For structured access to individual pages with metadata:",
    `${baseUrl}/docs.json`,
    "",
    "For RAG (Retrieval Augmented Generation) systems:",
    `${baseUrl}/ai-index.json`,
    "",
    "---",
    "Generated by aeo.js - Answer Engine Optimization for the modern web",
    "Learn more at https://aeojs.org"
  );
  return lines.join("\n");
}
|
|
354
|
+
/**
 * Recursively turn every `.md` / `.mdx` file under `dir` into a text section
 * for the concatenated `llms-full.txt` document.
 *
 * Each section starts with a `---` rule, a level-1 title (frontmatter `title`
 * or the relative path), a `Source:` line and an optional quoted description,
 * followed by the file body with all headings demoted one level. Hidden
 * directories and `node_modules` are skipped; entries are visited in sorted
 * order. A failure on one entry is logged and does not stop the others.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [base=dir] - Root used to compute relative source paths.
 * @returns {string[]} One formatted section per file.
 */
function collectAndConcatenateMarkdown(dir, base = dir) {
  const sections = [];
  let entries;
  try {
    entries = fs.readdirSync(dir).sort();
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return sections;
  }
  for (const entry of entries) {
    const fullPath = path.join(dir, entry);
    try {
      const stat = fs.statSync(fullPath);
      if (stat.isDirectory()) {
        if (!entry.startsWith(".") && entry !== "node_modules") {
          sections.push(...collectAndConcatenateMarkdown(fullPath, base));
        }
        continue;
      }
      const extension = path.extname(entry);
      if (!stat.isFile() || (extension !== ".md" && extension !== ".mdx")) {
        continue;
      }
      const raw = fs.readFileSync(fullPath, "utf-8");
      const { frontmatter, content: mainContent } = parseFrontmatter(raw);
      // Use "/" on every platform so the output does not depend on the build host.
      const relativePath = path.relative(base, fullPath).split(path.sep).join("/");
      const sectionLines = [
        "---",
        "",
        `# ${frontmatter.title || relativePath}`,
        "",
        `Source: ${relativePath}`,
        ""
      ];
      if (frontmatter.description) {
        sectionLines.push(`> ${frontmatter.description}`, "");
      }
      // The section title above is the h1, so the file's own headings move down one level.
      sectionLines.push(bumpHeadings(mainContent, 1), "");
      sections.push(sectionLines.join("\n"));
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }
  return sections;
}
|
|
393
|
+
/**
 * Build the contents of `llms-full.txt`: a header, one section per configured
 * page, one section per Markdown file under `config.contentDir`, and a footer.
 * When neither pages nor files produce a section, a single placeholder section
 * describing the site itself is emitted instead.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{pathname: string, title?: string,
 *   description?: string, content?: string}> }} config - Resolved configuration.
 * @returns {string} The document, parts joined with "\n".
 */
function generateLlmsFullTxt(config) {
  const out = [
    `# ${config.title} - Complete Documentation`,
    "",
    `This file contains all documentation concatenated into a single file for easy consumption by LLMs.`,
    ""
  ];
  if (config.description) {
    out.push(`> ${config.description}`, "");
  }
  out.push(
    "## Table of Contents",
    "",
    "This document includes all content from this project.",
    "Each section is separated by a horizontal rule (---) for easy parsing.",
    ""
  );

  // Sections for explicitly configured pages come first.
  let pageSectionCount = 0;
  if (config.pages && config.pages.length > 0) {
    for (const page of config.pages) {
      const suffix = page.pathname === "/" ? "" : page.pathname;
      const parts = [
        "---",
        "",
        `# ${page.title || page.pathname}`,
        "",
        `URL: ${config.url}${suffix}`,
        ""
      ];
      if (page.description) {
        parts.push(`> ${page.description}`, "");
      }
      if (page.content) {
        parts.push(page.content, "");
      }
      out.push(parts.join("\n"));
      pageSectionCount += 1;
    }
  }

  // Then every Markdown file found in the content directory.
  const fileSections = collectAndConcatenateMarkdown(config.contentDir);
  for (const section of fileSections) {
    out.push(section);
  }

  const nothingCollected = pageSectionCount === 0 && fileSections.length === 0;
  if (nothingCollected) {
    out.push("---", "", `# ${config.title}`, "", `URL: ${config.url}`, "");
    if (config.description) {
      out.push(config.description, "");
    }
  }

  out.push(
    "---",
    "",
    "## About This Document",
    "",
    "This concatenated documentation file is generated automatically by aeo.js",
    "to make it easier for AI systems to understand the complete context of this project.",
    "",
    `For a structured index, see: ${config.url}/llms.txt`,
    `For individual files, see: ${config.url}/docs.json`,
    "",
    "Generated by aeo.js - https://aeojs.org"
  );
  return out.join("\n");
}
|
|
464
|
+
/**
 * Create a directory, including any missing parent directories.
 * Does nothing when the directory already exists.
 *
 * @param {string} path - Directory to create. Note that this parameter shadows
 *   the module-level `path` import inside the function body.
 * @returns {void}
 */
function ensureDir(path) {
  const options = { recursive: true };
  fs.mkdirSync(path, options);
}
|
|
467
|
+
/**
 * Copy every `.md` file under `config.contentDir` into `config.outDir`,
 * preserving the relative directory layout.
 *
 * Hidden directories and `node_modules` are skipped. The output directory
 * itself is also skipped when it lies inside the content directory, so files
 * produced by an earlier run are never copied into ever-deeper nested copies.
 * A failure on one entry is logged and does not stop the remaining entries.
 *
 * @param {{ contentDir: string, outDir: string }} config - Resolved configuration.
 * @returns {Array<{source: string, destination: string}>} The files that were copied.
 */
function copyMarkdownFiles(config) {
  const copiedFiles = [];
  const outputRoot = path.resolve(config.outDir);
  function copyRecursive(dir, base = config.contentDir) {
    let entries;
    try {
      entries = fs.readdirSync(dir);
    } catch (error) {
      console.warn(`Warning: Could not read directory ${dir}:`, error);
      return;
    }
    for (const entry of entries) {
      const fullPath = path.join(dir, entry);
      try {
        const stat = fs.statSync(fullPath);
        if (stat.isDirectory()) {
          const walkable = !entry.startsWith(".") && entry !== "node_modules" && path.resolve(fullPath) !== outputRoot;
          if (walkable) {
            copyRecursive(fullPath, base);
          }
          continue;
        }
        if (!stat.isFile() || path.extname(entry) !== ".md") {
          continue;
        }
        const destPath = path.join(config.outDir, path.relative(base, fullPath));
        fs.mkdirSync(path.dirname(destPath), { recursive: true });
        fs.copyFileSync(fullPath, destPath);
        copiedFiles.push({
          source: fullPath,
          destination: destPath
        });
      } catch (error) {
        console.warn(`Warning: Could not copy ${fullPath}:`, error);
      }
    }
  }
  copyRecursive(config.contentDir);
  return copiedFiles;
}
|
|
499
|
+
/**
 * Write one Markdown file per configured page into `config.outDir`.
 *
 * The root page ("/") becomes `index.md`; any other pathname becomes
 * `<pathname>.md` with leading and trailing slashes removed. Pages that have
 * neither content nor a title are skipped. Each file starts with a small
 * frontmatter block (title, description, url, source, generated_by).
 *
 * @param {{ title: string, url: string, outDir: string,
 *   pages?: Array<{pathname: string, title?: string, description?: string,
 *   content?: string}> }} config - Resolved configuration.
 * @returns {Array<{pathname: string, destination: string}>}
 *   The pages that were written successfully.
 */
function generatePageMarkdownFiles(config) {
  const generated = [];
  const outputRoot = path.resolve(config.outDir);
  for (const page of config.pages || []) {
    const isRoot = page.pathname === "/";
    const pageTitle = page.title || (isRoot ? config.title : void 0);
    if (!page.content && !pageTitle) continue;
    const filename = isRoot ? "index.md" : `${page.pathname.replace(/^\//, "").replace(/\/$/, "")}.md`;
    const destPath = path.join(config.outDir, filename);
    // A pathname containing ".." must not be able to write outside outDir.
    const relativeToRoot = path.relative(outputRoot, path.resolve(destPath));
    const escapesRoot = relativeToRoot === ".." || relativeToRoot.startsWith(`..${path.sep}`) || path.isAbsolute(relativeToRoot);
    if (escapesRoot) {
      console.warn(`Warning: Skipping page ${page.pathname}: it resolves outside ${config.outDir}`);
      continue;
    }
    const pageUrl = isRoot ? config.url : `${config.url.replace(/\/$/, "")}${page.pathname}`;
    const lines = ["---"];
    // JSON.stringify yields a double-quoted scalar with quotes, backslashes
    // and newlines escaped, so such characters cannot break the frontmatter.
    if (pageTitle) lines.push(`title: ${JSON.stringify(String(pageTitle))}`);
    if (page.description) lines.push(`description: ${JSON.stringify(String(page.description))}`);
    lines.push(`url: ${pageUrl}`, `source: ${pageUrl}`, `generated_by: aeo.js`, "---", "");
    if (pageTitle) {
      lines.push(`# ${pageTitle}`, "");
    }
    if (page.description) {
      lines.push(`${page.description}`, "");
    }
    if (page.content) {
      lines.push(page.content);
    }
    try {
      fs.mkdirSync(path.dirname(destPath), { recursive: true });
      fs.writeFileSync(destPath, lines.join("\n"), "utf-8");
      generated.push({ pathname: page.pathname, destination: destPath });
    } catch (error) {
      // Best effort: report the failure and continue with the remaining pages.
      console.warn(`Warning: Could not write ${destPath}:`, error);
    }
  }
  return generated;
}
|
|
541
|
+
/**
 * Recursively describe every `.md` / `.mdx` file under `dir` as a manifest
 * entry for `docs.json`.
 *
 * Hidden directories and `node_modules` are skipped. A failure on one entry is
 * logged and does not stop the rest of the directory from being processed.
 *
 * @param {string} dir - Directory to scan.
 * @param {{ url: string }} config - Only `config.url` is read, as URL prefix.
 * @param {string} [base=dir] - Root used to compute each file's URL path.
 * @returns {Array<{url: string, title: string, description: (string|undefined),
 *   lastModified: string}>} One entry per file; `lastModified` is the file's
 *   mtime as an ISO-8601 string.
 */
function collectManifestEntries(dir, config, base = dir) {
  const entries = [];
  let files;
  try {
    files = fs.readdirSync(dir);
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return entries;
  }
  for (const file of files) {
    const fullPath = path.join(dir, file);
    try {
      const stat = fs.statSync(fullPath);
      if (stat.isDirectory()) {
        if (!file.startsWith(".") && file !== "node_modules") {
          entries.push(...collectManifestEntries(fullPath, config, base));
        }
        continue;
      }
      const extension = path.extname(file);
      if (!stat.isFile() || (extension !== ".md" && extension !== ".mdx")) {
        continue;
      }
      const raw = fs.readFileSync(fullPath, "utf-8");
      const { frontmatter, content: mainContent } = parseFrontmatter(raw);
      // URLs always use "/", whatever separator the host platform uses.
      const urlPath = path.relative(base, fullPath).split(path.sep).join("/").replace(/\.mdx?$/, "");
      entries.push({
        url: `${config.url}/${urlPath}`,
        title: frontmatter.title || extractTitle(mainContent),
        description: frontmatter.description,
        lastModified: stat.mtime.toISOString()
      });
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }
  return entries;
}
|
|
568
|
+
/**
 * Build the `docs.json` manifest: site metadata plus a URL-sorted list of
 * documents made of the configured pages followed by the Markdown files found
 * under `config.contentDir`.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{pathname: string, title?: string,
 *   description?: string}> }} config - Resolved configuration.
 * @returns {string} Pretty-printed JSON (2-space indent).
 */
function generateManifest(config) {
  const documents = [];
  const hasPages = config.pages && config.pages.length > 0;
  if (hasPages) {
    for (const page of config.pages) {
      const suffix = page.pathname === "/" ? "" : page.pathname;
      documents.push({
        url: `${config.url}${suffix}`,
        title: page.title || page.pathname,
        description: page.description
      });
    }
  }
  for (const fileEntry of collectManifestEntries(config.contentDir, config)) {
    documents.push(fileEntry);
  }
  const generated = (/* @__PURE__ */ new Date()).toISOString();
  documents.sort((left, right) => left.url.localeCompare(right.url));
  return JSON.stringify(
    {
      version: "1.0",
      generated,
      site: {
        title: config.title,
        description: config.description,
        url: config.url
      },
      documents,
      metadata: {
        totalDocuments: documents.length,
        generator: "aeo.js",
        generatorUrl: "https://aeojs.org"
      }
    },
    null,
    2
  );
}
|
|
597
|
+
/**
 * Recursively derive a public URL for every `.md`, `.mdx` and `.html` file
 * under `dir`: `config.url`, a slash, then the file's path relative to `base`
 * with the extension removed.
 *
 * Hidden directories and `node_modules` are skipped. A failure on one entry is
 * logged and does not stop the rest of the directory from being processed.
 *
 * @param {string} dir - Directory to scan.
 * @param {{ url: string }} config - Only `config.url` is read, as URL prefix.
 * @param {string} [base=dir] - Root used to compute each file's URL path.
 * @returns {string[]} One URL per matching file.
 */
function collectUrls(dir, config, base = dir) {
  const urls = [];
  const pageExtensions = [".md", ".mdx", ".html"];
  let entries;
  try {
    entries = fs.readdirSync(dir);
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return urls;
  }
  for (const entry of entries) {
    const fullPath = path.join(dir, entry);
    try {
      const stat = fs.statSync(fullPath);
      if (stat.isDirectory()) {
        if (!entry.startsWith(".") && entry !== "node_modules") {
          urls.push(...collectUrls(fullPath, config, base));
        }
        continue;
      }
      if (!stat.isFile() || !pageExtensions.includes(path.extname(entry))) {
        continue;
      }
      // URLs always use "/", whatever separator the host platform uses.
      const relativePath = path.relative(base, fullPath).split(path.sep).join("/");
      urls.push(`${config.url}/${relativePath.replace(/\.(md|mdx|html)$/, "")}`);
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }
  return urls;
}
|
|
617
|
+
/**
 * Escape the five characters that have predefined entities in XML so the
 * string can be placed safely inside element content or attribute values.
 *
 * @param {string} str - Text to escape.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by
 *   `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`.
 */
function escapeXml(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;"
  };
  // A single pass means an ampersand introduced by one replacement is never escaped again.
  return str.replace(/[&<>"']/g, (character) => entities[character]);
}
|
|
620
|
+
/**
 * Build a sitemap.xml listing the configured pages, every page-like file found
 * under `config.contentDir`, and the site root. URLs are de-duplicated and
 * sorted; each gets today's date as lastmod, a weekly changefreq and a
 * priority of 0.8.
 *
 * @param {{ url: string, contentDir?: string,
 *   pages?: Array<{pathname: string}> }} config - Resolved configuration.
 * @returns {string} The sitemap XML, lines joined with "\n".
 */
function generateSitemap(config) {
  // A Set keeps first-seen order and drops duplicates as URLs are gathered.
  const seen = new Set();
  if (config.pages && config.pages.length > 0) {
    for (const page of config.pages) {
      const suffix = page.pathname === "/" ? "" : page.pathname;
      seen.add(`${config.url}${suffix}`);
    }
  }
  if (config.contentDir) {
    for (const found of collectUrls(config.contentDir, config)) {
      seen.add(found);
    }
  }
  seen.add(config.url);
  const orderedUrls = Array.from(seen).sort();

  const xml = [];
  xml.push('<?xml version="1.0" encoding="UTF-8"?>');
  xml.push('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
  for (const location of orderedUrls) {
    const today = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
    xml.push(
      " <url>",
      ` <loc>${escapeXml(location)}</loc>`,
      ` <lastmod>${today}</lastmod>`,
      " <changefreq>weekly</changefreq>",
      " <priority>0.8</priority>",
      " </url>"
    );
  }
  xml.push("</urlset>");
  return xml.join("\n");
}
|
|
647
|
+
/**
 * Pick up to ten keywords from a text: the most frequent lowercase
 * alphanumeric words longer than three characters, most frequent first.
 * Words with equal counts keep the order in which they first appear.
 *
 * @param {string} content - Text to analyse.
 * @returns {string[]} At most ten keywords.
 */
function extractKeywords(content) {
  // A Map is used instead of a plain object: on an object, words such as
  // "constructor" would read inherited Object.prototype members as their
  // initial count and corrupt both the tally and the sort.
  const counts = new Map();
  const tokens = content.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/);
  for (const token of tokens) {
    if (token.length <= 3) continue;
    counts.set(token, (counts.get(token) || 0) + 1);
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([word]) => word);
}
|
|
655
|
+
/**
 * Split text into chunks of roughly `maxLength` characters, breaking only at
 * blank lines (paragraph boundaries).
 *
 * Paragraphs are accumulated until adding the next one would exceed
 * `maxLength`; the accumulated text is then emitted (trimmed) and a new chunk
 * begins. A single paragraph longer than `maxLength` is never split, so a
 * chunk can exceed the limit.
 *
 * @param {string} content - Text to split.
 * @param {number} [maxLength=2000] - Soft upper bound on chunk length.
 * @returns {string[]} The chunks, in document order.
 */
function chunkContent(content, maxLength = 2e3) {
  const chunks = [];
  let buffer = "";
  for (const block of content.split("\n\n")) {
    const wouldOverflow = buffer.length + block.length > maxLength;
    if (buffer.length > 0 && wouldOverflow) {
      chunks.push(buffer.trim());
      buffer = "";
    }
    buffer = `${buffer}${block}\n\n`;
  }
  const remainder = buffer.trim();
  if (remainder) {
    chunks.push(remainder);
  }
  return chunks;
}
|
|
671
|
+
/**
 * Recursively convert every `.md` / `.mdx` file under `dir` into chunked
 * entries for `ai-index.json`.
 *
 * Each file body is split with chunkContent(); every chunk becomes one entry
 * whose id is the first 16 hex characters of SHA-256 over "<url>-<chunkIndex>".
 * Hidden directories and `node_modules` are skipped. A failure on one entry is
 * logged and does not stop the rest of the directory from being processed.
 *
 * @param {string} dir - Directory to scan.
 * @param {{ url: string }} config - Only `config.url` is read, as URL prefix.
 * @param {string} [base=dir] - Root used to compute each file's URL path.
 * @returns {Array<object>} Entries with id, url, title, content, description,
 *   keywords and metadata (frontmatter plus chunkIndex, totalChunks,
 *   sourcePath).
 */
function collectAIIndexEntries(dir, config, base = dir) {
  const entries = [];
  let files;
  try {
    files = fs.readdirSync(dir);
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
    return entries;
  }
  for (const file of files) {
    const fullPath = path.join(dir, file);
    try {
      const stat = fs.statSync(fullPath);
      if (stat.isDirectory()) {
        if (!file.startsWith(".") && file !== "node_modules") {
          entries.push(...collectAIIndexEntries(fullPath, config, base));
        }
        continue;
      }
      const extension = path.extname(file);
      if (!stat.isFile() || (extension !== ".md" && extension !== ".mdx")) {
        continue;
      }
      const raw = fs.readFileSync(fullPath, "utf-8");
      const { frontmatter, content: mainContent } = parseFrontmatter(raw);
      // URLs and source paths always use "/", whatever the host platform uses.
      const relativePath = path.relative(base, fullPath).split(path.sep).join("/");
      const url = `${config.url}/${relativePath.replace(/\.mdx?$/, "")}`;
      const chunks = chunkContent(mainContent);
      const title = frontmatter.title || extractTitle(mainContent);
      const keywords = extractKeywords(mainContent);
      chunks.forEach((chunk, index) => {
        const id = crypto.createHash("sha256").update(`${url}-${index}`).digest("hex").slice(0, 16);
        entries.push({
          id,
          url,
          // Multi-chunk documents get a part suffix so titles stay distinguishable.
          title: chunks.length > 1 ? `${title} (Part ${index + 1})` : title,
          content: chunk,
          description: frontmatter.description,
          keywords,
          metadata: {
            ...frontmatter,
            chunkIndex: index,
            totalChunks: chunks.length,
            sourcePath: relativePath
          }
        });
      });
    } catch (error) {
      console.warn(`Warning: Could not read ${fullPath}:`, error);
    }
  }
  return entries;
}
|
|
713
|
+
/**
 * Build the `ai-index.json` document: site metadata plus chunked content
 * entries for the configured pages and for every Markdown file under
 * `config.contentDir`, sorted by entry id.
 *
 * A page with content is split with chunkContent() and yields one entry per
 * chunk; a page without content yields a single entry whose content is its
 * description (or its title when it has no description).
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{pathname: string, title?: string,
 *   description?: string, content?: string}> }} config - Resolved configuration.
 * @returns {string} Pretty-printed JSON (2-space indent).
 */
function generateAIIndex(config) {
  // Entry ids are the first 16 hex characters of a SHA-256 digest.
  const shortHash = (text) => crypto.createHash("sha256").update(text).digest("hex").slice(0, 16);
  const entries = [];
  const pages = config.pages && config.pages.length > 0 ? config.pages : [];
  for (const page of pages) {
    const url = `${config.url}${page.pathname === "/" ? "" : page.pathname}`;
    const title = page.title || page.pathname;
    const body = page.content || "";
    if (!body) {
      entries.push({
        id: shortHash(url),
        url,
        title,
        content: page.description || title,
        description: page.description,
        keywords: []
      });
      continue;
    }
    const chunks = chunkContent(body);
    const keywords = extractKeywords(body);
    const multipart = chunks.length > 1;
    for (let position = 0; position < chunks.length; position += 1) {
      entries.push({
        id: shortHash(`${url}-${position}`),
        url,
        title: multipart ? `${title} (Part ${position + 1})` : title,
        content: chunks[position],
        description: page.description,
        keywords,
        metadata: {
          chunkIndex: position,
          totalChunks: chunks.length,
          sourcePath: page.pathname
        }
      });
    }
  }
  for (const fileEntry of collectAIIndexEntries(config.contentDir, config)) {
    entries.push(fileEntry);
  }
  const generated = (/* @__PURE__ */ new Date()).toISOString();
  entries.sort((left, right) => left.id.localeCompare(right.id));
  const index = {
    version: "1.0",
    generated,
    site: {
      title: config.title,
      description: config.description,
      url: config.url
    },
    entries,
    metadata: {
      totalEntries: entries.length,
      generator: "aeo.js",
      generatorUrl: "https://aeojs.org",
      embedding: {
        recommended: "text-embedding-ada-002",
        dimensions: 1536
      }
    }
  };
  return JSON.stringify(index, null, 2);
}
|
|
774
|
+
/**
 * Generate every enabled AEO artifact into the configured output directory.
 *
 * Accepts either an output directory string plus a partial config, an
 * already-resolved config (recognised by a boolean `generators.robotsTxt`),
 * or a partial config that still needs `resolveConfig`.
 *
 * Each generator runs in isolation: a failure is recorded in `errors`
 * under the artifact's label and the remaining generators still run.
 *
 * @param {string|object} configOrRoot - Output directory or configuration.
 * @param {object} [maybeConfig] - Extra options when the first argument is a path.
 * @returns {Promise<{files: string[], errors: string[]}>} Names of produced
 *   files and one message per failed generator.
 */
async function generateAEOFiles(configOrRoot, maybeConfig) {
  let config;
  if (typeof configOrRoot === "string") {
    config = resolveConfig({ ...maybeConfig, outDir: configOrRoot });
  } else {
    const generatorFlags =
      configOrRoot && typeof configOrRoot === "object" && "generators" in configOrRoot
        ? configOrRoot.generators
        : void 0;
    const alreadyResolved = generatorFlags != null && typeof generatorFlags.robotsTxt === "boolean";
    config = alreadyResolved ? configOrRoot : resolveConfig(configOrRoot);
  }

  const outDir = config.outDir;
  const files = [];
  const errors = [];

  if (!fs.existsSync(outDir)) {
    fs.mkdirSync(outDir, { recursive: true });
  }

  // Run one generation step; a thrown error becomes "<label>: <message>".
  const guarded = (label, task) => {
    try {
      task();
    } catch (e) {
      errors.push(`${label}: ${e.message}`);
    }
  };

  // Render a single file into outDir, optionally prefixed with a UTF-8 BOM.
  const writeOutput = (fileName, render, withBom) =>
    guarded(fileName, () => {
      const content = render();
      fs.writeFileSync(path.join(outDir, fileName), withBom ? "\uFEFF" + content : content, "utf-8");
      files.push(fileName);
    });

  // Record the destinations reported by a multi-file generator.
  const collectDestinations = (label, produce) =>
    guarded(label, () => {
      for (const f of produce()) {
        files.push(f.destination);
      }
    });

  if (config.generators.robotsTxt) {
    writeOutput("robots.txt", () => generateRobotsTxt(config), false);
  }
  if (config.generators.llmsTxt) {
    writeOutput("llms.txt", () => generateLlmsTxt(config), true);
  }
  if (config.generators.llmsFullTxt) {
    writeOutput("llms-full.txt", () => generateLlmsFullTxt(config), true);
  }
  if (config.generators.rawMarkdown) {
    collectDestinations("page-markdown", () => generatePageMarkdownFiles(config));
    collectDestinations("raw-markdown", () => copyMarkdownFiles(config));
  }
  if (config.generators.manifest) {
    writeOutput("docs.json", () => generateManifest(config), false);
  }
  if (config.generators.sitemap) {
    writeOutput("sitemap.xml", () => generateSitemap(config), false);
  }
  if (config.generators.aiIndex) {
    writeOutput("ai-index.json", () => generateAIIndex(config), false);
  }

  return { files, errors };
}
|
|
864
|
+
/**
 * Discover rendered pages in a build output directory.
 *
 * Walks `dir` recursively (skipping dot-directories and `_astro`), reads
 * every `.html` file except `404.html` / `500.html`, and derives for each
 * one a URL pathname, a title (the part of `<title>` before the first `|`),
 * the meta description, and the text returned by `extractTextFromHtml`.
 *
 * Pathnames are computed with `path.relative`, so the result does not
 * depend on `dir` being normalised or on the platform's path separator.
 * Unreadable entries are reported with a warning and skipped so one bad
 * file cannot hide the rest of its directory.
 *
 * @param {string} dir - Root of the build output.
 * @param {string} [baseUrl] - Accepted for interface compatibility; unused.
 * @returns {Array<{pathname: string, title: (string|undefined),
 *   description: (string|undefined), content: string}>} Discovered pages.
 */
function scanBuiltPages(dir, baseUrl) {
  const pages = [];

  // Map an absolute file path to the site pathname it is served under.
  function toPathname(fullPath, entry) {
    const relativePath = path.relative(dir, fullPath).split(path.sep).join("/");
    let pathname;
    if (entry === "index.html") {
      pathname = `/${relativePath.replace(/\/?index\.html$/, "")}`;
      if (pathname !== "/") pathname = pathname.replace(/\/$/, "");
    } else {
      pathname = `/${relativePath.replace(/\.html$/, "")}`;
    }
    return pathname.replace(/\/+/g, "/") || "/";
  }

  // Read one HTML file and extract the page record from it.
  function readPage(fullPath, entry) {
    const html = fs.readFileSync(fullPath, "utf-8");
    const titleMatch = html.match(/<title>([^<]*)<\/title>/i);
    const descMatch = html.match(/<meta\s+name=["']description["']\s+content=["']([^"']*)["']/i);
    const rawTitle = titleMatch ? titleMatch[1] : void 0;
    // Drop a trailing "| Site name" suffix, falling back to the raw title
    // when nothing is left in front of the separator.
    const title = rawTitle ? rawTitle.split("|")[0].trim() || rawTitle : rawTitle;
    return {
      pathname: toPathname(fullPath, entry),
      title,
      description: descMatch ? descMatch[1] : void 0,
      content: extractTextFromHtml(html)
    };
  }

  function walk(currentDir) {
    let entries;
    try {
      entries = fs.readdirSync(currentDir);
    } catch (error) {
      console.warn(`Warning: Could not read directory ${currentDir}:`, error);
      return;
    }
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry);
      try {
        if (fs.statSync(fullPath).isDirectory()) {
          if (!entry.startsWith(".") && entry !== "_astro") walk(fullPath);
          continue;
        }
        const isErrorPage = entry === "404.html" || entry === "500.html";
        if (!entry.endsWith(".html") || isErrorPage) continue;
        pages.push(readPage(fullPath, entry));
      } catch (error) {
        // Best effort: a single unreadable entry must not abort the scan.
        console.warn(`Warning: Could not read ${fullPath}:`, error);
      }
    }
  }

  walk(dir);
  return pages;
}
|
|
908
|
+
/**
 * Discover routable source pages for the dev server.
 *
 * Walks `<cwd>/<pagesDir>` and records one page per `.astro`, `.md` or
 * `.mdx` file. Directories starting with `.`, `_` or `[` are not entered,
 * and files starting with `404`, `500` or `[` are skipped, so error pages
 * and dynamic routes never appear as literal pathnames.
 *
 * Pathnames are derived with `path.relative` and always use `/`
 * separators. The title is the capitalised file name, or `undefined`
 * for `index` files.
 *
 * @param {string} pagesDir - Pages directory, joined onto `process.cwd()`.
 * @returns {Array<{pathname: string, title: (string|undefined)}>} Pages found;
 *   empty when the directory does not exist.
 */
function scanDevPages(pagesDir) {
  const pages = [];
  const pageExtension = /\.(astro|md|mdx)$/;

  const isSkippedDirectory = (name) =>
    name.startsWith(".") || name.startsWith("_") || name.startsWith("[");
  const isSkippedPage = (name) =>
    name.startsWith("404") || name.startsWith("500") || name.startsWith("[");

  function walk(currentDir, base) {
    let entries;
    try {
      entries = fs.readdirSync(currentDir);
    } catch (error) {
      console.warn(`Warning: Could not read directory ${currentDir}:`, error);
      return;
    }
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry);
      let stat;
      try {
        stat = fs.statSync(fullPath);
      } catch (error) {
        // Best effort: e.g. a dangling symlink must not abort the scan.
        console.warn(`Warning: Could not read ${fullPath}:`, error);
        continue;
      }
      if (stat.isDirectory()) {
        if (!isSkippedDirectory(entry)) walk(fullPath, base);
        continue;
      }
      if (!pageExtension.test(entry) || isSkippedPage(entry)) continue;

      const relativePath = path.relative(base, fullPath).split(path.sep).join("/");
      let pathname = `/${relativePath.replace(pageExtension, "")}`;
      // ".../index" is served at its directory; 6 === "/index".length.
      if (pathname.endsWith("/index")) pathname = pathname.slice(0, -6) || "/";
      pathname = pathname.replace(/\/+/g, "/") || "/";

      const name = entry.replace(pageExtension, "");
      pages.push({
        pathname,
        title: name === "index" ? void 0 : name.charAt(0).toUpperCase() + name.slice(1)
      });
    }
  }

  const resolvedPagesDir = path.join(process.cwd(), pagesDir);
  if (fs.existsSync(resolvedPagesDir)) {
    walk(resolvedPagesDir, resolvedPagesDir);
  }
  return pages;
}
|
|
940
|
+
/**
 * Convert an HTML document into a compact Markdown-like text rendition.
 *
 * Steps, in order:
 *  1. Drop `<script>`, `<style>` and `<svg>` blocks.
 *  2. Keep only the contents of `<main>` when present; otherwise strip
 *     `<nav>`, `<header>` and `<footer>` blocks.
 *  3. Rewrite links, headings (`<h1>` is rendered as `##`), bold/italic,
 *     list items, block quotes, rules, line breaks and paragraphs.
 *  4. Remove all remaining tags, then decode a small set of entities.
 *  5. Remove emoji/variation-selector code points and normalise whitespace.
 *
 * Tag patterns use `\b` after the tag name so that, for example, `<img>`
 * is not mistaken for `<i>` and `<br>` is not mistaken for `<b>`.
 *
 * @param {string} html - Source markup.
 * @returns {string} Trimmed text, truncated to 8000 characters.
 */
function extractTextFromHtml(html) {
  let text = html;

  text = text.replace(/<script[\s\S]*?<\/script>/gi, "");
  text = text.replace(/<style[\s\S]*?<\/style>/gi, "");
  text = text.replace(/<svg[\s\S]*?<\/svg>/gi, "");

  const mainMatch = text.match(/<main[^>]*>([\s\S]*)<\/main>/i);
  if (mainMatch) {
    text = mainMatch[1];
  } else {
    text = text.replace(/<nav[\s\S]*?<\/nav>/gi, "");
    text = text.replace(/<header[\s\S]*?<\/header>/gi, "");
    text = text.replace(/<footer[\s\S]*?<\/footer>/gi, "");
  }

  // Links that wrap block content (cards) become a single-line link on its
  // own line, with the label capped at 120 characters.
  text = text.replace(/<a\b[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, url, inner) => {
    if (/<(?:h[1-6]|div|p|section)[^>]*>/i.test(inner)) {
      const cleanInner = inner.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
      return `\n[${cleanInner.slice(0, 120).trim()}](${url})\n`;
    }
    return `[${inner}](${url})`;
  });

  // <h1> is demoted to "##"; every other level maps to its own depth.
  for (let level = 1; level <= 6; level += 1) {
    const heading = new RegExp(`<h${level}[^>]*>([\\s\\S]*?)<\\/h${level}>`, "gi");
    text = text.replace(heading, `\n\n${"#".repeat(Math.max(level, 2))} $1\n\n`);
  }

  // Second pass picks up anchors left behind by malformed nesting.
  text = text.replace(/<a\b[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, "[$2]($1)");
  text = text.replace(/<(?:strong|b)\b[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, "**$1**");
  text = text.replace(/<(?:em|i)\b[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, "*$1*");
  text = text.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, "\n- $1");
  text = text.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, "\n\n> $1\n\n");
  text = text.replace(/<hr\b[^>]*\/?>/gi, "\n\n---\n\n");
  text = text.replace(/<br\b[^>]*\/?>/gi, "\n");
  text = text.replace(/<\/p>/gi, "\n\n");
  text = text.replace(/<p\b[^>]*>/gi, "");
  text = text.replace(/<\/?(?:div|section|article|header|main|aside|figure|figcaption|table|thead|tbody|tr|td|th|ul|ol|dl|dt|dd)\b[^>]*>/gi, "\n");
  text = text.replace(/<[^>]+>/g, "");

  // Decode entities only after tags are gone, and decode "&amp;" last so
  // that "&amp;lt;" ends up as the literal text "&lt;" rather than "<".
  text = text
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;|&#x27;|&apos;/gi, "'")
    .replace(/&nbsp;|\u00A0/g, " ")
    .replace(/&copy;|\u00A9/g, "(c)")
    .replace(/&amp;/g, "&");

  text = text.replace(/[\u{1F1E0}-\u{1FAFF}\u{2600}-\u{27BF}\u{FE00}-\u{FE0F}\u{200D}\u{20E3}]/gu, "");

  // Collapse whitespace inside each line, then limit blank-line runs.
  text = text.split("\n").map((l) => l.replace(/\s+/g, " ").trim()).join("\n");
  text = text.replace(/\n{3,}/g, "\n\n");
  // Tighten link labels that picked up padding from nested markup.
  text = text.replace(/\[[\s\n]+/g, "[").replace(/[\s\n]+\]/g, "]");
  // Re-join heading markers with their text and drop empty headings.
  text = text.replace(/(#{2,6})\s*\n+\s*/g, "$1 ");
  text = text.replace(/^#{2,6}\s*$/gm, "");
  text = text.replace(/\n{3,}/g, "\n\n");

  return text.trim().slice(0, 8e3);
}
|
|
989
|
+
/**
 * Render an HTML page as a Markdown document with YAML front matter.
 *
 * The front matter carries `title` (text of `<title>` before the first
 * `|`), `description` (meta description), `url`, `source` and
 * `generated_by`. The body repeats title and description and then appends
 * the text produced by `extractTextFromHtml`.
 *
 * `title` and `description` are emitted through `JSON.stringify`, which
 * yields a valid YAML double-quoted scalar even when the value contains
 * quotes or backslashes.
 *
 * @param {string} html - Rendered page markup.
 * @param {string} pagePath - Site-relative path of the page (`/` for the root).
 * @param {object} config - Configuration; only `config.url` is read.
 * @returns {string} The Markdown document.
 */
function htmlToMarkdown(html, pagePath, config) {
  const titleMatch = html.match(/<title>([^<]*)<\/title>/i);
  const descMatch = html.match(/<meta\s+name=["']description["']\s+content=["']([^"']*)["']/i);
  const textContent = extractTextFromHtml(html);

  const rawTitle = titleMatch ? titleMatch[1].split("|")[0].trim() : void 0;
  const description = descMatch ? descMatch[1] : void 0;
  const pageUrl = pagePath === "/" ? config.url : `${config.url.replace(/\/$/, "")}${pagePath}`;

  const lines = ["---"];
  if (rawTitle) lines.push(`title: ${JSON.stringify(rawTitle)}`);
  if (description) lines.push(`description: ${JSON.stringify(description)}`);
  lines.push(`url: ${pageUrl}`);
  lines.push(`source: ${pageUrl}`);
  lines.push("generated_by: aeo.js");
  lines.push("---", "");

  if (rawTitle) lines.push(`# ${rawTitle}`, "");
  if (description) lines.push(`${description}`, "");
  if (textContent) lines.push(textContent);

  return lines.join("\n");
}
|
|
1010
|
+
/**
 * Create the aeo.js Astro integration.
 *
 * Hooks:
 *  - `astro:config:setup`: resolves the configuration, makes sure the
 *    public directory exists in dev, and injects the widget script when the
 *    widget is enabled.
 *  - `astro:build:done`: scans the build output for pages and generates
 *    the AEO files into it.
 *  - `astro:server:setup`: generates the AEO files for development,
 *    installs a middleware answering `*.md` requests, and regenerates the
 *    files when Markdown content changes.
 *
 * Directory URLs handed over by Astro are converted with `fileURLToPath`
 * (not `URL.pathname`, which stays percent-encoded and is not a valid path
 * on Windows). When `options.url` is not set, Astro's `site` setting is
 * used as the site URL.
 *
 * @param {object} [options={}] - User options, forwarded to `resolveConfig`.
 * @returns {{name: string, hooks: object}} The integration object.
 */
function aeoAstroIntegration(options = {}) {
  // Required locally so this block does not depend on the file's import header.
  const { fileURLToPath } = require("url");

  // Astro passes directories as file: URLs; plain strings pass through.
  const toFsPath = (location) => (location instanceof URL ? fileURLToPath(location) : location);

  // Join `requested` onto `root`, returning null when the result would
  // land outside `root` (e.g. a request path containing "..").
  const resolveInside = (root, requested) => {
    const candidate = path.join(root, requested);
    const offset = path.relative(root, candidate);
    const escapes = offset === ".." || offset.startsWith(`..${path.sep}`) || path.isAbsolute(offset);
    return escapes ? null : candidate;
  };

  let resolvedConfig = resolveConfig(options);
  let astroConfig;

  // User options, with Astro's `site` filled in as `url` when the user
  // did not provide one.
  const optionsWithSite = () => {
    if (options.url || !astroConfig || !astroConfig.site) return { ...options };
    return { ...options, url: String(astroConfig.site).replace(/\/+$/, "") };
  };

  return {
    name: "aeo-astro",
    hooks: {
      "astro:config:setup": ({ config, command, injectScript }) => {
        astroConfig = config;
        const publicPath = toFsPath(config.publicDir);
        resolvedConfig = resolveConfig({
          ...optionsWithSite(),
          contentDir: options.contentDir || "src/content",
          outDir: options.outDir || (command === "build" ? toFsPath(config.outDir) : publicPath)
        });
        if (command === "dev" && !fs.existsSync(publicPath)) {
          fs.mkdirSync(publicPath, { recursive: true });
        }
        if (resolvedConfig.widget.enabled && injectScript) {
          const widgetOpts = JSON.stringify(resolvedConfig.widget);
          injectScript(
            "page",
            `import { AeoWidget } from 'aeo.js/widget';
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', () => { new AeoWidget(${widgetOpts}); });
} else {
new AeoWidget(${widgetOpts});
}`
          );
        }
      },

      "astro:build:done": async ({ dir, logger }) => {
        const buildLogger = logger.fork("aeo.js");
        buildLogger.info("Generating AEO files...");
        const outPath = toFsPath(dir || astroConfig.outDir);
        const discoveredPages = scanBuiltPages(outPath);
        buildLogger.info(`Discovered ${discoveredPages.length} pages from build output`);
        resolvedConfig = resolveConfig({
          ...optionsWithSite(),
          outDir: options.outDir || outPath,
          pages: [...options.pages || [], ...discoveredPages]
        });
        try {
          const result = await generateAEOFiles(resolvedConfig);
          if (result.files.length > 0) {
            buildLogger.info(`Generated ${result.files.length} files`);
            result.files.forEach((file) => {
              buildLogger.debug(`  - ${file}`);
            });
          }
          if (result.errors.length > 0) {
            buildLogger.error("Errors during generation:");
            result.errors.forEach((error) => {
              buildLogger.error(`  - ${error}`);
            });
          }
        } catch (error) {
          buildLogger.error(`Failed to generate AEO files: ${error}`);
        }
      },

      "astro:server:setup": async ({ server, logger }) => {
        const devLogger = logger.fork("aeo.js");
        devLogger.info("Generating AEO files for development...");
        const devPages = scanDevPages("src/pages");
        resolvedConfig = resolveConfig({
          ...optionsWithSite(),
          contentDir: options.contentDir || "src/content",
          outDir: resolvedConfig.outDir,
          pages: [...options.pages || [], ...devPages]
        });
        try {
          const result = await generateAEOFiles(resolvedConfig);
          if (result.files.length > 0) {
            devLogger.info(`Generated ${result.files.length} files`);
          }
          if (result.errors.length > 0) {
            devLogger.error("Errors during generation:", result.errors);
          }
        } catch (error) {
          devLogger.error(`Failed to generate AEO files: ${error}`);
        }

        const sendMarkdown = (res, body) => {
          res.setHeader("Content-Type", "text/markdown; charset=utf-8");
          res.end(body);
        };

        // Try, in order: a source file under contentDir, the live page
        // converted to Markdown, a generated file under outDir.
        // Resolves to true once a response has been sent.
        const serveMarkdown = async (req, res) => {
          const filename = req.url.startsWith("/") ? req.url.slice(1) : req.url;

          if (resolvedConfig.contentDir) {
            const contentRoot = path.join(process.cwd(), resolvedConfig.contentDir);
            const contentFile = resolveInside(contentRoot, filename);
            if (contentFile && fs.existsSync(contentFile)) {
              sendMarkdown(res, fs.readFileSync(contentFile, "utf-8"));
              return true;
            }
          }

          let pagePath = req.url.replace(/\.md$/, "") || "/";
          if (pagePath === "/index") pagePath = "/";
          try {
            const host = req.headers.host || "localhost:4321";
            const socket = req.socket || req.connection;
            const protocol = socket && socket.encrypted ? "https" : "http";
            // The marker header stops this middleware from handling the
            // request it issues to the dev server itself.
            const response = await fetch(`${protocol}://${host}${pagePath}`, {
              headers: { "x-aeo-internal": "1" }
            });
            if (response.ok) {
              const html = await response.text();
              sendMarkdown(res, htmlToMarkdown(html, pagePath, resolvedConfig));
              return true;
            }
          } catch (error) {
            // Best effort: fall through to the generated file, if any.
            devLogger.debug(`Could not render ${pagePath} as markdown: ${error}`);
          }

          const generatedFile = resolveInside(resolvedConfig.outDir, filename);
          if (generatedFile && fs.existsSync(generatedFile)) {
            sendMarkdown(res, fs.readFileSync(generatedFile, "utf-8"));
            return true;
          }
          return false;
        };

        const mdHandler = async (req, res, next) => {
          if (!req.url || !req.url.endsWith(".md")) return next();
          if (req.headers["x-aeo-internal"]) return next();
          let handled;
          try {
            handled = await serveMarkdown(req, res);
          } catch (error) {
            // Surface failures to the server instead of leaving an
            // unhandled promise rejection behind.
            return next(error);
          }
          if (!handled) next();
        };
        // Inserted at the front of the stack so it runs before the
        // server's own handlers.
        server.middlewares.stack.unshift({ route: "", handle: mdHandler });

        if (resolvedConfig.contentDir) {
          const contentPath = path.join(process.cwd(), resolvedConfig.contentDir);
          server.watcher.add(path.join(contentPath, "**/*.md"));
          server.watcher.add(path.join(contentPath, "**/*.mdx"));
          server.watcher.on("change", async (file) => {
            if (!file.endsWith(".md") && !file.endsWith(".mdx")) return;
            devLogger.info("Content file changed, regenerating AEO files...");
            try {
              const result = await generateAEOFiles(resolvedConfig);
              if (result.files.length > 0) {
                devLogger.info(`Regenerated ${result.files.length} files`);
              }
              if (result.errors.length > 0) {
                devLogger.error("Errors during regeneration:", result.errors);
              }
            } catch (error) {
              devLogger.error(`Failed to regenerate AEO files: ${error}`);
            }
          });
        }
      }
    }
  };
}
|
|
1157
|
+
/**
 * Render the `<head>` markup advertising the generated AEO files.
 *
 * Produces four `<link rel="alternate">` tags for the generated files and
 * three `aeo:*` meta tags taken from the resolved configuration. The
 * configuration values are escaped before being placed into attribute
 * values, so a title such as `My "Best" Site` cannot break the markup.
 *
 * @param {{config: object}} props - Wrapper holding the user configuration,
 *   which is passed through `resolveConfig`.
 * @returns {string} HTML fragment.
 */
const AeoMetaTags = ({ config }) => {
  const resolvedConfig = resolveConfig(config);
  // "&" is escaped first so the entities added afterwards stay intact.
  const escapeAttribute = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  return `
<link rel="alternate" type="text/plain" href="/llms.txt" title="LLM Summary" />
<link rel="alternate" type="text/plain" href="/llms-full.txt" title="Full Content for LLMs" />
<link rel="alternate" type="application/json" href="/docs.json" title="Documentation Manifest" />
<link rel="alternate" type="application/json" href="/ai-index.json" title="AI-Optimized Index" />
<meta name="aeo:title" content="${escapeAttribute(resolvedConfig.title)}" />
<meta name="aeo:description" content="${escapeAttribute(resolvedConfig.description)}" />
<meta name="aeo:url" content="${escapeAttribute(resolvedConfig.url)}" />
`;
};
|
|
1169
|
+
/**
 * Identity helper for declaring an aeo.js configuration.
 *
 * Hands back the very object it was given, without copying or validating it.
 *
 * @param {object} config - The configuration object.
 * @returns {object} The same `config` reference.
 */
function defineAeoConfig(config) {
  return config;
}
|
|
1172
|
+
// Public CommonJS surface of the Astro entry point. The integration
// factory is exposed both by name and as the default export.
var astro_default = aeoAstroIntegration;

exports.AeoMetaTags = AeoMetaTags;
exports.aeoAstroIntegration = aeoAstroIntegration;
exports.default = astro_default;
exports.defineAeoConfig = defineAeoConfig;
//# sourceMappingURL=astro.js.map
|