aeo.js 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/next.mjs ADDED
@@ -0,0 +1,1106 @@
1
+ import { existsSync, mkdirSync, writeFileSync, readdirSync, statSync, readFileSync, copyFileSync } from 'fs';
2
+ import { join, dirname, extname, relative } from 'path';
3
+ import 'minimatch';
4
+ import { createHash } from 'crypto';
5
+
6
+ // src/core/robots.ts
7
/**
 * User-agent product tokens that receive an explicit `Allow: /` group in the
 * generated robots.txt.
 *
 * Product tokens are matched case-insensitively by crawlers, so each agent is
 * listed once only ("SemrushBot" previously also appeared as "SEMrushBot").
 */
var AI_CRAWLERS = [
  "GPTBot",
  "OAI-SearchBot",
  "ChatGPT-User",
  "ClaudeBot",
  "Claude-Web",
  "anthropic-ai",
  "PerplexityBot",
  "Google-Extended",
  "Gemini-Deep-Research",
  "Bingbot",
  "FacebookBot",
  "meta-externalagent",
  "Amazonbot",
  "Applebot",
  "DeepSeekBot",
  "Bytespider",
  "cohere-ai",
  "CCBot",
  "DiffBot",
  "YouBot",
  "FirecrawlAgent",
  "Crawl4AI",
  "BraveBot",
  "SemrushBot",
  "AhrefsBot",
  "MJ12bot",
  "DotBot",
  "DataForSeoBot",
  "Screaming Frog SEO Spider",
  "SEOkicks",
  "BLEXBot",
  "Yandex",
  "Baiduspider",
  "Sogou",
  "Exabot",
  "facebookexternalhit",
  "LinkedInBot",
  "WhatsApp",
  "Slackbot",
  "TwitterBot",
  "TelegramBot",
  "Discordbot",
  "PinterestBot",
  "TumblrBot",
  "ViberBot",
  "SkypeUriPreview",
  "redditbot",
  "Snapchat",
  "TikTok"
];
59
/**
 * Build the text of a robots.txt that explicitly allows search and AI crawlers.
 *
 * @param {{ url?: string }} config - Site configuration; only `url` is read.
 * @returns {string} The robots.txt body, lines joined with "\n".
 */
function generateRobotsTxt(config) {
  // Strip trailing slashes so `${baseUrl}/file` never contains "//".
  const baseUrl = typeof config.url === "string" ? config.url.replace(/\/+$/, "") : "";
  const lines = [
    "# robots.txt generated by aeo.js",
    "# Allow AI crawlers to index this site",
    "",
    "# Traditional search engines",
    "User-agent: Googlebot",
    "Allow: /",
    "",
    "User-agent: Bingbot",
    "Allow: /",
    "",
    "# AI crawlers and answer engines"
  ];
  // User-agent tokens are case-insensitive; remember what already has a group
  // (including the two search engines above) so no agent is emitted twice.
  const emitted = new Set(["googlebot", "bingbot"]);
  for (const crawler of AI_CRAWLERS) {
    const key = crawler.toLowerCase();
    if (emitted.has(key)) continue;
    emitted.add(key);
    lines.push(`User-agent: ${crawler}`, "Allow: /", "");
  }
  lines.push("# Default for all other bots", "User-agent: *", "Allow: /", "");
  if (baseUrl) {
    lines.push(`Sitemap: ${baseUrl}/sitemap.xml`);
  }
  lines.push("");
  lines.push("# AEO (Answer Engine Optimization) files");
  lines.push("# These help LLMs understand your content better");
  // With no site URL these degrade to site-relative paths instead of "undefined/...".
  lines.push(`# ${baseUrl}/llms.txt`);
  lines.push(`# ${baseUrl}/llms-full.txt`);
  lines.push(`# ${baseUrl}/docs.json`);
  lines.push(`# ${baseUrl}/ai-index.json`);
  return lines.join("\n");
}
94
+
95
+ // src/core/detect.ts
96
/**
 * Guess the web framework of a project from the package names listed in its
 * package.json `dependencies` and `devDependencies`.
 *
 * @param {string} [projectRoot=process.cwd()] - Directory containing package.json.
 * @returns {{ framework: string, contentDir: string, outDir: string }}
 *   The detected framework with its default content and output directories;
 *   `framework` is "unknown" when no rule matches.
 */
function detectFramework(projectRoot = process.cwd()) {
  const manifest = readPackageJson(projectRoot);
  const installed = { ...manifest.dependencies, ...manifest.devDependencies };
  // Priority order matters: the first rule with a listed package wins, so
  // meta-frameworks are checked before the generic "vite" fallback.
  const rules = [
    { packages: ["next"], framework: "next", contentDir: "app", outDir: "out" },
    { packages: ["nuxt", "@nuxt/kit"], framework: "nuxt", contentDir: "content", outDir: ".output/public" },
    { packages: ["astro", "@astrojs/astro"], framework: "astro", contentDir: "src/content", outDir: "dist" },
    { packages: ["@remix-run/dev"], framework: "remix", contentDir: "app", outDir: "build/client" },
    { packages: ["@sveltejs/kit"], framework: "sveltekit", contentDir: "src", outDir: "build" },
    { packages: ["@angular/core"], framework: "angular", contentDir: "src", outDir: "dist" },
    { packages: ["@docusaurus/core"], framework: "docusaurus", contentDir: "docs", outDir: "build" },
    { packages: ["vite"], framework: "vite", contentDir: "src", outDir: "dist" }
  ];
  const hit = rules.find(({ packages }) => packages.some((name) => installed[name]));
  if (hit) {
    const { framework, contentDir, outDir } = hit;
    return { framework, contentDir, outDir };
  }
  return { framework: "unknown", contentDir: "src", outDir: "dist" };
}
164
/**
 * Fill in defaults for a user-supplied aeo.js configuration.
 *
 * Generator flags, `widget.enabled` and `widget.showBadge` default to `true`
 * and are only disabled by an explicit `false`. Directory defaults come from
 * `detectFramework()`.
 *
 * @param {object|null} [config={}] - Partial user configuration. `null` and
 *   `undefined` are both treated as an empty configuration.
 * @returns {object} A configuration with every known field populated.
 */
function resolveConfig(config = {}) {
  // A default parameter only covers `undefined`; callers may still pass null.
  const input = config == null ? {} : config;
  const frameworkInfo = detectFramework();
  const generators = input.generators || {};
  const robots = input.robots || {};
  const widget = input.widget || {};
  const theme = widget.theme || {};
  const enabledUnlessFalse = (flag) => flag !== false;
  return {
    title: input.title || "My Site",
    description: input.description || "",
    url: input.url || "https://example.com",
    contentDir: input.contentDir || frameworkInfo.contentDir,
    outDir: input.outDir || frameworkInfo.outDir,
    pages: input.pages || [],
    generators: {
      robotsTxt: enabledUnlessFalse(generators.robotsTxt),
      llmsTxt: enabledUnlessFalse(generators.llmsTxt),
      llmsFullTxt: enabledUnlessFalse(generators.llmsFullTxt),
      rawMarkdown: enabledUnlessFalse(generators.rawMarkdown),
      manifest: enabledUnlessFalse(generators.manifest),
      sitemap: enabledUnlessFalse(generators.sitemap),
      aiIndex: enabledUnlessFalse(generators.aiIndex)
    },
    robots: {
      allow: robots.allow || ["/"],
      disallow: robots.disallow || [],
      crawlDelay: robots.crawlDelay || 0,
      sitemap: robots.sitemap || ""
    },
    widget: {
      enabled: enabledUnlessFalse(widget.enabled),
      position: widget.position || "bottom-right",
      theme: {
        background: theme.background || "rgba(18, 18, 24, 0.9)",
        text: theme.text || "#C0C0C5",
        accent: theme.accent || "#E8E8EA",
        badge: theme.badge || "#4ADE80"
      },
      humanLabel: widget.humanLabel || "Human",
      aiLabel: widget.aiLabel || "AI",
      showBadge: enabledUnlessFalse(widget.showBadge)
    }
  };
}
204
/**
 * Split a markdown document into its leading `---` frontmatter block and body.
 *
 * This is a flat `key: value` reader, not a YAML parser: each line is split at
 * its first colon, the value is trimmed, and one leading and/or trailing quote
 * character is removed. Lines without a colon or without a key are ignored.
 *
 * @param {string} content - Raw file contents.
 * @returns {{ frontmatter: Object<string, string>, content: string }}
 *   Parsed keys plus the text after the closing `---`; when no frontmatter
 *   block is found, an empty object and the untouched input.
 */
function parseFrontmatter(content) {
  // The closing fence may be the last line of the file (no trailing newline),
  // and the block between the fences may be empty.
  const match = content.match(/^---\s*\n(?:([\s\S]*?)\n)?---\s*(?:\n|$)([\s\S]*)/);
  if (!match) {
    return { frontmatter: {}, content };
  }
  const frontmatter = {};
  for (const line of (match[1] || "").split("\n")) {
    const colonAt = line.indexOf(":");
    if (colonAt === -1) continue;
    const key = line.slice(0, colonAt).trim();
    if (!key) continue;
    // Everything after the first colon is the value, so URLs and times survive.
    const value = line.slice(colonAt + 1).trim();
    frontmatter[key] = value.replace(/^["']|["']$/g, "");
  }
  return { frontmatter, content: match[2] };
}
222
/**
 * Demote ATX markdown headings by `levels` (e.g. `#` becomes `##`), keeping the
 * result between level 1 and level 6.
 *
 * Lines inside fenced code blocks (``` or ~~~) are left untouched so that
 * `# comment` lines in shell/Python snippets are not rewritten as headings.
 *
 * @param {string} content - Markdown text.
 * @param {number} [levels=1] - How many levels to add to each heading.
 * @returns {string} The markdown with adjusted heading markers.
 */
function bumpHeadings(content, levels = 1) {
  let openFence = null; // marker of the code fence we are currently inside, if any
  const adjusted = content.split("\n").map((line) => {
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fenceMatch) {
      const marker = fenceMatch[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.slice(fenceMatch[0].length).trim() === ""
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no info string.
        openFence = null;
      }
      return line;
    }
    if (openFence !== null) {
      return line;
    }
    // Matching per line means the whitespace after the hashes can never be the
    // line's own newline, so a bare "#" line is not merged with the next one.
    return line.replace(/^(#{1,6})\s/, (_match, hashes) => {
      const newLevel = Math.max(1, Math.min(hashes.length + levels, 6));
      return "#".repeat(newLevel) + " ";
    });
  });
  return adjusted.join("\n");
}
228
/**
 * Pick a title for a markdown document: the first level-1 heading if any,
 * otherwise the first level-2 heading, otherwise the first 100 characters of
 * the first line.
 *
 * @param {string} content - Markdown text.
 * @returns {string} The chosen title (may be empty for empty input).
 */
function extractTitle(content) {
  const headingPatterns = [/^#\s+(.+)$/m, /^##\s+(.+)$/m];
  for (const pattern of headingPatterns) {
    const found = content.match(pattern);
    if (found) {
      return found[1];
    }
  }
  const [openingLine] = content.split("\n");
  return openingLine.slice(0, 100);
}
236
/**
 * Read and parse `package.json` from a project directory.
 *
 * @param {string} [projectRoot=process.cwd()] - Directory to look in.
 * @returns {object} The parsed JSON, or `{}` when the file is missing,
 *   unreadable, or not valid JSON.
 */
function readPackageJson(projectRoot = process.cwd()) {
  const manifestPath = join(projectRoot, "package.json");
  let parsed = {};
  if (existsSync(manifestPath)) {
    try {
      parsed = JSON.parse(readFileSync(manifestPath, "utf-8"));
    } catch {
      // Best effort: callers treat an unreadable manifest like a missing one.
      parsed = {};
    }
  }
  return parsed;
}
248
+
249
+ // src/core/llms-txt.ts
250
/**
 * Recursively gather `.md` / `.mdx` files under `dir`, skipping dot-directories
 * and `node_modules`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [base=dir] - Root that returned `path` values are relative to.
 * @returns {Array<{ path: string, content: string, title: string,
 *   description: (string|undefined), frontmatter: object }>}
 *   One record per markdown file. If reading fails part-way, a warning is
 *   logged and whatever was collected so far is returned.
 */
function collectMarkdownFiles(dir, base = dir) {
  const found = [];
  try {
    for (const name of readdirSync(dir)) {
      const absolute = join(dir, name);
      const info = statSync(absolute);
      if (info.isDirectory()) {
        const skipped = name.startsWith(".") || name === "node_modules";
        if (!skipped) {
          found.push(...collectMarkdownFiles(absolute, base));
        }
        continue;
      }
      if (!info.isFile()) continue;
      const extension = extname(name);
      if (extension !== ".md" && extension !== ".mdx") continue;
      const raw = readFileSync(absolute, "utf-8");
      const parsed = parseFrontmatter(raw);
      found.push({
        path: relative(base, absolute),
        content: parsed.content,
        title: parsed.frontmatter.title || extractTitle(parsed.content),
        description: parsed.frontmatter.description,
        frontmatter: parsed.frontmatter
      });
    }
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
  return found;
}
277
/**
 * Build the `llms.txt` index: site header, configured pages, markdown files
 * from `config.contentDir` grouped by their top-level folder, and pointers to
 * the other generated AEO files.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{ pathname: string, title?: string,
 *   description?: string }> }} config - Resolved site configuration.
 * @returns {string} The llms.txt body, lines joined with "\n".
 */
function generateLlmsTxt(config) {
  // Strip trailing slashes so joined URLs never contain "//".
  const baseUrl = String(config.url || "").replace(/\/+$/, "");
  const lines = [
    `# ${config.title}`,
    ""
  ];
  if (config.description) {
    lines.push(`> ${config.description}`);
    lines.push("");
  }
  lines.push("## About");
  lines.push("");
  lines.push("This file provides a structured overview of the documentation and content available on this site,");
  lines.push("optimized for consumption by Large Language Models (LLMs) and AI assistants.");
  lines.push("");
  if (config.pages && config.pages.length > 0) {
    lines.push("## Pages");
    lines.push("");
    for (const page of config.pages) {
      const url = `${baseUrl}${page.pathname === "/" ? "" : page.pathname}`;
      const title = page.title || page.pathname;
      lines.push(`- [${title}](${url})`);
      if (page.description) {
        lines.push(` ${page.description}`);
      }
    }
    lines.push("");
  }
  const markdownFiles = collectMarkdownFiles(config.contentDir);
  if (markdownFiles.length > 0) {
    lines.push("## Documentation");
    lines.push("");
    // Group by top-level folder. Files directly in the content root have no
    // folder segment and share the "" group. A Map keeps first-seen order and
    // cannot clash with a real folder name.
    const grouped = new Map();
    for (const file of markdownFiles) {
      const segments = file.path.split(/[\\/]/);
      const group = segments.length > 1 ? segments[0] : "";
      if (!grouped.has(group)) grouped.set(group, []);
      grouped.get(group).push(file);
    }
    for (const [group, files] of grouped) {
      lines.push(`### ${group === "" ? "Main Documentation" : group}`);
      lines.push("");
      for (const file of files) {
        // Relative paths use "\" on Windows; URLs always use "/".
        const urlPath = file.path.replace(/\\/g, "/").replace(/\.mdx?$/, "");
        lines.push(`- [${file.title}](${baseUrl}/${urlPath})`);
        if (file.description) {
          lines.push(` ${file.description}`);
        }
      }
      lines.push("");
    }
  }
  lines.push("## Quick Links");
  lines.push("");
  lines.push(`- Full Documentation: ${baseUrl}/llms-full.txt`);
  lines.push(`- Documentation Manifest: ${baseUrl}/docs.json`);
  lines.push(`- AI-Optimized Index: ${baseUrl}/ai-index.json`);
  lines.push(`- Sitemap: ${baseUrl}/sitemap.xml`);
  lines.push("");
  lines.push("## For LLMs");
  lines.push("");
  lines.push("To get the complete documentation in a single file, request:");
  lines.push(`${baseUrl}/llms-full.txt`);
  lines.push("");
  lines.push("For structured access to individual pages with metadata:");
  lines.push(`${baseUrl}/docs.json`);
  lines.push("");
  lines.push("For RAG (Retrieval Augmented Generation) systems:");
  lines.push(`${baseUrl}/ai-index.json`);
  lines.push("");
  lines.push("---");
  lines.push("Generated by aeo.js - Answer Engine Optimization for the modern web");
  lines.push("Learn more at https://aeojs.org");
  return lines.join("\n");
}
350
/**
 * Recursively turn every `.md` / `.mdx` file under `dir` into one text section
 * for llms-full.txt. Entries are visited in sorted name order; dot-directories
 * and `node_modules` are skipped.
 *
 * Each section starts with a `---` rule, a `# title` line (frontmatter title or
 * the relative path), a `Source:` line, an optional `> description`, then the
 * file body with its headings passed through `bumpHeadings(…, 1)`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [base=dir] - Root that `Source:` paths are relative to.
 * @returns {string[]} One string per markdown file. On a read error a warning
 *   is logged and the sections gathered so far are returned.
 */
function collectAndConcatenateMarkdown(dir, base = dir) {
  const sections = [];
  try {
    const names = readdirSync(dir).sort();
    for (const name of names) {
      const absolute = join(dir, name);
      const info = statSync(absolute);
      if (info.isDirectory()) {
        if (!name.startsWith(".") && name !== "node_modules") {
          sections.push(...collectAndConcatenateMarkdown(absolute, base));
        }
        continue;
      }
      const isMarkdown = [".md", ".mdx"].includes(extname(name));
      if (!info.isFile() || !isMarkdown) continue;
      const parsed = parseFrontmatter(readFileSync(absolute, "utf-8"));
      const meta = parsed.frontmatter;
      const sourcePath = relative(base, absolute);
      const parts = ["---", "", `# ${meta.title || sourcePath}`, "", `Source: ${sourcePath}`, ""];
      if (meta.description) {
        parts.push(`> ${meta.description}`, "");
      }
      parts.push(bumpHeadings(parsed.content, 1), "");
      sections.push(parts.join("\n"));
    }
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
  return sections;
}
389
/**
 * Build `llms-full.txt`: a header, one section per configured page, one section
 * per markdown file found under `config.contentDir`, and a closing "about"
 * block. When neither pages nor markdown files exist, a single fallback
 * section describing the site is emitted instead.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{ pathname: string, title?: string,
 *   description?: string, content?: string }> }} config - Resolved configuration.
 * @returns {string} The full document, pieces joined with "\n".
 */
function generateLlmsFullTxt(config) {
  const { title: siteTitle, description: siteDescription, url: siteUrl } = config;
  const out = [
    `# ${siteTitle} - Complete Documentation`,
    "",
    `This file contains all documentation concatenated into a single file for easy consumption by LLMs.`,
    ""
  ];
  if (siteDescription) {
    out.push(`> ${siteDescription}`, "");
  }
  out.push(
    "## Table of Contents",
    "",
    "This document includes all content from this project.",
    "Each section is separated by a horizontal rule (---) for easy parsing.",
    ""
  );

  // Renders one configured page as a single multi-line section string.
  const renderPage = (page) => {
    const pageUrl = `${siteUrl}${page.pathname === "/" ? "" : page.pathname}`;
    const block = ["---", "", `# ${page.title || page.pathname}`, "", `URL: ${pageUrl}`, ""];
    if (page.description) {
      block.push(`> ${page.description}`, "");
    }
    if (page.content) {
      block.push(page.content, "");
    }
    return block.join("\n");
  };

  const pageSections = config.pages && config.pages.length > 0 ? [] : null;
  if (pageSections) {
    for (const page of config.pages) {
      pageSections.push(renderPage(page));
    }
    out.push(...pageSections);
  }
  const fileSections = collectAndConcatenateMarkdown(config.contentDir);
  if (fileSections.length > 0) {
    out.push(...fileSections);
  }

  const wroteSomething = Boolean(pageSections) || fileSections.length > 0;
  if (!wroteSomething) {
    out.push("---", "", `# ${siteTitle}`, "", `URL: ${siteUrl}`, "");
    if (siteDescription) {
      out.push(siteDescription, "");
    }
  }
  out.push(
    "---",
    "",
    "## About This Document",
    "",
    "This concatenated documentation file is generated automatically by aeo.js",
    "to make it easier for AI systems to understand the complete context of this project.",
    "",
    `For a structured index, see: ${siteUrl}/llms.txt`,
    `For individual files, see: ${siteUrl}/docs.json`,
    "",
    "Generated by aeo.js - https://aeojs.org"
  );
  return out.join("\n");
}
460
/**
 * Create a directory, including any missing parent directories.
 * Does nothing if the directory already exists.
 *
 * @param {string} path - Directory path to create.
 */
function ensureDir(path) {
  const createParents = { recursive: true };
  mkdirSync(path, createParents);
}
463
/**
 * Copy every `.md` file under `config.contentDir` into `config.outDir`,
 * preserving the relative directory layout. Dot-directories, `node_modules`
 * and the output directory itself are not descended into.
 *
 * @param {{ contentDir: string, outDir: string }} config - Resolved configuration.
 * @returns {Array<{ source: string, destination: string }>} The files that were
 *   copied. Failures are logged with `console.warn` and skipped.
 */
function copyMarkdownFiles(config) {
  const copiedFiles = [];
  // `relative()` resolves both arguments first, so "" means "same location"
  // however the two paths were spelled.
  const isSamePath = (a, b) => relative(a, b) === "";
  function copyRecursive(dir, base = config.contentDir) {
    try {
      const entries = readdirSync(dir);
      for (const entry of entries) {
        const fullPath = join(dir, entry);
        const stat = statSync(fullPath);
        if (stat.isDirectory() && !entry.startsWith(".") && entry !== "node_modules") {
          // When outDir lives inside contentDir (e.g. project root -> public),
          // walking into it would re-copy previous output one level deeper on
          // every run.
          if (isSamePath(config.outDir, fullPath)) continue;
          copyRecursive(fullPath, base);
        } else if (stat.isFile() && extname(entry) === ".md") {
          const relativePath = relative(base, fullPath);
          const destPath = join(config.outDir, relativePath);
          // Source and destination coincide when contentDir === outDir.
          if (isSamePath(fullPath, destPath)) continue;
          ensureDir(dirname(destPath));
          try {
            copyFileSync(fullPath, destPath);
            copiedFiles.push({
              source: fullPath,
              destination: destPath
            });
          } catch (error) {
            console.warn(`Warning: Could not copy ${fullPath}:`, error);
          }
        }
      }
    } catch (error) {
      console.warn(`Warning: Could not read directory ${dir}:`, error);
    }
  }
  copyRecursive(config.contentDir);
  return copiedFiles;
}
495
/**
 * Write one markdown file per configured page into `config.outDir`.
 *
 * Pages with neither content nor a title are skipped; the root page falls back
 * to the site title. "/" is written to `index.md`, any other pathname to
 * `<pathname>.md` with one leading and one trailing slash removed. Each file
 * starts with a frontmatter block (title, description, url, source,
 * generated_by) followed by the title heading, description and content.
 *
 * @param {{ title: string, url: string, outDir: string,
 *   pages?: Array<{ pathname: string, title?: string, description?: string,
 *   content?: string }> }} config - Resolved configuration.
 * @returns {Array<{ pathname: string, destination: string }>} The files that
 *   were written. Write failures are logged with `console.warn` and skipped.
 */
function generatePageMarkdownFiles(config) {
  const generated = [];
  const pages = config.pages || [];
  // JSON string syntax is also a valid double-quoted YAML scalar, so embedded
  // quotes, backslashes and newlines cannot break the frontmatter block.
  const quote = (value) => JSON.stringify(String(value));
  for (const page of pages) {
    const pageTitle = page.title || (page.pathname === "/" ? config.title : void 0);
    if (!page.content && !pageTitle) continue;
    let filename;
    if (page.pathname === "/") {
      filename = "index.md";
    } else {
      const clean = page.pathname.replace(/^\//, "").replace(/\/$/, "");
      // An empty remainder would otherwise produce a file named ".md".
      filename = `${clean || "index"}.md`;
    }
    const destPath = join(config.outDir, filename);
    const pageUrl = page.pathname === "/" ? config.url : `${config.url.replace(/\/$/, "")}${page.pathname}`;
    const lines = [];
    lines.push("---");
    if (pageTitle) lines.push(`title: ${quote(pageTitle)}`);
    if (page.description) lines.push(`description: ${quote(page.description)}`);
    lines.push(`url: ${pageUrl}`);
    lines.push(`source: ${pageUrl}`);
    lines.push(`generated_by: aeo.js`);
    lines.push("---", "");
    if (pageTitle) {
      lines.push(`# ${pageTitle}`, "");
    }
    if (page.description) {
      lines.push(`${page.description}`, "");
    }
    if (page.content) {
      lines.push(page.content);
    }
    const content = lines.join("\n");
    ensureDir(dirname(destPath));
    try {
      writeFileSync(destPath, content, "utf-8");
      generated.push({ pathname: page.pathname, destination: destPath });
    } catch (error) {
      // Keep going with the remaining pages, but do not hide the failure.
      console.warn(`Warning: Could not write ${destPath}:`, error);
    }
  }
  return generated;
}
537
/**
 * Recursively build docs.json entries for every `.md` / `.mdx` file under
 * `dir`, skipping dot-directories and `node_modules`.
 *
 * @param {string} dir - Directory to scan.
 * @param {{ url: string }} config - Supplies the base URL for entry links.
 * @param {string} [base=dir] - Root that URL paths are relative to.
 * @returns {Array<{ url: string, title: string,
 *   description: (string|undefined), lastModified: string }>}
 *   One entry per file; `lastModified` is the file mtime as an ISO string. On a
 *   read error a warning is logged and the entries so far are returned.
 */
function collectManifestEntries(dir, config, base = dir) {
  const collected = [];
  const isVisibleDir = (name) => !name.startsWith(".") && name !== "node_modules";
  const isMarkdown = (name) => {
    const extension = extname(name);
    return extension === ".md" || extension === ".mdx";
  };
  try {
    for (const name of readdirSync(dir)) {
      const absolute = join(dir, name);
      const info = statSync(absolute);
      if (info.isDirectory()) {
        if (isVisibleDir(name)) {
          collected.push(...collectManifestEntries(absolute, config, base));
        }
      } else if (info.isFile() && isMarkdown(name)) {
        const parsed = parseFrontmatter(readFileSync(absolute, "utf-8"));
        const routePath = relative(base, absolute).replace(/\.mdx?$/, "");
        collected.push({
          url: `${config.url}/${routePath}`,
          title: parsed.frontmatter.title || extractTitle(parsed.content),
          description: parsed.frontmatter.description,
          lastModified: info.mtime.toISOString()
        });
      }
    }
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
  return collected;
}
564
/**
 * Build `docs.json`: a manifest listing every configured page and every
 * markdown file under `config.contentDir`, sorted by URL.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{ pathname: string, title?: string,
 *   description?: string }> }} config - Resolved configuration.
 * @returns {string} Pretty-printed JSON (2-space indent).
 */
function generateManifest(config) {
  const documents = [];
  const hasPages = config.pages && config.pages.length > 0;
  if (hasPages) {
    for (const { pathname, title, description } of config.pages) {
      const suffix = pathname === "/" ? "" : pathname;
      documents.push({
        url: `${config.url}${suffix}`,
        title: title || pathname,
        description
      });
    }
  }
  for (const entry of collectManifestEntries(config.contentDir, config)) {
    documents.push(entry);
  }
  // In-place sort: `documents` itself ends up ordered by URL.
  documents.sort((left, right) => left.url.localeCompare(right.url));
  const generatedAt = (/* @__PURE__ */ new Date()).toISOString();
  return JSON.stringify(
    {
      version: "1.0",
      generated: generatedAt,
      site: {
        title: config.title,
        description: config.description,
        url: config.url
      },
      documents,
      metadata: {
        totalDocuments: documents.length,
        generator: "aeo.js",
        generatorUrl: "https://aeojs.org"
      }
    },
    null,
    2
  );
}
593
/**
 * Recursively list a URL for every `.md`, `.mdx` and `.html` file under `dir`,
 * skipping dot-directories and `node_modules`. The URL is `config.url` plus the
 * file's path relative to `base` with the extension removed.
 *
 * @param {string} dir - Directory to scan.
 * @param {{ url: string }} config - Supplies the base URL.
 * @param {string} [base=dir] - Root that URL paths are relative to.
 * @returns {string[]} Collected URLs. On a read error a warning is logged and
 *   the URLs gathered so far are returned.
 */
function collectUrls(dir, config, base = dir) {
  const urls = [];
  const linkable = new Set([".md", ".mdx", ".html"]);
  try {
    readdirSync(dir).forEach((name) => {
      const absolute = join(dir, name);
      const info = statSync(absolute);
      if (info.isDirectory()) {
        const hidden = name.startsWith(".") || name === "node_modules";
        if (!hidden) {
          urls.push(...collectUrls(absolute, config, base));
        }
        return;
      }
      if (info.isFile() && linkable.has(extname(name))) {
        const routePath = relative(base, absolute).replace(/\.(md|mdx|html)$/, "");
        urls.push(`${config.url}/${routePath}`);
      }
    });
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
  return urls;
}
613
/**
 * Escape the five XML special characters (& < > " ') as named entities.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text safe to place in XML content or attribute values.
 */
function escapeXml(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;"
  };
  // One pass over the input: replacement text is never re-scanned, which
  // matches escaping "&" first in a chain of replaces.
  return str.replace(/[&<>"']/g, (character) => entities[character]);
}
616
/**
 * Build `sitemap.xml` from the site root, the configured pages and the files
 * found under `config.contentDir`. URLs are de-duplicated and sorted; every
 * entry gets today's date as `lastmod`, `weekly` changefreq and priority 0.8.
 *
 * @param {{ url: string, contentDir?: string,
 *   pages?: Array<{ pathname: string }> }} config - Resolved configuration.
 * @returns {string} The sitemap XML, lines joined with "\n".
 */
function generateSitemap(config) {
  // Strip trailing slashes so "https://site/" and "https://site" are one root
  // entry and joined paths never contain "//".
  const baseUrl = String(config.url || "").replace(/\/+$/, "");
  const urls = [baseUrl];
  if (config.pages && config.pages.length > 0) {
    for (const page of config.pages) {
      urls.push(`${baseUrl}${page.pathname === "/" ? "" : page.pathname}`);
    }
  }
  if (config.contentDir) {
    urls.push(...collectUrls(config.contentDir, { ...config, url: baseUrl }));
  }
  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  ];
  // Computed once so every entry carries the same date.
  const lastmod = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
  const uniqueUrls = [...new Set(urls)].sort();
  for (const url of uniqueUrls) {
    lines.push(" <url>");
    lines.push(` <loc>${escapeXml(url)}</loc>`);
    lines.push(` <lastmod>${lastmod}</lastmod>`);
    lines.push(" <changefreq>weekly</changefreq>");
    lines.push(" <priority>0.8</priority>");
    lines.push(" </url>");
  }
  lines.push("</urlset>");
  return lines.join("\n");
}
643
/**
 * Return up to ten of the most frequent words in `content`.
 *
 * Text is lower-cased, every character other than a-z, 0-9 and whitespace is
 * treated as a separator, and words of three characters or fewer are ignored.
 * Words with equal counts keep their first-seen order.
 *
 * @param {string} content - Text to analyse.
 * @returns {string[]} Keywords, most frequent first.
 */
function extractKeywords(content) {
  const words = content
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .split(/\s+/)
    .filter((word) => word.length > 3);
  // A Map rather than a plain object: a word such as "constructor" would
  // otherwise read the inherited Object.prototype property as its starting
  // count and corrupt both the tally and the sort.
  const counts = new Map();
  for (const word of words) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([word]) => word);
}
651
/**
 * Split text into chunks at blank-line paragraph boundaries, starting a new
 * chunk whenever adding the next paragraph would push the current one past
 * `maxLength` characters. A single paragraph longer than `maxLength` is kept
 * whole.
 *
 * @param {string} content - Text to split.
 * @param {number} [maxLength=2000] - Soft size limit per chunk.
 * @returns {string[]} Trimmed chunks in document order.
 */
function chunkContent(content, maxLength = 2e3) {
  const chunks = [];
  let buffer = "";
  const flush = () => {
    chunks.push(buffer.trim());
    buffer = "";
  };
  for (const paragraph of content.split("\n\n")) {
    const wouldOverflow = buffer.length + paragraph.length > maxLength;
    if (wouldOverflow && buffer.length > 0) {
      flush();
    }
    buffer += `${paragraph}\n\n`;
  }
  const remainder = buffer.trim();
  if (remainder) {
    chunks.push(remainder);
  }
  return chunks;
}
667
/**
 * Recursively build ai-index.json entries for every `.md` / `.mdx` file under
 * `dir`, skipping dot-directories and `node_modules`. Each file body is split
 * with `chunkContent` and yields one entry per chunk.
 *
 * Entry ids are the first 16 hex characters of SHA-256 over
 * `"<url>-<chunkIndex>"`. Keywords are computed once per file and shared by
 * all of its chunks.
 *
 * @param {string} dir - Directory to scan.
 * @param {{ url: string }} config - Supplies the base URL.
 * @param {string} [base=dir] - Root that URL and source paths are relative to.
 * @returns {object[]} Index entries. On a read error a warning is logged and
 *   the entries gathered so far are returned.
 */
function collectAIIndexEntries(dir, config, base = dir) {
  const collected = [];
  try {
    for (const name of readdirSync(dir)) {
      const absolute = join(dir, name);
      const info = statSync(absolute);
      if (info.isDirectory()) {
        if (!name.startsWith(".") && name !== "node_modules") {
          collected.push(...collectAIIndexEntries(absolute, config, base));
        }
        continue;
      }
      const extension = extname(name);
      if (!info.isFile() || (extension !== ".md" && extension !== ".mdx")) continue;

      const parsed = parseFrontmatter(readFileSync(absolute, "utf-8"));
      const meta = parsed.frontmatter;
      const body = parsed.content;
      const sourcePath = relative(base, absolute);
      const url = `${config.url}/${sourcePath.replace(/\.mdx?$/, "")}`;
      const pieces = chunkContent(body);
      const baseTitle = meta.title || extractTitle(body);
      const keywords = extractKeywords(body);
      const total = pieces.length;
      for (let position = 0; position < total; position += 1) {
        const digest = createHash("sha256").update(`${url}-${position}`).digest("hex");
        collected.push({
          id: digest.slice(0, 16),
          url,
          title: total > 1 ? `${baseTitle} (Part ${position + 1})` : baseTitle,
          content: pieces[position],
          description: meta.description,
          keywords,
          metadata: {
            ...meta,
            chunkIndex: position,
            totalChunks: total,
            sourcePath
          }
        });
      }
    }
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
  return collected;
}
709
/**
 * Build `ai-index.json`: chunked entries for configured pages plus the entries
 * collected from markdown files under `config.contentDir`, sorted by id.
 *
 * Pages with content are chunked (one entry per chunk, id hashed from
 * `"<url>-<chunkIndex>"`). Pages without content get a single entry whose
 * content is the description or title and whose id is hashed from the URL.
 *
 * @param {{ title: string, description?: string, url: string,
 *   contentDir: string, pages?: Array<{ pathname: string, title?: string,
 *   description?: string, content?: string }> }} config - Resolved configuration.
 * @returns {string} Pretty-printed JSON (2-space indent).
 */
function generateAIIndex(config) {
  // First 16 hex characters of SHA-256 over the given seed.
  const shortHash = (seed) => createHash("sha256").update(seed).digest("hex").slice(0, 16);
  const entries = [];

  const pages = config.pages && config.pages.length > 0 ? config.pages : [];
  for (const page of pages) {
    const url = `${config.url}${page.pathname === "/" ? "" : page.pathname}`;
    const title = page.title || page.pathname;
    const body = page.content || "";
    if (!body) {
      entries.push({
        id: shortHash(url),
        url,
        title,
        content: page.description || title,
        description: page.description,
        keywords: []
      });
      continue;
    }
    const pieces = chunkContent(body);
    const keywords = extractKeywords(body);
    const total = pieces.length;
    for (const [position, piece] of pieces.entries()) {
      entries.push({
        id: shortHash(`${url}-${position}`),
        url,
        title: total > 1 ? `${title} (Part ${position + 1})` : title,
        content: piece,
        description: page.description,
        keywords,
        metadata: {
          chunkIndex: position,
          totalChunks: total,
          sourcePath: page.pathname
        }
      });
    }
  }

  for (const entry of collectAIIndexEntries(config.contentDir, config)) {
    entries.push(entry);
  }
  entries.sort((left, right) => left.id.localeCompare(right.id));

  const payload = {
    version: "1.0",
    generated: (/* @__PURE__ */ new Date()).toISOString(),
    site: {
      title: config.title,
      description: config.description,
      url: config.url
    },
    entries,
    metadata: {
      totalEntries: entries.length,
      generator: "aeo.js",
      generatorUrl: "https://aeojs.org",
      embedding: {
        recommended: "text-embedding-ada-002",
        dimensions: 1536
      }
    }
  };
  return JSON.stringify(payload, null, 2);
}
770
/**
 * Generate every enabled AEO artifact into the configured output directory.
 *
 * Accepted call shapes:
 *  - `generateAEOFiles(outDir, partialConfig?)` — a string first argument is
 *    used as `outDir` and merged into `partialConfig` before resolving.
 *  - `generateAEOFiles(resolvedConfig)` — an object whose
 *    `generators.robotsTxt` is a boolean is used as-is.
 *  - anything else is passed through `resolveConfig`.
 *
 * Each generator runs independently; a failure is recorded in `errors` and the
 * remaining generators still run.
 *
 * @param {string|object} [configOrRoot] - Output directory or configuration.
 * @param {object} [maybeConfig] - Partial configuration for the string form.
 * @returns {Promise<{ files: string[], errors: string[] }>} Names/paths of the
 *   files written, and `"<label>: <message>"` strings for failed steps.
 */
async function generateAEOFiles(configOrRoot, maybeConfig) {
  let config;
  if (typeof configOrRoot === "string") {
    config = resolveConfig({ ...maybeConfig, outDir: configOrRoot });
  } else {
    const alreadyResolved =
      Boolean(configOrRoot) &&
      typeof configOrRoot === "object" &&
      "generators" in configOrRoot &&
      configOrRoot.generators != null &&
      typeof configOrRoot.generators.robotsTxt === "boolean";
    config = alreadyResolved ? configOrRoot : resolveConfig(configOrRoot);
  }

  const outDir = config.outDir;
  const files = [];
  const errors = [];
  if (!existsSync(outDir)) {
    mkdirSync(outDir, { recursive: true });
  }

  // Renders one artifact and writes it as `<outDir>/<name>`.
  const writeArtifact = (name, render, prefix) => {
    try {
      const body = render(config);
      writeFileSync(join(outDir, name), prefix ? prefix + body : body, "utf-8");
      files.push(name);
    } catch (e) {
      errors.push(`${name}: ${e.message}`);
    }
  };
  // Runs a step that writes files itself and reports their destinations.
  const recordDestinations = (label, produce) => {
    try {
      for (const item of produce(config)) {
        files.push(item.destination);
      }
    } catch (e) {
      errors.push(`${label}: ${e.message}`);
    }
  };

  if (config.generators.robotsTxt) {
    writeArtifact("robots.txt", generateRobotsTxt);
  }
  if (config.generators.llmsTxt) {
    // The two llms text files are written with a leading byte-order mark.
    writeArtifact("llms.txt", generateLlmsTxt, "\uFEFF");
  }
  if (config.generators.llmsFullTxt) {
    writeArtifact("llms-full.txt", generateLlmsFullTxt, "\uFEFF");
  }
  if (config.generators.rawMarkdown) {
    recordDestinations("page-markdown", generatePageMarkdownFiles);
    recordDestinations("raw-markdown", copyMarkdownFiles);
  }
  if (config.generators.manifest) {
    writeArtifact("docs.json", generateManifest);
  }
  if (config.generators.sitemap) {
    writeArtifact("sitemap.xml", generateSitemap);
  }
  if (config.generators.aiIndex) {
    writeArtifact("ai-index.json", generateAIIndex);
  }
  return { files, errors };
}
860
/**
 * Discover routes of a Next.js project by scanning its App Router directories
 * (`app`, `src/app`) and then its Pages Router directories (`pages`,
 * `src/pages`). Directories that do not exist are ignored.
 *
 * @param {string} projectRoot - Project directory to look in.
 * @returns {Array<{ pathname: string, title: (string|undefined) }>}
 *   Pages pushed by the two scanners, App Router results first.
 */
function scanNextPages(projectRoot) {
  const pages = [];
  const routers = [
    { roots: ["app", "src/app"], scan: scanAppRouter },
    { roots: ["pages", "src/pages"], scan: scanPagesRouter }
  ];
  for (const { roots, scan } of routers) {
    for (const root of roots) {
      const dir = join(projectRoot, root);
      if (!existsSync(dir)) continue;
      scan(dir, dir, pages);
    }
  }
  return pages;
}
872
/**
 * Recursively scan a Next.js App Router directory and push one entry per
 * static route (a folder containing `page.{ts,tsx,js,jsx,md,mdx}`).
 *
 * Folder handling:
 *  - route groups `(name)` are descended into but left out of the pathname;
 *  - dynamic segments `[param]`, parallel-route slots `@slot`, intercepting
 *    routes `(.)x` / `(..)x`, private `_x` folders, dot-folders and `api` are
 *    skipped, since they do not map to a fixed public URL.
 *
 * @param {string} dir - Directory currently being scanned.
 * @param {string} base - App Router root; pathnames are relative to it.
 * @param {Array<{ pathname: string, title: (string|undefined) }>} pages -
 *   Output array, mutated in place. `title` is the capitalised last path
 *   segment, or `undefined` for the root route.
 */
function scanAppRouter(dir, base, pages) {
  // "(marketing)" is a route group; "(.)photo" or "(..)photo" are intercepting routes.
  const isRouteGroup = (name) => /^\([^()]+\)$/.test(name) && !/^\(\.+\)$/.test(name);
  const isRoutableDir = (name) => {
    if (name === "api") return false;
    if (/^[._[@]/.test(name)) return false;
    if (name.startsWith("(")) return isRouteGroup(name);
    return true;
  };
  try {
    for (const entry of readdirSync(dir)) {
      const fullPath = join(dir, entry);
      const stat = statSync(fullPath);
      if (stat.isDirectory()) {
        if (isRoutableDir(entry)) {
          scanAppRouter(fullPath, base, pages);
        }
        continue;
      }
      if (!/^page\.(tsx?|jsx?|mdx?)$/.test(entry)) continue;
      // Build the route from path segments so it always uses "/" and route
      // groups contribute nothing to the URL.
      const segments = relative(base, dir)
        .split(/[\\/]/)
        .filter((segment) => segment !== "" && !isRouteGroup(segment));
      const name = segments[segments.length - 1];
      pages.push({
        pathname: `/${segments.join("/")}`,
        title: name ? name.charAt(0).toUpperCase() + name.slice(1) : void 0
      });
    }
  } catch (error) {
    // Scanning is best-effort, but an unreadable directory should be visible.
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
}
893
/**
 * Recursively scan a Next.js Pages Router directory and push one entry per
 * static page file (`.ts`, `.tsx`, `.js`, `.jsx`, `.md`, `.mdx`).
 *
 * Skipped: dot-folders, `_`-prefixed folders and files (`_app`, `_document`),
 * the `api` folder, and dynamic segments — both `[param]` files and `[param]`
 * folders — because they do not map to a fixed public URL.
 *
 * @param {string} dir - Directory currently being scanned.
 * @param {string} base - Pages Router root; pathnames are relative to it.
 * @param {Array<{ pathname: string, title: (string|undefined) }>} pages -
 *   Output array, mutated in place. `index` files map to their folder's path
 *   and get an `undefined` title; other files use their capitalised file name.
 */
function scanPagesRouter(dir, base, pages) {
  const pageExtension = /\.(tsx?|jsx?|mdx?)$/;
  try {
    for (const entry of readdirSync(dir)) {
      const fullPath = join(dir, entry);
      const stat = statSync(fullPath);
      if (stat.isDirectory()) {
        const skipped = /^[._[]/.test(entry) || entry === "api";
        if (!skipped) {
          scanPagesRouter(fullPath, base, pages);
        }
        continue;
      }
      if (!pageExtension.test(entry) || entry.startsWith("_") || entry.startsWith("[")) continue;
      // Build the route from path segments so it always uses "/".
      const routePath = relative(base, fullPath).split(/[\\/]/).join("/").replace(pageExtension, "");
      let pathname = `/${routePath}`;
      if (pathname.endsWith("/index")) pathname = pathname.slice(0, -6) || "/";
      pathname = pathname.replace(/\/+/g, "/") || "/";
      const name = entry.replace(pageExtension, "");
      pages.push({
        pathname,
        title: name === "index" ? void 0 : name.charAt(0).toUpperCase() + name.slice(1)
      });
    }
  } catch (error) {
    // Scanning is best-effort, but an unreadable directory should be visible.
    console.warn(`Warning: Could not read directory ${dir}:`, error);
  }
}
916
/**
 * Wraps a Next.js config so AEO files are generated after a production
 * client build.
 *
 * The `aeo` key is removed from the returned config and used as the aeo.js
 * options. The returned `webpack` function first delegates to the caller's
 * own `webpack` function (when one is supplied) and then, for client-side
 * non-dev compilations only, registers a plugin that runs the generator on
 * the compiler's `afterEmit` hook.
 *
 * @param {object} [nextConfig={}] - Next.js config, optionally with an `aeo` key.
 * @returns {object} The config without `aeo` and with a wrapping `webpack` function.
 */
function withAeo(nextConfig = {}) {
  const { aeo: aeoOptions = {}, ...restConfig } = nextConfig;

  // Builds the webpack plugin object that runs the generator after emit.
  const createPlugin = (resolvedConfig) => ({
    apply: (compiler) => {
      compiler.hooks.afterEmit.tapAsync("AeoPlugin", async (_compilation, callback) => {
        console.log("[aeo.js] Generating AEO files for Next.js...");
        try {
          const { files, errors } = await generateAEOFiles(resolvedConfig);
          if (files.length > 0) {
            console.log(`[aeo.js] Generated ${files.length} files`);
          }
          if (errors.length > 0) {
            console.error("[aeo.js] Errors:", errors);
          }
        } catch (error) {
          console.error("[aeo.js] Failed to generate AEO files:", error);
        }
        // Always signal completion so a generator failure never blocks the build.
        callback();
      });
    }
  });

  return {
    ...restConfig,
    webpack(config, options) {
      let webpackConfig = config;
      if (typeof nextConfig.webpack === "function") {
        webpackConfig = nextConfig.webpack(webpackConfig, options);
      }
      // Only the production client compilation triggers generation.
      if (options.isServer || options.dev) {
        return webpackConfig;
      }

      const projectRoot = process.cwd();
      const discoveredPages = scanNextPages(projectRoot);
      discoveredPages.forEach((page) => {
        const isUntitledHome = page.pathname === "/" && !page.title;
        if (isUntitledHome) {
          page.title = aeoOptions.title;
        }
        if (!page.description && aeoOptions.description) {
          page.description = aeoOptions.description;
        }
      });

      let contentDir = aeoOptions.contentDir;
      if (!contentDir) {
        const srcDir = join(projectRoot, "src");
        contentDir = existsSync(srcDir) ? srcDir : projectRoot;
      }
      const resolvedConfig = resolveConfig({
        ...aeoOptions,
        outDir: aeoOptions.outDir || join(projectRoot, "public"),
        contentDir,
        pages: [...aeoOptions.pages || [], ...discoveredPages]
      });
      if (!existsSync(resolvedConfig.outDir)) {
        mkdirSync(resolvedConfig.outDir, { recursive: true });
      }

      webpackConfig.plugins.push(createPlugin(resolvedConfig));
      return webpackConfig;
    }
  };
}
969
/**
 * Resolves the aeo.js config and returns a metadata object that advertises
 * the generated LLM/AI resources as alternate links.
 *
 * When `NODE_ENV` is `"production"` the AEO files are generated before the
 * metadata is returned.
 *
 * @param {object} config - aeo.js options, passed to `resolveConfig`.
 * @returns {Promise<object>} Object with `title`, `description` and
 *   `alternates.types` entries for the text and JSON outputs.
 */
async function generateAeoMetadata(config) {
  const resolvedConfig = resolveConfig(config);

  const isProduction = process.env.NODE_ENV === "production";
  if (isProduction) {
    await generateAEOFiles(resolvedConfig);
  }

  const plainTextAlternates = [
    { url: "/llms.txt", title: "LLM Summary" },
    { url: "/llms-full.txt", title: "Full Content for LLMs" }
  ];
  const jsonAlternates = [
    { url: "/docs.json", title: "Documentation Manifest" },
    { url: "/ai-index.json", title: "AI-Optimized Index" }
  ];

  // Read title/description only after generation has finished.
  const { title, description } = resolvedConfig;
  return {
    title,
    description,
    alternates: {
      types: {
        "text/plain": plainTextAlternates,
        "application/json": jsonAlternates
      }
    }
  };
}
991
/**
 * Converts an HTML document into compact, Markdown-flavoured plain text.
 *
 * Processing order:
 * 1. Drop `<script>`, `<style>` and `<svg>` blocks.
 * 2. Keep only the contents of `<main>` when present; otherwise drop
 *    `<nav>`, `<header>` and `<footer>` blocks.
 * 3. Rewrite links, headings (`<h1>` and `<h2>` both become `##`), bold,
 *    italic, list items, blockquotes, rules and line breaks to Markdown.
 * 4. Strip all remaining tags, then decode character references.
 * 5. Remove emoji/symbol code points, normalise whitespace and blank lines.
 *
 * Tag patterns use `\b` after the tag name so that, for example, the bold
 * rule does not fire on `<br>`/`<button>` and the italic rule does not fire
 * on `<img>`. Character references are decoded in a single pass so an
 * escaped reference such as `&amp;lt;` yields the literal text `&lt;`
 * rather than `<`; decimal and hexadecimal numeric references are supported.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Extracted text, trimmed and capped at 8000 characters.
 */
function extractText(html) {
  const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " ", copy: "(c)" };
  // Replacement callback for one character reference; unknown or invalid
  // numeric references are left untouched.
  const decodeEntity = (match, decimal, hex, named) => {
    if (named) return namedEntities[named];
    const codePoint = decimal ? parseInt(decimal, 10) : parseInt(hex, 16);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (!(codePoint > 0 && codePoint <= 0x10ffff) || isSurrogate) return match;
    return String.fromCodePoint(codePoint);
  };

  let text = html;
  text = text.replace(/<script[\s\S]*?<\/script>/gi, "");
  text = text.replace(/<style[\s\S]*?<\/style>/gi, "");
  text = text.replace(/<svg[\s\S]*?<\/svg>/gi, "");

  const mainMatch = text.match(/<main[^>]*>([\s\S]*)<\/main>/i);
  if (mainMatch) {
    text = mainMatch[1];
  } else {
    text = text.replace(/<nav[\s\S]*?<\/nav>/gi, "");
    text = text.replace(/<header[\s\S]*?<\/header>/gi, "");
    text = text.replace(/<footer[\s\S]*?<\/footer>/gi, "");
  }

  // Links wrapping block-level content (cards) are flattened to one line of
  // at most 120 characters; inline links keep their inner markup for now.
  text = text.replace(/<a\b[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, url, inner) => {
    if (/<(?:h[1-6]|div|p|section)[^>]*>/i.test(inner)) {
      const cleanInner = inner.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
      return `
[${cleanInner.slice(0, 120).trim()}](${url})
`;
    }
    return `[${inner}](${url})`;
  });

  text = text.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, "\n\n## $1\n\n");
  text = text.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, "\n\n## $1\n\n");
  text = text.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, "\n\n### $1\n\n");
  text = text.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, "\n\n#### $1\n\n");
  text = text.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, "\n\n##### $1\n\n");
  text = text.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, "\n\n###### $1\n\n");
  // Second link pass kept from the original pipeline; presumably a safety
  // net for anchors left behind by malformed or nested markup.
  text = text.replace(/<a\b[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, "[$2]($1)");
  text = text.replace(/<(?:strong|b)\b[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, "**$1**");
  text = text.replace(/<(?:em|i)\b[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, "*$1*");
  text = text.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, "\n- $1");
  text = text.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, "\n\n> $1\n\n");
  text = text.replace(/<hr[^>]*\/?>/gi, "\n\n---\n\n");
  text = text.replace(/<br[^>]*\/?>/gi, "\n");
  text = text.replace(/<\/p>/gi, "\n\n");
  text = text.replace(/<p[^>]*>/gi, "");
  text = text.replace(/<\/?(?:div|section|article|header|main|aside|figure|figcaption|table|thead|tbody|tr|td|th|ul|ol|dl|dt|dd)[^>]*>/gi, "\n");
  text = text.replace(/<[^>]+>/g, "");

  // Decode after tag stripping so decoded "<" / ">" are never read as markup.
  text = text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp|copy));/g, decodeEntity);

  text = text.replace(/[\u{1F1E0}-\u{1FAFF}\u{2600}-\u{27BF}\u{FE00}-\u{FE0F}\u{200D}\u{20E3}]/gu, "");
  text = text.split("\n").map((l) => l.replace(/\s+/g, " ").trim()).join("\n");
  text = text.replace(/\n{3,}/g, "\n\n");
  text = text.replace(/\[[\s\n]+/g, "[").replace(/[\s\n]+\]/g, "]");
  // Pull heading text up onto the marker line, then drop empty headings.
  text = text.replace(/(#{2,6})\s*\n+\s*/g, "$1 ");
  text = text.replace(/^#{2,6}\s*$/gm, "");
  text = text.replace(/\n{3,}/g, "\n\n");
  return text.trim().slice(0, 8e3);
}
1040
/**
 * Collects prerendered pages from a Next.js build by walking
 * `<projectRoot>/.next/server/app` and reading every HTML file found.
 *
 * Path mapping: `index.html` maps to its directory's route (`/` at the
 * top level) and any other `<name>.html` maps to `<directory route>/<name>`.
 * Directories and HTML files whose name starts with `_`, and directories
 * whose name starts with `.`, are ignored.
 *
 * For each page the `<title>` text (up to the first `|`), the
 * `<meta name="description">` content and the text produced by
 * `extractText` are recorded. Scanning is best-effort: unreadable
 * directories or entries are skipped silently.
 *
 * @param {string} projectRoot - Directory containing the `.next` folder.
 * @returns {Array<{pathname: string, title: (string|undefined),
 *   description: (string|undefined), content: string}>} Discovered pages;
 *   empty when the build output directory does not exist.
 */
function scanNextBuildOutput(projectRoot) {
  const pages = [];
  const serverAppDir = join(projectRoot, ".next", "server", "app");
  if (!existsSync(serverAppDir)) return pages;

  // Reads one prerendered HTML file and records it under `pathname`.
  const addPage = (filePath, pathname) => {
    const html = readFileSync(filePath, "utf-8");
    const titleMatch = html.match(/<title>([^<]*)<\/title>/i);
    const descMatch = html.match(/<meta\s+name=["']description["']\s+content=["']([^"']*)["']/i);
    pages.push({
      pathname,
      // Keep only the part before a "Page | Site" style separator.
      title: titleMatch ? titleMatch[1].split("|")[0].trim() : void 0,
      description: descMatch ? descMatch[1] : void 0,
      content: extractText(html)
    });
  };

  function walk(dir, basePath = "") {
    let entries;
    try {
      entries = readdirSync(dir);
    } catch {
      // Best-effort: an unreadable directory simply contributes no pages.
      return;
    }
    for (const entry of entries) {
      const fullPath = join(dir, entry);
      try {
        if (statSync(fullPath).isDirectory()) {
          if (!entry.startsWith("_") && !entry.startsWith(".")) {
            walk(fullPath, `${basePath}/${entry}`);
          }
          continue;
        }
        if (!entry.endsWith(".html") || entry.startsWith("_")) continue;
        const name = entry.slice(0, -".html".length);
        const pathname = name === "index" ? basePath || "/" : `${basePath}/${name}`;
        addPage(fullPath, pathname);
      } catch {
        // Skip entries that cannot be stat'ed or read; keep scanning the rest.
      }
    }
  }

  walk(serverAppDir);
  return pages;
}
1073
/**
 * Generates the AEO files from an existing Next.js build.
 *
 * Pages are discovered from the build output under the current working
 * directory, given fallback metadata from `config`, merged after any
 * `config.pages`, and passed to the generator. A summary of generated
 * files and any errors is written to the console.
 *
 * @param {object} [config={}] - aeo.js options (`title`, `description`,
 *   `contentDir`, `outDir`, `pages`, ...).
 * @returns {Promise<void>}
 */
async function postBuild(config = {}) {
  const projectRoot = process.cwd();
  const discoveredPages = scanNextBuildOutput(projectRoot);
  const pageCount = discoveredPages.length;
  if (pageCount > 0) {
    console.log(`[aeo.js] Discovered ${pageCount} pages from Next.js build output`);
  }

  discoveredPages.forEach((page) => {
    // The home page inherits the site title when the build gave it none.
    const needsSiteTitle = page.pathname === "/" && !page.title;
    if (needsSiteTitle && config.title) {
      page.title = config.title;
    }
    if (!page.description && config.description) {
      page.description = config.description;
    }
  });

  let contentDir = config.contentDir;
  if (!contentDir) {
    const srcDir = join(projectRoot, "src");
    contentDir = existsSync(srcDir) ? srcDir : projectRoot;
  }

  const resolvedConfig = resolveConfig({
    ...config,
    outDir: config.outDir || join(projectRoot, "public"),
    contentDir,
    pages: [...config.pages || [], ...discoveredPages]
  });

  const { files, errors } = await generateAEOFiles(resolvedConfig);
  if (files.length > 0) {
    console.log(`[aeo.js] Generated ${files.length} files`);
  }
  if (errors.length > 0) {
    console.error("[aeo.js] Errors:", errors);
  }
}
1102
+ var next_default = withAeo;
1103
+
1104
+ export { next_default as default, generateAeoMetadata, postBuild, withAeo };
1105
+ //# sourceMappingURL=next.mjs.map
1106
+ //# sourceMappingURL=next.mjs.map