aeo.js 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +132 -17
  2. package/dist/angular.d.mts +29 -0
  3. package/dist/angular.d.ts +29 -0
  4. package/dist/angular.js +1314 -0
  5. package/dist/angular.js.map +1 -0
  6. package/dist/angular.mjs +1310 -0
  7. package/dist/angular.mjs.map +1 -0
  8. package/dist/astro.d.mts +8 -2
  9. package/dist/astro.d.ts +8 -2
  10. package/dist/astro.js +400 -100
  11. package/dist/astro.js.map +1 -1
  12. package/dist/astro.mjs +400 -100
  13. package/dist/astro.mjs.map +1 -1
  14. package/dist/cli.d.mts +1 -0
  15. package/dist/cli.d.ts +1 -0
  16. package/dist/cli.js +1880 -0
  17. package/dist/cli.js.map +1 -0
  18. package/dist/cli.mjs +1878 -0
  19. package/dist/cli.mjs.map +1 -0
  20. package/dist/index.d.mts +183 -4
  21. package/dist/index.d.ts +183 -4
  22. package/dist/index.js +974 -19
  23. package/dist/index.js.map +1 -1
  24. package/dist/index.mjs +952 -20
  25. package/dist/index.mjs.map +1 -1
  26. package/dist/next.d.mts +2 -17
  27. package/dist/next.d.ts +2 -17
  28. package/dist/next.js +262 -73
  29. package/dist/next.js.map +1 -1
  30. package/dist/next.mjs +262 -73
  31. package/dist/next.mjs.map +1 -1
  32. package/dist/nuxt.d.mts +13 -0
  33. package/dist/nuxt.d.ts +13 -0
  34. package/dist/nuxt.js +1344 -0
  35. package/dist/nuxt.js.map +1 -0
  36. package/dist/nuxt.mjs +1337 -0
  37. package/dist/nuxt.mjs.map +1 -0
  38. package/dist/react.d.mts +1 -1
  39. package/dist/react.d.ts +1 -1
  40. package/dist/react.js +330 -4
  41. package/dist/react.js.map +1 -1
  42. package/dist/react.mjs +330 -4
  43. package/dist/react.mjs.map +1 -1
  44. package/dist/{types-BTY-v-7i.d.mts → types-Cn_Qbkmg.d.mts} +34 -0
  45. package/dist/{types-BTY-v-7i.d.ts → types-Cn_Qbkmg.d.ts} +34 -0
  46. package/dist/vite.d.mts +5 -0
  47. package/dist/vite.d.ts +5 -0
  48. package/dist/vite.js +1370 -0
  49. package/dist/vite.js.map +1 -0
  50. package/dist/vite.mjs +1366 -0
  51. package/dist/vite.mjs.map +1 -0
  52. package/dist/vue.d.mts +19 -0
  53. package/dist/vue.d.ts +19 -0
  54. package/dist/vue.js +1404 -0
  55. package/dist/vue.js.map +1 -0
  56. package/dist/vue.mjs +1398 -0
  57. package/dist/vue.mjs.map +1 -0
  58. package/dist/webpack.d.mts +1 -1
  59. package/dist/webpack.d.ts +1 -1
  60. package/dist/webpack.js +178 -18
  61. package/dist/webpack.js.map +1 -1
  62. package/dist/webpack.mjs +178 -18
  63. package/dist/webpack.mjs.map +1 -1
  64. package/dist/widget.d.mts +11 -1
  65. package/dist/widget.d.ts +11 -1
  66. package/dist/widget.js +330 -4
  67. package/dist/widget.js.map +1 -1
  68. package/dist/widget.mjs +330 -4
  69. package/dist/widget.mjs.map +1 -1
  70. package/package.json +48 -2
package/dist/next.mjs CHANGED
@@ -162,7 +162,7 @@ function detectFramework(projectRoot = process.cwd()) {
162
162
  };
163
163
  }
164
164
  function resolveConfig(config = {}) {
165
- var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x;
165
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M;
166
166
  const frameworkInfo = detectFramework();
167
167
  return {
168
168
  title: config.title || "My Site",
@@ -178,26 +178,43 @@ function resolveConfig(config = {}) {
178
178
  rawMarkdown: ((_d = config.generators) == null ? void 0 : _d.rawMarkdown) !== false,
179
179
  manifest: ((_e = config.generators) == null ? void 0 : _e.manifest) !== false,
180
180
  sitemap: ((_f = config.generators) == null ? void 0 : _f.sitemap) !== false,
181
- aiIndex: ((_g = config.generators) == null ? void 0 : _g.aiIndex) !== false
181
+ aiIndex: ((_g = config.generators) == null ? void 0 : _g.aiIndex) !== false,
182
+ schema: ((_h = config.generators) == null ? void 0 : _h.schema) !== false
182
183
  },
183
184
  robots: {
184
- allow: ((_h = config.robots) == null ? void 0 : _h.allow) || ["/"],
185
- disallow: ((_i = config.robots) == null ? void 0 : _i.disallow) || [],
186
- crawlDelay: ((_j = config.robots) == null ? void 0 : _j.crawlDelay) || 0,
187
- sitemap: ((_k = config.robots) == null ? void 0 : _k.sitemap) || ""
185
+ allow: ((_i = config.robots) == null ? void 0 : _i.allow) || ["/"],
186
+ disallow: ((_j = config.robots) == null ? void 0 : _j.disallow) || [],
187
+ crawlDelay: ((_k = config.robots) == null ? void 0 : _k.crawlDelay) || 0,
188
+ sitemap: ((_l = config.robots) == null ? void 0 : _l.sitemap) || ""
189
+ },
190
+ schema: {
191
+ enabled: ((_m = config.schema) == null ? void 0 : _m.enabled) !== false,
192
+ organization: {
193
+ name: ((_o = (_n = config.schema) == null ? void 0 : _n.organization) == null ? void 0 : _o.name) || config.title || "My Site",
194
+ url: ((_q = (_p = config.schema) == null ? void 0 : _p.organization) == null ? void 0 : _q.url) || config.url || "https://example.com",
195
+ logo: ((_s = (_r = config.schema) == null ? void 0 : _r.organization) == null ? void 0 : _s.logo) || "",
196
+ sameAs: ((_u = (_t = config.schema) == null ? void 0 : _t.organization) == null ? void 0 : _u.sameAs) || []
197
+ },
198
+ defaultType: ((_v = config.schema) == null ? void 0 : _v.defaultType) || "WebPage"
199
+ },
200
+ og: {
201
+ enabled: ((_w = config.og) == null ? void 0 : _w.enabled) !== false,
202
+ image: ((_x = config.og) == null ? void 0 : _x.image) || "",
203
+ twitterHandle: ((_y = config.og) == null ? void 0 : _y.twitterHandle) || "",
204
+ type: ((_z = config.og) == null ? void 0 : _z.type) || "website"
188
205
  },
189
206
  widget: {
190
- enabled: ((_l = config.widget) == null ? void 0 : _l.enabled) !== false,
191
- position: ((_m = config.widget) == null ? void 0 : _m.position) || "bottom-right",
207
+ enabled: ((_A = config.widget) == null ? void 0 : _A.enabled) !== false,
208
+ position: ((_B = config.widget) == null ? void 0 : _B.position) || "bottom-right",
192
209
  theme: {
193
- background: ((_o = (_n = config.widget) == null ? void 0 : _n.theme) == null ? void 0 : _o.background) || "rgba(18, 18, 24, 0.9)",
194
- text: ((_q = (_p = config.widget) == null ? void 0 : _p.theme) == null ? void 0 : _q.text) || "#C0C0C5",
195
- accent: ((_s = (_r = config.widget) == null ? void 0 : _r.theme) == null ? void 0 : _s.accent) || "#E8E8EA",
196
- badge: ((_u = (_t = config.widget) == null ? void 0 : _t.theme) == null ? void 0 : _u.badge) || "#4ADE80"
210
+ background: ((_D = (_C = config.widget) == null ? void 0 : _C.theme) == null ? void 0 : _D.background) || "rgba(18, 18, 24, 0.9)",
211
+ text: ((_F = (_E = config.widget) == null ? void 0 : _E.theme) == null ? void 0 : _F.text) || "#C0C0C5",
212
+ accent: ((_H = (_G = config.widget) == null ? void 0 : _G.theme) == null ? void 0 : _H.accent) || "#E8E8EA",
213
+ badge: ((_J = (_I = config.widget) == null ? void 0 : _I.theme) == null ? void 0 : _J.badge) || "#4ADE80"
197
214
  },
198
- humanLabel: ((_v = config.widget) == null ? void 0 : _v.humanLabel) || "Human",
199
- aiLabel: ((_w = config.widget) == null ? void 0 : _w.aiLabel) || "AI",
200
- showBadge: ((_x = config.widget) == null ? void 0 : _x.showBadge) !== false
215
+ humanLabel: ((_K = config.widget) == null ? void 0 : _K.humanLabel) || "Human",
216
+ aiLabel: ((_L = config.widget) == null ? void 0 : _L.aiLabel) || "AI",
217
+ showBadge: ((_M = config.widget) == null ? void 0 : _M.showBadge) !== false
201
218
  }
202
219
  };
203
220
  }
@@ -496,8 +513,8 @@ function generatePageMarkdownFiles(config) {
496
513
  const generated = [];
497
514
  const pages = config.pages || [];
498
515
  for (const page of pages) {
516
+ if (!page.content) continue;
499
517
  const pageTitle = page.title || (page.pathname === "/" ? config.title : void 0);
500
- if (!page.content && !pageTitle) continue;
501
518
  let filename;
502
519
  if (page.pathname === "/") {
503
520
  filename = "index.md";
@@ -767,6 +784,140 @@ function generateAIIndex(config) {
767
784
  };
768
785
  return JSON.stringify(index, null, 2);
769
786
  }
787
+
788
+ // src/core/schema.ts
789
/**
 * Serialize the site-level and per-page schema objects to a JSON string.
 *
 * @param {object} config - Configuration object, forwarded unchanged to
 *   generateSchemaObjects.
 * @returns {string} JSON text indented with two spaces.
 */
function generateSchema(config) {
  return JSON.stringify(generateSchemaObjects(config), null, 2);
}
793
/**
 * Build the structured-data payload: site-wide schemas plus a map of
 * per-page schemas keyed by pathname.
 *
 * @param {object} config - Configuration object; `config.pages` may be absent.
 * @returns {{site: object[], pages: Object<string, object[]>}} Site schemas
 *   and, for every page that produced at least one schema, its schema list.
 */
function generateSchemaObjects(config) {
  const siteSchemas = generateSiteSchemas(config);
  const pageSchemas = {};
  // Tolerate a config without `pages` instead of throwing a TypeError;
  // generatePageMarkdownFiles applies the same `|| []` fallback.
  for (const page of config.pages || []) {
    const schemas = generatePageSchemas(page, config);
    if (schemas.length > 0) {
      pageSchemas[page.pathname] = schemas;
    }
  }
  return { site: siteSchemas, pages: pageSchemas };
}
804
/**
 * Produce the site-wide schema.org objects.
 *
 * Always emits a WebSite entry. An Organization entry follows when the
 * configured organization has a name or at least one `sameAs` link.
 *
 * @param {object} config - Reads `title`, `description`, `url` and
 *   `schema.organization` ({ name, url, logo, sameAs }).
 * @returns {object[]} One or two schema objects, WebSite first.
 */
function generateSiteSchemas(config) {
  const website = {
    "@context": "https://schema.org",
    "@type": "WebSite",
    name: config.title,
    description: config.description || void 0,
    url: config.url
  };
  const { name, url, logo, sameAs } = config.schema.organization;
  const hasSameAs = sameAs.length > 0;
  if (!name && !hasSameAs) {
    return [website];
  }
  const organization = {
    "@context": "https://schema.org",
    "@type": "Organization",
    name,
    url
  };
  // Optional properties are only attached when they carry a value.
  if (logo) {
    organization.logo = logo;
  }
  if (hasSameAs) {
    organization.sameAs = sameAs;
  }
  return [website, organization];
}
827
/**
 * Produce the schema.org objects for a single page.
 *
 * Output order: an optional FAQPage (when question headings are found in the
 * page content), the page entry typed by `config.schema.defaultType`, and,
 * for non-root pages, a BreadcrumbList.
 *
 * @param {object} page - Reads `pathname`, `title`, `description`, `content`.
 * @param {object} config - Reads `url`, `title`, `schema.defaultType` and
 *   `schema.organization.name`.
 * @returns {object[]} Schema objects for the page.
 */
function generatePageSchemas(page, config) {
  const CONTEXT = "https://schema.org";
  const isRoot = page.pathname === "/";
  const pageUrl = isRoot ? config.url : `${config.url.replace(/\/$/, "")}${page.pathname}`;
  const result = [];

  const faqItems = detectFaqPatterns(page.content || "");
  if (faqItems.length > 0) {
    const mainEntity = faqItems.map((item) => ({
      "@type": "Question",
      name: item.question,
      acceptedAnswer: {
        "@type": "Answer",
        text: item.answer
      }
    }));
    result.push({ "@context": CONTEXT, "@type": "FAQPage", mainEntity });
  }

  const pageType = config.schema.defaultType;
  const displayName = page.title || config.title;
  const pageSchema = {
    "@context": CONTEXT,
    "@type": pageType,
    name: displayName,
    url: pageUrl
  };
  if (page.description) {
    pageSchema.description = page.description;
  }
  if (pageType === "Article") {
    // Articles additionally carry a headline and an organization author.
    pageSchema.headline = displayName;
    pageSchema.author = {
      "@type": "Organization",
      name: config.schema.organization.name
    };
  }
  result.push(pageSchema);

  if (isRoot) {
    return result;
  }
  const breadcrumbs = generateBreadcrumbs(page.pathname, config);
  if (breadcrumbs.length > 1) {
    const itemListElement = breadcrumbs.map((crumb, index) => ({
      "@type": "ListItem",
      position: index + 1,
      name: crumb.name,
      item: crumb.url
    }));
    result.push({ "@context": CONTEXT, "@type": "BreadcrumbList", itemListElement });
  }
  return result;
}
878
/**
 * Build the breadcrumb trail for a pathname, starting with "Home".
 *
 * Each path segment becomes one crumb. The crumb name is the segment with
 * its first character upper-cased and hyphens (after the first character)
 * turned into spaces. Percent-encoded segments are decoded for the name
 * only; the crumb URL keeps the segment exactly as given.
 *
 * @param {string} pathname - Page path such as "/docs/getting-started".
 * @param {object} config - Reads `url`; one trailing slash is removed.
 * @returns {{name: string, url: string}[]} Crumbs from the site root down.
 */
function generateBreadcrumbs(pathname, config) {
  const baseUrl = config.url.replace(/\/$/, "");
  const crumbs = [
    { name: "Home", url: baseUrl + "/" }
  ];
  let currentPath = "";
  for (const part of pathname.split("/").filter(Boolean)) {
    currentPath += "/" + part;
    let label = part;
    try {
      // Human-readable label: "caf%C3%A9" should read "Café", not "Caf%C3%A9".
      label = decodeURIComponent(part);
    } catch (e) {
      // Malformed escape sequence (URIError): deliberately keep the raw segment.
      label = part;
    }
    crumbs.push({
      name: label.charAt(0).toUpperCase() + label.slice(1).replace(/-/g, " "),
      url: baseUrl + currentPath
    });
  }
  return crumbs;
}
894
/**
 * Find question/answer pairs in markdown-like text.
 *
 * A question is a heading (1-6 `#`) whose text ends with "?". Its answer is
 * the first run of non-blank lines after it: blank lines before the answer
 * are skipped, and the run ends at the next blank line or at any heading.
 * Questions with no answer lines are dropped. Answers are joined with single
 * spaces and cut to 500 characters.
 *
 * @param {string} content - Text to scan, split on "\n".
 * @returns {{question: string, answer: string}[]} Pairs in document order.
 */
function detectFaqPatterns(content) {
  const rows = content.split("\n").map((row) => row.trim());
  const faqs = [];
  rows.forEach((row, idx) => {
    const heading = row.match(/^#{1,6}\s+(.+\?)\s*$/);
    if (!heading) {
      return;
    }
    const paragraph = [];
    let cursor = idx + 1;
    while (cursor < rows.length) {
      const candidate = rows[cursor];
      cursor += 1;
      if (candidate === "") {
        // A blank line ends the answer once it has started; before that it is skipped.
        if (paragraph.length > 0) {
          break;
        }
        continue;
      }
      if (/^#{1,6}\s/.test(candidate)) {
        break;
      }
      paragraph.push(candidate);
    }
    if (paragraph.length === 0) {
      return;
    }
    faqs.push({
      question: heading[1],
      answer: paragraph.join(" ").slice(0, 500)
    });
  });
  return faqs;
}
770
921
  async function generateAEOFiles(configOrRoot, maybeConfig) {
771
922
  var _a;
772
923
  let config;
@@ -795,7 +946,7 @@ async function generateAEOFiles(configOrRoot, maybeConfig) {
795
946
  if (config.generators.llmsTxt) {
796
947
  try {
797
948
  const content = generateLlmsTxt(config);
798
- writeFileSync(join(outDir, "llms.txt"), "\uFEFF" + content, "utf-8");
949
+ writeFileSync(join(outDir, "llms.txt"), content, "utf-8");
799
950
  files.push("llms.txt");
800
951
  } catch (e) {
801
952
  errors.push(`llms.txt: ${e.message}`);
@@ -804,7 +955,7 @@ async function generateAEOFiles(configOrRoot, maybeConfig) {
804
955
  if (config.generators.llmsFullTxt) {
805
956
  try {
806
957
  const content = generateLlmsFullTxt(config);
807
- writeFileSync(join(outDir, "llms-full.txt"), "\uFEFF" + content, "utf-8");
958
+ writeFileSync(join(outDir, "llms-full.txt"), content, "utf-8");
808
959
  files.push("llms-full.txt");
809
960
  } catch (e) {
810
961
  errors.push(`llms-full.txt: ${e.message}`);
@@ -855,8 +1006,78 @@ async function generateAEOFiles(configOrRoot, maybeConfig) {
855
1006
  errors.push(`ai-index.json: ${e.message}`);
856
1007
  }
857
1008
  }
1009
+ if (config.generators.schema && config.schema.enabled) {
1010
+ try {
1011
+ const content = generateSchema(config);
1012
+ writeFileSync(join(outDir, "schema.json"), content, "utf-8");
1013
+ files.push("schema.json");
1014
+ } catch (e) {
1015
+ errors.push(`schema.json: ${e.message}`);
1016
+ }
1017
+ }
858
1018
  return { files, errors };
859
1019
  }
1020
+
1021
+ // src/core/html-extract.ts
1022
/**
 * Convert an HTML document into markdown-flavoured plain text.
 *
 * Steps, in order: drop script/style/svg; keep only the <main> element when
 * present (otherwise drop nav/header/footer); turn links, headings, emphasis,
 * list items, blockquotes, <hr>, <br> and paragraphs into markdown; strip the
 * remaining tags; decode a small set of named entities; remove characters in
 * several emoji/symbol ranges (plus variation selectors, ZWJ and the
 * combining keycap); normalise whitespace; truncate to 8000 characters.
 * Note that <h1> and <h2> both become "##" headings.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Trimmed text, at most 8000 characters long.
 */
function extractTextFromHtml(html) {
  let text = html;
  // Remove elements whose bodies are never page content.
  text = text.replace(/<script[\s\S]*?<\/script>/gi, "");
  text = text.replace(/<style[\s\S]*?<\/style>/gi, "");
  text = text.replace(/<svg[\s\S]*?<\/svg>/gi, "");
  const mainMatch = text.match(/<main[^>]*>([\s\S]*)<\/main>/i);
  if (mainMatch) {
    text = mainMatch[1];
  } else {
    // No <main>: fall back to stripping the usual page chrome.
    text = text.replace(/<nav[\s\S]*?<\/nav>/gi, "");
    text = text.replace(/<header[\s\S]*?<\/header>/gi, "");
    text = text.replace(/<footer[\s\S]*?<\/footer>/gi, "");
  }
  text = text.replace(/<a[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, url, inner) => {
    // Links wrapping block content (cards) are flattened to one short label on its own line.
    if (/<(?:h[1-6]|div|p|section)[^>]*>/i.test(inner)) {
      const cleanInner = inner.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
      return `\n[${cleanInner.slice(0, 120).trim()}](${url})\n`;
    }
    return `[${inner}](${url})`;
  });
  text = text.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, "\n\n## $1\n\n");
  text = text.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, "\n\n## $1\n\n");
  text = text.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, "\n\n### $1\n\n");
  text = text.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, "\n\n#### $1\n\n");
  text = text.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, "\n\n##### $1\n\n");
  text = text.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, "\n\n###### $1\n\n");
  text = text.replace(/<a[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, "[$2]($1)");
  text = text.replace(/<(?:strong|b)[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, "**$1**");
  text = text.replace(/<(?:em|i)[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, "*$1*");
  text = text.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, "\n- $1");
  text = text.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, "\n\n> $1\n\n");
  text = text.replace(/<hr[^>]*\/?>/gi, "\n\n---\n\n");
  text = text.replace(/<br[^>]*\/?>/gi, "\n");
  text = text.replace(/<\/p>/gi, "\n\n");
  text = text.replace(/<p[^>]*>/gi, "");
  text = text.replace(/<\/?(?:div|section|article|header|main|aside|figure|figcaption|table|thead|tbody|tr|td|th|ul|ol|dl|dt|dd)[^>]*>/gi, "\n");
  text = text.replace(/<[^>]+>/g, "");
  // Decode `&amp;` LAST. Decoding it first turns "&amp;lt;" into "&lt;", which the
  // following replacement would then decode a second time into "<".
  text = text.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&#39;/g, "'").replace(/&nbsp;/g, " ").replace(/&copy;/g, "(c)").replace(/&amp;/g, "&");
  text = text.replace(/[\u{1F1E0}-\u{1FAFF}\u{2600}-\u{27BF}\u{FE00}-\u{FE0F}\u{200D}\u{20E3}]/gu, "");
  // Collapse runs of whitespace inside each line, then runs of blank lines.
  text = text.split("\n").map((l) => l.replace(/\s+/g, " ").trim()).join("\n");
  text = text.replace(/\n{3,}/g, "\n\n");
  // Tidy link labels and headings that were split across lines by the steps above.
  text = text.replace(/\[[\s\n]+/g, "[").replace(/[\s\n]+\]/g, "]");
  text = text.replace(/(#{2,6})\s*\n+\s*/g, "$1 ");
  text = text.replace(/^#{2,6}\s*$/gm, "");
  text = text.replace(/\n{3,}/g, "\n\n");
  return text.trim().slice(0, 8e3);
}
1071
/**
 * Read the page title from an attribute-less <title> element.
 *
 * Returns the trimmed text before the first "|" (e.g. "Docs | My Site"
 * yields "Docs"). When that part is empty, the full untrimmed title text is
 * returned instead.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string|undefined} The title, or undefined when no <title> matches.
 */
function extractTitle2(html) {
  const found = html.match(/<title>([^<]*)<\/title>/i);
  if (!found) {
    return void 0;
  }
  const rawTitle = found[1];
  const [beforePipe] = rawTitle.split("|");
  return beforePipe.trim() || rawTitle;
}
1077
/**
 * Read the content of the page's <meta name="description"> tag.
 *
 * The value is delimited by its own opening quote character, so a
 * double-quoted value may contain apostrophes and vice versa. Both
 * `name ... content` and `content ... name` attribute orders are accepted;
 * the name-first form is tried first.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string|undefined} The description, or undefined when no tag matches.
 */
function extractDescription(html) {
  // `(["'])` captures the opening quote; `(?:(?!\1)[\s\S])*` consumes everything up to
  // the matching quote, so the other quote character no longer truncates the value.
  const nameFirst = /<meta\s+name=["']description["']\s+content=(["'])((?:(?!\1)[\s\S])*)\1/i;
  const contentFirst = /<meta\s+content=(["'])((?:(?!\1)[\s\S])*)\1\s+name=["']description["']/i;
  const match = html.match(nameFirst) || html.match(contentFirst);
  return match ? match[2] : void 0;
}
860
1081
  function scanNextPages(projectRoot) {
861
1082
  const pages = [];
862
1083
  for (const base of ["app", "src/app"]) {
@@ -971,7 +1192,7 @@ async function generateAeoMetadata(config) {
971
1192
  if (process.env.NODE_ENV === "production") {
972
1193
  await generateAEOFiles(resolvedConfig);
973
1194
  }
974
- return {
1195
+ const metadata = {
975
1196
  title: resolvedConfig.title,
976
1197
  description: resolvedConfig.description,
977
1198
  alternates: {
@@ -987,62 +1208,30 @@ async function generateAeoMetadata(config) {
987
1208
  }
988
1209
  }
989
1210
  };
990
- }
991
- function extractText(html) {
992
- let text = html;
993
- text = text.replace(/<script[\s\S]*?<\/script>/gi, "");
994
- text = text.replace(/<style[\s\S]*?<\/style>/gi, "");
995
- text = text.replace(/<svg[\s\S]*?<\/svg>/gi, "");
996
- const mainMatch = text.match(/<main[^>]*>([\s\S]*)<\/main>/i);
997
- if (mainMatch) {
998
- text = mainMatch[1];
999
- } else {
1000
- text = text.replace(/<nav[\s\S]*?<\/nav>/gi, "");
1001
- text = text.replace(/<header[\s\S]*?<\/header>/gi, "");
1002
- text = text.replace(/<footer[\s\S]*?<\/footer>/gi, "");
1211
+ if (resolvedConfig.og.enabled) {
1212
+ metadata.openGraph = {
1213
+ type: resolvedConfig.og.type,
1214
+ title: resolvedConfig.title,
1215
+ description: resolvedConfig.description,
1216
+ url: resolvedConfig.url,
1217
+ siteName: resolvedConfig.title,
1218
+ ...resolvedConfig.og.image ? { images: [{ url: resolvedConfig.og.image }] } : {}
1219
+ };
1220
+ metadata.twitter = {
1221
+ card: resolvedConfig.og.image ? "summary_large_image" : "summary",
1222
+ title: resolvedConfig.title,
1223
+ description: resolvedConfig.description,
1224
+ ...resolvedConfig.og.twitterHandle ? { site: resolvedConfig.og.twitterHandle } : {},
1225
+ ...resolvedConfig.og.image ? { images: [resolvedConfig.og.image] } : {}
1226
+ };
1003
1227
  }
1004
- text = text.replace(/<a[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, url, inner) => {
1005
- if (/<(?:h[1-6]|div|p|section)[^>]*>/i.test(inner)) {
1006
- const cleanInner = inner.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
1007
- return `
1008
- [${cleanInner.slice(0, 120).trim()}](${url})
1009
- `;
1010
- }
1011
- return `[${inner}](${url})`;
1012
- });
1013
- text = text.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, "\n\n## $1\n\n");
1014
- text = text.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, "\n\n## $1\n\n");
1015
- text = text.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, "\n\n### $1\n\n");
1016
- text = text.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, "\n\n#### $1\n\n");
1017
- text = text.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, "\n\n##### $1\n\n");
1018
- text = text.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, "\n\n###### $1\n\n");
1019
- text = text.replace(/<a[^>]+href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, "[$2]($1)");
1020
- text = text.replace(/<(?:strong|b)[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, "**$1**");
1021
- text = text.replace(/<(?:em|i)[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, "*$1*");
1022
- text = text.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, "\n- $1");
1023
- text = text.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, "\n\n> $1\n\n");
1024
- text = text.replace(/<hr[^>]*\/?>/gi, "\n\n---\n\n");
1025
- text = text.replace(/<br[^>]*\/?>/gi, "\n");
1026
- text = text.replace(/<\/p>/gi, "\n\n");
1027
- text = text.replace(/<p[^>]*>/gi, "");
1028
- text = text.replace(/<\/?(?:div|section|article|header|main|aside|figure|figcaption|table|thead|tbody|tr|td|th|ul|ol|dl|dt|dd)[^>]*>/gi, "\n");
1029
- text = text.replace(/<[^>]+>/g, "");
1030
- text = text.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&#39;/g, "'").replace(/&nbsp;/g, " ").replace(/&copy;/g, "(c)");
1031
- text = text.replace(/[\u{1F1E0}-\u{1FAFF}\u{2600}-\u{27BF}\u{FE00}-\u{FE0F}\u{200D}\u{20E3}]/gu, "");
1032
- text = text.split("\n").map((l) => l.replace(/\s+/g, " ").trim()).join("\n");
1033
- text = text.replace(/\n{3,}/g, "\n\n");
1034
- text = text.replace(/\[[\s\n]+/g, "[").replace(/[\s\n]+\]/g, "]");
1035
- text = text.replace(/(#{2,6})\s*\n+\s*/g, "$1 ");
1036
- text = text.replace(/^#{2,6}\s*$/gm, "");
1037
- text = text.replace(/\n{3,}/g, "\n\n");
1038
- return text.trim().slice(0, 8e3);
1228
+ return metadata;
1039
1229
  }
1040
1230
  function scanNextBuildOutput(projectRoot) {
1041
1231
  const pages = [];
1042
1232
  const serverAppDir = join(projectRoot, ".next", "server", "app");
1043
1233
  if (!existsSync(serverAppDir)) return pages;
1044
1234
  function walk(dir, basePath = "") {
1045
- var _a, _b;
1046
1235
  try {
1047
1236
  const entries = readdirSync(dir);
1048
1237
  for (const entry of entries) {
@@ -1052,14 +1241,14 @@ function scanNextBuildOutput(projectRoot) {
1052
1241
  walk(fullPath, `${basePath}/${entry}`);
1053
1242
  } else if (entry === "index.html") {
1054
1243
  const html = readFileSync(fullPath, "utf-8");
1055
- const titleMatch = html.match(/<title>([^<]*)<\/title>/i);
1056
- const descMatch = html.match(/<meta\s+name=["']description["']\s+content=["']([^"']*)["']/i);
1057
- const textContent = extractText(html);
1244
+ const title = extractTitle2(html);
1245
+ const description = extractDescription(html);
1246
+ const textContent = extractTextFromHtml(html);
1058
1247
  const pathname = basePath || "/";
1059
1248
  pages.push({
1060
1249
  pathname,
1061
- title: (_b = (_a = titleMatch == null ? void 0 : titleMatch[1]) == null ? void 0 : _a.split("|")[0]) == null ? void 0 : _b.trim(),
1062
- description: descMatch == null ? void 0 : descMatch[1],
1250
+ title,
1251
+ description,
1063
1252
  content: textContent
1064
1253
  });
1065
1254
  }