@voicenter-team/nuxt-llms-generator 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,10 @@ import Mustache from 'mustache';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
  import { createHash } from 'crypto';
6
6
  import { JSONPath } from 'jsonpath-plus';
7
- import { T as TemplateError, E as ErrorCode, w as withErrorHandling } from '../shared/nuxt-llms-generator.ab079b9f.mjs';
8
- import { NodeHtmlMarkdown } from 'node-html-markdown';
7
+ import { T as TemplateError, E as ErrorCode, w as withErrorHandling } from '../shared/nuxt-llms-generator.11eb2a36.mjs';
9
8
  import '@nuxt/kit';
10
9
  import 'zod';
10
+ import 'node-html-markdown';
11
11
 
12
12
  class AnthropicClient {
13
13
  client;
@@ -1128,13 +1128,10 @@ class TemplateGenerator {
1128
1128
  promptAnalyzer;
1129
1129
  cache;
1130
1130
  config;
1131
- nhm = new NodeHtmlMarkdown(
1132
- {},
1133
- void 0,
1134
- void 0
1135
- );
1136
- constructor(config) {
1131
+ umbracoData;
1132
+ constructor(config, umbracoData) {
1137
1133
  this.config = config;
1134
+ this.umbracoData = umbracoData;
1138
1135
  this.anthropicClient = new AnthropicClient(config);
1139
1136
  this.promptAnalyzer = new PromptAnalyzer();
1140
1137
  this.cache = new LLMSCache(config.templatesDir || "./.llms-templates");
@@ -1153,22 +1150,26 @@ class TemplateGenerator {
1153
1150
  }
1154
1151
  return await this.generateTemplateWithAI(pageContent, urlItem);
1155
1152
  }
1156
- async generateAllTemplates(umbracoData) {
1153
+ async generateAllTemplates() {
1157
1154
  const templates = [];
1158
1155
  const maxConcurrent = this.config.maxConcurrent || 5;
1159
- await performAutomaticCleanup(umbracoData, this.config.templatesDir || "./.llms-templates", {
1160
- enableAutoCleanup: this.config.enableAutoCleanup ?? true,
1161
- cleanupOrphaned: this.config.cleanupOrphaned ?? true,
1162
- cleanupHidden: this.config.cleanupHidden ?? true,
1163
- dryRun: false
1164
- });
1165
- const visibilityStats = getPageVisibilityStats(umbracoData);
1156
+ await performAutomaticCleanup(
1157
+ this.umbracoData,
1158
+ this.config.templatesDir || "./.llms-templates",
1159
+ {
1160
+ enableAutoCleanup: this.config.enableAutoCleanup ?? true,
1161
+ cleanupOrphaned: this.config.cleanupOrphaned ?? true,
1162
+ cleanupHidden: this.config.cleanupHidden ?? true,
1163
+ dryRun: false
1164
+ }
1165
+ );
1166
+ const visibilityStats = getPageVisibilityStats(this.umbracoData);
1166
1167
  console.log("\u{1F4CA} Page visibility stats:", visibilityStats);
1167
- const visiblePages = umbracoData.urlList.filter(
1168
- (urlItem) => shouldGenerateTemplate(umbracoData, urlItem)
1168
+ const visiblePages = this.umbracoData.urlList.filter(
1169
+ (urlItem) => shouldGenerateTemplate(this.umbracoData, urlItem)
1169
1170
  );
1170
- console.log(`Checking ${visiblePages.length}/${umbracoData.urlList.length} visible pages for cache status...`);
1171
- const { cached, needGeneration } = this.identifyTemplatesNeeded(umbracoData, visiblePages);
1171
+ console.log(`Checking ${visiblePages.length}/${this.umbracoData.urlList.length} visible pages for cache status...`);
1172
+ const { cached, needGeneration } = this.identifyTemplatesNeeded(visiblePages);
1172
1173
  console.log(`\u{1F4C8} Template status: ${cached.length} cached, ${needGeneration.length} need generation`);
1173
1174
  templates.push(...cached);
1174
1175
  if (needGeneration.length === 0) {
@@ -1197,11 +1198,11 @@ class TemplateGenerator {
1197
1198
  console.log(`Generated ${templates.length} total templates (${cached.length} from cache, ${templates.length - cached.length} newly generated)`);
1198
1199
  return templates;
1199
1200
  }
1200
- identifyTemplatesNeeded(umbracoData, visiblePages) {
1201
+ identifyTemplatesNeeded(visiblePages) {
1201
1202
  const cached = [];
1202
1203
  const needGeneration = [];
1203
1204
  for (const urlItem of visiblePages) {
1204
- const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
1205
+ const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
1205
1206
  if (!pageContent) {
1206
1207
  console.warn(`No content found for ${urlItem.url} (${urlItem.Jpath})`);
1207
1208
  continue;
@@ -1267,11 +1268,7 @@ class TemplateGenerator {
1267
1268
  async renderTemplate(template, data) {
1268
1269
  return withErrorHandling(async () => {
1269
1270
  const validatedTemplate = await templateValidationPipeline.validateAndFix(template);
1270
- const renderedContent = Mustache.render(validatedTemplate, data);
1271
- if (this.config.enableHtmlToMarkdown) {
1272
- return this.nhm.translate(renderedContent);
1273
- }
1274
- return renderedContent;
1271
+ return Mustache.render(validatedTemplate, data);
1275
1272
  }, {
1276
1273
  template: template.substring(0, 200) + "...",
1277
1274
  dataKeys: Object.keys(data)
@@ -1325,21 +1322,22 @@ class TemplateGenerator {
1325
1322
  class LLMSFilesGenerator {
1326
1323
  config;
1327
1324
  templateGenerator;
1328
- nhm = new NodeHtmlMarkdown();
1329
- constructor(config) {
1325
+ umbracoData;
1326
+ constructor(config, umbracoData) {
1330
1327
  this.config = config;
1331
- this.templateGenerator = new TemplateGenerator(config);
1328
+ this.umbracoData = umbracoData;
1329
+ this.templateGenerator = new TemplateGenerator(config, umbracoData);
1332
1330
  }
1333
- async generateAllFiles(umbracoData) {
1331
+ async generateAllFiles() {
1334
1332
  const startTime = Date.now();
1335
1333
  console.log("\u{1F680} Starting LLMS files generation...");
1336
- const templates = await this.templateGenerator.generateAllTemplates(umbracoData);
1334
+ const templates = await this.templateGenerator.generateAllTemplates();
1337
1335
  console.log("\u{1F4C4} Generating individual markdown files...");
1338
- const individualMdFiles = this.config.enableIndividualMd ? await this.generateIndividualMarkdownFiles(umbracoData, templates) : void 0;
1336
+ const individualMdFiles = this.config.enableIndividualMd ? await this.generateIndividualMarkdownFiles(templates) : void 0;
1339
1337
  console.log("\u{1F4DD} Generating llms.txt navigation file...");
1340
- const llmsTxt = this.generateLLMSTxt(umbracoData, individualMdFiles || []);
1338
+ const llmsTxt = this.generateLLMSTxt(individualMdFiles || []);
1341
1339
  console.log("\u{1F4DA} Generating llms-full.txt...");
1342
- const llmsFullTxt = this.config.enableLLMSFullTxt ? this.generateLLMSFullTxt(umbracoData, individualMdFiles || []) : void 0;
1340
+ const llmsFullTxt = this.config.enableLLMSFullTxt ? this.generateLLMSFullTxt(individualMdFiles || []) : void 0;
1343
1341
  const files = {
1344
1342
  llmsTxt,
1345
1343
  llmsFullTxt,
@@ -1350,18 +1348,18 @@ class LLMSFilesGenerator {
1350
1348
  console.log(`\u2705 LLMS files generation completed in ${duration}ms`);
1351
1349
  return files;
1352
1350
  }
1353
- async generateIndividualMarkdownFiles(umbracoData, templates) {
1351
+ async generateIndividualMarkdownFiles(templates) {
1354
1352
  const mdFiles = [];
1355
1353
  for (const template of templates) {
1356
1354
  try {
1357
- const urlItem = umbracoData.urlList.find(
1355
+ const urlItem = this.umbracoData.urlList.find(
1358
1356
  (item) => generatePageId(item) === template.pageId
1359
1357
  );
1360
1358
  if (!urlItem) {
1361
1359
  console.warn(`URL item not found for template ${template.pageId}`);
1362
1360
  continue;
1363
1361
  }
1364
- const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
1362
+ const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
1365
1363
  if (!pageContent) {
1366
1364
  console.warn(`Page content not found for ${urlItem.url}`);
1367
1365
  continue;
@@ -1385,9 +1383,9 @@ class LLMSFilesGenerator {
1385
1383
  }
1386
1384
  return mdFiles;
1387
1385
  }
1388
- generateLLMSTxt(umbracoData, mdFiles) {
1389
- const siteTitle = this.extractSiteTitle(umbracoData);
1390
- const siteDescription = this.extractSiteDescription(umbracoData);
1386
+ generateLLMSTxt(mdFiles) {
1387
+ const siteTitle = this.extractSiteTitle();
1388
+ const siteDescription = this.extractSiteDescription();
1391
1389
  let content = `# ${siteTitle}
1392
1390
 
1393
1391
  `;
@@ -1399,7 +1397,7 @@ class LLMSFilesGenerator {
1399
1397
  content += `This website contains comprehensive information about ${siteTitle.toLowerCase()}. The content is organized into the following sections:
1400
1398
 
1401
1399
  `;
1402
- const pagesByCategory = this.groupPagesByCategory(umbracoData, mdFiles);
1400
+ const pagesByCategory = this.groupPagesByCategory(mdFiles);
1403
1401
  for (const [category, pages] of Object.entries(pagesByCategory)) {
1404
1402
  if (pages.length === 0)
1405
1403
  continue;
@@ -1407,33 +1405,32 @@ class LLMSFilesGenerator {
1407
1405
 
1408
1406
  `;
1409
1407
  for (const page of pages) {
1410
- const urlItem = umbracoData.urlList.find((item) => item.url === page.url);
1411
- const pageTitle = this.extractPageTitle(umbracoData, urlItem);
1408
+ const urlItem = this.umbracoData.urlList.find((item) => item.url === page.url);
1409
+ const pageTitle = this.extractPageTitle(urlItem);
1412
1410
  const relativeFilePath = this.getLLMSFilePath(page.path);
1413
- content += `- [${pageTitle}](${relativeFilePath}): ${this.generatePageDescription(umbracoData, urlItem)}
1411
+ content += `- [${pageTitle}](${relativeFilePath}): ${this.generatePageDescription(urlItem)}
1414
1412
  `;
1415
1413
  }
1416
1414
  content += "\n";
1417
1415
  }
1418
- const visiblePages = getVisiblePages(umbracoData);
1419
- const hiddenCount = umbracoData.urlList.length - visiblePages.length;
1416
+ const visiblePages = getVisiblePages(this.umbracoData);
1417
+ const hiddenCount = this.umbracoData.urlList.length - visiblePages.length;
1420
1418
  if (hiddenCount > 0) {
1421
1419
  content += `*Note: ${hiddenCount} pages are excluded from this documentation as they are marked as hidden.*
1422
1420
 
1423
1421
  `;
1424
1422
  }
1425
1423
  content += "## Optional\n\n";
1426
- content += "- [Complete Documentation](llms-full.txt): All content combined in a single file\n";
1427
- content += "- [Site Map](sitemap.xml): XML sitemap of all pages\n";
1424
+ content += `- [Complete Documentation](${this.makeUrl("/llms-full.txt")}): All content combined in a single file`;
1428
1425
  const outputPath = join(this.getOutputDir(), "llms.txt");
1429
1426
  return {
1430
1427
  path: outputPath,
1431
1428
  content: content.trim()
1432
1429
  };
1433
1430
  }
1434
- generateLLMSFullTxt(umbracoData, mdFiles) {
1435
- const siteTitle = this.extractSiteTitle(umbracoData);
1436
- const siteDescription = this.extractSiteDescription(umbracoData);
1431
+ generateLLMSFullTxt(mdFiles) {
1432
+ const siteTitle = this.extractSiteTitle();
1433
+ const siteDescription = this.extractSiteDescription();
1437
1434
  let content = `# ${siteTitle} - Complete Documentation
1438
1435
 
1439
1436
  `;
@@ -1444,15 +1441,10 @@ class LLMSFilesGenerator {
1444
1441
  }
1445
1442
  content += "---\n\n";
1446
1443
  for (const mdFile of mdFiles) {
1447
- const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
1444
+ const urlItem = this.umbracoData.urlList.find((item) => item.url === mdFile.url);
1448
1445
  if (!urlItem)
1449
1446
  continue;
1450
- content += `## Page: ${mdFile.url}
1451
-
1452
- `;
1453
- content += `**Template**: ${urlItem.TemplateAlias}
1454
- `;
1455
- content += `**Node ID**: ${urlItem.nodeID}
1447
+ content += `## Page: ${this.makeUrl(mdFile.url)}
1456
1448
 
1457
1449
  `;
1458
1450
  content += mdFile.content;
@@ -1481,111 +1473,127 @@ class LLMSFilesGenerator {
1481
1473
  console.log(`\u{1F4DD} Saved: ${files.individualMdFiles.length} markdown files to llms/ subdirectory`);
1482
1474
  }
1483
1475
  }
1484
- groupPagesByCategory(umbracoData, mdFiles) {
1485
- const categories = {
1486
- main: [],
1487
- blog: [],
1488
- services: [],
1489
- products: [],
1490
- info: [],
1491
- other: []
1492
- };
1476
+ /**
1477
+ * Groups pages by their first-level URL segment.
1478
+ * Example:
1479
+ * /ai-marketplace -> category "ai-marketplace"
1480
+ * /ai-marketplace/asda -> category "ai-marketplace"
1481
+ * /marketplace -> category "marketplace"
1482
+ * / -> category "main"
1483
+ */
1484
+ groupPagesByCategory(mdFiles) {
1485
+ const categories = {};
1493
1486
  for (const mdFile of mdFiles) {
1494
- const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
1487
+ const urlItem = this.umbracoData.urlList.find((item) => item.url === mdFile.url);
1495
1488
  if (!urlItem)
1496
1489
  continue;
1497
1490
  const category = this.categorizeUrlItem(urlItem);
1498
- if (!categories[category]) {
1491
+ if (!categories[category])
1499
1492
  categories[category] = [];
1500
- }
1501
1493
  categories[category].push(mdFile);
1502
1494
  }
1503
1495
  return categories;
1504
1496
  }
1497
+ /**
1498
+ * Determines a logical category name based on the URL structure.
1499
+ * Uses the first path segment as the category.
1500
+ */
1505
1501
  categorizeUrlItem(urlItem) {
1506
- const { url, TemplateAlias } = urlItem;
1507
- const alias = (TemplateAlias || "unknown").toLowerCase();
1508
- url.toLowerCase();
1509
- if (url === "/" || alias.includes("home"))
1502
+ const url = urlItem.url.toLowerCase().trim();
1503
+ if (url === "/" || url === "")
1510
1504
  return "main";
1511
- if (alias.includes("blog") || alias.includes("article") || alias.includes("news"))
1512
- return "blog";
1513
- if (alias.includes("service") || alias.includes("product") || alias.includes("camp"))
1514
- return "services";
1515
- if (alias.includes("about") || alias.includes("contact") || alias.includes("info"))
1516
- return "info";
1517
- return "other";
1518
- }
1519
- extractSiteTitle(umbracoData) {
1520
- const siteData = umbracoData.SiteData;
1505
+ const cleaned = url.replace(/^https?:\/\/[^/]+/, "").replace(/\/+$/, "");
1506
+ const segments = cleaned.split("/").filter(Boolean);
1507
+ if (segments.length === 0)
1508
+ return "main";
1509
+ const firstSegment = segments[0];
1510
+ const ignored = ["media", "assets", "static", "files", "uploads"];
1511
+ if (ignored.includes(firstSegment))
1512
+ return "other";
1513
+ if (firstSegment.length < 2 || /^\d+$/.test(firstSegment))
1514
+ return "other";
1515
+ return firstSegment;
1516
+ }
1517
+ /**
1518
+ * Returns a formatted, human-readable category name for llms.txt output.
1519
+ */
1520
+ formatCategoryName(category) {
1521
+ if (category === "main")
1522
+ return "Main Pages";
1523
+ if (category === "other")
1524
+ return "Other Pages";
1525
+ return category.split("-").map((word) => word.length <= 3 ? word.toUpperCase() : word.charAt(0).toUpperCase() + word.slice(1)).join(" ");
1526
+ }
1527
+ extractSiteTitle() {
1528
+ const siteData = this.umbracoData.SiteData;
1521
1529
  const rawTitle = siteData?.pageTitle || siteData?.mainHeaderBlockTitle || "Website Documentation";
1522
- return this.cleanHtmlContent(rawTitle);
1530
+ return rawTitle;
1523
1531
  }
1524
- extractSiteDescription(umbracoData) {
1525
- const siteData = umbracoData.SiteData;
1532
+ extractSiteDescription() {
1533
+ const siteData = this.umbracoData.SiteData;
1526
1534
  const rawDescription = siteData?.pageDescription || siteData?.ogDescription || null;
1527
- return rawDescription ? this.cleanHtmlContent(rawDescription) : null;
1528
- }
1529
- cleanHtmlContent(content) {
1530
- if (!this.config.enableHtmlToMarkdown) {
1531
- return content;
1532
- }
1533
- if (/<[^>]+>/.test(content)) {
1534
- return this.nhm.translate(content).trim();
1535
- }
1536
- return content;
1535
+ return rawDescription ? rawDescription : null;
1537
1536
  }
1538
- extractPageTitle(umbracoData, urlItem) {
1537
+ extractPageTitle(urlItem) {
1539
1538
  if (!urlItem)
1540
1539
  return "Untitled Page";
1541
- const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
1540
+ const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
1542
1541
  if (!pageContent)
1543
1542
  return urlItem.TemplateAlias;
1544
- return pageContent.pageTitle || pageContent.title || pageContent.headerBlockTitle || urlItem.TemplateAlias;
1543
+ return String(
1544
+ pageContent.pageTitle || pageContent.title || pageContent.pageTittle || urlItem.TemplateAlias
1545
+ );
1545
1546
  }
1546
- generatePageDescription(umbracoData, urlItem) {
1547
+ generatePageDescription(urlItem) {
1547
1548
  if (!urlItem)
1548
1549
  return "Page information";
1549
- const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
1550
+ const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
1550
1551
  if (!pageContent)
1551
1552
  return `${urlItem.TemplateAlias} page`;
1552
1553
  const desc = pageContent.pageDescription || pageContent.description || pageContent.headerBlockSubtitle;
1553
1554
  if (desc && typeof desc === "string") {
1554
- return desc.length > 100 ? `${desc.substring(0, 97)}...` : desc;
1555
+ return desc;
1555
1556
  }
1556
1557
  return `Information about ${urlItem.url}`;
1557
1558
  }
1558
- formatCategoryName(category) {
1559
- const names = {
1560
- main: "Main Pages",
1561
- blog: "Blog & Articles",
1562
- services: "Services & Products",
1563
- info: "Information Pages",
1564
- other: "Other Pages"
1565
- };
1566
- return names[category] || category.charAt(0).toUpperCase() + category.slice(1);
1567
- }
1568
1559
  sanitizeUrlForFilename(url) {
1569
- let filename = url.replace(/^\//, "").replace(/\/$/, "").replace(/\//g, "-").replace(/[^a-zA-Z0-9\-_.]/g, "").replace(/--+/g, "-").replace(/^-+|-+$/g, "");
1560
+ if (url === "/") {
1561
+ return "index";
1562
+ }
1563
+ let filename = url.replace(/^\//, "").replace(/\/$/, "").replace(/\//g, "-").replace(/--+/g, "-").replace(/^-+|-+$/g, "");
1570
1564
  if (!filename || filename === "") {
1571
- filename = "index";
1565
+ filename = `index_${url.length}_${Date.now()}`;
1572
1566
  }
1573
1567
  if (filename.startsWith("-") || filename.startsWith(".")) {
1574
1568
  filename = "page-" + filename.replace(/^[-.]/, "");
1575
1569
  }
1576
1570
  return filename;
1577
1571
  }
1578
- getRelativeFilePath(fullPath) {
1579
- const filename = fullPath.split("/").pop() || "";
1580
- return filename;
1581
- }
1582
1572
  getLLMSFilePath(fullPath) {
1583
1573
  const filename = basename(fullPath);
1584
- return `/llms/${filename}`;
1574
+ return this.makeUrl(`/llms/${filename}`);
1585
1575
  }
1586
1576
  getOutputDir() {
1587
1577
  return this.config.finalOutputDir || "dist";
1588
1578
  }
1579
+ getBaseSiteUrl() {
1580
+ if (this.config.baseSiteUrl) {
1581
+ return this.config.baseSiteUrl;
1582
+ } else if (this.config.baseSiteUrlUmbracoDataKey) {
1583
+ return this.config.baseSiteUrlUmbracoDataKey in this.umbracoData.SiteData ? String(this.umbracoData.SiteData[this.config.baseSiteUrlUmbracoDataKey]) : "";
1584
+ }
1585
+ return "";
1586
+ }
1587
+ makeUrl(path) {
1588
+ const base = this.getBaseSiteUrl();
1589
+ try {
1590
+ return new URL(path, base).toString();
1591
+ } catch {
1592
+ const baseClean = base?.replace(/\/+$/, "") || "";
1593
+ const pathClean = path?.replace(/^\/+/, "") || "";
1594
+ return baseClean && pathClean ? `${baseClean}/${pathClean}` : baseClean || pathClean;
1595
+ }
1596
+ }
1589
1597
  }
1590
1598
 
1591
1599
  export { LLMSFilesGenerator };
package/dist/module.d.mts CHANGED
@@ -1,19 +1,33 @@
1
1
  import * as _nuxt_schema from '@nuxt/schema';
2
+ import { z } from 'zod';
2
3
 
3
- interface LLMSConfig {
4
- anthropicApiKey: string;
5
- umbracoDataPath: string;
6
- templatesDir: string;
7
- finalOutputDir: string;
8
- anthropicModel?: string;
9
- maxConcurrent?: number;
10
- enableLLMSFullTxt?: boolean;
11
- enableIndividualMd?: boolean;
12
- enableAutoCleanup?: boolean;
13
- cleanupOrphaned?: boolean;
14
- cleanupHidden?: boolean;
15
- enableHtmlToMarkdown?: boolean;
4
+ /**
5
+ * Zod validation schemas for LLMS generator
6
+ */
7
+
8
+ declare const LLMSConfigSchema: z.ZodObject<{
9
+ anthropicApiKey: z.ZodString;
10
+ umbracoDataPath: z.ZodString;
11
+ templatesDir: z.ZodString;
12
+ finalOutputDir: z.ZodDefault<z.ZodOptional<z.ZodString>>;
13
+ anthropicModel: z.ZodOptional<z.ZodString>;
14
+ baseSiteUrl: z.ZodOptional<z.ZodString>;
15
+ baseSiteUrlUmbracoDataKey: z.ZodOptional<z.ZodString>;
16
+ maxConcurrent: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
17
+ enableLLMSFullTxt: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
18
+ enableIndividualMd: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
19
+ enableAutoCleanup: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
20
+ cleanupOrphaned: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
21
+ cleanupHidden: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
22
+ enableHtmlToMarkdown: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
23
+ }, z.core.$strict>;
24
+ type ValidatedLLMSConfig = z.infer<typeof LLMSConfigSchema>;
25
+
26
+ interface LLMSModuleOptions extends Partial<ValidatedLLMSConfig> {
27
+ enabled?: boolean;
16
28
  }
29
+ declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
30
+
17
31
  interface UmbracoUrlItem {
18
32
  nodeID: number;
19
33
  url: string;
@@ -98,11 +112,6 @@ interface AnthropicGenerationResponse {
98
112
  tags?: string[];
99
113
  };
100
114
  }
101
- interface LLMSGeneratorOptions {
102
- config: LLMSConfig;
103
- umbracoData: UmbracoData;
104
- templateCache?: TemplateCache;
105
- }
106
115
  interface PageStructureInfo {
107
116
  keys: string[];
108
117
  excludedKeys: string[];
@@ -114,21 +123,6 @@ interface HashGenerationOptions {
114
123
  excludeKeys?: string[];
115
124
  includeOnlyKeys?: string[];
116
125
  }
117
- interface GenerationStats {
118
- totalPages: number;
119
- templatesGenerated: number;
120
- templatesFromCache: number;
121
- mdFilesGenerated: number;
122
- llmsTxtGenerated: true;
123
- llmsFullTxtGenerated: boolean;
124
- duration: number;
125
- apiCallsUsed: number;
126
- }
127
-
128
- interface LLMSModuleOptions extends Partial<LLMSConfig> {
129
- enabled?: boolean;
130
- }
131
- declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
132
126
 
133
127
  export { _default as default };
134
- export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate, GenerationStats, HashGenerationOptions, LLMSConfig, LLMSFiles, LLMSGeneratorOptions, PageContentHash, PageStructureInfo, TemplateCache, UmbracoData, UmbracoPageContent, UmbracoSiteData, UmbracoUrlItem };
128
+ export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate, HashGenerationOptions, LLMSFiles, PageContentHash, PageStructureInfo, TemplateCache, UmbracoData, UmbracoPageContent, UmbracoSiteData, UmbracoUrlItem };
package/dist/module.d.ts CHANGED
@@ -1,19 +1,33 @@
1
1
  import * as _nuxt_schema from '@nuxt/schema';
2
+ import { z } from 'zod';
2
3
 
3
- interface LLMSConfig {
4
- anthropicApiKey: string;
5
- umbracoDataPath: string;
6
- templatesDir: string;
7
- finalOutputDir: string;
8
- anthropicModel?: string;
9
- maxConcurrent?: number;
10
- enableLLMSFullTxt?: boolean;
11
- enableIndividualMd?: boolean;
12
- enableAutoCleanup?: boolean;
13
- cleanupOrphaned?: boolean;
14
- cleanupHidden?: boolean;
15
- enableHtmlToMarkdown?: boolean;
4
+ /**
5
+ * Zod validation schemas for LLMS generator
6
+ */
7
+
8
+ declare const LLMSConfigSchema: z.ZodObject<{
9
+ anthropicApiKey: z.ZodString;
10
+ umbracoDataPath: z.ZodString;
11
+ templatesDir: z.ZodString;
12
+ finalOutputDir: z.ZodDefault<z.ZodOptional<z.ZodString>>;
13
+ anthropicModel: z.ZodOptional<z.ZodString>;
14
+ baseSiteUrl: z.ZodOptional<z.ZodString>;
15
+ baseSiteUrlUmbracoDataKey: z.ZodOptional<z.ZodString>;
16
+ maxConcurrent: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
17
+ enableLLMSFullTxt: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
18
+ enableIndividualMd: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
19
+ enableAutoCleanup: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
20
+ cleanupOrphaned: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
21
+ cleanupHidden: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
22
+ enableHtmlToMarkdown: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
23
+ }, z.core.$strict>;
24
+ type ValidatedLLMSConfig = z.infer<typeof LLMSConfigSchema>;
25
+
26
+ interface LLMSModuleOptions extends Partial<ValidatedLLMSConfig> {
27
+ enabled?: boolean;
16
28
  }
29
+ declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
30
+
17
31
  interface UmbracoUrlItem {
18
32
  nodeID: number;
19
33
  url: string;
@@ -98,11 +112,6 @@ interface AnthropicGenerationResponse {
98
112
  tags?: string[];
99
113
  };
100
114
  }
101
- interface LLMSGeneratorOptions {
102
- config: LLMSConfig;
103
- umbracoData: UmbracoData;
104
- templateCache?: TemplateCache;
105
- }
106
115
  interface PageStructureInfo {
107
116
  keys: string[];
108
117
  excludedKeys: string[];
@@ -114,21 +123,6 @@ interface HashGenerationOptions {
114
123
  excludeKeys?: string[];
115
124
  includeOnlyKeys?: string[];
116
125
  }
117
- interface GenerationStats {
118
- totalPages: number;
119
- templatesGenerated: number;
120
- templatesFromCache: number;
121
- mdFilesGenerated: number;
122
- llmsTxtGenerated: true;
123
- llmsFullTxtGenerated: boolean;
124
- duration: number;
125
- apiCallsUsed: number;
126
- }
127
-
128
- interface LLMSModuleOptions extends Partial<LLMSConfig> {
129
- enabled?: boolean;
130
- }
131
- declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
132
126
 
133
127
  export { _default as default };
134
- export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate, GenerationStats, HashGenerationOptions, LLMSConfig, LLMSFiles, LLMSGeneratorOptions, PageContentHash, PageStructureInfo, TemplateCache, UmbracoData, UmbracoPageContent, UmbracoSiteData, UmbracoUrlItem };
128
+ export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate, HashGenerationOptions, LLMSFiles, PageContentHash, PageStructureInfo, TemplateCache, UmbracoData, UmbracoPageContent, UmbracoSiteData, UmbracoUrlItem };
package/dist/module.json CHANGED
@@ -4,5 +4,5 @@
4
4
  "compatibility": {
5
5
  "nuxt": "^3.0.0"
6
6
  },
7
- "version": "0.1.4"
7
+ "version": "0.1.6"
8
8
  }
package/dist/module.mjs CHANGED
@@ -1,5 +1,6 @@
1
- export { l as default } from './shared/nuxt-llms-generator.ab079b9f.mjs';
1
+ export { l as default } from './shared/nuxt-llms-generator.11eb2a36.mjs';
2
2
  import '@nuxt/kit';
3
3
  import 'fs';
4
4
  import 'path';
5
5
  import 'zod';
6
+ import 'node-html-markdown';
@@ -2,6 +2,7 @@ import { defineNuxtModule, useLogger, addTemplate } from '@nuxt/kit';
2
2
  import { existsSync, readFileSync } from 'fs';
3
3
  import { resolve } from 'path';
4
4
  import { z } from 'zod';
5
+ import { NodeHtmlMarkdown } from 'node-html-markdown';
5
6
 
6
7
  const existingPath = z.string().refine(
7
8
  (path) => existsSync(path)
@@ -13,6 +14,29 @@ const LLMSConfigSchema = z.object({
13
14
  templatesDir: z.string().min(1, "Templates directory is required"),
14
15
  finalOutputDir: z.string().optional().default("public"),
15
16
  anthropicModel: z.string().optional(),
17
+ baseSiteUrl: z.string().optional().refine((url) => {
18
+ try {
19
+ if (!url) {
20
+ return true;
21
+ }
22
+ const parsed = new URL(url);
23
+ if (!["http:", "https:"].includes(parsed.protocol))
24
+ return false;
25
+ if (parsed.pathname !== "" && parsed.pathname !== "/")
26
+ return false;
27
+ if (parsed.pathname === "/") {
28
+ if (url.endsWith("/"))
29
+ return false;
30
+ }
31
+ return !(parsed.search || parsed.hash);
32
+ } catch {
33
+ return false;
34
+ }
35
+ }, "Must be a base domain URL like 'https://example.com' (no path, no trailing slash)").refine(
36
+ (url) => !url || !url.endsWith("/"),
37
+ "Must not end with a trailing slash"
38
+ ).describe("The base URL of the website to append to links in generated llms files"),
39
+ baseSiteUrlUmbracoDataKey: z.string().optional().describe("If the SiteData of UmbracoData has the key with the base URL you can pass here the key to auto extract the base url"),
16
40
  maxConcurrent: z.number().int().min(1, "maxConcurrent must be at least 1").max(10, "maxConcurrent should not exceed 10 to avoid rate limits").optional().default(3),
17
41
  enableLLMSFullTxt: z.boolean().optional().default(true),
18
42
  enableIndividualMd: z.boolean().optional().default(true),
@@ -20,19 +44,17 @@ const LLMSConfigSchema = z.object({
20
44
  cleanupOrphaned: z.boolean().optional().default(true),
21
45
  cleanupHidden: z.boolean().optional().default(true),
22
46
  enableHtmlToMarkdown: z.boolean().optional().default(true)
23
- }).strict();
47
+ }).refine(
48
+ (data) => data.baseSiteUrl || data.baseSiteUrlUmbracoDataKey,
49
+ {
50
+ message: 'At least one of "baseSiteUrl" or "baseSiteUrlUmbracoDataKey" must be provided.',
51
+ path: ["baseSiteUrl"]
52
+ // or omit 'path' to make it a general error
53
+ }
54
+ ).strict();
24
55
  class SchemaValidator {
25
56
  static validateConfig(config) {
26
- try {
27
- return LLMSConfigSchema.parse(config);
28
- } catch (error) {
29
- if (error instanceof z.ZodError) {
30
- const { errors } = z.treeifyError(error);
31
- const message = ["Configuration validation failed:", ...errors].join("\n");
32
- throw new Error(message);
33
- }
34
- throw error;
35
- }
57
+ return LLMSConfigSchema.parse(config);
36
58
  }
37
59
  }
38
60
 
@@ -201,6 +223,33 @@ async function withErrorHandling(operation, context) {
201
223
  }
202
224
  }
203
225
 
226
+ const nhm = new NodeHtmlMarkdown();
227
+ function convertHtmlToMarkdownDeep(input) {
228
+ if (input === null || input === void 0)
229
+ return input;
230
+ if (typeof input === "string") {
231
+ if (/<[a-z][\s\S]*>/i.test(input)) {
232
+ try {
233
+ return nhm.translate(input).trim();
234
+ } catch {
235
+ return input;
236
+ }
237
+ }
238
+ return input;
239
+ }
240
+ if (Array.isArray(input)) {
241
+ return input.map(convertHtmlToMarkdownDeep);
242
+ }
243
+ if (typeof input === "object") {
244
+ const result = {};
245
+ for (const [key, value] of Object.entries(input)) {
246
+ result[key] = convertHtmlToMarkdownDeep(value);
247
+ }
248
+ return result;
249
+ }
250
+ return input;
251
+ }
252
+
204
253
  const DEFAULT_OPTIONS = {
205
254
  anthropicModel: "claude-3-7-sonnet-latest",
206
255
  maxConcurrent: 5,
@@ -255,7 +304,9 @@ const llmsModule = defineNuxtModule({
255
304
  enableAutoCleanup: options.enableAutoCleanup ?? DEFAULT_OPTIONS.enableAutoCleanup,
256
305
  cleanupOrphaned: options.cleanupOrphaned ?? DEFAULT_OPTIONS.cleanupOrphaned,
257
306
  cleanupHidden: options.cleanupHidden ?? DEFAULT_OPTIONS.cleanupHidden,
258
- enableHtmlToMarkdown: options.enableHtmlToMarkdown ?? DEFAULT_OPTIONS.enableHtmlToMarkdown
307
+ enableHtmlToMarkdown: options.enableHtmlToMarkdown ?? DEFAULT_OPTIONS.enableHtmlToMarkdown,
308
+ baseSiteUrl: options.baseSiteUrl,
309
+ baseSiteUrlUmbracoDataKey: options.baseSiteUrlUmbracoDataKey
259
310
  };
260
311
  let moduleOptions;
261
312
  try {
@@ -273,7 +324,10 @@ const llmsModule = defineNuxtModule({
273
324
  }
274
325
  try {
275
326
  const umbracoDataContent = readFileSync(moduleOptions.umbracoDataPath, "utf-8");
276
- const umbracoData = JSON.parse(umbracoDataContent);
327
+ let umbracoData = JSON.parse(umbracoDataContent);
328
+ if (moduleOptions.enableHtmlToMarkdown) {
329
+ umbracoData = convertHtmlToMarkdownDeep(umbracoData);
330
+ }
277
331
  logger.info(`Loaded Umbraco data with ${umbracoData.urlList.length} pages`);
278
332
  nuxt.options.runtimeConfig.llmsGenerator = {
279
333
  enabled: true,
@@ -302,7 +356,7 @@ const llmsModule = defineNuxtModule({
302
356
  async function generateLLMSFiles(config, umbracoData, logger) {
303
357
  try {
304
358
  const { LLMSFilesGenerator } = await import('../chunks/llms-files-generator.mjs');
305
- const generator = new LLMSFilesGenerator(config);
359
+ const generator = new LLMSFilesGenerator(config, umbracoData);
306
360
  logger.info("Testing Anthropic API connection...");
307
361
  const connectionOk = await generator["templateGenerator"].testConnection();
308
362
  if (!connectionOk) {
@@ -310,7 +364,7 @@ async function generateLLMSFiles(config, umbracoData, logger) {
310
364
  return;
311
365
  }
312
366
  logger.success("Anthropic API connection successful");
313
- const files = await generator.generateAllFiles(umbracoData);
367
+ const files = await generator.generateAllFiles();
314
368
  logger.success("Generated LLMS files:");
315
369
  logger.info(`- llms.txt: ${files.llmsTxt.path}`);
316
370
  if (files.llmsFullTxt) {
package/package.json CHANGED
@@ -1,16 +1,10 @@
1
1
  {
2
2
  "name": "@voicenter-team/nuxt-llms-generator",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Nuxt 3 module for automatically generating AI-optimized documentation files (llms.txt, llms-full.txt, and individual .md files) from Umbraco CMS data using Anthropic's Claude API.",
5
- "repository": "your-org/my-module",
5
+ "repository": "https://github.com/VoicenterTeam/nuxt-llms-generator",
6
6
  "license": "MIT",
7
7
  "type": "module",
8
- "jiti": {
9
- "alias": {
10
- "@": "./src",
11
- "@/*": "./src/*"
12
- }
13
- },
14
8
  "exports": {
15
9
  ".": {
16
10
  "types": "./dist/types.d.ts",