@voicenter-team/nuxt-llms-generator 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunks/llms-files-generator.mjs +129 -121
- package/dist/module.d.mts +28 -34
- package/dist/module.d.ts +28 -34
- package/dist/module.json +1 -1
- package/dist/module.mjs +2 -1
- package/dist/shared/{nuxt-llms-generator.ab079b9f.mjs → nuxt-llms-generator.11eb2a36.mjs} +69 -15
- package/package.json +2 -8
|
@@ -4,10 +4,10 @@ import Mustache from 'mustache';
|
|
|
4
4
|
import Anthropic from '@anthropic-ai/sdk';
|
|
5
5
|
import { createHash } from 'crypto';
|
|
6
6
|
import { JSONPath } from 'jsonpath-plus';
|
|
7
|
-
import { T as TemplateError, E as ErrorCode, w as withErrorHandling } from '../shared/nuxt-llms-generator.ab079b9f.mjs';
|
|
8
|
-
import { NodeHtmlMarkdown } from 'node-html-markdown';
|
|
7
|
+
import { T as TemplateError, E as ErrorCode, w as withErrorHandling } from '../shared/nuxt-llms-generator.11eb2a36.mjs';
|
|
9
8
|
import '@nuxt/kit';
|
|
10
9
|
import 'zod';
|
|
10
|
+
import 'node-html-markdown';
|
|
11
11
|
|
|
12
12
|
class AnthropicClient {
|
|
13
13
|
client;
|
|
@@ -1128,13 +1128,10 @@ class TemplateGenerator {
|
|
|
1128
1128
|
promptAnalyzer;
|
|
1129
1129
|
cache;
|
|
1130
1130
|
config;
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
void 0,
|
|
1134
|
-
void 0
|
|
1135
|
-
);
|
|
1136
|
-
constructor(config) {
|
|
1131
|
+
umbracoData;
|
|
1132
|
+
constructor(config, umbracoData) {
|
|
1137
1133
|
this.config = config;
|
|
1134
|
+
this.umbracoData = umbracoData;
|
|
1138
1135
|
this.anthropicClient = new AnthropicClient(config);
|
|
1139
1136
|
this.promptAnalyzer = new PromptAnalyzer();
|
|
1140
1137
|
this.cache = new LLMSCache(config.templatesDir || "./.llms-templates");
|
|
@@ -1153,22 +1150,26 @@ class TemplateGenerator {
|
|
|
1153
1150
|
}
|
|
1154
1151
|
return await this.generateTemplateWithAI(pageContent, urlItem);
|
|
1155
1152
|
}
|
|
1156
|
-
async generateAllTemplates(
|
|
1153
|
+
async generateAllTemplates() {
|
|
1157
1154
|
const templates = [];
|
|
1158
1155
|
const maxConcurrent = this.config.maxConcurrent || 5;
|
|
1159
|
-
await performAutomaticCleanup(
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1156
|
+
await performAutomaticCleanup(
|
|
1157
|
+
this.umbracoData,
|
|
1158
|
+
this.config.templatesDir || "./.llms-templates",
|
|
1159
|
+
{
|
|
1160
|
+
enableAutoCleanup: this.config.enableAutoCleanup ?? true,
|
|
1161
|
+
cleanupOrphaned: this.config.cleanupOrphaned ?? true,
|
|
1162
|
+
cleanupHidden: this.config.cleanupHidden ?? true,
|
|
1163
|
+
dryRun: false
|
|
1164
|
+
}
|
|
1165
|
+
);
|
|
1166
|
+
const visibilityStats = getPageVisibilityStats(this.umbracoData);
|
|
1166
1167
|
console.log("\u{1F4CA} Page visibility stats:", visibilityStats);
|
|
1167
|
-
const visiblePages = umbracoData.urlList.filter(
|
|
1168
|
-
(urlItem) => shouldGenerateTemplate(umbracoData, urlItem)
|
|
1168
|
+
const visiblePages = this.umbracoData.urlList.filter(
|
|
1169
|
+
(urlItem) => shouldGenerateTemplate(this.umbracoData, urlItem)
|
|
1169
1170
|
);
|
|
1170
|
-
console.log(`Checking ${visiblePages.length}/${umbracoData.urlList.length} visible pages for cache status...`);
|
|
1171
|
-
const { cached, needGeneration } = this.identifyTemplatesNeeded(
|
|
1171
|
+
console.log(`Checking ${visiblePages.length}/${this.umbracoData.urlList.length} visible pages for cache status...`);
|
|
1172
|
+
const { cached, needGeneration } = this.identifyTemplatesNeeded(visiblePages);
|
|
1172
1173
|
console.log(`\u{1F4C8} Template status: ${cached.length} cached, ${needGeneration.length} need generation`);
|
|
1173
1174
|
templates.push(...cached);
|
|
1174
1175
|
if (needGeneration.length === 0) {
|
|
@@ -1197,11 +1198,11 @@ class TemplateGenerator {
|
|
|
1197
1198
|
console.log(`Generated ${templates.length} total templates (${cached.length} from cache, ${templates.length - cached.length} newly generated)`);
|
|
1198
1199
|
return templates;
|
|
1199
1200
|
}
|
|
1200
|
-
identifyTemplatesNeeded(
|
|
1201
|
+
identifyTemplatesNeeded(visiblePages) {
|
|
1201
1202
|
const cached = [];
|
|
1202
1203
|
const needGeneration = [];
|
|
1203
1204
|
for (const urlItem of visiblePages) {
|
|
1204
|
-
const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
|
|
1205
|
+
const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
|
|
1205
1206
|
if (!pageContent) {
|
|
1206
1207
|
console.warn(`No content found for ${urlItem.url} (${urlItem.Jpath})`);
|
|
1207
1208
|
continue;
|
|
@@ -1267,11 +1268,7 @@ class TemplateGenerator {
|
|
|
1267
1268
|
async renderTemplate(template, data) {
|
|
1268
1269
|
return withErrorHandling(async () => {
|
|
1269
1270
|
const validatedTemplate = await templateValidationPipeline.validateAndFix(template);
|
|
1270
|
-
|
|
1271
|
-
if (this.config.enableHtmlToMarkdown) {
|
|
1272
|
-
return this.nhm.translate(renderedContent);
|
|
1273
|
-
}
|
|
1274
|
-
return renderedContent;
|
|
1271
|
+
return Mustache.render(validatedTemplate, data);
|
|
1275
1272
|
}, {
|
|
1276
1273
|
template: template.substring(0, 200) + "...",
|
|
1277
1274
|
dataKeys: Object.keys(data)
|
|
@@ -1325,21 +1322,22 @@ class TemplateGenerator {
|
|
|
1325
1322
|
class LLMSFilesGenerator {
|
|
1326
1323
|
config;
|
|
1327
1324
|
templateGenerator;
|
|
1328
|
-
|
|
1329
|
-
constructor(config) {
|
|
1325
|
+
umbracoData;
|
|
1326
|
+
constructor(config, umbracoData) {
|
|
1330
1327
|
this.config = config;
|
|
1331
|
-
this.
|
|
1328
|
+
this.umbracoData = umbracoData;
|
|
1329
|
+
this.templateGenerator = new TemplateGenerator(config, umbracoData);
|
|
1332
1330
|
}
|
|
1333
|
-
async generateAllFiles(
|
|
1331
|
+
async generateAllFiles() {
|
|
1334
1332
|
const startTime = Date.now();
|
|
1335
1333
|
console.log("\u{1F680} Starting LLMS files generation...");
|
|
1336
|
-
const templates = await this.templateGenerator.generateAllTemplates(
|
|
1334
|
+
const templates = await this.templateGenerator.generateAllTemplates();
|
|
1337
1335
|
console.log("\u{1F4C4} Generating individual markdown files...");
|
|
1338
|
-
const individualMdFiles = this.config.enableIndividualMd ? await this.generateIndividualMarkdownFiles(
|
|
1336
|
+
const individualMdFiles = this.config.enableIndividualMd ? await this.generateIndividualMarkdownFiles(templates) : void 0;
|
|
1339
1337
|
console.log("\u{1F4DD} Generating llms.txt navigation file...");
|
|
1340
|
-
const llmsTxt = this.generateLLMSTxt(
|
|
1338
|
+
const llmsTxt = this.generateLLMSTxt(individualMdFiles || []);
|
|
1341
1339
|
console.log("\u{1F4DA} Generating llms-full.txt...");
|
|
1342
|
-
const llmsFullTxt = this.config.enableLLMSFullTxt ? this.generateLLMSFullTxt(
|
|
1340
|
+
const llmsFullTxt = this.config.enableLLMSFullTxt ? this.generateLLMSFullTxt(individualMdFiles || []) : void 0;
|
|
1343
1341
|
const files = {
|
|
1344
1342
|
llmsTxt,
|
|
1345
1343
|
llmsFullTxt,
|
|
@@ -1350,18 +1348,18 @@ class LLMSFilesGenerator {
|
|
|
1350
1348
|
console.log(`\u2705 LLMS files generation completed in ${duration}ms`);
|
|
1351
1349
|
return files;
|
|
1352
1350
|
}
|
|
1353
|
-
async generateIndividualMarkdownFiles(
|
|
1351
|
+
async generateIndividualMarkdownFiles(templates) {
|
|
1354
1352
|
const mdFiles = [];
|
|
1355
1353
|
for (const template of templates) {
|
|
1356
1354
|
try {
|
|
1357
|
-
const urlItem = umbracoData.urlList.find(
|
|
1355
|
+
const urlItem = this.umbracoData.urlList.find(
|
|
1358
1356
|
(item) => generatePageId(item) === template.pageId
|
|
1359
1357
|
);
|
|
1360
1358
|
if (!urlItem) {
|
|
1361
1359
|
console.warn(`URL item not found for template ${template.pageId}`);
|
|
1362
1360
|
continue;
|
|
1363
1361
|
}
|
|
1364
|
-
const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
|
|
1362
|
+
const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
|
|
1365
1363
|
if (!pageContent) {
|
|
1366
1364
|
console.warn(`Page content not found for ${urlItem.url}`);
|
|
1367
1365
|
continue;
|
|
@@ -1385,9 +1383,9 @@ class LLMSFilesGenerator {
|
|
|
1385
1383
|
}
|
|
1386
1384
|
return mdFiles;
|
|
1387
1385
|
}
|
|
1388
|
-
generateLLMSTxt(
|
|
1389
|
-
const siteTitle = this.extractSiteTitle(
|
|
1390
|
-
const siteDescription = this.extractSiteDescription(
|
|
1386
|
+
generateLLMSTxt(mdFiles) {
|
|
1387
|
+
const siteTitle = this.extractSiteTitle();
|
|
1388
|
+
const siteDescription = this.extractSiteDescription();
|
|
1391
1389
|
let content = `# ${siteTitle}
|
|
1392
1390
|
|
|
1393
1391
|
`;
|
|
@@ -1399,7 +1397,7 @@ class LLMSFilesGenerator {
|
|
|
1399
1397
|
content += `This website contains comprehensive information about ${siteTitle.toLowerCase()}. The content is organized into the following sections:
|
|
1400
1398
|
|
|
1401
1399
|
`;
|
|
1402
|
-
const pagesByCategory = this.groupPagesByCategory(
|
|
1400
|
+
const pagesByCategory = this.groupPagesByCategory(mdFiles);
|
|
1403
1401
|
for (const [category, pages] of Object.entries(pagesByCategory)) {
|
|
1404
1402
|
if (pages.length === 0)
|
|
1405
1403
|
continue;
|
|
@@ -1407,33 +1405,32 @@ class LLMSFilesGenerator {
|
|
|
1407
1405
|
|
|
1408
1406
|
`;
|
|
1409
1407
|
for (const page of pages) {
|
|
1410
|
-
const urlItem = umbracoData.urlList.find((item) => item.url === page.url);
|
|
1411
|
-
const pageTitle = this.extractPageTitle(
|
|
1408
|
+
const urlItem = this.umbracoData.urlList.find((item) => item.url === page.url);
|
|
1409
|
+
const pageTitle = this.extractPageTitle(urlItem);
|
|
1412
1410
|
const relativeFilePath = this.getLLMSFilePath(page.path);
|
|
1413
|
-
content += `- [${pageTitle}](${relativeFilePath}): ${this.generatePageDescription(
|
|
1411
|
+
content += `- [${pageTitle}](${relativeFilePath}): ${this.generatePageDescription(urlItem)}
|
|
1414
1412
|
`;
|
|
1415
1413
|
}
|
|
1416
1414
|
content += "\n";
|
|
1417
1415
|
}
|
|
1418
|
-
const visiblePages = getVisiblePages(umbracoData);
|
|
1419
|
-
const hiddenCount = umbracoData.urlList.length - visiblePages.length;
|
|
1416
|
+
const visiblePages = getVisiblePages(this.umbracoData);
|
|
1417
|
+
const hiddenCount = this.umbracoData.urlList.length - visiblePages.length;
|
|
1420
1418
|
if (hiddenCount > 0) {
|
|
1421
1419
|
content += `*Note: ${hiddenCount} pages are excluded from this documentation as they are marked as hidden.*
|
|
1422
1420
|
|
|
1423
1421
|
`;
|
|
1424
1422
|
}
|
|
1425
1423
|
content += "## Optional\n\n";
|
|
1426
|
-
content +=
|
|
1427
|
-
content += "- [Site Map](sitemap.xml): XML sitemap of all pages\n";
|
|
1424
|
+
content += `- [Complete Documentation](${this.makeUrl("/llms-full.txt")}): All content combined in a single file`;
|
|
1428
1425
|
const outputPath = join(this.getOutputDir(), "llms.txt");
|
|
1429
1426
|
return {
|
|
1430
1427
|
path: outputPath,
|
|
1431
1428
|
content: content.trim()
|
|
1432
1429
|
};
|
|
1433
1430
|
}
|
|
1434
|
-
generateLLMSFullTxt(
|
|
1435
|
-
const siteTitle = this.extractSiteTitle(
|
|
1436
|
-
const siteDescription = this.extractSiteDescription(
|
|
1431
|
+
generateLLMSFullTxt(mdFiles) {
|
|
1432
|
+
const siteTitle = this.extractSiteTitle();
|
|
1433
|
+
const siteDescription = this.extractSiteDescription();
|
|
1437
1434
|
let content = `# ${siteTitle} - Complete Documentation
|
|
1438
1435
|
|
|
1439
1436
|
`;
|
|
@@ -1444,15 +1441,10 @@ class LLMSFilesGenerator {
|
|
|
1444
1441
|
}
|
|
1445
1442
|
content += "---\n\n";
|
|
1446
1443
|
for (const mdFile of mdFiles) {
|
|
1447
|
-
const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
|
|
1444
|
+
const urlItem = this.umbracoData.urlList.find((item) => item.url === mdFile.url);
|
|
1448
1445
|
if (!urlItem)
|
|
1449
1446
|
continue;
|
|
1450
|
-
content += `## Page: ${mdFile.url}
|
|
1451
|
-
|
|
1452
|
-
`;
|
|
1453
|
-
content += `**Template**: ${urlItem.TemplateAlias}
|
|
1454
|
-
`;
|
|
1455
|
-
content += `**Node ID**: ${urlItem.nodeID}
|
|
1447
|
+
content += `## Page: ${this.makeUrl(mdFile.url)}
|
|
1456
1448
|
|
|
1457
1449
|
`;
|
|
1458
1450
|
content += mdFile.content;
|
|
@@ -1481,111 +1473,127 @@ class LLMSFilesGenerator {
|
|
|
1481
1473
|
console.log(`\u{1F4DD} Saved: ${files.individualMdFiles.length} markdown files to llms/ subdirectory`);
|
|
1482
1474
|
}
|
|
1483
1475
|
}
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1476
|
+
/**
|
|
1477
|
+
* Groups pages by their first-level URL segment.
|
|
1478
|
+
* Example:
|
|
1479
|
+
* /ai-marketplace -> category "ai-marketplace"
|
|
1480
|
+
* /ai-marketplace/asda -> category "ai-marketplace"
|
|
1481
|
+
* /marketplace -> category "marketplace"
|
|
1482
|
+
* / -> category "main"
|
|
1483
|
+
*/
|
|
1484
|
+
groupPagesByCategory(mdFiles) {
|
|
1485
|
+
const categories = {};
|
|
1493
1486
|
for (const mdFile of mdFiles) {
|
|
1494
|
-
const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
|
|
1487
|
+
const urlItem = this.umbracoData.urlList.find((item) => item.url === mdFile.url);
|
|
1495
1488
|
if (!urlItem)
|
|
1496
1489
|
continue;
|
|
1497
1490
|
const category = this.categorizeUrlItem(urlItem);
|
|
1498
|
-
if (!categories[category])
|
|
1491
|
+
if (!categories[category])
|
|
1499
1492
|
categories[category] = [];
|
|
1500
|
-
}
|
|
1501
1493
|
categories[category].push(mdFile);
|
|
1502
1494
|
}
|
|
1503
1495
|
return categories;
|
|
1504
1496
|
}
|
|
1497
|
+
/**
|
|
1498
|
+
* Determines a logical category name based on the URL structure.
|
|
1499
|
+
* Uses the first path segment as the category.
|
|
1500
|
+
*/
|
|
1505
1501
|
categorizeUrlItem(urlItem) {
|
|
1506
|
-
const
|
|
1507
|
-
|
|
1508
|
-
url.toLowerCase();
|
|
1509
|
-
if (url === "/" || alias.includes("home"))
|
|
1502
|
+
const url = urlItem.url.toLowerCase().trim();
|
|
1503
|
+
if (url === "/" || url === "")
|
|
1510
1504
|
return "main";
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
if (
|
|
1514
|
-
return "
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1505
|
+
const cleaned = url.replace(/^https?:\/\/[^/]+/, "").replace(/\/+$/, "");
|
|
1506
|
+
const segments = cleaned.split("/").filter(Boolean);
|
|
1507
|
+
if (segments.length === 0)
|
|
1508
|
+
return "main";
|
|
1509
|
+
const firstSegment = segments[0];
|
|
1510
|
+
const ignored = ["media", "assets", "static", "files", "uploads"];
|
|
1511
|
+
if (ignored.includes(firstSegment))
|
|
1512
|
+
return "other";
|
|
1513
|
+
if (firstSegment.length < 2 || /^\d+$/.test(firstSegment))
|
|
1514
|
+
return "other";
|
|
1515
|
+
return firstSegment;
|
|
1516
|
+
}
|
|
1517
|
+
/**
|
|
1518
|
+
* Returns a formatted, human-readable category name for llms.txt output.
|
|
1519
|
+
*/
|
|
1520
|
+
formatCategoryName(category) {
|
|
1521
|
+
if (category === "main")
|
|
1522
|
+
return "Main Pages";
|
|
1523
|
+
if (category === "other")
|
|
1524
|
+
return "Other Pages";
|
|
1525
|
+
return category.split("-").map((word) => word.length <= 3 ? word.toUpperCase() : word.charAt(0).toUpperCase() + word.slice(1)).join(" ");
|
|
1526
|
+
}
|
|
1527
|
+
extractSiteTitle() {
|
|
1528
|
+
const siteData = this.umbracoData.SiteData;
|
|
1521
1529
|
const rawTitle = siteData?.pageTitle || siteData?.mainHeaderBlockTitle || "Website Documentation";
|
|
1522
|
-
return
|
|
1530
|
+
return rawTitle;
|
|
1523
1531
|
}
|
|
1524
|
-
extractSiteDescription(
|
|
1525
|
-
const siteData = umbracoData.SiteData;
|
|
1532
|
+
extractSiteDescription() {
|
|
1533
|
+
const siteData = this.umbracoData.SiteData;
|
|
1526
1534
|
const rawDescription = siteData?.pageDescription || siteData?.ogDescription || null;
|
|
1527
|
-
return rawDescription ?
|
|
1528
|
-
}
|
|
1529
|
-
cleanHtmlContent(content) {
|
|
1530
|
-
if (!this.config.enableHtmlToMarkdown) {
|
|
1531
|
-
return content;
|
|
1532
|
-
}
|
|
1533
|
-
if (/<[^>]+>/.test(content)) {
|
|
1534
|
-
return this.nhm.translate(content).trim();
|
|
1535
|
-
}
|
|
1536
|
-
return content;
|
|
1535
|
+
return rawDescription ? rawDescription : null;
|
|
1537
1536
|
}
|
|
1538
|
-
extractPageTitle(
|
|
1537
|
+
extractPageTitle(urlItem) {
|
|
1539
1538
|
if (!urlItem)
|
|
1540
1539
|
return "Untitled Page";
|
|
1541
|
-
const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
|
|
1540
|
+
const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
|
|
1542
1541
|
if (!pageContent)
|
|
1543
1542
|
return urlItem.TemplateAlias;
|
|
1544
|
-
return
|
|
1543
|
+
return String(
|
|
1544
|
+
pageContent.pageTitle || pageContent.title || pageContent.pageTittle || urlItem.TemplateAlias
|
|
1545
|
+
);
|
|
1545
1546
|
}
|
|
1546
|
-
generatePageDescription(
|
|
1547
|
+
generatePageDescription(urlItem) {
|
|
1547
1548
|
if (!urlItem)
|
|
1548
1549
|
return "Page information";
|
|
1549
|
-
const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
|
|
1550
|
+
const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
|
|
1550
1551
|
if (!pageContent)
|
|
1551
1552
|
return `${urlItem.TemplateAlias} page`;
|
|
1552
1553
|
const desc = pageContent.pageDescription || pageContent.description || pageContent.headerBlockSubtitle;
|
|
1553
1554
|
if (desc && typeof desc === "string") {
|
|
1554
|
-
return desc
|
|
1555
|
+
return desc;
|
|
1555
1556
|
}
|
|
1556
1557
|
return `Information about ${urlItem.url}`;
|
|
1557
1558
|
}
|
|
1558
|
-
formatCategoryName(category) {
|
|
1559
|
-
const names = {
|
|
1560
|
-
main: "Main Pages",
|
|
1561
|
-
blog: "Blog & Articles",
|
|
1562
|
-
services: "Services & Products",
|
|
1563
|
-
info: "Information Pages",
|
|
1564
|
-
other: "Other Pages"
|
|
1565
|
-
};
|
|
1566
|
-
return names[category] || category.charAt(0).toUpperCase() + category.slice(1);
|
|
1567
|
-
}
|
|
1568
1559
|
sanitizeUrlForFilename(url) {
|
|
1569
|
-
|
|
1560
|
+
if (url === "/") {
|
|
1561
|
+
return "index";
|
|
1562
|
+
}
|
|
1563
|
+
let filename = url.replace(/^\//, "").replace(/\/$/, "").replace(/\//g, "-").replace(/--+/g, "-").replace(/^-+|-+$/g, "");
|
|
1570
1564
|
if (!filename || filename === "") {
|
|
1571
|
-
filename =
|
|
1565
|
+
filename = `index_${url.length}_${Date.now()}`;
|
|
1572
1566
|
}
|
|
1573
1567
|
if (filename.startsWith("-") || filename.startsWith(".")) {
|
|
1574
1568
|
filename = "page-" + filename.replace(/^[-.]/, "");
|
|
1575
1569
|
}
|
|
1576
1570
|
return filename;
|
|
1577
1571
|
}
|
|
1578
|
-
getRelativeFilePath(fullPath) {
|
|
1579
|
-
const filename = fullPath.split("/").pop() || "";
|
|
1580
|
-
return filename;
|
|
1581
|
-
}
|
|
1582
1572
|
getLLMSFilePath(fullPath) {
|
|
1583
1573
|
const filename = basename(fullPath);
|
|
1584
|
-
return `/llms/${filename}
|
|
1574
|
+
return this.makeUrl(`/llms/${filename}`);
|
|
1585
1575
|
}
|
|
1586
1576
|
getOutputDir() {
|
|
1587
1577
|
return this.config.finalOutputDir || "dist";
|
|
1588
1578
|
}
|
|
1579
|
+
getBaseSiteUrl() {
|
|
1580
|
+
if (this.config.baseSiteUrl) {
|
|
1581
|
+
return this.config.baseSiteUrl;
|
|
1582
|
+
} else if (this.config.baseSiteUrlUmbracoDataKey) {
|
|
1583
|
+
return this.config.baseSiteUrlUmbracoDataKey in this.umbracoData.SiteData ? String(this.umbracoData.SiteData[this.config.baseSiteUrlUmbracoDataKey]) : "";
|
|
1584
|
+
}
|
|
1585
|
+
return "";
|
|
1586
|
+
}
|
|
1587
|
+
makeUrl(path) {
|
|
1588
|
+
const base = this.getBaseSiteUrl();
|
|
1589
|
+
try {
|
|
1590
|
+
return new URL(path, base).toString();
|
|
1591
|
+
} catch {
|
|
1592
|
+
const baseClean = base?.replace(/\/+$/, "") || "";
|
|
1593
|
+
const pathClean = path?.replace(/^\/+/, "") || "";
|
|
1594
|
+
return baseClean && pathClean ? `${baseClean}/${pathClean}` : baseClean || pathClean;
|
|
1595
|
+
}
|
|
1596
|
+
}
|
|
1589
1597
|
}
|
|
1590
1598
|
|
|
1591
1599
|
export { LLMSFilesGenerator };
|
package/dist/module.d.mts
CHANGED
|
@@ -1,19 +1,33 @@
|
|
|
1
1
|
import * as _nuxt_schema from '@nuxt/schema';
|
|
2
|
+
import { z } from 'zod';
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
4
|
+
/**
|
|
5
|
+
* Zod validation schemas for LLMS generator
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
declare const LLMSConfigSchema: z.ZodObject<{
|
|
9
|
+
anthropicApiKey: z.ZodString;
|
|
10
|
+
umbracoDataPath: z.ZodString;
|
|
11
|
+
templatesDir: z.ZodString;
|
|
12
|
+
finalOutputDir: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
13
|
+
anthropicModel: z.ZodOptional<z.ZodString>;
|
|
14
|
+
baseSiteUrl: z.ZodOptional<z.ZodString>;
|
|
15
|
+
baseSiteUrlUmbracoDataKey: z.ZodOptional<z.ZodString>;
|
|
16
|
+
maxConcurrent: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
17
|
+
enableLLMSFullTxt: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
18
|
+
enableIndividualMd: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
19
|
+
enableAutoCleanup: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
20
|
+
cleanupOrphaned: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
21
|
+
cleanupHidden: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
22
|
+
enableHtmlToMarkdown: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
23
|
+
}, z.core.$strict>;
|
|
24
|
+
type ValidatedLLMSConfig = z.infer<typeof LLMSConfigSchema>;
|
|
25
|
+
|
|
26
|
+
interface LLMSModuleOptions extends Partial<ValidatedLLMSConfig> {
|
|
27
|
+
enabled?: boolean;
|
|
16
28
|
}
|
|
29
|
+
declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
|
|
30
|
+
|
|
17
31
|
interface UmbracoUrlItem {
|
|
18
32
|
nodeID: number;
|
|
19
33
|
url: string;
|
|
@@ -98,11 +112,6 @@ interface AnthropicGenerationResponse {
|
|
|
98
112
|
tags?: string[];
|
|
99
113
|
};
|
|
100
114
|
}
|
|
101
|
-
interface LLMSGeneratorOptions {
|
|
102
|
-
config: LLMSConfig;
|
|
103
|
-
umbracoData: UmbracoData;
|
|
104
|
-
templateCache?: TemplateCache;
|
|
105
|
-
}
|
|
106
115
|
interface PageStructureInfo {
|
|
107
116
|
keys: string[];
|
|
108
117
|
excludedKeys: string[];
|
|
@@ -114,21 +123,6 @@ interface HashGenerationOptions {
|
|
|
114
123
|
excludeKeys?: string[];
|
|
115
124
|
includeOnlyKeys?: string[];
|
|
116
125
|
}
|
|
117
|
-
interface GenerationStats {
|
|
118
|
-
totalPages: number;
|
|
119
|
-
templatesGenerated: number;
|
|
120
|
-
templatesFromCache: number;
|
|
121
|
-
mdFilesGenerated: number;
|
|
122
|
-
llmsTxtGenerated: true;
|
|
123
|
-
llmsFullTxtGenerated: boolean;
|
|
124
|
-
duration: number;
|
|
125
|
-
apiCallsUsed: number;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
interface LLMSModuleOptions extends Partial<LLMSConfig> {
|
|
129
|
-
enabled?: boolean;
|
|
130
|
-
}
|
|
131
|
-
declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
|
|
132
126
|
|
|
133
127
|
export { _default as default };
|
|
134
|
-
export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate,
|
|
128
|
+
export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate, HashGenerationOptions, LLMSFiles, PageContentHash, PageStructureInfo, TemplateCache, UmbracoData, UmbracoPageContent, UmbracoSiteData, UmbracoUrlItem };
|
package/dist/module.d.ts
CHANGED
|
@@ -1,19 +1,33 @@
|
|
|
1
1
|
import * as _nuxt_schema from '@nuxt/schema';
|
|
2
|
+
import { z } from 'zod';
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
4
|
+
/**
|
|
5
|
+
* Zod validation schemas for LLMS generator
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
declare const LLMSConfigSchema: z.ZodObject<{
|
|
9
|
+
anthropicApiKey: z.ZodString;
|
|
10
|
+
umbracoDataPath: z.ZodString;
|
|
11
|
+
templatesDir: z.ZodString;
|
|
12
|
+
finalOutputDir: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
13
|
+
anthropicModel: z.ZodOptional<z.ZodString>;
|
|
14
|
+
baseSiteUrl: z.ZodOptional<z.ZodString>;
|
|
15
|
+
baseSiteUrlUmbracoDataKey: z.ZodOptional<z.ZodString>;
|
|
16
|
+
maxConcurrent: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
17
|
+
enableLLMSFullTxt: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
18
|
+
enableIndividualMd: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
19
|
+
enableAutoCleanup: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
20
|
+
cleanupOrphaned: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
21
|
+
cleanupHidden: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
22
|
+
enableHtmlToMarkdown: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
23
|
+
}, z.core.$strict>;
|
|
24
|
+
type ValidatedLLMSConfig = z.infer<typeof LLMSConfigSchema>;
|
|
25
|
+
|
|
26
|
+
interface LLMSModuleOptions extends Partial<ValidatedLLMSConfig> {
|
|
27
|
+
enabled?: boolean;
|
|
16
28
|
}
|
|
29
|
+
declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
|
|
30
|
+
|
|
17
31
|
interface UmbracoUrlItem {
|
|
18
32
|
nodeID: number;
|
|
19
33
|
url: string;
|
|
@@ -98,11 +112,6 @@ interface AnthropicGenerationResponse {
|
|
|
98
112
|
tags?: string[];
|
|
99
113
|
};
|
|
100
114
|
}
|
|
101
|
-
interface LLMSGeneratorOptions {
|
|
102
|
-
config: LLMSConfig;
|
|
103
|
-
umbracoData: UmbracoData;
|
|
104
|
-
templateCache?: TemplateCache;
|
|
105
|
-
}
|
|
106
115
|
interface PageStructureInfo {
|
|
107
116
|
keys: string[];
|
|
108
117
|
excludedKeys: string[];
|
|
@@ -114,21 +123,6 @@ interface HashGenerationOptions {
|
|
|
114
123
|
excludeKeys?: string[];
|
|
115
124
|
includeOnlyKeys?: string[];
|
|
116
125
|
}
|
|
117
|
-
interface GenerationStats {
|
|
118
|
-
totalPages: number;
|
|
119
|
-
templatesGenerated: number;
|
|
120
|
-
templatesFromCache: number;
|
|
121
|
-
mdFilesGenerated: number;
|
|
122
|
-
llmsTxtGenerated: true;
|
|
123
|
-
llmsFullTxtGenerated: boolean;
|
|
124
|
-
duration: number;
|
|
125
|
-
apiCallsUsed: number;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
interface LLMSModuleOptions extends Partial<LLMSConfig> {
|
|
129
|
-
enabled?: boolean;
|
|
130
|
-
}
|
|
131
|
-
declare const _default: _nuxt_schema.NuxtModule<LLMSModuleOptions, LLMSModuleOptions, false>;
|
|
132
126
|
|
|
133
127
|
export { _default as default };
|
|
134
|
-
export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate,
|
|
128
|
+
export type { AnthropicGenerationRequest, AnthropicGenerationResponse, GeneratedTemplate, HashGenerationOptions, LLMSFiles, PageContentHash, PageStructureInfo, TemplateCache, UmbracoData, UmbracoPageContent, UmbracoSiteData, UmbracoUrlItem };
|
package/dist/module.json
CHANGED
package/dist/module.mjs
CHANGED
|
@@ -2,6 +2,7 @@ import { defineNuxtModule, useLogger, addTemplate } from '@nuxt/kit';
|
|
|
2
2
|
import { existsSync, readFileSync } from 'fs';
|
|
3
3
|
import { resolve } from 'path';
|
|
4
4
|
import { z } from 'zod';
|
|
5
|
+
import { NodeHtmlMarkdown } from 'node-html-markdown';
|
|
5
6
|
|
|
6
7
|
const existingPath = z.string().refine(
|
|
7
8
|
(path) => existsSync(path)
|
|
@@ -13,6 +14,29 @@ const LLMSConfigSchema = z.object({
|
|
|
13
14
|
templatesDir: z.string().min(1, "Templates directory is required"),
|
|
14
15
|
finalOutputDir: z.string().optional().default("public"),
|
|
15
16
|
anthropicModel: z.string().optional(),
|
|
17
|
+
baseSiteUrl: z.string().optional().refine((url) => {
|
|
18
|
+
try {
|
|
19
|
+
if (!url) {
|
|
20
|
+
return true;
|
|
21
|
+
}
|
|
22
|
+
const parsed = new URL(url);
|
|
23
|
+
if (!["http:", "https:"].includes(parsed.protocol))
|
|
24
|
+
return false;
|
|
25
|
+
if (parsed.pathname !== "" && parsed.pathname !== "/")
|
|
26
|
+
return false;
|
|
27
|
+
if (parsed.pathname === "/") {
|
|
28
|
+
if (url.endsWith("/"))
|
|
29
|
+
return false;
|
|
30
|
+
}
|
|
31
|
+
return !(parsed.search || parsed.hash);
|
|
32
|
+
} catch {
|
|
33
|
+
return false;
|
|
34
|
+
}
|
|
35
|
+
}, "Must be a base domain URL like 'https://example.com' (no path, no trailing slash)").refine(
|
|
36
|
+
(url) => !url || !url.endsWith("/"),
|
|
37
|
+
"Must not end with a trailing slash"
|
|
38
|
+
).describe("The base URL of the website to append to links in generated llms files"),
|
|
39
|
+
baseSiteUrlUmbracoDataKey: z.string().optional().describe("If the SiteData of UmbracoData has the key with the base URL you can pass here the key to auto extract the base url"),
|
|
16
40
|
maxConcurrent: z.number().int().min(1, "maxConcurrent must be at least 1").max(10, "maxConcurrent should not exceed 10 to avoid rate limits").optional().default(3),
|
|
17
41
|
enableLLMSFullTxt: z.boolean().optional().default(true),
|
|
18
42
|
enableIndividualMd: z.boolean().optional().default(true),
|
|
@@ -20,19 +44,17 @@ const LLMSConfigSchema = z.object({
|
|
|
20
44
|
cleanupOrphaned: z.boolean().optional().default(true),
|
|
21
45
|
cleanupHidden: z.boolean().optional().default(true),
|
|
22
46
|
enableHtmlToMarkdown: z.boolean().optional().default(true)
|
|
23
|
-
}).
|
|
47
|
+
}).refine(
|
|
48
|
+
(data) => data.baseSiteUrl || data.baseSiteUrlUmbracoDataKey,
|
|
49
|
+
{
|
|
50
|
+
message: 'At least one of "baseSiteUrl" or "baseSiteUrlUmbracoDataKey" must be provided.',
|
|
51
|
+
path: ["baseSiteUrl"]
|
|
52
|
+
// or omit 'path' to make it a general error
|
|
53
|
+
}
|
|
54
|
+
).strict();
|
|
24
55
|
class SchemaValidator {
|
|
25
56
|
static validateConfig(config) {
|
|
26
|
-
|
|
27
|
-
return LLMSConfigSchema.parse(config);
|
|
28
|
-
} catch (error) {
|
|
29
|
-
if (error instanceof z.ZodError) {
|
|
30
|
-
const { errors } = z.treeifyError(error);
|
|
31
|
-
const message = ["Configuration validation failed:", ...errors].join("\n");
|
|
32
|
-
throw new Error(message);
|
|
33
|
-
}
|
|
34
|
-
throw error;
|
|
35
|
-
}
|
|
57
|
+
return LLMSConfigSchema.parse(config);
|
|
36
58
|
}
|
|
37
59
|
}
|
|
38
60
|
|
|
@@ -201,6 +223,33 @@ async function withErrorHandling(operation, context) {
|
|
|
201
223
|
}
|
|
202
224
|
}
|
|
203
225
|
|
|
226
|
+
const nhm = new NodeHtmlMarkdown();
|
|
227
|
+
function convertHtmlToMarkdownDeep(input) {
|
|
228
|
+
if (input === null || input === void 0)
|
|
229
|
+
return input;
|
|
230
|
+
if (typeof input === "string") {
|
|
231
|
+
if (/<[a-z][\s\S]*>/i.test(input)) {
|
|
232
|
+
try {
|
|
233
|
+
return nhm.translate(input).trim();
|
|
234
|
+
} catch {
|
|
235
|
+
return input;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
return input;
|
|
239
|
+
}
|
|
240
|
+
if (Array.isArray(input)) {
|
|
241
|
+
return input.map(convertHtmlToMarkdownDeep);
|
|
242
|
+
}
|
|
243
|
+
if (typeof input === "object") {
|
|
244
|
+
const result = {};
|
|
245
|
+
for (const [key, value] of Object.entries(input)) {
|
|
246
|
+
result[key] = convertHtmlToMarkdownDeep(value);
|
|
247
|
+
}
|
|
248
|
+
return result;
|
|
249
|
+
}
|
|
250
|
+
return input;
|
|
251
|
+
}
|
|
252
|
+
|
|
204
253
|
const DEFAULT_OPTIONS = {
|
|
205
254
|
anthropicModel: "claude-3-7-sonnet-latest",
|
|
206
255
|
maxConcurrent: 5,
|
|
@@ -255,7 +304,9 @@ const llmsModule = defineNuxtModule({
|
|
|
255
304
|
enableAutoCleanup: options.enableAutoCleanup ?? DEFAULT_OPTIONS.enableAutoCleanup,
|
|
256
305
|
cleanupOrphaned: options.cleanupOrphaned ?? DEFAULT_OPTIONS.cleanupOrphaned,
|
|
257
306
|
cleanupHidden: options.cleanupHidden ?? DEFAULT_OPTIONS.cleanupHidden,
|
|
258
|
-
enableHtmlToMarkdown: options.enableHtmlToMarkdown ?? DEFAULT_OPTIONS.enableHtmlToMarkdown
|
|
307
|
+
enableHtmlToMarkdown: options.enableHtmlToMarkdown ?? DEFAULT_OPTIONS.enableHtmlToMarkdown,
|
|
308
|
+
baseSiteUrl: options.baseSiteUrl,
|
|
309
|
+
baseSiteUrlUmbracoDataKey: options.baseSiteUrlUmbracoDataKey
|
|
259
310
|
};
|
|
260
311
|
let moduleOptions;
|
|
261
312
|
try {
|
|
@@ -273,7 +324,10 @@ const llmsModule = defineNuxtModule({
|
|
|
273
324
|
}
|
|
274
325
|
try {
|
|
275
326
|
const umbracoDataContent = readFileSync(moduleOptions.umbracoDataPath, "utf-8");
|
|
276
|
-
|
|
327
|
+
let umbracoData = JSON.parse(umbracoDataContent);
|
|
328
|
+
if (moduleOptions.enableHtmlToMarkdown) {
|
|
329
|
+
umbracoData = convertHtmlToMarkdownDeep(umbracoData);
|
|
330
|
+
}
|
|
277
331
|
logger.info(`Loaded Umbraco data with ${umbracoData.urlList.length} pages`);
|
|
278
332
|
nuxt.options.runtimeConfig.llmsGenerator = {
|
|
279
333
|
enabled: true,
|
|
@@ -302,7 +356,7 @@ const llmsModule = defineNuxtModule({
|
|
|
302
356
|
async function generateLLMSFiles(config, umbracoData, logger) {
|
|
303
357
|
try {
|
|
304
358
|
const { LLMSFilesGenerator } = await import('../chunks/llms-files-generator.mjs');
|
|
305
|
-
const generator = new LLMSFilesGenerator(config);
|
|
359
|
+
const generator = new LLMSFilesGenerator(config, umbracoData);
|
|
306
360
|
logger.info("Testing Anthropic API connection...");
|
|
307
361
|
const connectionOk = await generator["templateGenerator"].testConnection();
|
|
308
362
|
if (!connectionOk) {
|
|
@@ -310,7 +364,7 @@ async function generateLLMSFiles(config, umbracoData, logger) {
|
|
|
310
364
|
return;
|
|
311
365
|
}
|
|
312
366
|
logger.success("Anthropic API connection successful");
|
|
313
|
-
const files = await generator.generateAllFiles(
|
|
367
|
+
const files = await generator.generateAllFiles();
|
|
314
368
|
logger.success("Generated LLMS files:");
|
|
315
369
|
logger.info(`- llms.txt: ${files.llmsTxt.path}`);
|
|
316
370
|
if (files.llmsFullTxt) {
|
package/package.json
CHANGED
|
@@ -1,16 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voicenter-team/nuxt-llms-generator",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Nuxt 3 module for automatically generating AI-optimized documentation files (llms.txt, llms-full.txt, and individual .md files) from Umbraco CMS data using Anthropic's Claude API.",
|
|
5
|
-
"repository": "
|
|
5
|
+
"repository": "https://github.com/VoicenterTeam/nuxt-llms-generator",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "module",
|
|
8
|
-
"jiti": {
|
|
9
|
-
"alias": {
|
|
10
|
-
"@": "./src",
|
|
11
|
-
"@/*": "./src/*"
|
|
12
|
-
}
|
|
13
|
-
},
|
|
14
8
|
"exports": {
|
|
15
9
|
".": {
|
|
16
10
|
"types": "./dist/types.d.ts",
|