@voicenter-team/nuxt-llms-generator 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunks/llms-files-generator.mjs +65 -10
- package/dist/module.json +1 -1
- package/package.json +1 -1
|
@@ -269,6 +269,17 @@ users[2]{id,name,role}:
|
|
|
269
269
|
|
|
270
270
|
**CRITICAL:** Use the EXACT property names shown in TOON \`{braces}\` for your Mustache bindings.
|
|
271
271
|
|
|
272
|
+
---
|
|
273
|
+
## \u{1F6AB} Content Exclusion Rules
|
|
274
|
+
When generating the template, **DO NOT** create sections for these types of data even if they appear in TOON:
|
|
275
|
+
- **Image/media properties**: URLs to images, avatars, thumbnails, icons, or any media files
|
|
276
|
+
- **UI-only labels**: Search placeholders, filter menu text, "Show More"/"Show Less", pagination labels
|
|
277
|
+
- **SEO/meta fields**: ogTitle, ogDescription, ogImage, canonical URLs, changefreq, priority, sitemap fields
|
|
278
|
+
- **Legal boilerplate**: Copyright text, "All rights reserved"
|
|
279
|
+
- **Navigation chrome**: Breadcrumbs, menu items, footer links \u2014 unless they ARE the primary page content
|
|
280
|
+
- **System identifiers**: Internal IDs, GUIDs, sort orders, node paths, template aliases
|
|
281
|
+
- **Empty/null values**: Skip any property that holds no meaningful value
|
|
282
|
+
**Focus ONLY on**: titles, descriptions, features, pricing, reviews, specifications, contact info, and other business-relevant content.
|
|
272
283
|
---
|
|
273
284
|
|
|
274
285
|
## \u{1F3AF} TRUE PURPOSE: Help LLMs Answer Questions Efficiently
|
|
@@ -980,6 +991,11 @@ function getValueType(value) {
|
|
|
980
991
|
return typeof value;
|
|
981
992
|
}
|
|
982
993
|
|
|
994
|
+
const PLACEHOLDER_PATTERNS = [
|
|
995
|
+
"lorem ipsum",
|
|
996
|
+
"dolor sit amet",
|
|
997
|
+
"consectetuer adipi"
|
|
998
|
+
];
|
|
983
999
|
function shouldGenerateTemplate(umbracoData, urlItem) {
|
|
984
1000
|
try {
|
|
985
1001
|
const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
|
|
@@ -992,6 +1008,16 @@ function shouldGenerateTemplate(umbracoData, urlItem) {
|
|
|
992
1008
|
console.log(`Page ${urlItem.url} is hidden (hidePage: ${hidePage}), skipping template generation`);
|
|
993
1009
|
return false;
|
|
994
1010
|
}
|
|
1011
|
+
const title = pageContent.pageTitle ?? pageContent.pageTittle ?? pageContent.ogTitle ?? pageContent.headerBlockTitle;
|
|
1012
|
+
if (!title || title === "undefined" || title === "null") {
|
|
1013
|
+
console.log(`Page ${urlItem.url} has no valid title, skipping template generation`);
|
|
1014
|
+
return false;
|
|
1015
|
+
}
|
|
1016
|
+
const bodyText = JSON.stringify(pageContent).toLowerCase();
|
|
1017
|
+
if (PLACEHOLDER_PATTERNS.some((p) => bodyText.includes(p))) {
|
|
1018
|
+
console.log(`Page ${urlItem.url} contains placeholder text, skipping template generation`);
|
|
1019
|
+
return false;
|
|
1020
|
+
}
|
|
995
1021
|
return true;
|
|
996
1022
|
} catch (error) {
|
|
997
1023
|
console.error(`Error checking visibility for ${urlItem.url}:`, error);
|
|
@@ -1207,6 +1233,19 @@ async function performAutomaticCleanup(umbracoData, cacheDir, options = {}) {
|
|
|
1207
1233
|
return stats;
|
|
1208
1234
|
}
|
|
1209
1235
|
|
|
1236
|
+
function sanitizeRenderedMarkdown(markdown) {
|
|
1237
|
+
let output = markdown;
|
|
1238
|
+
output = output.replace(/!\[.*?]\(.*?\)/g, "");
|
|
1239
|
+
output = output.replace(/^(#{1,6})\s+\d+,\s*/gm, "$1 ");
|
|
1240
|
+
output = output.replace(/&#x2F;/g, "/").replace(/&#39;/g, "'").replace(/&#x27;/g, "'").replace(/&quot;/g, '"').replace(/&amp;/g, "&").replace(/&#x3D;/g, "=").replace(/&#x60;/g, "`").replace(/&lt;/g, "<").replace(/&gt;/g, ">");
|
|
1241
|
+
output = output.replace(/^- .+?:\s*$/gm, "");
|
|
1242
|
+
output = output.replace(/\[הרחבה]\([^)]*\)/g, "");
|
|
1243
|
+
output = output.replace(/(?<!:)\/{2,}/g, "/");
|
|
1244
|
+
output = output.replace(/^(#{2,6})\s+.+\n(\s*\n)+(?=#{1,6}\s|$)/gm, "");
|
|
1245
|
+
output = output.replace(/\n{3,}/g, "\n\n");
|
|
1246
|
+
return output.trim();
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1210
1249
|
class TemplateGenerator {
|
|
1211
1250
|
anthropicClient;
|
|
1212
1251
|
promptAnalyzer;
|
|
@@ -1320,7 +1359,6 @@ class TemplateGenerator {
|
|
|
1320
1359
|
if (tokensBeforeTruncation > tokensAfterTruncation) {
|
|
1321
1360
|
console.warn(`Page ${pageId} content truncated: ${tokensBeforeTruncation} -> ${tokensAfterTruncation} tokens`);
|
|
1322
1361
|
}
|
|
1323
|
-
this.promptAnalyzer.analyzeContent(truncatedContent, urlItem);
|
|
1324
1362
|
const request = {
|
|
1325
1363
|
pageContent: truncatedContent,
|
|
1326
1364
|
templateAlias: urlItem.TemplateAlias,
|
|
@@ -1351,7 +1389,14 @@ class TemplateGenerator {
|
|
|
1351
1389
|
}
|
|
1352
1390
|
async renderTemplate(template, data) {
|
|
1353
1391
|
return withErrorHandling(async () => {
|
|
1354
|
-
|
|
1392
|
+
const originalEscape = Mustache.escape;
|
|
1393
|
+
Mustache.escape = (text) => text;
|
|
1394
|
+
try {
|
|
1395
|
+
const rendered = Mustache.render(template, data);
|
|
1396
|
+
return sanitizeRenderedMarkdown(rendered);
|
|
1397
|
+
} finally {
|
|
1398
|
+
Mustache.escape = originalEscape;
|
|
1399
|
+
}
|
|
1355
1400
|
}, {
|
|
1356
1401
|
template: template.substring(0, 200) + "...",
|
|
1357
1402
|
dataKeys: Object.keys(data)
|
|
@@ -1480,7 +1525,8 @@ class LLMSFilesGenerator {
|
|
|
1480
1525
|
content += `This website contains comprehensive information about ${siteTitle.toLowerCase()}. The content is organized into the following sections:
|
|
1481
1526
|
|
|
1482
1527
|
`;
|
|
1483
|
-
const pagesByCategory = this.groupPagesByCategory(mdFiles);
|
|
1528
|
+
const deduplicatedFiles = this.deduplicateByUrl(mdFiles);
|
|
1529
|
+
const pagesByCategory = this.groupPagesByCategory(deduplicatedFiles);
|
|
1484
1530
|
for (const [category, pages] of Object.entries(pagesByCategory)) {
|
|
1485
1531
|
if (pages.length === 0)
|
|
1486
1532
|
continue;
|
|
@@ -1523,7 +1569,8 @@ class LLMSFilesGenerator {
|
|
|
1523
1569
|
`;
|
|
1524
1570
|
}
|
|
1525
1571
|
content += "---\n\n";
|
|
1526
|
-
for (const mdFile of mdFiles) {
|
|
1572
|
+
const deduplicatedFiles = this.deduplicateByUrl(mdFiles);
|
|
1573
|
+
for (const mdFile of deduplicatedFiles) {
|
|
1527
1574
|
const urlItem = this.umbracoData.urlList.find((item) => item.url === mdFile.url);
|
|
1528
1575
|
if (!urlItem)
|
|
1529
1576
|
continue;
|
|
@@ -1564,6 +1611,15 @@ class LLMSFilesGenerator {
|
|
|
1564
1611
|
* /marketplace -> category "marketplace"
|
|
1565
1612
|
* / -> category "main"
|
|
1566
1613
|
*/
|
|
1614
|
+
deduplicateByUrl(mdFiles) {
|
|
1615
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1616
|
+
return mdFiles.filter((file) => {
|
|
1617
|
+
if (seen.has(file.url))
|
|
1618
|
+
return false;
|
|
1619
|
+
seen.add(file.url);
|
|
1620
|
+
return true;
|
|
1621
|
+
});
|
|
1622
|
+
}
|
|
1567
1623
|
groupPagesByCategory(mdFiles) {
|
|
1568
1624
|
const categories = {};
|
|
1569
1625
|
for (const mdFile of mdFiles) {
|
|
@@ -1609,8 +1665,7 @@ class LLMSFilesGenerator {
|
|
|
1609
1665
|
}
|
|
1610
1666
|
extractSiteTitle() {
|
|
1611
1667
|
const siteData = this.umbracoData.SiteData;
|
|
1612
|
-
|
|
1613
|
-
return rawTitle;
|
|
1668
|
+
return siteData?.pageTitle || siteData?.mainHeaderBlockTitle || "Website Documentation";
|
|
1614
1669
|
}
|
|
1615
1670
|
extractSiteDescription() {
|
|
1616
1671
|
const siteData = this.umbracoData.SiteData;
|
|
@@ -1633,11 +1688,11 @@ class LLMSFilesGenerator {
|
|
|
1633
1688
|
const pageContent = extractPageContent(this.umbracoData, urlItem.Jpath);
|
|
1634
1689
|
if (!pageContent)
|
|
1635
1690
|
return `${urlItem.TemplateAlias} page`;
|
|
1636
|
-
const desc = pageContent.pageDescription || pageContent.description || pageContent.headerBlockSubtitle;
|
|
1637
|
-
if (desc && typeof desc === "string") {
|
|
1638
|
-
return desc;
|
|
1691
|
+
const desc = pageContent.pageDescription || pageContent.description || pageContent.headerBlockSubtitle || pageContent.ogDescription;
|
|
1692
|
+
if (desc && typeof desc === "string" && desc.trim().length > 0) {
|
|
1693
|
+
return desc.trim();
|
|
1639
1694
|
}
|
|
1640
|
-
return
|
|
1695
|
+
return `${urlItem.TemplateAlias} page`;
|
|
1641
1696
|
}
|
|
1642
1697
|
sanitizeUrlForFilename(url) {
|
|
1643
1698
|
if (!url || url === "/")
|
package/dist/module.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voicenter-team/nuxt-llms-generator",
|
|
3
|
-
"version": "0.1.12",
|
|
3
|
+
"version": "0.1.13",
|
|
4
4
|
"description": "Nuxt 3 module for automatically generating AI-optimized documentation files (llms.txt, llms-full.txt, and individual .md files) from Umbraco CMS data using Anthropic's Claude API.",
|
|
5
5
|
"repository": "https://github.com/VoicenterTeam/nuxt-llms-generator",
|
|
6
6
|
"license": "MIT",
|