@voicenter-team/nuxt-llms-generator 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -4,74 +4,38 @@ import Mustache from 'mustache';
|
|
|
4
4
|
import Anthropic from '@anthropic-ai/sdk';
|
|
5
5
|
import { createHash } from 'crypto';
|
|
6
6
|
import { JSONPath } from 'jsonpath-plus';
|
|
7
|
-
import {
|
|
7
|
+
import { w as withErrorHandling } from '../shared/nuxt-llms-generator.bc139143.mjs';
|
|
8
8
|
import '@nuxt/kit';
|
|
9
9
|
import 'zod';
|
|
10
10
|
import 'node-html-markdown';
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
model;
|
|
15
|
-
maxRetries = 3;
|
|
16
|
-
retryDelayMs = 1e3;
|
|
17
|
-
constructor(config) {
|
|
18
|
-
this.client = new Anthropic({
|
|
19
|
-
apiKey: config.anthropicApiKey
|
|
20
|
-
});
|
|
21
|
-
this.model = config.anthropicModel || "claude-3-5-sonnet-20241022";
|
|
22
|
-
}
|
|
23
|
-
async generateTemplate(request) {
|
|
24
|
-
const prompt = this.buildPrompt(request);
|
|
25
|
-
for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
|
|
26
|
-
try {
|
|
27
|
-
const response = await this.client.messages.create({
|
|
28
|
-
model: this.model,
|
|
29
|
-
max_tokens: 4e3,
|
|
30
|
-
temperature: 0.1,
|
|
31
|
-
messages: [{
|
|
32
|
-
role: "user",
|
|
33
|
-
content: prompt
|
|
34
|
-
}]
|
|
35
|
-
});
|
|
36
|
-
const content = response.content[0];
|
|
37
|
-
if (content.type !== "text") {
|
|
38
|
-
throw new Error("Unexpected response type from Anthropic API");
|
|
39
|
-
}
|
|
40
|
-
return this.parseResponse(content.text);
|
|
41
|
-
} catch (error) {
|
|
42
|
-
if (attempt === this.maxRetries) {
|
|
43
|
-
throw new Error(`Anthropic API failed after ${this.maxRetries} attempts: ${error}`);
|
|
44
|
-
}
|
|
45
|
-
console.warn(`Anthropic API attempt ${attempt} failed, retrying...`, error);
|
|
46
|
-
await this.delay(this.retryDelayMs * attempt);
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
throw new Error("Failed to generate template");
|
|
50
|
-
}
|
|
51
|
-
buildPrompt(request) {
|
|
52
|
-
return `# llms\u2011aware Mustache Template Generator Prompt (Key\u2011Agnostic)
|
|
12
|
+
function buildLLMSTemplatePrompt(request) {
|
|
13
|
+
return `# LLMS.txt-Optimized Mustache Template Generator
|
|
53
14
|
|
|
54
|
-
You are an expert
|
|
15
|
+
You are an expert at creating **Mustache.js templates** that generate **LLM knowledge base entries** following the [\`llms.txt\` standard](https://llmstxt.org/).
|
|
55
16
|
|
|
56
17
|
---
|
|
57
18
|
|
|
58
|
-
## \u{1F3AF}
|
|
19
|
+
## \u{1F3AF} TRUE PURPOSE: Help LLMs Answer Questions Efficiently
|
|
59
20
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
Each \`.md\` file provides semantic, human-readable, and hierarchically organized content for LLMs to learn from.
|
|
21
|
+
**Critical Understanding:**
|
|
22
|
+
These \`.md\` files are **NOT website copies** \u2014 they are **LLM knowledge base entries** designed for **inference** (understanding), not training.
|
|
63
23
|
|
|
64
|
-
|
|
24
|
+
**Primary Goal:** Enable LLMs to quickly answer user questions about this website page within **limited context windows** (typically 200K tokens).
|
|
65
25
|
|
|
66
|
-
|
|
26
|
+
**What This Means:**
|
|
27
|
+
- An LLM will read this file to understand "What is this page about?"
|
|
28
|
+
- Users will ask questions like "What does this page offer?", "Who is this for?", "What are the key features?"
|
|
29
|
+
- Content must be **concise**, **scannable**, and **fact-dense**
|
|
30
|
+
- Prioritize information that helps LLMs **infer meaning** over complete content reproduction
|
|
67
31
|
|
|
68
32
|
---
|
|
69
33
|
|
|
70
|
-
## \u{1F4E5} Context Supplied
|
|
34
|
+
## \u{1F4E5} Context Supplied
|
|
71
35
|
|
|
72
|
-
- URL
|
|
73
|
-
- Template Alias
|
|
74
|
-
- JSON Path
|
|
36
|
+
- **URL:** ${request.url}
|
|
37
|
+
- **Template Alias:** ${request.templateAlias}
|
|
38
|
+
- **JSON Path:** ${request.jpath}
|
|
75
39
|
|
|
76
40
|
### Available Data
|
|
77
41
|
\`\`\`json
|
|
@@ -80,102 +44,200 @@ ${JSON.stringify(request.pageContent, null, 2)}
|
|
|
80
44
|
|
|
81
45
|
---
|
|
82
46
|
|
|
83
|
-
## \u{
|
|
47
|
+
## \u{1F9E0} Content Philosophy: Think "Knowledge Base Entry"
|
|
84
48
|
|
|
85
|
-
1.
|
|
86
|
-
|
|
49
|
+
### 1. Start with Expert-Level Summary
|
|
50
|
+
- **First impression matters:** What would an expert say about this page in 1-2 sentences?
|
|
51
|
+
- Lead with **value proposition** or **core purpose**
|
|
52
|
+
- Use the blockquote format (\`> \`) for the summary \u2014 this signals importance
|
|
87
53
|
|
|
88
|
-
2.
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
54
|
+
### 2. Structure for Question-Answering
|
|
55
|
+
Anticipate questions an LLM might need to answer:
|
|
56
|
+
- "What is this?" \u2192 Main heading + summary
|
|
57
|
+
- "What does it do/offer?" \u2192 Key features/benefits section
|
|
58
|
+
- "Who is it for?" \u2192 Target audience/use cases
|
|
59
|
+
- "How does it work?" \u2192 Process/methodology
|
|
60
|
+
- "What are the details?" \u2192 Technical specs/pricing/etc.
|
|
92
61
|
|
|
93
|
-
3.
|
|
94
|
-
|
|
95
|
-
|
|
62
|
+
### 3. Prioritize Information by Importance
|
|
63
|
+
**Essential First:**
|
|
64
|
+
- What this page represents
|
|
65
|
+
- Primary value/purpose
|
|
66
|
+
- Key differentiators
|
|
96
67
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
- Arrays of objects \u2192 repeated sub-sections or tables.
|
|
102
|
-
- Objects \u2192 nested sections with humanized headings.
|
|
68
|
+
**Supporting Details Second:**
|
|
69
|
+
- Features, benefits, specifications
|
|
70
|
+
- Use cases, examples
|
|
71
|
+
- Technical details
|
|
103
72
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
73
|
+
**Peripheral Information Last:**
|
|
74
|
+
- Meta information, related links
|
|
75
|
+
- Supplementary context
|
|
107
76
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
77
|
+
### 4. Optimize for Scanability
|
|
78
|
+
- Use **hierarchical headings** (\`#\`, \`##\`, \`###\`) to create clear structure
|
|
79
|
+
- Employ **bullet lists** for scannable facts
|
|
80
|
+
- Keep paragraphs **short and dense** (2-3 sentences max)
|
|
81
|
+
- Use **semantic Markdown** only \u2014 no HTML, entities, or attributes
|
|
112
82
|
|
|
113
|
-
|
|
114
|
-
Exclude non-content fields like IDs, timestamps, internal flags, etc.
|
|
83
|
+
---
|
|
115
84
|
|
|
116
|
-
|
|
117
|
-
- If a value looks like a URL, render \`[Label]({{key}})\`.
|
|
118
|
-
- If image URL, render \`\`.
|
|
85
|
+
## \u{1F527} Technical Principles (Key-Agnostic Design)
|
|
119
86
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
87
|
+
### 1. Dynamic Property Inference
|
|
88
|
+
**Do not assume fixed property names.** Infer content type and importance from:
|
|
89
|
+
- **Value structure:** Object, array, string, number
|
|
90
|
+
- **Value length:** Short strings = titles; long text = descriptions
|
|
91
|
+
- **Position in JSON:** Root-level = high importance; nested = contextual details
|
|
92
|
+
- **Semantic patterns:** URLs, images, dates, IDs
|
|
123
93
|
|
|
124
|
-
|
|
94
|
+
### 2. Exact Property Bindings
|
|
95
|
+
- Always use the **exact property name** from JSON: \`{{actualKeyName}}\`
|
|
96
|
+
- Do NOT rename or modify binding identifiers
|
|
97
|
+
- The Mustache bindings must match JSON precisely
|
|
98
|
+
|
|
99
|
+
### 3. Humanized Section Headings
|
|
100
|
+
While bindings stay exact, convert keys to readable headings:
|
|
101
|
+
- \`productFeatures\` \u2192 "Product Features"
|
|
102
|
+
- \`pricing_tiers\` \u2192 "Pricing Tiers"
|
|
103
|
+
- \`techSpecs\` \u2192 "Technical Specifications"
|
|
125
104
|
|
|
126
|
-
|
|
105
|
+
### 4. Semantic Interpretation Guide
|
|
106
|
+
- **Short root strings (5-50 chars)** \u2192 Likely page title
|
|
107
|
+
- **Medium text (50-300 chars)** \u2192 Likely summary/tagline
|
|
108
|
+
- **Long text (300+ chars)** \u2192 Likely detailed description
|
|
109
|
+
- **Arrays of primitives** \u2192 Bullet lists
|
|
110
|
+
- **Arrays of objects** \u2192 Repeated sections or tables
|
|
111
|
+
- **Nested objects** \u2192 Sub-sections with logical hierarchy
|
|
112
|
+
- **URL-like strings** \u2192 Render as \`[Label]({{url}})\`
|
|
113
|
+
- **Image URLs** \u2192 Render as \`\`
|
|
127
114
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
115
|
+
### 5. Noise Filtering
|
|
116
|
+
**Exclude non-content fields:**
|
|
117
|
+
- IDs (\`id\`, \`nodeId\`, \`_id\`)
|
|
118
|
+
- Timestamps (\`createdAt\`, \`updatedAt\`, \`lastModified\`)
|
|
119
|
+
- Internal flags (\`isPublished\`, \`sortOrder\`, \`hidden\`)
|
|
120
|
+
- System metadata (\`_type\`, \`contentType\`, \`template\`)
|
|
121
|
+
|
|
122
|
+
### 6. Hierarchy & Nesting
|
|
123
|
+
- **Root level** \u2192 \`#\` (H1) \u2014 one per document
|
|
124
|
+
- **Primary sections** \u2192 \`##\` (H2)
|
|
125
|
+
- **Sub-sections** \u2192 \`###\` (H3)
|
|
126
|
+
- **Details** \u2192 \`####\` (H4) \u2014 avoid going deeper
|
|
127
|
+
- Heading depth corresponds to JSON nesting, but stay practical
|
|
132
128
|
|
|
133
129
|
---
|
|
134
130
|
|
|
135
|
-
## \u{
|
|
131
|
+
## \u{1F4D0} Template Structure Pattern
|
|
132
|
+
|
|
133
|
+
### Mandatory Opening
|
|
134
|
+
\`\`\`mustache
|
|
135
|
+
# {{primaryTitle}}
|
|
136
|
+
|
|
137
|
+
{{#summaryOrTagline}}
|
|
138
|
+
> {{summaryOrTagline}}
|
|
139
|
+
{{/summaryOrTagline}}
|
|
140
|
+
\`\`\`
|
|
136
141
|
|
|
137
|
-
###
|
|
142
|
+
### Recommended Sections (adapt to JSON)
|
|
138
143
|
\`\`\`mustache
|
|
139
|
-
|
|
144
|
+
{{#mainDescription}}
|
|
145
|
+
{{mainDescription}}
|
|
146
|
+
{{/mainDescription}}
|
|
140
147
|
|
|
141
|
-
{{#
|
|
142
|
-
|
|
143
|
-
{{
|
|
148
|
+
{{#keyFeatures.0}}
|
|
149
|
+
## Key Features
|
|
150
|
+
{{#keyFeatures}}
|
|
151
|
+
- **{{featureName}}**: {{featureDescription}}
|
|
152
|
+
{{/keyFeatures}}
|
|
153
|
+
{{/keyFeatures.0}}
|
|
144
154
|
|
|
145
|
-
{{#
|
|
146
|
-
##
|
|
147
|
-
{{#
|
|
148
|
-
### {{
|
|
149
|
-
{{
|
|
150
|
-
{{/
|
|
151
|
-
{{/
|
|
155
|
+
{{#useCases.0}}
|
|
156
|
+
## Use Cases
|
|
157
|
+
{{#useCases}}
|
|
158
|
+
### {{caseTitle}}
|
|
159
|
+
{{caseDescription}}
|
|
160
|
+
{{/useCases}}
|
|
161
|
+
{{/useCases.0}}
|
|
152
162
|
|
|
153
|
-
{{#
|
|
154
|
-
##
|
|
155
|
-
{{#
|
|
156
|
-
-
|
|
157
|
-
{{/
|
|
158
|
-
{{/
|
|
163
|
+
{{#technicalDetails.0}}
|
|
164
|
+
## Technical Details
|
|
165
|
+
{{#technicalDetails}}
|
|
166
|
+
- **{{detailLabel}}**: {{detailValue}}
|
|
167
|
+
{{/technicalDetails}}
|
|
168
|
+
{{/technicalDetails.0}}
|
|
159
169
|
\`\`\`
|
|
160
170
|
|
|
161
|
-
|
|
171
|
+
**Note:** This is an illustrative pattern. Adapt section names and structure to match the actual JSON dynamically.
|
|
162
172
|
|
|
163
173
|
---
|
|
164
174
|
|
|
165
175
|
## \u2705 Output Requirements
|
|
166
176
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
177
|
+
1. **Output ONLY the Mustache template** \u2014 no explanations, no code fences, no preamble
|
|
178
|
+
2. **Use exact JSON property names** in all bindings
|
|
179
|
+
3. **Generate clean Markdown** \u2014 no HTML, entities, or attributes
|
|
180
|
+
4. **Prioritize content** \u2014 most important information first
|
|
181
|
+
5. **Be concise** \u2014 optimize for limited context windows
|
|
182
|
+
6. **Structure for questions** \u2014 LLMs should easily extract facts
|
|
183
|
+
7. **Stay domain-agnostic** \u2014 template should work for any JSON shape
|
|
172
184
|
|
|
173
185
|
---
|
|
174
186
|
|
|
175
|
-
## \u{1F680} Task
|
|
187
|
+
## \u{1F680} Your Task
|
|
176
188
|
|
|
177
|
-
Analyze the provided JSON and **generate
|
|
189
|
+
Analyze the provided JSON structure and **generate a Mustache template** that produces an **LLM knowledge base entry** following these principles.
|
|
190
|
+
|
|
191
|
+
**Think:**
|
|
192
|
+
- What would an LLM need to know to answer questions about this page?
|
|
193
|
+
- What's the core value/purpose this page communicates?
|
|
194
|
+
- How can I structure this for maximum inference efficiency?
|
|
195
|
+
|
|
196
|
+
Generate the template now.
|
|
178
197
|
`;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
class AnthropicClient {
|
|
201
|
+
client;
|
|
202
|
+
model;
|
|
203
|
+
maxRetries = 3;
|
|
204
|
+
retryDelayMs = 1e3;
|
|
205
|
+
constructor(config) {
|
|
206
|
+
this.client = new Anthropic({
|
|
207
|
+
apiKey: config.anthropicApiKey
|
|
208
|
+
});
|
|
209
|
+
this.model = config.anthropicModel || "claude-3-5-sonnet-20241022";
|
|
210
|
+
}
|
|
211
|
+
async generateTemplate(request) {
|
|
212
|
+
const prompt = this.buildPrompt(request);
|
|
213
|
+
for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
|
|
214
|
+
try {
|
|
215
|
+
const response = await this.client.messages.create({
|
|
216
|
+
model: this.model,
|
|
217
|
+
max_tokens: 4e3,
|
|
218
|
+
temperature: 0.1,
|
|
219
|
+
messages: [{
|
|
220
|
+
role: "user",
|
|
221
|
+
content: prompt
|
|
222
|
+
}]
|
|
223
|
+
});
|
|
224
|
+
const content = response.content[0];
|
|
225
|
+
if (content.type !== "text") {
|
|
226
|
+
throw new Error("Unexpected response type from Anthropic API");
|
|
227
|
+
}
|
|
228
|
+
return this.parseResponse(content.text);
|
|
229
|
+
} catch (error) {
|
|
230
|
+
if (attempt === this.maxRetries) {
|
|
231
|
+
throw new Error(`Anthropic API failed after ${this.maxRetries} attempts: ${error}`);
|
|
232
|
+
}
|
|
233
|
+
console.warn(`Anthropic API attempt ${attempt} failed, retrying...`, error);
|
|
234
|
+
await this.delay(this.retryDelayMs * attempt);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
throw new Error("Failed to generate template");
|
|
238
|
+
}
|
|
239
|
+
buildPrompt(request) {
|
|
240
|
+
return buildLLMSTemplatePrompt(request);
|
|
179
241
|
}
|
|
180
242
|
parseResponse(responseText) {
|
|
181
243
|
const codeBlockRegex = /```(?:mustache)?\n?([\s\S]*?)```/;
|
|
@@ -896,245 +958,6 @@ async function performAutomaticCleanup(umbracoData, cacheDir, options = {}) {
|
|
|
896
958
|
return stats;
|
|
897
959
|
}
|
|
898
960
|
|
|
899
|
-
class MustacheSyntaxValidator {
|
|
900
|
-
name = "mustache-syntax";
|
|
901
|
-
canFix = true;
|
|
902
|
-
validate(template) {
|
|
903
|
-
const result = {
|
|
904
|
-
isValid: true,
|
|
905
|
-
errors: [],
|
|
906
|
-
warnings: []
|
|
907
|
-
};
|
|
908
|
-
try {
|
|
909
|
-
Mustache.parse(template);
|
|
910
|
-
return result;
|
|
911
|
-
} catch (error) {
|
|
912
|
-
result.isValid = false;
|
|
913
|
-
result.errors.push(`Mustache syntax error: ${error.message}`);
|
|
914
|
-
if (this.canFix) {
|
|
915
|
-
try {
|
|
916
|
-
const fixedTemplate = this.fix(template);
|
|
917
|
-
result.fixedTemplate = fixedTemplate;
|
|
918
|
-
result.warnings.push("Template was automatically fixed");
|
|
919
|
-
} catch (fixError) {
|
|
920
|
-
result.errors.push(`Could not fix template: ${fixError.message}`);
|
|
921
|
-
}
|
|
922
|
-
}
|
|
923
|
-
return result;
|
|
924
|
-
}
|
|
925
|
-
}
|
|
926
|
-
fix(template) {
|
|
927
|
-
let fixedTemplate = template;
|
|
928
|
-
const openSectionRegex = /\{\{\#([a-zA-Z0-9_.]+)\}\}/g;
|
|
929
|
-
const closeSectionRegex = /\{\{\/([a-zA-Z0-9_.]+)\}\}/g;
|
|
930
|
-
const openSections = [];
|
|
931
|
-
const closeSections = [];
|
|
932
|
-
let match;
|
|
933
|
-
while ((match = openSectionRegex.exec(template)) !== null) {
|
|
934
|
-
openSections.push({
|
|
935
|
-
name: match[1],
|
|
936
|
-
pos: match.index
|
|
937
|
-
});
|
|
938
|
-
}
|
|
939
|
-
while ((match = closeSectionRegex.exec(template)) !== null) {
|
|
940
|
-
closeSections.push({
|
|
941
|
-
name: match[1],
|
|
942
|
-
pos: match.index
|
|
943
|
-
});
|
|
944
|
-
}
|
|
945
|
-
const unmatchedOpens = openSections.filter(
|
|
946
|
-
(open) => !closeSections.some((close) => close.name === open.name)
|
|
947
|
-
);
|
|
948
|
-
const unmatchedCloses = closeSections.filter(
|
|
949
|
-
(close) => !openSections.some((open) => open.name === close.name)
|
|
950
|
-
);
|
|
951
|
-
unmatchedOpens.forEach((unmatched) => {
|
|
952
|
-
const sectionRegex = new RegExp(`\\{\\{#${unmatched.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\}\\}`, "g");
|
|
953
|
-
fixedTemplate = fixedTemplate.replace(sectionRegex, "");
|
|
954
|
-
});
|
|
955
|
-
unmatchedCloses.forEach((unmatched) => {
|
|
956
|
-
const sectionRegex = new RegExp(`\\{\\{/${unmatched.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\}\\}`, "g");
|
|
957
|
-
fixedTemplate = fixedTemplate.replace(sectionRegex, "");
|
|
958
|
-
});
|
|
959
|
-
fixedTemplate = fixedTemplate.replace(/\n\s*\n\s*\n/g, "\n\n");
|
|
960
|
-
return fixedTemplate;
|
|
961
|
-
}
|
|
962
|
-
}
|
|
963
|
-
class TemplateStructureValidator {
|
|
964
|
-
name = "template-structure";
|
|
965
|
-
validate(template) {
|
|
966
|
-
const result = {
|
|
967
|
-
isValid: true,
|
|
968
|
-
errors: [],
|
|
969
|
-
warnings: []
|
|
970
|
-
};
|
|
971
|
-
if (!template.match(/^#\s+/m)) {
|
|
972
|
-
result.warnings.push("Template should start with a heading (# Title)");
|
|
973
|
-
}
|
|
974
|
-
const headingLevels = (template.match(/^#{4,}/gm) || []).length;
|
|
975
|
-
if (headingLevels > 0) {
|
|
976
|
-
result.warnings.push("Template has deeply nested headings (4+ levels), consider flattening structure");
|
|
977
|
-
}
|
|
978
|
-
const emptySections = template.match(/\{\{#\w+\}\}\s*\{\{\/\w+\}\}/g);
|
|
979
|
-
if (emptySections) {
|
|
980
|
-
result.warnings.push(`Found ${emptySections.length} empty sections that may not render content`);
|
|
981
|
-
}
|
|
982
|
-
const commonTypos = template.match(/\{\{\s*pageTittle\s*\}\}/g);
|
|
983
|
-
if (commonTypos) {
|
|
984
|
-
result.warnings.push('Found "pageTittle" - check if this should be "pageTitle"');
|
|
985
|
-
}
|
|
986
|
-
return result;
|
|
987
|
-
}
|
|
988
|
-
}
|
|
989
|
-
class ContentCompletenessValidator {
|
|
990
|
-
name = "content-completeness";
|
|
991
|
-
validate(template) {
|
|
992
|
-
const result = {
|
|
993
|
-
isValid: true,
|
|
994
|
-
errors: [],
|
|
995
|
-
warnings: []
|
|
996
|
-
};
|
|
997
|
-
const variables = this.extractVariables(template);
|
|
998
|
-
const hasTitle = variables.some((v) => v.includes("title") || v.includes("Title"));
|
|
999
|
-
if (!hasTitle) {
|
|
1000
|
-
result.warnings.push("Template missing title variable (pageTitle, title, etc.)");
|
|
1001
|
-
}
|
|
1002
|
-
const hasDescription = variables.some((v) => v.includes("description") || v.includes("Description"));
|
|
1003
|
-
if (!hasDescription) {
|
|
1004
|
-
result.warnings.push("Template missing description variable");
|
|
1005
|
-
}
|
|
1006
|
-
if (template.length < 50) {
|
|
1007
|
-
result.warnings.push("Template is very short, may not provide sufficient content");
|
|
1008
|
-
}
|
|
1009
|
-
const sectionsOnly = variables.filter((v) => !v.includes(".") && !v.includes("["));
|
|
1010
|
-
if (sectionsOnly.length < 2) {
|
|
1011
|
-
result.warnings.push("Template has limited content variables, consider adding more sections");
|
|
1012
|
-
}
|
|
1013
|
-
return result;
|
|
1014
|
-
}
|
|
1015
|
-
extractVariables(template) {
|
|
1016
|
-
const variableRegex = /\{\{\s*([^#\/][^}]*?)\s*\}\}/g;
|
|
1017
|
-
const variables = [];
|
|
1018
|
-
let match;
|
|
1019
|
-
while ((match = variableRegex.exec(template)) !== null) {
|
|
1020
|
-
variables.push(match[1].trim());
|
|
1021
|
-
}
|
|
1022
|
-
return variables;
|
|
1023
|
-
}
|
|
1024
|
-
}
|
|
1025
|
-
class LLMSTxtComplianceValidator {
|
|
1026
|
-
name = "llms-txt-compliance";
|
|
1027
|
-
validate(template) {
|
|
1028
|
-
const result = {
|
|
1029
|
-
isValid: true,
|
|
1030
|
-
errors: [],
|
|
1031
|
-
warnings: []
|
|
1032
|
-
};
|
|
1033
|
-
const headings = template.match(/^#+\s+.+$/gm) || [];
|
|
1034
|
-
let lastLevel = 0;
|
|
1035
|
-
let hasProperHierarchy = true;
|
|
1036
|
-
headings.forEach((heading) => {
|
|
1037
|
-
const level = (heading.match(/^#+/) || [""])[0].length;
|
|
1038
|
-
if (level > lastLevel + 1) {
|
|
1039
|
-
hasProperHierarchy = false;
|
|
1040
|
-
}
|
|
1041
|
-
lastLevel = level;
|
|
1042
|
-
});
|
|
1043
|
-
if (!hasProperHierarchy) {
|
|
1044
|
-
result.warnings.push("Heading hierarchy should increment by one level (# -> ## -> ###)");
|
|
1045
|
-
}
|
|
1046
|
-
if (template.includes("pageDescription") && !template.includes(">")) {
|
|
1047
|
-
result.warnings.push("Consider using blockquote (>) for page description as per LLMS.txt standard");
|
|
1048
|
-
}
|
|
1049
|
-
const hasLists = template.includes("- ") || template.includes("* ");
|
|
1050
|
-
if (!hasLists && template.length > 200) {
|
|
1051
|
-
result.warnings.push("Long content without lists - consider breaking into bullet points for better AI consumption");
|
|
1052
|
-
}
|
|
1053
|
-
const htmlTags = (template.match(/<[^>]+>/g) || []).length;
|
|
1054
|
-
if (htmlTags > 3) {
|
|
1055
|
-
result.warnings.push("Template contains HTML tags - prefer pure markdown for LLMS.txt compliance");
|
|
1056
|
-
}
|
|
1057
|
-
return result;
|
|
1058
|
-
}
|
|
1059
|
-
}
|
|
1060
|
-
class TemplateValidationPipeline {
|
|
1061
|
-
validators = [];
|
|
1062
|
-
constructor() {
|
|
1063
|
-
this.addValidator(new MustacheSyntaxValidator());
|
|
1064
|
-
this.addValidator(new TemplateStructureValidator());
|
|
1065
|
-
this.addValidator(new ContentCompletenessValidator());
|
|
1066
|
-
this.addValidator(new LLMSTxtComplianceValidator());
|
|
1067
|
-
}
|
|
1068
|
-
addValidator(validator) {
|
|
1069
|
-
this.validators.push(validator);
|
|
1070
|
-
}
|
|
1071
|
-
removeValidator(name) {
|
|
1072
|
-
this.validators = this.validators.filter((v) => v.name !== name);
|
|
1073
|
-
}
|
|
1074
|
-
async validateTemplate(template, options = {}) {
|
|
1075
|
-
const { autoFix = true, throwOnError = false } = options;
|
|
1076
|
-
let currentTemplate = template;
|
|
1077
|
-
const allResults = {
|
|
1078
|
-
isValid: true,
|
|
1079
|
-
errors: [],
|
|
1080
|
-
warnings: []
|
|
1081
|
-
};
|
|
1082
|
-
for (const validator of this.validators) {
|
|
1083
|
-
const result = validator.validate(currentTemplate);
|
|
1084
|
-
allResults.errors.push(...result.errors);
|
|
1085
|
-
allResults.warnings.push(...result.warnings);
|
|
1086
|
-
if (!result.isValid) {
|
|
1087
|
-
allResults.isValid = false;
|
|
1088
|
-
if (autoFix && validator.canFix && validator.fix) {
|
|
1089
|
-
const fixedTemplate = validator.fix(currentTemplate);
|
|
1090
|
-
const fixResult = validator.validate(fixedTemplate);
|
|
1091
|
-
if (fixResult.isValid) {
|
|
1092
|
-
currentTemplate = fixedTemplate;
|
|
1093
|
-
allResults.fixedTemplate = currentTemplate;
|
|
1094
|
-
console.log(`Template fixed by ${validator.name} validator`);
|
|
1095
|
-
allResults.errors = allResults.errors.filter((e) => !result.errors.includes(e));
|
|
1096
|
-
if (allResults.errors.length === 0) {
|
|
1097
|
-
allResults.isValid = true;
|
|
1098
|
-
}
|
|
1099
|
-
}
|
|
1100
|
-
}
|
|
1101
|
-
}
|
|
1102
|
-
}
|
|
1103
|
-
if (allResults.errors.length > 0 && throwOnError) {
|
|
1104
|
-
throw new TemplateError(
|
|
1105
|
-
ErrorCode.TEMPLATE_VALIDATION_FAILED,
|
|
1106
|
-
`Template validation failed: ${allResults.errors.join(", ")}`,
|
|
1107
|
-
{ template: template.substring(0, 200) + "..." }
|
|
1108
|
-
);
|
|
1109
|
-
}
|
|
1110
|
-
return allResults;
|
|
1111
|
-
}
|
|
1112
|
-
async validateAndFix(template) {
|
|
1113
|
-
const result = await this.validateTemplate(template, {
|
|
1114
|
-
autoFix: true,
|
|
1115
|
-
throwOnError: false
|
|
1116
|
-
});
|
|
1117
|
-
if (result.fixedTemplate) {
|
|
1118
|
-
return result.fixedTemplate;
|
|
1119
|
-
}
|
|
1120
|
-
if (result.errors.length > 0) {
|
|
1121
|
-
console.warn("Could not fix template, using fallback");
|
|
1122
|
-
return `# {{pageTitle}}
|
|
1123
|
-
|
|
1124
|
-
> {{pageDescription}}
|
|
1125
|
-
|
|
1126
|
-
## Content
|
|
1127
|
-
|
|
1128
|
-
This page content could not be processed due to template formatting issues.`;
|
|
1129
|
-
}
|
|
1130
|
-
return template;
|
|
1131
|
-
}
|
|
1132
|
-
getValidatorNames() {
|
|
1133
|
-
return this.validators.map((v) => v.name);
|
|
1134
|
-
}
|
|
1135
|
-
}
|
|
1136
|
-
const templateValidationPipeline = new TemplateValidationPipeline();
|
|
1137
|
-
|
|
1138
961
|
class TemplateGenerator {
|
|
1139
962
|
anthropicClient;
|
|
1140
963
|
promptAnalyzer;
|
|
@@ -1279,8 +1102,7 @@ class TemplateGenerator {
|
|
|
1279
1102
|
}
|
|
1280
1103
|
async renderTemplate(template, data) {
|
|
1281
1104
|
return withErrorHandling(async () => {
|
|
1282
|
-
|
|
1283
|
-
return Mustache.render(validatedTemplate, data);
|
|
1105
|
+
return Mustache.render(template, data);
|
|
1284
1106
|
}, {
|
|
1285
1107
|
template: template.substring(0, 200) + "...",
|
|
1286
1108
|
dataKeys: Object.keys(data)
|
package/dist/module.json
CHANGED
package/dist/module.mjs
CHANGED
|
@@ -58,28 +58,6 @@ class SchemaValidator {
|
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
-
var ErrorCode = /* @__PURE__ */ ((ErrorCode2) => {
|
|
62
|
-
ErrorCode2["INVALID_CONFIG"] = "INVALID_CONFIG";
|
|
63
|
-
ErrorCode2["MISSING_API_KEY"] = "MISSING_API_KEY";
|
|
64
|
-
ErrorCode2["INVALID_DATA_PATH"] = "INVALID_DATA_PATH";
|
|
65
|
-
ErrorCode2["INVALID_UMBRACO_DATA"] = "INVALID_UMBRACO_DATA";
|
|
66
|
-
ErrorCode2["PAGE_CONTENT_EXTRACTION_FAILED"] = "PAGE_CONTENT_EXTRACTION_FAILED";
|
|
67
|
-
ErrorCode2["INVALID_JPATH"] = "INVALID_JPATH";
|
|
68
|
-
ErrorCode2["ANTHROPIC_API_ERROR"] = "ANTHROPIC_API_ERROR";
|
|
69
|
-
ErrorCode2["ANTHROPIC_CONNECTION_FAILED"] = "ANTHROPIC_CONNECTION_FAILED";
|
|
70
|
-
ErrorCode2["TEMPLATE_GENERATION_FAILED"] = "TEMPLATE_GENERATION_FAILED";
|
|
71
|
-
ErrorCode2["RATE_LIMIT_EXCEEDED"] = "RATE_LIMIT_EXCEEDED";
|
|
72
|
-
ErrorCode2["TEMPLATE_VALIDATION_FAILED"] = "TEMPLATE_VALIDATION_FAILED";
|
|
73
|
-
ErrorCode2["TEMPLATE_RENDERING_FAILED"] = "TEMPLATE_RENDERING_FAILED";
|
|
74
|
-
ErrorCode2["MUSTACHE_SYNTAX_ERROR"] = "MUSTACHE_SYNTAX_ERROR";
|
|
75
|
-
ErrorCode2["FILE_READ_ERROR"] = "FILE_READ_ERROR";
|
|
76
|
-
ErrorCode2["FILE_WRITE_ERROR"] = "FILE_WRITE_ERROR";
|
|
77
|
-
ErrorCode2["DIRECTORY_CREATION_FAILED"] = "DIRECTORY_CREATION_FAILED";
|
|
78
|
-
ErrorCode2["CACHE_READ_ERROR"] = "CACHE_READ_ERROR";
|
|
79
|
-
ErrorCode2["CACHE_WRITE_ERROR"] = "CACHE_WRITE_ERROR";
|
|
80
|
-
ErrorCode2["CACHE_CORRUPTED"] = "CACHE_CORRUPTED";
|
|
81
|
-
return ErrorCode2;
|
|
82
|
-
})(ErrorCode || {});
|
|
83
61
|
class LLMSError extends Error {
|
|
84
62
|
code;
|
|
85
63
|
context;
|
|
@@ -117,12 +95,6 @@ class AnthropicAPIError extends LLMSError {
|
|
|
117
95
|
this.retryable = retryable;
|
|
118
96
|
}
|
|
119
97
|
}
|
|
120
|
-
class TemplateError extends LLMSError {
|
|
121
|
-
constructor(code, message, context, cause) {
|
|
122
|
-
super(code, message, context, cause);
|
|
123
|
-
this.name = "TemplateError";
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
98
|
class FileSystemError extends LLMSError {
|
|
127
99
|
path;
|
|
128
100
|
constructor(code, message, path, context, cause) {
|
|
@@ -379,4 +351,4 @@ async function generateLLMSFiles(config, umbracoData, logger) {
|
|
|
379
351
|
}
|
|
380
352
|
}
|
|
381
353
|
|
|
382
|
-
export {
|
|
354
|
+
export { llmsModule as l, withErrorHandling as w };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voicenter-team/nuxt-llms-generator",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "Nuxt 3 module for automatically generating AI-optimized documentation files (llms.txt, llms-full.txt, and individual .md files) from Umbraco CMS data using Anthropic's Claude API.",
|
|
5
5
|
"repository": "https://github.com/VoicenterTeam/nuxt-llms-generator",
|
|
6
6
|
"license": "MIT",
|