@voicenter-team/nuxt-llms-generator 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +630 -625
- package/dist/chunks/llms-files-generator.mjs +414 -166
- package/dist/module.d.mts +1 -0
- package/dist/module.d.ts +1 -0
- package/dist/module.json +1 -1
- package/dist/module.mjs +1 -1
- package/dist/shared/{nuxt-llms-generator.bc139143.mjs → nuxt-llms-generator.db76a78e.mjs} +3 -0
- package/package.json +64 -63
|
@@ -3,24 +3,278 @@ import { join, dirname, basename } from 'path';
|
|
|
3
3
|
import { slugify } from 'transliteration';
|
|
4
4
|
import Mustache from 'mustache';
|
|
5
5
|
import Anthropic from '@anthropic-ai/sdk';
|
|
6
|
-
import {
|
|
6
|
+
import { encode } from '@toon-format/toon';
|
|
7
7
|
import { JSONPath } from 'jsonpath-plus';
|
|
8
|
-
import {
|
|
8
|
+
import { createHash } from 'crypto';
|
|
9
|
+
import { w as withErrorHandling } from '../shared/nuxt-llms-generator.db76a78e.mjs';
|
|
9
10
|
import '@nuxt/kit';
|
|
10
11
|
import 'zod';
|
|
11
12
|
import 'node-html-markdown';
|
|
12
13
|
|
|
14
|
+
/**
 * Look up a page's content inside `umbracoData.SiteData` via a JSONPath
 * expression and hand the match to `excludeChildrenFromContent`.
 *
 * @param {object} umbracoData - Object whose `SiteData` property is queried.
 * @param {string} jpath - JSONPath expression identifying the page node.
 * @returns {*} The first match (processed by `excludeChildrenFromContent`),
 *   or `null` when the lookup yields a falsy value, an empty array, or throws.
 */
function extractPageContent(umbracoData, jpath) {
  try {
    const matched = JSONPath({
      path: jpath,
      json: umbracoData.SiteData,
      wrap: false
    });
    const isList = Array.isArray(matched);
    if (!matched) {
      return null;
    }
    if (isList && matched.length === 0) {
      return null;
    }
    // When the query returns several nodes only the first one is used.
    return excludeChildrenFromContent(isList ? matched[0] : matched);
  } catch (error) {
    // Best-effort lookup: report the failure and signal "no content".
    console.error(`Failed to extract content for path ${jpath}:`, error);
    return null;
  }
}
|
|
31
|
+
/**
 * Return a shallow copy of `content` without its `children` property.
 *
 * @param {*} content - Value to clean.
 * @returns {*} `content` itself when it is falsy or not an object; otherwise
 *   a new object holding every own enumerable property except `children`.
 */
function excludeChildrenFromContent(content) {
  const isObjectLike = Boolean(content) && typeof content === "object";
  if (!isObjectLike) {
    return content;
  }
  // Rest-destructuring copies everything but the omitted key; the input is never mutated.
  const { children: _omitted, ...withoutChildren } = content;
  return withoutChildren;
}
|
|
41
|
+
/**
 * Build a page identifier of the form `<TemplateAlias>_<nodeID>`.
 *
 * @param {object} urlItem - Item providing `TemplateAlias` and `nodeID`.
 * @returns {string} The identifier; a falsy alias becomes "UnknownTemplate"
 *   and a falsy node id becomes "UnknownNode".
 */
function generatePageId(urlItem) {
  const { TemplateAlias, nodeID } = urlItem;
  const aliasPart = TemplateAlias || "UnknownTemplate";
  const nodePart = nodeID || "UnknownNode";
  return `${aliasPart}_${nodePart}`;
}
|
|
46
|
+
/**
 * Tell whether a property name looks like it carries primary content.
 *
 * The check is a case-insensitive substring match against a fixed list of
 * fragments such as "title", "description" or "url".
 *
 * @param {string} key - Property name to classify.
 * @returns {boolean} `true` when the lower-cased key contains any fragment.
 */
function isImportantKey(key) {
  const needle = key.toLowerCase();
  const fragments = [
    "title", "name", "heading", "description",
    "summary", "content", "text", "body",
    "value", "label", "caption", "alt",
    "message", "url", "link", "href"
  ];
  for (const fragment of fragments) {
    if (needle.includes(fragment)) {
      return true;
    }
  }
  return false;
}
|
|
68
|
+
/**
 * Words that mark a property name as technical metadata. Matching is done on
 * whole words of the key (see `isMetadataKey`), not on raw substrings.
 */
const METADATA_KEY_WORDS = new Set([
  "id", "guid", "key", "nodeid",
  "created", "updated", "modified", "timestamp", "date",
  "sort", "order", "index", "position",
  "published", "hidden", "visible", "enabled", "status",
  "type", "contenttype", "template", "alias", "path",
  "meta", "metadata", "seo", "schema", "properties"
]);

/**
 * Tell whether a property name denotes technical metadata (ids, timestamps,
 * flags, system fields) rather than page content.
 *
 * The key is split into words on camelCase boundaries and on any
 * non-alphanumeric separator, then compared case-insensitively with
 * `METADATA_KEY_WORDS`. Matching whole words instead of substrings avoids
 * false positives such as "video", "sidebar", "width" or "grid" (which all
 * contain "id"), "border" (contains "order") and "keywords" (contains "key").
 *
 * @param {string} key - Property name to classify.
 * @returns {boolean} `true` when any word of the key (or its simple plural,
 *   or the key with separators removed) is a metadata word.
 */
function isMetadataKey(key) {
  const words = key
    .replace(/([a-z0-9])([A-Z])/g, "$1 $2") // fooBar -> foo Bar
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2") // SEOTitle -> SEO Title
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter(Boolean);
  // Catches compounds written with a separator or inner capital, e.g. "timeStamp".
  if (METADATA_KEY_WORDS.has(words.join(""))) {
    return true;
  }
  return words.some((word) => {
    if (METADATA_KEY_WORDS.has(word)) {
      return true;
    }
    // Accept simple plurals such as "ids" or "dates".
    return word.endsWith("s") && METADATA_KEY_WORDS.has(word.slice(0, -1));
  });
}
|
|
103
|
+
/**
 * Length of the JSON serialisation of `value`, or 0 when the value has no
 * JSON form (`undefined`, functions, symbols) or cannot be serialised.
 */
function measureSerializedLength(value) {
  try {
    const json = JSON.stringify(value);
    return typeof json === "string" ? json.length : 0;
  } catch {
    return 0;
  }
}

/**
 * Shrink `content` recursively so that it roughly fits a token budget.
 *
 * - Depth above 10 yields a `{ _truncated }` marker.
 * - A budget below 10 yields `undefined` (the caller drops the value).
 * - Strings longer than 2000 characters are cut and suffixed with "...".
 * - Arrays keep at most `max(3, floor(15 / (depth + 1)))` items, splitting the
 *   budget evenly between them, and get a `_note` entry when items were left out.
 * - Objects lose keys flagged by `isMetadataKey`; keys flagged by
 *   `isImportantKey` share 40% of the budget, and the remaining keys are added
 *   smallest-first until the object would exceed the budget.
 *
 * @param {*} content - Value to truncate.
 * @param {number} maxTokens - Token budget for this value.
 * @param {number} [currentDepth=0] - Current recursion depth.
 * @returns {*} The truncated value, or `undefined` when nothing could be kept.
 */
function recursiveTruncate(content, maxTokens, currentDepth = 0) {
  if (currentDepth > 10) {
    return { _truncated: "Max depth reached" };
  }
  if (maxTokens < 10) {
    return void 0;
  }
  if (content === null || content === void 0) {
    return content;
  }
  if (typeof content !== "object") {
    if (typeof content === "string" && content.length > 2e3) {
      return content.substring(0, 2e3) + "...";
    }
    return content;
  }
  if (Array.isArray(content)) {
    if (content.length === 0) {
      return content;
    }
    const itemLimit = Math.max(3, Math.floor(15 / (currentDepth + 1)));
    const tokensPerItem = Math.floor(maxTokens / Math.min(content.length, itemLimit));
    const truncatedArray = content
      .slice(0, itemLimit)
      .map((item) => recursiveTruncate(item, tokensPerItem, currentDepth + 1))
      .filter((item) => item !== void 0);
    if (content.length > truncatedArray.length) {
      truncatedArray.push({
        _note: `... and ${content.length - truncatedArray.length} more items`
      });
    }
    return truncatedArray;
  }
  const truncatedObj = {};
  const withoutMetadata = Object.entries(content).filter(([key]) => !isMetadataKey(key));
  if (withoutMetadata.length === 0) {
    return { _note: "Only metadata, removed" };
  }
  const importantEntries = withoutMetadata.filter(([key]) => isImportantKey(key));
  const normalEntries = withoutMetadata.filter(([key]) => !isImportantKey(key));
  const importantBudget = Math.floor(maxTokens * 0.4);
  const tokensPerImportant = importantEntries.length > 0 ? Math.floor(importantBudget / importantEntries.length) : 0;
  for (const [key, value] of importantEntries) {
    const processedValue = recursiveTruncate(value, tokensPerImportant, currentDepth + 1);
    if (processedValue !== void 0) {
      truncatedObj[key] = processedValue;
    }
  }
  const usedTokens = estimateContentTokens(truncatedObj);
  const remainingBudget = maxTokens - usedTokens;
  if (remainingBudget > 100 && normalEntries.length > 0) {
    // Measure each value once. JSON.stringify returns undefined for values
    // such as `undefined` or functions, so reading `.length` on it directly
    // would throw; the helper maps those cases to size 0.
    const sortedNormal = normalEntries
      .map((entry) => ({ entry, size: measureSerializedLength(entry[1]) }))
      .sort((a, b) => a.size - b.size)
      .map(({ entry }) => entry);
    const tokensPerNormal = Math.floor(remainingBudget / sortedNormal.length);
    for (const [key, value] of sortedNormal) {
      const processedValue = recursiveTruncate(value, tokensPerNormal, currentDepth + 1);
      if (processedValue !== void 0) {
        truncatedObj[key] = processedValue;
        const newSize = estimateContentTokens(truncatedObj);
        if (newSize > maxTokens) {
          // Entries are ordered smallest-first, so once one does not fit we stop.
          delete truncatedObj[key];
          break;
        }
      }
    }
  }
  return Object.keys(truncatedObj).length > 0 ? truncatedObj : void 0;
}
|
|
171
|
+
/**
 * Last-resort size reduction: delete top-level keys from a shallow copy of
 * `content` until `estimateContentTokens` reports it fits `maxTokens`.
 *
 * Keys not flagged by `isImportantKey` are removed first; each removal is
 * reported with `console.warn`.
 *
 * @param {object} content - Object to shrink (not mutated).
 * @param {number} maxTokens - Token budget.
 * @returns {object} The reduced copy (possibly empty).
 */
function emergencyTruncate(content, maxTokens) {
  const survivor = { ...content };
  const importanceRank = (name) => (isImportantKey(name) ? 1 : 0);
  const removalOrder = Object.keys(survivor);
  removalOrder.sort((left, right) => importanceRank(left) - importanceRank(right));
  for (let i = 0; i < removalOrder.length; i += 1) {
    const fits = estimateContentTokens(survivor) <= maxTokens;
    if (fits) {
      break;
    }
    const doomed = removalOrder[i];
    delete survivor[doomed];
    console.warn(`   Emergency: removed "${doomed}"`);
  }
  return survivor;
}
|
|
186
|
+
/**
 * Rough token estimate for a value: the length of its JSON serialisation
 * divided by three, rounded up.
 *
 * @param {*} content - Value to measure.
 * @returns {number} Estimated token count, or 0 when the value has no JSON
 *   form or serialisation throws.
 */
function estimateContentTokens(content) {
  let serialized;
  try {
    serialized = JSON.stringify(content);
  } catch {
    return 0;
  }
  // JSON.stringify yields undefined for values such as `undefined` or functions.
  if (typeof serialized !== "string") {
    return 0;
  }
  return Math.ceil(serialized.length / 3);
}
|
|
194
|
+
/**
 * Return `content` unchanged when its estimated size fits `maxTokens`;
 * otherwise shrink it with `recursiveTruncate`, falling back to
 * `emergencyTruncate` when that is not enough.
 *
 * @param {*} content - Page content to bound.
 * @param {number} [maxTokens=1e5] - Token budget.
 * @returns {*} The original content, a truncated object, or a small
 *   `{ _error }` marker when non-object content could not be truncated.
 */
function truncateContentIfNeeded(content, maxTokens = 1e5) {
  const estimatedTokens = estimateContentTokens(content);
  if (estimatedTokens <= maxTokens) {
    return content;
  }
  console.warn(`\u26A0\uFE0F Content too large (${estimatedTokens} tokens > ${maxTokens} limit), truncating recursively...`);
  const truncatedContent = recursiveTruncate(content, maxTokens, 0);
  const isUsableObject = Boolean(truncatedContent) && typeof truncatedContent === "object" && !Array.isArray(truncatedContent);
  if (!isUsableObject) {
    // Do not wrap the oversized original in an error object: that can never
    // fit the budget and would be stripped down to {} right afterwards.
    console.error(`\u274C Recursive truncation produced no usable object, performing emergency truncation...`);
    if (content && typeof content === "object" && !Array.isArray(content)) {
      return emergencyTruncate(content, maxTokens);
    }
    return { _error: "Content truncation failed" };
  }
  const finalTokens = estimateContentTokens(truncatedContent);
  const preservedKeys = Object.keys(truncatedContent).length;
  const originalKeys = Object.keys(content).length;
  console.log(`\u2705 Content truncated: ${estimatedTokens} \u2192 ${finalTokens} tokens (preserved ${preservedKeys}/${originalKeys} root keys)`);
  if (finalTokens > maxTokens) {
    // Long strings are only cut to 2000 characters by recursiveTruncate, so
    // the result can still exceed the budget.
    console.error(`\u274C Recursive truncation insufficient (${finalTokens} > ${maxTokens}), performing emergency truncation...`);
    return emergencyTruncate(truncatedContent, maxTokens);
  }
  return truncatedContent;
}
|
|
215
|
+
|
|
13
216
|
function buildLLMSTemplatePrompt(request) {
|
|
217
|
+
const jsonTokens = estimateContentTokens(request.pageContent);
|
|
218
|
+
const toonData = encode(request.pageContent, { delimiter: " " });
|
|
219
|
+
const toonTokens = estimateContentTokens(toonData);
|
|
220
|
+
console.log(`\u{1F4CA} ${request.url}: JSON ${jsonTokens} \u2192 TOON ${toonTokens} (${((1 - toonTokens / jsonTokens) * 100).toFixed(0)}% saved)`);
|
|
14
221
|
return `# LLMS.txt-Optimized Mustache Template Generator
|
|
15
222
|
|
|
16
223
|
You are an expert at creating **Mustache.js templates** that generate **LLM knowledge base entries** following the [\`llms.txt\` standard](https://llmstxt.org/).
|
|
17
224
|
|
|
18
225
|
---
|
|
19
226
|
|
|
227
|
+
## \u26A0\uFE0F CRITICAL RULES - NEVER VIOLATE
|
|
228
|
+
|
|
229
|
+
### 1. DATA-DRIVEN CONTENT ONLY
|
|
230
|
+
- **EVERY piece of content** must come from a Mustache binding: \`{{propertyName}}\`
|
|
231
|
+
- **NEVER invent, assume, or add content** that doesn't exist in the provided data
|
|
232
|
+
- **NO hardcoded descriptions, lists, or facts**
|
|
233
|
+
- If a property doesn't exist in data, don't create a section for it
|
|
234
|
+
|
|
235
|
+
### 2. ALLOWED CONTEXTUAL ADDITIONS
|
|
236
|
+
You MAY add:
|
|
237
|
+
- **Section headings** that describe what the data represents (e.g., "Key Features", "Technical Details")
|
|
238
|
+
- **Brief introductory phrases** that set context (e.g., "The following items are available:")
|
|
239
|
+
- **Structural markers** for clarity (e.g., "Navigation:", "Metadata:")
|
|
240
|
+
|
|
241
|
+
You MAY NOT add:
|
|
242
|
+
- Descriptions of features/benefits not in data
|
|
243
|
+
- Explanatory text about what something does
|
|
244
|
+
- Lists of items not present in data
|
|
245
|
+
- Assumptions about the page purpose
|
|
246
|
+
|
|
247
|
+
### 3. UNDERSTANDING TOON FORMAT
|
|
248
|
+
|
|
249
|
+
The data below is in **TOON format** (Token-Oriented Object Notation) for efficiency.
|
|
250
|
+
|
|
251
|
+
**How to read TOON:**
|
|
252
|
+
- \`propertyName: value\` \u2192 Single property
|
|
253
|
+
- \`array[3]{prop1,prop2}\` \u2192 Array of 3 objects with properties prop1, prop2
|
|
254
|
+
- Properties in \`{braces}\` are the **exact field names** to use in Mustache bindings
|
|
255
|
+
|
|
256
|
+
**Example:**
|
|
257
|
+
\`\`\`toon
|
|
258
|
+
users[2]{id,name,role}:
|
|
259
|
+
1 Alice admin
|
|
260
|
+
2 Bob user
|
|
261
|
+
\`\`\`
|
|
262
|
+
|
|
263
|
+
**Your Mustache template:**
|
|
264
|
+
\`\`\`mustache
|
|
265
|
+
{{#users}}
|
|
266
|
+
- {{id}}: {{name}} ({{role}})
|
|
267
|
+
{{/users}}
|
|
268
|
+
\`\`\`
|
|
269
|
+
|
|
270
|
+
**CRITICAL:** Use the EXACT property names shown in TOON \`{braces}\` for your Mustache bindings.
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
20
274
|
## \u{1F3AF} TRUE PURPOSE: Help LLMs Answer Questions Efficiently
|
|
21
275
|
|
|
22
276
|
**Critical Understanding:**
|
|
23
|
-
These \`.md\` files are **
|
|
277
|
+
These \`.md\` files are **LLM knowledge base entries** designed for **inference** (understanding), not training.
|
|
24
278
|
|
|
25
279
|
**Primary Goal:** Enable LLMs to quickly answer user questions about this website page within **limited context windows** (typically 200K tokens).
|
|
26
280
|
|
|
@@ -38,94 +292,102 @@ These \`.md\` files are **NOT website copies** \u2014 they are **LLM knowledge b
|
|
|
38
292
|
- **Template Alias:** ${request.templateAlias}
|
|
39
293
|
- **JSON Path:** ${request.jpath}
|
|
40
294
|
|
|
41
|
-
### Available Data
|
|
42
|
-
|
|
43
|
-
|
|
295
|
+
### Available Data (TOON Format)
|
|
296
|
+
|
|
297
|
+
\`\`\`toon
|
|
298
|
+
${toonData}
|
|
44
299
|
\`\`\`
|
|
45
300
|
|
|
46
301
|
---
|
|
47
302
|
|
|
48
303
|
## \u{1F9E0} Content Philosophy: Think "Knowledge Base Entry"
|
|
49
304
|
|
|
50
|
-
### 1. Start with
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
- Use
|
|
305
|
+
### 1. Start with the Most Important Data
|
|
306
|
+
- Lead with title/heading properties
|
|
307
|
+
- Add main description/summary if available
|
|
308
|
+
- Use blockquote (\`> \`) for key summaries
|
|
54
309
|
|
|
55
310
|
### 2. Structure for Question-Answering
|
|
56
311
|
Anticipate questions an LLM might need to answer:
|
|
57
|
-
- "What is this?" \u2192 Main heading +
|
|
58
|
-
- "What does it
|
|
59
|
-
- "Who is it for?" \u2192 Target audience
|
|
60
|
-
- "
|
|
61
|
-
- "What are the details?" \u2192 Technical specs/pricing/etc.
|
|
312
|
+
- "What is this?" \u2192 Main heading + description properties
|
|
313
|
+
- "What does it offer?" \u2192 Lists of items/features from data
|
|
314
|
+
- "Who is it for?" \u2192 Target audience properties (if they exist)
|
|
315
|
+
- "What are the details?" \u2192 Technical/metadata properties
|
|
62
316
|
|
|
63
|
-
### 3. Prioritize
|
|
317
|
+
### 3. Prioritize by Data Importance
|
|
64
318
|
**Essential First:**
|
|
65
|
-
-
|
|
66
|
-
-
|
|
67
|
-
-
|
|
319
|
+
- Title/name/heading properties
|
|
320
|
+
- Description/summary properties
|
|
321
|
+
- Main content arrays
|
|
68
322
|
|
|
69
323
|
**Supporting Details Second:**
|
|
70
|
-
-
|
|
71
|
-
-
|
|
72
|
-
-
|
|
324
|
+
- Feature lists, item arrays
|
|
325
|
+
- Nested objects with details
|
|
326
|
+
- Links and references
|
|
73
327
|
|
|
74
|
-
**
|
|
75
|
-
-
|
|
76
|
-
-
|
|
328
|
+
**Metadata Last:**
|
|
329
|
+
- URLs, IDs (if useful for context)
|
|
330
|
+
- Timestamps, technical details
|
|
77
331
|
|
|
78
332
|
### 4. Optimize for Scanability
|
|
79
|
-
- Use **hierarchical headings** (\`#\`, \`##\`, \`###\`)
|
|
80
|
-
- Employ **bullet lists** for
|
|
81
|
-
- Keep
|
|
82
|
-
- Use
|
|
333
|
+
- Use **hierarchical headings** (\`#\`, \`##\`, \`###\`)
|
|
334
|
+
- Employ **bullet lists** for arrays
|
|
335
|
+
- Keep structure **clean and semantic**
|
|
336
|
+
- Use Markdown only (no HTML)
|
|
83
337
|
|
|
84
338
|
---
|
|
85
339
|
|
|
86
340
|
## \u{1F527} Technical Principles (Key-Agnostic Design)
|
|
87
341
|
|
|
88
|
-
### 1.
|
|
89
|
-
|
|
90
|
-
-
|
|
91
|
-
-
|
|
92
|
-
- **Position in JSON:** Root-level = high importance; nested = contextual details
|
|
93
|
-
- **Semantic patterns:** URLs, images, dates, IDs
|
|
342
|
+
### 1. Extract Property Names from TOON
|
|
343
|
+
Look at TOON headers to identify properties:
|
|
344
|
+
- \`{id,name,role}\` \u2192 Use \`{{id}}\`, \`{{name}}\`, \`{{role}}\`
|
|
345
|
+
- \`breadcrumbsLinks[5]{title,link}\` \u2192 Use \`{{#breadcrumbsLinks}}{{title}} {{link}}{{/breadcrumbsLinks}}\`
|
|
94
346
|
|
|
95
347
|
### 2. Exact Property Bindings
|
|
96
|
-
- Always use
|
|
348
|
+
- Always use **exact property name** from TOON: \`{{actualKeyName}}\`
|
|
97
349
|
- Do NOT rename or modify binding identifiers
|
|
98
|
-
-
|
|
350
|
+
- Mustache bindings must match TOON property names precisely
|
|
99
351
|
|
|
100
352
|
### 3. Humanized Section Headings
|
|
101
353
|
While bindings stay exact, convert keys to readable headings:
|
|
102
354
|
- \`productFeatures\` \u2192 "Product Features"
|
|
103
|
-
- \`
|
|
104
|
-
- \`
|
|
105
|
-
|
|
106
|
-
### 4.
|
|
107
|
-
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
355
|
+
- \`supportPageItems\` \u2192 "Available Support Topics"
|
|
356
|
+
- \`breadcrumbsLinks\` \u2192 "Navigation Path"
|
|
357
|
+
|
|
358
|
+
### 4. Working with Arrays
|
|
359
|
+
When you see \`arrayName[N]{prop1,prop2}\`:
|
|
360
|
+
- Use \`{{#arrayName.0}}\` to check if array exists
|
|
361
|
+
- Iterate with \`{{#arrayName}}\`
|
|
362
|
+
- Access properties with \`{{prop1}}\`, \`{{prop2}}\`
|
|
363
|
+
|
|
364
|
+
**Example:**
|
|
365
|
+
\`\`\`toon
|
|
366
|
+
items[3]{title,description}:
|
|
367
|
+
...
|
|
368
|
+
\`\`\`
|
|
369
|
+
\u2192
|
|
370
|
+
\`\`\`mustache
|
|
371
|
+
{{#items.0}}
|
|
372
|
+
## Items
|
|
373
|
+
{{#items}}
|
|
374
|
+
- {{title}}: {{description}}
|
|
375
|
+
{{/items}}
|
|
376
|
+
{{/items.0}}
|
|
377
|
+
\`\`\`
|
|
115
378
|
|
|
116
379
|
### 5. Noise Filtering
|
|
117
|
-
**Exclude
|
|
118
|
-
- IDs
|
|
119
|
-
- Timestamps
|
|
120
|
-
-
|
|
121
|
-
- System
|
|
380
|
+
**Exclude technical metadata** (if present in TOON):
|
|
381
|
+
- IDs: \`id\`, \`nodeId\`, \`_id\`, \`guid\`
|
|
382
|
+
- Timestamps: \`createdAt\`, \`updatedAt\`
|
|
383
|
+
- Flags: \`isPublished\`, \`sortOrder\`, \`hidden\`
|
|
384
|
+
- System: \`_type\`, \`contentType\`
|
|
122
385
|
|
|
123
386
|
### 6. Hierarchy & Nesting
|
|
124
387
|
- **Root level** \u2192 \`#\` (H1) \u2014 one per document
|
|
125
388
|
- **Primary sections** \u2192 \`##\` (H2)
|
|
126
389
|
- **Sub-sections** \u2192 \`###\` (H3)
|
|
127
|
-
- **Details** \u2192 \`####\` (H4) \u2014 avoid
|
|
128
|
-
- Heading depth corresponds to JSON nesting, but stay practical
|
|
390
|
+
- **Details** \u2192 \`####\` (H4) \u2014 avoid deeper
|
|
129
391
|
|
|
130
392
|
---
|
|
131
393
|
|
|
@@ -133,66 +395,122 @@ While bindings stay exact, convert keys to readable headings:
|
|
|
133
395
|
|
|
134
396
|
### Mandatory Opening
|
|
135
397
|
\`\`\`mustache
|
|
136
|
-
# {{
|
|
398
|
+
# {{primaryTitleProperty}}
|
|
137
399
|
|
|
138
|
-
{{#
|
|
139
|
-
> {{
|
|
140
|
-
{{/
|
|
400
|
+
{{#summaryProperty}}
|
|
401
|
+
> {{summaryProperty}}
|
|
402
|
+
{{/summaryProperty}}
|
|
141
403
|
\`\`\`
|
|
142
404
|
|
|
143
|
-
###
|
|
405
|
+
### Example Sections (adapt to actual TOON data)
|
|
144
406
|
\`\`\`mustache
|
|
145
407
|
{{#mainDescription}}
|
|
408
|
+
## Overview
|
|
146
409
|
{{mainDescription}}
|
|
147
410
|
{{/mainDescription}}
|
|
148
411
|
|
|
149
|
-
{{#
|
|
150
|
-
##
|
|
151
|
-
{{#
|
|
152
|
-
|
|
153
|
-
{{
|
|
154
|
-
{{/
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
{{
|
|
161
|
-
{{/
|
|
162
|
-
{{/
|
|
163
|
-
|
|
164
|
-
{{#technicalDetails.0}}
|
|
165
|
-
## Technical Details
|
|
166
|
-
{{#technicalDetails}}
|
|
167
|
-
- **{{detailLabel}}**: {{detailValue}}
|
|
168
|
-
{{/technicalDetails}}
|
|
169
|
-
{{/technicalDetails.0}}
|
|
412
|
+
{{#itemsArray.0}}
|
|
413
|
+
## Available Items
|
|
414
|
+
{{#itemsArray}}
|
|
415
|
+
### {{itemTitle}}
|
|
416
|
+
{{itemDescription}}
|
|
417
|
+
{{/itemsArray}}
|
|
418
|
+
{{/itemsArray.0}}
|
|
419
|
+
|
|
420
|
+
{{#navigationLinks.0}}
|
|
421
|
+
## Navigation
|
|
422
|
+
{{#navigationLinks}}
|
|
423
|
+
- [{{title}}]({{link}})
|
|
424
|
+
{{/navigationLinks}}
|
|
425
|
+
{{/navigationLinks.0}}
|
|
170
426
|
\`\`\`
|
|
171
427
|
|
|
172
|
-
**
|
|
428
|
+
**Important:** These are examples. Your template must match the ACTUAL TOON structure provided.
|
|
173
429
|
|
|
174
430
|
---
|
|
175
431
|
|
|
176
432
|
## \u2705 Output Requirements
|
|
177
433
|
|
|
178
|
-
1. **Output ONLY the Mustache template** \u2014 no explanations, no code fences, no preamble
|
|
179
|
-
2. **Use exact
|
|
434
|
+
1. **Output ONLY the Mustache template** \u2014 no explanations, no markdown code fences, no preamble
|
|
435
|
+
2. **Use exact property names from TOON \`{braces}\`** in all bindings
|
|
180
436
|
3. **Generate clean Markdown** \u2014 no HTML, entities, or attributes
|
|
181
|
-
4. **
|
|
182
|
-
5. **
|
|
183
|
-
6. **
|
|
184
|
-
7. **
|
|
437
|
+
4. **Data-driven content** \u2014 no invented facts or descriptions
|
|
438
|
+
5. **Contextual headings allowed** \u2014 but content must be from data
|
|
439
|
+
6. **Be concise** \u2014 optimize for limited context windows
|
|
440
|
+
7. **Structure for questions** \u2014 LLMs should easily extract facts
|
|
441
|
+
|
|
442
|
+
---
|
|
443
|
+
|
|
444
|
+
## \u26A0\uFE0F CRITICAL: Mustache Syntax Validation
|
|
445
|
+
|
|
446
|
+
**Every \`{{#tag}}\` MUST have matching \`{{/tag}}\`**
|
|
447
|
+
|
|
448
|
+
### Common Errors (from real failures):
|
|
449
|
+
|
|
450
|
+
\u274C **Missing closing tag:**
|
|
451
|
+
\`\`\`mustache
|
|
452
|
+
{{#pageDescription}}
|
|
453
|
+
content
|
|
454
|
+
// \u274C Missing {{/pageDescription}}
|
|
455
|
+
\`\`\`
|
|
456
|
+
|
|
457
|
+
\u274C **Nested check without outer closing:**
|
|
458
|
+
\`\`\`mustache
|
|
459
|
+
{{#items.0}}
|
|
460
|
+
{{#items}}...{{/items}}
|
|
461
|
+
// \u274C Missing {{/items.0}}
|
|
462
|
+
\`\`\`
|
|
463
|
+
|
|
464
|
+
\u274C **Capitalization mismatch:**
|
|
465
|
+
\`\`\`mustache
|
|
466
|
+
{{#aIFeaturesCTATitle}}
|
|
467
|
+
...
|
|
468
|
+
{{/aiFeaturesCTATitle}} \u274C Different capitalization!
|
|
469
|
+
\`\`\`
|
|
470
|
+
|
|
471
|
+
### Validation Checklist:
|
|
472
|
+
|
|
473
|
+
**Before output:**
|
|
474
|
+
1. Count \`{{#\` tags = ___
|
|
475
|
+
2. Count \`{{/\` tags = ___
|
|
476
|
+
3. Numbers match? If NO \u2192 Find and add missing closing tags
|
|
477
|
+
4. Tag names exact match (including dots, numbers, capitalization)?
|
|
478
|
+
|
|
479
|
+
\u2705 **Valid example:**
|
|
480
|
+
\`\`\`mustache
|
|
481
|
+
{{#section}} \u2190 1 open
|
|
482
|
+
{{#nested.0}} \u2190 2 open
|
|
483
|
+
content
|
|
484
|
+
{{/nested.0}} \u2190 2 close
|
|
485
|
+
{{/section}} \u2190 1 close
|
|
486
|
+
\`\`\`
|
|
487
|
+
Count: 2 = 2 \u2713
|
|
185
488
|
|
|
186
489
|
---
|
|
187
490
|
|
|
188
491
|
## \u{1F680} Your Task
|
|
189
492
|
|
|
190
|
-
Analyze the provided
|
|
493
|
+
Analyze the provided TOON data structure and **generate a Mustache template** that:
|
|
494
|
+
|
|
495
|
+
1. **Uses ONLY data from TOON** (no invented content)
|
|
496
|
+
2. **Extracts exact property names from \`{braces}\`**
|
|
497
|
+
3. **Adds logical section headings** for context
|
|
498
|
+
4. **Structures data for question-answering**
|
|
499
|
+
5. **Prioritizes most important properties first**
|
|
500
|
+
6. **Remains universal** (works for any data shape)
|
|
501
|
+
7. **\u2705 ALL Mustache tags properly closed**
|
|
502
|
+
|
|
503
|
+
**Remember:**
|
|
504
|
+
- Parse TOON structure naturally \u2705
|
|
505
|
+
- Use exact property names from \`{braces}\` \u2705\u2705\u2705
|
|
506
|
+
- Headings can be contextual \u2705
|
|
507
|
+
- Content must be from data \u2705\u2705\u2705
|
|
508
|
+
- No made-up descriptions \u274C
|
|
509
|
+
- No assumed features \u274C
|
|
510
|
+
- **Every {{#tag}} has {{/tag}}** \u2705\u2705\u2705
|
|
191
511
|
|
|
192
|
-
**
|
|
193
|
-
|
|
194
|
-
- What's the core value/purpose this page communicates?
|
|
195
|
-
- How can I structure this for maximum inference efficiency?
|
|
512
|
+
**Final Step Before Output:**
|
|
513
|
+
Count your \`{{#\` and \`{{/\` tags. If numbers don't match, find and add missing closing tags.
|
|
196
514
|
|
|
197
515
|
Generate the template now.
|
|
198
516
|
`;
|
|
@@ -216,7 +534,7 @@ class AnthropicClient {
|
|
|
216
534
|
const response = await this.client.messages.create({
|
|
217
535
|
model: this.model,
|
|
218
536
|
max_tokens: 4e3,
|
|
219
|
-
temperature: 0.
|
|
537
|
+
temperature: 0.3,
|
|
220
538
|
messages: [{
|
|
221
539
|
role: "user",
|
|
222
540
|
content: prompt
|
|
@@ -662,76 +980,6 @@ function getValueType(value) {
|
|
|
662
980
|
return typeof value;
|
|
663
981
|
}
|
|
664
982
|
|
|
665
|
-
function extractPageContent(umbracoData, jpath) {
|
|
666
|
-
try {
|
|
667
|
-
const result = JSONPath({
|
|
668
|
-
path: jpath,
|
|
669
|
-
json: umbracoData.SiteData,
|
|
670
|
-
wrap: false
|
|
671
|
-
});
|
|
672
|
-
if (!result || Array.isArray(result) && result.length === 0) {
|
|
673
|
-
return null;
|
|
674
|
-
}
|
|
675
|
-
const pageContent = Array.isArray(result) ? result[0] : result;
|
|
676
|
-
return excludeChildrenFromContent(pageContent);
|
|
677
|
-
} catch (error) {
|
|
678
|
-
console.error(`Failed to extract content for path ${jpath}:`, error);
|
|
679
|
-
return null;
|
|
680
|
-
}
|
|
681
|
-
}
|
|
682
|
-
function excludeChildrenFromContent(content) {
|
|
683
|
-
if (!content || typeof content !== "object") {
|
|
684
|
-
return content;
|
|
685
|
-
}
|
|
686
|
-
const cleanContent = { ...content };
|
|
687
|
-
if ("children" in cleanContent) {
|
|
688
|
-
delete cleanContent.children;
|
|
689
|
-
}
|
|
690
|
-
return cleanContent;
|
|
691
|
-
}
|
|
692
|
-
function generatePageId(urlItem) {
|
|
693
|
-
const templateAlias = urlItem.TemplateAlias || "UnknownTemplate";
|
|
694
|
-
const nodeID = urlItem.nodeID || "UnknownNode";
|
|
695
|
-
return `${templateAlias}_${nodeID}`;
|
|
696
|
-
}
|
|
697
|
-
function estimateContentTokens(content) {
|
|
698
|
-
try {
|
|
699
|
-
const jsonString = JSON.stringify(content);
|
|
700
|
-
return Math.ceil(jsonString.length / 4);
|
|
701
|
-
} catch {
|
|
702
|
-
return 0;
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
function truncateContentIfNeeded(content, maxTokens = 18e4) {
|
|
706
|
-
const estimatedTokens = estimateContentTokens(content);
|
|
707
|
-
if (estimatedTokens <= maxTokens) {
|
|
708
|
-
return content;
|
|
709
|
-
}
|
|
710
|
-
console.warn(`Content too large (${estimatedTokens} tokens > ${maxTokens} limit), truncating...`);
|
|
711
|
-
const truncatedContent = { ...content };
|
|
712
|
-
const sortedKeys = Object.keys(truncatedContent).sort((a, b) => {
|
|
713
|
-
const sizeA = estimateContentTokens({ [a]: truncatedContent[a] });
|
|
714
|
-
const sizeB = estimateContentTokens({ [b]: truncatedContent[b] });
|
|
715
|
-
return sizeB - sizeA;
|
|
716
|
-
});
|
|
717
|
-
for (const key of sortedKeys) {
|
|
718
|
-
if (estimateContentTokens(truncatedContent) <= maxTokens) {
|
|
719
|
-
break;
|
|
720
|
-
}
|
|
721
|
-
const value = truncatedContent[key];
|
|
722
|
-
if (Array.isArray(value) && value.length > 10) {
|
|
723
|
-
truncatedContent[key] = value.slice(0, 10);
|
|
724
|
-
console.warn(`Truncated array ${key} from ${value.length} to 10 items`);
|
|
725
|
-
} else if (typeof value === "string" && value.length > 5e3) {
|
|
726
|
-
truncatedContent[key] = value.substring(0, 5e3) + "...";
|
|
727
|
-
console.warn(`Truncated string ${key} from ${value.length} to 5000 chars`);
|
|
728
|
-
}
|
|
729
|
-
}
|
|
730
|
-
const finalTokens = estimateContentTokens(truncatedContent);
|
|
731
|
-
console.log(`Content truncated from ${estimatedTokens} to ${finalTokens} tokens`);
|
|
732
|
-
return truncatedContent;
|
|
733
|
-
}
|
|
734
|
-
|
|
735
983
|
function shouldGenerateTemplate(umbracoData, urlItem) {
|
|
736
984
|
try {
|
|
737
985
|
const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
|
|
@@ -1067,7 +1315,7 @@ class TemplateGenerator {
|
|
|
1067
1315
|
const pageId = generatePageId(urlItem);
|
|
1068
1316
|
console.log(`Generating new template for ${pageId} (${urlItem.url})`);
|
|
1069
1317
|
const tokensBeforeTruncation = estimateContentTokens(pageContent);
|
|
1070
|
-
const truncatedContent = truncateContentIfNeeded(pageContent,
|
|
1318
|
+
const truncatedContent = truncateContentIfNeeded(pageContent, this.config.maxTokens);
|
|
1071
1319
|
const tokensAfterTruncation = estimateContentTokens(truncatedContent);
|
|
1072
1320
|
if (tokensBeforeTruncation > tokensAfterTruncation) {
|
|
1073
1321
|
console.warn(`Page ${pageId} content truncated: ${tokensBeforeTruncation} -> ${tokensAfterTruncation} tokens`);
|