@voicenter-team/nuxt-llms-generator 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,24 +3,278 @@ import { join, dirname, basename } from 'path';
3
3
  import { slugify } from 'transliteration';
4
4
  import Mustache from 'mustache';
5
5
  import Anthropic from '@anthropic-ai/sdk';
6
- import { createHash } from 'crypto';
6
+ import { encode } from '@toon-format/toon';
7
7
  import { JSONPath } from 'jsonpath-plus';
8
- import { w as withErrorHandling } from '../shared/nuxt-llms-generator.bc139143.mjs';
8
+ import { createHash } from 'crypto';
9
+ import { w as withErrorHandling } from '../shared/nuxt-llms-generator.db76a78e.mjs';
9
10
  import '@nuxt/kit';
10
11
  import 'zod';
11
12
  import 'node-html-markdown';
12
13
 
14
/**
 * Looks up a page node inside `umbracoData.SiteData` using a JSONPath
 * expression and returns it without its `children` property.
 *
 * @param {object} umbracoData - Object whose `SiteData` property is queried.
 * @param {string} jpath - JSONPath expression handed to `JSONPath`.
 * @returns {*} The first match (or the single unwrapped match) after
 *   `excludeChildrenFromContent`, or `null` when the query yields a falsy
 *   value, an empty array, or throws.
 */
function extractPageContent(umbracoData, jpath) {
  try {
    // Everything stays inside the try so that a missing `umbracoData` is
    // reported and turned into `null` instead of propagating.
    const matches = JSONPath({ path: jpath, json: umbracoData.SiteData, wrap: false });
    const isList = Array.isArray(matches);
    const isEmptyList = isList && matches.length === 0;
    if (!matches || isEmptyList) {
      return null;
    }
    // With `wrap: false` the query may hand back a bare value or a list;
    // only the first entry of a list is used.
    const node = isList ? matches[0] : matches;
    return excludeChildrenFromContent(node);
  } catch (error) {
    console.error(`Failed to extract content for path ${jpath}:`, error);
    return null;
  }
}
31
/**
 * Returns a shallow copy of `content` with the root-level `children`
 * property left out. The input object is not modified.
 *
 * @param {*} content - Value to clean.
 * @returns {*} `content` itself when it is falsy or not an object; otherwise a
 *   new plain object holding every own enumerable property except `children`.
 */
function excludeChildrenFromContent(content) {
  const isObjectLike = Boolean(content) && typeof content === "object";
  if (!isObjectLike) {
    return content;
  }
  // Only the root-level key is dropped; nested `children` keys are copied as-is
  // because the copy is shallow.
  const { children: _omitted, ...withoutChildren } = content;
  return withoutChildren;
}
41
/**
 * Builds a page identifier of the form `<TemplateAlias>_<nodeID>`.
 *
 * @param {{TemplateAlias?: *, nodeID?: *}} urlItem - Item to derive the id from.
 * @returns {string} The identifier; a falsy `TemplateAlias` is replaced by
 *   "UnknownTemplate" and a falsy `nodeID` by "UnknownNode".
 */
function generatePageId(urlItem) {
  const { TemplateAlias, nodeID } = urlItem;
  return `${TemplateAlias || "UnknownTemplate"}_${nodeID || "UnknownNode"}`;
}
46
+ function isImportantKey(key) {
47
+ const importantPatterns = [
48
+ "title",
49
+ "name",
50
+ "heading",
51
+ "description",
52
+ "summary",
53
+ "content",
54
+ "text",
55
+ "body",
56
+ "value",
57
+ "label",
58
+ "caption",
59
+ "alt",
60
+ "message",
61
+ "url",
62
+ "link",
63
+ "href"
64
+ ];
65
+ const lowerKey = key.toLowerCase();
66
+ return importantPatterns.some((pattern) => lowerKey.includes(pattern));
67
+ }
68
/**
 * Tells whether a property name looks like technical metadata (identifiers,
 * timestamps, flags, system fields). The check is a case-insensitive
 * substring match against a fixed word list.
 *
 * NOTE(review): because matching is by substring, short patterns also hit
 * unrelated names (e.g. "video" contains "id", "keywords" contains "key").
 * Confirm that this breadth is intended before relying on it.
 *
 * @param {string} key - Property name to classify.
 * @returns {boolean} `true` when the lower-cased key contains any listed word.
 */
function isMetadataKey(key) {
  const needle = key.toLowerCase();
  const metadataHints = [
    "id",
    "guid",
    "key",
    "_id",
    "nodeid",
    "created",
    "updated",
    "modified",
    "timestamp",
    "date",
    "sort",
    "order",
    "index",
    "position",
    "published",
    "hidden",
    "visible",
    "enabled",
    "status",
    "type",
    "contenttype",
    "template",
    "alias",
    "path",
    "meta",
    "metadata",
    "seo",
    "schema",
    "properties"
  ];
  for (const hint of metadataHints) {
    if (needle.includes(hint)) {
      return true;
    }
  }
  return false;
}
103
/**
 * Recursively shrinks a value so that it roughly fits a token budget.
 *
 * Behaviour by value kind:
 * - depth above 10: replaced by `{ _truncated: "Max depth reached" }`;
 * - budget below 10: dropped (returns `undefined`);
 * - `null` / `undefined` / primitives: returned as-is, except that strings
 *   longer than 2000 characters are cut to 2000 characters plus "...";
 * - arrays: only the first `max(3, floor(15 / (depth + 1)))` items are kept,
 *   the budget is split evenly between them, and a `_note` entry records how
 *   many items are missing from the result;
 * - objects: keys matching `isMetadataKey` are removed, keys matching
 *   `isImportantKey` share 40% of the budget, and the remaining keys are added
 *   smallest-first while the object still fits the budget.
 *
 * @param {*} content - Value to shrink.
 * @param {number} maxTokens - Approximate token budget for this value.
 * @param {number} [currentDepth=0] - Current recursion depth.
 * @returns {*} The shrunken value, or `undefined` when nothing could be kept.
 */
function recursiveTruncate(content, maxTokens, currentDepth = 0) {
  if (currentDepth > 10) {
    return { _truncated: "Max depth reached" };
  }
  if (maxTokens < 10) {
    return undefined;
  }
  if (content === null || content === undefined) {
    return content;
  }
  if (typeof content !== "object") {
    if (typeof content === "string" && content.length > 2e3) {
      return `${content.substring(0, 2e3)}...`;
    }
    return content;
  }

  if (Array.isArray(content)) {
    if (content.length === 0) {
      return content;
    }
    const itemLimit = Math.max(3, Math.floor(15 / (currentDepth + 1)));
    const tokensPerItem = Math.floor(maxTokens / Math.min(content.length, itemLimit));
    const truncatedArray = content
      .slice(0, itemLimit)
      .map((item) => recursiveTruncate(item, tokensPerItem, currentDepth + 1))
      .filter((item) => item !== undefined);
    if (content.length > truncatedArray.length) {
      truncatedArray.push({
        _note: `... and ${content.length - truncatedArray.length} more items`
      });
    }
    return truncatedArray;
  }

  const truncatedObj = {};
  const withoutMetadata = Object.entries(content).filter(([key]) => !isMetadataKey(key));
  if (withoutMetadata.length === 0) {
    return { _note: "Only metadata, removed" };
  }
  const importantEntries = withoutMetadata.filter(([key]) => isImportantKey(key));
  const normalEntries = withoutMetadata.filter(([key]) => !isImportantKey(key));

  // Important keys are processed first and share 40% of the budget.
  const importantBudget = Math.floor(maxTokens * 0.4);
  const tokensPerImportant = importantEntries.length > 0 ? Math.floor(importantBudget / importantEntries.length) : 0;
  for (const [key, value] of importantEntries) {
    const processedValue = recursiveTruncate(value, tokensPerImportant, currentDepth + 1);
    if (processedValue !== undefined) {
      truncatedObj[key] = processedValue;
    }
  }

  const usedTokens = estimateContentTokens(truncatedObj);
  const remainingBudget = maxTokens - usedTokens;
  if (remainingBudget > 100 && normalEntries.length > 0) {
    // JSON.stringify returns undefined for undefined/function values and throws
    // on cycles or BigInt; treat all of those as size 0 instead of crashing the
    // sort. Sizes are computed once per entry rather than on every comparison.
    const serializedLength = (value) => {
      try {
        const json = JSON.stringify(value);
        return typeof json === "string" ? json.length : 0;
      } catch {
        return 0;
      }
    };
    const sortedNormal = normalEntries
      .map((entry) => ({ entry, size: serializedLength(entry[1]) }))
      .sort((a, b) => a.size - b.size)
      .map(({ entry }) => entry);
    const tokensPerNormal = Math.floor(remainingBudget / sortedNormal.length);
    for (const [key, value] of sortedNormal) {
      const processedValue = recursiveTruncate(value, tokensPerNormal, currentDepth + 1);
      if (processedValue !== undefined) {
        truncatedObj[key] = processedValue;
        // Entries are visited smallest-first, so stop at the first one that
        // pushes the object over budget.
        if (estimateContentTokens(truncatedObj) > maxTokens) {
          delete truncatedObj[key];
          break;
        }
      }
    }
  }
  return Object.keys(truncatedObj).length > 0 ? truncatedObj : undefined;
}
171
/**
 * Last-resort shrinking: removes root-level keys from a shallow copy of
 * `content` until `estimateContentTokens` reports a size within `maxTokens`.
 * Keys that do not match `isImportantKey` are removed before keys that do.
 * Each removal is logged with `console.warn`.
 *
 * @param {object} content - Object to shrink; it is not modified.
 * @param {number} maxTokens - Token limit to get under.
 * @returns {object} The shrunken copy (possibly empty).
 */
function emergencyTruncate(content, maxTokens) {
  const trimmed = { ...content };
  const importanceRank = (key) => (isImportantKey(key) ? 1 : 0);
  // Ascending rank puts the non-important keys at the front of the removal queue.
  const removalQueue = Object.keys(trimmed).sort((a, b) => importanceRank(a) - importanceRank(b));
  for (let i = 0; i < removalQueue.length; i += 1) {
    const fits = estimateContentTokens(trimmed) <= maxTokens;
    if (fits) {
      break;
    }
    const doomedKey = removalQueue[i];
    delete trimmed[doomedKey];
    console.warn(` Emergency: removed "${doomedKey}"`);
  }
  return trimmed;
}
186
/**
 * Estimates the token count of a value as the length of its JSON
 * serialisation divided by three, rounded up.
 *
 * @param {*} content - Value to measure.
 * @returns {number} The estimate, or 0 when the value cannot be serialised to
 *   a JSON string (serialisation throws or produces no string).
 */
function estimateContentTokens(content) {
  let serialized;
  try {
    serialized = JSON.stringify(content);
  } catch {
    return 0;
  }
  // JSON.stringify yields undefined (not a string) for inputs such as
  // `undefined`; those count as zero tokens.
  if (typeof serialized !== "string") {
    return 0;
  }
  return Math.ceil(serialized.length / 3);
}
194
/**
 * Returns `content` unchanged when its estimated size is within `maxTokens`;
 * otherwise shrinks it with `recursiveTruncate` and, if that is still too
 * large, with `emergencyTruncate`. Progress is reported on the console.
 *
 * @param {*} content - Page content to send onwards.
 * @param {number} [maxTokens=1e5] - Token limit. `undefined`, `null` and
 *   values that are not numeric fall back to the default.
 * @returns {*} The original content, a truncated object, or
 *   `{ _error: "Content truncation failed" }` when recursive truncation did
 *   not produce a plain object.
 */
function truncateContentIfNeeded(content, maxTokens = 1e5) {
  // A null or NaN limit (e.g. an unset config value) would make every size
  // comparison fail and silently strip all content, so use the default instead.
  const DEFAULT_LIMIT = 1e5;
  const numericLimit = maxTokens === null ? NaN : Number(maxTokens);
  const limit = Number.isNaN(numericLimit) ? DEFAULT_LIMIT : numericLimit;

  const estimatedTokens = estimateContentTokens(content);
  if (estimatedTokens <= limit) {
    return content;
  }
  console.warn(`\u26A0\uFE0F Content too large (${estimatedTokens} tokens > ${limit} limit), truncating recursively...`);
  const truncatedContent = recursiveTruncate(content, limit, 0);
  const isPlainObjectResult = Boolean(truncatedContent) && typeof truncatedContent === "object" && !Array.isArray(truncatedContent);
  // The fallback deliberately carries only the error marker: embedding the
  // oversize original here could never fit the limit, and the emergency pass
  // below would then strip the marker together with it.
  const result = isPlainObjectResult ? truncatedContent : { _error: "Content truncation failed" };
  const finalTokens = estimateContentTokens(result);
  const preservedKeys = Object.keys(result).length;
  const originalKeys = Object.keys(content).length;
  console.log(`\u2705 Content truncated: ${estimatedTokens} \u2192 ${finalTokens} tokens (preserved ${preservedKeys}/${originalKeys} root keys)`);
  if (finalTokens > limit) {
    console.error(`\u274C Recursive truncation insufficient (${finalTokens} > ${limit}), performing emergency truncation...`);
    return emergencyTruncate(result, limit);
  }
  return result;
}
215
+
13
216
  function buildLLMSTemplatePrompt(request) {
217
+ const jsonTokens = estimateContentTokens(request.pageContent);
218
+ const toonData = encode(request.pageContent, { delimiter: " " });
219
+ const toonTokens = estimateContentTokens(toonData);
220
+ console.log(`\u{1F4CA} ${request.url}: JSON ${jsonTokens} \u2192 TOON ${toonTokens} (${((1 - toonTokens / jsonTokens) * 100).toFixed(0)}% saved)`);
14
221
  return `# LLMS.txt-Optimized Mustache Template Generator
15
222
 
16
223
  You are an expert at creating **Mustache.js templates** that generate **LLM knowledge base entries** following the [\`llms.txt\` standard](https://llmstxt.org/).
17
224
 
18
225
  ---
19
226
 
227
+ ## \u26A0\uFE0F CRITICAL RULES - NEVER VIOLATE
228
+
229
+ ### 1. DATA-DRIVEN CONTENT ONLY
230
+ - **EVERY piece of content** must come from a Mustache binding: \`{{propertyName}}\`
231
+ - **NEVER invent, assume, or add content** that doesn't exist in the provided data
232
+ - **NO hardcoded descriptions, lists, or facts**
233
+ - If a property doesn't exist in data, don't create a section for it
234
+
235
+ ### 2. ALLOWED CONTEXTUAL ADDITIONS
236
+ You MAY add:
237
+ - **Section headings** that describe what the data represents (e.g., "Key Features", "Technical Details")
238
+ - **Brief introductory phrases** that set context (e.g., "The following items are available:")
239
+ - **Structural markers** for clarity (e.g., "Navigation:", "Metadata:")
240
+
241
+ You MAY NOT add:
242
+ - Descriptions of features/benefits not in data
243
+ - Explanatory text about what something does
244
+ - Lists of items not present in data
245
+ - Assumptions about the page purpose
246
+
247
+ ### 3. UNDERSTANDING TOON FORMAT
248
+
249
+ The data below is in **TOON format** (Token-Oriented Object Notation) for efficiency.
250
+
251
+ **How to read TOON:**
252
+ - \`propertyName: value\` \u2192 Single property
253
+ - \`array[3]{prop1,prop2}\` \u2192 Array of 3 objects with properties prop1, prop2
254
+ - Properties in \`{braces}\` are the **exact field names** to use in Mustache bindings
255
+
256
+ **Example:**
257
+ \`\`\`toon
258
+ users[2]{id,name,role}:
259
+ 1 Alice admin
260
+ 2 Bob user
261
+ \`\`\`
262
+
263
+ **Your Mustache template:**
264
+ \`\`\`mustache
265
+ {{#users}}
266
+ - {{id}}: {{name}} ({{role}})
267
+ {{/users}}
268
+ \`\`\`
269
+
270
+ **CRITICAL:** Use the EXACT property names shown in TOON \`{braces}\` for your Mustache bindings.
271
+
272
+ ---
273
+
20
274
  ## \u{1F3AF} TRUE PURPOSE: Help LLMs Answer Questions Efficiently
21
275
 
22
276
  **Critical Understanding:**
23
- These \`.md\` files are **NOT website copies** \u2014 they are **LLM knowledge base entries** designed for **inference** (understanding), not training.
277
+ These \`.md\` files are **LLM knowledge base entries** designed for **inference** (understanding), not training.
24
278
 
25
279
  **Primary Goal:** Enable LLMs to quickly answer user questions about this website page within **limited context windows** (typically 200K tokens).
26
280
 
@@ -38,94 +292,102 @@ These \`.md\` files are **NOT website copies** \u2014 they are **LLM knowledge b
38
292
  - **Template Alias:** ${request.templateAlias}
39
293
  - **JSON Path:** ${request.jpath}
40
294
 
41
- ### Available Data
42
- \`\`\`json
43
- ${JSON.stringify(request.pageContent, null, 2)}
295
+ ### Available Data (TOON Format)
296
+
297
+ \`\`\`toon
298
+ ${toonData}
44
299
  \`\`\`
45
300
 
46
301
  ---
47
302
 
48
303
  ## \u{1F9E0} Content Philosophy: Think "Knowledge Base Entry"
49
304
 
50
- ### 1. Start with Expert-Level Summary
51
- - **First impression matters:** What would an expert say about this page in 1-2 sentences?
52
- - Lead with **value proposition** or **core purpose**
53
- - Use the blockquote format (\`> \`) for the summary \u2014 this signals importance
305
+ ### 1. Start with the Most Important Data
306
+ - Lead with title/heading properties
307
+ - Add main description/summary if available
308
+ - Use blockquote (\`> \`) for key summaries
54
309
 
55
310
  ### 2. Structure for Question-Answering
56
311
  Anticipate questions an LLM might need to answer:
57
- - "What is this?" \u2192 Main heading + summary
58
- - "What does it do/offer?" \u2192 Key features/benefits section
59
- - "Who is it for?" \u2192 Target audience/use cases
60
- - "How does it work?" \u2192 Process/methodology
61
- - "What are the details?" \u2192 Technical specs/pricing/etc.
312
+ - "What is this?" \u2192 Main heading + description properties
313
+ - "What does it offer?" \u2192 Lists of items/features from data
314
+ - "Who is it for?" \u2192 Target audience properties (if they exist)
315
+ - "What are the details?" \u2192 Technical/metadata properties
62
316
 
63
- ### 3. Prioritize Information by Importance
317
+ ### 3. Prioritize by Data Importance
64
318
  **Essential First:**
65
- - What this page represents
66
- - Primary value/purpose
67
- - Key differentiators
319
+ - Title/name/heading properties
320
+ - Description/summary properties
321
+ - Main content arrays
68
322
 
69
323
  **Supporting Details Second:**
70
- - Features, benefits, specifications
71
- - Use cases, examples
72
- - Technical details
324
+ - Feature lists, item arrays
325
+ - Nested objects with details
326
+ - Links and references
73
327
 
74
- **Peripheral Information Last:**
75
- - Meta information, related links
76
- - Supplementary context
328
+ **Metadata Last:**
329
+ - URLs, IDs (if useful for context)
330
+ - Timestamps, technical details
77
331
 
78
332
  ### 4. Optimize for Scanability
79
- - Use **hierarchical headings** (\`#\`, \`##\`, \`###\`) to create clear structure
80
- - Employ **bullet lists** for scannable facts
81
- - Keep paragraphs **short and dense** (2-3 sentences max)
82
- - Use **semantic Markdown** only \u2014 no HTML, entities, or attributes
333
+ - Use **hierarchical headings** (\`#\`, \`##\`, \`###\`)
334
+ - Employ **bullet lists** for arrays
335
+ - Keep structure **clean and semantic**
336
+ - Use Markdown only (no HTML)
83
337
 
84
338
  ---
85
339
 
86
340
  ## \u{1F527} Technical Principles (Key-Agnostic Design)
87
341
 
88
- ### 1. Dynamic Property Inference
89
- **Do not assume fixed property names.** Infer content type and importance from:
90
- - **Value structure:** Object, array, string, number
91
- - **Value length:** Short strings = titles; long text = descriptions
92
- - **Position in JSON:** Root-level = high importance; nested = contextual details
93
- - **Semantic patterns:** URLs, images, dates, IDs
342
+ ### 1. Extract Property Names from TOON
343
+ Look at TOON headers to identify properties:
344
+ - \`{id,name,role}\` \u2192 Use \`{{id}}\`, \`{{name}}\`, \`{{role}}\`
345
+ - \`breadcrumbsLinks[5]{title,link}\` \u2192 Use \`{{#breadcrumbsLinks}}{{title}} {{link}}{{/breadcrumbsLinks}}\`
94
346
 
95
347
  ### 2. Exact Property Bindings
96
- - Always use the **exact property name** from JSON: \`{{actualKeyName}}\`
348
+ - Always use **exact property name** from TOON: \`{{actualKeyName}}\`
97
349
  - Do NOT rename or modify binding identifiers
98
- - The Mustache bindings must match JSON precisely
350
+ - Mustache bindings must match TOON property names precisely
99
351
 
100
352
  ### 3. Humanized Section Headings
101
353
  While bindings stay exact, convert keys to readable headings:
102
354
  - \`productFeatures\` \u2192 "Product Features"
103
- - \`pricing_tiers\` \u2192 "Pricing Tiers"
104
- - \`techSpecs\` \u2192 "Technical Specifications"
105
-
106
- ### 4. Semantic Interpretation Guide
107
- - **Short root strings (5-50 chars)** \u2192 Likely page title
108
- - **Medium text (50-300 chars)** \u2192 Likely summary/tagline
109
- - **Long text (300+ chars)** \u2192 Likely detailed description
110
- - **Arrays of primitives** \u2192 Bullet lists
111
- - **Arrays of objects** \u2192 Repeated sections or tables
112
- - **Nested objects** \u2192 Sub-sections with logical hierarchy
113
- - **URL-like strings** \u2192 Render as \`[Label]({{url}})\`
114
- - **Image URLs** \u2192 Render as \`![Description]({{imageUrl}})\`
355
+ - \`supportPageItems\` \u2192 "Available Support Topics"
356
+ - \`breadcrumbsLinks\` \u2192 "Navigation Path"
357
+
358
+ ### 4. Working with Arrays
359
+ When you see \`arrayName[N]{prop1,prop2}\`:
360
+ - Use \`{{#arrayName.0}}\` to check if array exists
361
+ - Iterate with \`{{#arrayName}}\`
362
+ - Access properties with \`{{prop1}}\`, \`{{prop2}}\`
363
+
364
+ **Example:**
365
+ \`\`\`toon
366
+ items[3]{title,description}:
367
+ ...
368
+ \`\`\`
369
+ \u2192
370
+ \`\`\`mustache
371
+ {{#items.0}}
372
+ ## Items
373
+ {{#items}}
374
+ - {{title}}: {{description}}
375
+ {{/items}}
376
+ {{/items.0}}
377
+ \`\`\`
115
378
 
116
379
  ### 5. Noise Filtering
117
- **Exclude non-content fields:**
118
- - IDs (\`id\`, \`nodeId\`, \`_id\`)
119
- - Timestamps (\`createdAt\`, \`updatedAt\`, \`lastModified\`)
120
- - Internal flags (\`isPublished\`, \`sortOrder\`, \`hidden\`)
121
- - System metadata (\`_type\`, \`contentType\`, \`template\`)
380
+ **Exclude technical metadata** (if present in TOON):
381
+ - IDs: \`id\`, \`nodeId\`, \`_id\`, \`guid\`
382
+ - Timestamps: \`createdAt\`, \`updatedAt\`
383
+ - Flags: \`isPublished\`, \`sortOrder\`, \`hidden\`
384
+ - System: \`_type\`, \`contentType\`
122
385
 
123
386
  ### 6. Hierarchy & Nesting
124
387
  - **Root level** \u2192 \`#\` (H1) \u2014 one per document
125
388
  - **Primary sections** \u2192 \`##\` (H2)
126
389
  - **Sub-sections** \u2192 \`###\` (H3)
127
- - **Details** \u2192 \`####\` (H4) \u2014 avoid going deeper
128
- - Heading depth corresponds to JSON nesting, but stay practical
390
+ - **Details** \u2192 \`####\` (H4) \u2014 avoid deeper
129
391
 
130
392
  ---
131
393
 
@@ -133,66 +395,122 @@ While bindings stay exact, convert keys to readable headings:
133
395
 
134
396
  ### Mandatory Opening
135
397
  \`\`\`mustache
136
- # {{primaryTitle}}
398
+ # {{primaryTitleProperty}}
137
399
 
138
- {{#summaryOrTagline}}
139
- > {{summaryOrTagline}}
140
- {{/summaryOrTagline}}
400
+ {{#summaryProperty}}
401
+ > {{summaryProperty}}
402
+ {{/summaryProperty}}
141
403
  \`\`\`
142
404
 
143
- ### Recommended Sections (adapt to JSON)
405
+ ### Example Sections (adapt to actual TOON data)
144
406
  \`\`\`mustache
145
407
  {{#mainDescription}}
408
+ ## Overview
146
409
  {{mainDescription}}
147
410
  {{/mainDescription}}
148
411
 
149
- {{#keyFeatures.0}}
150
- ## Key Features
151
- {{#keyFeatures}}
152
- - **{{featureName}}**: {{featureDescription}}
153
- {{/keyFeatures}}
154
- {{/keyFeatures.0}}
155
-
156
- {{#useCases.0}}
157
- ## Use Cases
158
- {{#useCases}}
159
- ### {{caseTitle}}
160
- {{caseDescription}}
161
- {{/useCases}}
162
- {{/useCases.0}}
163
-
164
- {{#technicalDetails.0}}
165
- ## Technical Details
166
- {{#technicalDetails}}
167
- - **{{detailLabel}}**: {{detailValue}}
168
- {{/technicalDetails}}
169
- {{/technicalDetails.0}}
412
+ {{#itemsArray.0}}
413
+ ## Available Items
414
+ {{#itemsArray}}
415
+ ### {{itemTitle}}
416
+ {{itemDescription}}
417
+ {{/itemsArray}}
418
+ {{/itemsArray.0}}
419
+
420
+ {{#navigationLinks.0}}
421
+ ## Navigation
422
+ {{#navigationLinks}}
423
+ - [{{title}}]({{link}})
424
+ {{/navigationLinks}}
425
+ {{/navigationLinks.0}}
170
426
  \`\`\`
171
427
 
172
- **Note:** This is an illustrative pattern. Adapt section names and structure to match the actual JSON dynamically.
428
+ **Important:** These are examples. Your template must match the ACTUAL TOON structure provided.
173
429
 
174
430
  ---
175
431
 
176
432
  ## \u2705 Output Requirements
177
433
 
178
- 1. **Output ONLY the Mustache template** \u2014 no explanations, no code fences, no preamble
179
- 2. **Use exact JSON property names** in all bindings
434
+ 1. **Output ONLY the Mustache template** \u2014 no explanations, no markdown code fences, no preamble
435
+ 2. **Use exact property names from TOON \`{braces}\`** in all bindings
180
436
  3. **Generate clean Markdown** \u2014 no HTML, entities, or attributes
181
- 4. **Prioritize content** \u2014 most important information first
182
- 5. **Be concise** \u2014 optimize for limited context windows
183
- 6. **Structure for questions** \u2014 LLMs should easily extract facts
184
- 7. **Stay domain-agnostic** \u2014 template should work for any JSON shape
437
+ 4. **Data-driven content** \u2014 no invented facts or descriptions
438
+ 5. **Contextual headings allowed** \u2014 but content must be from data
439
+ 6. **Be concise** \u2014 optimize for limited context windows
440
+ 7. **Structure for questions** \u2014 LLMs should easily extract facts
441
+
442
+ ---
443
+
444
+ ## \u26A0\uFE0F CRITICAL: Mustache Syntax Validation
445
+
446
+ **Every \`{{#tag}}\` MUST have matching \`{{/tag}}\`**
447
+
448
+ ### Common Errors (from real failures):
449
+
450
+ \u274C **Missing closing tag:**
451
+ \`\`\`mustache
452
+ {{#pageDescription}}
453
+ content
454
+ // \u274C Missing {{/pageDescription}}
455
+ \`\`\`
456
+
457
+ \u274C **Nested check without outer closing:**
458
+ \`\`\`mustache
459
+ {{#items.0}}
460
+ {{#items}}...{{/items}}
461
+ // \u274C Missing {{/items.0}}
462
+ \`\`\`
463
+
464
+ \u274C **Capitalization mismatch:**
465
+ \`\`\`mustache
466
+ {{#aIFeaturesCTATitle}}
467
+ ...
468
+ {{/aiFeaturesCTATitle}} \u274C Different capitalization!
469
+ \`\`\`
470
+
471
+ ### Validation Checklist:
472
+
473
+ **Before output:**
474
+ 1. Count \`{{#\` tags = ___
475
+ 2. Count \`{{/\` tags = ___
476
+ 3. Numbers match? If NO \u2192 Find and add missing closing tags
477
+ 4. Tag names exact match (including dots, numbers, capitalization)?
478
+
479
+ \u2705 **Valid example:**
480
+ \`\`\`mustache
481
+ {{#section}} \u2190 1 open
482
+ {{#nested.0}} \u2190 2 open
483
+ content
484
+ {{/nested.0}} \u2190 2 close
485
+ {{/section}} \u2190 1 close
486
+ \`\`\`
487
+ Count: 2 = 2 \u2713
185
488
 
186
489
  ---
187
490
 
188
491
  ## \u{1F680} Your Task
189
492
 
190
- Analyze the provided JSON structure and **generate a Mustache template** that produces an **LLM knowledge base entry** following these principles.
493
+ Analyze the provided TOON data structure and **generate a Mustache template** that:
494
+
495
+ 1. **Uses ONLY data from TOON** (no invented content)
496
+ 2. **Extracts exact property names from \`{braces}\`**
497
+ 3. **Adds logical section headings** for context
498
+ 4. **Structures data for question-answering**
499
+ 5. **Prioritizes most important properties first**
500
+ 6. **Remains universal** (works for any data shape)
501
+ 7. **\u2705 ALL Mustache tags properly closed**
502
+
503
+ **Remember:**
504
+ - Parse TOON structure naturally \u2705
505
+ - Use exact property names from \`{braces}\` \u2705\u2705\u2705
506
+ - Headings can be contextual \u2705
507
+ - Content must be from data \u2705\u2705\u2705
508
+ - No made-up descriptions \u274C
509
+ - No assumed features \u274C
510
+ - **Every {{#tag}} has {{/tag}}** \u2705\u2705\u2705
191
511
 
192
- **Think:**
193
- - What would an LLM need to know to answer questions about this page?
194
- - What's the core value/purpose this page communicates?
195
- - How can I structure this for maximum inference efficiency?
512
+ **Final Step Before Output:**
513
+ Count your \`{{#\` and \`{{/\` tags. If numbers don't match, find and add missing closing tags.
196
514
 
197
515
  Generate the template now.
198
516
  `;
@@ -216,7 +534,7 @@ class AnthropicClient {
216
534
  const response = await this.client.messages.create({
217
535
  model: this.model,
218
536
  max_tokens: 4e3,
219
- temperature: 0.1,
537
+ temperature: 0.3,
220
538
  messages: [{
221
539
  role: "user",
222
540
  content: prompt
@@ -662,76 +980,6 @@ function getValueType(value) {
662
980
  return typeof value;
663
981
  }
664
982
 
665
- function extractPageContent(umbracoData, jpath) {
666
- try {
667
- const result = JSONPath({
668
- path: jpath,
669
- json: umbracoData.SiteData,
670
- wrap: false
671
- });
672
- if (!result || Array.isArray(result) && result.length === 0) {
673
- return null;
674
- }
675
- const pageContent = Array.isArray(result) ? result[0] : result;
676
- return excludeChildrenFromContent(pageContent);
677
- } catch (error) {
678
- console.error(`Failed to extract content for path ${jpath}:`, error);
679
- return null;
680
- }
681
- }
682
- function excludeChildrenFromContent(content) {
683
- if (!content || typeof content !== "object") {
684
- return content;
685
- }
686
- const cleanContent = { ...content };
687
- if ("children" in cleanContent) {
688
- delete cleanContent.children;
689
- }
690
- return cleanContent;
691
- }
692
- function generatePageId(urlItem) {
693
- const templateAlias = urlItem.TemplateAlias || "UnknownTemplate";
694
- const nodeID = urlItem.nodeID || "UnknownNode";
695
- return `${templateAlias}_${nodeID}`;
696
- }
697
- function estimateContentTokens(content) {
698
- try {
699
- const jsonString = JSON.stringify(content);
700
- return Math.ceil(jsonString.length / 4);
701
- } catch {
702
- return 0;
703
- }
704
- }
705
- function truncateContentIfNeeded(content, maxTokens = 18e4) {
706
- const estimatedTokens = estimateContentTokens(content);
707
- if (estimatedTokens <= maxTokens) {
708
- return content;
709
- }
710
- console.warn(`Content too large (${estimatedTokens} tokens > ${maxTokens} limit), truncating...`);
711
- const truncatedContent = { ...content };
712
- const sortedKeys = Object.keys(truncatedContent).sort((a, b) => {
713
- const sizeA = estimateContentTokens({ [a]: truncatedContent[a] });
714
- const sizeB = estimateContentTokens({ [b]: truncatedContent[b] });
715
- return sizeB - sizeA;
716
- });
717
- for (const key of sortedKeys) {
718
- if (estimateContentTokens(truncatedContent) <= maxTokens) {
719
- break;
720
- }
721
- const value = truncatedContent[key];
722
- if (Array.isArray(value) && value.length > 10) {
723
- truncatedContent[key] = value.slice(0, 10);
724
- console.warn(`Truncated array ${key} from ${value.length} to 10 items`);
725
- } else if (typeof value === "string" && value.length > 5e3) {
726
- truncatedContent[key] = value.substring(0, 5e3) + "...";
727
- console.warn(`Truncated string ${key} from ${value.length} to 5000 chars`);
728
- }
729
- }
730
- const finalTokens = estimateContentTokens(truncatedContent);
731
- console.log(`Content truncated from ${estimatedTokens} to ${finalTokens} tokens`);
732
- return truncatedContent;
733
- }
734
-
735
983
  function shouldGenerateTemplate(umbracoData, urlItem) {
736
984
  try {
737
985
  const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
@@ -1067,7 +1315,7 @@ class TemplateGenerator {
1067
1315
  const pageId = generatePageId(urlItem);
1068
1316
  console.log(`Generating new template for ${pageId} (${urlItem.url})`);
1069
1317
  const tokensBeforeTruncation = estimateContentTokens(pageContent);
1070
- const truncatedContent = truncateContentIfNeeded(pageContent, 18e4);
1318
+ const truncatedContent = truncateContentIfNeeded(pageContent, this.config.maxTokens);
1071
1319
  const tokensAfterTruncation = estimateContentTokens(truncatedContent);
1072
1320
  if (tokensBeforeTruncation > tokensAfterTruncation) {
1073
1321
  console.warn(`Page ${pageId} content truncated: ${tokensBeforeTruncation} -> ${tokensAfterTruncation} tokens`);