@voicenter-team/nuxt-llms-generator 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +625 -625
- package/dist/chunks/llms-files-generator.mjs +313 -115
- package/dist/module.json +1 -1
- package/package.json +63 -63
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
|
|
2
2
|
import { join, dirname, basename } from 'path';
|
|
3
|
-
import {
|
|
3
|
+
import { slugify } from 'transliteration';
|
|
4
4
|
import Mustache from 'mustache';
|
|
5
5
|
import Anthropic from '@anthropic-ai/sdk';
|
|
6
6
|
import { createHash } from 'crypto';
|
|
@@ -17,10 +17,72 @@ You are an expert at creating **Mustache.js templates** that generate **LLM know
|
|
|
17
17
|
|
|
18
18
|
---
|
|
19
19
|
|
|
20
|
+
## \u26A0\uFE0F CRITICAL RULES - NEVER VIOLATE
|
|
21
|
+
|
|
22
|
+
### 1. DATA-DRIVEN CONTENT ONLY
|
|
23
|
+
- **EVERY piece of content** must come from a Mustache binding: \`{{propertyName}}\`
|
|
24
|
+
- **NEVER invent, assume, or add content** that doesn't exist in the provided JSON
|
|
25
|
+
- **NO hardcoded descriptions, lists, or facts**
|
|
26
|
+
- If a property doesn't exist in JSON, don't create a section for it
|
|
27
|
+
|
|
28
|
+
### 2. ALLOWED CONTEXTUAL ADDITIONS
|
|
29
|
+
You MAY add:
|
|
30
|
+
- **Section headings** that describe what the data represents (e.g., "Key Features", "Technical Details")
|
|
31
|
+
- **Brief introductory phrases** that set context (e.g., "The following items are available:")
|
|
32
|
+
- **Structural markers** for clarity (e.g., "Navigation:", "Metadata:")
|
|
33
|
+
|
|
34
|
+
You MAY NOT add:
|
|
35
|
+
- Descriptions of features/benefits not in JSON
|
|
36
|
+
- Explanatory text about what something does
|
|
37
|
+
- Lists of items not present in data
|
|
38
|
+
- Assumptions about the page purpose
|
|
39
|
+
|
|
40
|
+
### 3. EXAMPLES OF VIOLATIONS
|
|
41
|
+
|
|
42
|
+
\u274C **BAD - Hardcoded content:**
|
|
43
|
+
\`\`\`mustache
|
|
44
|
+
## Key Benefits
|
|
45
|
+
- Real-time monitoring
|
|
46
|
+
- Detailed analytics
|
|
47
|
+
- Easy to use
|
|
48
|
+
\`\`\`
|
|
49
|
+
*Problem: These benefits are invented, not from JSON*
|
|
50
|
+
|
|
51
|
+
\u274C **BAD - Invented descriptions:**
|
|
52
|
+
\`\`\`mustache
|
|
53
|
+
This dashboard provides comprehensive monitoring capabilities for call centers...
|
|
54
|
+
\`\`\`
|
|
55
|
+
*Problem: Description is made up*
|
|
56
|
+
|
|
57
|
+
\u2705 **GOOD - Data-driven with context:**
|
|
58
|
+
\`\`\`mustache
|
|
59
|
+
{{#features.0}}
|
|
60
|
+
## Available Features
|
|
61
|
+
{{#features}}
|
|
62
|
+
- **{{name}}**: {{description}}
|
|
63
|
+
{{/features}}
|
|
64
|
+
{{/features.0}}
|
|
65
|
+
\`\`\`
|
|
66
|
+
*Good: Content comes from JSON, heading provides context*
|
|
67
|
+
|
|
68
|
+
\u2705 **GOOD - Minimal introduction:**
|
|
69
|
+
\`\`\`mustache
|
|
70
|
+
{{#items.0}}
|
|
71
|
+
## Items Overview
|
|
72
|
+
The following items are available:
|
|
73
|
+
{{#items}}
|
|
74
|
+
- {{title}}
|
|
75
|
+
{{/items}}
|
|
76
|
+
{{/items.0}}
|
|
77
|
+
\`\`\`
|
|
78
|
+
*Good: Brief intro, but content is from JSON*
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
20
82
|
## \u{1F3AF} TRUE PURPOSE: Help LLMs Answer Questions Efficiently
|
|
21
83
|
|
|
22
84
|
**Critical Understanding:**
|
|
23
|
-
These \`.md\` files are **
|
|
85
|
+
These \`.md\` files are **LLM knowledge base entries** designed for **inference** (understanding), not training.
|
|
24
86
|
|
|
25
87
|
**Primary Goal:** Enable LLMs to quickly answer user questions about this website page within **limited context windows** (typically 200K tokens).
|
|
26
88
|
|
|
@@ -47,85 +109,81 @@ ${JSON.stringify(request.pageContent, null, 2)}
|
|
|
47
109
|
|
|
48
110
|
## \u{1F9E0} Content Philosophy: Think "Knowledge Base Entry"
|
|
49
111
|
|
|
50
|
-
### 1. Start with
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
- Use
|
|
112
|
+
### 1. Start with the Most Important Data
|
|
113
|
+
- Lead with title/heading properties
|
|
114
|
+
- Add main description/summary if available
|
|
115
|
+
- Use blockquote (\`> \`) for key summaries
|
|
54
116
|
|
|
55
117
|
### 2. Structure for Question-Answering
|
|
56
118
|
Anticipate questions an LLM might need to answer:
|
|
57
|
-
- "What is this?" \u2192 Main heading +
|
|
58
|
-
- "What does it
|
|
59
|
-
- "Who is it for?" \u2192 Target audience
|
|
60
|
-
- "
|
|
61
|
-
- "What are the details?" \u2192 Technical specs/pricing/etc.
|
|
119
|
+
- "What is this?" \u2192 Main heading + description properties
|
|
120
|
+
- "What does it offer?" \u2192 Lists of items/features from JSON
|
|
121
|
+
- "Who is it for?" \u2192 Target audience properties (if they exist)
|
|
122
|
+
- "What are the details?" \u2192 Technical/metadata properties
|
|
62
123
|
|
|
63
|
-
### 3. Prioritize
|
|
124
|
+
### 3. Prioritize by JSON Structure
|
|
64
125
|
**Essential First:**
|
|
65
|
-
-
|
|
66
|
-
-
|
|
67
|
-
-
|
|
126
|
+
- Root-level title/name/heading properties
|
|
127
|
+
- Description/summary properties
|
|
128
|
+
- Main content arrays
|
|
68
129
|
|
|
69
130
|
**Supporting Details Second:**
|
|
70
|
-
-
|
|
71
|
-
-
|
|
72
|
-
-
|
|
131
|
+
- Feature lists, item arrays
|
|
132
|
+
- Nested objects with details
|
|
133
|
+
- Links and references
|
|
73
134
|
|
|
74
|
-
**
|
|
75
|
-
-
|
|
76
|
-
-
|
|
135
|
+
**Metadata Last:**
|
|
136
|
+
- URLs, IDs (if useful for context)
|
|
137
|
+
- Timestamps, technical details
|
|
77
138
|
|
|
78
139
|
### 4. Optimize for Scanability
|
|
79
|
-
- Use **hierarchical headings** (\`#\`, \`##\`, \`###\`)
|
|
80
|
-
- Employ **bullet lists** for
|
|
81
|
-
- Keep
|
|
82
|
-
- Use
|
|
140
|
+
- Use **hierarchical headings** (\`#\`, \`##\`, \`###\`)
|
|
141
|
+
- Employ **bullet lists** for arrays
|
|
142
|
+
- Keep structure **clean and semantic**
|
|
143
|
+
- Use Markdown only (no HTML)
|
|
83
144
|
|
|
84
145
|
---
|
|
85
146
|
|
|
86
147
|
## \u{1F527} Technical Principles (Key-Agnostic Design)
|
|
87
148
|
|
|
88
149
|
### 1. Dynamic Property Inference
|
|
89
|
-
**Do not assume fixed property names.** Infer content type
|
|
150
|
+
**Do not assume fixed property names.** Infer content type from:
|
|
90
151
|
- **Value structure:** Object, array, string, number
|
|
91
152
|
- **Value length:** Short strings = titles; long text = descriptions
|
|
92
|
-
- **Position in JSON:** Root-level = high importance
|
|
93
|
-
- **Semantic patterns:** URLs, images, dates
|
|
153
|
+
- **Position in JSON:** Root-level = high importance
|
|
154
|
+
- **Semantic patterns:** URLs, images, dates
|
|
94
155
|
|
|
95
156
|
### 2. Exact Property Bindings
|
|
96
|
-
- Always use
|
|
157
|
+
- Always use **exact property name** from JSON: \`{{actualKeyName}}\`
|
|
97
158
|
- Do NOT rename or modify binding identifiers
|
|
98
|
-
-
|
|
159
|
+
- Mustache bindings must match JSON precisely
|
|
99
160
|
|
|
100
161
|
### 3. Humanized Section Headings
|
|
101
162
|
While bindings stay exact, convert keys to readable headings:
|
|
102
163
|
- \`productFeatures\` \u2192 "Product Features"
|
|
103
|
-
- \`
|
|
104
|
-
- \`
|
|
164
|
+
- \`supportPageItems\` \u2192 "Available Support Topics"
|
|
165
|
+
- \`breadcrumbsLinks\` \u2192 "Navigation Path"
|
|
105
166
|
|
|
106
167
|
### 4. Semantic Interpretation Guide
|
|
107
168
|
- **Short root strings (5-50 chars)** \u2192 Likely page title
|
|
108
169
|
- **Medium text (50-300 chars)** \u2192 Likely summary/tagline
|
|
109
170
|
- **Long text (300+ chars)** \u2192 Likely detailed description
|
|
171
|
+
- **Arrays of objects** \u2192 Repeated sections with structure
|
|
110
172
|
- **Arrays of primitives** \u2192 Bullet lists
|
|
111
|
-
- **Arrays of objects** \u2192 Repeated sections or tables
|
|
112
|
-
- **Nested objects** \u2192 Sub-sections with logical hierarchy
|
|
113
173
|
- **URL-like strings** \u2192 Render as \`[Label]({{url}})\`
|
|
114
|
-
- **Image URLs** \u2192 Render as \`\`
|
|
115
174
|
|
|
116
175
|
### 5. Noise Filtering
|
|
117
|
-
**Exclude
|
|
118
|
-
- IDs
|
|
119
|
-
- Timestamps
|
|
120
|
-
-
|
|
121
|
-
- System
|
|
176
|
+
**Exclude technical metadata:**
|
|
177
|
+
- IDs: \`id\`, \`nodeId\`, \`_id\`, \`guid\`
|
|
178
|
+
- Timestamps: \`createdAt\`, \`updatedAt\`
|
|
179
|
+
- Flags: \`isPublished\`, \`sortOrder\`, \`hidden\`
|
|
180
|
+
- System: \`_type\`, \`contentType\`, \`template\`
|
|
122
181
|
|
|
123
182
|
### 6. Hierarchy & Nesting
|
|
124
183
|
- **Root level** \u2192 \`#\` (H1) \u2014 one per document
|
|
125
184
|
- **Primary sections** \u2192 \`##\` (H2)
|
|
126
185
|
- **Sub-sections** \u2192 \`###\` (H3)
|
|
127
|
-
- **Details** \u2192 \`####\` (H4) \u2014 avoid
|
|
128
|
-
- Heading depth corresponds to JSON nesting, but stay practical
|
|
186
|
+
- **Details** \u2192 \`####\` (H4) \u2014 avoid deeper
|
|
129
187
|
|
|
130
188
|
---
|
|
131
189
|
|
|
@@ -133,66 +191,73 @@ While bindings stay exact, convert keys to readable headings:
|
|
|
133
191
|
|
|
134
192
|
### Mandatory Opening
|
|
135
193
|
\`\`\`mustache
|
|
136
|
-
# {{
|
|
194
|
+
# {{primaryTitleProperty}}
|
|
137
195
|
|
|
138
|
-
{{#
|
|
139
|
-
> {{
|
|
140
|
-
{{/
|
|
196
|
+
{{#summaryProperty}}
|
|
197
|
+
> {{summaryProperty}}
|
|
198
|
+
{{/summaryProperty}}
|
|
141
199
|
\`\`\`
|
|
142
200
|
|
|
143
|
-
### Recommended Sections (adapt to JSON)
|
|
201
|
+
### Recommended Sections (adapt to actual JSON)
|
|
144
202
|
\`\`\`mustache
|
|
145
203
|
{{#mainDescription}}
|
|
204
|
+
## Overview
|
|
146
205
|
{{mainDescription}}
|
|
147
206
|
{{/mainDescription}}
|
|
148
207
|
|
|
149
|
-
{{#
|
|
150
|
-
##
|
|
151
|
-
{{#
|
|
152
|
-
|
|
153
|
-
{{
|
|
154
|
-
{{/
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
{{
|
|
161
|
-
{{/
|
|
162
|
-
{{/
|
|
163
|
-
|
|
164
|
-
{{#
|
|
165
|
-
## Technical
|
|
166
|
-
{{
|
|
167
|
-
- **
|
|
168
|
-
{{/
|
|
169
|
-
{{/technicalDetails.0}}
|
|
208
|
+
{{#itemsArray.0}}
|
|
209
|
+
## Available Items
|
|
210
|
+
{{#itemsArray}}
|
|
211
|
+
### {{itemTitle}}
|
|
212
|
+
{{itemDescription}}
|
|
213
|
+
{{/itemsArray}}
|
|
214
|
+
{{/itemsArray.0}}
|
|
215
|
+
|
|
216
|
+
{{#navigationLinks.0}}
|
|
217
|
+
## Navigation
|
|
218
|
+
{{#navigationLinks}}
|
|
219
|
+
- [{{title}}]({{link}})
|
|
220
|
+
{{/navigationLinks}}
|
|
221
|
+
{{/navigationLinks.0}}
|
|
222
|
+
|
|
223
|
+
{{#technicalData}}
|
|
224
|
+
## Technical Information
|
|
225
|
+
- **URL**: {{url}}
|
|
226
|
+
- **Type**: {{type}}
|
|
227
|
+
{{/technicalData}}
|
|
170
228
|
\`\`\`
|
|
171
229
|
|
|
172
|
-
**
|
|
230
|
+
**Important:** These are examples. Your template must match the ACTUAL JSON structure provided.
|
|
173
231
|
|
|
174
232
|
---
|
|
175
233
|
|
|
176
234
|
## \u2705 Output Requirements
|
|
177
235
|
|
|
178
|
-
1. **Output ONLY the Mustache template** \u2014 no explanations, no code fences, no preamble
|
|
236
|
+
1. **Output ONLY the Mustache template** \u2014 no explanations, no markdown code fences, no preamble
|
|
179
237
|
2. **Use exact JSON property names** in all bindings
|
|
180
238
|
3. **Generate clean Markdown** \u2014 no HTML, entities, or attributes
|
|
181
|
-
4. **
|
|
182
|
-
5. **
|
|
183
|
-
6. **
|
|
184
|
-
7. **
|
|
239
|
+
4. **Data-driven content** \u2014 no invented facts or descriptions
|
|
240
|
+
5. **Contextual headings allowed** \u2014 but content must be from JSON
|
|
241
|
+
6. **Be concise** \u2014 optimize for limited context windows
|
|
242
|
+
7. **Structure for questions** \u2014 LLMs should easily extract facts
|
|
185
243
|
|
|
186
244
|
---
|
|
187
245
|
|
|
188
246
|
## \u{1F680} Your Task
|
|
189
247
|
|
|
190
|
-
Analyze the provided JSON structure and **generate a Mustache template** that
|
|
248
|
+
Analyze the provided JSON structure and **generate a Mustache template** that:
|
|
191
249
|
|
|
192
|
-
**
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
250
|
+
1. **Uses ONLY data from JSON** (no invented content)
|
|
251
|
+
2. **Adds logical section headings** for context
|
|
252
|
+
3. **Structures data for question-answering**
|
|
253
|
+
4. **Prioritizes most important properties first**
|
|
254
|
+
5. **Remains universal** (works for any JSON shape)
|
|
255
|
+
|
|
256
|
+
**Remember:**
|
|
257
|
+
- Headings can be contextual: \u2705
|
|
258
|
+
- Content must be from JSON: \u2705\u2705\u2705
|
|
259
|
+
- No made-up descriptions: \u274C
|
|
260
|
+
- No assumed features: \u274C
|
|
196
261
|
|
|
197
262
|
Generate the template now.
|
|
198
263
|
`;
|
|
@@ -216,7 +281,7 @@ class AnthropicClient {
|
|
|
216
281
|
const response = await this.client.messages.create({
|
|
217
282
|
model: this.model,
|
|
218
283
|
max_tokens: 4e3,
|
|
219
|
-
temperature: 0.
|
|
284
|
+
temperature: 0.3,
|
|
220
285
|
messages: [{
|
|
221
286
|
role: "user",
|
|
222
287
|
content: prompt
|
|
@@ -694,42 +759,174 @@ function generatePageId(urlItem) {
|
|
|
694
759
|
const nodeID = urlItem.nodeID || "UnknownNode";
|
|
695
760
|
return `${templateAlias}_${nodeID}`;
|
|
696
761
|
}
|
|
762
|
+
/**
 * Lower-case substrings that mark a property name as primary, human-readable
 * content. Kept at module level (and frozen) so the list is built once rather
 * than on every call; isImportantKey is invoked per key during truncation and
 * from inside a sort comparator.
 */
const IMPORTANT_KEY_PATTERNS = Object.freeze([
  "title",
  "name",
  "heading",
  "description",
  "summary",
  "content",
  "text",
  "body",
  "value",
  "label",
  "caption",
  "alt",
  "message",
  "url",
  "link",
  "href"
]);

/**
 * Tells whether a property name looks like it holds primary content.
 *
 * Matching is case-insensitive and substring-based, so compound names such as
 * `pageTitle` or `subtitle` qualify. The flip side is that unrelated names
 * containing a pattern (e.g. `salt` contains `alt`) also qualify.
 *
 * @param {string} key - Property name to classify.
 * @returns {boolean} `true` when the lower-cased key contains any pattern.
 */
function isImportantKey(key) {
  const lowerKey = key.toLowerCase();
  return IMPORTANT_KEY_PATTERNS.some((pattern) => lowerKey.includes(pattern));
}
|
|
784
|
+
/**
 * Words that mark a property name as technical metadata rather than content.
 * Entries are lower-case; `_id`, `nodeid` and `contenttype` are only reachable
 * through the whole-key comparison in isMetadataKey.
 */
const METADATA_KEY_WORDS = new Set([
  "id",
  "guid",
  "key",
  "_id",
  "nodeid",
  "created",
  "updated",
  "modified",
  "timestamp",
  "date",
  "sort",
  "order",
  "index",
  "position",
  "published",
  "hidden",
  "visible",
  "enabled",
  "status",
  "type",
  "contenttype",
  "template",
  "alias",
  "path",
  "meta",
  "metadata",
  "seo",
  "schema",
  "properties"
]);

/**
 * Splits an identifier into its words: acronym runs (`ID`), capitalised or
 * lower-case words (`Title`, `node`) and digit runs. Separators such as `_`
 * or `-` are simply not matched.
 */
const KEY_WORD_PATTERN = /[A-Z]+(?![a-z])|[A-Z]?[a-z]+|[0-9]+/g;

/**
 * Tells whether a property name denotes technical metadata (ids, timestamps,
 * flags, system fields).
 *
 * The key is compared as a whole and word by word (camelCase, snake_case and
 * kebab-case are split), case-insensitively, with a trailing plural `s`
 * tolerated. Matching whole words instead of raw substrings prevents content
 * keys such as `videoTitle`, `width`, `provider` or `border` from being
 * classified as metadata merely because they contain `id` or `order`.
 *
 * Trade-off: all-lower-case concatenations that are not listed verbatim
 * (e.g. `userid`) are no longer recognised.
 *
 * @param {string} key - Property name to classify.
 * @returns {boolean} `true` when the key, or one of its words, is a metadata word.
 */
function isMetadataKey(key) {
  if (METADATA_KEY_WORDS.has(key.toLowerCase())) {
    return true;
  }
  // String.prototype.match resets lastIndex, so sharing the /g regex is safe.
  const words = key.match(KEY_WORD_PATTERN) || [];
  return words.some((word) => {
    const lowerWord = word.toLowerCase();
    if (METADATA_KEY_WORDS.has(lowerWord)) {
      return true;
    }
    return lowerWord.endsWith("s") && METADATA_KEY_WORDS.has(lowerWord.slice(0, -1));
  });
}
|
|
819
|
+
/**
 * Recursively shrinks a JSON-like value towards an approximate token budget
 * (token cost is whatever estimateContentTokens reports).
 *
 * Rules, in order:
 * - Beyond depth 10 a `{ _truncated }` marker replaces the value.
 * - With a budget under 10 tokens, objects and arrays are dropped; a scalar
 *   (string, number, boolean, null) is kept only if it fits the budget itself.
 *   Previously every value was dropped here, so short titles and labels were
 *   lost whenever an object's per-key share was small.
 * - Strings longer than 2000 characters are cut to 2000 plus `...`.
 * - Arrays keep at most max(3, floor(15 / (depth + 1))) items, each given an
 *   equal share of the budget; a trailing `{ _note }` reports omitted items.
 * - Objects lose keys matched by isMetadataKey. Keys matched by isImportantKey
 *   share 40% of the budget; the remaining keys are added smallest-first
 *   while more than 100 tokens remain and the object stays within budget.
 *
 * @param {*} content - Value to shrink.
 * @param {number} maxTokens - Approximate token budget for this value.
 * @param {number} [currentDepth=0] - Current recursion depth.
 * @returns {*} The shrunken value, or `undefined` when nothing should be kept.
 */
function recursiveTruncate(content, maxTokens, currentDepth = 0) {
  if (currentDepth > 10) {
    return { _truncated: "Max depth reached" };
  }
  if (maxTokens < 10) {
    const isScalar = content === null || ["string", "number", "boolean"].includes(typeof content);
    // Containers cannot be represented usefully this small; tiny scalars can.
    return isScalar && estimateContentTokens(content) <= maxTokens ? content : void 0;
  }
  if (content === null || content === void 0) {
    return content;
  }
  if (typeof content !== "object") {
    if (typeof content === "string" && content.length > 2e3) {
      return content.substring(0, 2e3) + "...";
    }
    return content;
  }
  if (Array.isArray(content)) {
    if (content.length === 0) {
      return content;
    }
    const itemLimit = Math.max(3, Math.floor(15 / (currentDepth + 1)));
    const tokensPerItem = Math.floor(maxTokens / Math.min(content.length, itemLimit));
    const truncatedArray = content
      .slice(0, itemLimit)
      .map((item) => recursiveTruncate(item, tokensPerItem, currentDepth + 1))
      .filter((item) => item !== void 0);
    if (content.length > truncatedArray.length) {
      truncatedArray.push({
        _note: `... and ${content.length - truncatedArray.length} more items`
      });
    }
    return truncatedArray;
  }
  const truncatedObj = {};
  const withoutMetadata = Object.entries(content).filter(([key]) => !isMetadataKey(key));
  if (withoutMetadata.length === 0) {
    return { _note: "Only metadata, removed" };
  }
  const importantEntries = withoutMetadata.filter(([key]) => isImportantKey(key));
  const normalEntries = withoutMetadata.filter(([key]) => !isImportantKey(key));
  const importantBudget = Math.floor(maxTokens * 0.4);
  const tokensPerImportant = importantEntries.length > 0 ? Math.floor(importantBudget / importantEntries.length) : 0;
  for (const [key, value] of importantEntries) {
    const processedValue = recursiveTruncate(value, tokensPerImportant, currentDepth + 1);
    if (processedValue !== void 0) {
      truncatedObj[key] = processedValue;
    }
  }
  const usedTokens = estimateContentTokens(truncatedObj);
  const remainingBudget = maxTokens - usedTokens;
  if (remainingBudget > 100 && normalEntries.length > 0) {
    // Serialise each value once instead of on every comparison. JSON.stringify
    // yields undefined for values it cannot represent; count those as size 0
    // rather than throwing on `.length`.
    const sortedNormal = normalEntries
      .map(([key, value]) => {
        const serialized = JSON.stringify(value);
        return { key, value, size: serialized === void 0 ? 0 : serialized.length };
      })
      .sort((a, b) => a.size - b.size);
    const tokensPerNormal = Math.floor(remainingBudget / sortedNormal.length);
    for (const { key, value } of sortedNormal) {
      const processedValue = recursiveTruncate(value, tokensPerNormal, currentDepth + 1);
      if (processedValue !== void 0) {
        truncatedObj[key] = processedValue;
        const newSize = estimateContentTokens(truncatedObj);
        if (newSize > maxTokens) {
          // Entries are visited smallest-first, so later ones will not fit either.
          delete truncatedObj[key];
          break;
        }
      }
    }
  }
  return Object.keys(truncatedObj).length > 0 ? truncatedObj : void 0;
}
|
|
887
|
+
/**
 * Last-resort pruning: removes whole root keys from a shallow copy of
 * `content` until estimateContentTokens reports it within `maxTokens`.
 *
 * Keys not matched by isImportantKey are removed first (in their original
 * order), then important keys. Each removal is logged with console.warn.
 * The input object itself is not modified.
 *
 * @param {object} content - Object to prune.
 * @param {number} maxTokens - Approximate token budget.
 * @returns {object} The pruned copy (possibly empty).
 */
function emergencyTruncate(content, maxTokens) {
  const remaining = { ...content };
  const allKeys = Object.keys(remaining);
  // Equivalent to a stable sort on importance: unimportant keys go first.
  const removalOrder = [
    ...allKeys.filter((name) => !isImportantKey(name)),
    ...allKeys.filter((name) => isImportantKey(name))
  ];
  let cursor = 0;
  while (cursor < removalOrder.length && estimateContentTokens(remaining) > maxTokens) {
    const doomed = removalOrder[cursor];
    cursor += 1;
    delete remaining[doomed];
    console.warn(`   Emergency: removed "${doomed}"`);
  }
  return remaining;
}
|
|
697
902
|
/**
 * Gives a rough token count for a value: the length of its JSON
 * serialisation divided by 3, rounded up.
 *
 * @param {*} content - Value to measure.
 * @returns {number} Estimated tokens, or 0 when the value cannot be
 *   serialised (JSON.stringify throws or yields no string).
 */
function estimateContentTokens(content) {
  let serializedLength;
  try {
    serializedLength = JSON.stringify(content).length;
  } catch {
    return 0;
  }
  return Math.ceil(serializedLength / 3);
}
|
|
705
|
-
function truncateContentIfNeeded(content, maxTokens =
|
|
910
|
+
/**
 * Returns `content` unchanged when its estimated token count is within
 * `maxTokens`; otherwise returns a reduced object.
 *
 * Reduction first tries recursiveTruncate. If that does not yield a plain
 * object, the fallback is the original content when it is a plain object
 * (it is then pruned key by key by emergencyTruncate below), or a bare
 * `{ _error }` marker otherwise. The fallback deliberately does NOT embed the
 * original content: doing so made the fallback larger than the input, so the
 * emergency pass always deleted every key and returned `{}`.
 *
 * @param {*} content - Page content to fit into the budget.
 * @param {number} [maxTokens=1e5] - Approximate token budget.
 * @returns {*} The original content, or a reduced plain object.
 */
function truncateContentIfNeeded(content, maxTokens = 1e5) {
  const estimatedTokens = estimateContentTokens(content);
  if (estimatedTokens <= maxTokens) {
    return content;
  }
  console.warn(`\u26A0\uFE0F Content too large (${estimatedTokens} tokens > ${maxTokens} limit), truncating recursively...`);
  const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
  const truncatedContent = recursiveTruncate(content, maxTokens, 0);
  let result;
  if (isPlainObject(truncatedContent)) {
    result = truncatedContent;
  } else if (isPlainObject(content)) {
    // Still over budget, so the emergency pass below prunes whole root keys.
    result = content;
  } else {
    result = { _error: "Content truncation failed" };
  }
  const finalTokens = estimateContentTokens(result);
  const preservedKeys = Object.keys(result).length;
  const originalKeys = Object.keys(content).length;
  console.log(`\u2705 Content truncated: ${estimatedTokens} \u2192 ${finalTokens} tokens (preserved ${preservedKeys}/${originalKeys} root keys)`);
  if (finalTokens > maxTokens) {
    console.error(`\u274C Recursive truncation insufficient (${finalTokens} > ${maxTokens}), performing emergency truncation...`);
    return emergencyTruncate(result, maxTokens);
  }
  return result;
}
|
|
734
931
|
|
|
735
932
|
function shouldGenerateTemplate(umbracoData, urlItem) {
|
|
@@ -1067,7 +1264,7 @@ class TemplateGenerator {
|
|
|
1067
1264
|
const pageId = generatePageId(urlItem);
|
|
1068
1265
|
console.log(`Generating new template for ${pageId} (${urlItem.url})`);
|
|
1069
1266
|
const tokensBeforeTruncation = estimateContentTokens(pageContent);
|
|
1070
|
-
const truncatedContent = truncateContentIfNeeded(pageContent,
|
|
1267
|
+
const truncatedContent = truncateContentIfNeeded(pageContent, 65e3);
|
|
1071
1268
|
const tokensAfterTruncation = estimateContentTokens(truncatedContent);
|
|
1072
1269
|
if (tokensBeforeTruncation > tokensAfterTruncation) {
|
|
1073
1270
|
console.warn(`Page ${pageId} content truncated: ${tokensBeforeTruncation} -> ${tokensAfterTruncation} tokens`);
|
|
@@ -1392,17 +1589,18 @@ class LLMSFilesGenerator {
|
|
|
1392
1589
|
return `Information about ${urlItem.url}`;
|
|
1393
1590
|
}
|
|
1394
1591
|
sanitizeUrlForFilename(url) {
|
|
1395
|
-
if (url === "/")
|
|
1592
|
+
if (!url || url === "/")
|
|
1396
1593
|
return "index";
|
|
1397
|
-
}
|
|
1398
1594
|
let filename = url.replace(/^\//, "").replace(/\/$/, "").replace(/\//g, "-").replace(/--+/g, "-").replace(/^-+|-+$/g, "");
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1595
|
+
filename = slugify(filename, {
|
|
1596
|
+
lowercase: true,
|
|
1597
|
+
separator: "-"
|
|
1598
|
+
});
|
|
1599
|
+
if (!filename)
|
|
1600
|
+
filename = `index-${Date.now()}`;
|
|
1601
|
+
if (/^[.-]/.test(filename))
|
|
1602
|
+
filename = `page-${filename.replace(/^[.-]+/, "")}`;
|
|
1603
|
+
return filename;
|
|
1406
1604
|
}
|
|
1407
1605
|
getLLMSFilePath(fullPath) {
|
|
1408
1606
|
const filename = basename(fullPath);
|
package/dist/module.json
CHANGED