@voicenter-team/nuxt-llms-generator 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +617 -0
- package/dist/chunks/llms-files-generator.mjs +1513 -0
- package/dist/module.cjs +5 -0
- package/dist/module.d.mts +138 -0
- package/dist/module.d.ts +138 -0
- package/dist/module.json +8 -0
- package/dist/module.mjs +5 -0
- package/dist/shared/nuxt-llms-generator.dc009f50.mjs +327 -0
- package/dist/types.d.mts +8 -0
- package/dist/types.d.ts +8 -0
- package/package.json +65 -0
|
@@ -0,0 +1,1513 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
2
|
+
import { join, dirname } from 'path';
|
|
3
|
+
import Mustache from 'mustache';
|
|
4
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
5
|
+
import { createHash } from 'crypto';
|
|
6
|
+
import { JSONPath } from 'jsonpath-plus';
|
|
7
|
+
import { T as TemplateError, E as ErrorCode, w as withErrorHandling } from '../shared/nuxt-llms-generator.dc009f50.mjs';
|
|
8
|
+
import '@nuxt/kit';
|
|
9
|
+
import 'zod';
|
|
10
|
+
|
|
11
|
+
/**
 * Wrapper around the Anthropic Messages API that asks the model for a
 * Mustache template describing one CMS page.
 *
 * Failed requests are retried up to `maxRetries` times, waiting
 * `retryDelayMs * attempt` milliseconds between attempts.
 */
class AnthropicClient {
  client;
  model;
  maxRetries = 3;
  retryDelayMs = 1e3;

  /**
   * @param {object} config
   * @param {string} config.anthropicApiKey - API key handed to the SDK client.
   * @param {string} [config.anthropicModel] - Model id; a default is used when falsy.
   */
  constructor(config) {
    this.client = new Anthropic({
      apiKey: config.anthropicApiKey
    });
    this.model = config.anthropicModel || "claude-3-5-sonnet-20241022";
  }

  /**
   * Request a template for a page, retrying on any failure.
   *
   * @param {object} request - Must provide `url`, `templateAlias`, `jpath` and `pageContent`.
   * @returns {Promise<{template: string, metadata: object}>} Parsed model answer.
   * @throws {Error} After the last attempt fails; the underlying error is
   *   attached as `cause` and also embedded in the message.
   */
  async generateTemplate(request) {
    const prompt = this.buildPrompt(request);
    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        const response = await this.client.messages.create({
          model: this.model,
          max_tokens: 4e3,
          temperature: 0.1,
          messages: [{
            role: "user",
            content: prompt
          }]
        });
        const content = response.content[0];
        // An empty content array is reported with the same explicit error as
        // a non-text block instead of an opaque TypeError.
        if (!content || content.type !== "text") {
          throw new Error("Unexpected response type from Anthropic API");
        }
        return this.parseResponse(content.text);
      } catch (error) {
        if (attempt === this.maxRetries) {
          // Keep the original error reachable for callers that inspect it.
          throw new Error(
            `Anthropic API failed after ${this.maxRetries} attempts: ${error}`,
            { cause: error }
          );
        }
        console.warn(`Anthropic API attempt ${attempt} failed, retrying...`, error);
        await this.delay(this.retryDelayMs * attempt);
      }
    }
    // Only reachable when maxRetries is below 1 and the loop never runs.
    throw new Error("Failed to generate template");
  }

  /**
   * Build the full instruction prompt for one page.
   *
   * @param {object} request - Source of `url`, `templateAlias`, `jpath` and
   *   `pageContent` (serialised as pretty-printed JSON).
   * @returns {string} Prompt text sent as the single user message.
   */
  buildPrompt(request) {
    return `You are an expert creating Mustache.js templates for **Voicenter - Leading Cloud Communications Platform**, converting Umbraco CMS content into LLMS.txt-optimized markdown.

**BUSINESS CONTEXT**: Voicenter provides enterprise cloud telephony solutions including Contact Centers, Business Phone Services, Mobile Communications, API integrations, and AI-powered voice tools for 50,000+ users globally.

**CURRENT PAGE ANALYSIS:**
- URL: ${request.url}
- Template: ${request.templateAlias}
- JSONPath: ${request.jpath}

**AVAILABLE DATA PROPERTIES:**
\`\`\`json
${JSON.stringify(request.pageContent, null, 2)}
\`\`\`

**CRITICAL REQUIREMENTS (2024 LLMS.txt Standard):**

1. **USE EXACT PROPERTY NAMES**:
\u274C Wrong: {{pageTitle}}
\u2705 Correct: {{pageTittle}} or {{pageDescription}} (match actual JSON keys)

2. **BUSINESS-FOCUSED CONTENT HIERARCHY**:
- H1: Clear service/feature name
- Blockquote: Value proposition
- H2: Key capabilities/benefits
- H3: Technical details/specs
- Lists: Features, integrations, use cases

3. **VOICENTER-SPECIFIC CONTENT MAPPING**:
- Service descriptions \u2192 Clear business benefits
- Technical features \u2192 User-friendly explanations
- Integration lists \u2192 Specific partner names
- API documentation \u2192 Implementation clarity
- Contact info \u2192 Business value context

4. **SMART CONTENT SELECTION**:
- Prioritize business value over technical jargon
- Include specific numbers/metrics when available
- Map complex nested arrays to structured lists
- Extract key differentiators and benefits

**TEMPLATE PATTERNS FOR VOICENTER CONTENT:**

**Service Pages Pattern:**
\`\`\`mustache
# {{serviceName}}

{{#serviceSubtitle}}
> {{serviceSubtitle}}
{{/serviceSubtitle}}

{{#serviceDescription}}
## Overview
{{serviceDescription}}
{{/serviceDescription}}

{{#serviceTools.0}}
## Key Features
{{#serviceTools}}
- {{textItem}}
{{/serviceTools}}
{{/serviceTools.0}}

{{#serviceLink}}
## Learn More
[Explore {{serviceName}} \u2192]({{serviceLink}})
{{/serviceLink}}
\`\`\`

**Feature/API Pages Pattern:**
\`\`\`mustache
# {{cardTitle}}

{{#cardText}}
> {{cardText}}
{{/cardText}}

{{#featureDescription}}
## How It Works
{{featureDescription}}
{{/featureDescription}}

{{#capabilities.0}}
## Capabilities
{{#capabilities}}
### {{name}}
{{description}}

{{/capabilities}}
{{/capabilities.0}}

## Business Benefits
- Reduces operational costs
- Improves customer experience
- Seamless integration with existing systems
\`\`\`

**CONTENT EXTRACTION RULES:**
1. **Identify primary content** from JSON structure
2. **Map nested arrays** to organized sections
3. **Extract business value** from technical descriptions
4. **Include contact/action items** for lead generation
5. **Maintain SEO-friendly** heading structure

**OUTPUT REQUIREMENTS:**
- Return ONLY the Mustache template
- NO explanations or code blocks
- Use actual property names from the provided JSON
- Focus on business value for AI consumption
- Follow LLMS.txt hierarchical structure
- **CLEAN MARKDOWN ONLY**: No HTML tags, entities, or attributes
- **NO HTML**: Use pure Markdown syntax (##, **, -, etc.)
- **NO ENTITIES**: Use actual characters, not & or /
- **NO ATTRIBUTES**: No dir="RTL", style="", class="" etc.

Generate the optimized Mustache template:`;
  }

  /**
   * Turn the raw model answer into a template plus metadata.
   *
   * When the answer contains a fenced code block (optionally tagged
   * `mustache`), the content of the first such block is the template;
   * otherwise the whole trimmed answer is used.
   *
   * @param {string} responseText - Text of the model's reply.
   * @returns {{template: string, metadata: object}}
   */
  parseResponse(responseText) {
    const codeBlockRegex = /```(?:mustache)?\n?([\s\S]*?)```/;
    const match = responseText.match(codeBlockRegex);
    const template = match ? match[1].trim() : responseText.trim();
    const metadata = this.extractMetadata(responseText);
    return {
      template,
      metadata
    };
  }

  /**
   * Pull a title (first `# ` heading line) and a description (first
   * blockquote line) out of the answer.
   *
   * @param {string} responseText - Text of the model's reply.
   * @returns {{title: (string|undefined), description: (string|undefined), tags: Array}}
   */
  extractMetadata(responseText) {
    const titleMatch = responseText.match(/^#\s+(.+)$/m);
    // Anchored to the start of a line so that a ">" inside ordinary text or a
    // Mustache partial tag ({{> name}}) is not mistaken for a blockquote.
    const descriptionMatch = responseText.match(/^[ \t]*>\s*(.+)$/m);
    return {
      title: titleMatch ? titleMatch[1] : void 0,
      description: descriptionMatch ? descriptionMatch[1] : void 0,
      tags: []
    };
  }

  /**
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>} Resolves after the timeout fires.
   */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Send a minimal request to check that the API is reachable.
   *
   * @returns {Promise<boolean>} `true` when a non-empty response came back,
   *   `false` when the call failed (the error is logged, not thrown).
   */
  async testConnection() {
    try {
      const response = await this.client.messages.create({
        model: this.model,
        max_tokens: 10,
        messages: [{
          role: "user",
          content: "Hello"
        }]
      });
      return response.content.length > 0;
    } catch (error) {
      console.error("Anthropic API connection test failed:", error);
      return false;
    }
  }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Heuristic inspection of a page's content object: guesses a content type,
 * flags images / HTML / lists / nested objects, picks key properties and
 * heading suggestions, and derives a 0-100 complexity score.
 */
class PromptAnalyzer {
  /**
   * Run every heuristic on one page.
   *
   * @param {object} pageContent - Content object of the page.
   * @param {object} urlItem - Entry providing `url` and `TemplateAlias`.
   * @returns {object} Analysis with `contentType`, `hasImages`, `hasHtml`,
   *   `hasLists`, `hasNestedContent`, `keyProperties`, `suggestedHeaders`
   *   and `complexityScore`.
   */
  analyzeContent(pageContent, urlItem) {
    const analysis = {
      contentType: this.determineContentType(pageContent, urlItem),
      hasImages: this.detectImages(pageContent),
      hasHtml: this.detectHtml(pageContent),
      hasLists: this.detectLists(pageContent),
      hasNestedContent: this.detectNestedContent(pageContent),
      keyProperties: this.identifyKeyProperties(pageContent),
      suggestedHeaders: this.suggestHeaders(pageContent, urlItem),
      complexityScore: 0
    };
    // The score depends on the flags above, so it is filled in afterwards.
    analysis.complexityScore = this.calculateComplexity(analysis, pageContent);
    return analysis;
  }

  /**
   * Classify the page from its URL and template alias; the first matching
   * rule wins, in the order written below.
   *
   * @returns {string} "homepage", "article", "listing", "detail", "form",
   *   "static" or "unknown".
   */
  determineContentType(pageContent, urlItem) {
    const { url, TemplateAlias } = urlItem;
    const alias = (TemplateAlias || "unknown").toLowerCase();
    const aliasHas = (...needles) => needles.some((needle) => alias.includes(needle));
    if (url === "/" || aliasHas("home", "index")) {
      return "homepage";
    }
    if (aliasHas("blog", "article", "news")) {
      return "article";
    }
    if (aliasHas("list", "collection", "category", "archive")) {
      return "listing";
    }
    if (aliasHas("detail", "product", "service", "camp")) {
      return "detail";
    }
    if (aliasHas("form", "contact", "register") || this.hasFormFields(pageContent)) {
      return "form";
    }
    if (aliasHas("about", "privacy", "terms", "static")) {
      return "static";
    }
    return "unknown";
  }

  /** @returns {boolean} Whether any key path contains an image-like word. */
  detectImages(content) {
    const imageKeys = ["image", "img", "photo", "picture", "banner", "logo", "icon"];
    return this.hasKeysContaining(content, imageKeys);
  }

  /** @returns {boolean} Whether any string value contains something tag-shaped. */
  detectHtml(content) {
    const htmlRegex = /<[^>]+>/;
    return this.hasValuesMatching(content, htmlRegex);
  }

  /** @returns {boolean} Whether a list-like key or a top-level array value exists. */
  detectLists(content) {
    const listKeys = ["list", "items", "array", "collection", "features", "services"];
    return this.hasKeysContaining(content, listKeys) || this.hasArrayValues(content);
  }

  /** @returns {boolean} Whether a top-level value is a plain object with two or more keys. */
  detectNestedContent(content) {
    return Object.values(content).some(
      (value) => typeof value === "object" && value !== null && !Array.isArray(value) && Object.keys(value).length >= 2
    );
  }

  /**
   * Select the key paths that look like titles, descriptions, body text or
   * links. Falls back to the first eight key paths when nothing matches.
   *
   * @returns {string[]} Dotted key paths in their original casing.
   */
  identifyKeyProperties(content) {
    // Candidate keys are lower-cased before matching, so the needles must be
    // lower-case too.
    const priorityKeys = [
      "pageTitle",
      "title",
      "name",
      "heading",
      "pageDescription",
      "description",
      "summary",
      "intro",
      "content",
      "body",
      "text",
      "mainContent",
      "url",
      "link",
      "action"
    ].map((priority) => priority.toLowerCase());
    const allKeys = this.getAllKeys(content);
    const foundKeys = allKeys.filter((key) => {
      const lowerKey = key.toLowerCase();
      return priorityKeys.some((priority) => lowerKey.includes(priority));
    });
    return foundKeys.length > 0 ? foundKeys : allKeys.slice(0, 8);
  }

  /**
   * Propose section headings based on which kinds of keys are present.
   * `urlItem` is accepted but not read.
   *
   * @returns {string[]} Markdown heading lines, always starting with the H1.
   */
  suggestHeaders(content, urlItem) {
    const headers = [];
    headers.push("# {{pageTitle}}");
    if (this.hasKeysContaining(content, ["description", "summary", "intro"])) {
      headers.push("## Overview");
    }
    if (this.hasKeysContaining(content, ["feature", "service", "benefit"])) {
      headers.push("## Features");
    }
    if (this.hasKeysContaining(content, ["detail", "information", "about"])) {
      headers.push("## Details");
    }
    if (this.hasKeysContaining(content, ["contact", "form", "action"])) {
      headers.push("## Contact Information");
    }
    return headers;
  }

  /**
   * Sum the number of top-level keys with fixed weights for each detected
   * feature and content type.
   *
   * @returns {number} Score capped at 100.
   */
  calculateComplexity(analysis, content) {
    let score = 0;
    score += Object.keys(content).length;
    score += analysis.hasImages ? 10 : 0;
    score += analysis.hasHtml ? 15 : 0;
    score += analysis.hasLists ? 10 : 0;
    score += analysis.hasNestedContent ? 20 : 0;
    if (analysis.contentType === "homepage") {
      score += 25;
    }
    if (analysis.contentType === "listing") {
      score += 15;
    }
    return Math.min(score, 100);
  }

  /** @returns {boolean} Whether any key path contains a form-related word. */
  hasFormFields(content) {
    const formKeys = ["input", "button", "form", "field", "submit", "action"];
    return this.hasKeysContaining(content, formKeys);
  }

  /**
   * @param {object} obj - Object whose key paths are searched.
   * @param {string[]} searchKeys - Lower-case substrings to look for.
   * @returns {boolean} Whether any lower-cased key path contains any needle.
   */
  hasKeysContaining(obj, searchKeys) {
    const allKeys = this.getAllKeys(obj).map((key) => key.toLowerCase());
    return searchKeys.some(
      (searchKey) => allKeys.some((key) => key.includes(searchKey))
    );
  }

  /** @returns {boolean} Whether any collected string value matches `regex`. */
  hasValuesMatching(obj, regex) {
    const allValues = this.getAllValues(obj);
    return allValues.some(
      (value) => typeof value === "string" && regex.test(value)
    );
  }

  /** @returns {boolean} Whether any top-level value is an array. */
  hasArrayValues(obj) {
    return Object.values(obj).some((value) => Array.isArray(value));
  }

  /**
   * Collect dotted key paths, descending into plain objects only (arrays
   * contribute their own key but not their elements).
   *
   * @returns {string[]}
   */
  getAllKeys(obj, prefix = "") {
    let keys = [];
    for (const [key, value] of Object.entries(obj)) {
      const fullKey = prefix ? `${prefix}.${key}` : key;
      keys.push(fullKey);
      if (typeof value === "object" && value !== null && !Array.isArray(value)) {
        keys = keys.concat(this.getAllKeys(value, fullKey));
      }
    }
    return keys;
  }

  /**
   * Collect every value reachable from `obj`, descending into nested objects
   * AND arrays so that strings held in list items are seen by value-based
   * checks such as `detectHtml`.
   *
   * @returns {Array} Containers are included alongside their contents.
   */
  getAllValues(obj) {
    const values = [];
    for (const value of Object.values(obj)) {
      values.push(value);
      if (typeof value === "object" && value !== null) {
        for (const nested of this.getAllValues(value)) {
          values.push(nested);
        }
      }
    }
    return values;
  }
}
|
|
366
|
+
|
|
367
|
+
/**
 * File-backed cache of generated templates, keyed by page id and stored as
 * `templates.json` inside the given cache directory. Every mutation is
 * written to disk immediately.
 */
class LLMSCache {
  cacheFilePath;
  cache = {};

  /**
   * @param {string} cacheDir - Directory holding the cache file; created
   *   (recursively) when it does not exist.
   */
  constructor(cacheDir) {
    if (!existsSync(cacheDir)) {
      mkdirSync(cacheDir, { recursive: true });
    }
    this.cacheFilePath = join(cacheDir, "templates.json");
    this.loadCache();
  }

  /**
   * Read the cache file if present. Any failure (unreadable file, invalid
   * JSON, or JSON that is not a plain object) is logged and leaves an empty
   * cache instead of throwing.
   */
  loadCache() {
    try {
      if (existsSync(this.cacheFilePath)) {
        const cacheContent = readFileSync(this.cacheFilePath, "utf-8");
        const parsed = JSON.parse(cacheContent);
        // `null`, arrays and primitives are valid JSON but would break every
        // later lookup, so they are treated like a corrupt file.
        if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
          throw new Error("Cache file does not contain a JSON object");
        }
        this.cache = parsed;
      }
    } catch (error) {
      console.warn("Failed to load template cache:", error);
      this.cache = {};
    }
  }

  /**
   * Write the cache to disk, creating the directory if needed. Failures are
   * logged and not rethrown.
   */
  saveCache() {
    try {
      const cacheDir = dirname(this.cacheFilePath);
      if (!existsSync(cacheDir)) {
        mkdirSync(cacheDir, { recursive: true });
      }
      writeFileSync(this.cacheFilePath, JSON.stringify(this.cache, null, 2));
    } catch (error) {
      console.error("Failed to save template cache:", error);
    }
  }

  /**
   * @param {string} pageId
   * @returns {boolean} Whether an entry is stored under `pageId`.
   */
  hasTemplate(pageId) {
    // Own-property check: ids such as "constructor" or "toString" must not be
    // answered by Object.prototype.
    return Object.prototype.hasOwnProperty.call(this.cache, pageId);
  }

  /**
   * @param {string} pageId
   * @returns {object|null} Template record (`pageId`, `templatePath`,
   *   `template`, `hash`, `metadata`) or `null` when not cached.
   */
  getTemplate(pageId) {
    const cached = this.hasTemplate(pageId) ? this.cache[pageId] : void 0;
    if (!cached) {
      return null;
    }
    return {
      pageId,
      templatePath: "",
      template: cached.template,
      hash: cached.hash,
      metadata: {
        // Contains the URL from setTemplate
        url: cached.metadata.pageId,
        templateAlias: cached.metadata.templateAlias,
        jpath: cached.metadata.jpath,
        generatedAt: new Date(cached.metadata.lastUpdated)
      }
    };
  }

  /**
   * @param {string} pageId
   * @returns {string|null} Stored hash, or `null` when absent or empty.
   */
  getTemplateHash(pageId) {
    if (!this.hasTemplate(pageId)) {
      return null;
    }
    return this.cache[pageId]?.hash || null;
  }

  /**
   * Store or replace an entry, stamp it with the current time, and persist.
   *
   * @param {string} pageId
   * @param {string} template
   * @param {string} hash
   * @param {object} metadata - Copied into the entry; `lastUpdated` is overwritten.
   */
  setTemplate(pageId, template, hash, metadata) {
    this.cache[pageId] = {
      hash,
      template,
      metadata: {
        ...metadata,
        lastUpdated: /* @__PURE__ */ new Date()
      }
    };
    this.saveCache();
  }

  /** Delete one entry (if any) and persist. */
  removeTemplate(pageId) {
    delete this.cache[pageId];
    this.saveCache();
  }

  /** Drop every entry and persist. */
  clearCache() {
    this.cache = {};
    this.saveCache();
  }

  /**
   * @returns {{totalTemplates: number, templatesByAlias: object,
   *   oldestTemplate: (Date|null), newestTemplate: (Date|null)}}
   */
  getCacheStats() {
    const templates = Object.values(this.cache);
    const templatesByAlias = {};
    let oldestTemplate = null;
    let newestTemplate = null;
    for (const template of templates) {
      const alias = template.metadata.templateAlias;
      templatesByAlias[alias] = (templatesByAlias[alias] || 0) + 1;
      const date = new Date(template.metadata.lastUpdated);
      if (!oldestTemplate || date < oldestTemplate) {
        oldestTemplate = date;
      }
      if (!newestTemplate || date > newestTemplate) {
        newestTemplate = date;
      }
    }
    return {
      totalTemplates: templates.length,
      templatesByAlias,
      oldestTemplate,
      newestTemplate
    };
  }

  /**
   * Remove entries last updated more than `maxAgeMs` milliseconds ago.
   * The file is rewritten only when something was removed.
   *
   * @param {number} maxAgeMs
   * @returns {number} Number of entries removed.
   */
  cleanupOldTemplates(maxAgeMs) {
    const cutoffDate = new Date(Date.now() - maxAgeMs);
    let removedCount = 0;
    for (const [pageId, template] of Object.entries(this.cache)) {
      const templateDate = new Date(template.metadata.lastUpdated);
      if (templateDate < cutoffDate) {
        delete this.cache[pageId];
        removedCount++;
      }
    }
    if (removedCount > 0) {
      this.saveCache();
    }
    return removedCount;
  }

  /** @returns {object} Shallow copy of the id-to-entry map. */
  getAllTemplates() {
    return { ...this.cache };
  }
}
|
|
484
|
+
|
|
485
|
+
/**
 * Compute a SHA-256 fingerprint of a page's structure.
 *
 * The page data is first reduced by `processPageDataForHash`; the digest is
 * then taken over the sorted TOP-LEVEL key names of that result joined with
 * "|". Nested keys and value types do not feed into the digest.
 *
 * @param {object} pageData - Page content object.
 * @param {object} [options]
 * @param {boolean} [options.excludeChildren=true] - Skip keys named "children".
 * @param {string[]} [options.excludeKeys=[]] - Keys to skip.
 * @param {string[]} [options.includeOnlyKeys=[]] - When non-empty, the only keys kept.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function generatePageStructureHash(pageData, options = {}) {
  const { excludeChildren = true, excludeKeys = [], includeOnlyKeys = [] } = options;
  const structure = processPageDataForHash(pageData, { excludeChildren, excludeKeys, includeOnlyKeys });
  const keySignature = Object.keys(structure).sort().join("|");
  const hasher = createHash("sha256");
  hasher.update(keySignature);
  return hasher.digest("hex");
}
|
|
500
|
+
/**
 * Reduce a data object to its shape: every kept key maps either to the shape
 * of its nested plain object (same rules, same options) or to the type label
 * returned by `getValueType`.
 *
 * The key filters apply at every nesting level.
 *
 * @param {object} data - Object to describe.
 * @param {object} options
 * @param {boolean} options.excludeChildren - Skip keys named "children".
 * @param {string[]} [options.excludeKeys=[]] - Keys to skip.
 * @param {string[]} [options.includeOnlyKeys=[]] - When non-empty, the only keys kept.
 * @returns {object} Shape description.
 */
function processPageDataForHash(data, options) {
  const { excludeChildren, excludeKeys = [], includeOnlyKeys = [] } = options;
  const restrictToAllowList = includeOnlyKeys.length > 0;

  const isSkipped = (key) =>
    (excludeChildren && key === "children") ||
    excludeKeys.includes(key) ||
    (restrictToAllowList && !includeOnlyKeys.includes(key));

  const shape = {};
  Object.entries(data).forEach(([key, value]) => {
    if (isSkipped(key)) {
      return;
    }
    const isPlainContainer = typeof value === "object" && value !== null && !Array.isArray(value);
    shape[key] = isPlainContainer ? processPageDataForHash(value, options) : getValueType(value);
  });
  return shape;
}
|
|
521
|
+
/**
 * Label a value with a coarse type name.
 *
 * @param {*} value
 * @returns {string} "null" for null, "array[<length>]" for arrays,
 *   otherwise the result of `typeof`.
 */
function getValueType(value) {
  if (Array.isArray(value)) {
    return `array[${value.length}]`;
  }
  // `typeof` already yields "object" for every remaining non-null object.
  return value === null ? "null" : typeof value;
}
|
|
530
|
+
|
|
531
|
+
/**
 * Look up one page's content inside `umbracoData.SiteData` with a JSONPath
 * expression and return it without its `children` property.
 *
 * @param {object} umbracoData - Object exposing the site tree as `SiteData`.
 * @param {string} jpath - JSONPath expression selecting the page.
 * @returns {*} The first match (children stripped), or `null` when nothing
 *   matched or the lookup threw (the error is logged).
 */
function extractPageContent(umbracoData, jpath) {
  try {
    const found = JSONPath({
      path: jpath,
      json: umbracoData.SiteData,
      wrap: false
    });
    const isEmptyList = Array.isArray(found) && found.length === 0;
    if (!found || isEmptyList) {
      return null;
    }
    // When the query yields an array, only its first element is used.
    const firstMatch = Array.isArray(found) ? found[0] : found;
    return excludeChildrenFromContent(firstMatch);
  } catch (error) {
    console.error(`Failed to extract content for path ${jpath}:`, error);
    return null;
  }
}
|
|
548
|
+
/**
 * Return a shallow copy of `content` without its top-level `children`
 * property. Falsy values and non-objects are returned unchanged.
 *
 * @param {*} content
 * @returns {*} Copy without `children`, or the input itself when it is not an object.
 */
function excludeChildrenFromContent(content) {
  const isObjectLike = Boolean(content) && typeof content === "object";
  if (!isObjectLike) {
    return content;
  }
  // Rest destructuring copies every own enumerable property except `children`.
  const { children, ...withoutChildren } = content;
  return withoutChildren;
}
|
|
558
|
+
/**
 * Build the cache key for a page: its template alias and node id joined by
 * an underscore.
 *
 * @param {object} urlItem - Entry providing `TemplateAlias` and `nodeID`.
 * @returns {string} `<TemplateAlias>_<nodeID>`.
 */
function generatePageId(urlItem) {
  const { TemplateAlias: alias, nodeID: nodeId } = urlItem;
  return `${alias}_${nodeId}`;
}
|
|
561
|
+
/**
 * Roughly estimate how many tokens a value occupies: the length of its JSON
 * serialisation divided by four, rounded up.
 *
 * @param {*} content - Value to measure.
 * @returns {number} Estimated token count, or 0 when the value cannot be
 *   serialised to a string.
 */
function estimateContentTokens(content) {
  const CHARS_PER_TOKEN = 4;
  let serialized;
  try {
    serialized = JSON.stringify(content);
    // `serialized` is undefined for values JSON cannot represent; reading
    // `.length` then throws and lands in the catch below.
    return Math.ceil(serialized.length / CHARS_PER_TOKEN);
  } catch {
    return 0;
  }
}
|
|
569
|
+
/**
 * Shrink a content object whose estimated token count exceeds `maxTokens`.
 *
 * Top-level properties are visited from largest to smallest estimate. While
 * the object is still over the limit, arrays longer than 10 items are cut to
 * their first 10 and strings longer than 5000 characters are cut to 5000
 * plus "...". One pass only, so the result may still exceed the limit.
 *
 * @param {object} content - Content object; never mutated.
 * @param {number} [maxTokens=180000] - Token budget.
 * @returns {object} `content` itself when within budget, otherwise a
 *   shallow copy with oversized values truncated.
 */
function truncateContentIfNeeded(content, maxTokens = 18e4) {
  const originalEstimate = estimateContentTokens(content);
  if (originalEstimate <= maxTokens) {
    return content;
  }
  console.warn(`Content too large (${originalEstimate} tokens > ${maxTokens} limit), truncating...`);

  const trimmed = { ...content };
  const weightOf = (key) => estimateContentTokens({ [key]: trimmed[key] });
  const keysBySize = Object.keys(trimmed).sort((left, right) => weightOf(right) - weightOf(left));

  for (const key of keysBySize) {
    if (estimateContentTokens(trimmed) <= maxTokens) {
      break;
    }
    const current = trimmed[key];
    const isLongArray = Array.isArray(current) && current.length > 10;
    const isLongString = typeof current === "string" && current.length > 5e3;
    if (isLongArray) {
      trimmed[key] = current.slice(0, 10);
      console.warn(`Truncated array ${key} from ${current.length} to 10 items`);
    } else if (isLongString) {
      trimmed[key] = current.substring(0, 5e3) + "...";
      console.warn(`Truncated string ${key} from ${current.length} to 5000 chars`);
    }
  }

  const finalEstimate = estimateContentTokens(trimmed);
  console.log(`Content truncated from ${originalEstimate} to ${finalEstimate} tokens`);
  return trimmed;
}
|
|
598
|
+
|
|
599
|
+
/**
 * Decide whether a page should get a template.
 *
 * @param {object} umbracoData - Site data passed through to `extractPageContent`.
 * @param {object} urlItem - Entry providing `Jpath` and `url`.
 * @returns {boolean} `false` when no content is found, when the page's
 *   `hidePage` flag is "1" or 1, or when the check itself throws;
 *   `true` otherwise. Each `false` outcome is logged.
 */
function shouldGenerateTemplate(umbracoData, urlItem) {
  try {
    const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
    if (!pageContent) {
      console.warn(`No content found for ${urlItem.url}, skipping template generation`);
      return false;
    }
    const { hidePage } = pageContent;
    const isHidden = hidePage === "1" || hidePage === 1;
    if (!isHidden) {
      return true;
    }
    console.log(`Page ${urlItem.url} is hidden (hidePage: ${hidePage}), skipping template generation`);
    return false;
  } catch (error) {
    console.error(`Error checking visibility for ${urlItem.url}:`, error);
    return false;
  }
}
|
|
617
|
+
/**
 * List the URL entries for which a template should be generated.
 *
 * @param {object} umbracoData - Site data exposing `urlList`.
 * @returns {Array<object>} Entries of `urlList` accepted by
 *   `shouldGenerateTemplate`, in their original order.
 */
function getVisiblePages(umbracoData) {
  const isVisible = (urlItem) => shouldGenerateTemplate(umbracoData, urlItem);
  const { urlList } = umbracoData;
  return urlList.filter(isVisible);
}
|
|
622
|
+
/**
 * Count how many URL entries are visible, hidden, or unresolvable.
 *
 * An entry counts as `error` when no content is found for it or the lookup
 * throws, as `hidden` when its `hidePage` flag is "1" or 1, and as
 * `visible` otherwise.
 *
 * @param {object} umbracoData - Site data exposing `urlList`.
 * @returns {{total: number, visible: number, hidden: number, error: number}}
 */
function getPageVisibilityStats(umbracoData) {
  const tally = { visible: 0, hidden: 0, error: 0 };

  for (const urlItem of umbracoData.urlList) {
    let bucket = "error";
    try {
      const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
      if (pageContent) {
        const { hidePage } = pageContent;
        bucket = hidePage === "1" || hidePage === 1 ? "hidden" : "visible";
      }
    } catch {
      bucket = "error";
    }
    tally[bucket] += 1;
  }

  return {
    total: umbracoData.urlList.length,
    visible: tally.visible,
    hidden: tally.hidden,
    error: tally.error
  };
}
|
|
650
|
+
|
|
651
|
+
/**
 * Finds and removes cached templates that no longer belong to a live,
 * visible page.
 */
class TemplateCleanup {
  cache;

  /**
   * @param {string} cacheDir - Directory handed to the `LLMSCache` constructor.
   */
  constructor(cacheDir) {
    this.cache = new LLMSCache(cacheDir);
  }

  /**
   * Find templates that no longer have corresponding pages.
   *
   * @param {object} umbracoData - Site data exposing `urlList`.
   * @returns {Array<object>} One record per cached id that no current URL
   *   entry maps to, with reason "page_deleted".
   */
  findOrphanedTemplates(umbracoData) {
    const { totalTemplates } = this.cache.getCacheStats();
    console.log(`\u{1F50D} Checking ${totalTemplates} cached templates for orphans...`);

    const livePageIds = /* @__PURE__ */ new Set();
    for (const urlItem of umbracoData.urlList) {
      try {
        livePageIds.add(generatePageId(urlItem));
      } catch (error) {
        console.warn(`Error generating page ID for ${urlItem.url}:`, error);
      }
    }

    return Object.entries(this.cache.getAllTemplates())
      .filter(([pageId]) => !livePageIds.has(pageId))
      .map(([pageId, { metadata }]) => ({
        pageId,
        templateAlias: metadata.templateAlias,
        lastUpdated: new Date(metadata.lastUpdated),
        // URL stored in pageId field from cache
        url: metadata.pageId,
        reason: "page_deleted"
      }));
  }

  /**
   * Find templates for pages that are now hidden (hidePage: "1").
   *
   * @param {object} umbracoData - Site data exposing `urlList`.
   * @returns {Array<object>} One record per cached page whose content has
   *   `hidePage` equal to "1" or 1, with reason "page_hidden".
   */
  findHiddenPageTemplates(umbracoData) {
    const cachedById = this.cache.getAllTemplates();
    const hiddenTemplates = [];

    for (const urlItem of umbracoData.urlList) {
      try {
        const pageId = generatePageId(urlItem);
        const cachedEntry = cachedById[pageId];
        if (!cachedEntry) {
          continue;
        }
        // Module-level lookup, not this class's simplified method of the same name.
        const hideFlag = extractPageContent(umbracoData, urlItem.Jpath)?.hidePage;
        if (hideFlag !== "1" && hideFlag !== 1) {
          continue;
        }
        hiddenTemplates.push({
          pageId,
          templateAlias: cachedEntry.metadata.templateAlias,
          lastUpdated: new Date(cachedEntry.metadata.lastUpdated),
          url: urlItem.url,
          reason: "page_hidden"
        });
      } catch (error) {
        console.warn(`Error checking hidden status for ${urlItem.url}:`, error);
      }
    }
    return hiddenTemplates;
  }

  /**
   * Perform cleanup of orphaned templates.
   *
   * @param {object} umbracoData - Site data exposing `urlList`.
   * @param {object} [options]
   * @param {boolean} [options.removeOrphaned=true] - Include templates of deleted pages.
   * @param {boolean} [options.removeHidden=true] - Include templates of hidden pages.
   * @param {boolean} [options.dryRun=false] - Report only; remove nothing.
   * @returns {{totalTemplatesBeforeCleanup: number, orphanedTemplatesFound: number,
   *   templatesRemoved: number, orphanedTemplates: Array<object>}}
   */
  performCleanup(umbracoData, options = {}) {
    const { removeOrphaned = true, removeHidden = true, dryRun = false } = options;
    const { totalTemplates: totalTemplatesBeforeCleanup } = this.cache.getCacheStats();

    const candidates = [];
    if (removeOrphaned) {
      candidates.push(...this.findOrphanedTemplates(umbracoData));
    }
    if (removeHidden) {
      candidates.push(...this.findHiddenPageTemplates(umbracoData));
    }

    let templatesRemoved = 0;
    console.log(`\u{1F5D1}\uFE0F Found ${candidates.length} orphaned templates`);
    if (candidates.length > 0) {
      for (const entry of candidates) {
        console.log(` - ${entry.pageId} (${entry.reason}): ${entry.templateAlias} - ${entry.url}`);
      }
      if (dryRun) {
        console.log(`\u{1F50D} DRY RUN: Would remove ${candidates.length} templates`);
      } else {
        for (const entry of candidates) {
          this.cache.removeTemplate(entry.pageId);
          templatesRemoved += 1;
        }
        console.log(`\u2705 Removed ${templatesRemoved} orphaned templates`);
      }
    }

    return {
      totalTemplatesBeforeCleanup,
      orphanedTemplatesFound: candidates.length,
      templatesRemoved,
      orphanedTemplates: candidates
    };
  }

  /**
   * Get cleanup recommendations without performing cleanup.
   *
   * @param {object} umbracoData - Site data exposing `urlList`.
   * @returns {{shouldCleanup: boolean, orphanedCount: number, recommendations: string[]}}
   */
  getCleanupRecommendations(umbracoData) {
    const deletedCount = this.findOrphanedTemplates(umbracoData).length;
    const hiddenCount = this.findHiddenPageTemplates(umbracoData).length;
    const orphanedCount = deletedCount + hiddenCount;

    const recommendations = [];
    if (deletedCount > 0) {
      recommendations.push(`${deletedCount} templates for deleted pages should be removed`);
    }
    if (hiddenCount > 0) {
      recommendations.push(`${hiddenCount} templates for hidden pages should be removed`);
    }
    if (orphanedCount === 0) {
      recommendations.push("No cleanup needed - all templates are current");
    }

    return {
      shouldCleanup: orphanedCount > 0,
      orphanedCount,
      recommendations
    };
  }

  /**
   * Extract page content using JSONPath (simplified version): the path is
   * split on "." and followed property by property from `umbracoData`,
   * ignoring "$" segments.
   *
   * @param {object} umbracoData - Root object to walk.
   * @param {string} jpath - Dot-separated path.
   * @returns {*} Value at the path, or `null` when a segment is missing or
   *   the walk throws (logged as a warning).
   */
  extractPageContent(umbracoData, jpath) {
    try {
      let node = umbracoData;
      for (const segment of jpath.split(".")) {
        if (segment === "$") {
          continue;
        }
        const canDescend = node && typeof node === "object" && segment in node;
        if (!canDescend) {
          return null;
        }
        node = node[segment];
      }
      return node;
    } catch (error) {
      console.warn(`Error extracting content for ${jpath}:`, error);
      return null;
    }
  }
}
|
|
803
|
+
/**
 * Run the template cleanup once, honouring the caller's feature flags.
 *
 * @param {object} umbracoData - Site data forwarded to TemplateCleanup.performCleanup.
 * @param {string} cacheDir - Directory handed to the TemplateCleanup constructor.
 * @param {object} [options]
 * @param {boolean} [options.enableAutoCleanup=true] - When false nothing runs and null is returned.
 * @param {boolean} [options.cleanupOrphaned=true] - Forwarded as `removeOrphaned`.
 * @param {boolean} [options.cleanupHidden=true] - Forwarded as `removeHidden`.
 * @param {boolean} [options.dryRun=false] - Forwarded unchanged.
 * @returns {Promise<object|null>} The stats object from performCleanup, or null when disabled.
 */
async function performAutomaticCleanup(umbracoData, cacheDir, options = {}) {
  const autoCleanupEnabled = options.enableAutoCleanup === void 0 ? true : options.enableAutoCleanup;
  if (!autoCleanupEnabled) {
    return null;
  }
  const removeOrphaned = options.cleanupOrphaned === void 0 ? true : options.cleanupOrphaned;
  const removeHidden = options.cleanupHidden === void 0 ? true : options.cleanupHidden;
  const dryRun = options.dryRun === void 0 ? false : options.dryRun;

  const cleaner = new TemplateCleanup(cacheDir);
  console.log("\u{1F9F9} Performing automatic template cleanup...");
  const stats = cleaner.performCleanup(umbracoData, { removeOrphaned, removeHidden, dryRun });

  // Only report when something was actually removed.
  const removedCount = stats.templatesRemoved;
  if (removedCount > 0) {
    console.log(`\u2705 Cleaned up ${removedCount} orphaned templates`);
  }
  return stats;
}
|
|
825
|
+
|
|
826
|
+
/**
 * Validator that checks whether a template parses as Mustache and, when it
 * does not, attempts a repair by dropping section tags that have no partner.
 */
class MustacheSyntaxValidator {
  name = "mustache-syntax";
  canFix = true;

  /**
   * Parse the template with Mustache and report syntax errors.
   *
   * @param {string} template - Mustache template source.
   * @returns {{isValid: boolean, errors: string[], warnings: string[], fixedTemplate?: string}}
   *   On a parse failure `isValid` is false and, when `canFix` is set, a
   *   repaired candidate is attached as `fixedTemplate` (it is NOT re-parsed here).
   */
  validate(template) {
    const result = {
      isValid: true,
      errors: [],
      warnings: []
    };
    try {
      Mustache.parse(template);
      return result;
    } catch (error) {
      result.isValid = false;
      result.errors.push(`Mustache syntax error: ${error.message}`);
      if (this.canFix) {
        try {
          const fixedTemplate = this.fix(template);
          result.fixedTemplate = fixedTemplate;
          result.warnings.push("Template was automatically fixed");
        } catch (fixError) {
          result.errors.push(`Could not fix template: ${fixError.message}`);
        }
      }
      return result;
    }
  }

  /**
   * Remove section tags that cannot be paired, then collapse the blank lines
   * left behind.
   *
   * Pairing is done with a stack so that:
   *  - inverted sections (`{{^name}}`) count as openers and keep their `{{/name}}`;
   *  - a duplicated opener (`{{#a}}..{{#a}}..{{/a}}`) loses only the extra tag;
   *  - mis-nested openers that are skipped over by a closer are dropped.
   * Whitespace inside the braces (`{{# name }}`) is tolerated.
   *
   * @param {string} template - Mustache template source.
   * @returns {string} Template with dangling section tags removed.
   */
  fix(template) {
    const sectionTagPattern = /\{\{\s*([#^\/])\s*([a-zA-Z0-9_.]+)\s*\}\}/g;
    const openStack = [];
    const danglingTags = [];

    for (const match of template.matchAll(sectionTagPattern)) {
      const tag = {
        name: match[2],
        start: match.index,
        end: match.index + match[0].length
      };
      if (match[1] !== "/") {
        openStack.push(tag);
        continue;
      }
      // Closing tag: pair it with the nearest still-open section of the same name.
      const openerIndex = openStack.map((open) => open.name).lastIndexOf(tag.name);
      if (openerIndex === -1) {
        danglingTags.push(tag);
        continue;
      }
      // Anything opened after the matched opener was never closed in time.
      danglingTags.push(...openStack.splice(openerIndex + 1));
      openStack.pop();
    }
    // Openers still on the stack never saw a closing tag.
    danglingTags.push(...openStack);
    danglingTags.sort((a, b) => a.start - b.start);

    // Rebuild the template, skipping the exact spans of the dangling tags.
    let fixedTemplate = "";
    let cursor = 0;
    for (const tag of danglingTags) {
      fixedTemplate += template.slice(cursor, tag.start);
      cursor = tag.end;
    }
    fixedTemplate += template.slice(cursor);

    return fixedTemplate.replace(/\n\s*\n\s*\n/g, "\n\n");
  }
}
|
|
890
|
+
/**
 * Advisory validator for the markdown/Mustache layout of a template.
 * It only ever emits warnings; `isValid` is always true and `errors` is empty.
 */
class TemplateStructureValidator {
  name = "template-structure";

  /**
   * @param {string} template - Template source to inspect.
   * @returns {{isValid: boolean, errors: string[], warnings: string[]}}
   */
  validate(template) {
    const warnings = [];

    // A level-1 heading line ("# ...") anywhere in the template satisfies this check.
    const hasTopHeading = template.match(/^#\s+/m) !== null;
    if (!hasTopHeading) {
      warnings.push("Template should start with a heading (# Title)");
    }

    // Lines beginning with four or more "#" characters.
    const deepHeadingLines = template.match(/^#{4,}/gm) || [];
    if (deepHeadingLines.length > 0) {
      warnings.push("Template has deeply nested headings (4+ levels), consider flattening structure");
    }

    // An opening section tag followed only by whitespace and a closing tag.
    const emptySections = template.match(/\{\{#\w+\}\}\s*\{\{\/\w+\}\}/g);
    if (emptySections) {
      warnings.push(`Found ${emptySections.length} empty sections that may not render content`);
    }

    const misspelledTitle = template.match(/\{\{\s*pageTittle\s*\}\}/g);
    if (misspelledTitle) {
      warnings.push('Found "pageTittle" - check if this should be "pageTitle"');
    }

    return {
      isValid: true,
      errors: [],
      warnings
    };
  }
}
|
|
916
|
+
/**
 * Advisory validator that checks whether a template references enough
 * content (title, description, a minimum number of variables).
 * It only emits warnings; `isValid` is always true.
 */
class ContentCompletenessValidator {
  name = "content-completeness";

  /**
   * @param {string} template - Mustache template source.
   * @returns {{isValid: boolean, errors: string[], warnings: string[]}}
   */
  validate(template) {
    const result = {
      isValid: true,
      errors: [],
      warnings: []
    };
    const variables = this.extractVariables(template);
    const mentions = (needle) => variables.some((v) => v.toLowerCase().includes(needle));

    if (!mentions("title")) {
      result.warnings.push("Template missing title variable (pageTitle, title, etc.)");
    }
    if (!mentions("description")) {
      result.warnings.push("Template missing description variable");
    }
    if (template.length < 50) {
      result.warnings.push("Template is very short, may not provide sufficient content");
    }
    // Only plain names count here; dotted or indexed lookups are excluded.
    const sectionsOnly = variables.filter((v) => !v.includes(".") && !v.includes("["));
    if (sectionsOnly.length < 2) {
      result.warnings.push("Template has limited content variables, consider adding more sections");
    }
    return result;
  }

  /**
   * Collect the names of interpolated variables in the template.
   *
   * Only value tags are returned: `{{name}}`, `{{{name}}}` and `{{& name}}`.
   * Section openers/closers (`#`, `^`, `/`), comments (`!`), partials (`>`)
   * and delimiter changes (`=`) are not variables and are skipped, so a
   * comment that merely mentions "title" does not count as a title variable.
   *
   * @param {string} template - Mustache template source.
   * @returns {string[]} Variable names in order of appearance (duplicates kept).
   */
  extractVariables(template) {
    const tagPattern = /\{\{\{?\s*([^{}]*?)\s*\}?\}\}/g;
    const nonVariableSigils = "#/^!>=";
    const variables = [];
    for (const match of template.matchAll(tagPattern)) {
      let name = match[1];
      if (name === "" || nonVariableSigils.includes(name[0])) {
        continue;
      }
      if (name[0] === "&") {
        // `{{& name}}` is the unescaped form of `{{name}}`.
        name = name.slice(1).trim();
      }
      if (name !== "") {
        variables.push(name);
      }
    }
    return variables;
  }
}
|
|
952
|
+
/**
 * Advisory validator with heuristics about heading hierarchy, blockquote use,
 * lists and embedded HTML. It only emits warnings; `isValid` is always true.
 */
class LLMSTxtComplianceValidator {
  name = "llms-txt-compliance";

  /**
   * @param {string} template - Template source to inspect.
   * @returns {{isValid: boolean, errors: string[], warnings: string[]}}
   */
  validate(template) {
    const warnings = [];

    // Heading depth per heading line, in document order.
    const headingLevels = (template.match(/^#+\s+.+$/gm) || []).map(
      (line) => (line.match(/^#+/) || [""])[0].length
    );
    // Each heading may be at most one level deeper than the one before it
    // (the first heading is compared against level 0).
    const skipsLevel = headingLevels.some((level, index) => {
      const previousLevel = index === 0 ? 0 : headingLevels[index - 1];
      return level > previousLevel + 1;
    });
    if (skipsLevel) {
      warnings.push("Heading hierarchy should increment by one level (# -> ## -> ###)");
    }

    const usesDescription = template.includes("pageDescription");
    const containsAngleBracket = template.includes(">");
    if (usesDescription && !containsAngleBracket) {
      warnings.push("Consider using blockquote (>) for page description as per LLMS.txt standard");
    }

    const listMarkers = ["- ", "* "];
    const hasLists = listMarkers.some((marker) => template.includes(marker));
    if (template.length > 200 && !hasLists) {
      warnings.push("Long content without lists - consider breaking into bullet points for better AI consumption");
    }

    const htmlTagCount = (template.match(/<[^>]+>/g) || []).length;
    if (htmlTagCount > 3) {
      warnings.push("Template contains HTML tags - prefer pure markdown for LLMS.txt compliance");
    }

    return {
      isValid: true,
      errors: [],
      warnings
    };
  }
}
|
|
987
|
+
/**
 * Runs a template through an ordered list of validators, optionally applying
 * a validator's own `fix` when it reports the template as invalid.
 */
class TemplateValidationPipeline {
  validators = [];

  /** Registers the four built-in validators, in order. */
  constructor() {
    const builtIns = [
      new MustacheSyntaxValidator(),
      new TemplateStructureValidator(),
      new ContentCompletenessValidator(),
      new LLMSTxtComplianceValidator()
    ];
    for (const validator of builtIns) {
      this.addValidator(validator);
    }
  }

  /** @param {object} validator - Object exposing `name`, `validate` and optionally `canFix`/`fix`. */
  addValidator(validator) {
    this.validators.push(validator);
  }

  /** @param {string} name - Every validator with this name is dropped. */
  removeValidator(name) {
    this.validators = this.validators.filter((candidate) => candidate.name !== name);
  }

  /**
   * Run every validator in order and merge their findings.
   *
   * @param {string} template - Template source.
   * @param {{autoFix?: boolean, throwOnError?: boolean}} [options]
   * @returns {Promise<{isValid: boolean, errors: string[], warnings: string[], fixedTemplate?: string}>}
   * @throws {TemplateError} When errors remain and `throwOnError` is set.
   */
  async validateTemplate(template, options = {}) {
    const { autoFix = true, throwOnError = false } = options;
    const summary = {
      isValid: true,
      errors: [],
      warnings: []
    };
    let workingTemplate = template;

    for (const validator of this.validators) {
      const outcome = validator.validate(workingTemplate);
      summary.errors.push(...outcome.errors);
      summary.warnings.push(...outcome.warnings);
      if (outcome.isValid) {
        continue;
      }
      summary.isValid = false;

      const mayRepair = autoFix && validator.canFix && validator.fix;
      if (!mayRepair) {
        continue;
      }
      const repaired = validator.fix(workingTemplate);
      const recheck = validator.validate(repaired);
      if (!recheck.isValid) {
        continue;
      }

      // The repair held: later validators see the repaired text, and the
      // errors this validator just reported are withdrawn.
      workingTemplate = repaired;
      summary.fixedTemplate = workingTemplate;
      console.log(`Template fixed by ${validator.name} validator`);
      summary.errors = summary.errors.filter((message) => !outcome.errors.includes(message));
      if (summary.errors.length === 0) {
        summary.isValid = true;
      }
    }

    if (throwOnError && summary.errors.length > 0) {
      throw new TemplateError(
        ErrorCode.TEMPLATE_VALIDATION_FAILED,
        `Template validation failed: ${summary.errors.join(", ")}`,
        { template: template.substring(0, 200) + "..." }
      );
    }
    return summary;
  }

  /**
   * Validate with auto-fix and always hand back something renderable.
   *
   * @param {string} template - Template source.
   * @returns {Promise<string>} The repaired template if a fix was applied, a
   *   generic fallback template if errors remain, otherwise the input unchanged.
   */
  async validateAndFix(template) {
    const summary = await this.validateTemplate(template, {
      autoFix: true,
      throwOnError: false
    });
    if (summary.fixedTemplate) {
      return summary.fixedTemplate;
    }
    if (summary.errors.length === 0) {
      return template;
    }
    console.warn("Could not fix template, using fallback");
    return [
      "# {{pageTitle}}",
      "",
      "> {{pageDescription}}",
      "",
      "## Content",
      "",
      "This page content could not be processed due to template formatting issues."
    ].join("\n");
  }

  /** @returns {string[]} Names of the registered validators, in run order. */
  getValidatorNames() {
    return this.validators.map((validator) => validator.name);
  }
}
|
|
1063
|
+
// Module-level pipeline instance; its constructor registers the built-in validators.
// TemplateGenerator.renderTemplate calls validateAndFix on it before rendering.
const templateValidationPipeline = new TemplateValidationPipeline();
|
|
1064
|
+
|
|
1065
|
+
/**
 * Produces one Mustache template per page, using the cache when the page's
 * structure hash is unchanged and the Anthropic client otherwise.
 */
class TemplateGenerator {
  anthropicClient;
  promptAnalyzer;
  cache;
  config;

  /**
   * @param {object} config - Generator configuration. Fields read in this class:
   *   cacheDir, templatesOutputDir, finalOutputDir, maxConcurrent,
   *   enableAutoCleanup, cleanupOrphaned, cleanupHidden.
   */
  constructor(config) {
    this.config = config;
    this.anthropicClient = new AnthropicClient(config);
    this.promptAnalyzer = new PromptAnalyzer();
    this.cache = new LLMSCache(this.getCacheDir());
    this.ensureOutputDirectories();
  }

  /**
   * Single source of truth for the cache directory, so the cache and the
   * automatic cleanup always operate on the same location.
   *
   * @returns {string} `config.cacheDir`, or ".llms-cache" when unset.
   */
  getCacheDir() {
    return this.config.cacheDir || ".llms-cache";
  }

  /**
   * Return the cached template for a page when its structure hash still
   * matches, otherwise generate a new one.
   *
   * @param {object} pageContent - Content object for the page.
   * @param {object} urlItem - URL list entry describing the page.
   * @returns {Promise<object>} Cached or freshly generated template record.
   */
  async generateTemplate(pageContent, urlItem) {
    const pageId = generatePageId(urlItem);
    const currentHash = generatePageStructureHash(pageContent, { excludeChildren: true });
    const cachedHash = this.cache.getTemplateHash(pageId);
    if (cachedHash === currentHash) {
      console.log(`Using cached template for ${pageId}`);
      const cached = this.cache.getTemplate(pageId);
      if (cached) {
        return cached;
      }
    }
    return await this.generateTemplateWithAI(pageContent, urlItem);
  }

  /**
   * Clean up stale templates, then collect a template for every page that
   * passes `shouldGenerateTemplate`, generating missing ones in batches.
   *
   * @param {object} umbracoData - Site data; `urlList` is iterated here.
   * @returns {Promise<object[]>} Cached templates followed by newly generated ones.
   *   Pages whose generation fails are logged and omitted.
   */
  async generateAllTemplates(umbracoData) {
    const templates = [];
    // Guard the batch size: a non-numeric or non-positive value would keep the
    // batching loop below from ever advancing.
    const configuredConcurrency = Number(this.config.maxConcurrent);
    const maxConcurrent = Number.isFinite(configuredConcurrency) && configuredConcurrency >= 1
      ? Math.floor(configuredConcurrency)
      : 5;

    // Cleanup must look at the same directory the cache was created with.
    await performAutomaticCleanup(umbracoData, this.getCacheDir(), {
      enableAutoCleanup: this.config.enableAutoCleanup ?? true,
      cleanupOrphaned: this.config.cleanupOrphaned ?? true,
      cleanupHidden: this.config.cleanupHidden ?? true,
      dryRun: false
    });

    const visibilityStats = getPageVisibilityStats(umbracoData);
    console.log("\u{1F4CA} Page visibility stats:", visibilityStats);
    const visiblePages = umbracoData.urlList.filter(
      (urlItem) => shouldGenerateTemplate(umbracoData, urlItem)
    );
    console.log(`Checking ${visiblePages.length}/${umbracoData.urlList.length} visible pages for cache status...`);

    const { cached, needGeneration } = this.identifyTemplatesNeeded(umbracoData, visiblePages);
    console.log(`\u{1F4C8} Template status: ${cached.length} cached, ${needGeneration.length} need generation`);
    templates.push(...cached);
    if (needGeneration.length === 0) {
      console.log("All templates are cached, no AI generation needed");
      return templates;
    }

    console.log(`Generating ${needGeneration.length} missing templates (max ${maxConcurrent} concurrent)`);
    for (let i = 0; i < needGeneration.length; i += maxConcurrent) {
      const batch = needGeneration.slice(i, i + maxConcurrent);
      const batchPromises = batch.map(async ({ pageContent, urlItem }) => {
        try {
          return await this.generateTemplateWithAI(pageContent, urlItem);
        } catch (error) {
          // One failing page must not abort the rest of the batch.
          console.error(`Error processing ${urlItem.url}:`, error);
          return null;
        }
      });
      const batchResults = await Promise.all(batchPromises);
      templates.push(...batchResults.filter((result) => result !== null));
      if (i + maxConcurrent < needGeneration.length) {
        console.log(`Completed batch ${Math.floor(i / maxConcurrent) + 1}, waiting 1s...`);
        await this.delay(1e3);
      }
    }
    console.log(`Generated ${templates.length} total templates (${cached.length} from cache, ${templates.length - cached.length} newly generated)`);
    return templates;
  }

  /**
   * Split the visible pages into those with a valid cached template and those
   * that need generation. Pages without content are logged and skipped.
   *
   * @param {object} umbracoData - Site data used to resolve page content.
   * @param {object[]} visiblePages - URL list entries to classify.
   * @returns {{cached: object[], needGeneration: {pageContent: object, urlItem: object}[]}}
   */
  identifyTemplatesNeeded(umbracoData, visiblePages) {
    const cached = [];
    const needGeneration = [];
    for (const urlItem of visiblePages) {
      const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
      if (!pageContent) {
        console.warn(`No content found for ${urlItem.url} (${urlItem.Jpath})`);
        continue;
      }
      const pageId = generatePageId(urlItem);
      const currentHash = generatePageStructureHash(pageContent, { excludeChildren: true });
      const cachedHash = this.cache.getTemplateHash(pageId);
      if (cachedHash === currentHash) {
        const cachedTemplate = this.cache.getTemplate(pageId);
        if (cachedTemplate) {
          cached.push(cachedTemplate);
          continue;
        }
      }
      needGeneration.push({
        pageContent,
        urlItem
      });
    }
    return {
      cached,
      needGeneration
    };
  }

  /**
   * Ask the Anthropic client for a template, write it to disk and store it
   * in the cache.
   *
   * @param {object} pageContent - Content object for the page.
   * @param {object} urlItem - URL list entry (url, Jpath, TemplateAlias are read).
   * @returns {Promise<object>} Record with pageId, templatePath, template, hash and metadata.
   */
  async generateTemplateWithAI(pageContent, urlItem) {
    const pageId = generatePageId(urlItem);
    console.log(`Generating new template for ${pageId} (${urlItem.url})`);

    const tokensBeforeTruncation = estimateContentTokens(pageContent);
    const truncatedContent = truncateContentIfNeeded(pageContent, 18e4);
    const tokensAfterTruncation = estimateContentTokens(truncatedContent);
    if (tokensBeforeTruncation > tokensAfterTruncation) {
      console.warn(`Page ${pageId} content truncated: ${tokensBeforeTruncation} -> ${tokensAfterTruncation} tokens`);
    }

    this.promptAnalyzer.analyzeContent(truncatedContent, urlItem);
    const request = {
      pageContent: truncatedContent,
      templateAlias: urlItem.TemplateAlias,
      url: urlItem.url,
      jpath: urlItem.Jpath
    };
    const response = await this.anthropicClient.generateTemplate(request);

    // The hash is taken from the untruncated content so it matches the
    // hash computed during cache lookups.
    const currentHash = generatePageStructureHash(pageContent, { excludeChildren: true });
    const templatePath = join(
      this.config.templatesOutputDir,
      `${pageId}.mustache`
    );
    this.saveTemplate(templatePath, response.template);

    const generatedTemplate = {
      pageId,
      templatePath,
      template: response.template,
      hash: currentHash,
      metadata: {
        url: urlItem.url,
        templateAlias: urlItem.TemplateAlias,
        jpath: urlItem.Jpath,
        generatedAt: new Date()
      }
    };
    this.cache.setTemplate(pageId, response.template, currentHash, {
      pageId: urlItem.url,
      // Store URL as pageId for cache lookup
      jpath: urlItem.Jpath,
      templateAlias: urlItem.TemplateAlias,
      structureHash: currentHash
    });
    return generatedTemplate;
  }

  /**
   * Validate (and if needed repair) a template, then render it with Mustache.
   *
   * @param {string} template - Mustache template source.
   * @param {object} data - View data for rendering.
   * @returns {Promise<string>} Rendered output.
   */
  async renderTemplate(template, data) {
    return withErrorHandling(async () => {
      const validatedTemplate = await templateValidationPipeline.validateAndFix(template);
      return Mustache.render(validatedTemplate, data);
    }, {
      template: template.substring(0, 200) + "...",
      dataKeys: Object.keys(data)
    });
  }

  /**
   * @param {string} templatePath - File to read as UTF-8.
   * @returns {string} File contents.
   * @throws {Error} Wrapping the underlying read error (available as `cause`).
   */
  loadTemplate(templatePath) {
    try {
      return readFileSync(templatePath, "utf-8");
    } catch (error) {
      throw new Error(`Failed to load template ${templatePath}: ${error}`, { cause: error });
    }
  }

  /**
   * @returns {{totalCached: number, cacheHitRate: number, templatesByAlias: object}}
   *   `cacheHitRate` is a fixed estimate (0.8 when anything is cached), not a measurement.
   */
  getTemplateStats() {
    const stats = this.cache.getCacheStats();
    return {
      totalCached: stats.totalTemplates,
      cacheHitRate: stats.totalTemplates > 0 ? 0.8 : 0,
      // Estimate, could be tracked more precisely
      templatesByAlias: stats.templatesByAlias
    };
  }

  /** Write a template file, creating its directory first when missing. */
  saveTemplate(templatePath, content) {
    const dir = dirname(templatePath);
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
    writeFileSync(templatePath, content, "utf-8");
  }

  /** Create the templates directory and, when configured, the final output directory. */
  ensureOutputDirectories() {
    if (!existsSync(this.config.templatesOutputDir)) {
      mkdirSync(this.config.templatesOutputDir, { recursive: true });
    }
    if (this.config.finalOutputDir && !existsSync(this.config.finalOutputDir)) {
      mkdirSync(this.config.finalOutputDir, { recursive: true });
    }
  }

  /** @returns {Promise<void>} Resolves after `ms` milliseconds. */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /** Delegates to the Anthropic client's connection test. */
  async testConnection() {
    return await this.anthropicClient.testConnection();
  }

  /** Delegates to the cache's clearCache. */
  clearCache() {
    this.cache.clearCache();
  }

  /**
   * @param {number} [maxAgeMs] - Age threshold forwarded to the cache; defaults to 30 days.
   * @returns {*} Whatever the cache's cleanupOldTemplates returns.
   */
  cleanupOldTemplates(maxAgeMs = 30 * 24 * 60 * 60 * 1e3) {
    return this.cache.cleanupOldTemplates(maxAgeMs);
  }
}
|
|
1264
|
+
|
|
1265
|
+
/**
 * Builds the final output: llms.txt, optionally llms-full.txt, and optionally
 * one markdown file per page under an `llms/` subdirectory.
 */
class LLMSFilesGenerator {
  config;
  templateGenerator;

  /**
   * @param {object} config - Fields read in this class: enableIndividualMd,
   *   enableLLMSFullTxt, finalOutputDir. The whole object is also passed to TemplateGenerator.
   */
  constructor(config) {
    this.config = config;
    this.templateGenerator = new TemplateGenerator(config);
  }

  /**
   * Generate every output file and write them to the output directory.
   *
   * @param {object} umbracoData - Site data (SiteData and urlList are read).
   * @returns {Promise<{llmsTxt: object, llmsFullTxt: (object|undefined), individualMdFiles: (object[]|undefined)}>}
   */
  async generateAllFiles(umbracoData) {
    const startTime = Date.now();
    console.log("\u{1F680} Starting LLMS files generation...");
    const templates = await this.templateGenerator.generateAllTemplates(umbracoData);
    console.log("\u{1F4C4} Generating individual markdown files...");
    const individualMdFiles = this.config.enableIndividualMd ? await this.generateIndividualMarkdownFiles(umbracoData, templates) : void 0;
    console.log("\u{1F4DD} Generating llms.txt navigation file...");
    const llmsTxt = this.generateLLMSTxt(umbracoData, individualMdFiles || []);
    console.log("\u{1F4DA} Generating llms-full.txt...");
    const llmsFullTxt = this.config.enableLLMSFullTxt ? this.generateLLMSFullTxt(umbracoData, individualMdFiles || []) : void 0;
    const files = {
      llmsTxt,
      llmsFullTxt,
      individualMdFiles
    };
    this.saveFilesToOutput(files);
    const duration = Date.now() - startTime;
    console.log(`\u2705 LLMS files generation completed in ${duration}ms`);
    return files;
  }

  /**
   * Render each template against its page content.
   *
   * @param {object} umbracoData - Site data used to resolve URL items and content.
   * @param {object[]} templates - Template records (pageId and template are read).
   * @returns {Promise<{path: string, content: string, url: string, pageId: string}[]>}
   *   Templates whose URL item or content cannot be found, or whose rendering
   *   throws, are logged and skipped.
   */
  async generateIndividualMarkdownFiles(umbracoData, templates) {
    const mdFiles = [];
    for (const template of templates) {
      try {
        const urlItem = umbracoData.urlList.find(
          (item) => generatePageId(item) === template.pageId
        );
        if (!urlItem) {
          console.warn(`URL item not found for template ${template.pageId}`);
          continue;
        }
        const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
        if (!pageContent) {
          console.warn(`Page content not found for ${urlItem.url}`);
          continue;
        }
        const renderedMarkdown = await this.templateGenerator.renderTemplate(
          template.template,
          pageContent
        );
        const filename = `${this.sanitizeUrlForFilename(urlItem.url)}.md`;
        mdFiles.push({
          path: join(this.getOutputDir(), "llms", filename),
          content: renderedMarkdown,
          url: urlItem.url,
          pageId: template.pageId
        });
      } catch (error) {
        console.error(`Error generating markdown for ${template.pageId}:`, error);
      }
    }
    return mdFiles;
  }

  /**
   * Build the llms.txt navigation file: title, optional description, one
   * section per non-empty category, a hidden-page note, and an "Optional" section.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} mdFiles - Generated markdown file records to link to.
   * @returns {{path: string, content: string}}
   */
  generateLLMSTxt(umbracoData, mdFiles) {
    const siteTitle = this.extractSiteTitle(umbracoData);
    const siteDescription = this.extractSiteDescription(umbracoData);
    let content = `# ${siteTitle}\n\n`;
    if (siteDescription) {
      content += `> ${siteDescription}\n\n`;
    }
    content += `This website contains comprehensive information about ${siteTitle.toLowerCase()}. The content is organized into the following sections:\n\n`;

    const pagesByCategory = this.groupPagesByCategory(umbracoData, mdFiles);
    for (const [category, pages] of Object.entries(pagesByCategory)) {
      if (pages.length === 0) {
        continue;
      }
      content += `## ${this.formatCategoryName(category)}\n\n`;
      for (const page of pages) {
        const urlItem = umbracoData.urlList.find((item) => item.url === page.url);
        const pageTitle = this.extractPageTitle(umbracoData, urlItem);
        const relativeFilePath = this.getLLMSFilePath(page.path);
        content += `- [${pageTitle}](${relativeFilePath}): ${this.generatePageDescription(umbracoData, urlItem)}\n`;
      }
      content += "\n";
    }

    const visiblePages = getVisiblePages(umbracoData);
    const hiddenCount = umbracoData.urlList.length - visiblePages.length;
    if (hiddenCount > 0) {
      content += `*Note: ${hiddenCount} pages are excluded from this documentation as they are marked as hidden.*\n\n`;
    }
    content += "## Optional\n\n";
    content += "- [Complete Documentation](llms-full.txt): All content combined in a single file\n";
    content += "- [Site Map](sitemap.xml): XML sitemap of all pages\n";
    return {
      path: join(this.getOutputDir(), "llms.txt"),
      content: content.trim()
    };
  }

  /**
   * Concatenate every rendered page into a single llms-full.txt document.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} mdFiles - Generated markdown file records (url and content are read).
   * @returns {{path: string, content: string}}
   */
  generateLLMSFullTxt(umbracoData, mdFiles) {
    const siteTitle = this.extractSiteTitle(umbracoData);
    const siteDescription = this.extractSiteDescription(umbracoData);
    let content = `# ${siteTitle} - Complete Documentation\n\n`;
    if (siteDescription) {
      content += `> ${siteDescription}\n\n`;
    }
    content += "This document contains all website content in a single file for comprehensive AI analysis.\n\n";
    content += "---\n\n";
    for (const mdFile of mdFiles) {
      const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
      if (!urlItem) {
        continue;
      }
      content += `## Page: ${mdFile.url}\n\n`;
      content += `**Template**: ${urlItem.TemplateAlias}\n`;
      content += `**Node ID**: ${urlItem.nodeID}\n\n`;
      content += mdFile.content;
      content += "\n\n---\n\n";
    }
    return {
      path: join(this.getOutputDir(), "llms-full.txt"),
      content: content.trim()
    };
  }

  /**
   * Write the generated files to disk, creating the output directory and its
   * `llms/` subdirectory first.
   *
   * @param {{llmsTxt: object, llmsFullTxt?: object, individualMdFiles?: object[]}} files
   */
  saveFilesToOutput(files) {
    const outputDir = this.getOutputDir();
    mkdirSync(outputDir, { recursive: true });
    mkdirSync(join(outputDir, "llms"), { recursive: true });
    writeFileSync(files.llmsTxt.path, files.llmsTxt.content, "utf-8");
    console.log(`\u{1F4C4} Saved: ${files.llmsTxt.path}`);
    if (files.llmsFullTxt) {
      writeFileSync(files.llmsFullTxt.path, files.llmsFullTxt.content, "utf-8");
      console.log(`\u{1F4DA} Saved: ${files.llmsFullTxt.path}`);
    }
    if (files.individualMdFiles) {
      for (const mdFile of files.individualMdFiles) {
        writeFileSync(mdFile.path, mdFile.content, "utf-8");
      }
      console.log(`\u{1F4DD} Saved: ${files.individualMdFiles.length} markdown files to llms/ subdirectory`);
    }
  }

  /**
   * Bucket markdown files by category. The fixed key order below determines
   * the section order in llms.txt.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} mdFiles - Markdown file records.
   * @returns {Object<string, object[]>}
   */
  groupPagesByCategory(umbracoData, mdFiles) {
    const categories = {
      main: [],
      blog: [],
      services: [],
      products: [],
      info: [],
      other: []
    };
    for (const mdFile of mdFiles) {
      const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
      if (!urlItem) {
        continue;
      }
      const category = this.categorizeUrlItem(urlItem);
      if (!categories[category]) {
        categories[category] = [];
      }
      categories[category].push(mdFile);
    }
    return categories;
  }

  /**
   * Pick a category from the template alias (case-insensitive substring
   * match); the root URL "/" is always "main".
   *
   * @param {{url: string, TemplateAlias?: string}} urlItem
   * @returns {"main"|"blog"|"services"|"info"|"other"}
   */
  categorizeUrlItem(urlItem) {
    const { url, TemplateAlias } = urlItem;
    const alias = (TemplateAlias || "unknown").toLowerCase();
    const aliasHas = (...needles) => needles.some((needle) => alias.includes(needle));
    if (url === "/" || aliasHas("home")) {
      return "main";
    }
    if (aliasHas("blog", "article", "news")) {
      return "blog";
    }
    if (aliasHas("service", "product", "camp")) {
      return "services";
    }
    if (aliasHas("about", "contact", "info")) {
      return "info";
    }
    return "other";
  }

  /** @returns {string} Site title from SiteData, or "Website Documentation". */
  extractSiteTitle(umbracoData) {
    const siteData = umbracoData.SiteData;
    return siteData?.pageTitle || siteData?.mainHeaderBlockTitle || "Website Documentation";
  }

  /** @returns {string|null} Site description from SiteData, or null when absent. */
  extractSiteDescription(umbracoData) {
    const siteData = umbracoData.SiteData;
    return siteData?.pageDescription || siteData?.ogDescription || null;
  }

  /**
   * @param {object} umbracoData - Site data.
   * @param {object} [urlItem] - URL list entry; may be undefined.
   * @returns {string} First available of pageTitle, title, headerBlockTitle,
   *   falling back to the template alias ("Untitled Page" without a URL item).
   */
  extractPageTitle(umbracoData, urlItem) {
    if (!urlItem) {
      return "Untitled Page";
    }
    const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
    if (!pageContent) {
      return urlItem.TemplateAlias;
    }
    return pageContent.pageTitle || pageContent.title || pageContent.headerBlockTitle || urlItem.TemplateAlias;
  }

  /**
   * @param {object} umbracoData - Site data.
   * @param {object} [urlItem] - URL list entry; may be undefined.
   * @returns {string} Page description, cut to 97 characters plus "..." when
   *   longer than 100, or a generic fallback.
   */
  generatePageDescription(umbracoData, urlItem) {
    if (!urlItem) {
      return "Page information";
    }
    const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
    if (!pageContent) {
      return `${urlItem.TemplateAlias} page`;
    }
    const desc = pageContent.pageDescription || pageContent.description || pageContent.headerBlockSubtitle;
    if (desc && typeof desc === "string") {
      return desc.length > 100 ? `${desc.substring(0, 97)}...` : desc;
    }
    return `Information about ${urlItem.url}`;
  }

  /** @returns {string} Display name for a category key; unknown keys are capitalised. */
  formatCategoryName(category) {
    const names = {
      main: "Main Pages",
      blog: "Blog & Articles",
      services: "Services & Products",
      info: "Information Pages",
      other: "Other Pages"
    };
    return names[category] || category.charAt(0).toUpperCase() + category.slice(1);
  }

  /**
   * Turn a URL path into a filename stem: strip one leading and one trailing
   * slash, turn remaining slashes into "-", drop every character outside
   * [a-zA-Z0-9-_], and use "index" when nothing is left.
   *
   * NOTE: because characters are dropped, distinct URLs can map to the same
   * stem (e.g. "/a/b" and "/a-b", or any URL made only of dropped characters
   * and "/").
   *
   * @param {string} url
   * @returns {string}
   */
  sanitizeUrlForFilename(url) {
    return url.replace(/^\//, "").replace(/\/$/, "").replace(/\//g, "-").replace(/[^a-zA-Z0-9\-_]/g, "").replace(/^$/, "index");
  }

  /**
   * @param {string} fullPath - File path using "/" or "\" separators.
   * @returns {string} The last path segment ("" when the path ends in a separator).
   */
  getRelativeFilePath(fullPath) {
    // Paths here are built with path.join, which emits "\" on Windows, so
    // both separators must be recognised.
    return fullPath.split(/[\\/]/).pop() || "";
  }

  /**
   * @param {string} fullPath - Path of a generated markdown file.
   * @returns {string} Link target relative to llms.txt, always with "/" separators.
   */
  getLLMSFilePath(fullPath) {
    return `llms/${this.getRelativeFilePath(fullPath)}`;
  }

  /** @returns {string} `config.finalOutputDir`, or "dist" when unset. */
  getOutputDir() {
    return this.config.finalOutputDir || "dist";
  }
}
|
|
1512
|
+
|
|
1513
|
+
export { LLMSFilesGenerator };
|