@voicenter-team/nuxt-llms-generator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1513 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
2
+ import { join, dirname } from 'path';
3
+ import Mustache from 'mustache';
4
+ import Anthropic from '@anthropic-ai/sdk';
5
+ import { createHash } from 'crypto';
6
+ import { JSONPath } from 'jsonpath-plus';
7
+ import { T as TemplateError, E as ErrorCode, w as withErrorHandling } from '../shared/nuxt-llms-generator.dc009f50.mjs';
8
+ import '@nuxt/kit';
9
+ import 'zod';
10
+
11
/**
 * Wrapper around the Anthropic Messages API that asks the model to write a
 * Mustache template for one CMS page and parses the reply.
 */
class AnthropicClient {
  client;
  model;
  maxRetries = 3;
  retryDelayMs = 1000;

  /**
   * @param {object} config
   * @param {string} config.anthropicApiKey - API key handed to the SDK.
   * @param {string} [config.anthropicModel] - Model id; a default is used when falsy.
   */
  constructor(config) {
    this.client = new Anthropic({
      apiKey: config.anthropicApiKey
    });
    this.model = config.anthropicModel || "claude-3-5-sonnet-20241022";
  }

  /**
   * Sends the prompt for `request` and returns the parsed template.
   * Every failure (API error or unexpected payload) is retried up to
   * `maxRetries` times, waiting `retryDelayMs * attempt` between attempts.
   *
   * @param {object} request - Read fields: url, templateAlias, jpath, pageContent.
   * @returns {Promise<{template: string, metadata: object}>}
   * @throws {Error} After the last attempt fails; the last error is attached as `cause`.
   */
  async generateTemplate(request) {
    const prompt = this.buildPrompt(request);
    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        const response = await this.client.messages.create({
          model: this.model,
          max_tokens: 4000,
          temperature: 0.1,
          messages: [{
            role: "user",
            content: prompt
          }]
        });
        // An empty content array must produce the descriptive error below,
        // not a TypeError from reading `.type` of undefined.
        const content = response.content?.[0];
        if (!content || content.type !== "text") {
          throw new Error("Unexpected response type from Anthropic API");
        }
        return this.parseResponse(content.text);
      } catch (error) {
        if (attempt === this.maxRetries) {
          throw new Error(
            `Anthropic API failed after ${this.maxRetries} attempts: ${error}`,
            { cause: error }
          );
        }
        console.warn(`Anthropic API attempt ${attempt} failed, retrying...`, error);
        await this.delay(this.retryDelayMs * attempt);
      }
    }
    // Only reachable when maxRetries < 1 (the loop body never runs).
    throw new Error("Failed to generate template");
  }

  /**
   * Builds the full instruction prompt, embedding the page URL, template
   * alias, JSONPath and the pretty-printed page content.
   *
   * @param {object} request
   * @returns {string}
   */
  buildPrompt(request) {
    return `You are an expert creating Mustache.js templates for **Voicenter - Leading Cloud Communications Platform**, converting Umbraco CMS content into LLMS.txt-optimized markdown.

**BUSINESS CONTEXT**: Voicenter provides enterprise cloud telephony solutions including Contact Centers, Business Phone Services, Mobile Communications, API integrations, and AI-powered voice tools for 50,000+ users globally.

**CURRENT PAGE ANALYSIS:**
- URL: ${request.url}
- Template: ${request.templateAlias}
- JSONPath: ${request.jpath}

**AVAILABLE DATA PROPERTIES:**
\`\`\`json
${JSON.stringify(request.pageContent, null, 2)}
\`\`\`

**CRITICAL REQUIREMENTS (2024 LLMS.txt Standard):**

1. **USE EXACT PROPERTY NAMES**:
\u274C Wrong: {{pageTitle}}
\u2705 Correct: {{pageTittle}} or {{pageDescription}} (match actual JSON keys)

2. **BUSINESS-FOCUSED CONTENT HIERARCHY**:
- H1: Clear service/feature name
- Blockquote: Value proposition
- H2: Key capabilities/benefits
- H3: Technical details/specs
- Lists: Features, integrations, use cases

3. **VOICENTER-SPECIFIC CONTENT MAPPING**:
- Service descriptions \u2192 Clear business benefits
- Technical features \u2192 User-friendly explanations
- Integration lists \u2192 Specific partner names
- API documentation \u2192 Implementation clarity
- Contact info \u2192 Business value context

4. **SMART CONTENT SELECTION**:
- Prioritize business value over technical jargon
- Include specific numbers/metrics when available
- Map complex nested arrays to structured lists
- Extract key differentiators and benefits

**TEMPLATE PATTERNS FOR VOICENTER CONTENT:**

**Service Pages Pattern:**
\`\`\`mustache
# {{serviceName}}

{{#serviceSubtitle}}
> {{serviceSubtitle}}
{{/serviceSubtitle}}

{{#serviceDescription}}
## Overview
{{serviceDescription}}
{{/serviceDescription}}

{{#serviceTools.0}}
## Key Features
{{#serviceTools}}
- {{textItem}}
{{/serviceTools}}
{{/serviceTools.0}}

{{#serviceLink}}
## Learn More
[Explore {{serviceName}} \u2192]({{serviceLink}})
{{/serviceLink}}
\`\`\`

**Feature/API Pages Pattern:**
\`\`\`mustache
# {{cardTitle}}

{{#cardText}}
> {{cardText}}
{{/cardText}}

{{#featureDescription}}
## How It Works
{{featureDescription}}
{{/featureDescription}}

{{#capabilities.0}}
## Capabilities
{{#capabilities}}
### {{name}}
{{description}}

{{/capabilities}}
{{/capabilities.0}}

## Business Benefits
- Reduces operational costs
- Improves customer experience
- Seamless integration with existing systems
\`\`\`

**CONTENT EXTRACTION RULES:**
1. **Identify primary content** from JSON structure
2. **Map nested arrays** to organized sections
3. **Extract business value** from technical descriptions
4. **Include contact/action items** for lead generation
5. **Maintain SEO-friendly** heading structure

**OUTPUT REQUIREMENTS:**
- Return ONLY the Mustache template
- NO explanations or code blocks
- Use actual property names from the provided JSON
- Focus on business value for AI consumption
- Follow LLMS.txt hierarchical structure
- **CLEAN MARKDOWN ONLY**: No HTML tags, entities, or attributes
- **NO HTML**: Use pure Markdown syntax (##, **, -, etc.)
- **NO ENTITIES**: Use actual characters, not &amp; or &#x2F;
- **NO ATTRIBUTES**: No dir="RTL", style="", class="" etc.

Generate the optimized Mustache template:`;
  }

  /**
   * Extracts the template from the model reply. If the reply contains a
   * fenced code block (optionally tagged `mustache`), the first block's
   * content is used; otherwise the whole trimmed reply is the template.
   * Metadata is derived from the extracted template, so headings in any
   * prose around the code fence are not mistaken for the page title.
   *
   * @param {string} responseText
   * @returns {{template: string, metadata: {title: (string|undefined), description: (string|undefined), tags: Array}}}
   */
  parseResponse(responseText) {
    const codeBlockRegex = /```(?:mustache)?\n?([\s\S]*?)```/;
    const match = responseText.match(codeBlockRegex);
    const template = (match ? match[1] : responseText).trim();
    return {
      template,
      metadata: this.extractMetadata(template)
    };
  }

  /**
   * Picks the first H1 line (`# ...`) as title and the first blockquote line
   * (`> ...`) as description.
   *
   * @param {string} responseText - Markdown/Mustache text to scan.
   * @returns {{title: (string|undefined), description: (string|undefined), tags: Array}}
   */
  extractMetadata(responseText) {
    const titleMatch = responseText.match(/^#\s+(.+)$/m);
    // Anchored to line start: an unanchored pattern would treat any ">"
    // (for example inside "->") as the start of the description.
    const descriptionMatch = responseText.match(/^>\s*(.+)$/m);
    return {
      title: titleMatch ? titleMatch[1] : void 0,
      description: descriptionMatch ? descriptionMatch[1] : void 0,
      tags: []
    };
  }

  /**
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>} Resolves after the timeout fires.
   */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Sends a minimal request to check that the API is reachable.
   * Never throws: failures are logged and reported as `false`.
   *
   * @returns {Promise<boolean>} True when the reply has at least one content item.
   */
  async testConnection() {
    try {
      const response = await this.client.messages.create({
        model: this.model,
        max_tokens: 10,
        messages: [{
          role: "user",
          content: "Hello"
        }]
      });
      return response.content.length > 0;
    } catch (error) {
      console.error("Anthropic API connection test failed:", error);
      return false;
    }
  }
}
211
+
212
/**
 * Heuristic inspection of a page's content object: guesses the content type
 * from the template alias and summarises what kinds of data are present.
 * All helpers tolerate `null`/primitive content and treat it as "no data".
 */
class PromptAnalyzer {
  /**
   * @param {object|null} pageContent - Page data object.
   * @param {{url: string, TemplateAlias: (string|undefined)}} urlItem
   * @returns {object} Analysis with contentType, has* flags, keyProperties,
   *   suggestedHeaders and a complexityScore capped at 100.
   */
  analyzeContent(pageContent, urlItem) {
    const analysis = {
      contentType: this.determineContentType(pageContent, urlItem),
      hasImages: this.detectImages(pageContent),
      hasHtml: this.detectHtml(pageContent),
      hasLists: this.detectLists(pageContent),
      hasNestedContent: this.detectNestedContent(pageContent),
      keyProperties: this.identifyKeyProperties(pageContent),
      suggestedHeaders: this.suggestHeaders(pageContent, urlItem),
      complexityScore: 0
    };
    analysis.complexityScore = this.calculateComplexity(analysis, pageContent);
    return analysis;
  }

  /**
   * Classifies the page by substring matches on the lowercased template
   * alias. Checks run in a fixed order; the first hit wins.
   *
   * @returns {"homepage"|"article"|"listing"|"detail"|"form"|"static"|"unknown"}
   */
  determineContentType(pageContent, urlItem) {
    const { url, TemplateAlias } = urlItem;
    const alias = (TemplateAlias || "unknown").toLowerCase();
    const aliasHas = (...words) => words.some((word) => alias.includes(word));
    if (url === "/" || aliasHas("home", "index")) {
      return "homepage";
    }
    if (aliasHas("blog", "article", "news")) {
      return "article";
    }
    if (aliasHas("list", "collection", "category", "archive")) {
      return "listing";
    }
    if (aliasHas("detail", "product", "service", "camp")) {
      return "detail";
    }
    if (aliasHas("form", "contact", "register") || this.hasFormFields(pageContent)) {
      return "form";
    }
    if (aliasHas("about", "privacy", "terms", "static")) {
      return "static";
    }
    return "unknown";
  }

  /** True when any (nested) key name contains an image-like word. */
  detectImages(content) {
    const imageKeys = ["image", "img", "photo", "picture", "banner", "logo", "icon"];
    return this.hasKeysContaining(content, imageKeys);
  }

  /** True when any collected string value contains something tag-shaped. */
  detectHtml(content) {
    const htmlRegex = /<[^>]+>/;
    return this.hasValuesMatching(content, htmlRegex);
  }

  /** True when a key name looks list-like or a top-level value is an array. */
  detectLists(content) {
    const listKeys = ["list", "items", "array", "collection", "features", "services"];
    return this.hasKeysContaining(content, listKeys) || this.hasArrayValues(content);
  }

  /** True when a top-level value is a non-array object with at least two keys. */
  detectNestedContent(content) {
    return this.topLevelValues(content).some(
      (value) => this.isRecord(value) && Object.keys(value).length >= 2
    );
  }

  /**
   * Returns the dotted key paths whose lowercased name contains one of the
   * priority words; when none match, the first eight key paths.
   *
   * @returns {string[]}
   */
  identifyKeyProperties(content) {
    // Compared against lowercased keys, so the list itself is lowercased.
    const priorityKeys = [
      "pageTitle",
      "title",
      "name",
      "heading",
      "pageDescription",
      "description",
      "summary",
      "intro",
      "content",
      "body",
      "text",
      "mainContent",
      "url",
      "link",
      "action"
    ].map((key) => key.toLowerCase());
    const allKeys = this.getAllKeys(content);
    const foundKeys = allKeys.filter((key) => {
      const lowerKey = key.toLowerCase();
      return priorityKeys.some((priority) => lowerKey.includes(priority));
    });
    return foundKeys.length > 0 ? foundKeys : allKeys.slice(0, 8);
  }

  /**
   * Suggests markdown headings based on which key families are present.
   * `urlItem` is accepted for interface compatibility but not read.
   *
   * @returns {string[]}
   */
  suggestHeaders(content, urlItem) {
    const headers = ["# {{pageTitle}}"];
    if (this.hasKeysContaining(content, ["description", "summary", "intro"])) {
      headers.push("## Overview");
    }
    if (this.hasKeysContaining(content, ["feature", "service", "benefit"])) {
      headers.push("## Features");
    }
    if (this.hasKeysContaining(content, ["detail", "information", "about"])) {
      headers.push("## Details");
    }
    if (this.hasKeysContaining(content, ["contact", "form", "action"])) {
      headers.push("## Contact Information");
    }
    return headers;
  }

  /**
   * Score = number of top-level keys + fixed bonuses per detected feature
   * and content type, capped at 100.
   *
   * @returns {number}
   */
  calculateComplexity(analysis, content) {
    let score = this.isContainer(content) ? Object.keys(content).length : 0;
    score += analysis.hasImages ? 10 : 0;
    score += analysis.hasHtml ? 15 : 0;
    score += analysis.hasLists ? 10 : 0;
    score += analysis.hasNestedContent ? 20 : 0;
    if (analysis.contentType === "homepage") {
      score += 25;
    }
    if (analysis.contentType === "listing") {
      score += 15;
    }
    return Math.min(score, 100);
  }

  /** True when any key name contains a form-related word. */
  hasFormFields(content) {
    const formKeys = ["input", "button", "form", "field", "submit", "action"];
    return this.hasKeysContaining(content, formKeys);
  }

  /** Case-insensitive substring search of `searchKeys` in all dotted key paths. */
  hasKeysContaining(obj, searchKeys) {
    const allKeys = this.getAllKeys(obj).map((key) => key.toLowerCase());
    return searchKeys.some(
      (searchKey) => allKeys.some((key) => key.includes(searchKey))
    );
  }

  /** True when any collected string value matches `regex`. */
  hasValuesMatching(obj, regex) {
    return this.getAllValues(obj).some(
      (value) => typeof value === "string" && regex.test(value)
    );
  }

  /** True when at least one top-level value is an array. */
  hasArrayValues(obj) {
    return this.topLevelValues(obj).some((value) => Array.isArray(value));
  }

  /**
   * Collects dotted key paths, descending into nested non-array objects only
   * (array contents are not inspected).
   *
   * @returns {string[]}
   */
  getAllKeys(obj, prefix = "") {
    if (!this.isContainer(obj)) {
      return [];
    }
    let keys = [];
    for (const [key, value] of Object.entries(obj)) {
      const fullKey = prefix ? `${prefix}.${key}` : key;
      keys.push(fullKey);
      if (this.isRecord(value)) {
        keys = keys.concat(this.getAllKeys(value, fullKey));
      }
    }
    return keys;
  }

  /**
   * Collects values, descending into nested non-array objects only; the
   * nested objects and arrays themselves are included in the result.
   *
   * @returns {Array}
   */
  getAllValues(obj) {
    if (!this.isContainer(obj)) {
      return [];
    }
    let values = [];
    for (const value of Object.values(obj)) {
      values.push(value);
      if (this.isRecord(value)) {
        values = values.concat(this.getAllValues(value));
      }
    }
    return values;
  }

  /** Non-null object or array: something Object.keys/values can walk. */
  isContainer(value) {
    return typeof value === "object" && value !== null;
  }

  /** Non-null, non-array object. */
  isRecord(value) {
    return this.isContainer(value) && !Array.isArray(value);
  }

  /** Top-level values of `content`, or an empty list for null/primitives. */
  topLevelValues(content) {
    return this.isContainer(content) ? Object.values(content) : [];
  }
}
366
+
367
/**
 * JSON-file backed cache of generated templates, keyed by page id.
 * The whole cache lives in memory and is rewritten to
 * `<cacheDir>/templates.json` after every mutation.
 */
class LLMSCache {
  cacheFilePath;
  cache = {};

  /**
   * @param {string} cacheDir - Directory for the cache file; created if missing.
   */
  constructor(cacheDir) {
    if (!existsSync(cacheDir)) {
      mkdirSync(cacheDir, { recursive: true });
    }
    this.cacheFilePath = join(cacheDir, "templates.json");
    this.loadCache();
  }

  /**
   * Reads the cache file if it exists. Unreadable, unparsable or
   * non-object content is logged and replaced by an empty cache.
   */
  loadCache() {
    try {
      if (existsSync(this.cacheFilePath)) {
        const cacheContent = readFileSync(this.cacheFilePath, "utf-8");
        const parsed = JSON.parse(cacheContent);
        // Valid JSON such as `null`, `[]` or `42` would otherwise become the
        // cache map and break every later lookup.
        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
          throw new Error("cache file does not contain a JSON object");
        }
        this.cache = parsed;
      }
    } catch (error) {
      console.warn("Failed to load template cache:", error);
      this.cache = {};
    }
  }

  /** Writes the cache to disk (best effort: failures are logged, not thrown). */
  saveCache() {
    try {
      const cacheDir = dirname(this.cacheFilePath);
      if (!existsSync(cacheDir)) {
        mkdirSync(cacheDir, { recursive: true });
      }
      writeFileSync(this.cacheFilePath, JSON.stringify(this.cache, null, 2));
    } catch (error) {
      console.error("Failed to save template cache:", error);
    }
  }

  /**
   * @param {string} pageId
   * @returns {boolean} True only for ids stored in the cache itself.
   */
  hasTemplate(pageId) {
    // Own-property check: `in` would also report inherited names such as
    // "constructor" or "toString" as cached templates.
    return Object.prototype.hasOwnProperty.call(this.cache, pageId);
  }

  /**
   * @param {string} pageId
   * @returns {object|null} The cached template record, or null when absent.
   */
  getTemplate(pageId) {
    const cached = this.hasTemplate(pageId) ? this.cache[pageId] : void 0;
    if (!cached)
      return null;
    return {
      pageId,
      templatePath: "",
      template: cached.template,
      hash: cached.hash,
      metadata: {
        url: cached.metadata.pageId,
        // Contains the URL from setTemplate
        templateAlias: cached.metadata.templateAlias,
        jpath: cached.metadata.jpath,
        generatedAt: new Date(cached.metadata.lastUpdated)
      }
    };
  }

  /**
   * @param {string} pageId
   * @returns {string|null} Stored hash, or null when absent or empty.
   */
  getTemplateHash(pageId) {
    if (!this.hasTemplate(pageId)) {
      return null;
    }
    return this.cache[pageId]?.hash || null;
  }

  /**
   * Stores (or overwrites) a template and persists the cache.
   * `lastUpdated` is stamped with the current time.
   */
  setTemplate(pageId, template, hash, metadata) {
    this.cache[pageId] = {
      hash,
      template,
      metadata: {
        ...metadata,
        lastUpdated: /* @__PURE__ */ new Date()
      }
    };
    this.saveCache();
  }

  /** Removes one entry and persists the cache. */
  removeTemplate(pageId) {
    delete this.cache[pageId];
    this.saveCache();
  }

  /** Drops every entry and persists the now-empty cache. */
  clearCache() {
    this.cache = {};
    this.saveCache();
  }

  /**
   * @returns {{totalTemplates: number, templatesByAlias: Object<string, number>,
   *   oldestTemplate: (Date|null), newestTemplate: (Date|null)}}
   */
  getCacheStats() {
    const templates = Object.values(this.cache);
    const templatesByAlias = {};
    let oldestTemplate = null;
    let newestTemplate = null;
    for (const template of templates) {
      const alias = template.metadata.templateAlias;
      templatesByAlias[alias] = (templatesByAlias[alias] || 0) + 1;
      // lastUpdated is a Date in memory but an ISO string after a reload.
      const date = new Date(template.metadata.lastUpdated);
      if (!oldestTemplate || date < oldestTemplate) {
        oldestTemplate = date;
      }
      if (!newestTemplate || date > newestTemplate) {
        newestTemplate = date;
      }
    }
    return {
      totalTemplates: templates.length,
      templatesByAlias,
      oldestTemplate,
      newestTemplate
    };
  }

  /**
   * Deletes entries last updated more than `maxAgeMs` ago; saves only when
   * something was removed.
   *
   * @param {number} maxAgeMs
   * @returns {number} Number of entries removed.
   */
  cleanupOldTemplates(maxAgeMs) {
    const cutoffDate = new Date(Date.now() - maxAgeMs);
    let removedCount = 0;
    for (const [pageId, template] of Object.entries(this.cache)) {
      const templateDate = new Date(template.metadata.lastUpdated);
      if (templateDate < cutoffDate) {
        delete this.cache[pageId];
        removedCount++;
      }
    }
    if (removedCount > 0) {
      this.saveCache();
    }
    return removedCount;
  }

  /** @returns {object} Shallow copy of the cache map (entries are shared). */
  getAllTemplates() {
    return { ...this.cache };
  }
}
484
+
485
/**
 * Computes a SHA-256 hex digest identifying the shape of a page object.
 *
 * The page is first reduced by `processPageDataForHash` (honouring the
 * exclude/include options); the digest is then taken over the sorted
 * top-level key names of that result joined with "|". Value types and
 * nested structure do not feed the digest.
 *
 * @param {object} pageData
 * @param {object} [options]
 * @param {boolean} [options.excludeChildren=true]
 * @param {string[]} [options.excludeKeys=[]]
 * @param {string[]} [options.includeOnlyKeys=[]]
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function generatePageStructureHash(pageData, options = {}) {
  // Defaults apply only to `undefined`, as with destructuring defaults.
  const pick = (value, fallback) => (value === undefined ? fallback : value);
  const structure = processPageDataForHash(pageData, {
    excludeChildren: pick(options.excludeChildren, true),
    excludeKeys: pick(options.excludeKeys, []),
    includeOnlyKeys: pick(options.includeOnlyKeys, [])
  });
  const keySignature = Object.keys(structure).sort().join("|");
  const digest = createHash("sha256");
  digest.update(keySignature);
  return digest.digest("hex");
}
500
/**
 * Builds a structural summary of `data`: same keys (minus filtered ones),
 * with nested non-array objects summarised recursively and every other
 * value replaced by its `getValueType` label.
 *
 * The filters are applied at every nesting level:
 * - a key named "children" is dropped when `excludeChildren` is truthy,
 * - keys listed in `excludeKeys` are dropped,
 * - when `includeOnlyKeys` is non-empty, keys not listed in it are dropped.
 *
 * @param {object} data
 * @param {{excludeChildren: boolean, excludeKeys?: string[], includeOnlyKeys?: string[]}} options
 * @returns {object}
 */
function processPageDataForHash(data, options) {
  const { excludeChildren, excludeKeys = [], includeOnlyKeys = [] } = options;
  const isSkipped = (key) =>
    (excludeChildren && key === "children") ||
    excludeKeys.includes(key) ||
    (includeOnlyKeys.length > 0 && !includeOnlyKeys.includes(key));

  const summary = {};
  for (const [key, value] of Object.entries(data)) {
    if (isSkipped(key)) {
      continue;
    }
    const isNestedObject = typeof value === "object" && value !== null && !Array.isArray(value);
    summary[key] = isNestedObject
      ? processPageDataForHash(value, options)
      : getValueType(value);
  }
  return summary;
}
521
/**
 * Returns a short type label for a value: "null" for null,
 * "array[<length>]" for arrays, otherwise the `typeof` result.
 *
 * @param {*} value
 * @returns {string}
 */
function getValueType(value) {
  if (Array.isArray(value)) {
    return `array[${value.length}]`;
  }
  // typeof null is "object", so null needs its own label.
  return value === null ? "null" : typeof value;
}
530
+
531
/**
 * Evaluates `jpath` against `umbracoData.SiteData` and returns the matched
 * page content without its `children` property.
 *
 * A falsy or empty-array result yields null; an array result yields its
 * first element. Any error is logged and reported as null.
 *
 * @param {{SiteData: object}} umbracoData
 * @param {string} jpath - JSONPath expression.
 * @returns {object|null}
 */
function extractPageContent(umbracoData, jpath) {
  try {
    const matched = JSONPath({
      path: jpath,
      json: umbracoData.SiteData,
      wrap: false
    });
    const isList = Array.isArray(matched);
    const nothingFound = !matched || (isList && matched.length === 0);
    if (nothingFound) {
      return null;
    }
    return excludeChildrenFromContent(isList ? matched[0] : matched);
  } catch (error) {
    console.error(`Failed to extract content for path ${jpath}:`, error);
    return null;
  }
}
548
/**
 * Returns a shallow copy of `content` without its `children` property.
 * Falsy values and non-objects are returned unchanged; the input object is
 * never mutated.
 *
 * @param {*} content
 * @returns {*}
 */
function excludeChildrenFromContent(content) {
  const isObject = Boolean(content) && typeof content === "object";
  if (!isObject) {
    return content;
  }
  // Rest destructuring copies every own enumerable property except `children`.
  const { children, ...withoutChildren } = content;
  return withoutChildren;
}
558
/**
 * Builds the cache key for a page: "<TemplateAlias>_<nodeID>".
 *
 * @param {{TemplateAlias: *, nodeID: *}} urlItem
 * @returns {string}
 */
function generatePageId(urlItem) {
  const { TemplateAlias, nodeID } = urlItem;
  return "".concat(TemplateAlias, "_", nodeID);
}
561
/**
 * Rough token estimate: length of the JSON serialisation divided by four,
 * rounded up. Returns 0 when the value cannot be serialised to a string
 * (for example circular structures or `undefined`).
 *
 * @param {*} content
 * @returns {number}
 */
function estimateContentTokens(content) {
  const CHARS_PER_TOKEN = 4;
  try {
    const { length } = JSON.stringify(content);
    return Math.ceil(length / CHARS_PER_TOKEN);
  } catch {
    return 0;
  }
}
569
/**
 * Returns `content` unchanged when its estimated token count fits within
 * `maxTokens`; otherwise returns a shallow copy in which the largest
 * top-level properties are shortened until the estimate fits or every
 * property has been visited.
 *
 * Shortening rules per property: arrays longer than 10 items keep their
 * first 10; strings longer than 5000 characters keep their first 5000 plus
 * "...". Other values are left as they are, so the result may still exceed
 * the limit.
 *
 * @param {object} content
 * @param {number} [maxTokens=180000]
 * @returns {object}
 */
function truncateContentIfNeeded(content, maxTokens = 180000) {
  const MAX_ARRAY_ITEMS = 10;
  const MAX_STRING_CHARS = 5000;

  const estimatedTokens = estimateContentTokens(content);
  if (estimatedTokens <= maxTokens) {
    return content;
  }
  console.warn(`Content too large (${estimatedTokens} tokens > ${maxTokens} limit), truncating...`);

  const truncatedContent = { ...content };

  // Sizes are measured once, before any shortening, and drive the visit
  // order (largest property first).
  const sizeByKey = new Map(
    Object.keys(truncatedContent).map((key) => [
      key,
      estimateContentTokens({ [key]: truncatedContent[key] })
    ])
  );
  const largestFirst = [...sizeByKey.keys()].sort(
    (a, b) => sizeByKey.get(b) - sizeByKey.get(a)
  );

  for (const key of largestFirst) {
    const fitsNow = estimateContentTokens(truncatedContent) <= maxTokens;
    if (fitsNow) {
      break;
    }
    const value = truncatedContent[key];
    if (Array.isArray(value) && value.length > MAX_ARRAY_ITEMS) {
      truncatedContent[key] = value.slice(0, MAX_ARRAY_ITEMS);
      console.warn(`Truncated array ${key} from ${value.length} to 10 items`);
      continue;
    }
    if (typeof value === "string" && value.length > MAX_STRING_CHARS) {
      truncatedContent[key] = value.substring(0, MAX_STRING_CHARS) + "...";
      console.warn(`Truncated string ${key} from ${value.length} to 5000 chars`);
    }
  }

  const finalTokens = estimateContentTokens(truncatedContent);
  console.log(`Content truncated from ${estimatedTokens} to ${finalTokens} tokens`);
  return truncatedContent;
}
598
+
599
/**
 * Decides whether a template should be generated for `urlItem`.
 *
 * Returns false (with a log line) when no content is found at
 * `urlItem.Jpath`, when the content's `hidePage` is "1" or 1, or when the
 * check itself throws.
 *
 * @param {object} umbracoData
 * @param {{url: string, Jpath: string}} urlItem
 * @returns {boolean}
 */
function shouldGenerateTemplate(umbracoData, urlItem) {
  try {
    const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
    if (!pageContent) {
      console.warn(`No content found for ${urlItem.url}, skipping template generation`);
      return false;
    }
    const { hidePage } = pageContent;
    const isHidden = hidePage === "1" || hidePage === 1;
    if (isHidden) {
      console.log(`Page ${urlItem.url} is hidden (hidePage: ${hidePage}), skipping template generation`);
    }
    return !isHidden;
  } catch (error) {
    console.error(`Error checking visibility for ${urlItem.url}:`, error);
    return false;
  }
}
617
/**
 * Returns the entries of `umbracoData.urlList` for which
 * `shouldGenerateTemplate` is true, in their original order.
 *
 * @param {{urlList: Array<object>}} umbracoData
 * @returns {Array<object>}
 */
function getVisiblePages(umbracoData) {
  const visiblePages = [];
  for (const urlItem of umbracoData.urlList) {
    if (shouldGenerateTemplate(umbracoData, urlItem)) {
      visiblePages.push(urlItem);
    }
  }
  return visiblePages;
}
622
/**
 * Counts how many pages in `umbracoData.urlList` are visible, hidden
 * (`hidePage` is "1" or 1) or could not be inspected (no content found, or
 * an exception while checking).
 *
 * @param {{urlList: Array<{Jpath: string}>}} umbracoData
 * @returns {{total: number, visible: number, hidden: number, error: number}}
 */
function getPageVisibilityStats(umbracoData) {
  const counts = { visible: 0, hidden: 0, error: 0 };

  // Classifies a single page into one of the three counters.
  const classify = (urlItem) => {
    try {
      const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
      if (!pageContent) {
        return "error";
      }
      const { hidePage } = pageContent;
      return hidePage === "1" || hidePage === 1 ? "hidden" : "visible";
    } catch {
      return "error";
    }
  };

  for (const urlItem of umbracoData.urlList) {
    counts[classify(urlItem)] += 1;
  }

  return {
    total: umbracoData.urlList.length,
    visible: counts.visible,
    hidden: counts.hidden,
    error: counts.error
  };
}
650
+
651
/**
 * Finds and removes cached templates that no longer correspond to a page
 * in the current site data, or whose page is now hidden.
 */
class TemplateCleanup {
  cache;

  /**
   * @param {string} cacheDir - Directory of the template cache.
   */
  constructor(cacheDir) {
    this.cache = new LLMSCache(cacheDir);
  }

  /**
   * Lists cached templates whose page id is not produced by any entry of
   * `umbracoData.urlList`.
   *
   * @param {{urlList: Array<object>}} umbracoData
   * @returns {Array<object>} Records with reason "page_deleted".
   */
  findOrphanedTemplates(umbracoData) {
    const { totalTemplates } = this.cache.getCacheStats();
    console.log(`\u{1F50D} Checking ${totalTemplates} cached templates for orphans...`);

    const livePageIds = /* @__PURE__ */ new Set();
    for (const urlItem of umbracoData.urlList) {
      try {
        livePageIds.add(generatePageId(urlItem));
      } catch (error) {
        console.warn(`Error generating page ID for ${urlItem.url}:`, error);
      }
    }

    return Object.entries(this.cache.getAllTemplates())
      .filter(([pageId]) => !livePageIds.has(pageId))
      .map(([pageId, templateData]) => ({
        pageId,
        templateAlias: templateData.metadata.templateAlias,
        lastUpdated: new Date(templateData.metadata.lastUpdated),
        // The cache keeps the page URL in its metadata.pageId field.
        url: templateData.metadata.pageId,
        reason: "page_deleted"
      }));
  }

  /**
   * Lists cached templates whose page content now has `hidePage` set to
   * "1" or 1. Pages without a cached template are skipped.
   *
   * @param {{urlList: Array<object>}} umbracoData
   * @returns {Array<object>} Records with reason "page_hidden".
   */
  findHiddenPageTemplates(umbracoData) {
    const cachedTemplates = this.cache.getAllTemplates();
    const hiddenTemplates = [];
    for (const urlItem of umbracoData.urlList) {
      try {
        const pageId = generatePageId(urlItem);
        const templateData = cachedTemplates[pageId];
        if (!templateData) {
          continue;
        }
        const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
        const hideFlag = pageContent?.hidePage;
        if (hideFlag === "1" || hideFlag === 1) {
          hiddenTemplates.push({
            pageId,
            templateAlias: templateData.metadata.templateAlias,
            lastUpdated: new Date(templateData.metadata.lastUpdated),
            url: urlItem.url,
            reason: "page_hidden"
          });
        }
      } catch (error) {
        console.warn(`Error checking hidden status for ${urlItem.url}:`, error);
      }
    }
    return hiddenTemplates;
  }

  /**
   * Collects deleted-page and/or hidden-page templates, logs them, and
   * removes them from the cache unless `dryRun` is set.
   *
   * @param {{urlList: Array<object>}} umbracoData
   * @param {{removeOrphaned?: boolean, removeHidden?: boolean, dryRun?: boolean}} [options]
   * @returns {{totalTemplatesBeforeCleanup: number, orphanedTemplatesFound: number,
   *   templatesRemoved: number, orphanedTemplates: Array<object>}}
   */
  performCleanup(umbracoData, options = {}) {
    const {
      removeOrphaned = true,
      removeHidden = true,
      dryRun = false
    } = options;
    const initialStats = this.cache.getCacheStats();

    // Both kinds are reported together under the "orphaned" label.
    const orphanedTemplates = [
      ...(removeOrphaned ? this.findOrphanedTemplates(umbracoData) : []),
      ...(removeHidden ? this.findHiddenPageTemplates(umbracoData) : [])
    ];

    console.log(`\u{1F5D1}\uFE0F Found ${orphanedTemplates.length} orphaned templates`);

    let templatesRemoved = 0;
    if (orphanedTemplates.length > 0) {
      for (const template of orphanedTemplates) {
        console.log(` - ${template.pageId} (${template.reason}): ${template.templateAlias} - ${template.url}`);
      }
      if (dryRun) {
        console.log(`\u{1F50D} DRY RUN: Would remove ${orphanedTemplates.length} templates`);
      } else {
        for (const template of orphanedTemplates) {
          this.cache.removeTemplate(template.pageId);
          templatesRemoved += 1;
        }
        console.log(`\u2705 Removed ${templatesRemoved} orphaned templates`);
      }
    }

    return {
      totalTemplatesBeforeCleanup: initialStats.totalTemplates,
      orphanedTemplatesFound: orphanedTemplates.length,
      templatesRemoved,
      orphanedTemplates
    };
  }

  /**
   * Reports what a cleanup would remove, without changing the cache.
   *
   * @param {{urlList: Array<object>}} umbracoData
   * @returns {{shouldCleanup: boolean, orphanedCount: number, recommendations: string[]}}
   */
  getCleanupRecommendations(umbracoData) {
    const deletedCount = this.findOrphanedTemplates(umbracoData).length;
    const hiddenCount = this.findHiddenPageTemplates(umbracoData).length;
    const totalOrphaned = deletedCount + hiddenCount;

    const recommendations = [];
    if (deletedCount > 0) {
      recommendations.push(`${deletedCount} templates for deleted pages should be removed`);
    }
    if (hiddenCount > 0) {
      recommendations.push(`${hiddenCount} templates for hidden pages should be removed`);
    }
    if (totalOrphaned === 0) {
      recommendations.push("No cleanup needed - all templates are current");
    }

    return {
      shouldCleanup: totalOrphaned > 0,
      orphanedCount: totalOrphaned,
      recommendations
    };
  }

  /**
   * Simplified path lookup: splits `jpath` on "." and walks `umbracoData`
   * property by property, skipping "$" segments. Returns null as soon as a
   * segment is missing or an error occurs. Bracket or filter syntax is not
   * interpreted.
   *
   * @param {object} umbracoData
   * @param {string} jpath
   * @returns {*}
   */
  extractPageContent(umbracoData, jpath) {
    try {
      const segments = jpath.split(".").filter((segment) => segment !== "$");
      let node = umbracoData;
      for (const segment of segments) {
        const canDescend = node && typeof node === "object" && segment in node;
        if (!canDescend) {
          return null;
        }
        node = node[segment];
      }
      return node;
    } catch (error) {
      console.warn(`Error extracting content for ${jpath}:`, error);
      return null;
    }
  }
}
803
/**
 * Runs a `TemplateCleanup` pass over the cache in `cacheDir`, unless
 * `enableAutoCleanup` is false.
 *
 * @param {{urlList: Array<object>}} umbracoData
 * @param {string} cacheDir
 * @param {{enableAutoCleanup?: boolean, cleanupOrphaned?: boolean,
 *   cleanupHidden?: boolean, dryRun?: boolean}} [options]
 * @returns {Promise<object|null>} Cleanup statistics, or null when disabled.
 */
async function performAutomaticCleanup(umbracoData, cacheDir, options = {}) {
  const {
    enableAutoCleanup = true,
    cleanupOrphaned = true,
    cleanupHidden = true,
    dryRun = false
  } = options;

  if (enableAutoCleanup) {
    const cleaner = new TemplateCleanup(cacheDir);
    console.log("\u{1F9F9} Performing automatic template cleanup...");
    const stats = cleaner.performCleanup(umbracoData, {
      removeOrphaned: cleanupOrphaned,
      removeHidden: cleanupHidden,
      dryRun
    });
    const removed = stats.templatesRemoved;
    if (removed > 0) {
      console.log(`\u2705 Cleaned up ${removed} orphaned templates`);
    }
    return stats;
  }
  return null;
}
825
+
826
/**
 * Validator that checks whether a template parses as Mustache and, when it
 * does not, tries to repair unbalanced section tags.
 */
class MustacheSyntaxValidator {
  name = "mustache-syntax";
  canFix = true;

  /**
   * @param {string} template
   * @returns {{isValid: boolean, errors: string[], warnings: string[], fixedTemplate?: string}}
   *   `fixedTemplate` is set only when the repaired template itself parses.
   */
  validate(template) {
    const result = {
      isValid: true,
      errors: [],
      warnings: []
    };
    try {
      Mustache.parse(template);
      return result;
    } catch (error) {
      result.isValid = false;
      result.errors.push(`Mustache syntax error: ${error.message}`);
      if (this.canFix) {
        try {
          const fixedTemplate = this.fix(template);
          // Re-parse so a still-broken template is never reported as fixed.
          Mustache.parse(fixedTemplate);
          result.fixedTemplate = fixedTemplate;
          result.warnings.push("Template was automatically fixed");
        } catch (fixError) {
          result.errors.push(`Could not fix template: ${fixError.message}`);
        }
      }
      return result;
    }
  }

  /**
   * Removes section tags that have no partner and collapses runs of three
   * or more line breaks (with optional whitespace between) into one blank
   * line.
   *
   * Tags are paired positionally with a stack, so both `{{#name}}` and
   * `{{^name}}` count as openers, a surplus opener is detected even when
   * another tag with the same name is properly closed, and mis-nested tags
   * are resolved by dropping the inner opener and the stray closer.
   *
   * @param {string} template
   * @returns {string}
   */
  fix(template) {
    const sectionTag = /\{\{([#^/])([a-zA-Z0-9_.]+)\}\}/g;
    const openStack = [];
    const unmatched = [];

    let match;
    while ((match = sectionTag.exec(template)) !== null) {
      const tag = {
        name: match[2],
        start: match.index,
        end: match.index + match[0].length
      };
      if (match[1] !== "/") {
        openStack.push(tag);
        continue;
      }
      // Find the nearest still-open section with this name.
      let partner = -1;
      for (let i = openStack.length - 1; i >= 0; i--) {
        if (openStack[i].name === tag.name) {
          partner = i;
          break;
        }
      }
      if (partner === -1) {
        unmatched.push(tag);
        continue;
      }
      // Openers above the partner were never closed inside it.
      unmatched.push(...openStack.splice(partner + 1));
      openStack.pop();
    }
    unmatched.push(...openStack);

    // Cut from the end so earlier offsets stay valid.
    unmatched.sort((a, b) => b.start - a.start);
    let fixedTemplate = template;
    for (const { start, end } of unmatched) {
      fixedTemplate = fixedTemplate.slice(0, start) + fixedTemplate.slice(end);
    }
    return fixedTemplate.replace(/\n\s*\n\s*\n/g, "\n\n");
  }
}
890
/**
 * Advisory checks on the markdown structure of a template. Never marks a
 * template invalid; findings are reported as warnings only.
 */
class TemplateStructureValidator {
  name = "template-structure";

  /**
   * Warns when the template has no line starting with "# ", has a line
   * starting with four or more "#", contains an opening section tag
   * followed only by whitespace and a closing tag, or uses the variable
   * `pageTittle`.
   *
   * @param {string} template
   * @returns {{isValid: boolean, errors: string[], warnings: string[]}}
   */
  validate(template) {
    const warnings = [];

    const hasTopHeading = template.search(/^#\s+/m) !== -1;
    if (!hasTopHeading) {
      warnings.push("Template should start with a heading (# Title)");
    }

    const deepHeadings = template.match(/^#{4,}/gm) ?? [];
    if (deepHeadings.length > 0) {
      warnings.push("Template has deeply nested headings (4+ levels), consider flattening structure");
    }

    const emptySections = template.match(/\{\{#\w+\}\}\s*\{\{\/\w+\}\}/g);
    if (emptySections !== null) {
      warnings.push(`Found ${emptySections.length} empty sections that may not render content`);
    }

    const usesPageTittle = template.search(/\{\{\s*pageTittle\s*\}\}/) !== -1;
    if (usesPageTittle) {
      warnings.push('Found "pageTittle" - check if this should be "pageTitle"');
    }

    return { isValid: true, errors: [], warnings };
  }
}
916
/**
 * Advisory checks on how much content a template exposes. Never marks a
 * template invalid; findings are reported as warnings only.
 */
class ContentCompletenessValidator {
  name = "content-completeness";

  /**
   * Warns when no variable name contains "title"/"Title", none contains
   * "description"/"Description", the template is shorter than 50
   * characters, or fewer than two variables are free of "." and "[".
   *
   * @param {string} template
   * @returns {{isValid: boolean, errors: string[], warnings: string[]}}
   */
  validate(template) {
    const names = this.extractVariables(template);
    const anyNameContains = (...fragments) =>
      names.some((name) => fragments.some((fragment) => name.includes(fragment)));

    const warnings = [];
    if (!anyNameContains("title", "Title")) {
      warnings.push("Template missing title variable (pageTitle, title, etc.)");
    }
    if (!anyNameContains("description", "Description")) {
      warnings.push("Template missing description variable");
    }
    if (template.length < 50) {
      warnings.push("Template is very short, may not provide sufficient content");
    }
    const simpleNames = names.filter((name) => !(name.includes(".") || name.includes("[")));
    if (simpleNames.length < 2) {
      warnings.push("Template has limited content variables, consider adding more sections");
    }

    return { isValid: true, errors: [], warnings };
  }

  /**
   * Returns the trimmed inner text of every `{{...}}` tag whose first
   * non-space character is neither "#" nor "/", in order of appearance.
   * For a triple-brace tag the captured text keeps a leading "{".
   *
   * @param {string} template
   * @returns {string[]}
   */
  extractVariables(template) {
    const tagPattern = /\{\{\s*([^#\/][^}]*?)\s*\}\}/g;
    const found = [];
    for (let hit = tagPattern.exec(template); hit !== null; hit = tagPattern.exec(template)) {
      found.push(hit[1].trim());
    }
    return found;
  }
}
952
/**
 * Pipeline validator that emits advisory warnings about markdown structure
 * (heading hierarchy, blockquote description, lists, raw HTML).
 *
 * It only ever adds warnings: `errors` stays empty and `isValid` stays `true`.
 */
class LLMSTxtComplianceValidator {
  name = "llms-txt-compliance";

  /**
   * @param {string} template - Mustache/markdown template source.
   * @returns {{isValid: boolean, errors: string[], warnings: string[]}}
   */
  validate(template) {
    const result = {
      isValid: true,
      errors: [],
      warnings: []
    };

    // Headings may go up any number of levels but may only go down one level
    // at a time. Starting from 0 means the first heading must be a single `#`.
    const headings = template.match(/^#+\s+.+$/gm) || [];
    let lastLevel = 0;
    let hasProperHierarchy = true;
    for (const heading of headings) {
      const level = heading.match(/^#+/)[0].length;
      if (level > lastLevel + 1) {
        hasProperHierarchy = false;
      }
      lastLevel = level;
    }
    if (!hasProperHierarchy) {
      result.warnings.push("Heading hierarchy should increment by one level (# -> ## -> ###)");
    }

    // A blockquote is a line whose first non-blank character is `>`; a `>`
    // elsewhere (for example the end of an HTML tag) does not count.
    const hasBlockquote = /^[ \t]*>/m.test(template);
    if (template.includes("pageDescription") && !hasBlockquote) {
      result.warnings.push("Consider using blockquote (>) for page description as per LLMS.txt standard");
    }

    // A list item is a line starting with a bullet (`-`, `*`, `+`) or an
    // ordered marker (`1.` / `1)`) followed by whitespace and content, so a
    // dash inside a sentence is not mistaken for a list.
    const hasLists = /^[ \t]*(?:[-*+]|\d+[.)])[ \t]+\S/m.test(template);
    if (!hasLists && template.length > 200) {
      result.warnings.push("Long content without lists - consider breaking into bullet points for better AI consumption");
    }

    const htmlTags = (template.match(/<[^>]+>/g) || []).length;
    if (htmlTags > 3) {
      result.warnings.push("Template contains HTML tags - prefer pure markdown for LLMS.txt compliance");
    }
    return result;
  }
}
987
/**
 * Runs a list of validators over a template in registration order, collects
 * their errors and warnings, and optionally applies a validator's `fix()`
 * when that validator reports the template as invalid.
 *
 * A validator is an object with a `name`, a `validate(template)` method
 * returning `{ isValid, errors, warnings }`, and optionally `canFix` plus a
 * `fix(template)` method returning a corrected template.
 */
class TemplateValidationPipeline {
  validators = [];

  /** Registers the four built-in validators. */
  constructor() {
    this.addValidator(new MustacheSyntaxValidator());
    this.addValidator(new TemplateStructureValidator());
    this.addValidator(new ContentCompletenessValidator());
    this.addValidator(new LLMSTxtComplianceValidator());
  }

  /**
   * Append a validator to the end of the pipeline.
   * @param {object} validator
   */
  addValidator(validator) {
    this.validators.push(validator);
  }

  /**
   * Remove every validator whose `name` equals the given name.
   * @param {string} name
   */
  removeValidator(name) {
    this.validators = this.validators.filter((v) => v.name !== name);
  }

  /**
   * Validate a template with every registered validator.
   *
   * When a validator reports the template invalid and offers a fix, the fix
   * is applied and re-validated by the same validator. If the fixed template
   * passes, later validators see the fixed template and that validator's
   * errors are dropped from the aggregate result.
   *
   * @param {string} template - Template source to validate.
   * @param {{autoFix?: boolean, throwOnError?: boolean}} [options]
   * @returns {Promise<{isValid: boolean, errors: string[], warnings: string[], fixedTemplate?: string}>}
   * @throws {TemplateError} When `throwOnError` is set and errors remain.
   */
  async validateTemplate(template, options = {}) {
    const { autoFix = true, throwOnError = false } = options;
    let currentTemplate = template;
    const allResults = {
      isValid: true,
      errors: [],
      warnings: []
    };

    for (const validator of this.validators) {
      const result = validator.validate(currentTemplate);
      // Remember where this validator's errors start so that a successful
      // fix removes exactly these entries and nothing from other validators.
      const errorCountBefore = allResults.errors.length;
      allResults.errors.push(...result.errors);
      allResults.warnings.push(...result.warnings);
      if (result.isValid) {
        continue;
      }

      allResults.isValid = false;
      if (!(autoFix && validator.canFix && validator.fix)) {
        continue;
      }

      let fixedTemplate;
      try {
        fixedTemplate = validator.fix(currentTemplate);
      } catch (error) {
        // A failing fixer must not abort validation; keep the reported errors.
        console.warn(`Validator ${validator.name} failed to fix template`, error);
        continue;
      }

      const fixResult = validator.validate(fixedTemplate);
      if (!fixResult.isValid) {
        continue;
      }

      currentTemplate = fixedTemplate;
      allResults.fixedTemplate = currentTemplate;
      console.log(`Template fixed by ${validator.name} validator`);
      allResults.errors.length = errorCountBefore;
      if (allResults.errors.length === 0) {
        allResults.isValid = true;
      }
    }

    if (allResults.errors.length > 0 && throwOnError) {
      throw new TemplateError(
        ErrorCode.TEMPLATE_VALIDATION_FAILED,
        `Template validation failed: ${allResults.errors.join(", ")}`,
        { template: template.substring(0, 200) + "..." }
      );
    }
    return allResults;
  }

  /**
   * Validate with auto-fix enabled and return a template that is safe to use.
   *
   * - If any errors remain after fixing, a static fallback template is
   *   returned (a partially fixed template is still invalid).
   * - Otherwise the fixed template is returned if a fix was applied,
   *   else the original template.
   *
   * @param {string} template - Template source.
   * @returns {Promise<string>}
   */
  async validateAndFix(template) {
    const result = await this.validateTemplate(template, {
      autoFix: true,
      throwOnError: false
    });

    if (result.errors.length > 0) {
      console.warn("Could not fix template, using fallback");
      return [
        "# {{pageTitle}}",
        "",
        "> {{pageDescription}}",
        "",
        "## Content",
        "",
        "This page content could not be processed due to template formatting issues."
      ].join("\n");
    }
    // `??` so that a legitimately empty fixed template is still honoured.
    return result.fixedTemplate ?? template;
  }

  /**
   * @returns {string[]} Names of the registered validators, in pipeline order.
   */
  getValidatorNames() {
    return this.validators.map((v) => v.name);
  }
}
1063
+ const templateValidationPipeline = new TemplateValidationPipeline(); // Shared module-level pipeline instance; TemplateGenerator.renderTemplate() calls its validateAndFix().
1064
+
1065
/**
 * Produces one Mustache template per page, reusing a cached template when the
 * page's structure hash is unchanged and asking the Anthropic client for a
 * new one otherwise. Also renders templates and reads/writes template files.
 */
class TemplateGenerator {
  anthropicClient;
  promptAnalyzer;
  cache;
  config;

  /**
   * @param {object} config - Module configuration. Fields read by this class:
   *   `cacheDir`, `templatesOutputDir`, `finalOutputDir`, `maxConcurrent`,
   *   `enableAutoCleanup`, `cleanupOrphaned`, `cleanupHidden` (the whole
   *   object is also handed to `AnthropicClient`).
   */
  constructor(config) {
    this.config = config;
    this.anthropicClient = new AnthropicClient(config);
    this.promptAnalyzer = new PromptAnalyzer();
    this.cache = new LLMSCache(this.getCacheDir());
    this.ensureOutputDirectories();
  }

  /**
   * Directory backing the template cache. Used both for the cache itself and
   * for automatic cleanup so the two always operate on the same location.
   * @returns {string}
   */
  getCacheDir() {
    return this.config.cacheDir || ".llms-cache";
  }

  /**
   * Return the cached template for a page when its structure hash matches,
   * otherwise generate a new one.
   *
   * @param {object} pageContent - Content object of the page.
   * @param {object} urlItem - URL list entry describing the page.
   * @returns {Promise<object>} Cached or freshly generated template record.
   */
  async generateTemplate(pageContent, urlItem) {
    const pageId = generatePageId(urlItem);
    const currentHash = generatePageStructureHash(pageContent, { excludeChildren: true });
    const cachedHash = this.cache.getTemplateHash(pageId);
    if (cachedHash === currentHash) {
      console.log(`Using cached template for ${pageId}`);
      const cached = this.cache.getTemplate(pageId);
      if (cached) {
        return cached;
      }
    }
    return await this.generateTemplateWithAI(pageContent, urlItem);
  }

  /**
   * Generate (or load from cache) templates for every page in
   * `umbracoData.urlList` that `shouldGenerateTemplate` accepts.
   *
   * Missing templates are generated in batches of `config.maxConcurrent`
   * (default 5) with a one second pause between batches. A page whose
   * generation fails is logged and left out of the result.
   *
   * @param {object} umbracoData - Site data; must expose `urlList`.
   * @returns {Promise<object[]>} Cached templates followed by new ones.
   */
  async generateAllTemplates(umbracoData) {
    const templates = [];
    // A non-positive or fractional batch size would make the batching loop
    // below misbehave (it could never advance), so fall back to the default.
    const configuredConcurrency = this.config.maxConcurrent;
    const maxConcurrent = Number.isInteger(configuredConcurrency) && configuredConcurrency > 0 ? configuredConcurrency : 5;

    await performAutomaticCleanup(umbracoData, this.getCacheDir(), {
      enableAutoCleanup: this.config.enableAutoCleanup ?? true,
      cleanupOrphaned: this.config.cleanupOrphaned ?? true,
      cleanupHidden: this.config.cleanupHidden ?? true,
      dryRun: false
    });

    const visibilityStats = getPageVisibilityStats(umbracoData);
    console.log("\u{1F4CA} Page visibility stats:", visibilityStats);
    const visiblePages = umbracoData.urlList.filter(
      (urlItem) => shouldGenerateTemplate(umbracoData, urlItem)
    );
    console.log(`Checking ${visiblePages.length}/${umbracoData.urlList.length} visible pages for cache status...`);

    const { cached, needGeneration } = this.identifyTemplatesNeeded(umbracoData, visiblePages);
    console.log(`\u{1F4C8} Template status: ${cached.length} cached, ${needGeneration.length} need generation`);
    templates.push(...cached);
    if (needGeneration.length === 0) {
      console.log("All templates are cached, no AI generation needed");
      return templates;
    }

    console.log(`Generating ${needGeneration.length} missing templates (max ${maxConcurrent} concurrent)`);
    for (let i = 0; i < needGeneration.length; i += maxConcurrent) {
      const batch = needGeneration.slice(i, i + maxConcurrent);
      const batchPromises = batch.map(async ({ pageContent, urlItem }) => {
        try {
          return await this.generateTemplateWithAI(pageContent, urlItem);
        } catch (error) {
          // One failing page must not abort the rest of the batch.
          console.error(`Error processing ${urlItem.url}:`, error);
          return null;
        }
      });
      const batchResults = await Promise.all(batchPromises);
      const validResults = batchResults.filter((result) => result !== null);
      templates.push(...validResults);
      if (i + maxConcurrent < needGeneration.length) {
        console.log(`Completed batch ${Math.floor(i / maxConcurrent) + 1}, waiting 1s...`);
        await this.delay(1e3);
      }
    }
    console.log(`Generated ${templates.length} total templates (${cached.length} from cache, ${templates.length - cached.length} newly generated)`);
    return templates;
  }

  /**
   * Split the visible pages into those with an up-to-date cached template
   * and those that need generation. Pages without content are skipped with
   * a warning.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} visiblePages - URL list entries to check.
   * @returns {{cached: object[], needGeneration: {pageContent: object, urlItem: object}[]}}
   */
  identifyTemplatesNeeded(umbracoData, visiblePages) {
    const cached = [];
    const needGeneration = [];
    for (const urlItem of visiblePages) {
      const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
      if (!pageContent) {
        console.warn(`No content found for ${urlItem.url} (${urlItem.Jpath})`);
        continue;
      }
      const pageId = generatePageId(urlItem);
      const currentHash = generatePageStructureHash(pageContent, { excludeChildren: true });
      const cachedHash = this.cache.getTemplateHash(pageId);
      if (cachedHash === currentHash) {
        const cachedTemplate = this.cache.getTemplate(pageId);
        if (cachedTemplate) {
          cached.push(cachedTemplate);
          continue;
        }
      }
      needGeneration.push({
        pageContent,
        urlItem
      });
    }
    return {
      cached,
      needGeneration
    };
  }

  /**
   * Ask the Anthropic client for a template for one page, write it to
   * `<templatesOutputDir>/<pageId>.mustache` and store it in the cache.
   *
   * The content sent to the client is truncated via
   * `truncateContentIfNeeded(pageContent, 180000)`; the stored hash is
   * computed from the untruncated content.
   *
   * @param {object} pageContent - Content object of the page.
   * @param {object} urlItem - URL list entry (`url`, `Jpath`, `TemplateAlias`).
   * @returns {Promise<object>} `{ pageId, templatePath, template, hash, metadata }`.
   */
  async generateTemplateWithAI(pageContent, urlItem) {
    const pageId = generatePageId(urlItem);
    console.log(`Generating new template for ${pageId} (${urlItem.url})`);

    const tokensBeforeTruncation = estimateContentTokens(pageContent);
    const truncatedContent = truncateContentIfNeeded(pageContent, 18e4);
    const tokensAfterTruncation = estimateContentTokens(truncatedContent);
    if (tokensBeforeTruncation > tokensAfterTruncation) {
      console.warn(`Page ${pageId} content truncated: ${tokensBeforeTruncation} -> ${tokensAfterTruncation} tokens`);
    }

    // NOTE(review): the return value is discarded; presumably called for a
    // side effect inside PromptAnalyzer — confirm before removing.
    this.promptAnalyzer.analyzeContent(truncatedContent, urlItem);

    const request = {
      pageContent: truncatedContent,
      templateAlias: urlItem.TemplateAlias,
      url: urlItem.url,
      jpath: urlItem.Jpath
    };
    const response = await this.anthropicClient.generateTemplate(request);
    const currentHash = generatePageStructureHash(pageContent, { excludeChildren: true });
    const templatePath = join(
      this.config.templatesOutputDir,
      `${pageId}.mustache`
    );
    this.saveTemplate(templatePath, response.template);

    const generatedTemplate = {
      pageId,
      templatePath,
      template: response.template,
      hash: currentHash,
      metadata: {
        url: urlItem.url,
        templateAlias: urlItem.TemplateAlias,
        jpath: urlItem.Jpath,
        generatedAt: /* @__PURE__ */ new Date()
      }
    };
    this.cache.setTemplate(pageId, response.template, currentHash, {
      pageId: urlItem.url,
      // Store URL as pageId for cache lookup
      jpath: urlItem.Jpath,
      templateAlias: urlItem.TemplateAlias,
      structureHash: currentHash
    });
    return generatedTemplate;
  }

  /**
   * Validate (and if needed fix) a template, then render it with Mustache.
   *
   * @param {string} template - Mustache template source.
   * @param {object} data - View data passed to `Mustache.render`.
   * @returns {Promise<string>} Rendered output.
   */
  async renderTemplate(template, data) {
    return withErrorHandling(async () => {
      const validatedTemplate = await templateValidationPipeline.validateAndFix(template);
      return Mustache.render(validatedTemplate, data);
    }, {
      template: template.substring(0, 200) + "...",
      dataKeys: Object.keys(data)
    });
  }

  /**
   * Read a template file as UTF-8.
   *
   * @param {string} templatePath
   * @returns {string}
   * @throws {Error} With the underlying file-system error attached as `cause`.
   */
  loadTemplate(templatePath) {
    try {
      return readFileSync(templatePath, "utf-8");
    } catch (error) {
      throw new Error(`Failed to load template ${templatePath}: ${error}`, { cause: error });
    }
  }

  /**
   * Summarise the cache contents.
   *
   * @returns {{totalCached: number, cacheHitRate: number, templatesByAlias: object}}
   */
  getTemplateStats() {
    const stats = this.cache.getCacheStats();
    return {
      totalCached: stats.totalTemplates,
      // Placeholder, not a measurement: a fixed 0.8 whenever the cache is non-empty.
      cacheHitRate: stats.totalTemplates > 0 ? 0.8 : 0,
      // Estimate, could be tracked more precisely
      templatesByAlias: stats.templatesByAlias
    };
  }

  /**
   * Write a template file as UTF-8, creating parent directories as needed.
   *
   * @param {string} templatePath
   * @param {string} content
   */
  saveTemplate(templatePath, content) {
    // Recursive mkdir is a no-op for an existing directory, so no prior
    // existence check is required.
    mkdirSync(dirname(templatePath), { recursive: true });
    writeFileSync(templatePath, content, "utf-8");
  }

  /**
   * Create `templatesOutputDir` and, when configured, `finalOutputDir`.
   */
  ensureOutputDirectories() {
    mkdirSync(this.config.templatesOutputDir, { recursive: true });
    if (this.config.finalOutputDir) {
      mkdirSync(this.config.finalOutputDir, { recursive: true });
    }
  }

  /**
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>}
   */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /** Delegates to `AnthropicClient.testConnection()`. */
  async testConnection() {
    return await this.anthropicClient.testConnection();
  }

  /** Delegates to the cache's `clearCache()`. */
  clearCache() {
    this.cache.clearCache();
  }

  /**
   * Delegates to the cache's `cleanupOldTemplates()`.
   *
   * @param {number} [maxAgeMs] - Maximum age in milliseconds; defaults to 30 days.
   */
  cleanupOldTemplates(maxAgeMs = 30 * 24 * 60 * 60 * 1e3) {
    return this.cache.cleanupOldTemplates(maxAgeMs);
  }
}
1264
+
1265
/**
 * Builds the final output files from generated templates: one markdown file
 * per page (optional), the `llms.txt` navigation file and the combined
 * `llms-full.txt` (optional), then writes them to the output directory.
 */
class LLMSFilesGenerator {
  config;
  templateGenerator;

  /**
   * @param {object} config - Module configuration. Fields read here:
   *   `enableIndividualMd`, `enableLLMSFullTxt`, `finalOutputDir` (the whole
   *   object is also handed to `TemplateGenerator`).
   */
  constructor(config) {
    this.config = config;
    this.templateGenerator = new TemplateGenerator(config);
  }

  /**
   * Generate all templates, build every output file and write them to disk.
   *
   * @param {object} umbracoData - Site data (`urlList`, `SiteData`, page content).
   * @returns {Promise<{llmsTxt: object, llmsFullTxt: (object|undefined), individualMdFiles: (object[]|undefined)}>}
   */
  async generateAllFiles(umbracoData) {
    const startTime = Date.now();
    console.log("\u{1F680} Starting LLMS files generation...");
    const templates = await this.templateGenerator.generateAllTemplates(umbracoData);
    console.log("\u{1F4C4} Generating individual markdown files...");
    const individualMdFiles = this.config.enableIndividualMd ? await this.generateIndividualMarkdownFiles(umbracoData, templates) : void 0;
    console.log("\u{1F4DD} Generating llms.txt navigation file...");
    const llmsTxt = this.generateLLMSTxt(umbracoData, individualMdFiles || []);
    console.log("\u{1F4DA} Generating llms-full.txt...");
    const llmsFullTxt = this.config.enableLLMSFullTxt ? this.generateLLMSFullTxt(umbracoData, individualMdFiles || []) : void 0;
    const files = {
      llmsTxt,
      llmsFullTxt,
      individualMdFiles
    };
    this.saveFilesToOutput(files);
    const duration = Date.now() - startTime;
    console.log(`\u2705 LLMS files generation completed in ${duration}ms`);
    return files;
  }

  /**
   * Render each template with its page content into a markdown file record.
   *
   * Templates whose page cannot be found, whose content is missing or whose
   * rendering fails are logged and skipped. When two URLs sanitize to the
   * same filename, later ones receive a `-2`, `-3`, ... suffix so that no
   * file overwrites another.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} templates - Records with `pageId` and `template`.
   * @returns {Promise<{path: string, content: string, url: string, pageId: string}[]>}
   */
  async generateIndividualMarkdownFiles(umbracoData, templates) {
    // Index the URL list once instead of recomputing every page id for
    // every template. The first entry wins, matching Array#find semantics.
    const urlItemsByPageId = new Map();
    for (const item of umbracoData.urlList) {
      const id = generatePageId(item);
      if (!urlItemsByPageId.has(id)) {
        urlItemsByPageId.set(id, item);
      }
    }

    // Compared case-insensitively because some file systems ignore case.
    const usedFilenames = new Set();
    const mdFiles = [];
    for (const template of templates) {
      try {
        const urlItem = urlItemsByPageId.get(template.pageId);
        if (!urlItem) {
          console.warn(`URL item not found for template ${template.pageId}`);
          continue;
        }
        const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
        if (!pageContent) {
          console.warn(`Page content not found for ${urlItem.url}`);
          continue;
        }
        const renderedMarkdown = await this.templateGenerator.renderTemplate(
          template.template,
          pageContent
        );

        const sanitizedUrl = this.sanitizeUrlForFilename(urlItem.url);
        let filename = `${sanitizedUrl}.md`;
        for (let suffix = 2; usedFilenames.has(filename.toLowerCase()); suffix++) {
          filename = `${sanitizedUrl}-${suffix}.md`;
        }
        usedFilenames.add(filename.toLowerCase());

        const outputPath = join(this.getOutputDir(), "llms", filename);
        mdFiles.push({
          path: outputPath,
          content: renderedMarkdown,
          url: urlItem.url,
          pageId: template.pageId
        });
      } catch (error) {
        console.error(`Error generating markdown for ${template.pageId}:`, error);
      }
    }
    return mdFiles;
  }

  /**
   * Build the `llms.txt` navigation file: site title, optional description,
   * one section per non-empty category with a link per page, a note about
   * hidden pages and an "Optional" section.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} mdFiles - Records from `generateIndividualMarkdownFiles`.
   * @returns {{path: string, content: string}}
   */
  generateLLMSTxt(umbracoData, mdFiles) {
    const siteTitle = this.extractSiteTitle(umbracoData);
    const siteDescription = this.extractSiteDescription(umbracoData);
    let content = `# ${siteTitle}\n\n`;
    if (siteDescription) {
      content += `> ${siteDescription}\n\n`;
    }
    content += `This website contains comprehensive information about ${siteTitle.toLowerCase()}. The content is organized into the following sections:\n\n`;

    const pagesByCategory = this.groupPagesByCategory(umbracoData, mdFiles);
    for (const [category, pages] of Object.entries(pagesByCategory)) {
      if (pages.length === 0) {
        continue;
      }
      content += `## ${this.formatCategoryName(category)}\n\n`;
      for (const page of pages) {
        const urlItem = umbracoData.urlList.find((item) => item.url === page.url);
        const pageTitle = this.extractPageTitle(umbracoData, urlItem);
        const relativeFilePath = this.getLLMSFilePath(page.path);
        content += `- [${pageTitle}](${relativeFilePath}): ${this.generatePageDescription(umbracoData, urlItem)}\n`;
      }
      content += "\n";
    }

    const visiblePages = getVisiblePages(umbracoData);
    const hiddenCount = umbracoData.urlList.length - visiblePages.length;
    if (hiddenCount > 0) {
      content += `*Note: ${hiddenCount} pages are excluded from this documentation as they are marked as hidden.*\n\n`;
    }
    content += "## Optional\n\n";
    content += "- [Complete Documentation](llms-full.txt): All content combined in a single file\n";
    content += "- [Site Map](sitemap.xml): XML sitemap of all pages\n";

    const outputPath = join(this.getOutputDir(), "llms.txt");
    return {
      path: outputPath,
      content: content.trim()
    };
  }

  /**
   * Build `llms-full.txt`: a header followed by every rendered page,
   * separated by horizontal rules. Files whose URL is not in `urlList`
   * are skipped.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} mdFiles - Records from `generateIndividualMarkdownFiles`.
   * @returns {{path: string, content: string}}
   */
  generateLLMSFullTxt(umbracoData, mdFiles) {
    const siteTitle = this.extractSiteTitle(umbracoData);
    const siteDescription = this.extractSiteDescription(umbracoData);
    let content = `# ${siteTitle} - Complete Documentation\n\n`;
    if (siteDescription) {
      content += `> ${siteDescription}\n\n`;
    }
    content += "This document contains all website content in a single file for comprehensive AI analysis.\n\n";
    content += "---\n\n";
    for (const mdFile of mdFiles) {
      const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
      if (!urlItem) {
        continue;
      }
      content += `## Page: ${mdFile.url}\n\n`;
      content += `**Template**: ${urlItem.TemplateAlias}\n`;
      content += `**Node ID**: ${urlItem.nodeID}\n\n`;
      content += mdFile.content;
      content += "\n\n---\n\n";
    }
    const outputPath = join(this.getOutputDir(), "llms-full.txt");
    return {
      path: outputPath,
      content: content.trim()
    };
  }

  /**
   * Write the generated files to disk, creating the output directory and
   * its `llms/` subdirectory first.
   *
   * @param {{llmsTxt: object, llmsFullTxt?: object, individualMdFiles?: object[]}} files
   */
  saveFilesToOutput(files) {
    const outputDir = this.getOutputDir();
    mkdirSync(outputDir, { recursive: true });
    mkdirSync(join(outputDir, "llms"), { recursive: true });
    writeFileSync(files.llmsTxt.path, files.llmsTxt.content, "utf-8");
    console.log(`\u{1F4C4} Saved: ${files.llmsTxt.path}`);
    if (files.llmsFullTxt) {
      writeFileSync(files.llmsFullTxt.path, files.llmsFullTxt.content, "utf-8");
      console.log(`\u{1F4DA} Saved: ${files.llmsFullTxt.path}`);
    }
    if (files.individualMdFiles) {
      for (const mdFile of files.individualMdFiles) {
        writeFileSync(mdFile.path, mdFile.content, "utf-8");
      }
      console.log(`\u{1F4DD} Saved: ${files.individualMdFiles.length} markdown files to llms/ subdirectory`);
    }
  }

  /**
   * Bucket markdown files by the category of their URL list entry.
   * Files whose URL is not in `urlList` are dropped.
   *
   * @param {object} umbracoData - Site data.
   * @param {object[]} mdFiles
   * @returns {Object<string, object[]>} Category name -> files, in fixed key order.
   */
  groupPagesByCategory(umbracoData, mdFiles) {
    const categories = {
      main: [],
      blog: [],
      services: [],
      products: [],
      info: [],
      other: []
    };
    for (const mdFile of mdFiles) {
      const urlItem = umbracoData.urlList.find((item) => item.url === mdFile.url);
      if (!urlItem) {
        continue;
      }
      const category = this.categorizeUrlItem(urlItem);
      if (!categories[category]) {
        categories[category] = [];
      }
      categories[category].push(mdFile);
    }
    return categories;
  }

  /**
   * Pick a category from the page's URL and template alias.
   * The first matching rule wins; aliases are compared case-insensitively.
   *
   * @param {{url: string, TemplateAlias?: string}} urlItem
   * @returns {"main"|"blog"|"services"|"info"|"other"}
   */
  categorizeUrlItem(urlItem) {
    const { url, TemplateAlias } = urlItem;
    const alias = (TemplateAlias || "unknown").toLowerCase();
    if (url === "/" || alias.includes("home")) {
      return "main";
    }
    if (alias.includes("blog") || alias.includes("article") || alias.includes("news")) {
      return "blog";
    }
    // Product aliases are grouped under "services" as well.
    if (alias.includes("service") || alias.includes("product") || alias.includes("camp")) {
      return "services";
    }
    if (alias.includes("about") || alias.includes("contact") || alias.includes("info")) {
      return "info";
    }
    return "other";
  }

  /**
   * @param {object} umbracoData
   * @returns {string} Site title, or "Website Documentation" when none is set.
   */
  extractSiteTitle(umbracoData) {
    const siteData = umbracoData.SiteData;
    return siteData?.pageTitle || siteData?.mainHeaderBlockTitle || "Website Documentation";
  }

  /**
   * @param {object} umbracoData
   * @returns {string|null} Site description, or `null` when none is set.
   */
  extractSiteDescription(umbracoData) {
    const siteData = umbracoData.SiteData;
    return siteData?.pageDescription || siteData?.ogDescription || null;
  }

  /**
   * Resolve a display title for a page, falling back to its template alias.
   *
   * @param {object} umbracoData
   * @param {object|undefined} urlItem
   * @returns {string}
   */
  extractPageTitle(umbracoData, urlItem) {
    if (!urlItem) {
      return "Untitled Page";
    }
    const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
    if (!pageContent) {
      return urlItem.TemplateAlias;
    }
    return pageContent.pageTitle || pageContent.title || pageContent.headerBlockTitle || urlItem.TemplateAlias;
  }

  /**
   * Resolve a one-line description for a page; string descriptions longer
   * than 100 characters are cut to 97 characters plus "...".
   *
   * @param {object} umbracoData
   * @param {object|undefined} urlItem
   * @returns {string}
   */
  generatePageDescription(umbracoData, urlItem) {
    if (!urlItem) {
      return "Page information";
    }
    const pageContent = extractPageContent(umbracoData, urlItem.Jpath);
    if (!pageContent) {
      return `${urlItem.TemplateAlias} page`;
    }
    const desc = pageContent.pageDescription || pageContent.description || pageContent.headerBlockSubtitle;
    if (desc && typeof desc === "string") {
      return desc.length > 100 ? `${desc.substring(0, 97)}...` : desc;
    }
    return `Information about ${urlItem.url}`;
  }

  /**
   * @param {string} category - Category key.
   * @returns {string} Heading text; unknown keys are capitalised.
   */
  formatCategoryName(category) {
    const names = {
      main: "Main Pages",
      blog: "Blog & Articles",
      services: "Services & Products",
      info: "Information Pages",
      other: "Other Pages"
    };
    return names[category] || category.charAt(0).toUpperCase() + category.slice(1);
  }

  /**
   * Turn a URL path into a flat filename stem: strip one leading and one
   * trailing slash, replace remaining slashes with `-`, drop every character
   * outside `[a-zA-Z0-9-_]`, and use "index" when nothing is left.
   *
   * @param {string} url
   * @returns {string}
   */
  sanitizeUrlForFilename(url) {
    return url.replace(/^\//, "").replace(/\/$/, "").replace(/\//g, "-").replace(/[^a-zA-Z0-9\-_]/g, "").replace(/^$/, "index");
  }

  /**
   * @param {string} fullPath - Path using `/` or `\` separators.
   * @returns {string} The last path segment.
   */
  getRelativeFilePath(fullPath) {
    // Paths are built with path.join, which emits "\" on Windows.
    const filename = fullPath.split(/[\\/]/).pop() || "";
    return filename;
  }

  /**
   * @param {string} fullPath - Path using `/` or `\` separators.
   * @returns {string} `llms/<filename>`, always with a forward slash (used as a link target).
   */
  getLLMSFilePath(fullPath) {
    const filename = fullPath.split(/[\\/]/).pop() || "";
    return `llms/${filename}`;
  }

  /**
   * @returns {string} `config.finalOutputDir`, or "dist" when unset.
   */
  getOutputDir() {
    return this.config.finalOutputDir || "dist";
  }
}
1512
+
1513
+ export { LLMSFilesGenerator };