@pas7/llm-seo 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +164 -0
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.js +673 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/next/index.d.ts +292 -0
- package/dist/adapters/next/index.js +673 -0
- package/dist/adapters/next/index.js.map +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +2232 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/config.schema-DCnBx3Gm.d.ts +824 -0
- package/dist/core/index.d.ts +752 -0
- package/dist/core/index.js +1330 -0
- package/dist/core/index.js.map +1 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +1909 -0
- package/dist/index.js.map +1 -0
- package/dist/manifest.schema-B_z3rxRV.d.ts +384 -0
- package/dist/schema/index.d.ts +72 -0
- package/dist/schema/index.js +422 -0
- package/dist/schema/index.js.map +1 -0
- package/package.json +83 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1909 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import { readFile, readdir } from 'fs/promises';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { existsSync } from 'fs';
|
|
5
|
+
import { join } from 'path';
|
|
6
|
+
|
|
7
|
+
// src/core/normalize/url.ts
|
|
8
|
+
/**
 * Resolve a URL path into a clean absolute form.
 *
 * Empty segments and "." segments are dropped, ".." removes the previously
 * kept segment (and is ignored at the root), and the result always starts
 * with "/".
 *
 * @param {string} path - Raw path; a falsy value or "/" yields "/".
 * @param {boolean} [preserveTrailingSlash=false] - Keep a trailing slash when
 *   the input had one and the resolved path is not the root.
 * @returns {string} The resolved path.
 */
function normalizePath(path, preserveTrailingSlash = false) {
  if (!path || path === "/") {
    return "/";
  }
  const endedWithSlash = path.endsWith("/");
  // Splitting on "/" and skipping empty pieces covers both the missing leading
  // slash and any run of repeated slashes.
  const kept = path.split("/").reduce((stack, piece) => {
    if (piece === "..") {
      stack.pop();
    } else if (piece !== "" && piece !== ".") {
      stack.push(piece);
    }
    return stack;
  }, []);
  const resolved = `/${kept.join("/")}`;
  const keepSlash = preserveTrailingSlash && endedWithSlash && resolved !== "/";
  return keepSlash ? `${resolved}/` : resolved;
}
|
|
34
|
+
/**
 * Join path fragments into a single absolute path.
 *
 * Each fragment has its leading and trailing slashes stripped; fragments that
 * are empty (before or after stripping) are skipped.
 *
 * @param {...string} parts - Path fragments to join.
 * @returns {string} "/" followed by the fragments joined with "/", or "/" when
 *   nothing remains.
 */
function joinUrlParts(...parts) {
  const pieces = [];
  for (const part of parts) {
    if (!(part.length > 0)) {
      continue;
    }
    const stripped = part.replace(/^\/+/, "").replace(/\/+$/, "");
    if (stripped.length > 0) {
      pieces.push(stripped);
    }
  }
  if (pieces.length === 0) {
    return "/";
  }
  return `/${pieces.join("/")}`;
}
|
|
49
|
+
/**
 * Build an absolute URL from a base URL and a path.
 *
 * Only the protocol, hostname and port are taken from `baseUrl`; the path part
 * comes from `path` after `normalizePath`. The query string and hash, when
 * kept, are the ones parsed from `baseUrl`.
 *
 * @param {object} options
 * @param {string} options.baseUrl - Absolute URL supplying protocol/host/port.
 * @param {string} options.path - Path to normalize and append.
 * @param {"always"|"never"|"preserve"} [options.trailingSlash] - Trailing
 *   slash policy; any other value leaves the normalized path as is.
 * @param {boolean} [options.stripQuery=true] - Drop the base URL's query.
 * @param {boolean} [options.stripHash=true] - Drop the base URL's hash.
 * @returns {string} The assembled URL.
 * @throws {TypeError} When `baseUrl` cannot be parsed by `URL`.
 */
function normalizeUrl(options) {
  const { baseUrl, path, trailingSlash, stripQuery = true, stripHash = true } = options;
  let base;
  try {
    base = new URL(baseUrl);
  } catch {
    throw new TypeError(`Invalid baseUrl: ${baseUrl}`);
  }

  let pathname = normalizePath(path, trailingSlash === "preserve");
  if (trailingSlash === "always" && !pathname.endsWith("/")) {
    pathname = `${pathname}/`;
  } else if (trailingSlash === "never" && pathname !== "/" && pathname.endsWith("/")) {
    pathname = pathname.slice(0, -1);
  }

  const scheme = base.protocol.toLowerCase();
  const host = base.hostname.toLowerCase();
  const port = base.port;
  const usesDefaultPort =
    (scheme === "http:" && port === "80") || (scheme === "https:" && port === "443");
  // The port is appended only when present and not the scheme's default.
  const authority = port && !usesDefaultPort ? `${host}:${port}` : host;

  const query = !stripQuery && base.search ? base.search : "";
  const fragment = !stripHash && base.hash ? base.hash : "";
  return `${scheme}//${authority}${pathname}${query}${fragment}`;
}
|
|
87
|
+
/**
 * Return a sorted copy of the given URLs using the default `localeCompare`
 * ordering. The input is not modified.
 *
 * @param {string[]} urls - URLs to sort.
 * @returns {string[]} A new, sorted array.
 */
function sortUrls(urls) {
  const copy = Array.from(urls);
  copy.sort((left, right) => left.localeCompare(right));
  return copy;
}
|
|
90
|
+
|
|
91
|
+
// src/core/normalize/sort.ts
|
|
92
|
+
/**
 * Comparator for two strings using the "en" locale with
 * `sensitivity: "case"` and `numeric: true`, so digit runs compare by value
 * ("a2" sorts before "a10").
 *
 * @param {string} a - Left operand.
 * @param {string} b - Right operand.
 * @returns {number} Negative, zero or positive, as `localeCompare` returns.
 */
function compareStrings(a, b) {
  const collation = { sensitivity: "case", numeric: true };
  return a.localeCompare(b, "en", collation);
}
|
|
95
|
+
/**
 * Return a copy of `items` sorted with `compareStrings`.
 * The input is left untouched.
 *
 * @param {string[]} items - Strings to sort.
 * @returns {string[]} A new, sorted array.
 */
function sortStrings(items) {
  const copy = Array.from(items);
  copy.sort(compareStrings);
  return copy;
}
|
|
98
|
+
/**
 * Return a copy of `items` ordered by a string key, compared with
 * `compareStrings`. The input is left untouched.
 *
 * @param {Array} items - Values to sort.
 * @param {function(*): string} keyFn - Extracts the sort key from an item.
 * @returns {Array} A new, sorted array.
 */
function sortBy(items, keyFn) {
  const byKey = (left, right) => compareStrings(keyFn(left), keyFn(right));
  const copy = Array.from(items);
  copy.sort(byKey);
  return copy;
}
|
|
101
|
+
/**
 * Return a sorted copy of `items`, comparing with the "en" locale,
 * `sensitivity: "case"` and `numeric: true`. The input is left untouched.
 *
 * @param {string[]} items - Strings to sort.
 * @returns {string[]} A new, sorted array.
 */
function stableSortStrings(items) {
  const collation = { sensitivity: "case", numeric: true };
  const copy = Array.from(items);
  copy.sort((left, right) => left.localeCompare(right, "en", collation));
  return copy;
}
|
|
104
|
+
/**
 * Count the "/"-separated segments in a URL's path.
 *
 * When `url` parses as an absolute URL its `pathname` is used; otherwise the
 * raw string is treated as the path. Leading and trailing slashes are
 * ignored before counting.
 *
 * @param {string} url - Absolute URL or bare path.
 * @returns {number} Number of segments; 0 for an empty or root path.
 */
function countPathSegments(url) {
  let rawPath;
  try {
    rawPath = new URL(url).pathname;
  } catch {
    // Not an absolute URL: count segments of the string itself.
    rawPath = url;
  }
  const trimmed = rawPath.replace(/^\/+/, "").replace(/\/+$/, "");
  return trimmed ? trimmed.split("/").length : 0;
}
|
|
116
|
+
/**
 * Return a copy of `urls` ordered by path depth (fewest segments first), with
 * ties broken by an "en", case-sensitive, numeric-aware string comparison.
 *
 * @param {string[]} urls - URLs or paths to sort.
 * @returns {string[]} A new, sorted array.
 */
function sortUrlsByPath(urls) {
  const collation = { sensitivity: "case", numeric: true };
  const byDepthThenText = (left, right) => {
    const leftDepth = countPathSegments(left);
    const rightDepth = countPathSegments(right);
    return leftDepth !== rightDepth
      ? leftDepth - rightDepth
      : left.localeCompare(right, "en", collation);
  };
  const copy = Array.from(urls);
  copy.sort(byDepthThenText);
  return copy;
}
|
|
126
|
+
|
|
127
|
+
// src/core/normalize/text.ts
|
|
128
|
+
/**
 * Collapse every run of whitespace (including newlines) into a single space
 * and trim both ends.
 *
 * @param {string} text - Text to clean.
 * @returns {string} The collapsed, trimmed text.
 */
function normalizeWhitespace(text) {
  const whitespaceRun = /\s+/g;
  const collapsed = text.replace(whitespaceRun, " ");
  return collapsed.trim();
}
|
|
131
|
+
/**
 * Convert all line endings in `text` to one style.
 *
 * CRLF and lone CR are first turned into LF; when `lineEndings` is "crlf",
 * every LF is then expanded to CRLF.
 *
 * @param {string} text - Text with mixed line endings.
 * @param {string} lineEndings - "crlf" for CRLF output; anything else gives LF.
 * @returns {string} Text with uniform line endings.
 */
function normalizeLineEndings(text, lineEndings) {
  const unixText = text.replace(/\r\n?/g, "\n");
  if (lineEndings !== "crlf") {
    return unixText;
  }
  return unixText.split("\n").join("\r\n");
}
|
|
135
|
+
/**
 * Clean horizontal whitespace line by line.
 *
 * For each line (split on LF or CRLF): trailing whitespace is removed and
 * every run of spaces/tabs is replaced by a single space. Lines are re-joined
 * with "\n".
 *
 * The earlier implementation did this in two regex passes with an
 * unreachable `match.length === 0` branch; a single replacement yields the
 * same output.
 *
 * @param {string} text - Multi-line text.
 * @returns {string} Text with collapsed in-line whitespace and LF separators.
 */
function normalizeLineWhitespace(text) {
  return text
    .split(/\r?\n/)
    .map((line) => line.trimEnd().replace(/[ \t]+/g, " "))
    .join("\n");
}
|
|
139
|
+
/**
 * Collapse whitespace in `text` and shorten it to at most `maxLength`
 * characters, appending an ellipsis when it had to be cut.
 *
 * The cut happens at the last space inside the first `maxLength` characters;
 * when there is no such space (or it is at index 0) the final character of
 * that window is replaced by the ellipsis instead.
 *
 * @param {string} text - Raw text.
 * @param {number} maxLength - Maximum length of the result.
 * @returns {string} The normalized, possibly truncated text.
 */
function normalizeSeoText(text, maxLength) {
  const ELLIPSIS = "\u2026";
  const collapsed = normalizeWhitespace(text);
  if (collapsed.length <= maxLength) {
    return collapsed;
  }
  const head = collapsed.slice(0, maxLength);
  const cutAt = head.lastIndexOf(" ");
  const kept = cutAt > 0 ? head.slice(0, cutAt) : head.slice(0, -1);
  return `${kept}${ELLIPSIS}`;
}
|
|
148
|
+
|
|
149
|
+
// src/core/canonical/locale.ts
|
|
150
|
+
/**
 * Pick the locale that should be treated as canonical.
 *
 * Only non-empty string entries of `availableLocales` are considered. The
 * default locale wins when it is among them; otherwise the first entry of
 * the sorted list is used.
 *
 * @param {object} options
 * @param {string} [options.defaultLocale] - Preferred locale.
 * @param {string[]} [options.availableLocales] - Candidate locales.
 * @returns {string|null} The chosen locale, or null when there is no usable
 *   candidate.
 */
function selectCanonicalLocale(options) {
  const { defaultLocale, availableLocales } = options;
  if (!availableLocales || availableLocales.length === 0) {
    return null;
  }
  const usable = availableLocales.filter(
    (candidate) => typeof candidate === "string" && candidate.length > 0
  );
  if (usable.length === 0) {
    return null;
  }
  const defaultIsUsable = defaultLocale && usable.includes(defaultLocale);
  if (defaultIsUsable) {
    return defaultLocale;
  }
  const [first = null] = stableSortStrings(usable);
  return first;
}
|
|
167
|
+
/**
 * Produce the path for a given locale.
 *
 * A locale prefix is added only for the "subdirectory" strategy and only for
 * locales other than `config.default`; in every other case the path is
 * returned with just a leading slash ensured.
 *
 * @param {string} path - Page path, with or without a leading slash.
 * @param {string} locale - Locale to localize for.
 * @param {{default: string, strategy: string}} config - Locale settings.
 * @returns {string} The localized path.
 */
function localizePath(path, locale, config) {
  const rooted = path.startsWith("/") ? path : `/${path}`;
  const needsPrefix = config.strategy === "subdirectory" && locale !== config.default;
  if (!needsPrefix) {
    return rooted;
  }
  // The root path maps to "/<locale>" rather than "/<locale>/".
  const tail = rooted === "/" ? "" : rooted;
  return `/${locale}${tail}`;
}
|
|
177
|
+
/**
 * Split a path into its locale and the remaining path.
 *
 * Only the "subdirectory" strategy inspects the path: a leading segment
 * shaped like "xx" or "xx-YY" that is listed in `config.supported` is taken
 * as the locale. Otherwise the default locale and the untouched path are
 * returned.
 *
 * @param {string} path - Path to inspect.
 * @param {{default: string, strategy: string, supported: string[]}} config
 * @returns {[string, string]} Tuple of `[locale, remainingPath]`.
 */
function extractLocaleFromPath(path, config) {
  if (config.strategy === "subdirectory") {
    const found = path.match(/^\/([a-z]{2}(?:-[A-Z]{2})?)(\/|$)/);
    const candidate = found?.[1];
    if (candidate && config.supported.includes(candidate)) {
      // Drop "/<locale>"; an empty remainder means the locale root.
      const rest = path.slice(candidate.length + 1) || "/";
      return [candidate, rest];
    }
  }
  return [config.default, path];
}
|
|
189
|
+
/**
 * Build a map from each supported locale to the URL of `path` in that locale.
 *
 * Each URL is `baseUrl` concatenated with the result of `localizePath`.
 *
 * @param {string} baseUrl - Site base URL, prepended verbatim.
 * @param {string} path - Page path.
 * @param {{supported: string[], default: string, strategy: string}} config
 * @returns {Map<string, string>} Locale to URL, in `config.supported` order.
 */
function generateAlternateUrls(baseUrl, path, config) {
  const toEntry = (locale) => [locale, `${baseUrl}${localizePath(path, locale, config)}`];
  return new Map(Array.from(config.supported, toEntry));
}
|
|
198
|
+
|
|
199
|
+
// src/core/canonical/canonical-from-manifest.ts
|
|
200
|
+
/**
 * Build the canonical URL of a page from the manifest's base URL.
 *
 * Fixes over the previous version:
 *  - trailing slashes on `manifest.baseUrl` are removed before joining, so a
 *    base such as "https://example.com/" no longer yields
 *    "https://example.com//about";
 *  - with `trailingSlash: true` the path ends in exactly one slash even when
 *    the input path ended in several.
 *
 * @param {{baseUrl: string}} manifest - Manifest supplying the base URL.
 * @param {string} path - Page path, with or without a leading slash.
 * @param {object} [options]
 * @param {boolean} [options.trailingSlash=false] - End the path with "/".
 * @param {boolean} [options.lowercase=true] - Lowercase the whole URL.
 * @returns {string} The canonical URL.
 */
function generateCanonicalUrl(manifest, path, options = {}) {
  const { trailingSlash = false, lowercase = true } = options;
  const origin = String(manifest.baseUrl).replace(/\/+$/, "");
  const rooted = path.startsWith("/") ? path : `/${path}`;
  const withoutTrailing = rooted.replace(/\/+$/, "");
  // An empty remainder means the root path, which is always "/".
  const normalizedPath = trailingSlash ? `${withoutTrailing}/` : withoutTrailing || "/";
  const fullUrl = `${origin}${normalizedPath}`;
  return lowercase ? fullUrl.toLowerCase() : fullUrl;
}
|
|
207
|
+
/**
 * Compute the canonical URL of every page in the manifest.
 *
 * @param {{baseUrl: string, pages: Array<{path: string}>}} manifest
 * @param {object} [options] - Passed through to `generateCanonicalUrl`.
 * @returns {string[]} Canonical URLs in page order.
 */
function extractCanonicalUrls(manifest, options = {}) {
  const toCanonical = (page) => generateCanonicalUrl(manifest, page.path, options);
  return manifest.pages.map(toCanonical);
}
|
|
210
|
+
/**
 * Remove duplicate URLs, keeping the first occurrence of each.
 *
 * @param {string[]} urls - URLs, possibly with repeats.
 * @returns {string[]} A new array of unique URLs in first-seen order.
 */
function dedupeUrls(urls) {
  const seen = new Set(urls);
  return Array.from(seen);
}
|
|
213
|
+
/**
 * Compute the path prefix for a locale under a given strategy.
 *
 * The "none" and "subdomain" strategies never add a prefix, and "prefix"
 * omits it for the default locale. Every other combination yields
 * "/<locale>".
 *
 * @param {string} locale - Locale being rendered.
 * @param {string} strategy - Locale strategy name.
 * @param {string} defaultLocale - Site default locale.
 * @returns {string} "" or "/<locale>".
 */
function buildLocalePrefix(locale, strategy, defaultLocale) {
  const neverPrefixed = strategy === "none" || strategy === "subdomain";
  const implicitDefault = strategy === "prefix" && locale === defaultLocale;
  return neverPrefixed || implicitDefault ? "" : `/${locale}`;
}
|
|
225
|
+
/**
 * Return the base URL to use for a locale, inserting a locale subdomain when
 * the strategy is "subdomain" and the locale is not the default.
 *
 * The rewritten URL consists of the base's protocol and host only. When
 * `baseUrl` cannot be parsed it is returned unchanged.
 *
 * @param {string} baseUrl - Site base URL.
 * @param {string} locale - Locale being rendered.
 * @param {string} strategy - Locale strategy name.
 * @param {string} defaultLocale - Site default locale.
 * @returns {string} The effective base URL.
 */
function buildBaseUrlWithSubdomain(baseUrl, locale, strategy, defaultLocale) {
  const needsSubdomain = strategy === "subdomain" && locale !== defaultLocale;
  if (!needsSubdomain) {
    return baseUrl;
  }
  try {
    const { protocol, host } = new URL(baseUrl);
    return `${protocol}//${locale}.${host}`;
  } catch {
    // Unparseable base URL: fall back to the value as given.
    return baseUrl;
  }
}
|
|
236
|
+
/**
 * Compute the canonical URL for one manifest item.
 *
 * A non-empty string `item.canonicalOverride` is returned as is. Otherwise
 * the canonical locale is selected from `item.locales` (falling back to the
 * default locale), and the URL is assembled from the locale prefix, the
 * route prefix and the item's slug on top of the (possibly subdomain-
 * adjusted) base URL.
 *
 * @param {{slug: string, locales?: string[], canonicalOverride?: string}} item
 * @param {object} options
 * @param {string} options.baseUrl - Site base URL.
 * @param {string} [options.routePrefix] - Path prefix of the section.
 * @param {string} options.defaultLocale - Site default locale.
 * @param {string} [options.trailingSlash] - Policy passed to `normalizeUrl`.
 * @param {string} [options.localeStrategy] - Locale strategy name.
 * @returns {string} The canonical URL.
 */
function createCanonicalUrlForItem(item, options) {
  const { baseUrl, routePrefix, defaultLocale, trailingSlash, localeStrategy } = options;
  const override = item.canonicalOverride;
  if (override && typeof override === "string") {
    return override;
  }

  const chosen = selectCanonicalLocale({
    defaultLocale,
    availableLocales: item.locales ?? [defaultLocale]
  });
  const locale = chosen ?? defaultLocale;
  const prefix = buildLocalePrefix(locale, localeStrategy, defaultLocale);
  const origin = buildBaseUrlWithSubdomain(baseUrl, locale, localeStrategy, defaultLocale);

  // Empty prefixes are left out; the slug is always the last fragment.
  const fragments = [prefix, routePrefix].filter(Boolean);
  fragments.push(item.slug);

  return normalizeUrl({
    baseUrl: origin,
    path: joinUrlParts(...fragments),
    trailingSlash,
    stripQuery: true,
    stripHash: true
  });
}
|
|
266
|
+
/**
 * Compute the canonical URLs for all manifest items.
 *
 * Duplicates are removed and the result is ordered with `sortUrlsByPath`.
 *
 * @param {object} options - Same options as `createCanonicalUrlForItem`,
 *   plus `items`.
 * @param {Array} [options.items] - Manifest items.
 * @returns {string[]} Unique, sorted canonical URLs; empty when there are no
 *   items.
 */
function createCanonicalUrlsFromManifest(options) {
  const { items } = options;
  if (!items || items.length === 0) {
    return [];
  }
  const unique = dedupeUrls(items.map((entry) => createCanonicalUrlForItem(entry, options)));
  return sortUrlsByPath(unique);
}
|
|
275
|
+
|
|
276
|
+
// src/core/generate/llms-txt.ts
|
|
277
|
+
/**
 * Render the `llms.txt` document.
 *
 * Sections are emitted in this order, each only when it has content: brand
 * header (name, tagline, description), "Sections" (hubs), "URLs",
 * "Policies", "Contact" (including booking) and "Machine Hints". The text
 * is then passed through `normalizeLineWhitespace` and
 * `normalizeLineEndings`.
 *
 * @param {object} options
 * @param {object} options.config - Site configuration; `brand.name` is
 *   required, the other parts are optional.
 * @param {string[]} options.canonicalUrls - URLs listed under "URLs".
 * @returns {{content: string, byteSize: number, lineCount: number}} The
 *   rendered text, its UTF-8 size in bytes and its number of lines.
 */
function createLlmsTxt(options) {
  const { config, canonicalUrls } = options;
  const { brand, policy, contact, booking, machineHints } = config;
  const eolStyle = config.format?.lineEndings ?? "lf";
  const out = [`# ${brand.name}`, ""];

  if (brand.tagline) {
    out.push(`> ${brand.tagline}`, "");
  }
  if (brand.description) {
    out.push(brand.description, "");
  }

  const hubs = config.sections?.hubs ?? [];
  if (hubs.length > 0) {
    out.push("## Sections", "");
    for (const hub of sortStrings(hubs)) {
      out.push(`- [${hub}](${hub}) - ${getHubLabel(hub)}`);
    }
    out.push("");
  }

  if (canonicalUrls.length > 0) {
    out.push("## URLs", "");
    for (const url of sortStrings(canonicalUrls)) {
      out.push(`- ${url}`);
    }
    out.push("");
  }

  if (policy?.geoPolicy || policy?.citationRules || policy?.restrictedClaims) {
    out.push("## Policies", "");
    if (policy.geoPolicy) {
      out.push(`- GEO: ${policy.geoPolicy}`);
    }
    if (policy.citationRules) {
      out.push(`- Citations: ${policy.citationRules}`);
    }
    if (policy.restrictedClaims) {
      out.push(`- Restricted Claims: ${policy.restrictedClaims.enable ? "Enabled" : "Disabled"}`);
    }
    out.push("");
  }

  const social = contact?.social;
  if (contact?.email || social || contact?.phone || booking?.url) {
    out.push("## Contact", "");
    if (contact?.email) {
      out.push(`- Email: ${contact.email}`);
    }
    if (contact?.phone) {
      out.push(`- Phone: ${contact.phone}`);
    }
    if (social) {
      const networks = [
        ["Twitter", social.twitter],
        ["LinkedIn", social.linkedin],
        ["GitHub", social.github]
      ];
      for (const [network, handle] of networks) {
        if (handle) {
          out.push(`- ${network}: ${handle}`);
        }
      }
    }
    if (booking?.url) {
      out.push(`- Booking: ${booking.url} (${booking.label ?? "Book consultation"})`);
    }
    out.push("");
  }

  const hints = [
    ["robots.txt", machineHints?.robots],
    ["sitemap.xml", machineHints?.sitemap],
    ["llms.txt", machineHints?.llmsTxt],
    ["llms-full.txt", machineHints?.llmsFullTxt]
  ].filter(([, location]) => location);
  if (hints.length > 0) {
    out.push("## Machine Hints", "");
    for (const [file, location] of hints) {
      out.push(`- ${file}: ${location}`);
    }
    out.push("");
  }

  const content = normalizeLineEndings(normalizeLineWhitespace(out.join("\n")), eolStyle);
  const separator = eolStyle === "crlf" ? "\r\n" : "\n";
  return {
    content,
    byteSize: Buffer.byteLength(content, "utf-8"),
    lineCount: content.split(separator).length
  };
}
|
|
383
|
+
/**
 * Return the human-readable label for a hub path.
 *
 * Well-known hubs have fixed labels; anything else is derived from the path
 * by `formatHubLabel`. The lookup uses a `Map` so that names inherited from
 * `Object.prototype` (e.g. "constructor", "toString") are not mistaken for
 * known hubs, which previously returned a function instead of a label.
 *
 * @param {string} hub - Hub path such as "/blog".
 * @returns {string} The label to display.
 */
function getHubLabel(hub) {
  const labels = new Map([
    ["/services", "Services overview"],
    ["/blog", "Blog posts"],
    ["/projects", "Our projects"],
    ["/cases", "Case studies"],
    ["/contact", "Contact us"],
    ["/about", "About us"],
    ["/products", "Products"],
    ["/docs", "Documentation"],
    ["/faq", "Frequently asked questions"],
    ["/pricing", "Pricing information"],
    ["/team", "Our team"],
    ["/careers", "Career opportunities"],
    ["/news", "News and updates"],
    ["/resources", "Resources"],
    ["/support", "Support center"]
  ]);
  return labels.get(hub) ?? formatHubLabel(hub);
}

/**
 * Derive a label from a hub path: drop one leading slash, turn "-" and "_"
 * into spaces, and upper-case the first character of every word.
 *
 * @param {string} hub - Hub path such as "/case-studies".
 * @returns {string} The formatted label, e.g. "Case Studies".
 */
function formatHubLabel(hub) {
  const clean = hub.replace(/^\//, "");
  return clean.replace(/[-_]/g, " ").replace(/\b\w/g, (char) => char.toUpperCase());
}
|
|
407
|
+
/**
 * Render `llms.txt` content directly from a manifest.
 *
 * A minimal configuration is synthesized from the manifest (title as brand
 * name, a single "en" locale, optional description) and handed to
 * `createLlmsTxt` together with one URL per page.
 *
 * @param {{baseUrl: string, title: string, description?: string,
 *   pages: Array<{path: string}>}} manifest
 * @param {object} [_options] - Currently unused.
 * @returns {string} The rendered `llms.txt` text.
 */
function generateLlmsTxt(manifest, _options) {
  const { baseUrl, title, description, pages } = manifest;
  const canonicalUrls = pages.map((page) => `${baseUrl}${page.path}`);

  const brand = { name: title, locales: ["en"] };
  if (description) {
    brand.description = description;
  }
  const config = {
    site: { baseUrl },
    brand,
    manifests: {},
    output: {
      paths: {
        llmsTxt: "public/llms.txt",
        llmsFullTxt: "public/llms-full.txt"
      }
    }
  };

  const { content } = createLlmsTxt({ config, canonicalUrls });
  return content;
}
|
|
426
|
+
|
|
427
|
+
// src/core/generate/llms-full-txt.ts
|
|
428
|
+
/**
 * Render the `llms-full.txt` document.
 *
 * Sections are emitted in this order, each only when it has content: brand
 * header (name, tagline, description, organization, locales), "All
 * Canonical URLs", "Policies" (with sub-headings), "Social & Booking",
 * "Machine Hints" and "Sitemap" (hubs followed by manifest items). The text
 * is then passed through `normalizeLineWhitespace` and
 * `normalizeLineEndings`.
 *
 * @param {object} options
 * @param {object} options.config - Site configuration; `brand.name`,
 *   `brand.locales` and `site.baseUrl` are read unconditionally or when
 *   items are present.
 * @param {string[]} options.canonicalUrls - URLs for the canonical list.
 * @param {Array} options.manifestItems - Items listed in the sitemap section.
 * @returns {{content: string, byteSize: number, lineCount: number}} The
 *   rendered text, its UTF-8 size in bytes and its number of lines.
 */
function createLlmsFullTxt(options) {
  const { config, canonicalUrls, manifestItems } = options;
  const { brand, policy, booking, machineHints } = config;
  const social = config.contact?.social;
  const eolStyle = config.format?.lineEndings ?? "lf";
  const out = [`# ${brand.name} - Full LLM Context`, ""];

  if (brand.tagline) {
    out.push(`> ${brand.tagline}`, "");
  }
  if (brand.description) {
    out.push(brand.description, "");
  }
  if (brand.org) {
    out.push(`Organization: ${brand.org}`);
  }
  out.push(`Locales: ${brand.locales.join(", ")}`, "");

  if (canonicalUrls.length > 0) {
    out.push("## All Canonical URLs", "");
    for (const url of sortStrings(canonicalUrls)) {
      out.push(`- ${url}`);
    }
    out.push("");
  }

  if (policy?.geoPolicy || policy?.citationRules || policy?.restrictedClaims) {
    out.push("## Policies", "");
    if (policy.geoPolicy) {
      out.push("### GEO Policy", policy.geoPolicy, "");
    }
    if (policy.citationRules) {
      out.push("### Citation Rules", policy.citationRules, "");
    }
    const claims = policy.restrictedClaims;
    if (claims) {
      out.push("### Restricted Claims", `Status: ${claims.enable ? "Enabled" : "Disabled"}`);
      if (claims.forbidden && claims.forbidden.length > 0) {
        out.push(`Forbidden terms: ${claims.forbidden.join(", ")}`);
      }
      if (claims.whitelist && claims.whitelist.length > 0) {
        out.push(`Exceptions: ${claims.whitelist.join(", ")}`);
      }
      out.push("");
    }
  }

  const networks = [
    ["Twitter", social?.twitter],
    ["LinkedIn", social?.linkedin],
    ["GitHub", social?.github]
  ].filter(([, handle]) => handle);
  if (networks.length > 0 || booking?.url) {
    out.push("## Social & Booking", "");
    for (const [network, handle] of networks) {
      out.push(`- ${network}: ${handle}`);
    }
    if (booking?.url) {
      out.push(`- Booking: ${booking.url} (${booking.label ?? "Book consultation"})`);
    }
    out.push("");
  }

  const hints = [
    ["robots.txt", machineHints?.robots],
    ["sitemap.xml", machineHints?.sitemap],
    ["llms.txt", machineHints?.llmsTxt],
    ["llms-full.txt", machineHints?.llmsFullTxt]
  ].filter(([, location]) => location);
  if (hints.length > 0) {
    out.push("## Machine Hints", "");
    for (const [file, location] of hints) {
      out.push(`- ${file}: ${location}`);
    }
    out.push("");
  }

  const hubs = config.sections?.hubs ?? [];
  if (hubs.length > 0 || manifestItems.length > 0) {
    out.push("## Sitemap", "");
    if (hubs.length > 0) {
      for (const hub of sortStrings(hubs)) {
        out.push(`- [${hub}](${hub}) - ${getHubLabel2(hub)}`);
      }
    }
    if (manifestItems.length > 0) {
      for (const entry of sortBy(manifestItems, (item) => item.slug)) {
        const url = entry.canonicalOverride ?? `${config.site.baseUrl}${entry.slug}`;
        const title = entry.title ?? entry.slug;
        const locales = entry.locales?.join(", ") ?? brand.locales[0] ?? "en";
        out.push(`- [${title}](${url}) (${locales})`);
      }
    }
    out.push("");
  }

  const content = normalizeLineEndings(normalizeLineWhitespace(out.join("\n")), eolStyle);
  const separator = eolStyle === "crlf" ? "\r\n" : "\n";
  return {
    content,
    byteSize: Buffer.byteLength(content, "utf-8"),
    lineCount: content.split(separator).length
  };
}
|
|
552
|
+
/**
 * Return the human-readable label for a hub path (used by the
 * `llms-full.txt` generator).
 *
 * Well-known hubs have fixed labels; anything else is derived from the path
 * by `formatHubLabel2`. The lookup uses a `Map` so that names inherited from
 * `Object.prototype` (e.g. "constructor", "toString") are not mistaken for
 * known hubs, which previously returned a function instead of a label.
 *
 * @param {string} hub - Hub path such as "/blog".
 * @returns {string} The label to display.
 */
function getHubLabel2(hub) {
  const labels = new Map([
    ["/services", "Services overview"],
    ["/blog", "Blog posts"],
    ["/projects", "Our projects"],
    ["/cases", "Case studies"],
    ["/contact", "Contact us"],
    ["/about", "About us"],
    ["/products", "Products"],
    ["/docs", "Documentation"],
    ["/faq", "Frequently asked questions"],
    ["/pricing", "Pricing information"],
    ["/team", "Our team"],
    ["/careers", "Career opportunities"],
    ["/news", "News and updates"],
    ["/resources", "Resources"],
    ["/support", "Support center"]
  ]);
  return labels.get(hub) ?? formatHubLabel2(hub);
}

/**
 * Derive a label from a hub path: drop one leading slash, turn "-" and "_"
 * into spaces, and upper-case the first character of every word.
 *
 * @param {string} hub - Hub path such as "/case-studies".
 * @returns {string} The formatted label, e.g. "Case Studies".
 */
function formatHubLabel2(hub) {
  const clean = hub.replace(/^\//, "");
  return clean.replace(/[-_]/g, " ").replace(/\b\w/g, (char) => char.toUpperCase());
}
|
|
576
|
+
/**
 * Render one page as a markdown fragment: a heading (title or path), a
 * "URL:" line, then the optional description and content, each followed by
 * a blank line.
 *
 * @param {{path: string, title?: string, description?: string,
 *   content?: string}} page
 * @param {{baseUrl: string}} manifest - Supplies the base URL.
 * @param {{maxContentLength?: number}} [_options] - When
 *   `maxContentLength` is positive, longer content is cut to that many
 *   characters and "..." is appended.
 * @returns {string} The rendered fragment, lines joined with "\n".
 */
function generatePageContent(page, manifest, _options) {
  const limit = _options?.maxContentLength ?? 0;
  const out = [
    `## ${page.title ?? page.path}`,
    `URL: ${manifest.baseUrl}${page.path}`,
    ""
  ];
  if (page.description) {
    out.push(page.description, "");
  }
  if (page.content) {
    const tooLong = limit > 0 && page.content.length > limit;
    out.push(tooLong ? `${page.content.slice(0, limit)}...` : page.content, "");
  }
  return out.join("\n");
}
|
|
597
|
+
/**
 * Render `llms-full.txt` content directly from a manifest.
 *
 * A minimal configuration is synthesized from the manifest (title as brand
 * name, a single "en" locale, optional description); every page becomes
 * both a canonical URL and a manifest item before delegating to
 * `createLlmsFullTxt`.
 *
 * @param {{baseUrl: string, title: string, description?: string,
 *   pages: Array<{path: string, title?: string, description?: string}>}} manifest
 * @param {object} [_options] - Currently unused.
 * @returns {string} The rendered `llms-full.txt` text.
 */
function generateLlmsFullTxt(manifest, _options) {
  const { baseUrl, pages } = manifest;
  const canonicalUrls = pages.map((page) => `${baseUrl}${page.path}`);
  const manifestItems = pages.map(({ path, title, description }) => ({
    slug: path,
    title,
    description
  }));

  const brand = { name: manifest.title, locales: ["en"] };
  if (manifest.description) {
    brand.description = manifest.description;
  }
  const config = {
    site: { baseUrl },
    brand,
    manifests: {},
    output: {
      paths: {
        llmsTxt: "public/llms.txt",
        llmsFullTxt: "public/llms-full.txt"
      }
    }
  };

  const { content } = createLlmsFullTxt({ config, canonicalUrls, manifestItems });
  return content;
}
|
|
621
|
+
|
|
622
|
+
// src/core/generate/citations.ts
|
|
623
|
+
/**
 * Build the citations document for a set of manifest items.
 *
 * Each item becomes a source entry (URL, priority defaulting to 50, section,
 * locale and any of publishedAt/updatedAt/title that are set). Sources are
 * ordered by descending priority, then by URL.
 *
 * @param {object} options
 * @param {object} options.config - Site configuration (`site.baseUrl`,
 *   `brand.name`, `brand.locales`, optional `site.defaultLocale` and
 *   `policy`).
 * @param {Array} options.manifestItems - Items to cite.
 * @param {string} options.sectionName - Section recorded on every source.
 * @param {string} [options.fixedTimestamp] - Value for `generated`; the
 *   current time in ISO format is used when absent.
 * @returns {{version: string, generated: string, site: object,
 *   sources: Array, policy: object}} The citations document.
 */
function createCitationsJson(options) {
  const { config, manifestItems, sectionName, fixedTimestamp } = options;
  const collation = { sensitivity: "case", numeric: true };

  const toSource = (item) => {
    const fallbackLocale = config.site.defaultLocale ?? config.brand.locales[0] ?? "en";
    const source = {
      url: item.canonicalOverride ?? `${config.site.baseUrl}${item.slug}`,
      priority: item.priority ?? 50,
      section: sectionName,
      locale: item.locales?.[0] ?? fallbackLocale
    };
    if (item.publishedAt) {
      source.publishedAt = item.publishedAt;
    }
    if (item.updatedAt) {
      source.updatedAt = item.updatedAt;
    }
    if (item.title) {
      source.title = item.title;
    }
    return source;
  };

  // Higher priority first; equal priorities fall back to URL order.
  const byPriorityThenUrl = (left, right) =>
    left.priority !== right.priority
      ? right.priority - left.priority
      : left.url.localeCompare(right.url, "en", collation);

  const sources = manifestItems.map(toSource).sort(byPriorityThenUrl);

  const policy = { restrictedClaimsEnabled: config.policy?.restrictedClaims?.enable ?? false };
  if (config.policy?.geoPolicy) {
    policy.geoPolicy = config.policy.geoPolicy;
  }
  if (config.policy?.citationRules) {
    policy.citationRules = config.policy.citationRules;
  }

  return {
    version: "1.0",
    generated: fixedTimestamp ?? new Date().toISOString(),
    site: {
      baseUrl: config.site.baseUrl,
      name: config.brand.name
    },
    sources,
    policy
  };
}
|
|
660
|
+
/**
 * Build the citations document and serialize it as JSON indented with two
 * spaces.
 *
 * @param {object} options - Same options as `createCitationsJson`.
 * @returns {string} The pretty-printed JSON text.
 */
function createCitationsJsonString(options) {
  const INDENT = 2;
  return JSON.stringify(createCitationsJson(options), null, INDENT);
}
|
|
664
|
+
/**
 * Create a citation record for a page.
 *
 * @param {{path: string, title?: string, description?: string}} page
 * @param {{baseUrl: string}} manifest - Supplies the base URL.
 * @returns {{url: string, title: string, description?: string}} Citation
 *   whose title falls back to the page path; `description` is present only
 *   when the page has one.
 */
function createCitation(page, manifest) {
  const { path, title, description } = page;
  const citation = { url: `${manifest.baseUrl}${path}`, title: title ?? path };
  return description ? { ...citation, description } : citation;
}
|
|
674
|
+
/**
 * Format a citation as a markdown link, followed by " - <description>" when
 * a description is present.
 *
 * @param {{title: string, url: string, description?: string}} citation
 * @returns {string} The markdown text.
 */
function citationToMarkdown(citation) {
  const link = `[${citation.title}](${citation.url})`;
  return citation.description ? `${link} - ${citation.description}` : link;
}
|
|
680
|
+
/**
 * Convert a citation into a JSON-LD style "WebPage" object.
 *
 * @param {{title: string, url: string, description?: string}} citation
 * @returns {object} Object with "@type", `name`, `url` and, when the
 *   citation has one, `description`.
 */
function citationToJsonLd(citation) {
  const { title, url, description } = citation;
  const node = { "@type": "WebPage", name: title, url };
  return description ? { ...node, description } : node;
}
|
|
691
|
+
/**
 * Render a numbered "References" section in markdown.
 *
 * Entries are numbered by their position in `citations` (starting at 1);
 * falsy entries are skipped without renumbering the rest.
 *
 * @param {Array} citations - Citations to list.
 * @returns {string} Markdown text ending with a blank line.
 */
function generateReferenceList(citations) {
  const out = ["## References", ""];
  citations.forEach((citation, index) => {
    if (citation) {
      out.push(`${index + 1}. ${citationToMarkdown(citation)}`);
    }
  });
  out.push("");
  return out.join("\n");
}
|
|
703
|
+
|
|
704
|
+
// src/core/check/issues.ts
|
|
705
|
+
/**
 * Create a check issue record.
 *
 * @param {string} severity - Issue severity.
 * @param {string} code - Machine-readable issue code.
 * @param {string} message - Human-readable message.
 * @param {string} [path=""] - Location the issue refers to.
 * @param {*} [context] - Extra data; the `context` property is added only
 *   when this is not `undefined`.
 * @returns {object} Issue with `path`, `code`, `message`, `severity` and
 *   optionally `context`.
 */
function createCheckIssue(severity, code, message, path = "", context) {
  const issue = { path, code, message, severity };
  return context === undefined ? issue : { ...issue, context };
}
|
|
717
|
+
/**
 * Create an issue object whose `category` defaults to "content"; any
 * property in `overrides` (including `category`) takes precedence.
 *
 * @param {object} overrides - Properties of the issue.
 * @returns {object} The merged issue.
 */
function createIssue(overrides) {
  const defaults = { category: "content" };
  const issue = { ...defaults, ...overrides };
  return issue;
}
|
|
723
|
+
/**
 * Group issues by their `severity` value.
 *
 * @param {Iterable<{severity: string}>} issues - Issues to group.
 * @returns {Map<string, Array>} Severity to issues, with keys in first-seen
 *   order and issues in input order.
 */
function groupBySeverity(issues) {
  const buckets = new Map();
  for (const issue of issues) {
    const key = issue.severity;
    if (!buckets.has(key)) {
      buckets.set(key, []);
    }
    buckets.get(key).push(issue);
  }
  return buckets;
}
|
|
732
|
+
/**
 * Group issues by their `pageId` value.
 *
 * @param {Iterable<{pageId: *}>} issues - Issues to group.
 * @returns {Map<*, Array>} Page id to issues, with keys in first-seen order
 *   and issues in input order.
 */
function groupByPage(issues) {
  const buckets = new Map();
  for (const issue of issues) {
    const key = issue.pageId;
    if (!buckets.has(key)) {
      buckets.set(key, []);
    }
    buckets.get(key).push(issue);
  }
  return buckets;
}
|
|
741
|
+
/**
 * Keep the issues that are at least as severe as `minSeverity`.
 *
 * Severities are ranked "error" (0), "warning" (1), "info" (2); an issue is
 * kept when its rank is less than or equal to the rank of `minSeverity`.
 * Unknown severities rank as -1 (the `indexOf` miss value).
 *
 * @param {Array<{severity: string}>} issues - Issues to filter.
 * @param {string} minSeverity - Least severe level to keep.
 * @returns {Array} The matching issues, in input order.
 */
function filterBySeverity(issues, minSeverity) {
  const ranking = ["error", "warning", "info"];
  const rankOf = (severity) => ranking.indexOf(severity);
  const threshold = rankOf(minSeverity);
  return issues.filter((issue) => rankOf(issue.severity) <= threshold);
}
|
|
749
|
+
/**
 * Tallies issues per severity level.
 *
 * @param {Iterable<{severity: string}>} issues - Issues to count.
 * @returns {{error: number, warning: number, info: number}} Counts per level.
 */
function countSeverities(issues) {
  return [...issues].reduce(
    (tally, entry) => {
      tally[entry.severity]++;
      return tally;
    },
    { error: 0, warning: 0, info: 0 }
  );
}
|
|
760
|
+
|
|
761
|
+
// src/core/check/rules-linter.ts
|
|
762
|
+
/**
 * Built-in lint rules. Each entry carries an `id`, a `description`, an
 * `enabled` flag (all true here) and the `lint(content, filePath)` function
 * that produces the rule's issues.
 */
var LINT_RULES = [
  ["heading-structure", "Ensures proper heading structure (h1 -> h2 -> h3)", lintHeadingStructure],
  ["url-format", "Validates URL format in links", lintUrlFormat],
  ["trailing-whitespace", "Checks for trailing whitespace on lines", lintTrailingWhitespace],
  ["consistent-list-markers", "Ensures consistent list marker usage", lintListMarkers]
].map(([id, description, lint]) => ({ id, description, enabled: true, lint }));
|
|
788
|
+
/**
 * Runs lint rules over a document and collects their issues.
 *
 * @param {string} content - Document text.
 * @param {string} filePath - Path handed to every rule.
 * @param {Array<{lint: Function}>} [rules] - Rules to run; defaults to the enabled entries of `LINT_RULES`.
 * @returns {{filePath: string, issues: object[], passed: boolean}} `passed` is
 *   false only when at least one issue has severity "error".
 */
function lintContent(content, filePath, rules = LINT_RULES.filter((r) => r.enabled)) {
  const issues = [];
  for (const { lint } of rules) {
    for (const found of lint(content, filePath)) {
      issues.push(found);
    }
  }
  const hasError = issues.some((found) => found.severity === "error");
  return { filePath, issues, passed: !hasError };
}
|
|
800
|
+
/**
 * Reports lines that contain a forbidden term (case-insensitive substring match).
 *
 * Lines starting with "forbidden terms:" or "exceptions:" (after trimming,
 * case-insensitive) are skipped. A term is tolerated only where it occurs
 * inside a whitelisted phrase: a further use of the term elsewhere on the same
 * line is still reported. Empty terms are ignored, since an empty string is a
 * substring of every line and would flag the whole document.
 *
 * @param {string} content - Document text.
 * @param {string[]} forbidden - Forbidden words/phrases.
 * @param {string[]} [whitelist=[]] - Phrases in which forbidden terms are allowed.
 * @returns {Array<{path: string, code: string, message: string, severity: string, line: number, context: string}>}
 *   One warning per (line, term) pair; `line` is 1-based, `context` is the
 *   trimmed line cut to 100 characters.
 */
function checkForbiddenTerms(content, forbidden, whitelist = []) {
  const issues = [];
  // Lower-case every term and phrase once instead of once per line.
  const terms = forbidden
    .map((term) => ({ term, needle: term.toLowerCase() }))
    .filter(({ needle }) => needle.length > 0);
  if (terms.length === 0) {
    return issues;
  }
  const allowedPhrases = whitelist.map((phrase) => phrase.toLowerCase());

  content.split("\n").forEach((line, index) => {
    const trimmed = line.trim();
    const heading = trimmed.toLowerCase();
    if (heading.startsWith("forbidden terms:") || heading.startsWith("exceptions:")) {
      return;
    }
    const lowered = line.toLowerCase();
    for (const { term, needle } of terms) {
      if (!lowered.includes(needle)) {
        continue;
      }
      // Blank out whitelisted phrases embedding this term. "\n" is a safe
      // filler because a single line can never contain it.
      let remainder = lowered;
      for (const phrase of allowedPhrases) {
        if (phrase.includes(needle)) {
          remainder = remainder.split(phrase).join("\n");
        }
      }
      if (!remainder.includes(needle)) {
        continue;
      }
      issues.push({
        path: "",
        code: "forbidden_term",
        message: `Term "${term}" is forbidden by policy`,
        severity: "warning",
        line: index + 1,
        context: trimmed.substring(0, 100)
      });
    }
  });
  return issues;
}
|
|
834
|
+
/**
 * Reports Markdown sections (ATX headings) that have no content.
 *
 * A section counts as non-empty when any non-blank line follows its heading,
 * or when the next heading is deeper (a sub-section). The first heading of the
 * document is never reported.
 *
 * Lines are split on LF or CRLF. Splitting on "\n" alone left a trailing "\r"
 * on every line, which stopped the heading pattern from matching at all, so
 * CRLF documents were never checked.
 *
 * @param {string} content - Document text.
 * @returns {Array<{path: string, code: string, message: string, severity: string, line: number}>}
 *   One "info" issue per empty section; `line` is the 1-based heading line.
 */
function checkEmptySections(content) {
  const issues = [];
  // Section currently being scanned, or null before the first heading.
  let open = null;

  const reportIfEmpty = () => {
    if (open !== null && !open.hasContent && !open.isFirst) {
      issues.push({
        path: "",
        code: "empty_section",
        message: `Section "${open.title}" has no content`,
        severity: "info",
        line: open.line
      });
    }
  };

  content.split(/\r?\n/).forEach((line, index) => {
    const heading = /^(#{1,6})\s+(.+)$/.exec(line);
    if (heading === null) {
      if (open !== null && line.trim().length > 0) {
        open.hasContent = true;
      }
      return;
    }
    const level = heading[1].length;
    // A deeper heading is a sub-section, which counts as content of its parent.
    if (open !== null && level > open.level) {
      open.hasContent = true;
    }
    reportIfEmpty();
    open = {
      title: heading[2].trim(),
      line: index + 1,
      level,
      hasContent: false,
      isFirst: open === null
    };
  });

  reportIfEmpty();
  return issues;
}
|
|
884
|
+
/**
 * Reports Markdown link targets (`[text](url)`) that occur more than once.
 *
 * @param {string} content - Document text.
 * @returns {Array<{path: string, code: string, message: string, severity: string, line: number, context: string}>}
 *   One warning per repeated occurrence; the message names the 1-based line of
 *   the first occurrence and `line` is the line of the repeat.
 */
function checkDuplicateUrls(content) {
  const issues = [];
  const firstSeenAt = new Map();
  content.split("\n").forEach((text, idx) => {
    const lineNo = idx + 1;
    for (const [, , target] of text.matchAll(/\[([^\]]*)\]\(([^)]+)\)/g)) {
      const earlier = firstSeenAt.get(target);
      if (earlier === void 0) {
        firstSeenAt.set(target, lineNo);
        continue;
      }
      issues.push({
        path: "",
        code: "duplicate_url",
        message: `URL "${target}" appears multiple times (first at line ${earlier})`,
        severity: "warning",
        line: lineNo,
        context: target
      });
    }
  });
  return issues;
}
|
|
916
|
+
/**
 * Lint rule: warns when a heading is more than one level deeper than the
 * heading before it (e.g. h1 followed directly by h3).
 *
 * @param {string} content - Document text.
 * @param {string} filePath - Stored as `pageId` on each issue.
 * @returns {object[]} "heading-skip" warnings with 1-based `line` numbers.
 */
function lintHeadingStructure(content, filePath) {
  const issues = [];
  let previous = 0;
  content.split("\n").forEach((text, idx) => {
    const hashes = /^(#{1,6})\s/.exec(text)?.[1];
    if (!hashes) {
      return;
    }
    const depth = hashes.length;
    // previous === 0 means no heading seen yet, so the first one never warns.
    if (previous > 0 && depth > previous + 1) {
      issues.push(createIssue({
        id: "heading-skip",
        pageId: filePath,
        severity: "warning",
        message: `Heading level skipped: h${previous} to h${depth}`,
        line: idx + 1
      }));
    }
    previous = depth;
  });
  return issues;
}
|
|
940
|
+
/**
 * Lint rule: warns about Markdown link targets in an unexpected format.
 *
 * A target is accepted when it starts with "/", "#" or "http", or when it is a
 * `mailto:` / `tel:` link. The last two are valid link targets and were
 * previously reported as invalid.
 *
 * @param {string} content - Document text.
 * @param {string} filePath - Stored as `pageId` on each issue.
 * @returns {object[]} "invalid-url" warnings with 1-based `line` and `column`
 *   (column of the opening "[" of the link).
 */
function lintUrlFormat(content, filePath) {
  const issues = [];
  const isAccepted = (url) =>
    url.startsWith("/") ||
    url.startsWith("#") ||
    url.startsWith("http") ||
    /^(?:mailto|tel):/i.test(url);

  content.split("\n").forEach((line, index) => {
    for (const match of line.matchAll(/\[([^\]]*)\]\(([^)]+)\)/g)) {
      const url = match[2];
      if (isAccepted(url)) {
        continue;
      }
      issues.push(createIssue({
        id: "invalid-url",
        pageId: filePath,
        severity: "warning",
        message: `Invalid URL format: ${url}`,
        line: index + 1,
        column: match.index + 1
      }));
    }
  });
  return issues;
}
|
|
965
|
+
/**
 * Lint rule: reports lines that end in a space or a tab.
 *
 * Lines are split on LF or CRLF so that a trailing "\r" does not hide
 * whitespace in front of it, and trailing tabs are reported as well as spaces.
 *
 * @param {string} content - Document text.
 * @param {string} filePath - Stored as `pageId` on each issue.
 * @returns {object[]} "trailing-whitespace" info issues with 1-based `line` numbers.
 */
function lintTrailingWhitespace(content, filePath) {
  const issues = [];
  content.split(/\r?\n/).forEach((line, index) => {
    if (/[ \t]$/.test(line)) {
      issues.push(createIssue({
        id: "trailing-whitespace",
        pageId: filePath,
        severity: "info",
        message: "Line has trailing whitespace",
        line: index + 1
      }));
    }
  });
  return issues;
}
|
|
982
|
+
/**
 * Lint rule: reports documents that use both "-" and "*" as list markers.
 *
 * @param {string} content - Document text.
 * @param {string} filePath - Stored as `pageId` on the issue.
 * @returns {object[]} Empty, or a single "inconsistent-list-markers" info issue.
 */
function lintListMarkers(content, filePath) {
  const rows = content.split("\n");
  const usesMarker = (pattern) => rows.some((row) => pattern.test(row));
  if (!usesMarker(/^\s*-\s/) || !usesMarker(/^\s*\*\s/)) {
    return [];
  }
  return [
    createIssue({
      id: "inconsistent-list-markers",
      pageId: filePath,
      severity: "info",
      message: "Mix of - and * list markers detected"
    })
  ];
}
|
|
997
|
+
|
|
998
|
+
// src/core/check/checker.ts
|
|
999
|
+
/** Checker defaults; caller-supplied config keys take precedence over these. */
var DEFAULT_CHECKER_CONFIG = {
  strict: false,
  maxTitleLength: 60,
  maxDescriptionLength: 160
};

/**
 * Checks every page of a manifest and summarises the findings.
 *
 * @param {{pages: object[]}} manifest - Manifest whose pages are checked with `checkPage`.
 * @param {object} [config={}] - Overrides merged over `DEFAULT_CHECKER_CONFIG`.
 * @returns {{passed: boolean, issues: object[], pagesChecked: number, summary: {error: number, warning: number, info: number}}}
 *   `passed` is true when no issue has severity "error".
 */
function checkManifest(manifest, config = {}) {
  const settings = Object.assign({}, DEFAULT_CHECKER_CONFIG, config);
  const issues = [];
  const summary = { error: 0, warning: 0, info: 0 };
  for (const page of manifest.pages) {
    for (const found of checkPage(page, settings)) {
      issues.push(found);
    }
  }
  for (const { severity } of issues) {
    summary[severity]++;
  }
  return {
    passed: summary.error === 0,
    issues,
    pagesChecked: manifest.pages.length,
    summary
  };
}
|
|
1026
|
+
/**
 * Checks a single page's title and description.
 *
 * @param {{path: string, title?: string, description?: string}} page - Page entry; `path` becomes the issue `pageId`.
 * @param {{maxTitleLength: number, maxDescriptionLength: number}} config - Length limits.
 * @returns {Array<{id: string, pageId: string, severity: string, message: string}>}
 *   At most one title issue and one description issue.
 */
function checkPage(page, config) {
  const pageId = page.path;
  const issues = [];
  const report = (id, severity, message) => {
    issues.push({ id, pageId, severity, message });
  };

  if (!page.title) {
    report("missing-title", "warning", "Page is missing a title");
  } else if (page.title.length > config.maxTitleLength) {
    report(
      "title-too-long",
      "warning",
      `Title exceeds ${config.maxTitleLength} characters (${page.title.length})`
    );
  }

  if (!page.description) {
    report("missing-description", "warning", "Page is missing a description");
  } else if (page.description.length > config.maxDescriptionLength) {
    report(
      "description-too-long",
      "info",
      `Description exceeds ${config.maxDescriptionLength} characters (${page.description.length})`
    );
  }

  return issues;
}
|
|
1061
|
+
/**
 * Checks the generated llms.txt / llms-full.txt files and the optional
 * citations file, and derives a process exit code from the findings.
 *
 * The two required files are reported as an error when missing and as a
 * warning when empty; otherwise they are linted with `lintFile`. The citations
 * file is only checked for presence (a missing file is a warning). The files
 * are independent, so they are inspected concurrently; issues are still
 * reported in the fixed order llms.txt, llms-full.txt, citations.
 *
 * @param {object} options
 * @param {object} options.config - Project config; `config.output.paths` supplies any path not given explicitly.
 * @param {string} [options.failOn] - When "warn", warnings alone yield exit code 1.
 * @param {string} [options.llmsTxtPath] - Overrides `config.output.paths.llmsTxt`.
 * @param {string} [options.llmsFullTxtPath] - Overrides `config.output.paths.llmsFullTxt`.
 * @param {string} [options.citationsPath] - Overrides `config.output.paths.citations`.
 * @returns {Promise<{issues: object[], summary: object, exitCode: number}>}
 *   `exitCode` is 2 on any error, 1 on warnings with `failOn === "warn"`, else 0.
 */
async function checkGeneratedFiles(options) {
  const { config, failOn } = options;
  const llmsTxtPath = options.llmsTxtPath ?? config.output.paths.llmsTxt;
  const llmsFullTxtPath = options.llmsFullTxtPath ?? config.output.paths.llmsFullTxt;
  const citationsPath = options.citationsPath ?? config.output.paths.citations;

  // Required file: missing -> error, empty -> warning, otherwise lint it.
  const inspectRequired = async (filePath) => {
    const { exists, content } = await checkFile(filePath);
    if (!exists) {
      return {
        missing: true,
        issues: [createCheckIssue("error", "file_missing", `Required file does not exist: ${filePath}`, filePath)]
      };
    }
    if (content === "") {
      return {
        missing: false,
        issues: [createCheckIssue("warning", "file_empty", `File is empty: ${filePath}`, filePath)]
      };
    }
    return { missing: false, issues: await lintFile(filePath, content, config) };
  };

  // Optional file: only its presence is checked.
  const inspectOptional = async (filePath) => {
    const { exists } = await checkFile(filePath);
    if (exists) {
      return { missing: false, issues: [] };
    }
    return {
      missing: true,
      issues: [createCheckIssue("warning", "file_missing", `Optional citations file does not exist: ${filePath}`, filePath)]
    };
  };

  const pending = [inspectRequired(llmsTxtPath), inspectRequired(llmsFullTxtPath)];
  if (citationsPath) {
    pending.push(inspectOptional(citationsPath));
  }
  // Promise.all keeps the input order, so the issue order stays deterministic.
  const reports = await Promise.all(pending);

  const issues = reports.flatMap((report) => report.issues);
  const filesMissing = reports.filter((report) => report.missing).length;
  const filesChecked = reports.length - filesMissing;
  // This function never compares content, so the mismatch count stays at 0.
  const filesMismatch = 0;

  const severityCounts = countSeverities(issues);
  let exitCode = 0;
  if (severityCounts.error > 0) {
    exitCode = 2;
  } else if (failOn === "warn" && severityCounts.warning > 0) {
    exitCode = 1;
  }

  return {
    issues,
    summary: {
      errors: severityCounts.error,
      warnings: severityCounts.warning,
      info: severityCounts.info,
      filesChecked,
      filesMissing,
      filesMismatch
    },
    exitCode
  };
}
|
|
1150
|
+
/**
 * Tells whether `filePath` can be accessed.
 *
 * @param {string} filePath - Path to probe with `fs.access`.
 * @returns {Promise<boolean>} False when `fs.access` fails for any reason.
 */
async function checkFileExists(filePath) {
  let reachable = true;
  try {
    await fs.access(filePath);
  } catch {
    // Any access failure is reported as "not there".
    reachable = false;
  }
  return reachable;
}
|
|
1158
|
+
/**
 * Reads a file as UTF-8 and reports whether that worked.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<{exists: boolean, content: string}>} On any read failure
 *   the result is `{ exists: false, content: "" }`.
 */
async function checkFile(filePath) {
  let content;
  try {
    content = await fs.readFile(filePath, "utf-8");
  } catch {
    // Every read error is reported as a non-existing file.
    return { exists: false, content: "" };
  }
  return { exists: true, content };
}
|
|
1166
|
+
/**
 * Reads a file as UTF-8.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<string|null>} The file text, or null when reading fails.
 */
async function readFileContent(filePath) {
  let text = null;
  try {
    text = await fs.readFile(filePath, "utf-8");
  } catch {
    // Best effort by design: an unreadable file is reported as null.
  }
  return text;
}
|
|
1173
|
+
/**
 * Compares two texts and describes the first differing lines.
 *
 * @param {string} expected - Reference text.
 * @param {string} actual - Text under test.
 * @param {number} [maxContextLines=5] - Maximum number of differing line positions to describe.
 * @returns {{match: boolean, context: string}} `context` is empty on a match;
 *   otherwise it lists, per differing 1-based line, the expected and/or actual
 *   text (a side is omitted where that text has no such line).
 */
function compareContent(expected, actual, maxContextLines = 5) {
  if (expected === actual) {
    return { match: true, context: "" };
  }
  const wanted = expected.split("\n");
  const got = actual.split("\n");
  const total = Math.max(wanted.length, got.length);
  const report = [];
  let described = 0;
  for (let row = 0; row < total; row += 1) {
    if (described >= maxContextLines) {
      break;
    }
    const wantedLine = wanted[row];
    const gotLine = got[row];
    if (wantedLine === gotLine) {
      continue;
    }
    described += 1;
    if (wantedLine !== void 0) {
      report.push(`Expected line ${row + 1}: "${wantedLine}"`);
    }
    if (gotLine !== void 0) {
      report.push(`Actual line ${row + 1}: "${gotLine}"`);
    }
  }
  return { match: false, context: report.join("\n") };
}
|
|
1201
|
+
/**
 * Runs every content check on one generated file and converts the findings
 * into check issues attributed to `filePath`.
 *
 * Checks run in this order: lint rules, forbidden terms (only when
 * `config.policy.restrictedClaims.enable` is truthy), empty sections,
 * duplicate URLs.
 *
 * @param {string} filePath - Path recorded on every issue.
 * @param {string} content - File text.
 * @param {object} config - Project config; only `policy.restrictedClaims` is read here.
 * @returns {Promise<object[]>} Issues with `path`, `code`, `message`, `severity`
 *   and, when available, `line` and `context`.
 */
async function lintFile(filePath, content, config) {
  // Shared conversion: optional keys are only added when they carry a value.
  const toCheckIssue = (source, code, context) => {
    const converted = {
      path: filePath,
      code,
      message: source.message,
      severity: source.severity
    };
    if (source.line !== void 0) {
      converted.line = source.line;
    }
    if (context !== void 0) {
      converted.context = context;
    }
    return converted;
  };

  // Lint issues keep their own id as code and expose `suggestion` as context.
  const issues = lintContent(content, filePath).issues.map(
    (found) => toCheckIssue(found, found.id, found.suggestion)
  );

  const claims = config.policy?.restrictedClaims;
  if (claims?.enable) {
    const flagged = checkForbiddenTerms(content, claims.forbidden ?? [], claims.whitelist ?? []);
    for (const found of flagged) {
      issues.push(toCheckIssue(found, "forbidden_term", found.context));
    }
  }

  for (const found of checkEmptySections(content)) {
    issues.push(toCheckIssue(found, "empty_section", found.context));
  }

  for (const found of checkDuplicateUrls(content)) {
    issues.push(toCheckIssue(found, "duplicate_url", found.context));
  }

  return issues;
}
|
|
1273
|
+
/**
 * Compares llms.txt and llms-full.txt on disk against freshly generated content.
 *
 * Per file: an unreadable file is an error ("file_missing"), an empty file is
 * a warning ("file_empty"), and differing content is an error
 * ("file_mismatch") whose context describes the differing lines.
 *
 * @param {string} llmsTxtPath - Path of llms.txt.
 * @param {string} expectedLlmsTxt - Expected llms.txt content.
 * @param {string} llmsFullTxtPath - Path of llms-full.txt.
 * @param {string} expectedLlmsFullTxt - Expected llms-full.txt content.
 * @param {number} [maxContextLines=5] - Passed through to `compareContent`.
 * @returns {Promise<object[]>} Issues for llms.txt first, then llms-full.txt.
 */
async function checkFilesAgainstExpected(llmsTxtPath, expectedLlmsTxt, llmsFullTxtPath, expectedLlmsFullTxt, maxContextLines = 5) {
  const issues = [];
  const targets = [
    [llmsTxtPath, expectedLlmsTxt],
    [llmsFullTxtPath, expectedLlmsFullTxt]
  ];
  // Files are handled one after the other, in the order listed above.
  for (const [filePath, expected] of targets) {
    const onDisk = await readFileContent(filePath);
    if (onDisk === null) {
      issues.push(createCheckIssue("error", "file_missing", `Required file does not exist: ${filePath}`, filePath));
      continue;
    }
    if (onDisk === "") {
      issues.push(createCheckIssue("warning", "file_empty", `File is empty: ${filePath}`, filePath));
      continue;
    }
    const { match, context } = compareContent(expected, onDisk, maxContextLines);
    if (!match) {
      issues.push(createCheckIssue("error", "file_mismatch", "Content differs from expected output", filePath, context));
    }
  }
  return issues;
}
|
|
1331
|
+
/**
 * Site-level settings.
 *
 * `baseUrl` must be an http(s) URL without a trailing slash. `z.string().url()`
 * alone accepts any scheme, so the http/https requirement stated in the error
 * message is enforced by an explicit protocol check.
 */
var SiteConfigSchema = z.object({
  /** Site base URL - must be valid URL with http/https, no trailing slash */
  baseUrl: z
    .string()
    .url({ message: "Must be a valid URL with http or https protocol" })
    .refine(
      (url) => {
        try {
          return ["http:", "https:"].includes(new URL(url).protocol);
        } catch {
          // Unparseable input is already reported by the .url() check above.
          return true;
        }
      },
      { message: "Must be a valid URL with http or https protocol" }
    )
    .refine(
      (url) => !url.endsWith("/"),
      { message: "Base URL must not have a trailing slash" }
    ),
  /** Default locale - at least 2 characters when provided */
  defaultLocale: z.string().min(2).optional()
});
|
|
1340
|
+
/** Brand identity: a required name, optional descriptive texts, and the supported locales. */
var BrandConfigSchema = z.object({
  /** Brand name (must not be empty). */
  name: z.string().min(1, { message: "Brand name is required" }),
  /** Tagline, if any. */
  tagline: z.string().optional(),
  /** Longer description, if any. */
  description: z.string().optional(),
  /** Organization name, if any. */
  org: z.string().optional(),
  /** Supported locales, e.g. ["en", "uk", "de"]; at least one entry of 2+ characters. */
  locales: z
    .array(z.string().min(2))
    .min(1, { message: "At least one locale is required" })
});

/** Site sections. */
var SectionsConfigSchema = z.object({
  /** Hub paths, e.g. ["/services", "/blog", "/projects"]; defaults to an empty list. */
  hubs: z.array(z.string()).default([])
});

/** Social profiles; every entry is an optional free-form string. */
var SocialConfigSchema = z.object({
  /** Twitter handle or URL. */
  twitter: z.string().optional(),
  /** LinkedIn URL. */
  linkedin: z.string().optional(),
  /** GitHub URL. */
  github: z.string().optional()
});

/** Contact details. */
var ContactConfigSchema = z.object({
  /** Contact e-mail address (validated as an e-mail). */
  email: z.string().email().optional(),
  /** Social links. */
  social: SocialConfigSchema.optional(),
  /** Phone number. */
  phone: z.string().optional()
});

/** Settings for the restricted-claims (forbidden terms) check. */
var RestrictedClaimsConfigSchema = z.object({
  /** Turns the check on or off (required). */
  enable: z.boolean(),
  /** Forbidden words/phrases, e.g. ["best", "#1", "guaranteed"]. */
  forbidden: z.array(z.string()).optional(),
  /** Allowlisted phrases. */
  whitelist: z.array(z.string()).optional()
});

/** Content policy. */
var PolicyConfigSchema = z.object({
  /** Geographic policy statement. */
  geoPolicy: z.string().optional(),
  /** Citation rules. */
  citationRules: z.string().optional(),
  /** Restricted-claims configuration. */
  restrictedClaims: RestrictedClaimsConfigSchema.optional()
});

/** Booking call-to-action. */
var BookingConfigSchema = z.object({
  /** Booking URL, e.g. a Cal.com link. */
  url: z.string().url().optional(),
  /** Booking label, e.g. "Book a consultation". */
  label: z.string().optional()
});

/** Links to machine-readable resources; each one is an optional URL. */
var MachineHintsConfigSchema = z.object({
  /** URL of robots.txt. */
  robots: z.string().url().optional(),
  /** URL of sitemap.xml. */
  sitemap: z.string().url().optional(),
  /** URL of llms.txt. */
  llmsTxt: z.string().url().optional(),
  /** URL of llms-full.txt. */
  llmsFullTxt: z.string().url().optional()
});

/** Output file locations. */
var OutputPathsConfigSchema = z.object({
  /** Where llms.txt is written, e.g. "public/llms.txt" (required). */
  llmsTxt: z.string().min(1, { message: "llmsTxt output path is required" }),
  /** Where llms-full.txt is written, e.g. "public/llms-full.txt" (required). */
  llmsFullTxt: z.string().min(1, { message: "llmsFullTxt output path is required" }),
  /** Where citations.json is written, e.g. "public/citations.json" (optional). */
  citations: z.string().optional()
});

/** Output settings. */
var OutputConfigSchema = z.object({
  /** Output paths. */
  paths: OutputPathsConfigSchema
});

/** Formatting options. */
var FormatConfigSchema = z.object({
  /** Trailing slash handling; defaults to "never". */
  trailingSlash: z.enum(["always", "never", "preserve"]).default("never"),
  /** Line endings; defaults to "lf". */
  lineEndings: z.enum(["lf", "crlf"]).default("lf"),
  /** Locale URL strategy. */
  localeStrategy: z.enum(["prefix", "subdomain", "none"]).optional()
});

// Root configuration shape. The resulting schema is not bound to a name here.
z.object({
  /** Site configuration. */
  site: SiteConfigSchema,
  /** Brand configuration. */
  brand: BrandConfigSchema,
  /** Sections configuration. */
  sections: SectionsConfigSchema.optional(),
  /** Manifests configuration; defaults to an empty record. */
  manifests: z.record(z.unknown()).default({}),
  /** Contact configuration. */
  contact: ContactConfigSchema.optional(),
  /** Policy configuration. */
  policy: PolicyConfigSchema.optional(),
  /** Booking configuration. */
  booking: BookingConfigSchema.optional(),
  /** Machine hints configuration. */
  machineHints: MachineHintsConfigSchema.optional(),
  /** Output configuration. */
  output: OutputConfigSchema,
  /** Format configuration. */
  format: FormatConfigSchema.optional()
});
|
|
1446
|
+
/** Flat site configuration: base URL, title and generation options. */
var ConfigSchema = z.object({
  /** Site base URL. */
  baseUrl: z.string().url(),
  /** Site title (must not be empty). */
  title: z.string().min(1),
  /** Site description. */
  description: z.string().optional(),
  /** Output directory; defaults to "./public". */
  outputDir: z.string().default("./public"),
  /** Whether optional sections are included; defaults to false. */
  includeOptionalSections: z.boolean().default(false),
  /** Non-negative integer content length limit; defaults to 0. */
  maxContentLength: z.number().int().nonnegative().default(0)
});

/** Locale settings. */
var LocaleConfigSchema = z.object({
  /** Default locale (2+ characters). */
  default: z.string().min(2),
  /** Supported locales; at least one entry of 2+ characters. */
  supported: z.array(z.string().min(2)).min(1),
  /** How locales map onto URLs. */
  strategy: z.enum(["subdirectory", "subdomain", "domain"])
});

/** Checker settings. */
var CheckConfigSchema = z.object({
  /** Strict mode flag; defaults to false. */
  strict: z.boolean().default(false),
  /** Maximum title length (positive integer); defaults to 60. */
  maxTitleLength: z.number().int().positive().default(60),
  /** Maximum description length (positive integer); defaults to 160. */
  maxDescriptionLength: z.number().int().positive().default(160),
  /** Whether linting is enabled; defaults to true. */
  enableLint: z.boolean().default(true)
});

/** Combined configuration: required site settings plus optional locale and check settings. */
var FullConfigSchema = z.object({
  site: ConfigSchema,
  locale: LocaleConfigSchema.optional(),
  check: CheckConfigSchema.optional()
});
|
|
1470
|
+
/** One content entry of a manifest. */
var ManifestItemSchema = z.object({
  /** URL path slug (required, non-empty). */
  slug: z.string().min(1, { message: "Slug is required and cannot be empty" }),
  /** Locales this page is available in (each 2+ characters). */
  locales: z.array(z.string().min(2)).optional(),
  /** Publication date (ISO 8601 datetime string). */
  publishedAt: z
    .string()
    .datetime({ message: "publishedAt must be a valid ISO 8601 date" })
    .optional(),
  /** Last update date (ISO 8601 datetime string). */
  updatedAt: z
    .string()
    .datetime({ message: "updatedAt must be a valid ISO 8601 date" })
    .optional(),
  /** Canonical URL override. */
  canonicalOverride: z.string().url().optional(),
  /** Priority for citations: integer from 0 to 100. */
  priority: z.number().int().min(0).max(100).optional(),
  /** Display title. */
  title: z.string().optional(),
  /** Display description. */
  description: z.string().optional()
});

/** Reference to an external manifest source. */
var ManifestSourceSchema = z.object({
  /** Source type. */
  type: z.enum(["file", "url", "module"]),
  /** Source location. */
  source: z.string(),
  /** Optional transform, given as a string. */
  transform: z.string().optional()
});

/** A manifest value: either an inline list of items or a source reference. */
var ManifestValueSchema = z.union([
  z.array(ManifestItemSchema),
  ManifestSourceSchema
]);

// Record of manifest values keyed by string. The resulting schema is not bound to a name here.
z.record(z.string(), ManifestValueSchema);
|
|
1501
|
+
/** An optional section: a non-empty title with optional content. */
var OptionalSectionSchema = z.object({
  /** Section title (must not be empty). */
  title: z.string().min(1),
  /** Section content. */
  content: z.string().optional()
});

/** A single page of the site manifest. */
var PageManifestSchema = z.object({
  /** Page path; must start with "/" (e.g. /about). */
  path: z.string().startsWith("/"),
  /** Page title. */
  title: z.string().optional(),
  /** Page description. */
  description: z.string().optional(),
  /** Page content for llms-full.txt. */
  content: z.string().optional(),
  /** Whether the page is optional (included only in full); defaults to false. */
  optional: z.boolean().default(false),
  /** Last modified timestamp (ISO 8601 datetime string). */
  lastModified: z.string().datetime().optional()
});

/** The site manifest: site metadata plus at least one page. */
var SiteManifestSchema = z.object({
  /** Site base URL. */
  baseUrl: z.string().url(),
  /** Site title (must not be empty). */
  title: z.string().min(1),
  /** Site description. */
  description: z.string().optional(),
  /** Pages; at least one is required. */
  pages: z.array(PageManifestSchema).min(1),
  /** Optional sections for llms.txt. */
  optionalSections: z.array(OptionalSectionSchema).optional(),
  /** Site version. */
  version: z.string().optional(),
  /** Generation timestamp (ISO 8601 datetime string). */
  generatedAt: z.string().datetime().optional()
});

/** Build manifest: build id, static pages and optional dynamic routes. */
var BuildManifestSchema = z.object({
  /** Build ID. */
  buildId: z.string(),
  /** Static pages: a record mapping a string key to a list of strings. */
  pages: z.record(z.string(), z.array(z.string())),
  /** Dynamic routes, keyed by string. */
  dynamicRoutes: z
    .record(
      z.string(),
      z.object({
        routeRegex: z.string(),
        dataRoute: z.string(),
        dataRouteRegex: z.string()
      })
    )
    .optional()
});
|
|
1549
|
+
|
|
1550
|
+
// src/schema/validate.ts
|
|
1551
|
+
/**
 * Validates `data` against a schema without throwing.
 *
 * @param {{safeParse: Function}} schema - Schema exposing `safeParse(data)`.
 * @param {*} data - Value to validate.
 * @returns {{success: true, data: *, issues: []} | {success: false, issues: object[]}}
 *   On failure, `issues` holds the schema errors converted by `formatZodErrors`.
 */
function validate(schema, data) {
  const outcome = schema.safeParse(data);
  return outcome.success
    ? { success: true, data: outcome.data, issues: [] }
    : { success: false, issues: formatZodErrors(outcome.error) };
}
|
|
1565
|
+
/**
 * Flattens the issues of a zod error into plain issue records.
 *
 * @param error - Object with an `issues` array; each issue has `path`, `message` and `code`.
 * @returns One record per issue: dot-joined `path`, `message`, `code`, and a fixed
 *   `severity` of "error".
 */
function formatZodErrors(error) {
  const formatted = [];
  for (const { path, message, code } of error.issues) {
    formatted.push({
      path: path.join("."),
      message,
      code,
      severity: "error"
    });
  }
  return formatted;
}
|
|
1573
|
+
/**
 * Validates `data` against `schema` and returns the parsed value, throwing on failure.
 *
 * Only the `success` flag decides whether validation passed. Checking the parsed
 * value for truthiness would reject legitimately falsy results (0, "", false, null)
 * with an empty "Validation failed: " message.
 *
 * @param schema - Schema accepted by `validate`.
 * @param data - Value to validate.
 * @returns The parsed data.
 * @throws {Error} "Validation failed: <path>: <message>; ..." when validation fails.
 */
function validateOrThrow(schema, data) {
  const result = validate(schema, data);
  if (!result.success) {
    const messages = result.issues?.map((e) => `${e.path}: ${e.message}`).join("; ") ?? "Unknown validation error";
    throw new Error(`Validation failed: ${messages}`);
  }
  return result.data;
}
|
|
1581
|
+
/**
 * Renders validation issues as a multi-line, human-readable report.
 *
 * @param issues - Iterable of `{ path, message }` records.
 * @returns A "Validation failed:" header, a blank line, then one bullet per issue;
 *   an empty path is shown as "(root)".
 */
function formatValidationErrors(issues) {
  const bullets = [];
  for (const { path, message } of issues) {
    const location = path || "(root)";
    bullets.push(` - ${location}: ${message}`);
  }
  return ["Validation failed:", "", ...bullets].join("\n");
}
|
|
1588
|
+
/**
 * Converts the items of a content manifest into manifest entries.
 *
 * @param manifest - Object with an `items` array; each item has a `slug` and optional metadata.
 * @param options - `slugPrefix` (prepended to every slug, default "") and `defaultLocale`
 *   (used as the single locale when an item declares no `locales`).
 * @returns One entry per item containing `slug` plus only those metadata fields
 *   that are defined on the item.
 */
function fromNextContentManifest(manifest, options = {}) {
  const { slugPrefix = "", defaultLocale } = options;
  // Fields copied verbatim when present, in the order they appear on the entry.
  const passThrough = [
    "publishedAt",
    "updatedAt",
    "title",
    "description",
    "priority",
    "canonicalOverride"
  ];
  return manifest.items.map((item) => {
    const entry = { slug: slugPrefix + item.slug };
    if (item.locales !== undefined) {
      entry.locales = item.locales;
    } else if (defaultLocale !== undefined) {
      entry.locales = [defaultLocale];
    }
    for (const field of passThrough) {
      if (item[field] !== undefined) {
        entry[field] = item[field];
      }
    }
    return entry;
  });
}
|
|
1620
|
+
/**
 * Splits a document into its leading `---` frontmatter block and the remaining body.
 *
 * This is a minimal line-based `key: value` parser, not a YAML implementation:
 * every line is split at its first colon, `[a, b]` values become string arrays,
 * and surrounding quotes are stripped.
 *
 * Differences from a naive implementation, all deliberate:
 * - a closing `---` on the very last line (no trailing newline) is accepted, with an empty body;
 * - a `priority` that does not parse as an integer is left unset instead of stored as NaN;
 * - lines with an empty key, `#` comment lines, and the `__proto__` key are ignored, so the
 *   returned object's prototype can never be replaced by file content.
 *
 * @param content - Full text of the document.
 * @returns `{ frontmatter, body }`. When no frontmatter block is found, `frontmatter`
 *   is `{}` and `body` is the unchanged input.
 */
function parseFrontmatter(content) {
  // First alternative after the closing fence is the usual "newline + body" form;
  // the second only applies when the closing fence ends the document.
  const frontmatterRegex = /^---\s*\n([\s\S]*?)\n---(?:\s*\n([\s\S]*)|\s*)$/;
  const match = content.match(frontmatterRegex);
  if (!match) {
    return { frontmatter: {}, body: content };
  }
  const frontmatterText = match[1] ?? "";
  // Group 2 is undefined when the document ends at the closing fence.
  const body = match[2] ?? "";
  const frontmatter = {};
  for (const line of frontmatterText.split("\n")) {
    const colonIndex = line.indexOf(":");
    if (colonIndex === -1) continue;
    const key = line.slice(0, colonIndex).trim();
    if (key === "" || key.startsWith("#") || key === "__proto__") continue;
    let value = line.slice(colonIndex + 1).trim();
    if (value.startsWith("[") && value.endsWith("]")) {
      value = value.slice(1, -1).split(",").map((item) => item.trim().replace(/^['"]|['"]$/g, "")).filter((item) => item.length > 0);
    } else {
      value = value.replace(/^['"]|['"]$/g, "");
    }
    if (key === "title") frontmatter.title = value;
    else if (key === "description") frontmatter.description = value;
    else if (key === "date" || key === "publishedAt") frontmatter.date = value;
    else if (key === "updated" || key === "updatedAt") frontmatter.updated = value;
    else if (key === "locale") frontmatter.locale = value;
    else if (key === "locales") frontmatter.locales = value;
    else if (key === "priority") {
      const parsedPriority = Number.parseInt(String(value), 10);
      if (!Number.isNaN(parsedPriority)) frontmatter.priority = parsedPriority;
    } else frontmatter[key] = value;
  }
  return { frontmatter, body };
}
|
|
1651
|
+
/**
 * Converts a content file path (relative to the pages directory) into a URL slug.
 *
 * Steps: normalise Windows `\` separators to `/`, strip the first matching
 * extension, collapse a trailing `/index` (or a bare `index`), and ensure a leading "/".
 *
 * Backslashes are normalised because relative paths built with `path.join` use `\`
 * on Windows, which would otherwise leak into slugs and defeat the `/index` check.
 *
 * @param filePath - Relative file path, e.g. "blog/post.md".
 * @param extensions - Candidate extensions (with leading dot); the first one that
 *   matches the end of the path is removed.
 * @returns The slug, always starting with "/"; the root index maps to "/".
 */
function filePathToSlug(filePath, extensions) {
  let slug = filePath.replace(/\\/g, "/");
  const matchedExt = extensions.find((ext) => slug.endsWith(ext));
  if (matchedExt !== undefined) {
    // Length arithmetic instead of slice(0, -len): a zero-length extension
    // would make slice(0, -0) erase the whole slug.
    slug = slug.slice(0, slug.length - matchedExt.length);
  }
  if (slug.endsWith("/index")) {
    slug = slug.slice(0, -"/index".length);
  }
  if (slug === "index") {
    slug = "";
  }
  if (!slug.startsWith("/")) {
    slug = `/${slug}`;
  }
  return slug;
}
|
|
1670
|
+
/**
 * Recursively collects files under `dir` whose names end with one of `extensions`.
 *
 * @param dir - Directory to scan; a path that does not exist yields an empty list.
 * @param extensions - File-name suffixes to accept.
 * @param basePath - Relative prefix accumulated during recursion (callers normally omit it).
 * @returns Paths relative to the initial `dir`, built with `path.join`.
 */
async function scanDirectory(dir, extensions, basePath = "") {
  if (!existsSync(dir)) {
    return [];
  }
  const found = [];
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const { name } = dirent;
    const relPath = basePath ? join(basePath, name) : name;
    if (dirent.isDirectory()) {
      const nested = await scanDirectory(join(dir, name), extensions, relPath);
      for (const nestedPath of nested) {
        found.push(nestedPath);
      }
      continue;
    }
    // Entries that are neither directories nor regular files are ignored.
    if (!dirent.isFile()) {
      continue;
    }
    if (extensions.some((ext) => name.endsWith(ext))) {
      found.push(relPath);
    }
  }
  return found;
}
|
|
1691
|
+
/**
 * Builds manifest items by scanning a pages directory and reading each file's frontmatter.
 *
 * Locale resolution order: frontmatter `locales`, then frontmatter `locale`, then
 * `defaultLocale`. Both frontmatter fields may arrive as either a string or a string
 * array (the frontmatter parser returns a plain string for `locales: en`), so they are
 * normalised here and `item.locales` is always an array. A non-numeric priority (NaN)
 * is dropped rather than propagated.
 *
 * @param options - `pagesDir` (directory to scan), `routePrefix` (prepended to each slug,
 *   default ""), `defaultLocale`, and `extensions` (default [".mdx", ".md"]).
 * @returns Manifest items sorted by slug using `localeCompare`.
 */
async function createManifestFromPagesDir(options) {
  const {
    pagesDir,
    routePrefix = "",
    defaultLocale,
    extensions = [".mdx", ".md"]
  } = options;
  // Returns an array for array / non-empty-string input, otherwise undefined.
  const toLocaleList = (value) => {
    if (Array.isArray(value)) return value;
    if (typeof value === "string" && value !== "") return [value];
    return void 0;
  };
  const files = await scanDirectory(pagesDir, extensions);
  const items = [];
  for (const file of files) {
    const content = await readFile(join(pagesDir, file), "utf-8");
    const { frontmatter } = parseFrontmatter(content);
    const item = {
      slug: routePrefix + filePathToSlug(file, extensions)
    };
    const locales = toLocaleList(frontmatter.locales) ?? toLocaleList(frontmatter.locale) ?? (defaultLocale !== void 0 ? [defaultLocale] : void 0);
    if (locales !== void 0) {
      item.locales = locales;
    }
    if (frontmatter.date !== void 0) {
      item.publishedAt = frontmatter.date;
    }
    if (frontmatter.updated !== void 0) {
      item.updatedAt = frontmatter.updated;
    }
    if (frontmatter.title !== void 0) {
      item.title = frontmatter.title;
    }
    if (frontmatter.description !== void 0) {
      item.description = frontmatter.description;
    }
    if (frontmatter.priority !== void 0 && !Number.isNaN(frontmatter.priority)) {
      item.priority = frontmatter.priority;
    }
    items.push(item);
  }
  return items.sort((a, b) => a.slug.localeCompare(b.slug));
}
|
|
1734
|
+
/**
 * Builds manifest entries from route data objects.
 *
 * @param items - Records with `params.slug` (a string or an array of path segments)
 *   and optional `locale`, `publishedAt`, `updatedAt`, `title`, `description`, `priority`.
 * @param options - `slugPrefix` (prepended to every slug, default "") and `defaultLocale`
 *   (used when an item has no `locale`).
 * @returns One entry per item; the slug always has a leading "/" before the prefix is applied.
 */
function createManifestFromData(items, options = {}) {
  const { slugPrefix = "", defaultLocale } = options;
  // Segment arrays are joined with "/", plain strings get a leading "/" if missing.
  const toSlugPath = (slug) => {
    if (Array.isArray(slug)) {
      return "/" + slug.join("/");
    }
    return slug.startsWith("/") ? slug : "/" + slug;
  };
  const passThrough = ["publishedAt", "updatedAt", "title", "description", "priority"];
  return items.map((item) => {
    const entry = { slug: slugPrefix + toSlugPath(item.params.slug) };
    const locale = item.locale !== undefined ? item.locale : defaultLocale;
    if (locale !== undefined) {
      entry.locales = [locale];
    }
    for (const field of passThrough) {
      if (item[field] !== undefined) {
        entry[field] = item[field];
      }
    }
    return entry;
  });
}
|
|
1764
|
+
/**
 * Lists the page paths of a build manifest.
 *
 * @param buildManifest - Object whose `pages` property is keyed by page path.
 * @returns Normalised paths sorted with `localeCompare`; keys starting with "/_" are excluded.
 */
function extractPagePaths(buildManifest) {
  return Object.keys(buildManifest.pages)
    .filter((pagePath) => !pagePath.startsWith("/_"))
    .map((pagePath) => normalizeNextPath(pagePath))
    .sort((left, right) => left.localeCompare(right));
}
|
|
1775
|
+
/**
 * Normalises a page path: drops a trailing `.html` / `.json`, maps "/index" and the
 * empty string to "/", and guarantees a leading "/".
 *
 * @param path - Raw page path.
 * @returns The normalised path.
 */
function normalizeNextPath(path) {
  const stripped = path.replace(/\.(html|json)$/, "");
  if (stripped === "/index" || stripped === "") {
    return "/";
  }
  return stripped.startsWith("/") ? stripped : `/${stripped}`;
}
|
|
1785
|
+
/**
 * Assembles a site manifest object from site options and, optionally, a build manifest.
 *
 * @param options - Provides `baseUrl`, `title` and `description`.
 * @param buildManifest - Optional; when given, one page entry is created per path
 *   returned by `extractPagePaths`.
 * @returns `{ baseUrl, title, description, pages }`. Page entries carry `title` and
 *   `description` set to undefined (to be filled in later) and `optional: false`.
 */
function generateNextManifest(options, buildManifest) {
  const pages = buildManifest
    ? extractPagePaths(buildManifest).map((path) => ({
        path,
        // Placeholders: to be filled by a crawler or manually.
        title: undefined,
        description: undefined,
        optional: false
      }))
    : [];
  const { baseUrl, title, description } = options;
  return { baseUrl, title, description, pages };
}
|
|
1806
|
+
|
|
1807
|
+
// src/adapters/next/build-hooks.ts
|
|
1808
|
+
/**
 * Produces package.json script entries for running the llm-seo CLI around a build.
 *
 * @param options - `configPath` (default "llm-seo.config.ts"), `emitCitations`
 *   (adds `--emit-citations`, default false) and `packageManager` (default "pnpm";
 *   "npm" is invoked as "npm run", any other value is used as-is).
 * @returns An object with the `build:seo`, `postbuild` and `check:seo` scripts.
 */
function generateBuildScripts(options = {}) {
  const {
    configPath = "llm-seo.config.ts",
    emitCitations = false,
    packageManager = "pnpm"
  } = options;
  let generateCommand = `llm-seo generate --config ${configPath}`;
  if (emitCitations) {
    generateCommand += " --emit-citations";
  }
  const scriptRunner = packageManager === "npm" ? "npm run" : packageManager;
  const scripts = {};
  scripts["build:seo"] = generateCommand;
  scripts.postbuild = `${scriptRunner} build:seo && llm-seo check --fail-on error`;
  scripts["check:seo"] = "llm-seo check --fail-on warn";
  return scripts;
}
|
|
1823
|
+
/**
 * Builds a robots.txt snippet that allows the LLM-related files.
 *
 * @param options - `baseUrl` (its host is emitted as a `Host:` line when it parses as a URL),
 *   `allowLlmsTxt`, `allowLlmsFullTxt`, `allowSitemap` (all default true),
 *   `additionalPaths` (extra `Allow:` entries, default []) and `userAgent` (default "*").
 * @returns Newline-joined text beginning with the "# LLM SEO" comment line.
 */
function createRobotsLlmsPolicySnippet(options) {
  const {
    baseUrl,
    allowLlmsTxt = true,
    allowLlmsFullTxt = true,
    allowSitemap = true,
    additionalPaths = [],
    userAgent = "*"
  } = options;
  let hostLine = null;
  try {
    hostLine = `Host: ${new URL(baseUrl).host}`;
  } catch {
    // Best effort: a baseUrl that does not parse simply produces no Host line.
  }
  const builtInPaths = [
    [allowLlmsTxt, "/llms.txt"],
    [allowLlmsFullTxt, "/llms-full.txt"],
    [allowSitemap, "/sitemap.xml"]
  ];
  const allowedPaths = [
    ...builtInPaths.filter(([enabled]) => enabled).map(([, path]) => path),
    ...additionalPaths
  ];
  const output = ["# LLM SEO"];
  if (hostLine !== null) {
    output.push(hostLine);
  }
  output.push(`User-agent: ${userAgent}`);
  for (const path of allowedPaths) {
    output.push(`Allow: ${path}`);
  }
  return output.join("\n");
}
|
|
1853
|
+
/**
 * Creates a Next.js config fragment for a static export.
 *
 * @param options - `locales` (default []), `defaultLocale` (default "en"),
 *   `trailingSlash` (default false) and `unoptimizedImages` (default true).
 * @returns `{ output: "export", trailingSlash, images: { unoptimized } }`, plus an
 *   `i18n` section when at least one locale is supplied.
 */
function createNextConfig(options) {
  const {
    locales = [],
    defaultLocale = "en",
    trailingSlash = false,
    unoptimizedImages = true
  } = options;
  const baseConfig = {
    output: "export",
    trailingSlash,
    images: {
      unoptimized: unoptimizedImages
    }
  };
  if (locales.length === 0) {
    return baseConfig;
  }
  // NOTE(review): Next.js documents built-in i18n routing as unsupported with
  // `output: "export"` — confirm this combination is intended for callers.
  return { ...baseConfig, i18n: { locales, defaultLocale } };
}
|
|
1875
|
+
/**
 * Post-build hook: runs the llms.txt generators for the manifest and reports the
 * output paths under `outputDir`.
 *
 * NOTE(review): the return values of `generateLlmsTxt` / `generateLlmsFullTxt` are
 * discarded and this function performs no file writes itself — confirm whether the
 * caller is expected to write the files listed in `files`.
 *
 * @param options - `outputDir`, `manifest`, and `generateFull` (default false; also
 *   produce the llms-full.txt entry).
 * @returns `{ files, success: true }` on success, or `{ files, success: false, error }`
 *   when a generator throws; `files` holds the paths recorded before the failure.
 */
async function postBuildHook(options) {
  const { outputDir, manifest, generateFull = false } = options;
  const files = [];
  try {
    generateLlmsTxt(manifest);
    files.push(`${outputDir}/llms.txt`);
    if (generateFull) {
      generateLlmsFullTxt(manifest);
      files.push(`${outputDir}/llms-full.txt`);
    }
  } catch (error) {
    return {
      files,
      success: false,
      error: error instanceof Error ? error.message : "Unknown error"
    };
  }
  return { files, success: true };
}
|
|
1900
|
+
/**
 * Creates the llm-seo plugin descriptor.
 *
 * @returns An object with the plugin `name` ("llm-seo") and `postBuild`, which is
 *   the `postBuildHook` function.
 */
function createNextPlugin() {
  const plugin = {
    name: "llm-seo",
    postBuild: postBuildHook
  };
  return plugin;
}
|
|
1906
|
+
|
|
1907
|
+
export { BuildManifestSchema, CheckConfigSchema, ConfigSchema, DEFAULT_CHECKER_CONFIG, FullConfigSchema, LINT_RULES, OptionalSectionSchema, PageManifestSchema, SiteManifestSchema, checkFileExists, checkFilesAgainstExpected, checkGeneratedFiles, checkManifest, citationToJsonLd, citationToMarkdown, compareContent, compareStrings, createCanonicalUrlForItem, createCanonicalUrlsFromManifest, createCitation, createCitationsJson, createCitationsJsonString, createIssue, createLlmsFullTxt, createLlmsTxt, createManifestFromData, createManifestFromPagesDir, createNextConfig, createNextPlugin, createRobotsLlmsPolicySnippet, dedupeUrls, extractCanonicalUrls, extractLocaleFromPath, extractPagePaths, filterBySeverity, formatValidationErrors, fromNextContentManifest, generateAlternateUrls, generateBuildScripts, generateCanonicalUrl, generateLlmsFullTxt, generateLlmsTxt, generateNextManifest, generatePageContent, generateReferenceList, groupByPage, groupBySeverity, lintContent, localizePath, normalizeLineEndings, normalizeLineWhitespace, normalizeSeoText, normalizeUrl, normalizeWhitespace, postBuildHook, readFileContent, sortBy, sortStrings, sortUrls, validate, validateOrThrow };
|
|
1908
|
+
//# sourceMappingURL=index.js.map
|
|
1909
|
+
//# sourceMappingURL=index.js.map
|