@pas7/llm-seo 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/bin.js CHANGED
@@ -48,9 +48,101 @@ var init_exit_codes = __esm({
48
48
  };
49
49
  }
50
50
  });
51
- var SiteConfigSchema, BrandConfigSchema, SectionsConfigSchema, SocialConfigSchema, ContactConfigSchema, RestrictedClaimsConfigSchema, PolicyConfigSchema, BookingConfigSchema, MachineHintsConfigSchema, OutputPathsConfigSchema, OutputConfigSchema, FormatConfigSchema, LlmsSeoConfigSchema, ConfigSchema, LocaleConfigSchema, CheckConfigSchema;
51
+ var ISO_DATE_OR_DATETIME, ManifestItemSchema, ManifestSourceSchema, ManifestValueSchema, OptionalSectionSchema, PageManifestSchema;
52
+ var init_manifest_schema = __esm({
53
+ "src/schema/manifest.schema.ts"() {
54
+ ISO_DATE_OR_DATETIME = /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d{1,3})?)?(?:Z|[+-]\d{2}:\d{2})?)?$/;
55
+ ManifestItemSchema = z.object({
56
+ /** URL path slug - required */
57
+ slug: z.string().min(1, { message: "Slug is required and cannot be empty" }),
58
+ /** Available locales for this page */
59
+ locales: z.array(z.string().min(2)).optional(),
60
+ /** Publication date (ISO 8601) */
61
+ publishedAt: z.string().regex(
62
+ ISO_DATE_OR_DATETIME,
63
+ { message: "publishedAt must be ISO date (YYYY-MM-DD) or ISO datetime" }
64
+ ).optional(),
65
+ /** Last update date (ISO 8601) */
66
+ updatedAt: z.string().regex(
67
+ ISO_DATE_OR_DATETIME,
68
+ { message: "updatedAt must be ISO date (YYYY-MM-DD) or ISO datetime" }
69
+ ).optional(),
70
+ /** Override canonical URL */
71
+ canonicalOverride: z.string().url().optional(),
72
+ /** Priority for citations (0-100) */
73
+ priority: z.number().int().min(0).max(100).optional(),
74
+ /** Title for display */
75
+ title: z.string().optional(),
76
+ /** Description for display */
77
+ description: z.string().optional()
78
+ });
79
+ ManifestSourceSchema = z.object({
80
+ /** Source type */
81
+ type: z.enum(["file", "url", "module"]),
82
+ /** Source location */
83
+ source: z.string(),
84
+ /** Optional transform function */
85
+ transform: z.string().optional()
86
+ });
87
+ ManifestValueSchema = z.union([
88
+ z.array(ManifestItemSchema),
89
+ ManifestSourceSchema
90
+ ]);
91
+ z.record(z.string(), ManifestValueSchema);
92
+ OptionalSectionSchema = z.object({
93
+ /** Section title */
94
+ title: z.string().min(1),
95
+ /** Section content */
96
+ content: z.string().optional()
97
+ });
98
+ PageManifestSchema = z.object({
99
+ /** Page path (e.g., /about) */
100
+ path: z.string().startsWith("/"),
101
+ /** Page title */
102
+ title: z.string().optional(),
103
+ /** Page description */
104
+ description: z.string().optional(),
105
+ /** Page content for llms-full.txt */
106
+ content: z.string().optional(),
107
+ /** Whether page is optional (included only in full) */
108
+ optional: z.boolean().default(false),
109
+ /** Last modified timestamp (ISO 8601) */
110
+ lastModified: z.string().datetime().optional()
111
+ });
112
+ z.object({
113
+ /** Site base URL */
114
+ baseUrl: z.string().url(),
115
+ /** Site title */
116
+ title: z.string().min(1),
117
+ /** Site description */
118
+ description: z.string().optional(),
119
+ /** List of pages */
120
+ pages: z.array(PageManifestSchema).min(1),
121
+ /** Optional sections for llms.txt */
122
+ optionalSections: z.array(OptionalSectionSchema).optional(),
123
+ /** Site version */
124
+ version: z.string().optional(),
125
+ /** Generation timestamp (ISO 8601) */
126
+ generatedAt: z.string().datetime().optional()
127
+ });
128
+ z.object({
129
+ /** Build ID */
130
+ buildId: z.string(),
131
+ /** List of static pages */
132
+ pages: z.record(z.string(), z.array(z.string())),
133
+ /** Dynamic routes */
134
+ dynamicRoutes: z.record(z.string(), z.object({
135
+ routeRegex: z.string(),
136
+ dataRoute: z.string(),
137
+ dataRouteRegex: z.string()
138
+ })).optional()
139
+ });
140
+ }
141
+ });
142
+ var SiteConfigSchema, BrandConfigSchema, SectionsConfigSchema, RouteStyleSchema, ManifestSectionConfigSchema, ManifestConfigValueSchema, ManifestsConfigSchema2, SocialConfigSchema, ContactConfigSchema, RestrictedClaimsConfigSchema, PolicyConfigSchema, BookingConfigSchema, MachineHintsConfigSchema, OutputPathsConfigSchema, OutputConfigSchema, FormatConfigSchema, LlmsSeoConfigSchema, ConfigSchema, LocaleConfigSchema, CheckConfigSchema;
52
143
  var init_config_schema = __esm({
53
144
  "src/schema/config.schema.ts"() {
145
+ init_manifest_schema();
54
146
  SiteConfigSchema = z.object({
55
147
  /** Site base URL - must be valid URL with http/https, no trailing slash */
56
148
  baseUrl: z.string().url({ message: "Must be a valid URL with http or https protocol" }).refine(
@@ -76,6 +168,29 @@ var init_config_schema = __esm({
76
168
  /** Hub paths - e.g., ["/services", "/blog", "/projects"] */
77
169
  hubs: z.array(z.string()).default([])
78
170
  });
171
+ RouteStyleSchema = z.enum(["prefix", "suffix", "locale-segment", "custom"]);
172
+ ManifestSectionConfigSchema = z.object({
173
+ /** Items in this manifest section */
174
+ items: z.array(ManifestItemSchema),
175
+ /** Optional display name for section */
176
+ sectionName: z.string().optional(),
177
+ /** Routing behavior for this section */
178
+ routeStyle: RouteStyleSchema.optional(),
179
+ /** Section path prefix, e.g. "/blog" */
180
+ sectionPath: z.string().optional(),
181
+ /** Per-section default locale override */
182
+ defaultLocaleOverride: z.string().min(2).optional(),
183
+ /** Custom pathname factory for routeStyle="custom" */
184
+ pathnameFor: z.custom(
185
+ (value) => value === void 0 || typeof value === "function",
186
+ { message: "pathnameFor must be a function" }
187
+ ).optional()
188
+ });
189
+ ManifestConfigValueSchema = z.union([
190
+ z.array(ManifestItemSchema),
191
+ ManifestSectionConfigSchema
192
+ ]);
193
+ ManifestsConfigSchema2 = z.record(z.string(), ManifestConfigValueSchema);
79
194
  SocialConfigSchema = z.object({
80
195
  /** Twitter handle or URL */
81
196
  twitter: z.string().optional(),
@@ -152,7 +267,7 @@ var init_config_schema = __esm({
152
267
  /** Sections configuration */
153
268
  sections: SectionsConfigSchema.optional(),
154
269
  /** Manifests configuration */
155
- manifests: z.record(z.unknown()).default({}),
270
+ manifests: ManifestsConfigSchema2.default({}),
156
271
  /** Contact configuration */
157
272
  contact: ContactConfigSchema.optional(),
158
273
  /** Policy configuration */
@@ -629,18 +744,6 @@ var init_locale = __esm({
629
744
  function dedupeUrls(urls) {
630
745
  return [...new Set(urls)];
631
746
  }
632
- function buildLocalePrefix(locale, strategy, defaultLocale) {
633
- if (strategy === "none") {
634
- return "";
635
- }
636
- if (strategy === "subdomain") {
637
- return "";
638
- }
639
- if (strategy === "prefix" && locale === defaultLocale) {
640
- return "";
641
- }
642
- return `/${locale}`;
643
- }
644
747
  function buildBaseUrlWithSubdomain(baseUrl, locale, strategy, defaultLocale) {
645
748
  if (strategy !== "subdomain" || locale === defaultLocale) {
646
749
  return baseUrl;
@@ -653,7 +756,17 @@ function buildBaseUrlWithSubdomain(baseUrl, locale, strategy, defaultLocale) {
653
756
  }
654
757
  }
655
758
  function createCanonicalUrlForItem(item, options) {
656
- const { baseUrl, routePrefix, defaultLocale, trailingSlash, localeStrategy } = options;
759
+ const {
760
+ baseUrl,
761
+ routePrefix,
762
+ defaultLocale,
763
+ trailingSlash,
764
+ localeStrategy,
765
+ routeStyle,
766
+ sectionName,
767
+ sectionPath,
768
+ pathnameFor
769
+ } = options;
657
770
  if (item.canonicalOverride && typeof item.canonicalOverride === "string") {
658
771
  return item.canonicalOverride;
659
772
  }
@@ -663,33 +776,79 @@ function createCanonicalUrlForItem(item, options) {
663
776
  availableLocales
664
777
  });
665
778
  const locale = canonicalLocale ?? defaultLocale;
666
- const localePrefix = buildLocalePrefix(locale, localeStrategy, defaultLocale);
779
+ const sectionBase = normalizeSectionPath(sectionPath ?? routePrefix ?? "");
780
+ const effectiveRouteStyle = routeStyle ?? inferRouteStyleFromLocaleStrategy(localeStrategy);
781
+ const normalizedSlug = normalizeItemSlug(item.slug, sectionBase);
782
+ const customPath = effectiveRouteStyle === "custom" ? pathnameFor?.({
783
+ item,
784
+ sectionName: sectionName ?? "",
785
+ slug: normalizedSlug,
786
+ locale,
787
+ defaultLocale,
788
+ sectionPath: sectionBase
789
+ }) : void 0;
790
+ const resolvedPath = customPath ?? buildPathFromRouteStyle({
791
+ routeStyle: effectiveRouteStyle,
792
+ sectionPath: sectionBase,
793
+ slug: normalizedSlug,
794
+ locale,
795
+ defaultLocale
796
+ });
667
797
  const effectiveBaseUrl = buildBaseUrlWithSubdomain(baseUrl, locale, localeStrategy, defaultLocale);
668
- const parts = [];
669
- if (localePrefix) {
670
- parts.push(localePrefix);
671
- }
672
- if (routePrefix) {
673
- parts.push(routePrefix);
674
- }
675
- parts.push(item.slug);
676
- const fullPath = joinUrlParts(...parts);
677
798
  return normalizeUrl({
678
799
  baseUrl: effectiveBaseUrl,
679
- path: fullPath,
800
+ path: resolvedPath,
680
801
  trailingSlash,
681
802
  stripQuery: true,
682
803
  stripHash: true
683
804
  });
684
805
  }
685
- function createCanonicalUrlsFromManifest(options) {
686
- const { items } = options;
687
- if (!items || items.length === 0) {
688
- return [];
806
+ function inferRouteStyleFromLocaleStrategy(strategy) {
807
+ if (strategy !== "prefix") {
808
+ return "custom";
809
+ }
810
+ return "prefix";
811
+ }
812
+ function buildPathFromRouteStyle(args) {
813
+ const { routeStyle, sectionPath, slug, locale, defaultLocale } = args;
814
+ const includeLocale = locale !== defaultLocale;
815
+ if (routeStyle === "locale-segment") {
816
+ return joinUrlParts(sectionPath, locale, slug);
817
+ }
818
+ if (routeStyle === "suffix") {
819
+ if (includeLocale) {
820
+ return joinUrlParts(sectionPath, slug, locale);
821
+ }
822
+ return joinUrlParts(sectionPath, slug);
823
+ }
824
+ if (routeStyle === "custom") {
825
+ return joinUrlParts(sectionPath, slug);
826
+ }
827
+ if (includeLocale) {
828
+ return joinUrlParts(locale, sectionPath, slug);
829
+ }
830
+ return joinUrlParts(sectionPath, slug);
831
+ }
832
+ function normalizeSectionPath(sectionPath) {
833
+ if (!sectionPath || sectionPath === "/") {
834
+ return "";
835
+ }
836
+ return sectionPath.startsWith("/") ? sectionPath : `/${sectionPath}`;
837
+ }
838
+ function normalizeItemSlug(slug, sectionPath) {
839
+ const normalizedSlug = slug.startsWith("/") ? slug : `/${slug}`;
840
+ if (!sectionPath) {
841
+ return normalizedSlug;
842
+ }
843
+ if (normalizedSlug === sectionPath) {
844
+ return "/";
845
+ }
846
+ const withSlash = `${sectionPath}/`;
847
+ if (normalizedSlug.startsWith(withSlash)) {
848
+ const relative = normalizedSlug.slice(withSlash.length);
849
+ return relative ? `/${relative}` : "/";
689
850
  }
690
- const urls = items.map((item) => createCanonicalUrlForItem(item, options));
691
- const deduped = dedupeUrls(urls);
692
- return sortUrlsByPath(deduped);
851
+ return normalizedSlug;
693
852
  }
694
853
  var init_canonical_from_manifest = __esm({
695
854
  "src/core/canonical/canonical-from-manifest.ts"() {
@@ -699,6 +858,96 @@ var init_canonical_from_manifest = __esm({
699
858
  }
700
859
  });
701
860
 
861
+ // src/core/canonical/config-manifests.ts
862
+ function resolveManifestSections(config) {
863
+ const sections = [];
864
+ const manifestEntries = Object.entries(config.manifests).sort(
865
+ (a, b) => a[0].localeCompare(b[0], "en", { sensitivity: "base", numeric: true })
866
+ );
867
+ for (const [key, rawValue] of manifestEntries) {
868
+ const value = rawValue;
869
+ const resolved = resolveOneManifestSection(key, value);
870
+ sections.push(resolved);
871
+ }
872
+ return sections;
873
+ }
874
+ function createCanonicalBundleFromConfig(config) {
875
+ const sections = resolveManifestSections(config);
876
+ const entries = [];
877
+ const defaultLocale = config.site.defaultLocale ?? config.brand.locales[0] ?? "en";
878
+ const trailingSlash = config.format?.trailingSlash ?? "never";
879
+ const localeStrategy = config.format?.localeStrategy ?? "prefix";
880
+ for (const section of sections) {
881
+ const sectionDefaultLocale = section.defaultLocaleOverride ?? defaultLocale;
882
+ const sortedItems = sortBy(section.items, (item) => {
883
+ const localesKey = (item.locales ?? []).join(",");
884
+ return `${item.slug}|${localesKey}`;
885
+ });
886
+ for (const item of sortedItems) {
887
+ const itemOptions = {
888
+ baseUrl: config.site.baseUrl,
889
+ sectionName: section.sectionName,
890
+ defaultLocale: sectionDefaultLocale,
891
+ trailingSlash,
892
+ localeStrategy,
893
+ ...section.sectionPath && {
894
+ routePrefix: section.sectionPath,
895
+ sectionPath: section.sectionPath
896
+ },
897
+ ...section.routeStyle && { routeStyle: section.routeStyle },
898
+ ...section.pathnameFor && { pathnameFor: section.pathnameFor }
899
+ };
900
+ const canonicalUrl = createCanonicalUrlForItem(item, itemOptions);
901
+ entries.push({
902
+ sectionKey: section.key,
903
+ sectionName: section.sectionName,
904
+ item,
905
+ canonicalUrl
906
+ });
907
+ }
908
+ }
909
+ const sortedEntries = sortBy(entries, (entry) => {
910
+ return `${entry.canonicalUrl}|${entry.sectionKey}|${entry.item.slug}`;
911
+ });
912
+ const canonicalUrls = sortUrlsByPath(dedupeUrls(sortedEntries.map((entry) => entry.canonicalUrl)));
913
+ const manifestItems = sortedEntries.map((entry) => entry.item);
914
+ return {
915
+ canonicalUrls,
916
+ manifestItems,
917
+ entries: sortedEntries
918
+ };
919
+ }
920
+ function resolveOneManifestSection(key, value) {
921
+ if (Array.isArray(value)) {
922
+ return {
923
+ key,
924
+ sectionName: key,
925
+ items: normalizeManifestItems(value)
926
+ };
927
+ }
928
+ return {
929
+ key,
930
+ sectionName: value.sectionName ?? key,
931
+ items: normalizeManifestItems(value.items),
932
+ ...value.sectionPath && { sectionPath: value.sectionPath },
933
+ ...value.routeStyle && { routeStyle: value.routeStyle },
934
+ ...value.defaultLocaleOverride && { defaultLocaleOverride: value.defaultLocaleOverride },
935
+ ...value.pathnameFor && { pathnameFor: value.pathnameFor }
936
+ };
937
+ }
938
+ function normalizeManifestItems(items) {
939
+ return items.map((item) => {
940
+ const normalizedSlug = item.slug.startsWith("/") ? item.slug : `/${item.slug}`;
941
+ return { ...item, slug: normalizedSlug };
942
+ });
943
+ }
944
+ var init_config_manifests = __esm({
945
+ "src/core/canonical/config-manifests.ts"() {
946
+ init_canonical_from_manifest();
947
+ init_sort();
948
+ }
949
+ });
950
+
702
951
  // src/core/generate/llms-txt.ts
703
952
  function createLlmsTxt(options) {
704
953
  const { config, canonicalUrls } = options;
@@ -839,7 +1088,7 @@ var init_llms_txt = __esm({
839
1088
 
840
1089
  // src/core/generate/llms-full-txt.ts
841
1090
  function createLlmsFullTxt(options) {
842
- const { config, canonicalUrls, manifestItems } = options;
1091
+ const { config, canonicalUrls, manifestItems, manifestEntries } = options;
843
1092
  const lineEndings = config.format?.lineEndings ?? "lf";
844
1093
  const lines = [];
845
1094
  lines.push(`# ${config.brand.name} - Full LLM Context`);
@@ -856,6 +1105,10 @@ function createLlmsFullTxt(options) {
856
1105
  lines.push(`Organization: ${config.brand.org}`);
857
1106
  }
858
1107
  lines.push(`Locales: ${config.brand.locales.join(", ")}`);
1108
+ const lastUpdated = getLastUpdatedDate(manifestItems);
1109
+ if (lastUpdated) {
1110
+ lines.push(`Last Updated: ${lastUpdated}`);
1111
+ }
859
1112
  lines.push("");
860
1113
  if (canonicalUrls.length > 0) {
861
1114
  lines.push("## All Canonical URLs");
@@ -893,11 +1146,16 @@ function createLlmsFullTxt(options) {
893
1146
  lines.push("");
894
1147
  }
895
1148
  }
896
- const hasSocial = config.contact?.social?.twitter || config.contact?.social?.linkedin || config.contact?.social?.github;
897
- const hasBooking = config.booking?.url;
898
- if (hasSocial || hasBooking) {
899
- lines.push("## Social & Booking");
1149
+ const hasContact = config.contact?.email || config.contact?.phone || config.contact?.social?.twitter || config.contact?.social?.linkedin || config.contact?.social?.github || config.booking?.url;
1150
+ if (hasContact) {
1151
+ lines.push("## Contact");
900
1152
  lines.push("");
1153
+ if (config.contact?.email) {
1154
+ lines.push(`- Email: ${config.contact.email}`);
1155
+ }
1156
+ if (config.contact?.phone) {
1157
+ lines.push(`- Phone: ${config.contact.phone}`);
1158
+ }
901
1159
  if (config.contact?.social?.twitter) {
902
1160
  lines.push(`- Twitter: ${config.contact.social.twitter}`);
903
1161
  }
@@ -941,7 +1199,16 @@ function createLlmsFullTxt(options) {
941
1199
  lines.push(`- [${hub}](${hub}) - ${getHubLabel2(hub)}`);
942
1200
  }
943
1201
  }
944
- if (manifestItems.length > 0) {
1202
+ if (manifestEntries && manifestEntries.length > 0) {
1203
+ const sortedEntries = sortBy(manifestEntries, (entry) => {
1204
+ return `${entry.item.slug}|${entry.canonicalUrl}`;
1205
+ });
1206
+ for (const entry of sortedEntries) {
1207
+ const title = entry.item.title ?? entry.item.slug;
1208
+ const locales = entry.item.locales?.join(", ") ?? config.brand.locales[0] ?? "en";
1209
+ lines.push(`- [${title}](${entry.canonicalUrl}) (${locales})`);
1210
+ }
1211
+ } else if (manifestItems.length > 0) {
945
1212
  const sortedItems = sortBy(manifestItems, (item) => item.slug);
946
1213
  for (const item of sortedItems) {
947
1214
  const url = item.canonicalOverride ?? `${config.site.baseUrl}${item.slug}`;
@@ -962,6 +1229,24 @@ function createLlmsFullTxt(options) {
962
1229
  lineCount: finalLines.length
963
1230
  };
964
1231
  }
1232
+ function getLastUpdatedDate(items) {
1233
+ const timestamps = [];
1234
+ for (const item of items) {
1235
+ if (item.updatedAt) {
1236
+ timestamps.push(item.updatedAt);
1237
+ } else if (item.publishedAt) {
1238
+ timestamps.push(item.publishedAt);
1239
+ }
1240
+ }
1241
+ if (timestamps.length === 0) {
1242
+ return null;
1243
+ }
1244
+ const latest = timestamps.map((value) => new Date(value)).filter((value) => !Number.isNaN(value.getTime())).sort((a, b) => b.getTime() - a.getTime())[0];
1245
+ if (!latest) {
1246
+ return null;
1247
+ }
1248
+ return latest.toISOString().slice(0, 10);
1249
+ }
965
1250
  function getHubLabel2(hub) {
966
1251
  const labels = {
967
1252
  "/services": "Services overview",
@@ -995,14 +1280,20 @@ var init_llms_full_txt = __esm({
995
1280
 
996
1281
  // src/core/generate/citations.ts
997
1282
  function createCitationsJson(options) {
998
- const { config, manifestItems, sectionName, fixedTimestamp } = options;
999
- const sources = manifestItems.map((item) => {
1000
- const url = item.canonicalOverride ?? `${config.site.baseUrl}${item.slug}`;
1283
+ const { config, manifestItems, sectionName, fixedTimestamp, entries } = options;
1284
+ const sourceInput = entries ?? manifestItems.map((item) => ({
1285
+ item,
1286
+ canonicalUrl: item.canonicalOverride ?? `${config.site.baseUrl}${item.slug}`,
1287
+ sectionName
1288
+ }));
1289
+ const sources = sourceInput.map((entry) => {
1290
+ const url = entry.canonicalUrl;
1291
+ const item = entry.item;
1001
1292
  const defaultLocale = config.site.defaultLocale ?? config.brand.locales[0] ?? "en";
1002
1293
  return {
1003
1294
  url,
1004
1295
  priority: item.priority ?? 50,
1005
- section: sectionName,
1296
+ section: entry.sectionName,
1006
1297
  locale: item.locales?.[0] ?? defaultLocale,
1007
1298
  ...item.publishedAt && { publishedAt: item.publishedAt },
1008
1299
  ...item.updatedAt && { updatedAt: item.updatedAt },
@@ -1593,6 +1884,7 @@ var init_core = __esm({
1593
1884
  init_sort();
1594
1885
  init_text();
1595
1886
  init_canonical_from_manifest();
1887
+ init_config_manifests();
1596
1888
  init_locale();
1597
1889
  init_llms_txt();
1598
1890
  init_llms_full_txt();
@@ -1636,32 +1928,27 @@ async function generateCommand(options) {
1636
1928
  printVerbose(`Site: ${config.site.baseUrl}`);
1637
1929
  printVerbose(`Brand: ${config.brand.name}`);
1638
1930
  }
1639
- const manifestItems = extractManifestItems(config);
1931
+ const canonicalBundle = createCanonicalBundleFromConfig(config);
1932
+ const manifestItems = canonicalBundle.manifestItems;
1933
+ const canonicalUrls = canonicalBundle.canonicalUrls;
1640
1934
  if (verbose) {
1641
1935
  printVerbose(`Found ${manifestItems.length} manifest items`);
1642
- }
1643
- const canonicalUrls = createCanonicalUrlsFromManifest({
1644
- items: manifestItems,
1645
- baseUrl: config.site.baseUrl,
1646
- defaultLocale: config.site.defaultLocale ?? config.brand.locales[0] ?? "en",
1647
- trailingSlash: config.format?.trailingSlash ?? "never",
1648
- localeStrategy: "prefix"
1649
- });
1650
- if (verbose) {
1651
1936
  printVerbose(`Generated ${canonicalUrls.length} canonical URLs`);
1652
1937
  }
1653
1938
  const llmsTxtResult = createLlmsTxt({ config, canonicalUrls });
1654
1939
  const llmsFullTxtResult = createLlmsFullTxt({
1655
1940
  config,
1656
1941
  canonicalUrls,
1657
- manifestItems
1942
+ manifestItems,
1943
+ manifestEntries: canonicalBundle.entries
1658
1944
  });
1659
1945
  let citationsContent = null;
1660
1946
  if (emitCitations) {
1661
1947
  citationsContent = createCitationsJsonString({
1662
1948
  config,
1663
1949
  manifestItems,
1664
- sectionName: "all"
1950
+ sectionName: "all",
1951
+ entries: canonicalBundle.entries
1665
1952
  });
1666
1953
  }
1667
1954
  const result = {
@@ -1724,70 +2011,6 @@ async function generateCommand(options) {
1724
2011
  return ExitCodes.GENERATION_FAILED;
1725
2012
  }
1726
2013
  }
1727
- function extractManifestItems(config) {
1728
- const items = [];
1729
- const manifests = config.manifests;
1730
- for (const [_manifestName, manifestData] of Object.entries(manifests)) {
1731
- if (typeof manifestData === "object" && manifestData !== null) {
1732
- const data = manifestData;
1733
- if (Array.isArray(data.pages)) {
1734
- for (const page of data.pages) {
1735
- const normalized = toManifestItem(page);
1736
- if (normalized) {
1737
- items.push(normalized);
1738
- }
1739
- }
1740
- }
1741
- if (Array.isArray(data)) {
1742
- for (const item of data) {
1743
- const normalized = toManifestItem(item);
1744
- if (normalized) {
1745
- items.push(normalized);
1746
- }
1747
- }
1748
- }
1749
- }
1750
- }
1751
- return items;
1752
- }
1753
- function toManifestItem(value) {
1754
- if (typeof value !== "object" || value === null) {
1755
- return null;
1756
- }
1757
- const data = value;
1758
- const rawSlug = data.slug ?? data.path;
1759
- if (typeof rawSlug !== "string" || rawSlug.length === 0) {
1760
- return null;
1761
- }
1762
- const item = {
1763
- slug: rawSlug.startsWith("/") ? rawSlug : `/${rawSlug}`
1764
- };
1765
- if (typeof data.title === "string") {
1766
- item.title = data.title;
1767
- }
1768
- if (typeof data.description === "string") {
1769
- item.description = data.description;
1770
- }
1771
- if (Array.isArray(data.locales)) {
1772
- const locales = data.locales.filter((loc) => typeof loc === "string");
1773
- if (locales.length > 0) {
1774
- item.locales = locales;
1775
- }
1776
- }
1777
- if (typeof data.canonicalOverride === "string") {
1778
- item.canonicalOverride = data.canonicalOverride;
1779
- }
1780
- if (typeof data.publishedAt === "string") {
1781
- item.publishedAt = data.publishedAt;
1782
- }
1783
- if (typeof data.updatedAt === "string") {
1784
- item.updatedAt = data.updatedAt;
1785
- }
1786
- if (typeof data.priority === "number") {
1787
- item.priority = data.priority;
1788
- }
1789
- return item;
1790
- }
1791
2014
  var init_generate = __esm({
1792
2015
  "src/cli/commands/generate.ts"() {
1793
2016
  init_load_config();
@@ -1804,7 +2027,7 @@ __export(check_exports, {
1804
2027
  checkCommand: () => checkCommand
1805
2028
  });
1806
2029
  async function checkCommand(options) {
1807
- const { config: configPath, failOn, verbose } = options;
2030
+ const { config: configPath, failOn, checkMachineHintsLive, verbose } = options;
1808
2031
  try {
1809
2032
  if (verbose) {
1810
2033
  printVerbose(`Loading config from: ${configPath}`);
@@ -1841,19 +2064,15 @@ async function checkCommand(options) {
1841
2064
  ...config.output.paths.citations && { citationsPath: config.output.paths.citations }
1842
2065
  };
1843
2066
  const result = await checkGeneratedFiles(checkOptions);
1844
- const manifestItems = extractManifestItems2(config);
1845
- const canonicalUrls = createCanonicalUrlsFromManifest({
1846
- items: manifestItems,
1847
- baseUrl: config.site.baseUrl,
1848
- defaultLocale: config.site.defaultLocale ?? config.brand.locales[0] ?? "en",
1849
- trailingSlash: config.format?.trailingSlash ?? "never",
1850
- localeStrategy: "prefix"
1851
- });
2067
+ const canonicalBundle = createCanonicalBundleFromConfig(config);
2068
+ const manifestItems = canonicalBundle.manifestItems;
2069
+ const canonicalUrls = canonicalBundle.canonicalUrls;
1852
2070
  const expectedLlms = createLlmsTxt({ config, canonicalUrls });
1853
2071
  const expectedLlmsFull = createLlmsFullTxt({
1854
2072
  config,
1855
2073
  canonicalUrls,
1856
- manifestItems
2074
+ manifestItems,
2075
+ manifestEntries: canonicalBundle.entries
1857
2076
  });
1858
2077
  const requiredMissing = result.issues.some((issue) => {
1859
2078
  return issue.code === "file_missing" && (issue.path === config.output.paths.llmsTxt || issue.path === config.output.paths.llmsFullTxt);
@@ -1883,6 +2102,23 @@ async function checkCommand(options) {
1883
2102
  };
1884
2103
  }
1885
2104
  }
2105
+ if (checkMachineHintsLive) {
2106
+ const liveIssues = await checkMachineHintsLiveEndpoints(config, verbose);
2107
+ if (liveIssues.length > 0) {
2108
+ const issues = [...merged.issues, ...liveIssues];
2109
+ const counts = countSeverities(issues);
2110
+ merged = {
2111
+ ...merged,
2112
+ issues,
2113
+ summary: {
2114
+ ...merged.summary,
2115
+ errors: counts.error,
2116
+ warnings: counts.warning,
2117
+ info: counts.info
2118
+ }
2119
+ };
2120
+ }
2121
+ }
1886
2122
  printCheckReport(merged, verbose);
1887
2123
  if (merged.summary.errors > 0) {
1888
2124
  return ExitCodes.ERROR;
@@ -1897,69 +2133,43 @@ async function checkCommand(options) {
1897
2133
  return ExitCodes.ERROR;
1898
2134
  }
1899
2135
  }
1900
- function extractManifestItems2(config) {
1901
- const items = [];
1902
- const manifests = config.manifests;
1903
- for (const [_manifestName, manifestData] of Object.entries(manifests)) {
1904
- if (typeof manifestData === "object" && manifestData !== null) {
1905
- const data = manifestData;
1906
- if (Array.isArray(data.pages)) {
1907
- for (const page of data.pages) {
1908
- const normalized = toManifestItem2(page);
1909
- if (normalized) {
1910
- items.push(normalized);
1911
- }
1912
- }
2136
+ async function checkMachineHintsLiveEndpoints(config, verbose) {
2137
+ const baseUrl = config.site.baseUrl.replace(/\/+$/, "");
2138
+ const urls = [
2139
+ config.machineHints?.robots ?? `${baseUrl}/robots.txt`,
2140
+ config.machineHints?.sitemap ?? `${baseUrl}/sitemap.xml`,
2141
+ config.machineHints?.llmsTxt ?? `${baseUrl}/llms.txt`,
2142
+ config.machineHints?.llmsFullTxt ?? `${baseUrl}/llms-full.txt`
2143
+ ];
2144
+ const issues = [];
2145
+ for (const url of urls) {
2146
+ try {
2147
+ if (verbose) {
2148
+ printVerbose(`Live-checking ${url}`);
1913
2149
  }
1914
- if (Array.isArray(data)) {
1915
- for (const item of data) {
1916
- const normalized = toManifestItem2(item);
1917
- if (normalized) {
1918
- items.push(normalized);
1919
- }
1920
- }
2150
+ const response = await fetch(url, {
2151
+ method: "GET",
2152
+ signal: AbortSignal.timeout(1e4)
2153
+ });
2154
+ if (!response.ok) {
2155
+ issues.push({
2156
+ path: url,
2157
+ code: "invalid_url",
2158
+ message: `Live check failed with HTTP ${response.status}`,
2159
+ severity: "error"
2160
+ });
1921
2161
  }
2162
+ } catch (error) {
2163
+ const message = error instanceof Error ? error.message : String(error);
2164
+ issues.push({
2165
+ path: url,
2166
+ code: "invalid_url",
2167
+ message: `Live check request failed: ${message}`,
2168
+ severity: "error"
2169
+ });
1922
2170
  }
1923
2171
  }
1924
- return items;
1925
- }
1926
- function toManifestItem2(value) {
1927
- if (typeof value !== "object" || value === null) {
1928
- return null;
1929
- }
1930
- const data = value;
1931
- const rawSlug = data.slug ?? data.path;
1932
- if (typeof rawSlug !== "string" || rawSlug.length === 0) {
1933
- return null;
1934
- }
1935
- const item = {
1936
- slug: rawSlug.startsWith("/") ? rawSlug : `/${rawSlug}`
1937
- };
1938
- if (typeof data.title === "string") {
1939
- item.title = data.title;
1940
- }
1941
- if (typeof data.description === "string") {
1942
- item.description = data.description;
1943
- }
1944
- if (Array.isArray(data.locales)) {
1945
- const locales = data.locales.filter((loc) => typeof loc === "string");
1946
- if (locales.length > 0) {
1947
- item.locales = locales;
1948
- }
1949
- }
1950
- if (typeof data.canonicalOverride === "string") {
1951
- item.canonicalOverride = data.canonicalOverride;
1952
- }
1953
- if (typeof data.publishedAt === "string") {
1954
- item.publishedAt = data.publishedAt;
1955
- }
1956
- if (typeof data.updatedAt === "string") {
1957
- item.updatedAt = data.updatedAt;
1958
- }
1959
- if (typeof data.priority === "number") {
1960
- item.priority = data.priority;
1961
- }
1962
- return item;
2172
+ return issues;
1963
2173
  }
1964
2174
  var init_check = __esm({
1965
2175
  "src/cli/commands/check.ts"() {
@@ -2184,7 +2394,7 @@ program.command("generate").description("Generate llms.txt and llms-full.txt fro
2184
2394
  });
2185
2395
  process.exit(exitCode);
2186
2396
  });
2187
- program.command("check").description("Validate generated llms.txt files against configuration").option("-c, --config <path>", "Path to config file").option("--fail-on <level>", "Fail on warnings (warn) or only errors (error)", "error").option("-v, --verbose", "Show detailed output", false).action(async (options) => {
2397
+ program.command("check").description("Validate generated llms.txt files against configuration").option("-c, --config <path>", "Path to config file").option("--fail-on <level>", "Fail on warnings (warn) or only errors (error)", "error").option("--check-machine-hints-live", "Check machine hint URLs (robots/sitemap/llms) over HTTP", false).option("-v, --verbose", "Show detailed output", false).action(async (options) => {
2188
2398
  const { checkCommand: checkCommand2 } = await Promise.resolve().then(() => (init_check(), check_exports));
2189
2399
  const failOn = options.failOn;
2190
2400
  if (failOn !== "warn" && failOn !== "error") {
@@ -2194,6 +2404,7 @@ program.command("check").description("Validate generated llms.txt files against
2194
2404
  const exitCode = await checkCommand2({
2195
2405
  config: options.config ?? program.opts().config,
2196
2406
  failOn,
2407
+ checkMachineHintsLive: options.checkMachineHintsLive ?? false,
2197
2408
  verbose: options.verbose ?? program.opts().verbose ?? false
2198
2409
  });
2199
2410
  process.exit(exitCode);