@ijonis/geo-lint 0.1.5 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -179,7 +179,13 @@ var DEFAULT_CONFIG = {
179
179
  "Tabs",
180
180
  "Tab"
181
181
  ],
182
- enabledContentTypes: ["blog"]
182
+ enabledContentTypes: ["blog"],
183
+ organizationSameAs: [],
184
+ servicePagePatterns: []
185
+ },
186
+ technical: {
187
+ feedUrls: [],
188
+ llmsTxtUrl: ""
183
189
  },
184
190
  i18n: {
185
191
  locales: ["de", "en"],
@@ -251,12 +257,18 @@ function mergeWithDefaults(user) {
251
257
  acronymAllowlist: user.geo?.acronymAllowlist ?? DEFAULT_CONFIG.geo.acronymAllowlist,
252
258
  vagueHeadings: user.geo?.vagueHeadings ?? DEFAULT_CONFIG.geo.vagueHeadings,
253
259
  genericAuthorNames: user.geo?.genericAuthorNames ?? DEFAULT_CONFIG.geo.genericAuthorNames,
254
- allowedHtmlTags: user.geo?.allowedHtmlTags ?? DEFAULT_CONFIG.geo.allowedHtmlTags
260
+ allowedHtmlTags: user.geo?.allowedHtmlTags ?? DEFAULT_CONFIG.geo.allowedHtmlTags,
261
+ organizationSameAs: user.geo?.organizationSameAs ?? DEFAULT_CONFIG.geo.organizationSameAs,
262
+ servicePagePatterns: user.geo?.servicePagePatterns ?? DEFAULT_CONFIG.geo.servicePagePatterns
255
263
  },
256
264
  i18n: {
257
265
  locales: user.i18n?.locales ?? DEFAULT_CONFIG.i18n.locales,
258
266
  defaultLocale: user.i18n?.defaultLocale ?? DEFAULT_CONFIG.i18n.defaultLocale
259
267
  },
268
+ technical: {
269
+ feedUrls: user.technical?.feedUrls ?? DEFAULT_CONFIG.technical.feedUrls,
270
+ llmsTxtUrl: user.technical?.llmsTxtUrl ?? DEFAULT_CONFIG.technical.llmsTxtUrl
271
+ },
260
272
  rules: { ...DEFAULT_CONFIG.rules, ...user.rules ?? {} },
261
273
  thresholds: {
262
274
  title: { ...DEFAULT_CONFIG.thresholds.title, ...user.thresholds?.title ?? {} },
@@ -871,6 +883,68 @@ var duplicateRules = [
871
883
  duplicateDescription
872
884
  ];
873
885
 
886
// src/utils/plaintext-structure.ts
// Heuristics for recognising structure in text that has no Markdown markup.
var MAX_HEADING_LENGTH = 80; // longest trimmed line still considered a heading / FAQ question
var MIN_TABLE_ROWS = 2; // minimum number of tab-separated rows that count as a table

/**
 * Heuristically finds heading-like lines in plain text.
 *
 * A line is a heading candidate when it is non-empty, at most
 * MAX_HEADING_LENGTH characters long, followed by a blank line (or the end of
 * the input) and does not end in `.`, `,`, `;` or `:`. A candidate is accepted
 * when it starts with an uppercase letter and has at most 12 words, is written
 * entirely in capitals, or ends with a question mark.
 *
 * @param {string} text - Plain-text body to scan.
 * @returns {{level: number, text: string, line: number}[]} Detected headings;
 *   `level` is 2 for all-caps or short (<= 4 words) lines and 3 otherwise,
 *   `line` is the 1-based line number.
 */
function detectPlaintextHeadings(text) {
  const lines = text.split("\n");
  const headings = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line || line.length > MAX_HEADING_LENGTH) continue;
    // A missing next line (end of input) is treated like a blank line.
    const nextLine = lines[i + 1]?.trim() ?? "";
    if (nextLine !== "") continue;
    // Trailing sentence punctuation marks running text, not a heading.
    if (/[.,;:]$/.test(line)) continue;
    const wordCount = line.split(/\s+/).length;
    const isTitleCase = /^[A-ZÄÖÜ]/.test(line) && wordCount <= 12;
    const isAllCaps = line === line.toUpperCase() && /[A-ZÄÖÜ]/.test(line) && line.length > 2;
    const isQuestion = line.endsWith("?");
    if (isTitleCase || isAllCaps || isQuestion) {
      const level = isAllCaps || wordCount <= 4 ? 2 : 3;
      headings.push({ level, text: line, line: i + 1 });
    }
  }
  return headings;
}
909
/**
 * Heuristically decides whether plain text contains tabular data.
 *
 * Two signals are checked, in order:
 *  1. at least MIN_TABLE_ROWS non-empty lines contain a tab character and all
 *     of them split into the same number of tab-separated columns;
 *  2. more than MIN_TABLE_ROWS lines contain two non-space tokens separated by
 *     a run of three or more spaces (column-aligned text).
 *
 * @param {string} text - Plain-text body to scan.
 * @returns {boolean} True when either signal is present.
 */
function detectPlaintextTable(text) {
  const lines = text.split("\n").filter((l) => l.trim().length > 0);
  // The delimiter must be a real tab: splitting on a single space would make
  // any prose whose lines happen to share a word count look like a table.
  const tabLines = lines.filter((l) => l.includes("\t"));
  if (tabLines.length >= MIN_TABLE_ROWS) {
    const colCounts = tabLines.map((l) => l.split("\t").length);
    // Every tab line has >= 2 columns by construction, so only consistency is checked.
    const consistent = colCounts.every((c) => c === colCounts[0]);
    if (consistent) return true;
  }
  const spaceSeparated = lines.filter((l) => /\S {3,}\S/.test(l));
  return spaceSeparated.length >= MIN_TABLE_ROWS + 1;
}
925
/**
 * Heuristically decides whether plain text contains a list.
 *
 * A line counts as a list item when, after optional leading whitespace, it
 * starts with a bullet-like character (`•`, `·`, `–`, `—`), a single word
 * character followed by `)`, or a number followed by `)`, and then whitespace.
 *
 * @param {string} text - Plain-text body to scan.
 * @returns {boolean} True when at least two lines look like list items.
 */
function detectPlaintextList(text) {
  // No `g` flag, so repeated `.test()` calls carry no `lastIndex` state.
  const listPattern = /^[\s]*[•·–—]\s+|^[\s]*\w\)\s+|^[\s]*\d+\)\s+/m;
  const itemLines = text.split("\n").filter((l) => listPattern.test(l));
  return itemLines.length >= 2;
}
930
/**
 * Heuristically detects question/answer pairs in plain text.
 *
 * A line counts as an FAQ question when it ends with `?`, is at most
 * MAX_HEADING_LENGTH characters long, and the next non-blank line is longer
 * than the question itself (taken as a sign that an answer follows).
 *
 * @param {string} text - Plain-text body to scan.
 * @returns {{hasFaq: boolean, questionCount: number}} `hasFaq` is true when at
 *   least two such questions were found.
 */
function detectPlaintextFaq(text) {
  const lines = text.split("\n");
  let questionCount = 0;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line.endsWith("?")) continue;
    if (line.length > MAX_HEADING_LENGTH) continue;
    // Scan forward for the next non-blank line without copying the array.
    let answer = "";
    for (let j = i + 1; j < lines.length; j++) {
      const candidate = lines[j].trim();
      if (candidate.length > 0) {
        answer = candidate;
        break;
      }
    }
    if (answer.length > line.length) {
      questionCount++;
    }
  }
  return {
    hasFaq: questionCount >= 2,
    questionCount
  };
}
947
+
874
948
  // src/utils/heading-extractor.ts
875
949
  function isInCodeBlock(lines, lineIndex) {
876
950
  let inCodeBlock = false;
@@ -882,7 +956,7 @@ function isInCodeBlock(lines, lineIndex) {
882
956
  }
883
957
  return inCodeBlock;
884
958
  }
885
- function extractHeadings(mdxBody) {
959
+ function extractHeadings(mdxBody, contentSource) {
886
960
  const headings = [];
887
961
  const lines = mdxBody.split("\n");
888
962
  const headingRegex = /^(#{1,6})\s+(.+)$/;
@@ -899,6 +973,9 @@ function extractHeadings(mdxBody) {
899
973
  });
900
974
  }
901
975
  }
976
+ if (headings.length === 0 && contentSource === "url") {
977
+ return detectPlaintextHeadings(mdxBody);
978
+ }
902
979
  return headings;
903
980
  }
904
981
  function countH1s(headings) {
@@ -930,6 +1007,9 @@ var missingH1 = {
930
1007
  category: "seo",
931
1008
  fixStrategy: "Add an H1 heading (# Heading) at the start of the content",
932
1009
  run: (item) => {
1010
+ if (item.contentSource === "url") {
1011
+ return [];
1012
+ }
933
1013
  if (item.contentType === "blog") {
934
1014
  return [];
935
1015
  }
@@ -1216,8 +1296,16 @@ function countWords(text) {
1216
1296
  }
1217
1297
  function countSentences(text) {
1218
1298
  const stripped = stripMarkdown(text);
1219
- const sentences = stripped.match(/[.!?]+(?:\s|$)/g);
1220
- return sentences ? sentences.length : 0;
1299
+ const sentenceEndings = stripped.match(/[.!?]+(?:\s|$|(?=[A-ZÄÖÜ]))/g);
1300
+ if (sentenceEndings && sentenceEndings.length > 0) {
1301
+ return sentenceEndings.length;
1302
+ }
1303
+ const lines = stripped.split(/\n+/).filter((l) => l.trim().length > 20);
1304
+ if (lines.length > 1) {
1305
+ return lines.length;
1306
+ }
1307
+ const hasWords = /\w{2,}/.test(stripped);
1308
+ return hasWords ? 1 : 0;
1221
1309
  }
1222
1310
 
1223
1311
  // src/utils/readability.ts
@@ -1499,6 +1587,7 @@ var robotsRules = [
1499
1587
  // src/rules/slug-rules.ts
1500
1588
  var SLUG_DEFAULTS = { maxLength: 75 };
1501
1589
  var SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
1590
+ var URL_PATH_PATTERN = /^[a-z0-9]+(?:[-/][a-z0-9]+)*$/;
1502
1591
  var slugInvalidCharacters = {
1503
1592
  name: "slug-invalid-characters",
1504
1593
  severity: "error",
@@ -1506,8 +1595,10 @@ var slugInvalidCharacters = {
1506
1595
  fixStrategy: 'Use lowercase alphanumeric characters with hyphens only (e.g., "my-blog-post")',
1507
1596
  run: (item) => {
1508
1597
  if (!item.slug) return [];
1598
+ const isUrl = item.contentSource === "url";
1599
+ const pattern = isUrl ? URL_PATH_PATTERN : SLUG_PATTERN;
1509
1600
  const hasUppercase = /[A-Z]/.test(item.slug);
1510
- const matchesPattern = SLUG_PATTERN.test(item.slug);
1601
+ const matchesPattern = pattern.test(item.slug);
1511
1602
  if (hasUppercase || !matchesPattern) {
1512
1603
  return [{
1513
1604
  file: getDisplayPath(item),
@@ -1515,7 +1606,7 @@ var slugInvalidCharacters = {
1515
1606
  rule: "slug-invalid-characters",
1516
1607
  severity: "error",
1517
1608
  message: `Slug "${item.slug}" contains invalid characters`,
1518
- suggestion: 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
1609
+ suggestion: isUrl ? "URL paths must be lowercase alphanumeric with hyphens and slashes only" : 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
1519
1610
  }];
1520
1611
  }
1521
1612
  return [];
@@ -1780,8 +1871,8 @@ var WEAK_LEAD_STARTS = [
1780
1871
  "schauen wir uns"
1781
1872
  ];
1782
1873
  var TABLE_SEPARATOR_PATTERN = /\|\s*:?-{2,}/;
1783
- function countQuestionHeadings(body) {
1784
- const headings = extractHeadings(body);
1874
+ function countQuestionHeadings(body, contentSource) {
1875
+ const headings = extractHeadings(body, contentSource);
1785
1876
  let count = 0;
1786
1877
  for (const heading of headings) {
1787
1878
  const text = heading.text.trim();
@@ -1843,12 +1934,20 @@ function countStatistics(body) {
1843
1934
  }
1844
1935
  return matches.size;
1845
1936
  }
1846
/**
 * Reports whether the body contains an FAQ section.
 *
 * A Markdown heading marker of two or three `#` followed by one of the known
 * FAQ titles always counts. For content whose `contentSource` is `"url"`, the
 * plain-text question/answer heuristic (`detectPlaintextFaq`) is used as a
 * fallback.
 *
 * @param {string} body - Content body.
 * @param {string} [contentSource] - Origin of the content; only `"url"`
 *   enables the plain-text fallback.
 * @returns {boolean} True when an FAQ section is detected.
 */
function hasFAQSection(body, contentSource) {
  const faqPattern = /#{2,3}\s*(FAQ|Häufige Fragen|Frequently Asked|Fragen und Antworten)/i;
  if (faqPattern.test(body)) return true;
  return contentSource === "url" && detectPlaintextFaq(body).hasFaq;
}
1850
/**
 * Reports whether the body contains a table.
 *
 * A Markdown table separator row (matched by TABLE_SEPARATOR_PATTERN) always
 * counts. For content whose `contentSource` is `"url"`, the plain-text table
 * heuristic (`detectPlaintextTable`) is used as a fallback.
 *
 * @param {string} body - Content body.
 * @param {string} [contentSource] - Origin of the content; only `"url"`
 *   enables the plain-text fallback.
 * @returns {boolean} True when a table is detected.
 */
function hasMarkdownTable(body, contentSource) {
  if (TABLE_SEPARATOR_PATTERN.test(body)) return true;
  return contentSource === "url" && detectPlaintextTable(body);
}
1853
1952
  function countEntityMentions(body, entity) {
1854
1953
  const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
@@ -2010,7 +2109,7 @@ function getParagraphs(body) {
2010
2109
  }
2011
2110
  return paragraphs;
2012
2111
  }
2013
- function hasMarkdownList(body) {
2112
+ function hasMarkdownList(body, contentSource) {
2014
2113
  const lines = body.split("\n");
2015
2114
  let inCodeBlock = false;
2016
2115
  for (const line of lines) {
@@ -2023,6 +2122,9 @@ function hasMarkdownList(body) {
2023
2122
  if (/^[-*]\s+/.test(trimmed)) return true;
2024
2123
  if (/^\d+\.\s+/.test(trimmed)) return true;
2025
2124
  }
2125
+ if (contentSource === "url") {
2126
+ return detectPlaintextList(body);
2127
+ }
2026
2128
  return false;
2027
2129
  }
2028
2130
  function countInternalLinks(body) {
@@ -2607,12 +2709,72 @@ var datasetSchemaReadiness = {
2607
2709
  return results;
2608
2710
  }
2609
2711
  };
2610
- var schemaRules = [
2712
var MIN_SAMEAS_ENTRIES = 2; // fewer configured sameAs links than this triggers the warning

/**
 * Builds the site-level rule `seo-schema-sameas-incomplete`.
 *
 * The rule inspects the configured list only, not the linted item, so it
 * reports at most once per rule instance: the first `run` call evaluates the
 * list and every later call returns an empty result.
 *
 * @param {string[]} [organizationSameAs] - Configured Organization `sameAs`
 *   URLs. An empty or missing list disables the rule.
 * @returns {object} Rule object with `name`, `severity`, `category`,
 *   `fixStrategy` and `run`.
 */
function createSchemaSameAsRule(organizationSameAs) {
  let hasFired = false;
  return {
    name: "seo-schema-sameas-incomplete",
    severity: "warning",
    category: "seo",
    fixStrategy: "Add social profiles (LinkedIn, GitHub, Twitter), Wikidata QID, and Crunchbase URL to Organization schema sameAs array",
    run: (_item) => {
      if (hasFired) return [];
      hasFired = true;
      const count = organizationSameAs?.length ?? 0;
      // Nothing configured means the rule is opted out; enough entries means it passes.
      if (count === 0 || count >= MIN_SAMEAS_ENTRIES) return [];
      // Keep the message grammatical should MIN_SAMEAS_ENTRIES ever be raised.
      const noun = count === 1 ? "entry" : "entries";
      return [
        {
          file: "_site",
          field: "schema",
          rule: "seo-schema-sameas-incomplete",
          severity: "warning",
          message: `Organization sameAs has ${count} ${noun} \u2014 include at least ${MIN_SAMEAS_ENTRIES} for entity verification`,
          suggestion: "AI models use sameAs to verify entity identity. Include at least LinkedIn + one other profile (GitHub, Wikidata QID, Crunchbase)."
        }
      ];
    }
  };
}
2740
/**
 * Builds the rule `seo-service-page-no-schema`.
 *
 * The rule warns for every item whose permalink contains one of the configured
 * substrings. It does not inspect the item for existing structured data; it
 * is a reminder keyed purely on the permalink.
 *
 * @param {string[]} [servicePagePatterns] - Permalink substrings identifying
 *   service pages. An empty or missing list disables the rule.
 * @returns {object} Rule object with `name`, `severity`, `category`,
 *   `fixStrategy` and `run`.
 */
function createServicePageSchemaRule(servicePagePatterns) {
  return {
    name: "seo-service-page-no-schema",
    severity: "warning",
    category: "seo",
    fixStrategy: "Add Service structured data (JSON-LD) to service pages with name, description, provider, and areaServed.",
    run: (item) => {
      if (!servicePagePatterns || servicePagePatterns.length === 0) return [];
      const permalink = item.permalink;
      // Items without a string permalink cannot match; previously this threw.
      if (typeof permalink !== "string") return [];
      const matchesPattern = servicePagePatterns.some(
        (pattern) => permalink.includes(pattern)
      );
      if (!matchesPattern) return [];
      return [
        {
          file: getDisplayPath(item),
          field: "schema",
          rule: "seo-service-page-no-schema",
          severity: "warning",
          message: `Service page "${permalink}" should have Service structured data`,
          suggestion: 'Service pages need schema markup to appear in AI answers for "[service] provider in [city]" queries. Add Service JSON-LD with name, description, provider, and areaServed.'
        }
      ];
    }
  };
}
2765
// Schema rules that need no configuration; createSchemaRules appends the
// config-driven ones.
var schemaStaticRules = [
  blogMissingSchemaFields,
  faqpageSchemaReadiness,
  breadcrumblistSchemaReadiness,
  datasetSchemaReadiness
];
2771
/**
 * Assembles all schema rules: the static ones plus the rules parameterized by
 * the `geo` configuration section.
 *
 * @param {object} geo - `geo` section of the merged configuration; reads
 *   `organizationSameAs` and `servicePagePatterns`.
 * @returns {object[]} Rule objects, static rules first.
 */
function createSchemaRules(geo) {
  return [
    ...schemaStaticRules,
    createSchemaSameAsRule(geo.organizationSameAs),
    createServicePageSchemaRule(geo.servicePagePatterns)
  ];
}
2616
2778
 
2617
2779
  // src/rules/keyword-coherence-rules.ts
2618
2780
  var MIN_SIGNIFICANT_WORDS = 2;
@@ -13694,8 +13856,27 @@ function jaccardSimilarity(a, b) {
13694
13856
  const union = a.size + b.size - intersection;
13695
13857
  return union > 0 ? intersection / union : 0;
13696
13858
  }
13859
// Citation boilerplate fragments that are removed before repetition analysis.
// The patterns carry the `g` flag; they are only ever passed to
// String.prototype.replace, which does not depend on a stale `lastIndex`.
var REFERENCE_PATTERNS = [
  /archived from the original on/gi,
  /retrieved (?:on )?\d/gi,
  /accessed (?:on )?\d/gi,
  /cite (?:web|book|journal|news)/gi,
  /\^\s*\[?\d+\]?/g,
  /isbn \d/gi,
  /doi:\s*\d/gi,
  /pmid:\s*\d/gi
];

/**
 * Removes citation boilerplate from a text.
 *
 * Every match of REFERENCE_PATTERNS is deleted, then everything from the first
 * line consisting solely of a references-style title ("references", "sources",
 * "bibliography", "einzelnachweise", "weblinks"; case-insensitive) to the end
 * of the text is dropped.
 *
 * @param {string} text - Text to clean.
 * @returns {string} Text without the matched fragments and trailing
 *   reference section.
 */
function stripReferenceBoilerplate(text) {
  let result = text;
  for (const pattern of REFERENCE_PATTERNS) {
    result = result.replace(pattern, "");
  }
  return result.replace(/\n(?:references|sources|bibliography|einzelnachweise|weblinks)\n[\s\S]*$/i, "");
}
13697
13877
  function analyzeRepetition(body) {
13698
- const plain = stripMarkdown(body).toLowerCase();
13878
+ const cleaned = stripReferenceBoilerplate(body);
13879
+ const plain = stripMarkdown(cleaned).toLowerCase();
13699
13880
  const words = plain.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/).filter((w) => w.length > 0);
13700
13881
  const fiveGrams = extractNgrams(words, 5);
13701
13882
  const phraseCounts = /* @__PURE__ */ new Map();
@@ -13704,7 +13885,7 @@ function analyzeRepetition(body) {
13704
13885
  }
13705
13886
  const repeatedPhrases = [...phraseCounts.entries()].filter(([, count]) => count >= 3).sort((a, b) => b[1] - a[1]);
13706
13887
  const topRepeatedPhrases = repeatedPhrases.slice(0, 5).map(([phrase, count]) => ({ phrase, count }));
13707
- const paragraphs = body.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
13888
+ const paragraphs = cleaned.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
13708
13889
  let totalSimilarity = 0;
13709
13890
  let pairCount = 0;
13710
13891
  for (let i = 0; i < paragraphs.length; i++) {
@@ -14010,10 +14191,10 @@ var geoNoQuestionHeadings = {
14010
14191
  if (!geoTypes.includes(item.contentType)) return [];
14011
14192
  const wordCount = countWords(item.body);
14012
14193
  if (wordCount < GEO_MIN_WORDS) return [];
14013
- const headings = extractHeadings(item.body);
14194
+ const headings = extractHeadings(item.body, item.contentSource);
14014
14195
  const subHeadings = headings.filter((h) => h.level === 2 || h.level === 3);
14015
14196
  if (subHeadings.length === 0) return [];
14016
- const questionCount = countQuestionHeadings(item.body);
14197
+ const questionCount = countQuestionHeadings(item.body, item.contentSource);
14017
14198
  const ratio = questionCount / subHeadings.length;
14018
14199
  if (ratio < QUESTION_HEADING_THRESHOLD) {
14019
14200
  return [{
@@ -14103,7 +14284,7 @@ var geoMissingFaqSection = {
14103
14284
  if (!geoTypes.includes(item.contentType)) return [];
14104
14285
  const wordCount = countWords(item.body);
14105
14286
  if (wordCount < FAQ_MIN_WORDS) return [];
14106
- if (!hasFAQSection(item.body)) {
14287
+ if (!hasFAQSection(item.body, item.contentSource)) {
14107
14288
  return [{
14108
14289
  file: getDisplayPath(item),
14109
14290
  field: "body",
@@ -14148,7 +14329,7 @@ var geoMissingTable = {
14148
14329
  if (!geoTypes.includes(item.contentType)) return [];
14149
14330
  const wordCount = countWords(item.body);
14150
14331
  if (wordCount < TABLE_MIN_WORDS) return [];
14151
- if (!hasMarkdownTable(item.body)) {
14332
+ if (!hasMarkdownTable(item.body, item.contentSource)) {
14152
14333
  return [{
14153
14334
  file: getDisplayPath(item),
14154
14335
  field: "body",
@@ -14644,6 +14825,58 @@ var geoMissingTldr = {
14644
14825
  return [];
14645
14826
  }
14646
14827
  };
14828
// Words that mark an author string as a team or organization rather than a person.
var ORG_AUTHOR_PATTERNS = [
  /\bteam\b/i,
  /\bredaktion\b/i,
  /\beditorial\b/i,
  /\beditors?\b/i,
  /\bherausgeber\b/i,
  /\bverlag\b/i,
  /\bredaktionsteam\b/i
];

/**
 * Builds the rule `geo-author-not-person`.
 *
 * For items of a GEO-enabled content type that have a non-empty author, the
 * rule warns when the author equals the brand name (case-insensitive, ignoring
 * surrounding whitespace) or matches one of ORG_AUTHOR_PATTERNS.
 *
 * The pattern check does not depend on the brand name, so it also runs when no
 * brand name is configured; only the brand comparison is skipped in that case.
 *
 * @param {string} [brandName] - Configured organization/brand name.
 * @returns {object} Rule object with `name`, `severity`, `category`,
 *   `fixStrategy` and `run(item, context)`.
 */
function createGeoAuthorNotPersonRule(brandName) {
  const normalizedBrand = typeof brandName === "string" ? brandName.trim().toLowerCase() : "";
  return {
    name: "geo-author-not-person",
    severity: "warning",
    category: "geo",
    fixStrategy: "Replace organization name with individual author name. Use Person type in BlogPosting schema for stronger E-E-A-T signals.",
    run: (item, context) => {
      const geoTypes = context.geoEnabledContentTypes ?? ["blog"];
      if (!geoTypes.includes(item.contentType)) return [];
      if (!item.author || item.author.trim() === "") return [];
      const normalizedAuthor = item.author.trim().toLowerCase();
      if (normalizedBrand !== "" && normalizedAuthor === normalizedBrand) {
        return [
          {
            file: getDisplayPath(item),
            field: "author",
            rule: "geo-author-not-person",
            severity: "warning",
            message: `Author "${item.author}" is the organization name \u2014 use a person's name instead`,
            suggestion: "AI models cite named experts over faceless organizations. Use the actual author's name for stronger E-E-A-T signals."
          }
        ];
      }
      const matchesOrgPattern = ORG_AUTHOR_PATTERNS.some(
        (pattern) => pattern.test(item.author)
      );
      if (matchesOrgPattern) {
        return [
          {
            file: getDisplayPath(item),
            field: "author",
            rule: "geo-author-not-person",
            severity: "warning",
            message: `Author "${item.author}" appears to be an organization or team name`,
            suggestion: "BlogPosting with author.@type: Person gets cited more than Organization. Use an individual person's name."
          }
        ];
      }
      return [];
    }
  };
}
14647
14880
  var geoEeatStaticRules = [
14648
14881
  geoMissingSourceCitations,
14649
14882
  geoMissingExpertQuotes,
@@ -14656,7 +14889,8 @@ function createGeoEeatRules(geo) {
14656
14889
  return [
14657
14890
  ...geoEeatStaticRules,
14658
14891
  createGeoMissingAuthorRule(geo.genericAuthorNames ?? []),
14659
- createGeoHeadingTooVagueRule(geo.vagueHeadings ?? [])
14892
+ createGeoHeadingTooVagueRule(geo.vagueHeadings ?? []),
14893
+ createGeoAuthorNotPersonRule(geo.brandName)
14660
14894
  ];
14661
14895
  }
14662
14896
 
@@ -14743,7 +14977,7 @@ var geoMissingLists = {
14743
14977
  if (!geoTypes.includes(item.contentType)) return [];
14744
14978
  const wordCount = countWords(item.body);
14745
14979
  if (wordCount < STRUCTURE_MIN_WORDS) return [];
14746
- if (!hasMarkdownList(item.body)) {
14980
+ if (!hasMarkdownList(item.body, item.contentSource)) {
14747
14981
  return [{
14748
14982
  file: getDisplayPath(item),
14749
14983
  field: "body",
@@ -15645,6 +15879,68 @@ var contentQualityRules = [
15645
15879
  sentenceVariety
15646
15880
  ];
15647
15881
 
15882
+ // src/rules/technical-site-rules.ts
15883
/**
 * Builds the site-level rule `technical-no-feed`.
 *
 * The rule inspects the configured list only, not the linted item, so it
 * reports at most once per rule instance: the first `run` call evaluates the
 * list and every later call returns an empty result.
 *
 * @param {string[]} [feedUrls] - Configured feed URLs. `undefined`/`null`
 *   disables the rule; an empty array produces the warning.
 * @returns {object} Rule object with `name`, `severity`, `category`,
 *   `fixStrategy` and `run`.
 */
function createNoFeedRule(feedUrls) {
  let hasFired = false;
  return {
    name: "technical-no-feed",
    severity: "warning",
    category: "technical",
    fixStrategy: "Add an RSS or JSON feed endpoint exposing blog posts with full content.",
    run: (_item, _context) => {
      if (hasFired) return [];
      hasFired = true;
      // `== null` also covers an explicit null, which would otherwise throw on `.length`.
      if (feedUrls == null) return [];
      if (feedUrls.length > 0) return [];
      return [
        {
          file: "_site",
          field: "feed",
          rule: "technical-no-feed",
          severity: "warning",
          message: "No RSS/Atom/JSON feed detected \u2014 AI systems lose a structured ingestion path",
          suggestion: "Feeds provide a structured ingestion path for AI systems beyond crawler discovery. Add an RSS or JSON feed endpoint."
        }
      ];
    }
  };
}
15910
/**
 * Builds the site-level rule `technical-no-llms-txt`.
 *
 * The rule inspects the configured value only, not the linted item, so it
 * reports at most once per rule instance: the first `run` call evaluates the
 * value and every later call returns an empty result.
 *
 * @param {string} [llmsTxtUrl] - Configured llms.txt URL. A non-string value
 *   (including `undefined`/`null`) disables the rule; an empty or
 *   whitespace-only string produces the warning.
 * @returns {object} Rule object with `name`, `severity`, `category`,
 *   `fixStrategy` and `run`.
 */
function createNoLlmsTxtRule(llmsTxtUrl) {
  let hasFired = false;
  return {
    name: "technical-no-llms-txt",
    severity: "warning",
    category: "technical",
    fixStrategy: "Create a /llms.txt endpoint that maps your most important content for LLM consumption in Markdown format.",
    run: (_item, _context) => {
      if (hasFired) return [];
      hasFired = true;
      // Anything that is not a string is treated as "not configured" instead of throwing on `.trim()`.
      if (typeof llmsTxtUrl !== "string") return [];
      if (llmsTxtUrl.trim() !== "") return [];
      return [
        {
          file: "_site",
          field: "llms-txt",
          rule: "technical-no-llms-txt",
          severity: "warning",
          message: "No /llms.txt endpoint detected \u2014 missing the emerging standard for LLM content declaration",
          suggestion: "llms.txt is the robots.txt equivalent for AI \u2014 trivial to add, future-proofs your site for LLM crawlers."
        }
      ];
    }
  };
}
15937
/**
 * Assembles the site-level technical rules from the `technical` configuration
 * section.
 *
 * @param {object} [technical] - `technical` section of the configuration;
 *   reads `feedUrls` and `llmsTxtUrl`. When the section is missing, both
 *   factories receive `undefined`.
 * @returns {object[]} The feed rule followed by the llms.txt rule.
 */
function createTechnicalSiteRules(technical) {
  // Optional access keeps configs without a `technical` section from throwing.
  return [
    createNoFeedRule(technical?.feedUrls),
    createNoLlmsTxtRule(technical?.llmsTxtUrl)
  ];
}
15943
+
15648
15944
  // src/rules/index.ts
15649
15945
  function buildRules(config, linkExtractor) {
15650
15946
  const rules = [
@@ -15664,7 +15960,7 @@ function buildRules(config, linkExtractor) {
15664
15960
  ...createI18nRules(config.i18n),
15665
15961
  ...dateRules,
15666
15962
  ...config.categories.length > 0 ? createCategoryRules(config.categories) : [],
15667
- ...schemaRules,
15963
+ ...createSchemaRules(config.geo),
15668
15964
  ...createGeoRules(config.geo),
15669
15965
  ...createGeoEeatRules(config.geo),
15670
15966
  ...geoStructureRules,
@@ -15672,7 +15968,8 @@ function buildRules(config, linkExtractor) {
15672
15968
  ...createGeoRagRules(config.geo),
15673
15969
  ...keywordCoherenceRules,
15674
15970
  ...createCanonicalRules(config.siteUrl),
15675
- ...contentQualityRules
15971
+ ...contentQualityRules,
15972
+ ...createTechnicalSiteRules(config.technical)
15676
15973
  ];
15677
15974
  return rules.map((rule) => applyRuleOverride(rule, config.rules));
15678
15975
  }