npm - @ijonis/geo-lint - Versions diffs - 0.1.5 → 0.2.1 - Mend

@ijonis/geo-lint 0.1.5 → 0.2.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (13)

package/dist/cli.cjs CHANGED Viewed

@@ -179,7 +179,13 @@ var DEFAULT_CONFIG = {
       "Tabs",
       "Tab"
     ],
-    enabledContentTypes: ["blog"]
+    enabledContentTypes: ["blog"],
+    organizationSameAs: [],
+    servicePagePatterns: []
+  },
+  technical: {
+    feedUrls: [],
+    llmsTxtUrl: ""
   },
   i18n: {
     locales: ["de", "en"],
@@ -251,12 +257,18 @@ function mergeWithDefaults(user) {
       acronymAllowlist: user.geo?.acronymAllowlist ?? DEFAULT_CONFIG.geo.acronymAllowlist,
       vagueHeadings: user.geo?.vagueHeadings ?? DEFAULT_CONFIG.geo.vagueHeadings,
       genericAuthorNames: user.geo?.genericAuthorNames ?? DEFAULT_CONFIG.geo.genericAuthorNames,
-      allowedHtmlTags: user.geo?.allowedHtmlTags ?? DEFAULT_CONFIG.geo.allowedHtmlTags
+      allowedHtmlTags: user.geo?.allowedHtmlTags ?? DEFAULT_CONFIG.geo.allowedHtmlTags,
+      organizationSameAs: user.geo?.organizationSameAs ?? DEFAULT_CONFIG.geo.organizationSameAs,
+      servicePagePatterns: user.geo?.servicePagePatterns ?? DEFAULT_CONFIG.geo.servicePagePatterns
     },
     i18n: {
       locales: user.i18n?.locales ?? DEFAULT_CONFIG.i18n.locales,
       defaultLocale: user.i18n?.defaultLocale ?? DEFAULT_CONFIG.i18n.defaultLocale
     },
+    technical: {
+      feedUrls: user.technical?.feedUrls ?? DEFAULT_CONFIG.technical.feedUrls,
+      llmsTxtUrl: user.technical?.llmsTxtUrl ?? DEFAULT_CONFIG.technical.llmsTxtUrl
+    },
     rules: { ...DEFAULT_CONFIG.rules, ...user.rules ?? {} },
     thresholds: {
       title: { ...DEFAULT_CONFIG.thresholds.title, ...user.thresholds?.title ?? {} },
@@ -871,6 +883,68 @@ var duplicateRules = [
   duplicateDescription
 ];
+// src/utils/plaintext-structure.ts
+var MAX_HEADING_LENGTH = 80;
+var MIN_TABLE_ROWS = 2;
+function detectPlaintextHeadings(text) {
+  const lines = text.split("\n");
+  const headings = [];
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i].trim();
+    if (!line || line.length > MAX_HEADING_LENGTH) continue;
+    const nextLine = lines[i + 1]?.trim() ?? "";
+    const isFollowedByBlank = i + 1 >= lines.length || nextLine === "";
+    if (!isFollowedByBlank) continue;
+    if (/[.,;:]$/.test(line)) continue;
+    const isTitleCase = /^[A-ZÄÖÜ]/.test(line) && line.split(/\s+/).length <= 12;
+    const isAllCaps = line === line.toUpperCase() && /[A-ZÄÖÜ]/.test(line) && line.length > 2;
+    const isQuestion = line.endsWith("?");
+    if (isTitleCase || isAllCaps || isQuestion) {
+      const level = isAllCaps || line.split(/\s+/).length <= 4 ? 2 : 3;
+      headings.push({ level, text: line, line: i + 1 });
+    }
+  }
+  return headings;
+}
+function detectPlaintextTable(text) {
+  const lines = text.split("\n").filter((l) => l.trim().length > 0);
+  const tabLines = lines.filter((l) => l.includes("	"));
+  if (tabLines.length >= MIN_TABLE_ROWS) {
+    const colCounts = tabLines.map((l) => l.split("	").length);
+    const consistent = colCounts.every(
+      (c) => c === colCounts[0] && c >= 2
+    );
+    if (consistent) return true;
+  }
+  const spaceSeparated = lines.filter((l) => /\S {3,}\S/.test(l));
+  if (spaceSeparated.length >= MIN_TABLE_ROWS + 1) {
+    return true;
+  }
+  return false;
+}
+function detectPlaintextList(text) {
+  const listPattern = /^[\s]*[•·–—]\s+|^[\s]*\w\)\s+|^[\s]*\d+\)\s+/m;
+  const lines = text.split("\n").filter((l) => listPattern.test(l));
+  return lines.length >= 2;
+}
+function detectPlaintextFaq(text) {
+  const lines = text.split("\n");
+  let questionCount = 0;
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i].trim();
+    if (!line.endsWith("?")) continue;
+    if (line.length > MAX_HEADING_LENGTH) continue;
+    const nextContent = lines.slice(i + 1).find((l) => l.trim().length > 0);
+    if (nextContent && nextContent.trim().length > line.length) {
+      questionCount++;
+    }
+  }
+  return {
+    hasFaq: questionCount >= 2,
+    questionCount
+  };
+}
 // src/utils/heading-extractor.ts
 function isInCodeBlock(lines, lineIndex) {
   let inCodeBlock = false;
@@ -882,7 +956,7 @@ function isInCodeBlock(lines, lineIndex) {
   }
   return inCodeBlock;
 }
-function extractHeadings(mdxBody) {
+function extractHeadings(mdxBody, contentSource) {
   const headings = [];
   const lines = mdxBody.split("\n");
   const headingRegex = /^(#{1,6})\s+(.+)$/;
@@ -899,6 +973,9 @@ function extractHeadings(mdxBody) {
       });
     }
   }
+  if (headings.length === 0 && contentSource === "url") {
+    return detectPlaintextHeadings(mdxBody);
+  }
   return headings;
 }
 function countH1s(headings) {
@@ -930,6 +1007,9 @@ var missingH1 = {
   category: "seo",
   fixStrategy: "Add an H1 heading (# Heading) at the start of the content",
   run: (item) => {
+    if (item.contentSource === "url") {
+      return [];
+    }
     if (item.contentType === "blog") {
       return [];
     }
@@ -1216,8 +1296,16 @@ function countWords(text) {
 }
 function countSentences(text) {
   const stripped = stripMarkdown(text);
-  const sentences = stripped.match(/[.!?]+(?:\s|$)/g);
-  return sentences ? sentences.length : 0;
+  const sentenceEndings = stripped.match(/[.!?]+(?:\s|$|(?=[A-ZÄÖÜ]))/g);
+  if (sentenceEndings && sentenceEndings.length > 0) {
+    return sentenceEndings.length;
+  }
+  const lines = stripped.split(/\n+/).filter((l) => l.trim().length > 20);
+  if (lines.length > 1) {
+    return lines.length;
+  }
+  const hasWords = /\w{2,}/.test(stripped);
+  return hasWords ? 1 : 0;
 }
 // src/utils/readability.ts
@@ -1499,6 +1587,7 @@ var robotsRules = [
 // src/rules/slug-rules.ts
 var SLUG_DEFAULTS = { maxLength: 75 };
 var SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
+var URL_PATH_PATTERN = /^[a-z0-9]+(?:[-/][a-z0-9]+)*$/;
 var slugInvalidCharacters = {
   name: "slug-invalid-characters",
   severity: "error",
@@ -1506,8 +1595,10 @@ var slugInvalidCharacters = {
   fixStrategy: 'Use lowercase alphanumeric characters with hyphens only (e.g., "my-blog-post")',
   run: (item) => {
     if (!item.slug) return [];
+    const isUrl = item.contentSource === "url";
+    const pattern = isUrl ? URL_PATH_PATTERN : SLUG_PATTERN;
     const hasUppercase = /[A-Z]/.test(item.slug);
-    const matchesPattern = SLUG_PATTERN.test(item.slug);
+    const matchesPattern = pattern.test(item.slug);
     if (hasUppercase || !matchesPattern) {
       return [{
         file: getDisplayPath(item),
@@ -1515,7 +1606,7 @@ var slugInvalidCharacters = {
         rule: "slug-invalid-characters",
         severity: "error",
         message: `Slug "${item.slug}" contains invalid characters`,
-        suggestion: 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
+        suggestion: isUrl ? "URL paths must be lowercase alphanumeric with hyphens and slashes only" : 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
       }];
     }
     return [];
@@ -1780,8 +1871,8 @@ var WEAK_LEAD_STARTS = [
   "schauen wir uns"
 ];
 var TABLE_SEPARATOR_PATTERN = /\|\s*:?-{2,}/;
-function countQuestionHeadings(body) {
-  const headings = extractHeadings(body);
+function countQuestionHeadings(body, contentSource) {
+  const headings = extractHeadings(body, contentSource);
   let count = 0;
   for (const heading of headings) {
     const text = heading.text.trim();
@@ -1843,12 +1934,20 @@ function countStatistics(body) {
   }
   return matches.size;
 }
-function hasFAQSection(body) {
+function hasFAQSection(body, contentSource) {
   const faqPattern = /#{2,3}\s*(FAQ|Häufige Fragen|Frequently Asked|Fragen und Antworten)/i;
-  return faqPattern.test(body);
+  if (faqPattern.test(body)) return true;
+  if (contentSource === "url") {
+    return detectPlaintextFaq(body).hasFaq;
+  }
+  return false;
 }
-function hasMarkdownTable(body) {
-  return TABLE_SEPARATOR_PATTERN.test(body);
+function hasMarkdownTable(body, contentSource) {
+  if (TABLE_SEPARATOR_PATTERN.test(body)) return true;
+  if (contentSource === "url") {
+    return detectPlaintextTable(body);
+  }
+  return false;
 }
 function countEntityMentions(body, entity) {
   const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
@@ -2010,7 +2109,7 @@ function getParagraphs(body) {
   }
   return paragraphs;
 }
-function hasMarkdownList(body) {
+function hasMarkdownList(body, contentSource) {
   const lines = body.split("\n");
   let inCodeBlock = false;
   for (const line of lines) {
@@ -2023,6 +2122,9 @@ function hasMarkdownList(body) {
     if (/^[-*]\s+/.test(trimmed)) return true;
     if (/^\d+\.\s+/.test(trimmed)) return true;
   }
+  if (contentSource === "url") {
+    return detectPlaintextList(body);
+  }
   return false;
 }
 function countInternalLinks(body) {
@@ -2607,12 +2709,72 @@ var datasetSchemaReadiness = {
     return results;
   }
 };
-var schemaRules = [
+var MIN_SAMEAS_ENTRIES = 2;
+function createSchemaSameAsRule(organizationSameAs) {
+  let hasFired = false;
+  return {
+    name: "seo-schema-sameas-incomplete",
+    severity: "warning",
+    category: "seo",
+    fixStrategy: "Add social profiles (LinkedIn, GitHub, Twitter), Wikidata QID, and Crunchbase URL to Organization schema sameAs array",
+    run: (_item) => {
+      if (hasFired) return [];
+      hasFired = true;
+      if (!organizationSameAs || organizationSameAs.length === 0) return [];
+      if (organizationSameAs.length < MIN_SAMEAS_ENTRIES) {
+        return [
+          {
+            file: "_site",
+            field: "schema",
+            rule: "seo-schema-sameas-incomplete",
+            severity: "warning",
+            message: `Organization sameAs has ${organizationSameAs.length} entry \u2014 include at least ${MIN_SAMEAS_ENTRIES} for entity verification`,
+            suggestion: "AI models use sameAs to verify entity identity. Include at least LinkedIn + one other profile (GitHub, Wikidata QID, Crunchbase)."
+          }
+        ];
+      }
+      return [];
+    }
+  };
+}
+function createServicePageSchemaRule(servicePagePatterns) {
+  return {
+    name: "seo-service-page-no-schema",
+    severity: "warning",
+    category: "seo",
+    fixStrategy: "Add Service structured data (JSON-LD) to service pages with name, description, provider, and areaServed.",
+    run: (item) => {
+      if (!servicePagePatterns || servicePagePatterns.length === 0) return [];
+      const matchesPattern = servicePagePatterns.some(
+        (pattern) => item.permalink.includes(pattern)
+      );
+      if (!matchesPattern) return [];
+      return [
+        {
+          file: getDisplayPath(item),
+          field: "schema",
+          rule: "seo-service-page-no-schema",
+          severity: "warning",
+          message: `Service page "${item.permalink}" should have Service structured data`,
+          suggestion: 'Service pages need schema markup to appear in AI answers for "[service] provider in [city]" queries. Add Service JSON-LD with name, description, provider, and areaServed.'
+        }
+      ];
+    }
+  };
+}
+var schemaStaticRules = [
   blogMissingSchemaFields,
   faqpageSchemaReadiness,
   breadcrumblistSchemaReadiness,
   datasetSchemaReadiness
 ];
+function createSchemaRules(geo) {
+  return [
+    ...schemaStaticRules,
+    createSchemaSameAsRule(geo.organizationSameAs),
+    createServicePageSchemaRule(geo.servicePagePatterns)
+  ];
+}
 // src/rules/keyword-coherence-rules.ts
 var MIN_SIGNIFICANT_WORDS = 2;
@@ -13694,8 +13856,27 @@ function jaccardSimilarity(a, b) {
   const union = a.size + b.size - intersection;
   return union > 0 ? intersection / union : 0;
 }
+var REFERENCE_PATTERNS = [
+  /archived from the original on/gi,
+  /retrieved (?:on )?\d/gi,
+  /accessed (?:on )?\d/gi,
+  /cite (?:web|book|journal|news)/gi,
+  /\^\s*\[?\d+\]?/g,
+  /isbn \d/gi,
+  /doi:\s*\d/gi,
+  /pmid:\s*\d/gi
+];
+function stripReferenceBoilerplate(text) {
+  let result = text;
+  for (const pattern of REFERENCE_PATTERNS) {
+    result = result.replace(pattern, "");
+  }
+  result = result.replace(/\n(?:references|sources|bibliography|einzelnachweise|weblinks)\n[\s\S]*$/i, "");
+  return result;
+}
 function analyzeRepetition(body) {
-  const plain = stripMarkdown(body).toLowerCase();
+  const cleaned = stripReferenceBoilerplate(body);
+  const plain = stripMarkdown(cleaned).toLowerCase();
   const words = plain.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/).filter((w) => w.length > 0);
   const fiveGrams = extractNgrams(words, 5);
   const phraseCounts = /* @__PURE__ */ new Map();
@@ -13704,7 +13885,7 @@ function analyzeRepetition(body) {
   }
   const repeatedPhrases = [...phraseCounts.entries()].filter(([, count]) => count >= 3).sort((a, b) => b[1] - a[1]);
   const topRepeatedPhrases = repeatedPhrases.slice(0, 5).map(([phrase, count]) => ({ phrase, count }));
-  const paragraphs = body.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
+  const paragraphs = cleaned.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
   let totalSimilarity = 0;
   let pairCount = 0;
   for (let i = 0; i < paragraphs.length; i++) {
@@ -14010,10 +14191,10 @@ var geoNoQuestionHeadings = {
     if (!geoTypes.includes(item.contentType)) return [];
     const wordCount = countWords(item.body);
     if (wordCount < GEO_MIN_WORDS) return [];
-    const headings = extractHeadings(item.body);
+    const headings = extractHeadings(item.body, item.contentSource);
     const subHeadings = headings.filter((h) => h.level === 2 || h.level === 3);
     if (subHeadings.length === 0) return [];
-    const questionCount = countQuestionHeadings(item.body);
+    const questionCount = countQuestionHeadings(item.body, item.contentSource);
     const ratio = questionCount / subHeadings.length;
     if (ratio < QUESTION_HEADING_THRESHOLD) {
       return [{
@@ -14103,7 +14284,7 @@ var geoMissingFaqSection = {
     if (!geoTypes.includes(item.contentType)) return [];
     const wordCount = countWords(item.body);
     if (wordCount < FAQ_MIN_WORDS) return [];
-    if (!hasFAQSection(item.body)) {
+    if (!hasFAQSection(item.body, item.contentSource)) {
       return [{
         file: getDisplayPath(item),
         field: "body",
@@ -14148,7 +14329,7 @@ var geoMissingTable = {
     if (!geoTypes.includes(item.contentType)) return [];
     const wordCount = countWords(item.body);
     if (wordCount < TABLE_MIN_WORDS) return [];
-    if (!hasMarkdownTable(item.body)) {
+    if (!hasMarkdownTable(item.body, item.contentSource)) {
       return [{
         file: getDisplayPath(item),
         field: "body",
@@ -14644,6 +14825,58 @@ var geoMissingTldr = {
     return [];
   }
 };
+var ORG_AUTHOR_PATTERNS = [
+  /\bteam\b/i,
+  /\bredaktion\b/i,
+  /\beditorial\b/i,
+  /\beditors?\b/i,
+  /\bherausgeber\b/i,
+  /\bverlag\b/i,
+  /\bredaktionsteam\b/i
+];
+function createGeoAuthorNotPersonRule(brandName) {
+  return {
+    name: "geo-author-not-person",
+    severity: "warning",
+    category: "geo",
+    fixStrategy: "Replace organization name with individual author name. Use Person type in BlogPosting schema for stronger E-E-A-T signals.",
+    run: (item, context) => {
+      const geoTypes = context.geoEnabledContentTypes ?? ["blog"];
+      if (!geoTypes.includes(item.contentType)) return [];
+      if (!item.author || item.author.trim() === "") return [];
+      if (!brandName || brandName.trim() === "") return [];
+      const normalizedAuthor = item.author.trim().toLowerCase();
+      if (normalizedAuthor === brandName.trim().toLowerCase()) {
+        return [
+          {
+            file: getDisplayPath(item),
+            field: "author",
+            rule: "geo-author-not-person",
+            severity: "warning",
+            message: `Author "${item.author}" is the organization name \u2014 use a person's name instead`,
+            suggestion: "AI models cite named experts over faceless organizations. Use the actual author's name for stronger E-E-A-T signals."
+          }
+        ];
+      }
+      const matchesOrgPattern = ORG_AUTHOR_PATTERNS.some(
+        (pattern) => pattern.test(item.author)
+      );
+      if (matchesOrgPattern) {
+        return [
+          {
+            file: getDisplayPath(item),
+            field: "author",
+            rule: "geo-author-not-person",
+            severity: "warning",
+            message: `Author "${item.author}" appears to be an organization or team name`,
+            suggestion: "BlogPosting with author.@type: Person gets cited more than Organization. Use an individual person's name."
+          }
+        ];
+      }
+      return [];
+    }
+  };
+}
 var geoEeatStaticRules = [
   geoMissingSourceCitations,
   geoMissingExpertQuotes,
@@ -14656,7 +14889,8 @@ function createGeoEeatRules(geo) {
   return [
     ...geoEeatStaticRules,
     createGeoMissingAuthorRule(geo.genericAuthorNames ?? []),
-    createGeoHeadingTooVagueRule(geo.vagueHeadings ?? [])
+    createGeoHeadingTooVagueRule(geo.vagueHeadings ?? []),
+    createGeoAuthorNotPersonRule(geo.brandName)
   ];
 }
@@ -14743,7 +14977,7 @@ var geoMissingLists = {
     if (!geoTypes.includes(item.contentType)) return [];
     const wordCount = countWords(item.body);
     if (wordCount < STRUCTURE_MIN_WORDS) return [];
-    if (!hasMarkdownList(item.body)) {
+    if (!hasMarkdownList(item.body, item.contentSource)) {
       return [{
         file: getDisplayPath(item),
         field: "body",
@@ -15645,6 +15879,68 @@ var contentQualityRules = [
   sentenceVariety
 ];
+// src/rules/technical-site-rules.ts
+function createNoFeedRule(feedUrls) {
+  let hasFired = false;
+  return {
+    name: "technical-no-feed",
+    severity: "warning",
+    category: "technical",
+    fixStrategy: "Add an RSS or JSON feed endpoint exposing blog posts with full content.",
+    run: (_item, _context) => {
+      if (hasFired) return [];
+      hasFired = true;
+      if (feedUrls === void 0) return [];
+      if (feedUrls.length === 0) {
+        return [
+          {
+            file: "_site",
+            field: "feed",
+            rule: "technical-no-feed",
+            severity: "warning",
+            message: "No RSS/Atom/JSON feed detected \u2014 AI systems lose a structured ingestion path",
+            suggestion: "Feeds provide a structured ingestion path for AI systems beyond crawler discovery. Add an RSS or JSON feed endpoint."
+          }
+        ];
+      }
+      return [];
+    }
+  };
+}
+function createNoLlmsTxtRule(llmsTxtUrl) {
+  let hasFired = false;
+  return {
+    name: "technical-no-llms-txt",
+    severity: "warning",
+    category: "technical",
+    fixStrategy: "Create a /llms.txt endpoint that maps your most important content for LLM consumption in Markdown format.",
+    run: (_item, _context) => {
+      if (hasFired) return [];
+      hasFired = true;
+      if (llmsTxtUrl === void 0) return [];
+      if (llmsTxtUrl.trim() === "") {
+        return [
+          {
+            file: "_site",
+            field: "llms-txt",
+            rule: "technical-no-llms-txt",
+            severity: "warning",
+            message: "No /llms.txt endpoint detected \u2014 missing the emerging standard for LLM content declaration",
+            suggestion: "llms.txt is the robots.txt equivalent for AI \u2014 trivial to add, future-proofs your site for LLM crawlers."
+          }
+        ];
+      }
+      return [];
+    }
+  };
+}
+function createTechnicalSiteRules(technical) {
+  return [
+    createNoFeedRule(technical.feedUrls),
+    createNoLlmsTxtRule(technical.llmsTxtUrl)
+  ];
+}
 // src/rules/index.ts
 function buildRules(config, linkExtractor) {
   const rules = [
@@ -15664,7 +15960,7 @@ function buildRules(config, linkExtractor) {
     ...createI18nRules(config.i18n),
     ...dateRules,
     ...config.categories.length > 0 ? createCategoryRules(config.categories) : [],
-    ...schemaRules,
+    ...createSchemaRules(config.geo),
     ...createGeoRules(config.geo),
     ...createGeoEeatRules(config.geo),
     ...geoStructureRules,
@@ -15672,7 +15968,8 @@ function buildRules(config, linkExtractor) {
     ...createGeoRagRules(config.geo),
     ...keywordCoherenceRules,
     ...createCanonicalRules(config.siteUrl),
-    ...contentQualityRules
+    ...contentQualityRules,
+    ...createTechnicalSiteRules(config.technical)
   ];
   return rules.map((rule) => applyRuleOverride(rule, config.rules));
 }