@ijonis/geo-lint 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/cli.cjs +128 -19
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +128 -19
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +128 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +128 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.1] - 2026-03-19
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `contentSource` field on `ContentItem` — allows rules to distinguish between file-based MDX and URL-extracted content
|
|
14
|
+
- Plain-text structure detection (`src/utils/plaintext-structure.ts`) — heuristic heading, table, list, and FAQ detection for content without markdown formatting
|
|
15
|
+
- GEO rules now fall back to plain-text structure detection when `contentSource` is `'url'`, fixing false 100/100 GEO scores on Readability-extracted content
|
|
16
|
+
- Integration tests for URL scanner compatibility
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
- **GEO rules never firing on URL-scanned content** — headings, tables, lists, and FAQ sections are now detected in plain text (no markdown required)
|
|
20
|
+
- **Readability score 0 for German content** — `countSentences` now handles newline-separated sentences and periods without trailing spaces
|
|
21
|
+
- **`slug-invalid-characters` false positive on URL paths** — slashes are now allowed when `contentSource` is `'url'`
|
|
22
|
+
- **`content-repetition` flagging footnotes** — reference boilerplate (Wikipedia citations, DOI, ISBN, etc.) is stripped before n-gram analysis
|
|
23
|
+
- **`missing-h1` false positive on URL content** — rule is skipped when `contentSource` is `'url'` (Readability strips `<h1>`, title is in metadata)
|
|
24
|
+
|
|
10
25
|
## [0.2.0] - 2026-03-10
|
|
11
26
|
|
|
12
27
|
### Added
|
package/dist/cli.cjs
CHANGED
|
@@ -883,6 +883,68 @@ var duplicateRules = [
|
|
|
883
883
|
duplicateDescription
|
|
884
884
|
];
|
|
885
885
|
|
|
886
|
+
// src/utils/plaintext-structure.ts
//
// Heuristic structure detection for text that carries no markdown markup.
// Every detector works line by line on the raw text and returns plain data.

/** Longest trimmed line (in characters) still accepted as a heading or FAQ question. */
var MAX_HEADING_LENGTH = 80;
/** Minimum number of row-like lines required before text is treated as a table. */
var MIN_TABLE_ROWS = 2;

/**
 * Finds lines that look like headings in plain text.
 *
 * A line qualifies when it is non-empty, at most MAX_HEADING_LENGTH characters
 * long, followed by a blank line (or the end of the text), does not end in
 * `.`, `,`, `;` or `:`, and is either capitalised with at most 12 words,
 * written entirely in upper case, or ends with a question mark.
 *
 * @param {string} text - Plain-text content to scan.
 * @returns {Array<{level: number, text: string, line: number}>} Detected
 *   headings; `level` is 2 for all-caps or short (<= 4 words) lines and 3
 *   otherwise, `line` is the 1-based line number.
 */
function detectPlaintextHeadings(text) {
  const lines = text.split("\n");
  const headings = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line || line.length > MAX_HEADING_LENGTH) continue;
    // A missing next line (end of text) is treated like a blank one.
    const nextLine = lines[i + 1]?.trim() ?? "";
    if (nextLine !== "") continue;
    // Trailing sentence punctuation marks running text, not a heading.
    if (/[.,;:]$/.test(line)) continue;
    const wordCount = line.split(/\s+/).length;
    const isTitleCase = /^[A-ZÄÖÜ]/.test(line) && wordCount <= 12;
    const isAllCaps = line === line.toUpperCase() && /[A-ZÄÖÜ]/.test(line) && line.length > 2;
    const isQuestion = line.endsWith("?");
    if (isTitleCase || isAllCaps || isQuestion) {
      const level = isAllCaps || wordCount <= 4 ? 2 : 3;
      headings.push({ level, text: line, line: i + 1 });
    }
  }
  return headings;
}

/**
 * Reports whether plain text contains something shaped like a table.
 *
 * Two layouts are recognised: tab-separated rows (at least MIN_TABLE_ROWS
 * lines containing a tab, all with the same column count), or at least
 * MIN_TABLE_ROWS + 1 lines whose cells are separated by runs of three or
 * more spaces.
 *
 * @param {string} text - Plain-text content to scan.
 * @returns {boolean} True when either layout is found.
 */
function detectPlaintextTable(text) {
  const lines = text.split("\n").filter((l) => l.trim().length > 0);
  // The delimiter is written as an escape so it cannot degrade into a plain
  // space, which would make ordinary prose look like tabular data.
  const tabLines = lines.filter((l) => l.includes("\t"));
  if (tabLines.length >= MIN_TABLE_ROWS) {
    const colCounts = tabLines.map((l) => l.split("\t").length);
    const consistent = colCounts.every((c) => c === colCounts[0] && c >= 2);
    if (consistent) return true;
  }
  const spaceSeparated = lines.filter((l) => /\S {3,}\S/.test(l));
  return spaceSeparated.length >= MIN_TABLE_ROWS + 1;
}

/**
 * Reports whether plain text contains a list written without markdown markers.
 *
 * Recognised item prefixes are the bullets `•`, `·`, `–`, `—`, a single word
 * character followed by `)` (e.g. `a)`), and a number followed by `)`.
 *
 * @param {string} text - Plain-text content to scan.
 * @returns {boolean} True when at least two lines start with such a prefix.
 */
function detectPlaintextList(text) {
  const listPattern = /^[\s]*[•·–—]\s+|^[\s]*\w\)\s+|^[\s]*\d+\)\s+/m;
  const itemLines = text.split("\n").filter((l) => listPattern.test(l));
  return itemLines.length >= 2;
}

/**
 * Counts question/answer pairs in plain text.
 *
 * A line counts as a question when it ends with `?`, is at most
 * MAX_HEADING_LENGTH characters long, and the nearest non-blank line below it
 * is longer than the question itself.
 *
 * @param {string} text - Plain-text content to scan.
 * @returns {{hasFaq: boolean, questionCount: number}} `hasFaq` is true when at
 *   least two such questions are found.
 */
function detectPlaintextFaq(text) {
  const lines = text.split("\n").map((l) => l.trim());
  let questionCount = 0;
  // Walk bottom-up so the length of the nearest non-blank line below is always
  // known without re-scanning the remainder of the text for each question.
  let nextContentLength = 0;
  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i];
    if (line.endsWith("?") && line.length <= MAX_HEADING_LENGTH && nextContentLength > line.length) {
      questionCount++;
    }
    if (line.length > 0) nextContentLength = line.length;
  }
  return {
    hasFaq: questionCount >= 2,
    questionCount
  };
}
|
|
947
|
+
|
|
886
948
|
// src/utils/heading-extractor.ts
|
|
887
949
|
function isInCodeBlock(lines, lineIndex) {
|
|
888
950
|
let inCodeBlock = false;
|
|
@@ -894,7 +956,7 @@ function isInCodeBlock(lines, lineIndex) {
|
|
|
894
956
|
}
|
|
895
957
|
return inCodeBlock;
|
|
896
958
|
}
|
|
897
|
-
function extractHeadings(mdxBody) {
|
|
959
|
+
function extractHeadings(mdxBody, contentSource) {
|
|
898
960
|
const headings = [];
|
|
899
961
|
const lines = mdxBody.split("\n");
|
|
900
962
|
const headingRegex = /^(#{1,6})\s+(.+)$/;
|
|
@@ -911,6 +973,9 @@ function extractHeadings(mdxBody) {
|
|
|
911
973
|
});
|
|
912
974
|
}
|
|
913
975
|
}
|
|
976
|
+
if (headings.length === 0 && contentSource === "url") {
|
|
977
|
+
return detectPlaintextHeadings(mdxBody);
|
|
978
|
+
}
|
|
914
979
|
return headings;
|
|
915
980
|
}
|
|
916
981
|
function countH1s(headings) {
|
|
@@ -942,6 +1007,9 @@ var missingH1 = {
|
|
|
942
1007
|
category: "seo",
|
|
943
1008
|
fixStrategy: "Add an H1 heading (# Heading) at the start of the content",
|
|
944
1009
|
run: (item) => {
|
|
1010
|
+
if (item.contentSource === "url") {
|
|
1011
|
+
return [];
|
|
1012
|
+
}
|
|
945
1013
|
if (item.contentType === "blog") {
|
|
946
1014
|
return [];
|
|
947
1015
|
}
|
|
@@ -1228,8 +1296,16 @@ function countWords(text) {
|
|
|
1228
1296
|
}
|
|
1229
1297
|
/**
 * Estimates the number of sentences in a piece of content.
 *
 * Strategy, in order:
 *   1. Count runs of `.`, `!` or `?` that are followed by whitespace, the end
 *      of the text, or directly by an upper-case letter (a period without a
 *      trailing space).
 *   2. If no such ending exists, treat every line longer than 20 characters
 *      as one sentence, provided there is more than one such line.
 *   3. Otherwise return 1 when the text contains any word of two or more
 *      characters, else 0.
 *
 * @param {string} text - Content to analyse; it is passed through
 *   `stripMarkdown` before counting.
 * @returns {number} Estimated sentence count.
 */
function countSentences(text) {
  const stripped = stripMarkdown(text);
  // \p{Lu} instead of an ASCII/German-only class so a missing space before
  // any upper-case letter is recognised as a sentence boundary.
  const sentenceEndings = stripped.match(/[.!?]+(?:\s|$|(?=\p{Lu}))/gu);
  if (sentenceEndings !== null) {
    return sentenceEndings.length;
  }
  const longLines = stripped.split(/\n+/).filter((l) => l.trim().length > 20);
  if (longLines.length > 1) {
    return longLines.length;
  }
  // \w only covers ASCII, which would report 0 sentences for unpunctuated
  // text written in a non-Latin script; match Unicode letters and digits.
  const hasWords = /[\p{L}\p{N}_]{2,}/u.test(stripped);
  return hasWords ? 1 : 0;
}
|
|
1234
1310
|
|
|
1235
1311
|
// src/utils/readability.ts
|
|
@@ -1511,6 +1587,7 @@ var robotsRules = [
|
|
|
1511
1587
|
// src/rules/slug-rules.ts
|
|
1512
1588
|
var SLUG_DEFAULTS = { maxLength: 75 };
|
|
1513
1589
|
var SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
|
|
1590
|
+
var URL_PATH_PATTERN = /^[a-z0-9]+(?:[-/][a-z0-9]+)*$/;
|
|
1514
1591
|
var slugInvalidCharacters = {
|
|
1515
1592
|
name: "slug-invalid-characters",
|
|
1516
1593
|
severity: "error",
|
|
@@ -1518,8 +1595,10 @@ var slugInvalidCharacters = {
|
|
|
1518
1595
|
fixStrategy: 'Use lowercase alphanumeric characters with hyphens only (e.g., "my-blog-post")',
|
|
1519
1596
|
run: (item) => {
|
|
1520
1597
|
if (!item.slug) return [];
|
|
1598
|
+
const isUrl = item.contentSource === "url";
|
|
1599
|
+
const pattern = isUrl ? URL_PATH_PATTERN : SLUG_PATTERN;
|
|
1521
1600
|
const hasUppercase = /[A-Z]/.test(item.slug);
|
|
1522
|
-
const matchesPattern =
|
|
1601
|
+
const matchesPattern = pattern.test(item.slug);
|
|
1523
1602
|
if (hasUppercase || !matchesPattern) {
|
|
1524
1603
|
return [{
|
|
1525
1604
|
file: getDisplayPath(item),
|
|
@@ -1527,7 +1606,7 @@ var slugInvalidCharacters = {
|
|
|
1527
1606
|
rule: "slug-invalid-characters",
|
|
1528
1607
|
severity: "error",
|
|
1529
1608
|
message: `Slug "${item.slug}" contains invalid characters`,
|
|
1530
|
-
suggestion: 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1609
|
+
suggestion: isUrl ? "URL paths must be lowercase alphanumeric with hyphens and slashes only" : 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1531
1610
|
}];
|
|
1532
1611
|
}
|
|
1533
1612
|
return [];
|
|
@@ -1792,8 +1871,8 @@ var WEAK_LEAD_STARTS = [
|
|
|
1792
1871
|
"schauen wir uns"
|
|
1793
1872
|
];
|
|
1794
1873
|
var TABLE_SEPARATOR_PATTERN = /\|\s*:?-{2,}/;
|
|
1795
|
-
function countQuestionHeadings(body) {
|
|
1796
|
-
const headings = extractHeadings(body);
|
|
1874
|
+
function countQuestionHeadings(body, contentSource) {
|
|
1875
|
+
const headings = extractHeadings(body, contentSource);
|
|
1797
1876
|
let count = 0;
|
|
1798
1877
|
for (const heading of headings) {
|
|
1799
1878
|
const text = heading.text.trim();
|
|
@@ -1855,12 +1934,20 @@ function countStatistics(body) {
|
|
|
1855
1934
|
}
|
|
1856
1935
|
return matches.size;
|
|
1857
1936
|
}
|
|
1858
|
-
function hasFAQSection(body) {
|
|
1937
|
+
/**
 * Tells whether the content contains an FAQ section.
 *
 * A level-2 or level-3 markdown heading naming an FAQ (English or German)
 * always counts. For content whose source is "url" the plain-text FAQ
 * detector is consulted as a fallback.
 *
 * @param {string} body - Content body to inspect.
 * @param {string} [contentSource] - Origin of the content; only "url"
 *   enables the plain-text fallback.
 * @returns {boolean} True when an FAQ section is found.
 */
function hasFAQSection(body, contentSource) {
  const markdownFaqHeading = /#{2,3}\s*(FAQ|Häufige Fragen|Frequently Asked|Fragen und Antworten)/i;
  if (markdownFaqHeading.test(body)) {
    return true;
  }
  return contentSource === "url" ? detectPlaintextFaq(body).hasFaq : false;
}
|
|
1862
|
-
function hasMarkdownTable(body) {
|
|
1863
|
-
|
|
1945
|
+
/**
 * Tells whether the content contains a table.
 *
 * A markdown separator row (matched by TABLE_SEPARATOR_PATTERN) always
 * counts. For content whose source is "url" the plain-text table detector is
 * consulted as a fallback.
 *
 * @param {string} body - Content body to inspect.
 * @param {string} [contentSource] - Origin of the content; only "url"
 *   enables the plain-text fallback.
 * @returns {boolean} True when a table is found.
 */
function hasMarkdownTable(body, contentSource) {
  const hasSeparatorRow = TABLE_SEPARATOR_PATTERN.test(body);
  return hasSeparatorRow || (contentSource === "url" && detectPlaintextTable(body));
}
|
|
1865
1952
|
function countEntityMentions(body, entity) {
|
|
1866
1953
|
const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
@@ -2022,7 +2109,7 @@ function getParagraphs(body) {
|
|
|
2022
2109
|
}
|
|
2023
2110
|
return paragraphs;
|
|
2024
2111
|
}
|
|
2025
|
-
function hasMarkdownList(body) {
|
|
2112
|
+
function hasMarkdownList(body, contentSource) {
|
|
2026
2113
|
const lines = body.split("\n");
|
|
2027
2114
|
let inCodeBlock = false;
|
|
2028
2115
|
for (const line of lines) {
|
|
@@ -2035,6 +2122,9 @@ function hasMarkdownList(body) {
|
|
|
2035
2122
|
if (/^[-*]\s+/.test(trimmed)) return true;
|
|
2036
2123
|
if (/^\d+\.\s+/.test(trimmed)) return true;
|
|
2037
2124
|
}
|
|
2125
|
+
if (contentSource === "url") {
|
|
2126
|
+
return detectPlaintextList(body);
|
|
2127
|
+
}
|
|
2038
2128
|
return false;
|
|
2039
2129
|
}
|
|
2040
2130
|
function countInternalLinks(body) {
|
|
@@ -13766,8 +13856,27 @@ function jaccardSimilarity(a, b) {
|
|
|
13766
13856
|
const union = a.size + b.size - intersection;
|
|
13767
13857
|
return union > 0 ? intersection / union : 0;
|
|
13768
13858
|
}
|
|
13859
|
+
/**
 * Citation phrases that recur in reference lists. Only the matched phrase is
 * removed, not the rest of the citation line.
 */
var REFERENCE_PATTERNS = [
  /archived from the original on/gi,
  /retrieved (?:on )?\d/gi,
  /accessed (?:on )?\d/gi,
  /cite (?:web|book|journal|news)/gi,
  /\^\s*\[?\d+\]?/g,
  /isbn \d/gi,
  /doi:\s*\d/gi,
  /pmid:\s*\d/gi
];

/**
 * Removes citation boilerplate from text.
 *
 * First deletes every occurrence of the REFERENCE_PATTERNS phrases, then
 * drops everything from the first line that consists solely of a
 * reference-section title (References, Sources, Bibliography,
 * Einzelnachweise, Weblinks) to the end of the text.
 *
 * @param {string} text - Content to clean.
 * @returns {string} The text without the matched boilerplate.
 */
function stripReferenceBoilerplate(text) {
  let result = text;
  for (const pattern of REFERENCE_PATTERNS) {
    // String.prototype.replace restarts global patterns from index 0, so the
    // shared regex objects are safe to reuse across calls.
    result = result.replace(pattern, "");
  }
  // Accept CRLF line endings and spaces/tabs around the section title;
  // otherwise the whole reference section would be left in the text.
  result = result.replace(
    /\r?\n[ \t]*(?:references|sources|bibliography|einzelnachweise|weblinks)[ \t]*\r?\n[\s\S]*$/i,
    ""
  );
  return result;
}
|
|
13769
13877
|
function analyzeRepetition(body) {
|
|
13770
|
-
const
|
|
13878
|
+
const cleaned = stripReferenceBoilerplate(body);
|
|
13879
|
+
const plain = stripMarkdown(cleaned).toLowerCase();
|
|
13771
13880
|
const words = plain.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/).filter((w) => w.length > 0);
|
|
13772
13881
|
const fiveGrams = extractNgrams(words, 5);
|
|
13773
13882
|
const phraseCounts = /* @__PURE__ */ new Map();
|
|
@@ -13776,7 +13885,7 @@ function analyzeRepetition(body) {
|
|
|
13776
13885
|
}
|
|
13777
13886
|
const repeatedPhrases = [...phraseCounts.entries()].filter(([, count]) => count >= 3).sort((a, b) => b[1] - a[1]);
|
|
13778
13887
|
const topRepeatedPhrases = repeatedPhrases.slice(0, 5).map(([phrase, count]) => ({ phrase, count }));
|
|
13779
|
-
const paragraphs =
|
|
13888
|
+
const paragraphs = cleaned.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
|
|
13780
13889
|
let totalSimilarity = 0;
|
|
13781
13890
|
let pairCount = 0;
|
|
13782
13891
|
for (let i = 0; i < paragraphs.length; i++) {
|
|
@@ -14082,10 +14191,10 @@ var geoNoQuestionHeadings = {
|
|
|
14082
14191
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14083
14192
|
const wordCount = countWords(item.body);
|
|
14084
14193
|
if (wordCount < GEO_MIN_WORDS) return [];
|
|
14085
|
-
const headings = extractHeadings(item.body);
|
|
14194
|
+
const headings = extractHeadings(item.body, item.contentSource);
|
|
14086
14195
|
const subHeadings = headings.filter((h) => h.level === 2 || h.level === 3);
|
|
14087
14196
|
if (subHeadings.length === 0) return [];
|
|
14088
|
-
const questionCount = countQuestionHeadings(item.body);
|
|
14197
|
+
const questionCount = countQuestionHeadings(item.body, item.contentSource);
|
|
14089
14198
|
const ratio = questionCount / subHeadings.length;
|
|
14090
14199
|
if (ratio < QUESTION_HEADING_THRESHOLD) {
|
|
14091
14200
|
return [{
|
|
@@ -14175,7 +14284,7 @@ var geoMissingFaqSection = {
|
|
|
14175
14284
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14176
14285
|
const wordCount = countWords(item.body);
|
|
14177
14286
|
if (wordCount < FAQ_MIN_WORDS) return [];
|
|
14178
|
-
if (!hasFAQSection(item.body)) {
|
|
14287
|
+
if (!hasFAQSection(item.body, item.contentSource)) {
|
|
14179
14288
|
return [{
|
|
14180
14289
|
file: getDisplayPath(item),
|
|
14181
14290
|
field: "body",
|
|
@@ -14220,7 +14329,7 @@ var geoMissingTable = {
|
|
|
14220
14329
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14221
14330
|
const wordCount = countWords(item.body);
|
|
14222
14331
|
if (wordCount < TABLE_MIN_WORDS) return [];
|
|
14223
|
-
if (!hasMarkdownTable(item.body)) {
|
|
14332
|
+
if (!hasMarkdownTable(item.body, item.contentSource)) {
|
|
14224
14333
|
return [{
|
|
14225
14334
|
file: getDisplayPath(item),
|
|
14226
14335
|
field: "body",
|
|
@@ -14868,7 +14977,7 @@ var geoMissingLists = {
|
|
|
14868
14977
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14869
14978
|
const wordCount = countWords(item.body);
|
|
14870
14979
|
if (wordCount < STRUCTURE_MIN_WORDS) return [];
|
|
14871
|
-
if (!hasMarkdownList(item.body)) {
|
|
14980
|
+
if (!hasMarkdownList(item.body, item.contentSource)) {
|
|
14872
14981
|
return [{
|
|
14873
14982
|
file: getDisplayPath(item),
|
|
14874
14983
|
field: "body",
|