@ijonis/geo-lint 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/cli.cjs +128 -19
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +128 -19
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +128 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +128 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -898,6 +898,68 @@ var duplicateRules = [
|
|
|
898
898
|
duplicateDescription
|
|
899
899
|
];
|
|
900
900
|
|
|
901
|
+
// src/utils/plaintext-structure.ts
// Heuristics that recognise headings, tables, lists and FAQ blocks in text
// that carries no Markdown markup.

/** Trimmed lines longer than this are never treated as headings or FAQ questions. */
var MAX_HEADING_LENGTH = 80;

/** Minimum number of delimiter-bearing lines before text is considered tabular. */
var MIN_TABLE_ROWS = 2;

/**
 * Finds heading-like lines in plain text.
 *
 * A trimmed line is a candidate when it is non-empty, at most
 * MAX_HEADING_LENGTH characters long, followed by a blank line (or the end of
 * the text) and does not end in `.`, `,`, `;` or `:`. A candidate is reported
 * when it starts with an uppercase letter and has at most 12 words, is written
 * entirely in upper case, or ends with a question mark.
 *
 * @param {string} text - Plain-text body, lines separated by "\n".
 * @returns {{level: number, text: string, line: number}[]} Detected headings;
 *   `level` is 2 for all-caps lines or lines of at most four words, otherwise 3;
 *   `line` is the 1-based line number.
 */
function detectPlaintextHeadings(text) {
  const lines = text.split("\n");
  const headings = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line || line.length > MAX_HEADING_LENGTH) continue;
    // On the last line lines[i + 1] is undefined, which collapses to "" and
    // therefore counts as "followed by a blank line".
    const nextLine = lines[i + 1]?.trim() ?? "";
    if (nextLine !== "") continue;
    // Trailing sentence punctuation marks running text, not a heading.
    if (/[.,;:]$/.test(line)) continue;
    const wordCount = line.split(/\s+/).length;
    const isTitleCase = /^[A-ZÄÖÜ]/.test(line) && wordCount <= 12;
    const isAllCaps = line === line.toUpperCase() && /[A-ZÄÖÜ]/.test(line) && line.length > 2;
    const isQuestion = line.endsWith("?");
    if (isTitleCase || isAllCaps || isQuestion) {
      const level = isAllCaps || wordCount <= 4 ? 2 : 3;
      headings.push({ level, text: line, line: i + 1 });
    }
  }
  return headings;
}

/**
 * Reports whether plain text appears to contain a table.
 *
 * Two layouts are recognised: at least MIN_TABLE_ROWS tab-separated lines that
 * all split into the same number (>= 2) of columns, or at least
 * MIN_TABLE_ROWS + 1 lines that contain a run of three or more spaces between
 * non-space characters.
 *
 * @param {string} text - Plain-text body, lines separated by "\n".
 * @returns {boolean} True when either layout is found.
 */
function detectPlaintextTable(text) {
  const lines = text.split("\n").filter((l) => l.trim().length > 0);
  // The delimiter is spelled as an escape on purpose: a raw tab inside the
  // literal is indistinguishable from a space and is easily lost in transit.
  const tabLines = lines.filter((l) => l.includes("\t"));
  if (tabLines.length >= MIN_TABLE_ROWS) {
    const colCounts = tabLines.map((l) => l.split("\t").length);
    const consistent = colCounts.every((c) => c === colCounts[0] && c >= 2);
    if (consistent) return true;
  }
  const spaceSeparated = lines.filter((l) => /\S {3,}\S/.test(l));
  return spaceSeparated.length >= MIN_TABLE_ROWS + 1;
}

/**
 * Reports whether plain text contains at least two list-item lines.
 *
 * A list item is a line that starts (after optional whitespace) with one of
 * the markers `•`, `·`, `–`, `—`, a single word character followed by `)`,
 * or a number followed by `)`, and then whitespace.
 *
 * @param {string} text - Plain-text body, lines separated by "\n".
 * @returns {boolean} True when two or more lines match.
 */
function detectPlaintextList(text) {
  const listPattern = /^[\s]*[•·–—]\s+|^[\s]*\w\)\s+|^[\s]*\d+\)\s+/m;
  const itemLines = text.split("\n").filter((l) => listPattern.test(l));
  return itemLines.length >= 2;
}

/**
 * Counts question/answer pairs in plain text.
 *
 * A line counts as a question when, trimmed, it ends with `?`, is at most
 * MAX_HEADING_LENGTH characters long, and the next non-blank line is longer
 * than the question itself.
 *
 * @param {string} text - Plain-text body, lines separated by "\n".
 * @returns {{hasFaq: boolean, questionCount: number}} `hasFaq` is true when at
 *   least two such questions were found.
 */
function detectPlaintextFaq(text) {
  const lines = text.split("\n");
  let questionCount = 0;
  // Walk backwards so the nearest following non-blank line is always at hand;
  // this avoids re-scanning the rest of the text for every question.
  let following = null;
  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i].trim();
    const isCandidate = line.endsWith("?") && line.length <= MAX_HEADING_LENGTH;
    if (isCandidate && following !== null && following.length > line.length) {
      questionCount++;
    }
    if (line.length > 0) following = line;
  }
  return {
    hasFaq: questionCount >= 2,
    questionCount
  };
}
|
|
962
|
+
|
|
901
963
|
// src/utils/heading-extractor.ts
|
|
902
964
|
function isInCodeBlock(lines, lineIndex) {
|
|
903
965
|
let inCodeBlock = false;
|
|
@@ -909,7 +971,7 @@ function isInCodeBlock(lines, lineIndex) {
|
|
|
909
971
|
}
|
|
910
972
|
return inCodeBlock;
|
|
911
973
|
}
|
|
912
|
-
function extractHeadings(mdxBody) {
|
|
974
|
+
function extractHeadings(mdxBody, contentSource) {
|
|
913
975
|
const headings = [];
|
|
914
976
|
const lines = mdxBody.split("\n");
|
|
915
977
|
const headingRegex = /^(#{1,6})\s+(.+)$/;
|
|
@@ -926,6 +988,9 @@ function extractHeadings(mdxBody) {
|
|
|
926
988
|
});
|
|
927
989
|
}
|
|
928
990
|
}
|
|
991
|
+
if (headings.length === 0 && contentSource === "url") {
|
|
992
|
+
return detectPlaintextHeadings(mdxBody);
|
|
993
|
+
}
|
|
929
994
|
return headings;
|
|
930
995
|
}
|
|
931
996
|
function countH1s(headings) {
|
|
@@ -957,6 +1022,9 @@ var missingH1 = {
|
|
|
957
1022
|
category: "seo",
|
|
958
1023
|
fixStrategy: "Add an H1 heading (# Heading) at the start of the content",
|
|
959
1024
|
run: (item) => {
|
|
1025
|
+
if (item.contentSource === "url") {
|
|
1026
|
+
return [];
|
|
1027
|
+
}
|
|
960
1028
|
if (item.contentType === "blog") {
|
|
961
1029
|
return [];
|
|
962
1030
|
}
|
|
@@ -1243,8 +1311,16 @@ function countWords(text) {
|
|
|
1243
1311
|
}
|
|
1244
1312
|
/**
 * Estimates the number of sentences in a text.
 *
 * The text is first passed through stripMarkdown. Sentence terminators
 * (runs of `.`, `!` or `?` followed by whitespace, the end of the text, or an
 * uppercase letter) are counted; when none are present the count falls back to
 * the number of lines longer than 20 characters (if more than one), and finally
 * to 1 when the text contains any word of two or more characters, else 0.
 *
 * @param {string} text - Source text; handed to stripMarkdown before counting.
 * @returns {number} Estimated sentence count.
 */
function countSentences(text) {
  const plain = stripMarkdown(text);
  const terminators = plain.match(/[.!?]+(?:\s|$|(?=[A-ZÄÖÜ]))/g) ?? [];
  if (terminators.length > 0) return terminators.length;
  // No punctuation at all: treat each substantial line as one sentence.
  const substantialLines = plain.split(/\n+/).filter((l) => l.trim().length > 20);
  if (substantialLines.length > 1) return substantialLines.length;
  return /\w{2,}/.test(plain) ? 1 : 0;
}
|
|
1249
1325
|
|
|
1250
1326
|
// src/utils/readability.ts
|
|
@@ -1526,6 +1602,7 @@ var robotsRules = [
|
|
|
1526
1602
|
// src/rules/slug-rules.ts
|
|
1527
1603
|
var SLUG_DEFAULTS = { maxLength: 75 };
|
|
1528
1604
|
var SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
|
|
1605
|
+
var URL_PATH_PATTERN = /^[a-z0-9]+(?:[-/][a-z0-9]+)*$/;
|
|
1529
1606
|
var slugInvalidCharacters = {
|
|
1530
1607
|
name: "slug-invalid-characters",
|
|
1531
1608
|
severity: "error",
|
|
@@ -1533,8 +1610,10 @@ var slugInvalidCharacters = {
|
|
|
1533
1610
|
fixStrategy: 'Use lowercase alphanumeric characters with hyphens only (e.g., "my-blog-post")',
|
|
1534
1611
|
run: (item) => {
|
|
1535
1612
|
if (!item.slug) return [];
|
|
1613
|
+
const isUrl = item.contentSource === "url";
|
|
1614
|
+
const pattern = isUrl ? URL_PATH_PATTERN : SLUG_PATTERN;
|
|
1536
1615
|
const hasUppercase = /[A-Z]/.test(item.slug);
|
|
1537
|
-
const matchesPattern =
|
|
1616
|
+
const matchesPattern = pattern.test(item.slug);
|
|
1538
1617
|
if (hasUppercase || !matchesPattern) {
|
|
1539
1618
|
return [{
|
|
1540
1619
|
file: getDisplayPath(item),
|
|
@@ -1542,7 +1621,7 @@ var slugInvalidCharacters = {
|
|
|
1542
1621
|
rule: "slug-invalid-characters",
|
|
1543
1622
|
severity: "error",
|
|
1544
1623
|
message: `Slug "${item.slug}" contains invalid characters`,
|
|
1545
|
-
suggestion: 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1624
|
+
suggestion: isUrl ? "URL paths must be lowercase alphanumeric with hyphens and slashes only" : 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1546
1625
|
}];
|
|
1547
1626
|
}
|
|
1548
1627
|
return [];
|
|
@@ -1807,8 +1886,8 @@ var WEAK_LEAD_STARTS = [
|
|
|
1807
1886
|
"schauen wir uns"
|
|
1808
1887
|
];
|
|
1809
1888
|
var TABLE_SEPARATOR_PATTERN = /\|\s*:?-{2,}/;
|
|
1810
|
-
function countQuestionHeadings(body) {
|
|
1811
|
-
const headings = extractHeadings(body);
|
|
1889
|
+
function countQuestionHeadings(body, contentSource) {
|
|
1890
|
+
const headings = extractHeadings(body, contentSource);
|
|
1812
1891
|
let count = 0;
|
|
1813
1892
|
for (const heading of headings) {
|
|
1814
1893
|
const text = heading.text.trim();
|
|
@@ -1870,12 +1949,20 @@ function countStatistics(body) {
|
|
|
1870
1949
|
}
|
|
1871
1950
|
return matches.size;
|
|
1872
1951
|
}
|
|
1873
|
-
function hasFAQSection(body) {
|
|
1952
|
+
/**
 * Reports whether the body contains an FAQ section.
 *
 * A level-2 or level-3 Markdown heading titled "FAQ", "Häufige Fragen",
 * "Frequently Asked" or "Fragen und Antworten" (case-insensitive) always
 * counts. When `contentSource` is "url", the plain-text FAQ heuristic
 * (detectPlaintextFaq) is consulted as a fallback.
 *
 * @param {string} body - Content body to inspect.
 * @param {string} [contentSource] - Origin of the content; only "url" enables the fallback.
 * @returns {boolean} True when an FAQ section is detected.
 */
function hasFAQSection(body, contentSource) {
  const hasFaqHeading = /#{2,3}\s*(FAQ|Häufige Fragen|Frequently Asked|Fragen und Antworten)/i.test(body);
  return hasFaqHeading || (contentSource === "url" && detectPlaintextFaq(body).hasFaq);
}
|
|
1877
|
-
function hasMarkdownTable(body) {
|
|
1878
|
-
|
|
1960
|
+
/**
 * Reports whether the body contains a table.
 *
 * A match of TABLE_SEPARATOR_PATTERN anywhere in the body always counts. When
 * `contentSource` is "url", the plain-text table heuristic
 * (detectPlaintextTable) is consulted as a fallback.
 *
 * @param {string} body - Content body to inspect.
 * @param {string} [contentSource] - Origin of the content; only "url" enables the fallback.
 * @returns {boolean} True when a table is detected.
 */
function hasMarkdownTable(body, contentSource) {
  const hasSeparatorRow = TABLE_SEPARATOR_PATTERN.test(body);
  return hasSeparatorRow || (contentSource === "url" && detectPlaintextTable(body));
}
|
|
1880
1967
|
function countEntityMentions(body, entity) {
|
|
1881
1968
|
const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
@@ -2037,7 +2124,7 @@ function getParagraphs(body) {
|
|
|
2037
2124
|
}
|
|
2038
2125
|
return paragraphs;
|
|
2039
2126
|
}
|
|
2040
|
-
function hasMarkdownList(body) {
|
|
2127
|
+
function hasMarkdownList(body, contentSource) {
|
|
2041
2128
|
const lines = body.split("\n");
|
|
2042
2129
|
let inCodeBlock = false;
|
|
2043
2130
|
for (const line of lines) {
|
|
@@ -2050,6 +2137,9 @@ function hasMarkdownList(body) {
|
|
|
2050
2137
|
if (/^[-*]\s+/.test(trimmed)) return true;
|
|
2051
2138
|
if (/^\d+\.\s+/.test(trimmed)) return true;
|
|
2052
2139
|
}
|
|
2140
|
+
if (contentSource === "url") {
|
|
2141
|
+
return detectPlaintextList(body);
|
|
2142
|
+
}
|
|
2053
2143
|
return false;
|
|
2054
2144
|
}
|
|
2055
2145
|
function countInternalLinks(body) {
|
|
@@ -13781,8 +13871,27 @@ function jaccardSimilarity(a, b) {
|
|
|
13781
13871
|
const union = a.size + b.size - intersection;
|
|
13782
13872
|
return union > 0 ? intersection / union : 0;
|
|
13783
13873
|
}
|
|
13874
|
+
// Citation fragments (archive/retrieval notes, cite templates, footnote
// back-references, ISBN/DOI/PMID prefixes) removed before repetition analysis.
var REFERENCE_PATTERNS = [
  /archived from the original on/gi,
  /retrieved (?:on )?\d/gi,
  /accessed (?:on )?\d/gi,
  /cite (?:web|book|journal|news)/gi,
  /\^\s*\[?\d+\]?/g,
  /isbn \d/gi,
  /doi:\s*\d/gi,
  /pmid:\s*\d/gi
];

/**
 * Removes citation boilerplate from a text.
 *
 * Every match of each REFERENCE_PATTERNS entry is deleted, then everything
 * from the first line consisting solely of "references", "sources",
 * "bibliography", "einzelnachweise" or "weblinks" (case-insensitive) to the
 * end of the text is dropped.
 *
 * @param {string} text - Text to clean.
 * @returns {string} The text without the matched fragments and trailing section.
 */
function stripReferenceBoilerplate(text) {
  const withoutFragments = REFERENCE_PATTERNS.reduce(
    (remaining, fragment) => remaining.replace(fragment, ""),
    text
  );
  return withoutFragments.replace(
    /\n(?:references|sources|bibliography|einzelnachweise|weblinks)\n[\s\S]*$/i,
    ""
  );
}
|
|
13784
13892
|
function analyzeRepetition(body) {
|
|
13785
|
-
const
|
|
13893
|
+
const cleaned = stripReferenceBoilerplate(body);
|
|
13894
|
+
const plain = stripMarkdown(cleaned).toLowerCase();
|
|
13786
13895
|
const words = plain.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/).filter((w) => w.length > 0);
|
|
13787
13896
|
const fiveGrams = extractNgrams(words, 5);
|
|
13788
13897
|
const phraseCounts = /* @__PURE__ */ new Map();
|
|
@@ -13791,7 +13900,7 @@ function analyzeRepetition(body) {
|
|
|
13791
13900
|
}
|
|
13792
13901
|
const repeatedPhrases = [...phraseCounts.entries()].filter(([, count]) => count >= 3).sort((a, b) => b[1] - a[1]);
|
|
13793
13902
|
const topRepeatedPhrases = repeatedPhrases.slice(0, 5).map(([phrase, count]) => ({ phrase, count }));
|
|
13794
|
-
const paragraphs =
|
|
13903
|
+
const paragraphs = cleaned.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
|
|
13795
13904
|
let totalSimilarity = 0;
|
|
13796
13905
|
let pairCount = 0;
|
|
13797
13906
|
for (let i = 0; i < paragraphs.length; i++) {
|
|
@@ -14097,10 +14206,10 @@ var geoNoQuestionHeadings = {
|
|
|
14097
14206
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14098
14207
|
const wordCount = countWords(item.body);
|
|
14099
14208
|
if (wordCount < GEO_MIN_WORDS) return [];
|
|
14100
|
-
const headings = extractHeadings(item.body);
|
|
14209
|
+
const headings = extractHeadings(item.body, item.contentSource);
|
|
14101
14210
|
const subHeadings = headings.filter((h) => h.level === 2 || h.level === 3);
|
|
14102
14211
|
if (subHeadings.length === 0) return [];
|
|
14103
|
-
const questionCount = countQuestionHeadings(item.body);
|
|
14212
|
+
const questionCount = countQuestionHeadings(item.body, item.contentSource);
|
|
14104
14213
|
const ratio = questionCount / subHeadings.length;
|
|
14105
14214
|
if (ratio < QUESTION_HEADING_THRESHOLD) {
|
|
14106
14215
|
return [{
|
|
@@ -14190,7 +14299,7 @@ var geoMissingFaqSection = {
|
|
|
14190
14299
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14191
14300
|
const wordCount = countWords(item.body);
|
|
14192
14301
|
if (wordCount < FAQ_MIN_WORDS) return [];
|
|
14193
|
-
if (!hasFAQSection(item.body)) {
|
|
14302
|
+
if (!hasFAQSection(item.body, item.contentSource)) {
|
|
14194
14303
|
return [{
|
|
14195
14304
|
file: getDisplayPath(item),
|
|
14196
14305
|
field: "body",
|
|
@@ -14235,7 +14344,7 @@ var geoMissingTable = {
|
|
|
14235
14344
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14236
14345
|
const wordCount = countWords(item.body);
|
|
14237
14346
|
if (wordCount < TABLE_MIN_WORDS) return [];
|
|
14238
|
-
if (!hasMarkdownTable(item.body)) {
|
|
14347
|
+
if (!hasMarkdownTable(item.body, item.contentSource)) {
|
|
14239
14348
|
return [{
|
|
14240
14349
|
file: getDisplayPath(item),
|
|
14241
14350
|
field: "body",
|
|
@@ -14883,7 +14992,7 @@ var geoMissingLists = {
|
|
|
14883
14992
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14884
14993
|
const wordCount = countWords(item.body);
|
|
14885
14994
|
if (wordCount < STRUCTURE_MIN_WORDS) return [];
|
|
14886
|
-
if (!hasMarkdownList(item.body)) {
|
|
14995
|
+
if (!hasMarkdownList(item.body, item.contentSource)) {
|
|
14887
14996
|
return [{
|
|
14888
14997
|
file: getDisplayPath(item),
|
|
14889
14998
|
field: "body",
|