@ijonis/geo-lint 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/cli.cjs +128 -19
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +128 -19
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +128 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +128 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -51,6 +51,8 @@ interface ContentItem {
|
|
|
51
51
|
rawContent: string;
|
|
52
52
|
/** Body content without frontmatter */
|
|
53
53
|
body: string;
|
|
54
|
+
/** How content was acquired: 'file' (MDX on disk) or 'url' (extracted via Readability) */
|
|
55
|
+
contentSource?: 'file' | 'url';
|
|
54
56
|
}
|
|
55
57
|
/**
|
|
56
58
|
* Context passed to rules for cross-content validation
|
package/dist/index.d.ts
CHANGED
|
@@ -51,6 +51,8 @@ interface ContentItem {
|
|
|
51
51
|
rawContent: string;
|
|
52
52
|
/** Body content without frontmatter */
|
|
53
53
|
body: string;
|
|
54
|
+
/** How content was acquired: 'file' (MDX on disk) or 'url' (extracted via Readability) */
|
|
55
|
+
contentSource?: 'file' | 'url';
|
|
54
56
|
}
|
|
55
57
|
/**
|
|
56
58
|
* Context passed to rules for cross-content validation
|
package/dist/index.js
CHANGED
|
@@ -854,6 +854,68 @@ var duplicateRules = [
|
|
|
854
854
|
duplicateDescription
|
|
855
855
|
];
|
|
856
856
|
|
|
857
|
+
// src/utils/plaintext-structure.ts
/** Longest trimmed line (in characters) still accepted as a heading or FAQ question. */
var MAX_HEADING_LENGTH = 80;
/** Minimum number of tab-delimited rows before plain text is treated as a table. */
var MIN_TABLE_ROWS = 2;

/**
 * Heuristically find heading-like lines in plain text that has no markdown markers.
 *
 * A line qualifies when it is non-empty, at most MAX_HEADING_LENGTH characters,
 * followed by a blank line (or the end of the text), does not end in `.,;:`,
 * and is either capitalised with at most 12 words, all caps, or ends with `?`.
 *
 * @param text Plain-text body to scan.
 * @returns One `{ level, text, line }` entry per detected heading; `line` is 1-based.
 *          Level is 2 for all-caps or short (<= 4 words) lines, otherwise 3.
 */
function detectPlaintextHeadings(text) {
  const startsUppercase = /^[A-ZÄÖÜ]/;
  const hasUppercase = /[A-ZÄÖÜ]/;
  const endsLikeSentence = /[.,;:]$/;
  const lines = text.split("\n");
  const headings = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line || line.length > MAX_HEADING_LENGTH) continue;
    // A missing next line (end of text) is treated the same as a blank one.
    const nextLine = lines[i + 1]?.trim() ?? "";
    if (nextLine !== "") continue;
    if (endsLikeSentence.test(line)) continue;
    const wordCount = line.split(/\s+/).length;
    const isTitleCase = startsUppercase.test(line) && wordCount <= 12;
    const isAllCaps = line === line.toUpperCase() && hasUppercase.test(line) && line.length > 2;
    const isQuestion = line.endsWith("?");
    if (isTitleCase || isAllCaps || isQuestion) {
      const level = isAllCaps || wordCount <= 4 ? 2 : 3;
      headings.push({ level, text: line, line: i + 1 });
    }
  }
  return headings;
}

/**
 * Heuristically decide whether plain text contains tabular data.
 *
 * Two signals are checked in order:
 *  1. at least MIN_TABLE_ROWS lines containing a tab, all splitting into the
 *     same number (>= 2) of tab-separated cells;
 *  2. at least MIN_TABLE_ROWS + 1 lines with a run of three or more spaces
 *     between two non-space characters (column-aligned text).
 *
 * @param text Plain-text body to scan.
 * @returns `true` when either signal is present.
 */
function detectPlaintextTable(text) {
  const lines = text.split("\n").filter((l) => l.trim().length > 0);
  // The delimiter is spelled as an escape so it cannot be confused with (or
  // silently converted to) a plain space, which would match ordinary prose.
  const tabLines = lines.filter((l) => l.includes("\t"));
  if (tabLines.length >= MIN_TABLE_ROWS) {
    const colCounts = tabLines.map((l) => l.split("\t").length);
    const consistent = colCounts.every((c) => c === colCounts[0] && c >= 2);
    if (consistent) return true;
  }
  const spaceSeparated = lines.filter((l) => /\S {3,}\S/.test(l));
  return spaceSeparated.length >= MIN_TABLE_ROWS + 1;
}

/**
 * Heuristically decide whether plain text contains a list.
 *
 * A list line starts (after optional whitespace) with a typographic bullet
 * (`•`, `·`, `–`, `—`), a single word character followed by `)`, or a number
 * followed by `)`, and then whitespace.
 *
 * @param text Plain-text body to scan.
 * @returns `true` when at least two lines look like list items.
 */
function detectPlaintextList(text) {
  // The `m` flag is kept so `^` also matches after in-line terminators such
  // as a lone `\r`, exactly as before.
  const listPattern = /^[\s]*[•·–—]\s+|^[\s]*\w\)\s+|^[\s]*\d+\)\s+/m;
  let listLineCount = 0;
  for (const line of text.split("\n")) {
    if (listPattern.test(line)) listLineCount++;
  }
  return listLineCount >= 2;
}

/**
 * Heuristically detect a question/answer (FAQ) structure in plain text.
 *
 * A line counts as an answered question when it ends with `?`, is at most
 * MAX_HEADING_LENGTH characters, and the next non-empty line is longer than
 * the question itself.
 *
 * @param text Plain-text body to scan.
 * @returns `{ hasFaq, questionCount }`; `hasFaq` is true for two or more answered questions.
 */
function detectPlaintextFaq(text) {
  const lines = text.split("\n").map((l) => l.trim());
  let questionCount = 0;
  // Walk bottom-up so the length of the nearest non-empty line below the
  // current one is always known without re-scanning the remainder of the text.
  let nextContentLength = 0;
  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i];
    const isShortQuestion = line.endsWith("?") && line.length <= MAX_HEADING_LENGTH;
    if (isShortQuestion && nextContentLength > line.length) {
      questionCount++;
    }
    if (line.length > 0) nextContentLength = line.length;
  }
  return {
    hasFaq: questionCount >= 2,
    questionCount
  };
}
|
|
918
|
+
|
|
857
919
|
// src/utils/heading-extractor.ts
|
|
858
920
|
function isInCodeBlock(lines, lineIndex) {
|
|
859
921
|
let inCodeBlock = false;
|
|
@@ -865,7 +927,7 @@ function isInCodeBlock(lines, lineIndex) {
|
|
|
865
927
|
}
|
|
866
928
|
return inCodeBlock;
|
|
867
929
|
}
|
|
868
|
-
function extractHeadings(mdxBody) {
|
|
930
|
+
function extractHeadings(mdxBody, contentSource) {
|
|
869
931
|
const headings = [];
|
|
870
932
|
const lines = mdxBody.split("\n");
|
|
871
933
|
const headingRegex = /^(#{1,6})\s+(.+)$/;
|
|
@@ -882,6 +944,9 @@ function extractHeadings(mdxBody) {
|
|
|
882
944
|
});
|
|
883
945
|
}
|
|
884
946
|
}
|
|
947
|
+
if (headings.length === 0 && contentSource === "url") {
|
|
948
|
+
return detectPlaintextHeadings(mdxBody);
|
|
949
|
+
}
|
|
885
950
|
return headings;
|
|
886
951
|
}
|
|
887
952
|
function countH1s(headings) {
|
|
@@ -913,6 +978,9 @@ var missingH1 = {
|
|
|
913
978
|
category: "seo",
|
|
914
979
|
fixStrategy: "Add an H1 heading (# Heading) at the start of the content",
|
|
915
980
|
run: (item) => {
|
|
981
|
+
if (item.contentSource === "url") {
|
|
982
|
+
return [];
|
|
983
|
+
}
|
|
916
984
|
if (item.contentType === "blog") {
|
|
917
985
|
return [];
|
|
918
986
|
}
|
|
@@ -1199,8 +1267,16 @@ function countWords(text) {
|
|
|
1199
1267
|
}
|
|
1200
1268
|
/**
 * Estimate the number of sentences in a markdown/plain-text string.
 *
 * Strategy, in order:
 *  1. count runs of `.`, `!` or `?` that are followed by whitespace, the end
 *     of the text, or an uppercase letter (covers "end.Next" without a space);
 *  2. if none were found, count lines longer than 20 trimmed characters,
 *     provided there is more than one such line;
 *  3. otherwise return 1 when the text contains any word of two or more
 *     characters, else 0.
 *
 * @param text Raw text; markdown is stripped via `stripMarkdown` first.
 * @returns Estimated sentence count (>= 0).
 */
function countSentences(text) {
  const plain = stripMarkdown(text);
  const terminators = plain.match(/[.!?]+(?:\s|$|(?=[A-ZÄÖÜ]))/g) ?? [];
  if (terminators.length > 0) {
    return terminators.length;
  }
  // No punctuation at all: fall back to treating substantial lines as sentences.
  const substantialLines = plain.split(/\n+/).filter((l) => l.trim().length > 20);
  if (substantialLines.length > 1) {
    return substantialLines.length;
  }
  return /\w{2,}/.test(plain) ? 1 : 0;
}
|
|
1205
1281
|
|
|
1206
1282
|
// src/utils/readability.ts
|
|
@@ -1482,6 +1558,7 @@ var robotsRules = [
|
|
|
1482
1558
|
// src/rules/slug-rules.ts
|
|
1483
1559
|
var SLUG_DEFAULTS = { maxLength: 75 };
|
|
1484
1560
|
var SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
|
|
1561
|
+
var URL_PATH_PATTERN = /^[a-z0-9]+(?:[-/][a-z0-9]+)*$/;
|
|
1485
1562
|
var slugInvalidCharacters = {
|
|
1486
1563
|
name: "slug-invalid-characters",
|
|
1487
1564
|
severity: "error",
|
|
@@ -1489,8 +1566,10 @@ var slugInvalidCharacters = {
|
|
|
1489
1566
|
fixStrategy: 'Use lowercase alphanumeric characters with hyphens only (e.g., "my-blog-post")',
|
|
1490
1567
|
run: (item) => {
|
|
1491
1568
|
if (!item.slug) return [];
|
|
1569
|
+
const isUrl = item.contentSource === "url";
|
|
1570
|
+
const pattern = isUrl ? URL_PATH_PATTERN : SLUG_PATTERN;
|
|
1492
1571
|
const hasUppercase = /[A-Z]/.test(item.slug);
|
|
1493
|
-
const matchesPattern =
|
|
1572
|
+
const matchesPattern = pattern.test(item.slug);
|
|
1494
1573
|
if (hasUppercase || !matchesPattern) {
|
|
1495
1574
|
return [{
|
|
1496
1575
|
file: getDisplayPath(item),
|
|
@@ -1498,7 +1577,7 @@ var slugInvalidCharacters = {
|
|
|
1498
1577
|
rule: "slug-invalid-characters",
|
|
1499
1578
|
severity: "error",
|
|
1500
1579
|
message: `Slug "${item.slug}" contains invalid characters`,
|
|
1501
|
-
suggestion: 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1580
|
+
suggestion: isUrl ? "URL paths must be lowercase alphanumeric with hyphens and slashes only" : 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1502
1581
|
}];
|
|
1503
1582
|
}
|
|
1504
1583
|
return [];
|
|
@@ -1763,8 +1842,8 @@ var WEAK_LEAD_STARTS = [
|
|
|
1763
1842
|
"schauen wir uns"
|
|
1764
1843
|
];
|
|
1765
1844
|
var TABLE_SEPARATOR_PATTERN = /\|\s*:?-{2,}/;
|
|
1766
|
-
function countQuestionHeadings(body) {
|
|
1767
|
-
const headings = extractHeadings(body);
|
|
1845
|
+
function countQuestionHeadings(body, contentSource) {
|
|
1846
|
+
const headings = extractHeadings(body, contentSource);
|
|
1768
1847
|
let count = 0;
|
|
1769
1848
|
for (const heading of headings) {
|
|
1770
1849
|
const text = heading.text.trim();
|
|
@@ -1826,12 +1905,20 @@ function countStatistics(body) {
|
|
|
1826
1905
|
}
|
|
1827
1906
|
return matches.size;
|
|
1828
1907
|
}
|
|
1829
|
-
function hasFAQSection(body) {
|
|
1908
|
+
/**
 * Report whether the body contains an FAQ section.
 *
 * A markdown H2/H3-style marker followed by one of the known FAQ titles always
 * counts. For content acquired from a URL (`contentSource === "url"`), the
 * plain-text question/answer heuristic is consulted as a fallback.
 *
 * @param body Content body to inspect.
 * @param contentSource Optional origin of the content (`"url"` enables the fallback).
 * @returns `true` when an FAQ section is detected.
 */
function hasFAQSection(body, contentSource) {
  const markdownFaqHeading = /#{2,3}\s*(FAQ|Häufige Fragen|Frequently Asked|Fragen und Antworten)/i;
  if (markdownFaqHeading.test(body)) {
    return true;
  }
  return contentSource === "url" ? detectPlaintextFaq(body).hasFaq : false;
}
|
|
1833
|
-
function hasMarkdownTable(body) {
|
|
1834
|
-
|
|
1916
|
+
/**
 * Report whether the body contains a table.
 *
 * A markdown separator row (matched by TABLE_SEPARATOR_PATTERN) always counts.
 * For content acquired from a URL (`contentSource === "url"`), the plain-text
 * table heuristic is consulted as a fallback.
 *
 * @param body Content body to inspect.
 * @param contentSource Optional origin of the content (`"url"` enables the fallback).
 * @returns `true` when a table is detected.
 */
function hasMarkdownTable(body, contentSource) {
  const hasSeparatorRow = TABLE_SEPARATOR_PATTERN.test(body);
  if (hasSeparatorRow) {
    return true;
  }
  return contentSource === "url" ? detectPlaintextTable(body) : false;
}
|
|
1836
1923
|
function countEntityMentions(body, entity) {
|
|
1837
1924
|
const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
@@ -1993,7 +2080,7 @@ function getParagraphs(body) {
|
|
|
1993
2080
|
}
|
|
1994
2081
|
return paragraphs;
|
|
1995
2082
|
}
|
|
1996
|
-
function hasMarkdownList(body) {
|
|
2083
|
+
function hasMarkdownList(body, contentSource) {
|
|
1997
2084
|
const lines = body.split("\n");
|
|
1998
2085
|
let inCodeBlock = false;
|
|
1999
2086
|
for (const line of lines) {
|
|
@@ -2006,6 +2093,9 @@ function hasMarkdownList(body) {
|
|
|
2006
2093
|
if (/^[-*]\s+/.test(trimmed)) return true;
|
|
2007
2094
|
if (/^\d+\.\s+/.test(trimmed)) return true;
|
|
2008
2095
|
}
|
|
2096
|
+
if (contentSource === "url") {
|
|
2097
|
+
return detectPlaintextList(body);
|
|
2098
|
+
}
|
|
2009
2099
|
return false;
|
|
2010
2100
|
}
|
|
2011
2101
|
function countInternalLinks(body) {
|
|
@@ -13737,8 +13827,27 @@ function jaccardSimilarity(a, b) {
|
|
|
13737
13827
|
const union = a.size + b.size - intersection;
|
|
13738
13828
|
return union > 0 ? intersection / union : 0;
|
|
13739
13829
|
}
|
|
13830
|
+
/**
 * Citation/reference boilerplate fragments that are removed before repetition
 * analysis. All patterns are global so every occurrence is stripped.
 */
var REFERENCE_PATTERNS = [
  /archived from the original on/gi,
  /retrieved (?:on )?\d/gi,
  /accessed (?:on )?\d/gi,
  /cite (?:web|book|journal|news)/gi,
  /\^\s*\[?\d+\]?/g,
  /isbn \d/gi,
  /doi:\s*\d/gi,
  /pmid:\s*\d/gi
];

/**
 * Remove citation boilerplate and any trailing reference section from text.
 *
 * First every REFERENCE_PATTERNS match is deleted. Then, if a line consisting
 * solely of a reference-section title (references, sources, bibliography,
 * einzelnachweise, weblinks; case-insensitive) is found, that line and
 * everything after it are dropped.
 *
 * @param text Text to clean.
 * @returns The text with reference boilerplate removed.
 */
function stripReferenceBoilerplate(text) {
  let result = text;
  for (const pattern of REFERENCE_PATTERNS) {
    // String.prototype.replace with a global regex starts from index 0 every
    // time, so sharing the module-level regex objects is safe here.
    result = result.replace(pattern, "");
  }
  // Tolerate CRLF line endings and horizontal whitespace around the title so
  // headings like "References \r\n" are recognised as well.
  result = result.replace(
    /\r?\n[ \t]*(?:references|sources|bibliography|einzelnachweise|weblinks)[ \t]*\r?\n[\s\S]*$/i,
    ""
  );
  return result;
}
|
|
13740
13848
|
function analyzeRepetition(body) {
|
|
13741
|
-
const
|
|
13849
|
+
const cleaned = stripReferenceBoilerplate(body);
|
|
13850
|
+
const plain = stripMarkdown(cleaned).toLowerCase();
|
|
13742
13851
|
const words = plain.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/).filter((w) => w.length > 0);
|
|
13743
13852
|
const fiveGrams = extractNgrams(words, 5);
|
|
13744
13853
|
const phraseCounts = /* @__PURE__ */ new Map();
|
|
@@ -13747,7 +13856,7 @@ function analyzeRepetition(body) {
|
|
|
13747
13856
|
}
|
|
13748
13857
|
const repeatedPhrases = [...phraseCounts.entries()].filter(([, count]) => count >= 3).sort((a, b) => b[1] - a[1]);
|
|
13749
13858
|
const topRepeatedPhrases = repeatedPhrases.slice(0, 5).map(([phrase, count]) => ({ phrase, count }));
|
|
13750
|
-
const paragraphs =
|
|
13859
|
+
const paragraphs = cleaned.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
|
|
13751
13860
|
let totalSimilarity = 0;
|
|
13752
13861
|
let pairCount = 0;
|
|
13753
13862
|
for (let i = 0; i < paragraphs.length; i++) {
|
|
@@ -14053,10 +14162,10 @@ var geoNoQuestionHeadings = {
|
|
|
14053
14162
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14054
14163
|
const wordCount = countWords(item.body);
|
|
14055
14164
|
if (wordCount < GEO_MIN_WORDS) return [];
|
|
14056
|
-
const headings = extractHeadings(item.body);
|
|
14165
|
+
const headings = extractHeadings(item.body, item.contentSource);
|
|
14057
14166
|
const subHeadings = headings.filter((h) => h.level === 2 || h.level === 3);
|
|
14058
14167
|
if (subHeadings.length === 0) return [];
|
|
14059
|
-
const questionCount = countQuestionHeadings(item.body);
|
|
14168
|
+
const questionCount = countQuestionHeadings(item.body, item.contentSource);
|
|
14060
14169
|
const ratio = questionCount / subHeadings.length;
|
|
14061
14170
|
if (ratio < QUESTION_HEADING_THRESHOLD) {
|
|
14062
14171
|
return [{
|
|
@@ -14146,7 +14255,7 @@ var geoMissingFaqSection = {
|
|
|
14146
14255
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14147
14256
|
const wordCount = countWords(item.body);
|
|
14148
14257
|
if (wordCount < FAQ_MIN_WORDS) return [];
|
|
14149
|
-
if (!hasFAQSection(item.body)) {
|
|
14258
|
+
if (!hasFAQSection(item.body, item.contentSource)) {
|
|
14150
14259
|
return [{
|
|
14151
14260
|
file: getDisplayPath(item),
|
|
14152
14261
|
field: "body",
|
|
@@ -14191,7 +14300,7 @@ var geoMissingTable = {
|
|
|
14191
14300
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14192
14301
|
const wordCount = countWords(item.body);
|
|
14193
14302
|
if (wordCount < TABLE_MIN_WORDS) return [];
|
|
14194
|
-
if (!hasMarkdownTable(item.body)) {
|
|
14303
|
+
if (!hasMarkdownTable(item.body, item.contentSource)) {
|
|
14195
14304
|
return [{
|
|
14196
14305
|
file: getDisplayPath(item),
|
|
14197
14306
|
field: "body",
|
|
@@ -14839,7 +14948,7 @@ var geoMissingLists = {
|
|
|
14839
14948
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14840
14949
|
const wordCount = countWords(item.body);
|
|
14841
14950
|
if (wordCount < STRUCTURE_MIN_WORDS) return [];
|
|
14842
|
-
if (!hasMarkdownList(item.body)) {
|
|
14951
|
+
if (!hasMarkdownList(item.body, item.contentSource)) {
|
|
14843
14952
|
return [{
|
|
14844
14953
|
file: getDisplayPath(item),
|
|
14845
14954
|
field: "body",
|