@ijonis/geo-lint 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/cli.cjs +128 -19
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +128 -19
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +128 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +128 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -856,6 +856,68 @@ var duplicateRules = [
|
|
|
856
856
|
duplicateDescription
|
|
857
857
|
];
|
|
858
858
|
|
|
859
|
+
// src/utils/plaintext-structure.ts
|
|
860
|
+
// Longest trimmed line (in characters) that may still be treated as a
// plaintext heading or FAQ question.
var MAX_HEADING_LENGTH = 80;
// Minimum number of delimited rows required before plaintext is considered a table.
var MIN_TABLE_ROWS = 2;

/**
 * Heuristically find heading-like lines in text that has no markdown markup.
 *
 * A line qualifies when it is non-empty, at most MAX_HEADING_LENGTH characters
 * after trimming, is the last line or is followed by a blank line, does not end
 * in `.`, `,`, `;` or `:`, and additionally either starts with an uppercase
 * letter (A-Z, Ä, Ö, Ü) with at most 12 words, is entirely upper-case (more
 * than 2 characters), or ends with a question mark.
 *
 * @param {string} text - Raw plaintext body.
 * @returns {{level: number, text: string, line: number}[]} Detected headings;
 *   `line` is 1-based, `level` is 2 for all-caps or short (<= 4 words) lines
 *   and 3 otherwise.
 */
function detectPlaintextHeadings(text) {
  const rows = text.split("\n");
  const found = [];
  rows.forEach((raw, idx) => {
    const candidate = raw.trim();
    if (candidate === "" || candidate.length > MAX_HEADING_LENGTH) return;

    // A heading must stand alone: the next line is blank or does not exist.
    const following = idx + 1 < rows.length ? rows[idx + 1].trim() : "";
    if (following !== "") return;

    // Trailing sentence punctuation marks ordinary prose, not a heading.
    if (/[.,;:]$/.test(candidate)) return;

    const wordCount = candidate.split(/\s+/).length;
    const startsUppercase = /^[A-ZÄÖÜ]/.test(candidate);
    const looksTitleCase = startsUppercase && wordCount <= 12;
    const looksAllCaps =
      candidate.length > 2 &&
      /[A-ZÄÖÜ]/.test(candidate) &&
      candidate === candidate.toUpperCase();
    const looksLikeQuestion = candidate.endsWith("?");
    if (!looksTitleCase && !looksAllCaps && !looksLikeQuestion) return;

    let level = 3;
    if (looksAllCaps || wordCount <= 4) {
      level = 2;
    }
    found.push({ level, text: candidate, line: idx + 1 });
  });
  return found;
}
|
|
882
|
+
/**
 * Heuristically decide whether plaintext contains tabular data.
 *
 * Two signals are checked, in order:
 *  1. At least MIN_TABLE_ROWS non-blank lines contain a TAB and every such
 *     line splits into the same number (>= 2) of tab-delimited cells.
 *  2. At least MIN_TABLE_ROWS + 1 non-blank lines contain a run of three or
 *     more spaces between non-space characters (column-aligned text).
 *
 * The tab delimiter is written as the explicit escape "\t". A raw whitespace
 * character inside the literal is indistinguishable from a space and, read as
 * a space, would make any two prose lines with equal word counts look like a
 * table.
 *
 * @param {string} text - Raw plaintext body.
 * @returns {boolean} true when either signal indicates a table.
 */
function detectPlaintextTable(text) {
  const rows = text.split("\n").filter((row) => row.trim().length > 0);

  const tabRows = rows.filter((row) => row.includes("\t"));
  if (tabRows.length >= MIN_TABLE_ROWS) {
    const cellCounts = tabRows.map((row) => row.split("\t").length);
    const expected = cellCounts[0];
    const isConsistent = expected >= 2 && cellCounts.every((count) => count === expected);
    if (isConsistent) return true;
  }

  // Fallback: columns aligned with runs of spaces instead of tabs.
  const alignedRows = rows.filter((row) => /\S {3,}\S/.test(row));
  return alignedRows.length >= MIN_TABLE_ROWS + 1;
}
|
|
898
|
+
/**
 * Heuristically decide whether plaintext contains a list.
 *
 * A line counts as a list item when, after optional leading whitespace, it
 * starts with one of the bullet glyphs `•`, `·`, `–`, `—`, or with a single
 * word character or a number followed by `)`, each followed by whitespace.
 *
 * @param {string} text - Raw plaintext body.
 * @returns {boolean} true when at least two lines look like list items.
 */
function detectPlaintextList(text) {
  const itemMarker = /^[\s]*[•·–—]\s+|^[\s]*\w\)\s+|^[\s]*\d+\)\s+/m;
  let itemCount = 0;
  for (const row of text.split("\n")) {
    if (itemMarker.test(row)) {
      itemCount += 1;
    }
  }
  return itemCount >= 2;
}
|
|
903
|
+
/**
 * Heuristically detect a question-and-answer structure in plaintext.
 *
 * A line counts as an FAQ question when its trimmed form ends with `?`, is at
 * most MAX_HEADING_LENGTH characters long, and the nearest non-blank line
 * below it is (after trimming) longer than the question itself.
 *
 * The text is scanned once from the bottom up while remembering the length of
 * the closest non-blank line seen so far. This avoids copying the remainder of
 * the line array for every question line.
 *
 * @param {string} text - Raw plaintext body.
 * @returns {{hasFaq: boolean, questionCount: number}} `hasFaq` is true when at
 *   least two qualifying questions were found.
 */
function detectPlaintextFaq(text) {
  const lines = text.split("\n");
  let questionCount = 0;
  // Trimmed length of the nearest non-blank line below the current one;
  // -1 while no such line exists, so the comparison below can never succeed.
  let nextContentLength = -1;
  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i].trim();
    const isShortQuestion = line.endsWith("?") && line.length <= MAX_HEADING_LENGTH;
    if (isShortQuestion && nextContentLength > line.length) {
      questionCount++;
    }
    if (line.length > 0) {
      nextContentLength = line.length;
    }
  }
  return {
    hasFaq: questionCount >= 2,
    questionCount
  };
}
|
|
920
|
+
|
|
859
921
|
// src/utils/heading-extractor.ts
|
|
860
922
|
function isInCodeBlock(lines, lineIndex) {
|
|
861
923
|
let inCodeBlock = false;
|
|
@@ -867,7 +929,7 @@ function isInCodeBlock(lines, lineIndex) {
|
|
|
867
929
|
}
|
|
868
930
|
return inCodeBlock;
|
|
869
931
|
}
|
|
870
|
-
function extractHeadings(mdxBody) {
|
|
932
|
+
function extractHeadings(mdxBody, contentSource) {
|
|
871
933
|
const headings = [];
|
|
872
934
|
const lines = mdxBody.split("\n");
|
|
873
935
|
const headingRegex = /^(#{1,6})\s+(.+)$/;
|
|
@@ -884,6 +946,9 @@ function extractHeadings(mdxBody) {
|
|
|
884
946
|
});
|
|
885
947
|
}
|
|
886
948
|
}
|
|
949
|
+
if (headings.length === 0 && contentSource === "url") {
|
|
950
|
+
return detectPlaintextHeadings(mdxBody);
|
|
951
|
+
}
|
|
887
952
|
return headings;
|
|
888
953
|
}
|
|
889
954
|
function countH1s(headings) {
|
|
@@ -915,6 +980,9 @@ var missingH1 = {
|
|
|
915
980
|
category: "seo",
|
|
916
981
|
fixStrategy: "Add an H1 heading (# Heading) at the start of the content",
|
|
917
982
|
run: (item) => {
|
|
983
|
+
if (item.contentSource === "url") {
|
|
984
|
+
return [];
|
|
985
|
+
}
|
|
918
986
|
if (item.contentType === "blog") {
|
|
919
987
|
return [];
|
|
920
988
|
}
|
|
@@ -1201,8 +1269,16 @@ function countWords(text) {
|
|
|
1201
1269
|
}
|
|
1202
1270
|
/**
 * Estimate the number of sentences in a piece of content.
 *
 * The text is first passed through stripMarkdown. Strategies, in order:
 *  1. Count runs of `.`, `!` or `?` that are followed by whitespace, the end
 *     of the text, or an uppercase letter (A-Z, Ä, Ö, Ü).
 *  2. If none were found, count the lines longer than 20 characters (after
 *     trimming), provided there is more than one such line.
 *  3. Otherwise return 1 when the text contains a word of at least two word
 *     characters, else 0.
 *
 * @param {string} text - Content body.
 * @returns {number} Estimated sentence count.
 */
function countSentences(text) {
  const plain = stripMarkdown(text);

  const terminators = plain.match(/[.!?]+(?:\s|$|(?=[A-ZÄÖÜ]))/g) ?? [];
  if (terminators.length > 0) {
    return terminators.length;
  }

  // No sentence punctuation at all: treat each substantial line as a sentence.
  const substantialLines = plain.split(/\n+/).filter((row) => row.trim().length > 20);
  if (substantialLines.length > 1) {
    return substantialLines.length;
  }

  return /\w{2,}/.test(plain) ? 1 : 0;
}
|
|
1207
1283
|
|
|
1208
1284
|
// src/utils/readability.ts
|
|
@@ -1484,6 +1560,7 @@ var robotsRules = [
|
|
|
1484
1560
|
// src/rules/slug-rules.ts
|
|
1485
1561
|
var SLUG_DEFAULTS = { maxLength: 75 };
|
|
1486
1562
|
var SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
|
|
1563
|
+
var URL_PATH_PATTERN = /^[a-z0-9]+(?:[-/][a-z0-9]+)*$/;
|
|
1487
1564
|
var slugInvalidCharacters = {
|
|
1488
1565
|
name: "slug-invalid-characters",
|
|
1489
1566
|
severity: "error",
|
|
@@ -1491,8 +1568,10 @@ var slugInvalidCharacters = {
|
|
|
1491
1568
|
fixStrategy: 'Use lowercase alphanumeric characters with hyphens only (e.g., "my-blog-post")',
|
|
1492
1569
|
run: (item) => {
|
|
1493
1570
|
if (!item.slug) return [];
|
|
1571
|
+
const isUrl = item.contentSource === "url";
|
|
1572
|
+
const pattern = isUrl ? URL_PATH_PATTERN : SLUG_PATTERN;
|
|
1494
1573
|
const hasUppercase = /[A-Z]/.test(item.slug);
|
|
1495
|
-
const matchesPattern =
|
|
1574
|
+
const matchesPattern = pattern.test(item.slug);
|
|
1496
1575
|
if (hasUppercase || !matchesPattern) {
|
|
1497
1576
|
return [{
|
|
1498
1577
|
file: getDisplayPath(item),
|
|
@@ -1500,7 +1579,7 @@ var slugInvalidCharacters = {
|
|
|
1500
1579
|
rule: "slug-invalid-characters",
|
|
1501
1580
|
severity: "error",
|
|
1502
1581
|
message: `Slug "${item.slug}" contains invalid characters`,
|
|
1503
|
-
suggestion: 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1582
|
+
suggestion: isUrl ? "URL paths must be lowercase alphanumeric with hyphens and slashes only" : 'Slugs must be lowercase alphanumeric with hyphens only (e.g., "my-blog-post")'
|
|
1504
1583
|
}];
|
|
1505
1584
|
}
|
|
1506
1585
|
return [];
|
|
@@ -1765,8 +1844,8 @@ var WEAK_LEAD_STARTS = [
|
|
|
1765
1844
|
"schauen wir uns"
|
|
1766
1845
|
];
|
|
1767
1846
|
var TABLE_SEPARATOR_PATTERN = /\|\s*:?-{2,}/;
|
|
1768
|
-
function countQuestionHeadings(body) {
|
|
1769
|
-
const headings = extractHeadings(body);
|
|
1847
|
+
function countQuestionHeadings(body, contentSource) {
|
|
1848
|
+
const headings = extractHeadings(body, contentSource);
|
|
1770
1849
|
let count = 0;
|
|
1771
1850
|
for (const heading of headings) {
|
|
1772
1851
|
const text = heading.text.trim();
|
|
@@ -1828,12 +1907,20 @@ function countStatistics(body) {
|
|
|
1828
1907
|
}
|
|
1829
1908
|
return matches.size;
|
|
1830
1909
|
}
|
|
1831
|
-
function hasFAQSection(body) {
|
|
1910
|
+
/**
 * Report whether the content has an FAQ section.
 *
 * A markdown heading marker (`##` or `###`) followed by one of the known FAQ
 * titles (English or German, case-insensitive) always counts. When
 * `contentSource` is "url", the plaintext question/answer heuristic of
 * detectPlaintextFaq is used as a fallback.
 *
 * @param {string} body - Content body.
 * @param {string} [contentSource] - Origin of the content; only the value
 *   "url" enables the plaintext fallback.
 * @returns {boolean} true when an FAQ section was detected.
 */
function hasFAQSection(body, contentSource) {
  const faqHeading = /#{2,3}\s*(FAQ|Häufige Fragen|Frequently Asked|Fragen und Antworten)/i;
  const hasExplicitHeading = faqHeading.test(body);
  if (hasExplicitHeading) {
    return true;
  }
  if (contentSource !== "url") {
    return false;
  }
  return detectPlaintextFaq(body).hasFaq;
}
|
|
1835
|
-
function hasMarkdownTable(body) {
|
|
1836
|
-
|
|
1918
|
+
/**
 * Report whether the content contains a table.
 *
 * A match of TABLE_SEPARATOR_PATTERN anywhere in the body always counts. When
 * `contentSource` is "url", the result of detectPlaintextTable is used as a
 * fallback.
 *
 * @param {string} body - Content body.
 * @param {string} [contentSource] - Origin of the content; only the value
 *   "url" enables the plaintext fallback.
 * @returns {boolean} true when a table was detected.
 */
function hasMarkdownTable(body, contentSource) {
  const hasSeparatorRow = TABLE_SEPARATOR_PATTERN.test(body);
  if (hasSeparatorRow) {
    return true;
  }
  if (contentSource !== "url") {
    return false;
  }
  return detectPlaintextTable(body);
}
|
|
1838
1925
|
function countEntityMentions(body, entity) {
|
|
1839
1926
|
const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
@@ -1995,7 +2082,7 @@ function getParagraphs(body) {
|
|
|
1995
2082
|
}
|
|
1996
2083
|
return paragraphs;
|
|
1997
2084
|
}
|
|
1998
|
-
function hasMarkdownList(body) {
|
|
2085
|
+
function hasMarkdownList(body, contentSource) {
|
|
1999
2086
|
const lines = body.split("\n");
|
|
2000
2087
|
let inCodeBlock = false;
|
|
2001
2088
|
for (const line of lines) {
|
|
@@ -2008,6 +2095,9 @@ function hasMarkdownList(body) {
|
|
|
2008
2095
|
if (/^[-*]\s+/.test(trimmed)) return true;
|
|
2009
2096
|
if (/^\d+\.\s+/.test(trimmed)) return true;
|
|
2010
2097
|
}
|
|
2098
|
+
if (contentSource === "url") {
|
|
2099
|
+
return detectPlaintextList(body);
|
|
2100
|
+
}
|
|
2011
2101
|
return false;
|
|
2012
2102
|
}
|
|
2013
2103
|
function countInternalLinks(body) {
|
|
@@ -13739,8 +13829,27 @@ function jaccardSimilarity(a, b) {
|
|
|
13739
13829
|
const union = a.size + b.size - intersection;
|
|
13740
13830
|
return union > 0 ? intersection / union : 0;
|
|
13741
13831
|
}
|
|
13832
|
+
// Citation / bibliography phrases that are removed before repetition analysis.
// Every pattern is global, so all occurrences are stripped.
var REFERENCE_PATTERNS = [
  /archived from the original on/gi,
  /retrieved (?:on )?\d/gi,
  /accessed (?:on )?\d/gi,
  /cite (?:web|book|journal|news)/gi,
  /\^\s*\[?\d+\]?/g,
  /isbn \d/gi,
  /doi:\s*\d/gi,
  /pmid:\s*\d/gi
];

/**
 * Remove reference boilerplate from text.
 *
 * Every match of each REFERENCE_PATTERNS entry is deleted. Afterwards,
 * everything from the first line that consists solely of "references",
 * "sources", "bibliography", "einzelnachweise" or "weblinks"
 * (case-insensitive, preceded and followed by a newline) to the end of the
 * text is cut off.
 *
 * @param {string} text - Content body.
 * @returns {string} The text without the matched reference fragments.
 */
function stripReferenceBoilerplate(text) {
  const trailingReferenceSection =
    /\n(?:references|sources|bibliography|einzelnachweise|weblinks)\n[\s\S]*$/i;
  const withoutInlineCitations = REFERENCE_PATTERNS.reduce(
    (remaining, pattern) => remaining.replace(pattern, ""),
    text
  );
  return withoutInlineCitations.replace(trailingReferenceSection, "");
}
|
|
13742
13850
|
function analyzeRepetition(body) {
|
|
13743
|
-
const
|
|
13851
|
+
const cleaned = stripReferenceBoilerplate(body);
|
|
13852
|
+
const plain = stripMarkdown(cleaned).toLowerCase();
|
|
13744
13853
|
const words = plain.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/).filter((w) => w.length > 0);
|
|
13745
13854
|
const fiveGrams = extractNgrams(words, 5);
|
|
13746
13855
|
const phraseCounts = /* @__PURE__ */ new Map();
|
|
@@ -13749,7 +13858,7 @@ function analyzeRepetition(body) {
|
|
|
13749
13858
|
}
|
|
13750
13859
|
const repeatedPhrases = [...phraseCounts.entries()].filter(([, count]) => count >= 3).sort((a, b) => b[1] - a[1]);
|
|
13751
13860
|
const topRepeatedPhrases = repeatedPhrases.slice(0, 5).map(([phrase, count]) => ({ phrase, count }));
|
|
13752
|
-
const paragraphs =
|
|
13861
|
+
const paragraphs = cleaned.split(/\n\s*\n/).map((p) => p.trim()).filter((p) => p.length > 0 && !p.startsWith("#") && !p.startsWith("|"));
|
|
13753
13862
|
let totalSimilarity = 0;
|
|
13754
13863
|
let pairCount = 0;
|
|
13755
13864
|
for (let i = 0; i < paragraphs.length; i++) {
|
|
@@ -14055,10 +14164,10 @@ var geoNoQuestionHeadings = {
|
|
|
14055
14164
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14056
14165
|
const wordCount = countWords(item.body);
|
|
14057
14166
|
if (wordCount < GEO_MIN_WORDS) return [];
|
|
14058
|
-
const headings = extractHeadings(item.body);
|
|
14167
|
+
const headings = extractHeadings(item.body, item.contentSource);
|
|
14059
14168
|
const subHeadings = headings.filter((h) => h.level === 2 || h.level === 3);
|
|
14060
14169
|
if (subHeadings.length === 0) return [];
|
|
14061
|
-
const questionCount = countQuestionHeadings(item.body);
|
|
14170
|
+
const questionCount = countQuestionHeadings(item.body, item.contentSource);
|
|
14062
14171
|
const ratio = questionCount / subHeadings.length;
|
|
14063
14172
|
if (ratio < QUESTION_HEADING_THRESHOLD) {
|
|
14064
14173
|
return [{
|
|
@@ -14148,7 +14257,7 @@ var geoMissingFaqSection = {
|
|
|
14148
14257
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14149
14258
|
const wordCount = countWords(item.body);
|
|
14150
14259
|
if (wordCount < FAQ_MIN_WORDS) return [];
|
|
14151
|
-
if (!hasFAQSection(item.body)) {
|
|
14260
|
+
if (!hasFAQSection(item.body, item.contentSource)) {
|
|
14152
14261
|
return [{
|
|
14153
14262
|
file: getDisplayPath(item),
|
|
14154
14263
|
field: "body",
|
|
@@ -14193,7 +14302,7 @@ var geoMissingTable = {
|
|
|
14193
14302
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14194
14303
|
const wordCount = countWords(item.body);
|
|
14195
14304
|
if (wordCount < TABLE_MIN_WORDS) return [];
|
|
14196
|
-
if (!hasMarkdownTable(item.body)) {
|
|
14305
|
+
if (!hasMarkdownTable(item.body, item.contentSource)) {
|
|
14197
14306
|
return [{
|
|
14198
14307
|
file: getDisplayPath(item),
|
|
14199
14308
|
field: "body",
|
|
@@ -14841,7 +14950,7 @@ var geoMissingLists = {
|
|
|
14841
14950
|
if (!geoTypes.includes(item.contentType)) return [];
|
|
14842
14951
|
const wordCount = countWords(item.body);
|
|
14843
14952
|
if (wordCount < STRUCTURE_MIN_WORDS) return [];
|
|
14844
|
-
if (!hasMarkdownList(item.body)) {
|
|
14953
|
+
if (!hasMarkdownList(item.body, item.contentSource)) {
|
|
14845
14954
|
return [{
|
|
14846
14955
|
file: getDisplayPath(item),
|
|
14847
14956
|
field: "body",
|