aiseo-audit 1.2.7 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/cli.js +138 -45
- package/dist/cli.mjs +138 -45
- package/dist/index.d.mts +14 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +138 -45
- package/dist/index.mjs +138 -45
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
6
|
[Node.js](https://nodejs.org)
|
|
7
7
|
[TypeScript](https://www.typescriptlang.org/)
|
|
8
|
-
[GitHub](https://github.com/agencyenterprise/aiseo-audit)
|
|
9
9
|
[Codecov](https://codecov.io/gh/agencyenterprise/aiseo-audit)
|
|
10
10
|
|
|
11
11
|
<div align="center">
|
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
|
|
16
16
|
Deterministic CLI that audits web pages for **AI search readiness**. Think Lighthouse, but for how well AI engines can fetch, extract, understand, and cite your content.
|
|
17
17
|
|
|
18
|
+
> [!TIP]
|
|
19
|
+
> Run `aiseo-audit https://www.aiseo-audit.com` to see a 100/100 [A+ Score](https://www.aiseo-audit.com/).
|
|
20
|
+
|
|
18
21
|
**AI SEO measures how reusable your content is for generative engines, not traditional search rankings.**
|
|
19
22
|
|
|
20
23
|
- [Quick Start](#quick-start)
|
package/dist/cli.js
CHANGED
|
@@ -27,7 +27,7 @@ var import_commander = require("commander");
|
|
|
27
27
|
|
|
28
28
|
// src/modules/analyzer/constants.ts
|
|
29
29
|
var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
|
|
30
|
-
var VERSION = true ? "1.2.7" : "0.0.0";
|
|
30
|
+
var VERSION = true ? "1.3.0" : "0.0.0";
|
|
31
31
|
|
|
32
32
|
// src/modules/fetcher/constants.ts
|
|
33
33
|
var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
|
@@ -366,7 +366,16 @@ function extractEntities(text) {
|
|
|
366
366
|
0,
|
|
367
367
|
15
|
|
368
368
|
);
|
|
369
|
-
|
|
369
|
+
const imperativeVerbCount = doc.verbs().isImperative().length;
|
|
370
|
+
const numberCount = doc.numbers().length;
|
|
371
|
+
return {
|
|
372
|
+
people,
|
|
373
|
+
organizations,
|
|
374
|
+
places,
|
|
375
|
+
topics,
|
|
376
|
+
imperativeVerbCount,
|
|
377
|
+
numberCount
|
|
378
|
+
};
|
|
370
379
|
}
|
|
371
380
|
function computeFleschReadingEase(text) {
|
|
372
381
|
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
@@ -435,6 +444,7 @@ function auditAnswerability(page) {
|
|
|
435
444
|
const text = page.cleanText;
|
|
436
445
|
const $ = page.$;
|
|
437
446
|
const factors = [];
|
|
447
|
+
const { imperativeVerbCount = 0 } = extractEntities(text);
|
|
438
448
|
const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
|
|
439
449
|
const defScore = thresholdScore(defCount, [
|
|
440
450
|
[6, 10],
|
|
@@ -479,7 +489,7 @@ function auditAnswerability(page) {
|
|
|
479
489
|
);
|
|
480
490
|
const stepCount = countPatternMatches(text, STEP_PATTERNS);
|
|
481
491
|
const hasOl = $("ol").length > 0;
|
|
482
|
-
const stepTotal = stepCount + (hasOl ? 2 : 0);
|
|
492
|
+
const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
|
|
483
493
|
const stepScore = thresholdScore(stepTotal, [
|
|
484
494
|
[5, 10],
|
|
485
495
|
[2, 7],
|
|
@@ -491,7 +501,7 @@ function auditAnswerability(page) {
|
|
|
491
501
|
"Step-by-Step Content",
|
|
492
502
|
stepScore,
|
|
493
503
|
10,
|
|
494
|
-
`${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
|
|
504
|
+
`${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
|
|
495
505
|
)
|
|
496
506
|
);
|
|
497
507
|
const questionMatches = text.match(/[^.!?]*\?/g) || [];
|
|
@@ -790,51 +800,99 @@ function auditAuthorityContext(page) {
|
|
|
790
800
|
}
|
|
791
801
|
|
|
792
802
|
// src/modules/audits/support/robots.ts
|
|
803
|
+
function parseRobotGroups(robotsTxt) {
|
|
804
|
+
const groups = [];
|
|
805
|
+
let current = null;
|
|
806
|
+
for (const raw of robotsTxt.split("\n")) {
|
|
807
|
+
const line = raw.split("#")[0].trim();
|
|
808
|
+
if (!line) {
|
|
809
|
+
current = null;
|
|
810
|
+
continue;
|
|
811
|
+
}
|
|
812
|
+
const colonAt = line.indexOf(":");
|
|
813
|
+
if (colonAt === -1) continue;
|
|
814
|
+
const field = line.slice(0, colonAt).trim().toLowerCase();
|
|
815
|
+
const value = line.slice(colonAt + 1).trim();
|
|
816
|
+
if (field === "user-agent") {
|
|
817
|
+
if (!current) {
|
|
818
|
+
current = { agents: [], rules: [] };
|
|
819
|
+
groups.push(current);
|
|
820
|
+
}
|
|
821
|
+
current.agents.push(value.toLowerCase());
|
|
822
|
+
} else if (field === "disallow" || field === "allow") {
|
|
823
|
+
if (current) {
|
|
824
|
+
current.rules.push({ type: field, path: value });
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
return groups;
|
|
829
|
+
}
|
|
830
|
+
function matchingRulesForCrawler(groups, crawlerLower) {
|
|
831
|
+
const specific = [];
|
|
832
|
+
const wildcard = [];
|
|
833
|
+
for (const group of groups) {
|
|
834
|
+
if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
|
|
835
|
+
else if (group.agents.includes("*")) wildcard.push(...group.rules);
|
|
836
|
+
}
|
|
837
|
+
return { specific, wildcard };
|
|
838
|
+
}
|
|
839
|
+
function resolvesPathAsBlocked(rules, path) {
|
|
840
|
+
let bestMatchLength = -1;
|
|
841
|
+
let bestMatchIsDisallow = false;
|
|
842
|
+
for (const rule of rules) {
|
|
843
|
+
const rulePath = rule.path;
|
|
844
|
+
if (!rulePath || !path.startsWith(rulePath)) continue;
|
|
845
|
+
if (rulePath.length > bestMatchLength) {
|
|
846
|
+
bestMatchLength = rulePath.length;
|
|
847
|
+
bestMatchIsDisallow = rule.type === "disallow";
|
|
848
|
+
} else if (rulePath.length === bestMatchLength && rule.type === "allow") {
|
|
849
|
+
bestMatchIsDisallow = false;
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
return bestMatchLength >= 0 && bestMatchIsDisallow;
|
|
853
|
+
}
|
|
854
|
+
function findPartialBlocks(rules) {
|
|
855
|
+
return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
|
|
856
|
+
}
|
|
793
857
|
function checkCrawlerAccess(robotsTxt) {
|
|
794
858
|
if (!robotsTxt)
|
|
795
859
|
return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
|
|
796
|
-
const
|
|
860
|
+
const groups = parseRobotGroups(robotsTxt);
|
|
797
861
|
const allowed = [];
|
|
798
862
|
const blocked = [];
|
|
799
863
|
const unknown = [];
|
|
864
|
+
const partiallyBlocked = [];
|
|
800
865
|
for (const crawler of AI_CRAWLERS) {
|
|
801
866
|
const crawlerLower = crawler.toLowerCase();
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
if (lower.startsWith("disallow:")) {
|
|
811
|
-
const path = lower.split(":")[1]?.trim();
|
|
812
|
-
if (path === "/") {
|
|
813
|
-
if (currentAgent === crawlerLower) {
|
|
814
|
-
isBlocked = true;
|
|
815
|
-
found = true;
|
|
816
|
-
} else if (currentAgent === "*" && !found) {
|
|
817
|
-
isBlocked = true;
|
|
818
|
-
}
|
|
819
|
-
}
|
|
820
|
-
} else if (lower.startsWith("allow:")) {
|
|
821
|
-
if (currentAgent === crawlerLower) {
|
|
822
|
-
found = true;
|
|
823
|
-
isBlocked = false;
|
|
824
|
-
}
|
|
825
|
-
}
|
|
826
|
-
}
|
|
867
|
+
const { specific, wildcard } = matchingRulesForCrawler(
|
|
868
|
+
groups,
|
|
869
|
+
crawlerLower
|
|
870
|
+
);
|
|
871
|
+
const applicableRules = specific.length > 0 ? specific : wildcard;
|
|
872
|
+
if (applicableRules.length === 0) {
|
|
873
|
+
unknown.push(crawler);
|
|
874
|
+
continue;
|
|
827
875
|
}
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
else allowed.push(crawler);
|
|
831
|
-
} else if (isBlocked) {
|
|
876
|
+
const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
|
|
877
|
+
if (isSiteBlocked) {
|
|
832
878
|
blocked.push(crawler);
|
|
833
879
|
} else {
|
|
834
|
-
|
|
880
|
+
allowed.push(crawler);
|
|
881
|
+
const pathBlocks = findPartialBlocks(applicableRules);
|
|
882
|
+
for (const path of pathBlocks) {
|
|
883
|
+
const entry = `${crawler}: ${path}`;
|
|
884
|
+
if (!partiallyBlocked.includes(entry)) {
|
|
885
|
+
partiallyBlocked.push(entry);
|
|
886
|
+
}
|
|
887
|
+
}
|
|
835
888
|
}
|
|
836
889
|
}
|
|
837
|
-
return {
|
|
890
|
+
return {
|
|
891
|
+
allowed,
|
|
892
|
+
blocked,
|
|
893
|
+
unknown,
|
|
894
|
+
...partiallyBlocked.length > 0 && { partiallyBlocked }
|
|
895
|
+
};
|
|
838
896
|
}
|
|
839
897
|
|
|
840
898
|
// src/modules/audits/categories/content-extractability.ts
|
|
@@ -851,7 +909,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
851
909
|
)
|
|
852
910
|
);
|
|
853
911
|
const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
|
|
854
|
-
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio
|
|
912
|
+
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
|
|
855
913
|
factors.push(
|
|
856
914
|
makeFactor(
|
|
857
915
|
"Text Extraction Quality",
|
|
@@ -876,7 +934,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
876
934
|
)
|
|
877
935
|
);
|
|
878
936
|
const wc = page.stats.wordCount;
|
|
879
|
-
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc
|
|
937
|
+
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
|
|
880
938
|
factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
|
|
881
939
|
if (domainSignals) {
|
|
882
940
|
const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
|
|
@@ -891,6 +949,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
891
949
|
)
|
|
892
950
|
);
|
|
893
951
|
rawData.crawlerAccess = access2;
|
|
952
|
+
rawData.llmsTxt = {
|
|
953
|
+
llmsTxtExists: domainSignals.llmsTxtExists,
|
|
954
|
+
llmsFullTxtExists: domainSignals.llmsFullTxtExists
|
|
955
|
+
};
|
|
894
956
|
const hasLlms = domainSignals.llmsTxtExists;
|
|
895
957
|
const hasLlmsFull = domainSignals.llmsFullTxtExists;
|
|
896
958
|
const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
|
|
@@ -1110,6 +1172,7 @@ function auditGroundingSignals(page) {
|
|
|
1110
1172
|
const $ = page.$;
|
|
1111
1173
|
const text = page.cleanText;
|
|
1112
1174
|
const factors = [];
|
|
1175
|
+
const { numberCount = 0 } = extractEntities(text);
|
|
1113
1176
|
const externalLinks = page.externalLinks;
|
|
1114
1177
|
const extScore = thresholdScore(externalLinks.length, [
|
|
1115
1178
|
[6, 13],
|
|
@@ -1143,7 +1206,8 @@ function auditGroundingSignals(page) {
|
|
|
1143
1206
|
)
|
|
1144
1207
|
);
|
|
1145
1208
|
const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
|
|
1146
|
-
const
|
|
1209
|
+
const totalNumericSignals = numericCount + numberCount;
|
|
1210
|
+
const numScore = thresholdScore(totalNumericSignals, [
|
|
1147
1211
|
[9, 13],
|
|
1148
1212
|
[4, 9],
|
|
1149
1213
|
[1, 5],
|
|
@@ -1154,7 +1218,7 @@ function auditGroundingSignals(page) {
|
|
|
1154
1218
|
"Numeric Claims",
|
|
1155
1219
|
numScore,
|
|
1156
1220
|
13,
|
|
1157
|
-
`${numericCount} statistical references`
|
|
1221
|
+
`${numericCount} statistical references, ${numberCount} numeric values`
|
|
1158
1222
|
)
|
|
1159
1223
|
);
|
|
1160
1224
|
const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
|
|
@@ -1359,7 +1423,11 @@ function removeBoilerplate($) {
|
|
|
1359
1423
|
function normalizeWhitespace(text) {
|
|
1360
1424
|
return text.replace(/\s+/g, " ").trim();
|
|
1361
1425
|
}
|
|
1426
|
+
var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
|
|
1362
1427
|
function extractCleanText($) {
|
|
1428
|
+
$(BLOCK_ELEMENTS).each((_, el) => {
|
|
1429
|
+
$(el).append(" ");
|
|
1430
|
+
});
|
|
1363
1431
|
return normalizeWhitespace($("body").text());
|
|
1364
1432
|
}
|
|
1365
1433
|
|
|
@@ -1368,7 +1436,9 @@ function extractPage(html, url) {
|
|
|
1368
1436
|
const $ = cheerio.load(html);
|
|
1369
1437
|
const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
|
|
1370
1438
|
const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
|
|
1371
|
-
const
|
|
1439
|
+
const $raw = cheerio.load(html);
|
|
1440
|
+
$raw("script, style, noscript").remove();
|
|
1441
|
+
const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
|
|
1372
1442
|
const rawByteLength = Buffer.byteLength(html, "utf-8");
|
|
1373
1443
|
const h1Count = $("h1").length;
|
|
1374
1444
|
const h2Count = $("h2").length;
|
|
@@ -1380,9 +1450,22 @@ function extractPage(html, url) {
|
|
|
1380
1450
|
const listItemCount = $("li").length;
|
|
1381
1451
|
const tableCount = $("table").length;
|
|
1382
1452
|
const paragraphCount = $("p").length;
|
|
1453
|
+
const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
|
|
1454
|
+
"image",
|
|
1455
|
+
"photo",
|
|
1456
|
+
"logo",
|
|
1457
|
+
"icon",
|
|
1458
|
+
"picture",
|
|
1459
|
+
"img",
|
|
1460
|
+
"graphic",
|
|
1461
|
+
"thumbnail"
|
|
1462
|
+
]);
|
|
1383
1463
|
let imagesWithAlt = 0;
|
|
1384
1464
|
$("img").each((_, el) => {
|
|
1385
|
-
|
|
1465
|
+
const alt = $(el).attr("alt")?.trim() ?? "";
|
|
1466
|
+
const words = alt.split(/\s+/).filter((w) => w.length > 0);
|
|
1467
|
+
const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
|
|
1468
|
+
if (isMeaningful) imagesWithAlt++;
|
|
1386
1469
|
});
|
|
1387
1470
|
const pageDomain = getDomain(url);
|
|
1388
1471
|
const externalLinks = [];
|
|
@@ -1516,9 +1599,19 @@ var RECOMMENDATION_BUILDERS = {
|
|
|
1516
1599
|
}
|
|
1517
1600
|
return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
|
|
1518
1601
|
},
|
|
1519
|
-
"LLMs.txt Presence":
|
|
1520
|
-
|
|
1521
|
-
|
|
1602
|
+
"LLMs.txt Presence": (rawData) => {
|
|
1603
|
+
const llms = rawData.llmsTxt;
|
|
1604
|
+
if (!llms) {
|
|
1605
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1606
|
+
}
|
|
1607
|
+
if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
|
|
1608
|
+
return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
|
|
1609
|
+
}
|
|
1610
|
+
if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
|
|
1611
|
+
return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
|
|
1612
|
+
}
|
|
1613
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1614
|
+
},
|
|
1522
1615
|
"Image Accessibility": (rawData) => {
|
|
1523
1616
|
const images = rawData.imageAccessibility;
|
|
1524
1617
|
if (!images || images.imageCount === 0) {
|
package/dist/cli.mjs
CHANGED
|
@@ -3,7 +3,7 @@ import { Command } from "commander";
|
|
|
3
3
|
|
|
4
4
|
// src/modules/analyzer/constants.ts
|
|
5
5
|
var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
|
|
6
|
-
var VERSION = true ? "1.2.7" : "0.0.0";
|
|
6
|
+
var VERSION = true ? "1.3.0" : "0.0.0";
|
|
7
7
|
|
|
8
8
|
// src/modules/fetcher/constants.ts
|
|
9
9
|
var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
|
@@ -342,7 +342,16 @@ function extractEntities(text) {
|
|
|
342
342
|
0,
|
|
343
343
|
15
|
|
344
344
|
);
|
|
345
|
-
|
|
345
|
+
const imperativeVerbCount = doc.verbs().isImperative().length;
|
|
346
|
+
const numberCount = doc.numbers().length;
|
|
347
|
+
return {
|
|
348
|
+
people,
|
|
349
|
+
organizations,
|
|
350
|
+
places,
|
|
351
|
+
topics,
|
|
352
|
+
imperativeVerbCount,
|
|
353
|
+
numberCount
|
|
354
|
+
};
|
|
346
355
|
}
|
|
347
356
|
function computeFleschReadingEase(text) {
|
|
348
357
|
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
@@ -411,6 +420,7 @@ function auditAnswerability(page) {
|
|
|
411
420
|
const text = page.cleanText;
|
|
412
421
|
const $ = page.$;
|
|
413
422
|
const factors = [];
|
|
423
|
+
const { imperativeVerbCount = 0 } = extractEntities(text);
|
|
414
424
|
const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
|
|
415
425
|
const defScore = thresholdScore(defCount, [
|
|
416
426
|
[6, 10],
|
|
@@ -455,7 +465,7 @@ function auditAnswerability(page) {
|
|
|
455
465
|
);
|
|
456
466
|
const stepCount = countPatternMatches(text, STEP_PATTERNS);
|
|
457
467
|
const hasOl = $("ol").length > 0;
|
|
458
|
-
const stepTotal = stepCount + (hasOl ? 2 : 0);
|
|
468
|
+
const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
|
|
459
469
|
const stepScore = thresholdScore(stepTotal, [
|
|
460
470
|
[5, 10],
|
|
461
471
|
[2, 7],
|
|
@@ -467,7 +477,7 @@ function auditAnswerability(page) {
|
|
|
467
477
|
"Step-by-Step Content",
|
|
468
478
|
stepScore,
|
|
469
479
|
10,
|
|
470
|
-
`${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
|
|
480
|
+
`${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
|
|
471
481
|
)
|
|
472
482
|
);
|
|
473
483
|
const questionMatches = text.match(/[^.!?]*\?/g) || [];
|
|
@@ -766,51 +776,99 @@ function auditAuthorityContext(page) {
|
|
|
766
776
|
}
|
|
767
777
|
|
|
768
778
|
// src/modules/audits/support/robots.ts
|
|
779
|
+
function parseRobotGroups(robotsTxt) {
|
|
780
|
+
const groups = [];
|
|
781
|
+
let current = null;
|
|
782
|
+
for (const raw of robotsTxt.split("\n")) {
|
|
783
|
+
const line = raw.split("#")[0].trim();
|
|
784
|
+
if (!line) {
|
|
785
|
+
current = null;
|
|
786
|
+
continue;
|
|
787
|
+
}
|
|
788
|
+
const colonAt = line.indexOf(":");
|
|
789
|
+
if (colonAt === -1) continue;
|
|
790
|
+
const field = line.slice(0, colonAt).trim().toLowerCase();
|
|
791
|
+
const value = line.slice(colonAt + 1).trim();
|
|
792
|
+
if (field === "user-agent") {
|
|
793
|
+
if (!current) {
|
|
794
|
+
current = { agents: [], rules: [] };
|
|
795
|
+
groups.push(current);
|
|
796
|
+
}
|
|
797
|
+
current.agents.push(value.toLowerCase());
|
|
798
|
+
} else if (field === "disallow" || field === "allow") {
|
|
799
|
+
if (current) {
|
|
800
|
+
current.rules.push({ type: field, path: value });
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
return groups;
|
|
805
|
+
}
|
|
806
|
+
function matchingRulesForCrawler(groups, crawlerLower) {
|
|
807
|
+
const specific = [];
|
|
808
|
+
const wildcard = [];
|
|
809
|
+
for (const group of groups) {
|
|
810
|
+
if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
|
|
811
|
+
else if (group.agents.includes("*")) wildcard.push(...group.rules);
|
|
812
|
+
}
|
|
813
|
+
return { specific, wildcard };
|
|
814
|
+
}
|
|
815
|
+
function resolvesPathAsBlocked(rules, path) {
|
|
816
|
+
let bestMatchLength = -1;
|
|
817
|
+
let bestMatchIsDisallow = false;
|
|
818
|
+
for (const rule of rules) {
|
|
819
|
+
const rulePath = rule.path;
|
|
820
|
+
if (!rulePath || !path.startsWith(rulePath)) continue;
|
|
821
|
+
if (rulePath.length > bestMatchLength) {
|
|
822
|
+
bestMatchLength = rulePath.length;
|
|
823
|
+
bestMatchIsDisallow = rule.type === "disallow";
|
|
824
|
+
} else if (rulePath.length === bestMatchLength && rule.type === "allow") {
|
|
825
|
+
bestMatchIsDisallow = false;
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
return bestMatchLength >= 0 && bestMatchIsDisallow;
|
|
829
|
+
}
|
|
830
|
+
function findPartialBlocks(rules) {
|
|
831
|
+
return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
|
|
832
|
+
}
|
|
769
833
|
function checkCrawlerAccess(robotsTxt) {
|
|
770
834
|
if (!robotsTxt)
|
|
771
835
|
return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
|
|
772
|
-
const
|
|
836
|
+
const groups = parseRobotGroups(robotsTxt);
|
|
773
837
|
const allowed = [];
|
|
774
838
|
const blocked = [];
|
|
775
839
|
const unknown = [];
|
|
840
|
+
const partiallyBlocked = [];
|
|
776
841
|
for (const crawler of AI_CRAWLERS) {
|
|
777
842
|
const crawlerLower = crawler.toLowerCase();
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
if (lower.startsWith("disallow:")) {
|
|
787
|
-
const path = lower.split(":")[1]?.trim();
|
|
788
|
-
if (path === "/") {
|
|
789
|
-
if (currentAgent === crawlerLower) {
|
|
790
|
-
isBlocked = true;
|
|
791
|
-
found = true;
|
|
792
|
-
} else if (currentAgent === "*" && !found) {
|
|
793
|
-
isBlocked = true;
|
|
794
|
-
}
|
|
795
|
-
}
|
|
796
|
-
} else if (lower.startsWith("allow:")) {
|
|
797
|
-
if (currentAgent === crawlerLower) {
|
|
798
|
-
found = true;
|
|
799
|
-
isBlocked = false;
|
|
800
|
-
}
|
|
801
|
-
}
|
|
802
|
-
}
|
|
843
|
+
const { specific, wildcard } = matchingRulesForCrawler(
|
|
844
|
+
groups,
|
|
845
|
+
crawlerLower
|
|
846
|
+
);
|
|
847
|
+
const applicableRules = specific.length > 0 ? specific : wildcard;
|
|
848
|
+
if (applicableRules.length === 0) {
|
|
849
|
+
unknown.push(crawler);
|
|
850
|
+
continue;
|
|
803
851
|
}
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
else allowed.push(crawler);
|
|
807
|
-
} else if (isBlocked) {
|
|
852
|
+
const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
|
|
853
|
+
if (isSiteBlocked) {
|
|
808
854
|
blocked.push(crawler);
|
|
809
855
|
} else {
|
|
810
|
-
|
|
856
|
+
allowed.push(crawler);
|
|
857
|
+
const pathBlocks = findPartialBlocks(applicableRules);
|
|
858
|
+
for (const path of pathBlocks) {
|
|
859
|
+
const entry = `${crawler}: ${path}`;
|
|
860
|
+
if (!partiallyBlocked.includes(entry)) {
|
|
861
|
+
partiallyBlocked.push(entry);
|
|
862
|
+
}
|
|
863
|
+
}
|
|
811
864
|
}
|
|
812
865
|
}
|
|
813
|
-
return {
|
|
866
|
+
return {
|
|
867
|
+
allowed,
|
|
868
|
+
blocked,
|
|
869
|
+
unknown,
|
|
870
|
+
...partiallyBlocked.length > 0 && { partiallyBlocked }
|
|
871
|
+
};
|
|
814
872
|
}
|
|
815
873
|
|
|
816
874
|
// src/modules/audits/categories/content-extractability.ts
|
|
@@ -827,7 +885,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
827
885
|
)
|
|
828
886
|
);
|
|
829
887
|
const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
|
|
830
|
-
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio
|
|
888
|
+
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
|
|
831
889
|
factors.push(
|
|
832
890
|
makeFactor(
|
|
833
891
|
"Text Extraction Quality",
|
|
@@ -852,7 +910,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
852
910
|
)
|
|
853
911
|
);
|
|
854
912
|
const wc = page.stats.wordCount;
|
|
855
|
-
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc
|
|
913
|
+
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
|
|
856
914
|
factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
|
|
857
915
|
if (domainSignals) {
|
|
858
916
|
const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
|
|
@@ -867,6 +925,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
867
925
|
)
|
|
868
926
|
);
|
|
869
927
|
rawData.crawlerAccess = access2;
|
|
928
|
+
rawData.llmsTxt = {
|
|
929
|
+
llmsTxtExists: domainSignals.llmsTxtExists,
|
|
930
|
+
llmsFullTxtExists: domainSignals.llmsFullTxtExists
|
|
931
|
+
};
|
|
870
932
|
const hasLlms = domainSignals.llmsTxtExists;
|
|
871
933
|
const hasLlmsFull = domainSignals.llmsFullTxtExists;
|
|
872
934
|
const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
|
|
@@ -1086,6 +1148,7 @@ function auditGroundingSignals(page) {
|
|
|
1086
1148
|
const $ = page.$;
|
|
1087
1149
|
const text = page.cleanText;
|
|
1088
1150
|
const factors = [];
|
|
1151
|
+
const { numberCount = 0 } = extractEntities(text);
|
|
1089
1152
|
const externalLinks = page.externalLinks;
|
|
1090
1153
|
const extScore = thresholdScore(externalLinks.length, [
|
|
1091
1154
|
[6, 13],
|
|
@@ -1119,7 +1182,8 @@ function auditGroundingSignals(page) {
|
|
|
1119
1182
|
)
|
|
1120
1183
|
);
|
|
1121
1184
|
const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
|
|
1122
|
-
const
|
|
1185
|
+
const totalNumericSignals = numericCount + numberCount;
|
|
1186
|
+
const numScore = thresholdScore(totalNumericSignals, [
|
|
1123
1187
|
[9, 13],
|
|
1124
1188
|
[4, 9],
|
|
1125
1189
|
[1, 5],
|
|
@@ -1130,7 +1194,7 @@ function auditGroundingSignals(page) {
|
|
|
1130
1194
|
"Numeric Claims",
|
|
1131
1195
|
numScore,
|
|
1132
1196
|
13,
|
|
1133
|
-
`${numericCount} statistical references`
|
|
1197
|
+
`${numericCount} statistical references, ${numberCount} numeric values`
|
|
1134
1198
|
)
|
|
1135
1199
|
);
|
|
1136
1200
|
const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
|
|
@@ -1335,7 +1399,11 @@ function removeBoilerplate($) {
|
|
|
1335
1399
|
function normalizeWhitespace(text) {
|
|
1336
1400
|
return text.replace(/\s+/g, " ").trim();
|
|
1337
1401
|
}
|
|
1402
|
+
var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
|
|
1338
1403
|
function extractCleanText($) {
|
|
1404
|
+
$(BLOCK_ELEMENTS).each((_, el) => {
|
|
1405
|
+
$(el).append(" ");
|
|
1406
|
+
});
|
|
1339
1407
|
return normalizeWhitespace($("body").text());
|
|
1340
1408
|
}
|
|
1341
1409
|
|
|
@@ -1344,7 +1412,9 @@ function extractPage(html, url) {
|
|
|
1344
1412
|
const $ = cheerio.load(html);
|
|
1345
1413
|
const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
|
|
1346
1414
|
const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
|
|
1347
|
-
const
|
|
1415
|
+
const $raw = cheerio.load(html);
|
|
1416
|
+
$raw("script, style, noscript").remove();
|
|
1417
|
+
const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
|
|
1348
1418
|
const rawByteLength = Buffer.byteLength(html, "utf-8");
|
|
1349
1419
|
const h1Count = $("h1").length;
|
|
1350
1420
|
const h2Count = $("h2").length;
|
|
@@ -1356,9 +1426,22 @@ function extractPage(html, url) {
|
|
|
1356
1426
|
const listItemCount = $("li").length;
|
|
1357
1427
|
const tableCount = $("table").length;
|
|
1358
1428
|
const paragraphCount = $("p").length;
|
|
1429
|
+
const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
|
|
1430
|
+
"image",
|
|
1431
|
+
"photo",
|
|
1432
|
+
"logo",
|
|
1433
|
+
"icon",
|
|
1434
|
+
"picture",
|
|
1435
|
+
"img",
|
|
1436
|
+
"graphic",
|
|
1437
|
+
"thumbnail"
|
|
1438
|
+
]);
|
|
1359
1439
|
let imagesWithAlt = 0;
|
|
1360
1440
|
$("img").each((_, el) => {
|
|
1361
|
-
|
|
1441
|
+
const alt = $(el).attr("alt")?.trim() ?? "";
|
|
1442
|
+
const words = alt.split(/\s+/).filter((w) => w.length > 0);
|
|
1443
|
+
const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
|
|
1444
|
+
if (isMeaningful) imagesWithAlt++;
|
|
1362
1445
|
});
|
|
1363
1446
|
const pageDomain = getDomain(url);
|
|
1364
1447
|
const externalLinks = [];
|
|
@@ -1492,9 +1575,19 @@ var RECOMMENDATION_BUILDERS = {
|
|
|
1492
1575
|
}
|
|
1493
1576
|
return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
|
|
1494
1577
|
},
|
|
1495
|
-
"LLMs.txt Presence":
|
|
1496
|
-
|
|
1497
|
-
|
|
1578
|
+
"LLMs.txt Presence": (rawData) => {
|
|
1579
|
+
const llms = rawData.llmsTxt;
|
|
1580
|
+
if (!llms) {
|
|
1581
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1582
|
+
}
|
|
1583
|
+
if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
|
|
1584
|
+
return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
|
|
1585
|
+
}
|
|
1586
|
+
if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
|
|
1587
|
+
return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
|
|
1588
|
+
}
|
|
1589
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1590
|
+
},
|
|
1498
1591
|
"Image Accessibility": (rawData) => {
|
|
1499
1592
|
const images = rawData.imageAccessibility;
|
|
1500
1593
|
if (!images || images.imageCount === 0) {
|
package/dist/index.d.mts
CHANGED
|
@@ -90,6 +90,11 @@ declare const AnalyzerResultSchema: z.ZodObject<{
|
|
|
90
90
|
allowed: z.ZodArray<z.ZodString>;
|
|
91
91
|
blocked: z.ZodArray<z.ZodString>;
|
|
92
92
|
unknown: z.ZodArray<z.ZodString>;
|
|
93
|
+
partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
94
|
+
}, z.core.$strip>>;
|
|
95
|
+
llmsTxt: z.ZodOptional<z.ZodObject<{
|
|
96
|
+
llmsTxtExists: z.ZodBoolean;
|
|
97
|
+
llmsFullTxtExists: z.ZodBoolean;
|
|
93
98
|
}, z.core.$strip>>;
|
|
94
99
|
sectionLengths: z.ZodOptional<z.ZodObject<{
|
|
95
100
|
sectionCount: z.ZodNumber;
|
|
@@ -105,6 +110,8 @@ declare const AnalyzerResultSchema: z.ZodObject<{
|
|
|
105
110
|
organizations: z.ZodArray<z.ZodString>;
|
|
106
111
|
places: z.ZodArray<z.ZodString>;
|
|
107
112
|
topics: z.ZodArray<z.ZodString>;
|
|
113
|
+
imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
|
|
114
|
+
numberCount: z.ZodOptional<z.ZodNumber>;
|
|
108
115
|
}, z.core.$strip>>;
|
|
109
116
|
externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
110
117
|
url: z.ZodString;
|
|
@@ -266,6 +273,11 @@ declare const AuditResultSchema: z.ZodObject<{
|
|
|
266
273
|
allowed: z.ZodArray<z.ZodString>;
|
|
267
274
|
blocked: z.ZodArray<z.ZodString>;
|
|
268
275
|
unknown: z.ZodArray<z.ZodString>;
|
|
276
|
+
partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
277
|
+
}, z.core.$strip>>;
|
|
278
|
+
llmsTxt: z.ZodOptional<z.ZodObject<{
|
|
279
|
+
llmsTxtExists: z.ZodBoolean;
|
|
280
|
+
llmsFullTxtExists: z.ZodBoolean;
|
|
269
281
|
}, z.core.$strip>>;
|
|
270
282
|
sectionLengths: z.ZodOptional<z.ZodObject<{
|
|
271
283
|
sectionCount: z.ZodNumber;
|
|
@@ -281,6 +293,8 @@ declare const AuditResultSchema: z.ZodObject<{
|
|
|
281
293
|
organizations: z.ZodArray<z.ZodString>;
|
|
282
294
|
places: z.ZodArray<z.ZodString>;
|
|
283
295
|
topics: z.ZodArray<z.ZodString>;
|
|
296
|
+
imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
|
|
297
|
+
numberCount: z.ZodOptional<z.ZodNumber>;
|
|
284
298
|
}, z.core.$strip>>;
|
|
285
299
|
externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
286
300
|
url: z.ZodString;
|
package/dist/index.d.ts
CHANGED
|
@@ -90,6 +90,11 @@ declare const AnalyzerResultSchema: z.ZodObject<{
|
|
|
90
90
|
allowed: z.ZodArray<z.ZodString>;
|
|
91
91
|
blocked: z.ZodArray<z.ZodString>;
|
|
92
92
|
unknown: z.ZodArray<z.ZodString>;
|
|
93
|
+
partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
94
|
+
}, z.core.$strip>>;
|
|
95
|
+
llmsTxt: z.ZodOptional<z.ZodObject<{
|
|
96
|
+
llmsTxtExists: z.ZodBoolean;
|
|
97
|
+
llmsFullTxtExists: z.ZodBoolean;
|
|
93
98
|
}, z.core.$strip>>;
|
|
94
99
|
sectionLengths: z.ZodOptional<z.ZodObject<{
|
|
95
100
|
sectionCount: z.ZodNumber;
|
|
@@ -105,6 +110,8 @@ declare const AnalyzerResultSchema: z.ZodObject<{
|
|
|
105
110
|
organizations: z.ZodArray<z.ZodString>;
|
|
106
111
|
places: z.ZodArray<z.ZodString>;
|
|
107
112
|
topics: z.ZodArray<z.ZodString>;
|
|
113
|
+
imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
|
|
114
|
+
numberCount: z.ZodOptional<z.ZodNumber>;
|
|
108
115
|
}, z.core.$strip>>;
|
|
109
116
|
externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
110
117
|
url: z.ZodString;
|
|
@@ -266,6 +273,11 @@ declare const AuditResultSchema: z.ZodObject<{
|
|
|
266
273
|
allowed: z.ZodArray<z.ZodString>;
|
|
267
274
|
blocked: z.ZodArray<z.ZodString>;
|
|
268
275
|
unknown: z.ZodArray<z.ZodString>;
|
|
276
|
+
partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
277
|
+
}, z.core.$strip>>;
|
|
278
|
+
llmsTxt: z.ZodOptional<z.ZodObject<{
|
|
279
|
+
llmsTxtExists: z.ZodBoolean;
|
|
280
|
+
llmsFullTxtExists: z.ZodBoolean;
|
|
269
281
|
}, z.core.$strip>>;
|
|
270
282
|
sectionLengths: z.ZodOptional<z.ZodObject<{
|
|
271
283
|
sectionCount: z.ZodNumber;
|
|
@@ -281,6 +293,8 @@ declare const AuditResultSchema: z.ZodObject<{
|
|
|
281
293
|
organizations: z.ZodArray<z.ZodString>;
|
|
282
294
|
places: z.ZodArray<z.ZodString>;
|
|
283
295
|
topics: z.ZodArray<z.ZodString>;
|
|
296
|
+
imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
|
|
297
|
+
numberCount: z.ZodOptional<z.ZodNumber>;
|
|
284
298
|
}, z.core.$strip>>;
|
|
285
299
|
externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
286
300
|
url: z.ZodString;
|
package/dist/index.js
CHANGED
|
@@ -364,7 +364,16 @@ function extractEntities(text) {
|
|
|
364
364
|
0,
|
|
365
365
|
15
|
|
366
366
|
);
|
|
367
|
-
|
|
367
|
+
const imperativeVerbCount = doc.verbs().isImperative().length;
|
|
368
|
+
const numberCount = doc.numbers().length;
|
|
369
|
+
return {
|
|
370
|
+
people,
|
|
371
|
+
organizations,
|
|
372
|
+
places,
|
|
373
|
+
topics,
|
|
374
|
+
imperativeVerbCount,
|
|
375
|
+
numberCount
|
|
376
|
+
};
|
|
368
377
|
}
|
|
369
378
|
function computeFleschReadingEase(text) {
|
|
370
379
|
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
@@ -433,6 +442,7 @@ function auditAnswerability(page) {
|
|
|
433
442
|
const text = page.cleanText;
|
|
434
443
|
const $ = page.$;
|
|
435
444
|
const factors = [];
|
|
445
|
+
const { imperativeVerbCount = 0 } = extractEntities(text);
|
|
436
446
|
const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
|
|
437
447
|
const defScore = thresholdScore(defCount, [
|
|
438
448
|
[6, 10],
|
|
@@ -477,7 +487,7 @@ function auditAnswerability(page) {
|
|
|
477
487
|
);
|
|
478
488
|
const stepCount = countPatternMatches(text, STEP_PATTERNS);
|
|
479
489
|
const hasOl = $("ol").length > 0;
|
|
480
|
-
const stepTotal = stepCount + (hasOl ? 2 : 0);
|
|
490
|
+
const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
|
|
481
491
|
const stepScore = thresholdScore(stepTotal, [
|
|
482
492
|
[5, 10],
|
|
483
493
|
[2, 7],
|
|
@@ -489,7 +499,7 @@ function auditAnswerability(page) {
|
|
|
489
499
|
"Step-by-Step Content",
|
|
490
500
|
stepScore,
|
|
491
501
|
10,
|
|
492
|
-
`${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
|
|
502
|
+
`${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
|
|
493
503
|
)
|
|
494
504
|
);
|
|
495
505
|
const questionMatches = text.match(/[^.!?]*\?/g) || [];
|
|
@@ -788,51 +798,99 @@ function auditAuthorityContext(page) {
|
|
|
788
798
|
}
|
|
789
799
|
|
|
790
800
|
// src/modules/audits/support/robots.ts
|
|
801
|
+
function parseRobotGroups(robotsTxt) {
|
|
802
|
+
const groups = [];
|
|
803
|
+
let current = null;
|
|
804
|
+
for (const raw of robotsTxt.split("\n")) {
|
|
805
|
+
const line = raw.split("#")[0].trim();
|
|
806
|
+
if (!line) {
|
|
807
|
+
current = null;
|
|
808
|
+
continue;
|
|
809
|
+
}
|
|
810
|
+
const colonAt = line.indexOf(":");
|
|
811
|
+
if (colonAt === -1) continue;
|
|
812
|
+
const field = line.slice(0, colonAt).trim().toLowerCase();
|
|
813
|
+
const value = line.slice(colonAt + 1).trim();
|
|
814
|
+
if (field === "user-agent") {
|
|
815
|
+
if (!current) {
|
|
816
|
+
current = { agents: [], rules: [] };
|
|
817
|
+
groups.push(current);
|
|
818
|
+
}
|
|
819
|
+
current.agents.push(value.toLowerCase());
|
|
820
|
+
} else if (field === "disallow" || field === "allow") {
|
|
821
|
+
if (current) {
|
|
822
|
+
current.rules.push({ type: field, path: value });
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
return groups;
|
|
827
|
+
}
|
|
828
|
+
function matchingRulesForCrawler(groups, crawlerLower) {
|
|
829
|
+
const specific = [];
|
|
830
|
+
const wildcard = [];
|
|
831
|
+
for (const group of groups) {
|
|
832
|
+
if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
|
|
833
|
+
else if (group.agents.includes("*")) wildcard.push(...group.rules);
|
|
834
|
+
}
|
|
835
|
+
return { specific, wildcard };
|
|
836
|
+
}
|
|
837
|
+
function resolvesPathAsBlocked(rules, path) {
|
|
838
|
+
let bestMatchLength = -1;
|
|
839
|
+
let bestMatchIsDisallow = false;
|
|
840
|
+
for (const rule of rules) {
|
|
841
|
+
const rulePath = rule.path;
|
|
842
|
+
if (!rulePath || !path.startsWith(rulePath)) continue;
|
|
843
|
+
if (rulePath.length > bestMatchLength) {
|
|
844
|
+
bestMatchLength = rulePath.length;
|
|
845
|
+
bestMatchIsDisallow = rule.type === "disallow";
|
|
846
|
+
} else if (rulePath.length === bestMatchLength && rule.type === "allow") {
|
|
847
|
+
bestMatchIsDisallow = false;
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
return bestMatchLength >= 0 && bestMatchIsDisallow;
|
|
851
|
+
}
|
|
852
|
+
function findPartialBlocks(rules) {
|
|
853
|
+
return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
|
|
854
|
+
}
|
|
791
855
|
function checkCrawlerAccess(robotsTxt) {
|
|
792
856
|
if (!robotsTxt)
|
|
793
857
|
return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
|
|
794
|
-
const
|
|
858
|
+
const groups = parseRobotGroups(robotsTxt);
|
|
795
859
|
const allowed = [];
|
|
796
860
|
const blocked = [];
|
|
797
861
|
const unknown = [];
|
|
862
|
+
const partiallyBlocked = [];
|
|
798
863
|
for (const crawler of AI_CRAWLERS) {
|
|
799
864
|
const crawlerLower = crawler.toLowerCase();
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
if (lower.startsWith("disallow:")) {
|
|
809
|
-
const path = lower.split(":")[1]?.trim();
|
|
810
|
-
if (path === "/") {
|
|
811
|
-
if (currentAgent === crawlerLower) {
|
|
812
|
-
isBlocked = true;
|
|
813
|
-
found = true;
|
|
814
|
-
} else if (currentAgent === "*" && !found) {
|
|
815
|
-
isBlocked = true;
|
|
816
|
-
}
|
|
817
|
-
}
|
|
818
|
-
} else if (lower.startsWith("allow:")) {
|
|
819
|
-
if (currentAgent === crawlerLower) {
|
|
820
|
-
found = true;
|
|
821
|
-
isBlocked = false;
|
|
822
|
-
}
|
|
823
|
-
}
|
|
824
|
-
}
|
|
865
|
+
const { specific, wildcard } = matchingRulesForCrawler(
|
|
866
|
+
groups,
|
|
867
|
+
crawlerLower
|
|
868
|
+
);
|
|
869
|
+
const applicableRules = specific.length > 0 ? specific : wildcard;
|
|
870
|
+
if (applicableRules.length === 0) {
|
|
871
|
+
unknown.push(crawler);
|
|
872
|
+
continue;
|
|
825
873
|
}
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
else allowed.push(crawler);
|
|
829
|
-
} else if (isBlocked) {
|
|
874
|
+
const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
|
|
875
|
+
if (isSiteBlocked) {
|
|
830
876
|
blocked.push(crawler);
|
|
831
877
|
} else {
|
|
832
|
-
|
|
878
|
+
allowed.push(crawler);
|
|
879
|
+
const pathBlocks = findPartialBlocks(applicableRules);
|
|
880
|
+
for (const path of pathBlocks) {
|
|
881
|
+
const entry = `${crawler}: ${path}`;
|
|
882
|
+
if (!partiallyBlocked.includes(entry)) {
|
|
883
|
+
partiallyBlocked.push(entry);
|
|
884
|
+
}
|
|
885
|
+
}
|
|
833
886
|
}
|
|
834
887
|
}
|
|
835
|
-
return {
|
|
888
|
+
return {
|
|
889
|
+
allowed,
|
|
890
|
+
blocked,
|
|
891
|
+
unknown,
|
|
892
|
+
...partiallyBlocked.length > 0 && { partiallyBlocked }
|
|
893
|
+
};
|
|
836
894
|
}
|
|
837
895
|
|
|
838
896
|
// src/modules/audits/categories/content-extractability.ts
|
|
@@ -849,7 +907,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
849
907
|
)
|
|
850
908
|
);
|
|
851
909
|
const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
|
|
852
|
-
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio
|
|
910
|
+
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
|
|
853
911
|
factors.push(
|
|
854
912
|
makeFactor(
|
|
855
913
|
"Text Extraction Quality",
|
|
@@ -874,7 +932,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
874
932
|
)
|
|
875
933
|
);
|
|
876
934
|
const wc = page.stats.wordCount;
|
|
877
|
-
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc
|
|
935
|
+
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
|
|
878
936
|
factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
|
|
879
937
|
if (domainSignals) {
|
|
880
938
|
const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
|
|
@@ -889,6 +947,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
889
947
|
)
|
|
890
948
|
);
|
|
891
949
|
rawData.crawlerAccess = access2;
|
|
950
|
+
rawData.llmsTxt = {
|
|
951
|
+
llmsTxtExists: domainSignals.llmsTxtExists,
|
|
952
|
+
llmsFullTxtExists: domainSignals.llmsFullTxtExists
|
|
953
|
+
};
|
|
892
954
|
const hasLlms = domainSignals.llmsTxtExists;
|
|
893
955
|
const hasLlmsFull = domainSignals.llmsFullTxtExists;
|
|
894
956
|
const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
|
|
@@ -1108,6 +1170,7 @@ function auditGroundingSignals(page) {
|
|
|
1108
1170
|
const $ = page.$;
|
|
1109
1171
|
const text = page.cleanText;
|
|
1110
1172
|
const factors = [];
|
|
1173
|
+
const { numberCount = 0 } = extractEntities(text);
|
|
1111
1174
|
const externalLinks = page.externalLinks;
|
|
1112
1175
|
const extScore = thresholdScore(externalLinks.length, [
|
|
1113
1176
|
[6, 13],
|
|
@@ -1141,7 +1204,8 @@ function auditGroundingSignals(page) {
|
|
|
1141
1204
|
)
|
|
1142
1205
|
);
|
|
1143
1206
|
const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
|
|
1144
|
-
const
|
|
1207
|
+
const totalNumericSignals = numericCount + numberCount;
|
|
1208
|
+
const numScore = thresholdScore(totalNumericSignals, [
|
|
1145
1209
|
[9, 13],
|
|
1146
1210
|
[4, 9],
|
|
1147
1211
|
[1, 5],
|
|
@@ -1152,7 +1216,7 @@ function auditGroundingSignals(page) {
|
|
|
1152
1216
|
"Numeric Claims",
|
|
1153
1217
|
numScore,
|
|
1154
1218
|
13,
|
|
1155
|
-
`${numericCount} statistical references`
|
|
1219
|
+
`${numericCount} statistical references, ${numberCount} numeric values`
|
|
1156
1220
|
)
|
|
1157
1221
|
);
|
|
1158
1222
|
const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
|
|
@@ -1357,7 +1421,11 @@ function removeBoilerplate($) {
|
|
|
1357
1421
|
function normalizeWhitespace(text) {
|
|
1358
1422
|
return text.replace(/\s+/g, " ").trim();
|
|
1359
1423
|
}
|
|
1424
|
+
var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
|
|
1360
1425
|
function extractCleanText($) {
|
|
1426
|
+
$(BLOCK_ELEMENTS).each((_, el) => {
|
|
1427
|
+
$(el).append(" ");
|
|
1428
|
+
});
|
|
1361
1429
|
return normalizeWhitespace($("body").text());
|
|
1362
1430
|
}
|
|
1363
1431
|
|
|
@@ -1366,7 +1434,9 @@ function extractPage(html, url) {
|
|
|
1366
1434
|
const $ = cheerio.load(html);
|
|
1367
1435
|
const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
|
|
1368
1436
|
const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
|
|
1369
|
-
const
|
|
1437
|
+
const $raw = cheerio.load(html);
|
|
1438
|
+
$raw("script, style, noscript").remove();
|
|
1439
|
+
const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
|
|
1370
1440
|
const rawByteLength = Buffer.byteLength(html, "utf-8");
|
|
1371
1441
|
const h1Count = $("h1").length;
|
|
1372
1442
|
const h2Count = $("h2").length;
|
|
@@ -1378,9 +1448,22 @@ function extractPage(html, url) {
|
|
|
1378
1448
|
const listItemCount = $("li").length;
|
|
1379
1449
|
const tableCount = $("table").length;
|
|
1380
1450
|
const paragraphCount = $("p").length;
|
|
1451
|
+
const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
|
|
1452
|
+
"image",
|
|
1453
|
+
"photo",
|
|
1454
|
+
"logo",
|
|
1455
|
+
"icon",
|
|
1456
|
+
"picture",
|
|
1457
|
+
"img",
|
|
1458
|
+
"graphic",
|
|
1459
|
+
"thumbnail"
|
|
1460
|
+
]);
|
|
1381
1461
|
let imagesWithAlt = 0;
|
|
1382
1462
|
$("img").each((_, el) => {
|
|
1383
|
-
|
|
1463
|
+
const alt = $(el).attr("alt")?.trim() ?? "";
|
|
1464
|
+
const words = alt.split(/\s+/).filter((w) => w.length > 0);
|
|
1465
|
+
const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
|
|
1466
|
+
if (isMeaningful) imagesWithAlt++;
|
|
1384
1467
|
});
|
|
1385
1468
|
const pageDomain = getDomain(url);
|
|
1386
1469
|
const externalLinks = [];
|
|
@@ -1440,7 +1523,7 @@ var import_zod = require("zod");
|
|
|
1440
1523
|
|
|
1441
1524
|
// src/modules/analyzer/constants.ts
|
|
1442
1525
|
var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
|
|
1443
|
-
var VERSION = true ? "1.
|
|
1526
|
+
var VERSION = true ? "1.3.0" : "0.0.0";
|
|
1444
1527
|
|
|
1445
1528
|
// src/modules/fetcher/schema.ts
|
|
1446
1529
|
var FetchOptionsSchema = import_zod.z.object({
|
|
@@ -1520,9 +1603,19 @@ var RECOMMENDATION_BUILDERS = {
|
|
|
1520
1603
|
}
|
|
1521
1604
|
return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
|
|
1522
1605
|
},
|
|
1523
|
-
"LLMs.txt Presence":
|
|
1524
|
-
|
|
1525
|
-
|
|
1606
|
+
"LLMs.txt Presence": (rawData) => {
|
|
1607
|
+
const llms = rawData.llmsTxt;
|
|
1608
|
+
if (!llms) {
|
|
1609
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1610
|
+
}
|
|
1611
|
+
if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
|
|
1612
|
+
return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
|
|
1613
|
+
}
|
|
1614
|
+
if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
|
|
1615
|
+
return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
|
|
1616
|
+
}
|
|
1617
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1618
|
+
},
|
|
1526
1619
|
"Image Accessibility": (rawData) => {
|
|
1527
1620
|
const images = rawData.imageAccessibility;
|
|
1528
1621
|
if (!images || images.imageCount === 0) {
|
package/dist/index.mjs
CHANGED
|
@@ -326,7 +326,16 @@ function extractEntities(text) {
|
|
|
326
326
|
0,
|
|
327
327
|
15
|
|
328
328
|
);
|
|
329
|
-
|
|
329
|
+
const imperativeVerbCount = doc.verbs().isImperative().length;
|
|
330
|
+
const numberCount = doc.numbers().length;
|
|
331
|
+
return {
|
|
332
|
+
people,
|
|
333
|
+
organizations,
|
|
334
|
+
places,
|
|
335
|
+
topics,
|
|
336
|
+
imperativeVerbCount,
|
|
337
|
+
numberCount
|
|
338
|
+
};
|
|
330
339
|
}
|
|
331
340
|
function computeFleschReadingEase(text) {
|
|
332
341
|
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
@@ -395,6 +404,7 @@ function auditAnswerability(page) {
|
|
|
395
404
|
const text = page.cleanText;
|
|
396
405
|
const $ = page.$;
|
|
397
406
|
const factors = [];
|
|
407
|
+
const { imperativeVerbCount = 0 } = extractEntities(text);
|
|
398
408
|
const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
|
|
399
409
|
const defScore = thresholdScore(defCount, [
|
|
400
410
|
[6, 10],
|
|
@@ -439,7 +449,7 @@ function auditAnswerability(page) {
|
|
|
439
449
|
);
|
|
440
450
|
const stepCount = countPatternMatches(text, STEP_PATTERNS);
|
|
441
451
|
const hasOl = $("ol").length > 0;
|
|
442
|
-
const stepTotal = stepCount + (hasOl ? 2 : 0);
|
|
452
|
+
const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
|
|
443
453
|
const stepScore = thresholdScore(stepTotal, [
|
|
444
454
|
[5, 10],
|
|
445
455
|
[2, 7],
|
|
@@ -451,7 +461,7 @@ function auditAnswerability(page) {
|
|
|
451
461
|
"Step-by-Step Content",
|
|
452
462
|
stepScore,
|
|
453
463
|
10,
|
|
454
|
-
`${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
|
|
464
|
+
`${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
|
|
455
465
|
)
|
|
456
466
|
);
|
|
457
467
|
const questionMatches = text.match(/[^.!?]*\?/g) || [];
|
|
@@ -750,51 +760,99 @@ function auditAuthorityContext(page) {
|
|
|
750
760
|
}
|
|
751
761
|
|
|
752
762
|
// src/modules/audits/support/robots.ts
|
|
763
|
+
function parseRobotGroups(robotsTxt) {
|
|
764
|
+
const groups = [];
|
|
765
|
+
let current = null;
|
|
766
|
+
for (const raw of robotsTxt.split("\n")) {
|
|
767
|
+
const line = raw.split("#")[0].trim();
|
|
768
|
+
if (!line) {
|
|
769
|
+
current = null;
|
|
770
|
+
continue;
|
|
771
|
+
}
|
|
772
|
+
const colonAt = line.indexOf(":");
|
|
773
|
+
if (colonAt === -1) continue;
|
|
774
|
+
const field = line.slice(0, colonAt).trim().toLowerCase();
|
|
775
|
+
const value = line.slice(colonAt + 1).trim();
|
|
776
|
+
if (field === "user-agent") {
|
|
777
|
+
if (!current) {
|
|
778
|
+
current = { agents: [], rules: [] };
|
|
779
|
+
groups.push(current);
|
|
780
|
+
}
|
|
781
|
+
current.agents.push(value.toLowerCase());
|
|
782
|
+
} else if (field === "disallow" || field === "allow") {
|
|
783
|
+
if (current) {
|
|
784
|
+
current.rules.push({ type: field, path: value });
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
return groups;
|
|
789
|
+
}
|
|
790
|
+
function matchingRulesForCrawler(groups, crawlerLower) {
|
|
791
|
+
const specific = [];
|
|
792
|
+
const wildcard = [];
|
|
793
|
+
for (const group of groups) {
|
|
794
|
+
if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
|
|
795
|
+
else if (group.agents.includes("*")) wildcard.push(...group.rules);
|
|
796
|
+
}
|
|
797
|
+
return { specific, wildcard };
|
|
798
|
+
}
|
|
799
|
+
function resolvesPathAsBlocked(rules, path) {
|
|
800
|
+
let bestMatchLength = -1;
|
|
801
|
+
let bestMatchIsDisallow = false;
|
|
802
|
+
for (const rule of rules) {
|
|
803
|
+
const rulePath = rule.path;
|
|
804
|
+
if (!rulePath || !path.startsWith(rulePath)) continue;
|
|
805
|
+
if (rulePath.length > bestMatchLength) {
|
|
806
|
+
bestMatchLength = rulePath.length;
|
|
807
|
+
bestMatchIsDisallow = rule.type === "disallow";
|
|
808
|
+
} else if (rulePath.length === bestMatchLength && rule.type === "allow") {
|
|
809
|
+
bestMatchIsDisallow = false;
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
return bestMatchLength >= 0 && bestMatchIsDisallow;
|
|
813
|
+
}
|
|
814
|
+
function findPartialBlocks(rules) {
|
|
815
|
+
return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
|
|
816
|
+
}
|
|
753
817
|
function checkCrawlerAccess(robotsTxt) {
|
|
754
818
|
if (!robotsTxt)
|
|
755
819
|
return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
|
|
756
|
-
const
|
|
820
|
+
const groups = parseRobotGroups(robotsTxt);
|
|
757
821
|
const allowed = [];
|
|
758
822
|
const blocked = [];
|
|
759
823
|
const unknown = [];
|
|
824
|
+
const partiallyBlocked = [];
|
|
760
825
|
for (const crawler of AI_CRAWLERS) {
|
|
761
826
|
const crawlerLower = crawler.toLowerCase();
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
if (lower.startsWith("disallow:")) {
|
|
771
|
-
const path = lower.split(":")[1]?.trim();
|
|
772
|
-
if (path === "/") {
|
|
773
|
-
if (currentAgent === crawlerLower) {
|
|
774
|
-
isBlocked = true;
|
|
775
|
-
found = true;
|
|
776
|
-
} else if (currentAgent === "*" && !found) {
|
|
777
|
-
isBlocked = true;
|
|
778
|
-
}
|
|
779
|
-
}
|
|
780
|
-
} else if (lower.startsWith("allow:")) {
|
|
781
|
-
if (currentAgent === crawlerLower) {
|
|
782
|
-
found = true;
|
|
783
|
-
isBlocked = false;
|
|
784
|
-
}
|
|
785
|
-
}
|
|
786
|
-
}
|
|
827
|
+
const { specific, wildcard } = matchingRulesForCrawler(
|
|
828
|
+
groups,
|
|
829
|
+
crawlerLower
|
|
830
|
+
);
|
|
831
|
+
const applicableRules = specific.length > 0 ? specific : wildcard;
|
|
832
|
+
if (applicableRules.length === 0) {
|
|
833
|
+
unknown.push(crawler);
|
|
834
|
+
continue;
|
|
787
835
|
}
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
else allowed.push(crawler);
|
|
791
|
-
} else if (isBlocked) {
|
|
836
|
+
const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
|
|
837
|
+
if (isSiteBlocked) {
|
|
792
838
|
blocked.push(crawler);
|
|
793
839
|
} else {
|
|
794
|
-
|
|
840
|
+
allowed.push(crawler);
|
|
841
|
+
const pathBlocks = findPartialBlocks(applicableRules);
|
|
842
|
+
for (const path of pathBlocks) {
|
|
843
|
+
const entry = `${crawler}: ${path}`;
|
|
844
|
+
if (!partiallyBlocked.includes(entry)) {
|
|
845
|
+
partiallyBlocked.push(entry);
|
|
846
|
+
}
|
|
847
|
+
}
|
|
795
848
|
}
|
|
796
849
|
}
|
|
797
|
-
return {
|
|
850
|
+
return {
|
|
851
|
+
allowed,
|
|
852
|
+
blocked,
|
|
853
|
+
unknown,
|
|
854
|
+
...partiallyBlocked.length > 0 && { partiallyBlocked }
|
|
855
|
+
};
|
|
798
856
|
}
|
|
799
857
|
|
|
800
858
|
// src/modules/audits/categories/content-extractability.ts
|
|
@@ -811,7 +869,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
811
869
|
)
|
|
812
870
|
);
|
|
813
871
|
const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
|
|
814
|
-
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio
|
|
872
|
+
const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
|
|
815
873
|
factors.push(
|
|
816
874
|
makeFactor(
|
|
817
875
|
"Text Extraction Quality",
|
|
@@ -836,7 +894,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
836
894
|
)
|
|
837
895
|
);
|
|
838
896
|
const wc = page.stats.wordCount;
|
|
839
|
-
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc
|
|
897
|
+
const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
|
|
840
898
|
factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
|
|
841
899
|
if (domainSignals) {
|
|
842
900
|
const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
|
|
@@ -851,6 +909,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
|
|
|
851
909
|
)
|
|
852
910
|
);
|
|
853
911
|
rawData.crawlerAccess = access2;
|
|
912
|
+
rawData.llmsTxt = {
|
|
913
|
+
llmsTxtExists: domainSignals.llmsTxtExists,
|
|
914
|
+
llmsFullTxtExists: domainSignals.llmsFullTxtExists
|
|
915
|
+
};
|
|
854
916
|
const hasLlms = domainSignals.llmsTxtExists;
|
|
855
917
|
const hasLlmsFull = domainSignals.llmsFullTxtExists;
|
|
856
918
|
const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
|
|
@@ -1070,6 +1132,7 @@ function auditGroundingSignals(page) {
|
|
|
1070
1132
|
const $ = page.$;
|
|
1071
1133
|
const text = page.cleanText;
|
|
1072
1134
|
const factors = [];
|
|
1135
|
+
const { numberCount = 0 } = extractEntities(text);
|
|
1073
1136
|
const externalLinks = page.externalLinks;
|
|
1074
1137
|
const extScore = thresholdScore(externalLinks.length, [
|
|
1075
1138
|
[6, 13],
|
|
@@ -1103,7 +1166,8 @@ function auditGroundingSignals(page) {
|
|
|
1103
1166
|
)
|
|
1104
1167
|
);
|
|
1105
1168
|
const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
|
|
1106
|
-
const
|
|
1169
|
+
const totalNumericSignals = numericCount + numberCount;
|
|
1170
|
+
const numScore = thresholdScore(totalNumericSignals, [
|
|
1107
1171
|
[9, 13],
|
|
1108
1172
|
[4, 9],
|
|
1109
1173
|
[1, 5],
|
|
@@ -1114,7 +1178,7 @@ function auditGroundingSignals(page) {
|
|
|
1114
1178
|
"Numeric Claims",
|
|
1115
1179
|
numScore,
|
|
1116
1180
|
13,
|
|
1117
|
-
`${numericCount} statistical references`
|
|
1181
|
+
`${numericCount} statistical references, ${numberCount} numeric values`
|
|
1118
1182
|
)
|
|
1119
1183
|
);
|
|
1120
1184
|
const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
|
|
@@ -1319,7 +1383,11 @@ function removeBoilerplate($) {
|
|
|
1319
1383
|
function normalizeWhitespace(text) {
|
|
1320
1384
|
return text.replace(/\s+/g, " ").trim();
|
|
1321
1385
|
}
|
|
1386
|
+
var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
|
|
1322
1387
|
function extractCleanText($) {
|
|
1388
|
+
$(BLOCK_ELEMENTS).each((_, el) => {
|
|
1389
|
+
$(el).append(" ");
|
|
1390
|
+
});
|
|
1323
1391
|
return normalizeWhitespace($("body").text());
|
|
1324
1392
|
}
|
|
1325
1393
|
|
|
@@ -1328,7 +1396,9 @@ function extractPage(html, url) {
|
|
|
1328
1396
|
const $ = cheerio.load(html);
|
|
1329
1397
|
const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
|
|
1330
1398
|
const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
|
|
1331
|
-
const
|
|
1399
|
+
const $raw = cheerio.load(html);
|
|
1400
|
+
$raw("script, style, noscript").remove();
|
|
1401
|
+
const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
|
|
1332
1402
|
const rawByteLength = Buffer.byteLength(html, "utf-8");
|
|
1333
1403
|
const h1Count = $("h1").length;
|
|
1334
1404
|
const h2Count = $("h2").length;
|
|
@@ -1340,9 +1410,22 @@ function extractPage(html, url) {
|
|
|
1340
1410
|
const listItemCount = $("li").length;
|
|
1341
1411
|
const tableCount = $("table").length;
|
|
1342
1412
|
const paragraphCount = $("p").length;
|
|
1413
|
+
const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
|
|
1414
|
+
"image",
|
|
1415
|
+
"photo",
|
|
1416
|
+
"logo",
|
|
1417
|
+
"icon",
|
|
1418
|
+
"picture",
|
|
1419
|
+
"img",
|
|
1420
|
+
"graphic",
|
|
1421
|
+
"thumbnail"
|
|
1422
|
+
]);
|
|
1343
1423
|
let imagesWithAlt = 0;
|
|
1344
1424
|
$("img").each((_, el) => {
|
|
1345
|
-
|
|
1425
|
+
const alt = $(el).attr("alt")?.trim() ?? "";
|
|
1426
|
+
const words = alt.split(/\s+/).filter((w) => w.length > 0);
|
|
1427
|
+
const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
|
|
1428
|
+
if (isMeaningful) imagesWithAlt++;
|
|
1346
1429
|
});
|
|
1347
1430
|
const pageDomain = getDomain(url);
|
|
1348
1431
|
const externalLinks = [];
|
|
@@ -1402,7 +1485,7 @@ import { z } from "zod";
|
|
|
1402
1485
|
|
|
1403
1486
|
// src/modules/analyzer/constants.ts
|
|
1404
1487
|
var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
|
|
1405
|
-
var VERSION = true ? "1.
|
|
1488
|
+
var VERSION = true ? "1.3.0" : "0.0.0";
|
|
1406
1489
|
|
|
1407
1490
|
// src/modules/fetcher/schema.ts
|
|
1408
1491
|
var FetchOptionsSchema = z.object({
|
|
@@ -1482,9 +1565,19 @@ var RECOMMENDATION_BUILDERS = {
|
|
|
1482
1565
|
}
|
|
1483
1566
|
return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
|
|
1484
1567
|
},
|
|
1485
|
-
"LLMs.txt Presence":
|
|
1486
|
-
|
|
1487
|
-
|
|
1568
|
+
"LLMs.txt Presence": (rawData) => {
|
|
1569
|
+
const llms = rawData.llmsTxt;
|
|
1570
|
+
if (!llms) {
|
|
1571
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1572
|
+
}
|
|
1573
|
+
if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
|
|
1574
|
+
return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
|
|
1575
|
+
}
|
|
1576
|
+
if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
|
|
1577
|
+
return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
|
|
1578
|
+
}
|
|
1579
|
+
return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
|
|
1580
|
+
},
|
|
1488
1581
|
"Image Accessibility": (rawData) => {
|
|
1489
1582
|
const images = rawData.imageAccessibility;
|
|
1490
1583
|
if (!images || images.imageCount === 0) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "aiseo-audit",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Lighthouse for AI SEO. Audit any webpage for AI search readiness. 7 categories, 30+ factors, research-backed scoring. Deterministic, engine-agnostic, zero API keys.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|