@agentimization/core 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +344 -3
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -769,11 +769,11 @@ var extractMetaTags = (html) => {
|
|
|
769
769
|
const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
|
|
770
770
|
let match;
|
|
771
771
|
while ((match = metaRegex.exec(html)) !== null) {
|
|
772
|
-
meta[match[1]] = match[2];
|
|
772
|
+
meta[match[1].toLowerCase()] = match[2];
|
|
773
773
|
}
|
|
774
774
|
const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
|
|
775
775
|
while ((match = metaRegex2.exec(html)) !== null) {
|
|
776
|
-
meta[match[2]] = match[1];
|
|
776
|
+
meta[match[2].toLowerCase()] = match[1];
|
|
777
777
|
}
|
|
778
778
|
return meta;
|
|
779
779
|
};
|
|
@@ -789,6 +789,24 @@ var extractJsonLd = (html) => {
|
|
|
789
789
|
}
|
|
790
790
|
return results;
|
|
791
791
|
};
|
|
792
|
+
/**
 * Reads the value of one attribute from the raw attribute text of an HTML tag.
 *
 * The name is matched case-insensitively. Double-quoted, single-quoted and
 * unquoted values are recognised, with optional whitespace around `=`.
 *
 * @param {string} attrs - Text found between the tag name and the closing `>`.
 * @param {string} name - Attribute name to look up.
 * @returns {string | undefined} The attribute value (possibly an empty string),
 *   or `undefined` when the attribute is absent or has no value.
 */
var readAttr = (attrs, name) => {
  // Escape regex metacharacters so the name is always matched literally.
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // The lookbehind only lets the name start at the beginning of the text or
  // right after whitespace / a quote. A plain `\b` also matches after "-",
  // which made a lookup of "src" return the value of `data-src`.
  const re = new RegExp(
    `(?<![^\\s"'])${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'=<>\`]+))`,
    "i"
  );
  const m = attrs.match(re);
  if (!m) return void 0;
  // Exactly one of the three capture groups participates in a match.
  return m[1] ?? m[2] ?? m[3];
};
|
|
798
|
+
/**
 * Collects the `src` and `alt` attributes of every `<img ...>` tag found in
 * an HTML string.
 *
 * Tags for which `readAttr` finds no `src` are left out. `alt` is whatever
 * `readAttr` returns for "alt", so it is `undefined` when the attribute is
 * not found.
 *
 * @param {string} html - Markup to scan.
 * @returns {{ src: string, alt: string | undefined }[]} Images in document order.
 */
var extractImages = (html) => {
  // A fresh /g regex per call keeps the stateful lastIndex local to this scan.
  const tagPattern = /<img\b([^>]*)>/gi;
  const found = [];
  for (let hit = tagPattern.exec(html); hit !== null; hit = tagPattern.exec(html)) {
    const [, attributeText] = hit;
    const source = readAttr(attributeText, "src");
    if (source !== void 0) {
      found.push({ src: source, alt: readAttr(attributeText, "alt") });
    }
  }
  return found;
};
|
|
792
810
|
var extractHeadings = (html) => {
|
|
793
811
|
const headings = [];
|
|
794
812
|
const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
|
|
@@ -902,6 +920,53 @@ var renderingStrategy = {
|
|
|
902
920
|
};
|
|
903
921
|
}
|
|
904
922
|
};
|
|
923
|
+
/**
 * Check definition: do the sampled pages contain a meaningful amount of text?
 *
 * `run` looks at the first 10 entries of `ctx.sampledPages`, passes each
 * page's `html` through `stripHtml` (defined elsewhere in this file) and
 * counts the whitespace-separated tokens of the result.
 *
 * Result status:
 *  - "skip" when no pages were sampled,
 *  - "pass" when every inspected page has at least 100 words,
 *  - "warn" when only some do, "fail" when none do.
 */
var substantialTextContent = {
  id: "substantial-text-content",
  name: "Substantial Text Content",
  category: "page-size",
  description: "Checks for at least 100 words of readable body text",
  weight: 0.8,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "substantial-text-content",
      name: "Substantial Text Content",
      category: "page-size"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    // One word count per page; empty tokens from leading/trailing whitespace are dropped.
    const wordCounts = sample.map(
      (page) => stripHtml(page.html).split(/\s+/).filter((token) => token.length > 0).length
    );
    const totalWords = wordCounts.reduce((sum, count) => sum + count, 0);
    const withSubstantialContent = wordCounts.filter((count) => count >= 100).length;
    const avgWords = Math.round(totalWords / sample.length);
    const metadata = { withSubstantialContent, avgWords };
    if (withSubstantialContent === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have \u2265100 words of body text (avg ${avgWords})`,
        metadata
      };
    }
    return {
      ...identity,
      status: withSubstantialContent > 0 ? "warn" : "fail",
      message: `${withSubstantialContent}/${sample.length} pages have \u2265100 words of body text (avg ${avgWords})`,
      suggestion: "Generative engines can't cite pages that are mostly images or short copy. Add at least 100 words of substantive text content per page.",
      metadata
    };
  }
};
|
|
905
970
|
var pageSizeHtml = {
|
|
906
971
|
id: "page-size-html",
|
|
907
972
|
name: "Page Size (HTML)",
|
|
@@ -1036,6 +1101,7 @@ var contentStartPosition = {
|
|
|
1036
1101
|
};
|
|
1037
1102
|
var pageSizeChecks = [
|
|
1038
1103
|
renderingStrategy,
|
|
1104
|
+
substantialTextContent,
|
|
1039
1105
|
pageSizeHtml,
|
|
1040
1106
|
pageSizeMarkdown,
|
|
1041
1107
|
contentStartPosition
|
|
@@ -1209,13 +1275,105 @@ var tabbedContentSerialization = {
|
|
|
1209
1275
|
};
|
|
1210
1276
|
}
|
|
1211
1277
|
};
|
|
1278
|
+
/**
 * Check definition: how many content images carry alt text?
 *
 * `run` gathers images from the first 10 sampled pages via `extractImages`
 * and sorts each one into one of three buckets:
 *  - no `alt` attribute at all       -> content image without alt text,
 *  - `alt` with non-whitespace text  -> content image with alt text,
 *  - `alt` empty or whitespace-only  -> decorative, excluded from the ratio.
 *
 * Result status:
 *  - "skip" when no pages were sampled,
 *  - "info" when there are no images, or only decorative ones,
 *  - "pass" at a ratio of 0.5 or more, "warn" from 0.25, otherwise "fail".
 */
var imageAltText = {
  id: "image-alt-text",
  name: "Image Alt Text Coverage",
  category: "content-structure",
  description: "Checks that at least 50% of images have descriptive alt text",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "image-alt-text",
      name: "Image Alt Text Coverage",
      category: "content-structure"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const allImages = sample.flatMap((page) => extractImages(page.html));
    // Classify every image exactly once.
    let withAlt = 0;
    let withoutAlt = 0;
    let decorativeImages = 0;
    for (const image of allImages) {
      if (image.alt === void 0) {
        withoutAlt += 1;
      } else if (image.alt.trim().length > 0) {
        withAlt += 1;
      } else {
        decorativeImages += 1;
      }
    }
    const contentCount = withAlt + withoutAlt;
    if (allImages.length === 0) {
      return {
        ...identity,
        status: "info",
        message: `No images found across ${sample.length} sampled pages`
      };
    }
    if (contentCount === 0) {
      return {
        ...identity,
        status: "info",
        message: `All ${allImages.length} sampled images are decorative (alt="")`,
        metadata: { decorativeImages, totalImages: allImages.length }
      };
    }
    const ratio = withAlt / contentCount;
    const pct = Math.round(ratio * 100);
    const skippedNote = decorativeImages > 0 ? `; ${decorativeImages} decorative skipped` : "";
    const summary = `${withAlt}/${contentCount} content images have descriptive alt text (${pct}%)${skippedNote}`;
    const metadata = { withAlt, contentImages: contentCount, decorativeImages, pct };
    if (ratio < 0.5) {
      return {
        ...identity,
        status: ratio >= 0.25 ? "warn" : "fail",
        message: summary,
        suggestion: `Add descriptive alt text to at least 50% of content images. AI agents and screen readers rely on alt text to understand visual content. Mark purely decorative images with alt="" so they don't dilute the ratio.`,
        metadata
      };
    }
    return { ...identity, status: "pass", message: summary, metadata };
  }
};
|
|
1212
1342
|
var contentStructureChecks = [
|
|
1213
1343
|
markdownCodeFenceValidity,
|
|
1214
1344
|
sectionHeaderQuality,
|
|
1215
|
-
tabbedContentSerialization
|
|
1345
|
+
tabbedContentSerialization,
|
|
1346
|
+
imageAltText
|
|
1216
1347
|
];
|
|
1217
1348
|
|
|
1218
1349
|
// src/checks/url-stability.ts
|
|
1350
|
+
/**
 * Check definition: is the audited site addressed over HTTPS?
 *
 * `run` only inspects `ctx.baseUrl.protocol`; it passes when that is exactly
 * "https:" and fails otherwise, naming the protocol actually used (with the
 * first ":" removed) in the failure message.
 */
var httpsEnabled = {
  id: "https-enabled",
  name: "HTTPS Enabled",
  category: "url-stability",
  description: "Checks if the site is served over HTTPS",
  weight: 0.7,
  requiresNetwork: true,
  run: async (ctx) => {
    // Fields shared by both possible results.
    const identity = {
      id: "https-enabled",
      name: "HTTPS Enabled",
      category: "url-stability"
    };
    const { protocol } = ctx.baseUrl;
    if (protocol !== "https:") {
      const scheme = protocol.replace(":", "");
      return {
        ...identity,
        status: "fail",
        message: `Site is served over ${scheme} \u2014 AI crawlers de-prioritize non-HTTPS sources`,
        suggestion: "Serve your site over HTTPS. AI crawlers like GPTBot, ClaudeBot, and PerplexityBot strongly prefer HTTPS and may skip plain HTTP entirely."
      };
    }
    return {
      ...identity,
      status: "pass",
      message: "Site is served over HTTPS"
    };
  }
};
|
|
1219
1377
|
var httpStatusCodes = {
|
|
1220
1378
|
id: "http-status-codes",
|
|
1221
1379
|
name: "HTTP Status Codes",
|
|
@@ -1327,6 +1485,7 @@ var cacheHeaderHygiene = {
|
|
|
1327
1485
|
}
|
|
1328
1486
|
};
|
|
1329
1487
|
var urlStabilityChecks = [
|
|
1488
|
+
httpsEnabled,
|
|
1330
1489
|
httpStatusCodes,
|
|
1331
1490
|
redirectBehavior,
|
|
1332
1491
|
cacheHeaderHygiene
|
|
@@ -1736,6 +1895,185 @@ var faqSchema = {
|
|
|
1736
1895
|
};
|
|
1737
1896
|
}
|
|
1738
1897
|
};
|
|
1898
|
+
/**
 * Check definition: does each sampled page have a meta description of
 * reasonable length?
 *
 * `run` reads the "description" entry returned by `extractMetaTags` for each
 * of the first 10 sampled pages, trims it, and classifies the page as
 * missing (absent or empty), too short (< 50 chars), too long (> 160 chars)
 * or good (50-160 chars inclusive).
 *
 * Result status:
 *  - "skip" when no pages were sampled,
 *  - "pass" when every page is good,
 *  - "fail" when every page is missing, or when at least half are missing,
 *  - "warn" otherwise.
 */
var metaDescription = {
  id: "meta-description",
  name: "Meta Description",
  category: "geo-signals",
  description: "Checks for a meta description between 50 and 160 characters",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "meta-description",
      name: "Meta Description",
      category: "geo-signals"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    const pageCount = sample.length;
    if (pageCount === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const tally = { good: 0, missing: 0, tooShort: 0, tooLong: 0 };
    for (const { html } of sample) {
      const text = extractMetaTags(html)["description"]?.trim();
      if (!text) {
        tally.missing += 1;
      } else if (text.length < 50) {
        tally.tooShort += 1;
      } else if (text.length > 160) {
        tally.tooLong += 1;
      } else {
        tally.good += 1;
      }
    }
    const { good: withGoodDescription, missing, tooShort, tooLong } = tally;
    if (withGoodDescription === pageCount) {
      return {
        ...identity,
        status: "pass",
        message: `All ${pageCount} pages have a meta description between 50\u2013160 characters`,
        metadata: { withGoodDescription }
      };
    }
    if (missing === pageCount) {
      return {
        ...identity,
        status: "fail",
        message: "No meta description found on any sampled page",
        suggestion: 'Add a <meta name="description"> between 50 and 160 characters to every page. Generative engines quote meta descriptions when summarizing your content.'
      };
    }
    // Only the problem categories that actually occurred are listed.
    const problems = [];
    if (missing > 0) problems.push(`${missing} missing`);
    if (tooShort > 0) problems.push(`${tooShort} too short`);
    if (tooLong > 0) problems.push(`${tooLong} too long`);
    const detail = problems.join(" \xB7 ");
    const detailSuffix = detail ? ` \xB7 ${detail}` : "";
    let suggestion;
    if (missing > 0) {
      suggestion = 'Add a <meta name="description"> between 50 and 160 characters to every page. Some pages are missing it entirely.';
    } else {
      suggestion = "Aim for 50\u2013160 characters. Shorter descriptions lack context for AI; longer ones get truncated.";
    }
    return {
      ...identity,
      status: missing >= pageCount / 2 ? "fail" : "warn",
      message: `${withGoodDescription}/${pageCount} pages have meta descriptions in the 50\u2013160 char range${detailSuffix}`,
      suggestion,
      metadata: { withGoodDescription, missing, tooShort, tooLong }
    };
  }
};
|
|
1967
|
+
/**
 * Check definition: do the sampled pages declare the four core Open Graph
 * tags (og:title, og:description, og:image, og:url)?
 *
 * `run` looks up each tag in the object returned by `extractMetaTags` for
 * the first 10 sampled pages. A page has full coverage when none of the four
 * is missing, partial coverage when some but not all are missing, and no
 * coverage when all four are missing.
 *
 * Result status:
 *  - "skip" when no pages were sampled,
 *  - "pass" when every page has full coverage,
 *  - "fail" when no page has any of the tags, otherwise "warn".
 */
var openGraphTags = {
  id: "open-graph-tags",
  name: "Open Graph Tags",
  category: "geo-signals",
  description: "Checks for og:title, og:description, og:image, and og:url",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "open-graph-tags",
      name: "Open Graph Tags",
      category: "geo-signals"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const required = ["og:title", "og:description", "og:image", "og:url"];
    // Number of pages on which each tag was absent.
    const missingCounts = { "og:title": 0, "og:description": 0, "og:image": 0, "og:url": 0 };
    let fullCoverage = 0;
    let partialCoverage = 0;
    for (const { html } of sample) {
      const tags = extractMetaTags(html);
      const absent = required.filter((tag) => !tags[tag]);
      absent.forEach((tag) => {
        missingCounts[tag] += 1;
      });
      if (absent.length === 0) {
        fullCoverage += 1;
      } else if (absent.length !== required.length) {
        partialCoverage += 1;
      }
    }
    if (fullCoverage === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have complete Open Graph tags`
      };
    }
    // Tags that were missing at least once, most frequently missing first.
    const mostMissing = Object.entries(missingCounts)
      .filter(([, count]) => count > 0)
      .sort(([, left], [, right]) => right - left)
      .map(([tag]) => tag);
    const noneCovered = sample.length - fullCoverage - partialCoverage;
    const partialNote = partialCoverage > 0 ? ` \xB7 ${partialCoverage} partial` : "";
    const noneNote = noneCovered > 0 ? ` \xB7 ${noneCovered} with none` : "";
    const missingNote = mostMissing.length > 0 ? ` \xB7 most often missing: ${mostMissing.slice(0, 2).join(", ")}` : "";
    return {
      ...identity,
      status: fullCoverage + partialCoverage === 0 ? "fail" : "warn",
      message: `${fullCoverage}/${sample.length} pages have complete Open Graph tags${partialNote}${noneNote}${missingNote}`,
      suggestion: "Add og:title, og:description, og:image, and og:url to every page. AI engines and link previews use these to render rich citations of your content.",
      metadata: { fullCoverage, partialCoverage, noneCovered, missingCounts }
    };
  }
};
|
|
2017
|
+
/**
 * Check definition: do the sampled pages link out to external sources?
 *
 * `run` returns an "info" result straight away in local mode. Otherwise it
 * takes the links returned by `extractLinks` for each of the first 10
 * sampled pages and counts those whose protocol starts with "http" and whose
 * origin differs from `ctx.baseUrl.origin`.
 *
 * Links are parsed against the site origin, so relative hrefs resolve to
 * internal URLs, and links that still cannot be parsed are ignored. A single
 * malformed href therefore no longer rejects the whole check.
 * NOTE(review): `extractLinks` is defined elsewhere and may already
 * normalise links to absolute URLs; the guard is harmless in that case.
 *
 * Result status:
 *  - "info" in local mode, "skip" when no pages were sampled,
 *  - "pass" when at least 70% of pages have 2 or more external links,
 *  - "warn" when at least one page does, otherwise "fail".
 */
var externalCitations = {
  id: "external-citations",
  name: "External Citations",
  category: "geo-signals",
  description: "Checks for at least 2 outbound links to external sources per page",
  weight: 0.5,
  run: async (ctx) => {
    if (ctx.mode === "local") {
      return {
        id: "external-citations",
        name: "External Citations",
        category: "geo-signals",
        status: "info",
        message: "External link detection requires a live origin to compare against"
      };
    }
    const pages = ctx.sampledPages.slice(0, 10);
    if (pages.length === 0) {
      return {
        id: "external-citations",
        name: "External Citations",
        category: "geo-signals",
        status: "skip",
        message: "No pages sampled"
      };
    }
    const origin = ctx.baseUrl.origin;
    // True for http(s) links pointing at another origin; unparseable links count as not external.
    const isExternal = (link) => {
      let parsed;
      try {
        parsed = new URL(link, origin);
      } catch {
        return false;
      }
      return parsed.protocol.startsWith("http") && parsed.origin !== origin;
    };
    let pagesWithCitations = 0;
    let totalExternal = 0;
    for (const page of pages) {
      const external = extractLinks(page.html, origin).filter(isExternal);
      totalExternal += external.length;
      if (external.length >= 2) pagesWithCitations++;
    }
    const avgExternal = Math.round(totalExternal / pages.length);
    if (pagesWithCitations >= pages.length * 0.7) {
      return {
        id: "external-citations",
        name: "External Citations",
        category: "geo-signals",
        status: "pass",
        message: `${pagesWithCitations}/${pages.length} pages have \u22652 outbound links (avg ${avgExternal}/page)`,
        metadata: { pagesWithCitations, avgExternal }
      };
    }
    return {
      id: "external-citations",
      name: "External Citations",
      category: "geo-signals",
      status: pagesWithCitations > 0 ? "warn" : "fail",
      message: `Only ${pagesWithCitations}/${pages.length} pages have \u22652 outbound links (avg ${avgExternal}/page)`,
      suggestion: "Add at least 2 outbound links to authoritative external sources per page. Citing sources signals credibility to generative engines, which weigh outbound links when deciding what to cite.",
      metadata: { pagesWithCitations, avgExternal }
    };
  }
};
|
|
1739
2077
|
var canonicalUrlConsistency = {
|
|
1740
2078
|
id: "canonical-url-consistency",
|
|
1741
2079
|
name: "Canonical URL Consistency",
|
|
@@ -1806,6 +2144,9 @@ var geoSignalChecks = [
|
|
|
1806
2144
|
contentFreshness,
|
|
1807
2145
|
eeatSignals,
|
|
1808
2146
|
faqSchema,
|
|
2147
|
+
metaDescription,
|
|
2148
|
+
openGraphTags,
|
|
2149
|
+
externalCitations,
|
|
1809
2150
|
canonicalUrlConsistency
|
|
1810
2151
|
];
|
|
1811
2152
|
|