@agentimization/core 0.1.1 → 0.1.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +344 -3
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -769,11 +769,11 @@ var extractMetaTags = (html) => {
769
769
  const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
770
770
  let match;
771
771
  while ((match = metaRegex.exec(html)) !== null) {
772
- meta[match[1]] = match[2];
772
+ meta[match[1].toLowerCase()] = match[2];
773
773
  }
774
774
  const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
775
775
  while ((match = metaRegex2.exec(html)) !== null) {
776
- meta[match[2]] = match[1];
776
+ meta[match[2].toLowerCase()] = match[1];
777
777
  }
778
778
  return meta;
779
779
  };
@@ -789,6 +789,24 @@ var extractJsonLd = (html) => {
789
789
  }
790
790
  return results;
791
791
  };
792
/**
 * Reads the value of a quoted attribute from the raw attribute text of a tag.
 *
 * Matching is case-insensitive and accepts single- or double-quoted values,
 * with optional whitespace around the `=` sign.
 *
 * @param {string} attrs - Raw attribute text, e.g. ` src="a.png" alt="Logo"`.
 * @param {string} name - Attribute name to look up; matched literally.
 * @returns {string | undefined} The attribute value (possibly the empty
 *   string), or `undefined` when the attribute is not present.
 */
var readAttr = (attrs, name) => {
  // Escape regex metacharacters so the name is always matched literally.
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // A plain `\b` also matches after "-", so `src` would be found inside
  // `data-src`. Reject any preceding attribute-name character instead.
  const re = new RegExp(`(?<![\\w:.-])${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, "i");
  const m = attrs.match(re);
  if (!m) return void 0;
  // Exactly one of the two capture groups participates in a match.
  return m[1] ?? m[2];
};
798
/**
 * Collects every `<img>` tag in an HTML string that carries a `src` attribute.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {{ src: string, alt: string | undefined }[]} One entry per matching
 *   tag in document order; `alt` is `undefined` when the attribute is absent.
 */
var extractImages = (html) => {
  const found = [];
  // String(...) keeps the same coercion that RegExp#exec applies to its input.
  for (const [, attrText] of String(html).matchAll(/<img\b([^>]*)>/gi)) {
    const source = readAttr(attrText, "src");
    // Tags without a src are not reported at all.
    if (source !== void 0) {
      found.push({ src: source, alt: readAttr(attrText, "alt") });
    }
  }
  return found;
};
792
810
  var extractHeadings = (html) => {
793
811
  const headings = [];
794
812
  const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
@@ -902,6 +920,53 @@ var renderingStrategy = {
902
920
  };
903
921
  }
904
922
  };
923
/**
 * Check: every sampled page should contain at least 100 whitespace-separated
 * words once its HTML has been passed through `stripHtml`.
 *
 * Looks at the first 10 sampled pages. Result status is "skip" when nothing was
 * sampled, "pass" when all pages reach 100 words, "warn" when only some do,
 * and "fail" when none do.
 */
var substantialTextContent = {
  id: "substantial-text-content",
  name: "Substantial Text Content",
  category: "page-size",
  description: "Checks for at least 100 words of readable body text",
  weight: 0.8,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "substantial-text-content",
      name: "Substantial Text Content",
      category: "page-size"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    // Word count per page: split on whitespace and drop empty tokens.
    const wordCounts = sample.map(
      (page) => stripHtml(page.html).split(/\s+/).filter((token) => token.length > 0).length
    );
    const totalWords = wordCounts.reduce((sum, count) => sum + count, 0);
    const withSubstantialContent = wordCounts.filter((count) => count >= 100).length;
    const avgWords = Math.round(totalWords / sample.length);
    const metadata = { withSubstantialContent, avgWords };
    if (withSubstantialContent !== sample.length) {
      return {
        ...identity,
        status: withSubstantialContent > 0 ? "warn" : "fail",
        message: `${withSubstantialContent}/${sample.length} pages have \u2265100 words of body text (avg ${avgWords})`,
        suggestion: "Generative engines can't cite pages that are mostly images or short copy. Add at least 100 words of substantive text content per page.",
        metadata
      };
    }
    return {
      ...identity,
      status: "pass",
      message: `All ${sample.length} pages have \u2265100 words of body text (avg ${avgWords})`,
      metadata
    };
  }
};
905
970
  var pageSizeHtml = {
906
971
  id: "page-size-html",
907
972
  name: "Page Size (HTML)",
@@ -1036,6 +1101,7 @@ var contentStartPosition = {
1036
1101
  };
1037
1102
  var pageSizeChecks = [
1038
1103
  renderingStrategy,
1104
+ substantialTextContent,
1039
1105
  pageSizeHtml,
1040
1106
  pageSizeMarkdown,
1041
1107
  contentStartPosition
@@ -1209,13 +1275,105 @@ var tabbedContentSerialization = {
1209
1275
  };
1210
1276
  }
1211
1277
  };
1278
/**
 * Check: at least half of the content images on the sampled pages should have
 * non-blank alt text.
 *
 * Images whose alt attribute is present but blank (`alt=""`) are treated as
 * decorative and left out of the ratio. Images with no alt attribute at all
 * count as content images without alt text. Looks at the first 10 sampled
 * pages. Status is "skip" with no pages, "info" with no (content) images,
 * "pass" at >= 50%, "warn" at >= 25%, otherwise "fail".
 */
var imageAltText = {
  id: "image-alt-text",
  name: "Image Alt Text Coverage",
  category: "content-structure",
  description: "Checks that at least 50% of images have descriptive alt text",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "image-alt-text",
      name: "Image Alt Text Coverage",
      category: "content-structure"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const allImages = sample.flatMap((page) => extractImages(page.html));
    if (allImages.length === 0) {
      return {
        ...identity,
        status: "info",
        message: `No images found across ${sample.length} sampled pages`
      };
    }
    const hasAltText = (img) => img.alt !== void 0 && img.alt.trim().length > 0;
    // Decorative = alt attribute present but blank after trimming.
    const isDecorative = (img) => img.alt !== void 0 && img.alt.trim().length === 0;
    const contentImages = allImages.filter((img) => !isDecorative(img));
    const decorativeImages = allImages.length - contentImages.length;
    if (contentImages.length === 0) {
      return {
        ...identity,
        status: "info",
        message: `All ${allImages.length} sampled images are decorative (alt="")`,
        metadata: { decorativeImages, totalImages: allImages.length }
      };
    }
    const withAlt = contentImages.filter(hasAltText).length;
    const ratio = withAlt / contentImages.length;
    const pct = Math.round(ratio * 100);
    const decorativeNote = decorativeImages > 0 ? `; ${decorativeImages} decorative skipped` : "";
    const summary = `${withAlt}/${contentImages.length} content images have descriptive alt text (${pct}%)${decorativeNote}`;
    const metadata = { withAlt, contentImages: contentImages.length, decorativeImages, pct };
    if (ratio < 0.5) {
      return {
        ...identity,
        status: ratio >= 0.25 ? "warn" : "fail",
        message: summary,
        suggestion: `Add descriptive alt text to at least 50% of content images. AI agents and screen readers rely on alt text to understand visual content. Mark purely decorative images with alt="" so they don't dilute the ratio.`,
        metadata
      };
    }
    return { ...identity, status: "pass", message: summary, metadata };
  }
};
1212
1342
  var contentStructureChecks = [
1213
1343
  markdownCodeFenceValidity,
1214
1344
  sectionHeaderQuality,
1215
- tabbedContentSerialization
1345
+ tabbedContentSerialization,
1346
+ imageAltText
1216
1347
  ];
1217
1348
 
1218
1349
  // src/checks/url-stability.ts
1350
/**
 * Check: the audited base URL should use the `https:` protocol.
 *
 * Reads only `ctx.baseUrl.protocol`. Status is "pass" for `https:` and "fail"
 * for anything else, with the offending scheme named in the message.
 */
var httpsEnabled = {
  id: "https-enabled",
  name: "HTTPS Enabled",
  category: "url-stability",
  description: "Checks if the site is served over HTTPS",
  weight: 0.7,
  requiresNetwork: true,
  run: async (ctx) => {
    // Fields shared by both possible results.
    const identity = {
      id: "https-enabled",
      name: "HTTPS Enabled",
      category: "url-stability"
    };
    const { protocol } = ctx.baseUrl;
    if (protocol !== "https:") {
      // Drop the colon so the message reads "http" rather than "http:".
      const scheme = protocol.replace(":", "");
      return {
        ...identity,
        status: "fail",
        message: `Site is served over ${scheme} \u2014 AI crawlers de-prioritize non-HTTPS sources`,
        suggestion: "Serve your site over HTTPS. AI crawlers like GPTBot, ClaudeBot, and PerplexityBot strongly prefer HTTPS and may skip plain HTTP entirely."
      };
    }
    return { ...identity, status: "pass", message: "Site is served over HTTPS" };
  }
};
1219
1377
  var httpStatusCodes = {
1220
1378
  id: "http-status-codes",
1221
1379
  name: "HTTP Status Codes",
@@ -1327,6 +1485,7 @@ var cacheHeaderHygiene = {
1327
1485
  }
1328
1486
  };
1329
1487
  var urlStabilityChecks = [
1488
+ httpsEnabled,
1330
1489
  httpStatusCodes,
1331
1490
  redirectBehavior,
1332
1491
  cacheHeaderHygiene
@@ -1736,6 +1895,185 @@ var faqSchema = {
1736
1895
  };
1737
1896
  }
1738
1897
  };
1898
/**
 * Check: every sampled page should have a meta description whose trimmed
 * length is between 50 and 160 characters.
 *
 * Looks at the first 10 sampled pages and reads the "description" key from
 * `extractMetaTags`. Status is "skip" with no pages, "pass" when all pages are
 * in range, and "fail" when no page has a description or when at least half
 * are missing one; otherwise "warn".
 */
var metaDescription = {
  id: "meta-description",
  name: "Meta Description",
  category: "geo-signals",
  description: "Checks for a meta description between 50 and 160 characters",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "meta-description",
      name: "Meta Description",
      category: "geo-signals"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const tally = { good: 0, missing: 0, tooShort: 0, tooLong: 0 };
    for (const page of sample) {
      const text = extractMetaTags(page.html)["description"]?.trim();
      if (!text) {
        // Absent or blank after trimming.
        tally.missing += 1;
      } else if (text.length < 50) {
        tally.tooShort += 1;
      } else if (text.length > 160) {
        tally.tooLong += 1;
      } else {
        tally.good += 1;
      }
    }
    const { good: withGoodDescription, missing, tooShort, tooLong } = tally;
    if (withGoodDescription === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have a meta description between 50\u2013160 characters`,
        metadata: { withGoodDescription }
      };
    }
    if (missing === sample.length) {
      return {
        ...identity,
        status: "fail",
        message: "No meta description found on any sampled page",
        suggestion: 'Add a <meta name="description"> between 50 and 160 characters to every page. Generative engines quote meta descriptions when summarizing your content.'
      };
    }
    // Only the non-zero problem buckets are listed in the message.
    const problems = [];
    if (missing > 0) problems.push(`${missing} missing`);
    if (tooShort > 0) problems.push(`${tooShort} too short`);
    if (tooLong > 0) problems.push(`${tooLong} too long`);
    const detail = problems.join(" \xB7 ");
    const detailSuffix = detail ? ` \xB7 ${detail}` : "";
    let suggestion;
    if (missing > 0) {
      suggestion = 'Add a <meta name="description"> between 50 and 160 characters to every page. Some pages are missing it entirely.';
    } else {
      suggestion = "Aim for 50\u2013160 characters. Shorter descriptions lack context for AI; longer ones get truncated.";
    }
    return {
      ...identity,
      status: missing >= sample.length / 2 ? "fail" : "warn",
      message: `${withGoodDescription}/${sample.length} pages have meta descriptions in the 50\u2013160 char range${detailSuffix}`,
      suggestion,
      metadata: { withGoodDescription, missing, tooShort, tooLong }
    };
  }
};
1967
/**
 * Check: every sampled page should declare og:title, og:description, og:image
 * and og:url.
 *
 * Looks at the first 10 sampled pages and reads tags via `extractMetaTags`.
 * A page has "full" coverage with all four tags, "partial" coverage with one
 * to three, and none otherwise. Status is "skip" with no pages, "pass" when
 * every page is fully covered, "fail" when no page has any of the tags, and
 * "warn" in between.
 */
var openGraphTags = {
  id: "open-graph-tags",
  name: "Open Graph Tags",
  category: "geo-signals",
  description: "Checks for og:title, og:description, og:image, and og:url",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "open-graph-tags",
      name: "Open Graph Tags",
      category: "geo-signals"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const required = ["og:title", "og:description", "og:image", "og:url"];
    // One zeroed counter per required tag, in the same order as `required`.
    const missingCounts = Object.fromEntries(required.map((tag) => [tag, 0]));
    let fullCoverage = 0;
    let partialCoverage = 0;
    for (const page of sample) {
      const tags = extractMetaTags(page.html);
      const absent = required.filter((tag) => !tags[tag]);
      absent.forEach((tag) => {
        missingCounts[tag] = (missingCounts[tag] ?? 0) + 1;
      });
      if (absent.length === 0) {
        fullCoverage += 1;
      } else if (absent.length < required.length) {
        partialCoverage += 1;
      }
    }
    if (fullCoverage === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have complete Open Graph tags`
      };
    }
    // Tags that were missing at least once, most frequently missing first.
    const mostMissing = Object.entries(missingCounts)
      .filter(([, count]) => count > 0)
      .sort(([, left], [, right]) => right - left)
      .map(([tag]) => tag);
    const noneCovered = sample.length - fullCoverage - partialCoverage;
    const segments = [`${fullCoverage}/${sample.length} pages have complete Open Graph tags`];
    if (partialCoverage > 0) segments.push(`${partialCoverage} partial`);
    if (noneCovered > 0) segments.push(`${noneCovered} with none`);
    if (mostMissing.length > 0) {
      segments.push(`most often missing: ${mostMissing.slice(0, 2).join(", ")}`);
    }
    return {
      ...identity,
      status: fullCoverage + partialCoverage === 0 ? "fail" : "warn",
      message: segments.join(" \xB7 "),
      suggestion: "Add og:title, og:description, og:image, and og:url to every page. AI engines and link previews use these to render rich citations of your content.",
      metadata: { fullCoverage, partialCoverage, noneCovered, missingCounts }
    };
  }
};
2017
/**
 * Check: sampled pages should each link out to at least 2 external sources.
 *
 * Looks at the first 10 sampled pages and takes their links from
 * `extractLinks(page.html, origin)`. A link counts as external when it parses
 * as an http(s) URL whose hostname differs from the audited site's hostname;
 * a leading "www." is ignored on both sides, so scheme-, port- or www-only
 * variants of the audited site are not counted as citations. Links that do
 * not parse as URLs are ignored instead of aborting the check.
 *
 * Status is "info" in local mode, "skip" with no pages, "pass" when at least
 * 70% of pages have 2+ external links, "warn" when some do, otherwise "fail".
 */
var externalCitations = {
  id: "external-citations",
  name: "External Citations",
  category: "geo-signals",
  description: "Checks for at least 2 outbound links to external sources per page",
  weight: 0.5,
  run: async (ctx) => {
    // Fields shared by every result this check can return.
    const identity = {
      id: "external-citations",
      name: "External Citations",
      category: "geo-signals"
    };
    if (ctx.mode === "local") {
      return {
        ...identity,
        status: "info",
        message: "External link detection requires a live origin to compare against"
      };
    }
    const pages = ctx.sampledPages.slice(0, 10);
    if (pages.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }
    const origin = ctx.baseUrl.origin;
    const stripWww = (hostname) => hostname.replace(/^www\./i, "");
    const siteHost = stripWww(ctx.baseUrl.hostname);
    const isExternal = (href) => {
      let parsed;
      try {
        parsed = new URL(href);
      } catch {
        // A single malformed href must not fail the whole check.
        return false;
      }
      if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false;
      return stripWww(parsed.hostname) !== siteHost;
    };
    let pagesWithCitations = 0;
    let totalExternal = 0;
    for (const page of pages) {
      const externalCount = extractLinks(page.html, origin).filter(isExternal).length;
      totalExternal += externalCount;
      if (externalCount >= 2) pagesWithCitations++;
    }
    const avgExternal = Math.round(totalExternal / pages.length);
    const metadata = { pagesWithCitations, avgExternal };
    if (pagesWithCitations >= pages.length * 0.7) {
      return {
        ...identity,
        status: "pass",
        message: `${pagesWithCitations}/${pages.length} pages have \u22652 outbound links (avg ${avgExternal}/page)`,
        metadata
      };
    }
    return {
      ...identity,
      status: pagesWithCitations > 0 ? "warn" : "fail",
      message: `Only ${pagesWithCitations}/${pages.length} pages have \u22652 outbound links (avg ${avgExternal}/page)`,
      suggestion: "Add at least 2 outbound links to authoritative external sources per page. Citing sources signals credibility to generative engines, which weigh outbound links when deciding what to cite.",
      metadata
    };
  }
};
1739
2077
  var canonicalUrlConsistency = {
1740
2078
  id: "canonical-url-consistency",
1741
2079
  name: "Canonical URL Consistency",
@@ -1806,6 +2144,9 @@ var geoSignalChecks = [
1806
2144
  contentFreshness,
1807
2145
  eeatSignals,
1808
2146
  faqSchema,
2147
+ metaDescription,
2148
+ openGraphTags,
2149
+ externalCitations,
1809
2150
  canonicalUrlConsistency
1810
2151
  ];
1811
2152
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentimization/core",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "GEO audit engine. Check if your website is agent-ready and generative-engine optimized.",
5
5
  "keywords": [
6
6
  "geo",