agentimization 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +6 -8
  2. package/dist/index.js +478 -199
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -2,14 +2,12 @@
2
2
 
3
3
  [![npm version](https://img.shields.io/npm/v/agentimization?style=flat-square&color=blue)](https://www.npmjs.com/package/agentimization)
4
4
 
5
- ```text
6
- ╭───────────────────────────────────────────────╮
7
- ▓░▒▓░░▒░▓▒░▓▓░▒░▓░░▒▓▒░▓░░▓▒░▓░▒░▓░░▒▓░░ │
8
- ░▓▒░▓░░▒▓▒░▓░░▒▓▓░▒░▓▒░░▓▒░▓░▒░░▓▒░░▓░▒ │
9
- │ ▓░▒▓░░▒▓▒░░▓░▒▓▒░░▓░░▓▒░▓░▒░░▓▒░▓░░▒▓░ │
10
- │ ░▒▓░▒░▓▒░░▓░▒▓░░▒▓▒░░▓░▒▓░░▒▓░ agentimization │
11
- ╰───────────────────────────────────────────────╯
12
- ```
5
+ <p align="center">
6
+ <picture>
7
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/antlio/agentimization/main/assets/hero-loading-dark.svg">
8
+ <img src="https://raw.githubusercontent.com/antlio/agentimization/main/assets/hero-loading-light.svg" alt="agentimization" width="620">
9
+ </picture>
10
+ </p>
13
11
 
14
12
  geo audit for agent-ready websites and projects.
15
13
 
package/dist/index.js CHANGED
@@ -4105,6 +4105,140 @@ var DEFAULT_CONFIG = {
4105
4105
  onEvent: () => {
4106
4106
  }
4107
4107
  };
4108
+ var stripHtml = (html) => html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
4109
+ var extractMarkdownLinks = (markdown) => {
4110
+ const links = [];
4111
+ const linkRegex = /\[.+?\]\(([^)]+)\)/g;
4112
+ let match;
4113
+ while ((match = linkRegex.exec(markdown)) !== null) {
4114
+ links.push(match[1]);
4115
+ }
4116
+ return links;
4117
+ };
4118
+ var extractLinks = (html, baseUrl) => {
4119
+ const links = [];
4120
+ const linkRegex = /<a[^>]+href=["']([^"']+)["']/gi;
4121
+ let match;
4122
+ while ((match = linkRegex.exec(html)) !== null) {
4123
+ try {
4124
+ const resolved = new URL(match[1], baseUrl).href;
4125
+ links.push(resolved);
4126
+ } catch {
4127
+ }
4128
+ }
4129
+ return links;
4130
+ };
4131
+ var extractMetaTags = (html) => {
4132
+ const meta = {};
4133
+ const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
4134
+ let match;
4135
+ while ((match = metaRegex.exec(html)) !== null) {
4136
+ meta[match[1].toLowerCase()] = match[2];
4137
+ }
4138
+ const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
4139
+ while ((match = metaRegex2.exec(html)) !== null) {
4140
+ meta[match[2].toLowerCase()] = match[1];
4141
+ }
4142
+ return meta;
4143
+ };
4144
+ var extractJsonLd = (html) => {
4145
+ const results = [];
4146
+ const regex = /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
4147
+ let match;
4148
+ while ((match = regex.exec(html)) !== null) {
4149
+ try {
4150
+ results.push(JSON.parse(match[1]));
4151
+ } catch {
4152
+ }
4153
+ }
4154
+ return results;
4155
+ };
4156
+ var readAttr = (attrs, name) => {
4157
+ const re = new RegExp(`\\b${name}=(?:"([^"]*)"|'([^']*)')`, "i");
4158
+ const m = attrs.match(re);
4159
+ if (!m) return void 0;
4160
+ return m[1] ?? m[2];
4161
+ };
4162
+ var extractImages = (html) => {
4163
+ const images = [];
4164
+ const imgRegex = /<img\b([^>]*)>/gi;
4165
+ let match;
4166
+ while ((match = imgRegex.exec(html)) !== null) {
4167
+ const attrs = match[1];
4168
+ const src = readAttr(attrs, "src");
4169
+ if (src === void 0) continue;
4170
+ images.push({ src, alt: readAttr(attrs, "alt") });
4171
+ }
4172
+ return images;
4173
+ };
4174
+ var extractHeadings = (html) => {
4175
+ const headings = [];
4176
+ const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
4177
+ let match;
4178
+ while ((match = regex.exec(html)) !== null) {
4179
+ headings.push({
4180
+ level: parseInt(match[1], 10),
4181
+ text: stripHtml(match[2]).trim()
4182
+ });
4183
+ }
4184
+ return headings;
4185
+ };
4186
+ var hasServerRenderedContent = (html) => {
4187
+ const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
4188
+ const textContent = stripHtml(withoutScripts);
4189
+ return textContent.length > 100;
4190
+ };
4191
+ var findContentStartPosition = (html) => {
4192
+ const markers = [
4193
+ /<main[\s>]/i,
4194
+ /<article[\s>]/i,
4195
+ /id=["']content["']/i,
4196
+ /id=["']main["']/i,
4197
+ /class=["'][^"']*content[^"']*["']/i,
4198
+ /role=["']main["']/i
4199
+ ];
4200
+ for (const marker of markers) {
4201
+ const match = html.search(marker);
4202
+ if (match >= 0) {
4203
+ return match / html.length;
4204
+ }
4205
+ }
4206
+ const firstP = html.search(/<p[\s>]/i);
4207
+ if (firstP >= 0) {
4208
+ return firstP / html.length;
4209
+ }
4210
+ return 0.5;
4211
+ };
4212
+ var extractCodeFences = (markdown) => {
4213
+ const fences = [];
4214
+ const lines = markdown.split("\n");
4215
+ let inFence = false;
4216
+ let currentLang = "";
4217
+ for (const line of lines) {
4218
+ const openMatch = line.match(/^```(\w*)/);
4219
+ if (openMatch && !inFence) {
4220
+ inFence = true;
4221
+ currentLang = openMatch[1] ?? "";
4222
+ } else if (line.trim() === "```" && inFence) {
4223
+ fences.push({ lang: currentLang, closed: true });
4224
+ inFence = false;
4225
+ currentLang = "";
4226
+ }
4227
+ }
4228
+ if (inFence) {
4229
+ fences.push({ lang: currentLang, closed: false });
4230
+ }
4231
+ return fences;
4232
+ };
4233
+ var parseSitemapUrls = (xml) => {
4234
+ const urls = [];
4235
+ const regex = /<loc>([^<]+)<\/loc>/gi;
4236
+ let match;
4237
+ while ((match = regex.exec(xml)) !== null) {
4238
+ urls.push(match[1].trim());
4239
+ }
4240
+ return urls;
4241
+ };
4108
4242
  var llmsTxtExists = {
4109
4243
  id: "llms-txt-exists",
4110
4244
  name: "llms.txt Exists",
@@ -4144,7 +4278,7 @@ var llmsTxtValid = {
4144
4278
  name: "llms.txt Valid Structure",
4145
4279
  category: "content-discoverability",
4146
4280
  status: "skip",
4147
- message: "Skipped \u2014 no llms.txt found"
4281
+ message: "Skipped: no llms.txt found"
4148
4282
  };
4149
4283
  }
4150
4284
  const issues = [];
@@ -4189,7 +4323,7 @@ var llmsTxtSize = {
4189
4323
  name: "llms.txt Size",
4190
4324
  category: "content-discoverability",
4191
4325
  status: "skip",
4192
- message: "Skipped \u2014 no llms.txt found"
4326
+ message: "Skipped: no llms.txt found"
4193
4327
  };
4194
4328
  }
4195
4329
  const size = ctx.llmsTxt.length;
@@ -4227,7 +4361,7 @@ var llmsTxtFreshness = {
4227
4361
  name: "llms.txt Coverage",
4228
4362
  category: "content-discoverability",
4229
4363
  status: "skip",
4230
- message: "Skipped \u2014 no llms.txt found"
4364
+ message: "Skipped: no llms.txt found"
4231
4365
  };
4232
4366
  }
4233
4367
  if (ctx.sitemapUrls.length === 0) {
@@ -4257,11 +4391,9 @@ var llmsTxtFreshness = {
4257
4391
  return null;
4258
4392
  }
4259
4393
  };
4260
- const linkRegex = /\[.+?\]\(([^)]+)\)/g;
4261
4394
  const llmsKeys = /* @__PURE__ */ new Set();
4262
- let match;
4263
- while ((match = linkRegex.exec(ctx.llmsTxt)) !== null) {
4264
- const k = keyFor(match[1]);
4395
+ for (const link of extractMarkdownLinks(ctx.llmsTxt)) {
4396
+ const k = keyFor(link);
4265
4397
  if (k) llmsKeys.add(k);
4266
4398
  }
4267
4399
  const sitemapKeys = /* @__PURE__ */ new Set();
@@ -4301,7 +4433,7 @@ var llmsTxtFreshness = {
4301
4433
  category: "content-discoverability",
4302
4434
  status: coveragePct >= 40 || freshnessPct >= 70 ? "warn" : "fail",
4303
4435
  message: `${message}${missingFromLlms > 0 ? ` \xB7 ${missingFromLlms} sitemap pages not in llms.txt` : ""}${staleInLlms > 0 ? ` \xB7 ${staleInLlms} llms.txt links not in sitemap` : ""}`,
4304
- suggestion: coveragePct < freshnessPct ? "Add missing sitemap pages to llms.txt to improve AI agent discoverability." : "Some llms.txt links aren't in the sitemap \u2014 they may be stale or your sitemap may be incomplete.",
4436
+ suggestion: coveragePct < freshnessPct ? "Add missing sitemap pages to llms.txt to improve AI agent discoverability." : "Some llms.txt links aren't in the sitemap. They may be stale or your sitemap may be incomplete.",
4305
4437
  metadata: {
4306
4438
  coveragePct,
4307
4439
  freshnessPct,
@@ -4327,15 +4459,13 @@ var llmsTxtLinksResolve = {
4327
4459
  name: "llms.txt Links Resolve",
4328
4460
  category: "content-discoverability",
4329
4461
  status: "skip",
4330
- message: "Skipped \u2014 no llms.txt found"
4462
+ message: "Skipped: no llms.txt found"
4331
4463
  };
4332
4464
  }
4333
- const linkRegex = /\[.+?\]\(([^)]+)\)/g;
4334
4465
  const urls = [];
4335
- let match;
4336
- while ((match = linkRegex.exec(ctx.llmsTxt)) !== null) {
4466
+ for (const link of extractMarkdownLinks(ctx.llmsTxt)) {
4337
4467
  try {
4338
- const resolved2 = new URL(match[1], ctx.baseUrl.origin);
4468
+ const resolved2 = new URL(link, ctx.baseUrl.origin);
4339
4469
  if (resolved2.origin === ctx.baseUrl.origin) {
4340
4470
  urls.push(resolved2.href);
4341
4471
  }
@@ -4376,7 +4506,7 @@ var llmsTxtLinksResolve = {
4376
4506
  name: "llms.txt Links Resolve",
4377
4507
  category: "content-discoverability",
4378
4508
  status: "fail",
4379
- message: `${resolved}/${sampled.length} sampled links resolve \u2014 ${sampled.length - resolved} broken`,
4509
+ message: `${resolved}/${sampled.length} sampled links resolve, ${sampled.length - resolved} broken`,
4380
4510
  suggestion: "Fix broken links in llms.txt. AI agents will fail to fetch these pages.",
4381
4511
  metadata: { resolved, sampled: sampled.length, total: urls.length }
4382
4512
  };
@@ -4395,15 +4525,10 @@ var llmsTxtLinksMarkdown = {
4395
4525
  name: "llms.txt Links Markdown",
4396
4526
  category: "content-discoverability",
4397
4527
  status: "skip",
4398
- message: "Skipped \u2014 no llms.txt found"
4528
+ message: "Skipped: no llms.txt found"
4399
4529
  };
4400
4530
  }
4401
- const linkRegex = /\[.+?\]\(([^)]+)\)/g;
4402
- const urls = [];
4403
- let m;
4404
- while ((m = linkRegex.exec(ctx.llmsTxt)) !== null) {
4405
- urls.push(m[1]);
4406
- }
4531
+ const urls = extractMarkdownLinks(ctx.llmsTxt);
4407
4532
  if (urls.length === 0) {
4408
4533
  return {
4409
4534
  id: "llms-txt-links-markdown",
@@ -4451,7 +4576,7 @@ var llmsTxtLinksMarkdown = {
4451
4576
  category: "content-discoverability",
4452
4577
  status: "fail",
4453
4578
  message: `Only ${mdLinks}/${urls.length} llms.txt links point to .md URLs (${pct}%)`,
4454
- suggestion: "Most llms.txt links are HTML-only. Serve a markdown version at .md URLs and link to those \u2014 agents get cleaner content and fewer parse failures.",
4579
+ suggestion: "Most llms.txt links are HTML-only. Serve a markdown version at .md URLs and link to those, so agents get cleaner content and fewer parse failures.",
4455
4580
  metadata: { mdLinks, total: urls.length, pct }
4456
4581
  };
4457
4582
  }
@@ -4553,6 +4678,176 @@ var robotsTxtAgentRules = {
4553
4678
  };
4554
4679
  }
4555
4680
  };
4681
+ var llmsFullExists = {
4682
+ id: "llms-full-exists",
4683
+ name: "llms-full.txt Exists",
4684
+ category: "content-discoverability",
4685
+ description: "Checks if llms-full.txt (the complete-content variant) is present at the site root",
4686
+ weight: 0.4,
4687
+ run: async (ctx) => {
4688
+ if (ctx.llmsFullTxt) {
4689
+ return {
4690
+ id: "llms-full-exists",
4691
+ name: "llms-full.txt Exists",
4692
+ category: "content-discoverability",
4693
+ status: "pass",
4694
+ message: ctx.mode === "local" ? "llms-full.txt found in project root" : `llms-full.txt found at ${ctx.baseUrl.origin}/llms-full.txt`
4695
+ };
4696
+ }
4697
+ return {
4698
+ id: "llms-full-exists",
4699
+ name: "llms-full.txt Exists",
4700
+ category: "content-discoverability",
4701
+ status: "info",
4702
+ message: "No llms-full.txt found (optional)",
4703
+ suggestion: "If your llms.txt is large or you want agents to get full content in one fetch, add a /llms-full.txt containing the concatenated markdown of your docs."
4704
+ };
4705
+ }
4706
+ };
4707
+ var llmsFullValid = {
4708
+ id: "llms-full-valid",
4709
+ name: "llms-full.txt Valid Structure",
4710
+ category: "content-discoverability",
4711
+ description: "Checks if llms-full.txt has recognizable markdown structure (headings, content)",
4712
+ weight: 0.4,
4713
+ run: async (ctx) => {
4714
+ if (!ctx.llmsFullTxt) {
4715
+ return {
4716
+ id: "llms-full-valid",
4717
+ name: "llms-full.txt Valid Structure",
4718
+ category: "content-discoverability",
4719
+ status: "skip",
4720
+ message: "Skipped: no llms-full.txt found"
4721
+ };
4722
+ }
4723
+ const hasHeadings = /^#{1,3}\s+/m.test(ctx.llmsFullTxt);
4724
+ const hasProse = ctx.llmsFullTxt.length > 600;
4725
+ if (hasHeadings && hasProse) {
4726
+ return {
4727
+ id: "llms-full-valid",
4728
+ name: "llms-full.txt Valid Structure",
4729
+ category: "content-discoverability",
4730
+ status: "pass",
4731
+ message: "llms-full.txt has recognizable markdown structure"
4732
+ };
4733
+ }
4734
+ return {
4735
+ id: "llms-full-valid",
4736
+ name: "llms-full.txt Valid Structure",
4737
+ category: "content-discoverability",
4738
+ status: "warn",
4739
+ message: `llms-full.txt found but ${!hasHeadings ? "has no markdown headings" : "has little content"}`,
4740
+ suggestion: "llms-full.txt should contain the full markdown content of your docs, with headings, so agents can parse it."
4741
+ };
4742
+ }
4743
+ };
4744
+ var llmsFullSize = {
4745
+ id: "llms-full-size",
4746
+ name: "llms-full.txt Size",
4747
+ category: "content-discoverability",
4748
+ description: "Checks if llms-full.txt size is within the expected range (substantial but not excessive)",
4749
+ weight: 0.3,
4750
+ run: async (ctx) => {
4751
+ if (!ctx.llmsFullTxt) {
4752
+ return {
4753
+ id: "llms-full-size",
4754
+ name: "llms-full.txt Size",
4755
+ category: "content-discoverability",
4756
+ status: "skip",
4757
+ message: "Skipped: no llms-full.txt found"
4758
+ };
4759
+ }
4760
+ const size = ctx.llmsFullTxt.length;
4761
+ const MIN = 1e4;
4762
+ const MAX = 5e6;
4763
+ if (size >= MIN && size <= MAX) {
4764
+ return {
4765
+ id: "llms-full-size",
4766
+ name: "llms-full.txt Size",
4767
+ category: "content-discoverability",
4768
+ status: "pass",
4769
+ message: `llms-full.txt is ${size.toLocaleString()} characters (within expected range)`,
4770
+ metadata: { size }
4771
+ };
4772
+ }
4773
+ return {
4774
+ id: "llms-full-size",
4775
+ name: "llms-full.txt Size",
4776
+ category: "content-discoverability",
4777
+ status: "warn",
4778
+ message: size < MIN ? `llms-full.txt is only ${size.toLocaleString()} characters, smaller than expected for a full-content file` : `llms-full.txt is ${size.toLocaleString()} characters, large enough to overflow agent context windows`,
4779
+ suggestion: size < MIN ? "llms-full.txt should contain your complete documentation. If it's this small, llms.txt alone may be enough." : "Consider trimming llms-full.txt or splitting content so agents can fetch what fits their context window.",
4780
+ metadata: { size }
4781
+ };
4782
+ }
4783
+ };
4784
+ var llmsFullLinksResolve = {
4785
+ id: "llms-full-links-resolve",
4786
+ name: "llms-full.txt Links Resolve",
4787
+ category: "content-discoverability",
4788
+ description: "Checks if links in llms-full.txt return 200 OK",
4789
+ weight: 0.4,
4790
+ requiresNetwork: true,
4791
+ run: async (ctx) => {
4792
+ if (!ctx.llmsFullTxt) {
4793
+ return {
4794
+ id: "llms-full-links-resolve",
4795
+ name: "llms-full.txt Links Resolve",
4796
+ category: "content-discoverability",
4797
+ status: "skip",
4798
+ message: "Skipped: no llms-full.txt found"
4799
+ };
4800
+ }
4801
+ const urls = [];
4802
+ for (const link of extractMarkdownLinks(ctx.llmsFullTxt)) {
4803
+ try {
4804
+ const resolved2 = new URL(link, ctx.baseUrl.origin);
4805
+ if (resolved2.origin === ctx.baseUrl.origin) {
4806
+ urls.push(resolved2.href);
4807
+ }
4808
+ } catch {
4809
+ }
4810
+ }
4811
+ if (urls.length === 0) {
4812
+ return {
4813
+ id: "llms-full-links-resolve",
4814
+ name: "llms-full.txt Links Resolve",
4815
+ category: "content-discoverability",
4816
+ status: "info",
4817
+ message: "No same-origin links found in llms-full.txt"
4818
+ };
4819
+ }
4820
+ const sampled = urls.slice(0, 10);
4821
+ const results = await Promise.allSettled(
4822
+ sampled.map(async (url) => {
4823
+ const resp = await fetch(url, { method: "HEAD", redirect: "follow" });
4824
+ return { url, status: resp.status };
4825
+ })
4826
+ );
4827
+ const resolved = results.filter(
4828
+ (r) => r.status === "fulfilled" && r.value.status >= 200 && r.value.status < 400
4829
+ ).length;
4830
+ if (resolved === sampled.length) {
4831
+ return {
4832
+ id: "llms-full-links-resolve",
4833
+ name: "llms-full.txt Links Resolve",
4834
+ category: "content-discoverability",
4835
+ status: "pass",
4836
+ message: `All ${resolved} sampled same-origin links resolve (${urls.length} total links)`,
4837
+ metadata: { resolved, sampled: sampled.length, total: urls.length }
4838
+ };
4839
+ }
4840
+ return {
4841
+ id: "llms-full-links-resolve",
4842
+ name: "llms-full.txt Links Resolve",
4843
+ category: "content-discoverability",
4844
+ status: "fail",
4845
+ message: `${resolved}/${sampled.length} sampled links resolve, ${sampled.length - resolved} broken`,
4846
+ suggestion: "Fix broken links in llms-full.txt. AI agents will fail to fetch these pages.",
4847
+ metadata: { resolved, sampled: sampled.length, total: urls.length }
4848
+ };
4849
+ }
4850
+ };
4556
4851
  var contentDiscoverabilityChecks = [
4557
4852
  llmsTxtExists,
4558
4853
  llmsTxtValid,
@@ -4560,21 +4855,29 @@ var contentDiscoverabilityChecks = [
4560
4855
  llmsTxtFreshness,
4561
4856
  llmsTxtLinksResolve,
4562
4857
  llmsTxtLinksMarkdown,
4858
+ llmsFullExists,
4859
+ llmsFullValid,
4860
+ llmsFullSize,
4861
+ llmsFullLinksResolve,
4563
4862
  sitemapExists,
4564
4863
  robotsTxtAgentRules
4565
4864
  ];
4566
- var makeHeaders = (config) => ({
4865
+ var BROWSER_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
4866
+ var makeHeaders = (config, asBrowser = false) => asBrowser ? {
4867
+ "User-Agent": BROWSER_UA,
4868
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
4869
+ } : {
4567
4870
  "User-Agent": config.userAgent ?? DEFAULT_CONFIG.userAgent,
4568
4871
  Accept: "text/html,application/xhtml+xml,text/markdown,text/plain,*/*"
4569
- });
4570
- var fetchPage = async (url, config = {}) => {
4872
+ };
4873
+ var fetchPage = async (url, config = {}, asBrowser = false) => {
4571
4874
  const timeout = config.timeout ?? DEFAULT_CONFIG.timeout;
4572
4875
  const start = Date.now();
4573
4876
  const controller = new AbortController();
4574
4877
  const timer = setTimeout(() => controller.abort(), timeout);
4575
4878
  try {
4576
4879
  const response = await fetch(url, {
4577
- headers: makeHeaders(config),
4880
+ headers: makeHeaders(config, asBrowser),
4578
4881
  signal: controller.signal,
4579
4882
  redirect: "follow"
4580
4883
  });
@@ -4627,13 +4930,13 @@ var fetchWithContentNegotiation = async (url, accept, config = {}) => {
4627
4930
  clearTimeout(timer);
4628
4931
  }
4629
4932
  };
4630
- var fetchMany = async (urls, config = {}) => {
4933
+ var fetchMany = async (urls, config = {}, asBrowser = false) => {
4631
4934
  const concurrency = config.concurrency ?? DEFAULT_CONFIG.concurrency;
4632
4935
  const results = [];
4633
4936
  for (let i = 0; i < urls.length; i += concurrency) {
4634
4937
  const chunk = urls.slice(i, i + concurrency);
4635
4938
  const chunkResults = await Promise.allSettled(
4636
- chunk.map((url) => fetchPage(url, config))
4939
+ chunk.map((url) => fetchPage(url, config, asBrowser))
4637
4940
  );
4638
4941
  for (const result of chunkResults) {
4639
4942
  if (result.status === "fulfilled") {
@@ -4796,131 +5099,6 @@ var markdownAvailabilityChecks = [
4796
5099
  contentNegotiation,
4797
5100
  markdownContentParity
4798
5101
  ];
4799
- var stripHtml = (html) => html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
4800
- var extractLinks = (html, baseUrl) => {
4801
- const links = [];
4802
- const linkRegex = /<a[^>]+href=["']([^"']+)["']/gi;
4803
- let match;
4804
- while ((match = linkRegex.exec(html)) !== null) {
4805
- try {
4806
- const resolved = new URL(match[1], baseUrl).href;
4807
- links.push(resolved);
4808
- } catch {
4809
- }
4810
- }
4811
- return links;
4812
- };
4813
- var extractMetaTags = (html) => {
4814
- const meta = {};
4815
- const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
4816
- let match;
4817
- while ((match = metaRegex.exec(html)) !== null) {
4818
- meta[match[1].toLowerCase()] = match[2];
4819
- }
4820
- const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
4821
- while ((match = metaRegex2.exec(html)) !== null) {
4822
- meta[match[2].toLowerCase()] = match[1];
4823
- }
4824
- return meta;
4825
- };
4826
- var extractJsonLd = (html) => {
4827
- const results = [];
4828
- const regex = /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
4829
- let match;
4830
- while ((match = regex.exec(html)) !== null) {
4831
- try {
4832
- results.push(JSON.parse(match[1]));
4833
- } catch {
4834
- }
4835
- }
4836
- return results;
4837
- };
4838
- var readAttr = (attrs, name) => {
4839
- const re = new RegExp(`\\b${name}=(?:"([^"]*)"|'([^']*)')`, "i");
4840
- const m = attrs.match(re);
4841
- if (!m) return void 0;
4842
- return m[1] ?? m[2];
4843
- };
4844
- var extractImages = (html) => {
4845
- const images = [];
4846
- const imgRegex = /<img\b([^>]*)>/gi;
4847
- let match;
4848
- while ((match = imgRegex.exec(html)) !== null) {
4849
- const attrs = match[1];
4850
- const src = readAttr(attrs, "src");
4851
- if (src === void 0) continue;
4852
- images.push({ src, alt: readAttr(attrs, "alt") });
4853
- }
4854
- return images;
4855
- };
4856
- var extractHeadings = (html) => {
4857
- const headings = [];
4858
- const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
4859
- let match;
4860
- while ((match = regex.exec(html)) !== null) {
4861
- headings.push({
4862
- level: parseInt(match[1], 10),
4863
- text: stripHtml(match[2]).trim()
4864
- });
4865
- }
4866
- return headings;
4867
- };
4868
- var hasServerRenderedContent = (html) => {
4869
- const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
4870
- const textContent = stripHtml(withoutScripts);
4871
- return textContent.length > 100;
4872
- };
4873
- var findContentStartPosition = (html) => {
4874
- const markers = [
4875
- /<main[\s>]/i,
4876
- /<article[\s>]/i,
4877
- /id=["']content["']/i,
4878
- /id=["']main["']/i,
4879
- /class=["'][^"']*content[^"']*["']/i,
4880
- /role=["']main["']/i
4881
- ];
4882
- for (const marker of markers) {
4883
- const match = html.search(marker);
4884
- if (match >= 0) {
4885
- return match / html.length;
4886
- }
4887
- }
4888
- const firstP = html.search(/<p[\s>]/i);
4889
- if (firstP >= 0) {
4890
- return firstP / html.length;
4891
- }
4892
- return 0.5;
4893
- };
4894
- var extractCodeFences = (markdown) => {
4895
- const fences = [];
4896
- const lines = markdown.split("\n");
4897
- let inFence = false;
4898
- let currentLang = "";
4899
- for (const line of lines) {
4900
- const openMatch = line.match(/^```(\w*)/);
4901
- if (openMatch && !inFence) {
4902
- inFence = true;
4903
- currentLang = openMatch[1] ?? "";
4904
- } else if (line.trim() === "```" && inFence) {
4905
- fences.push({ lang: currentLang, closed: true });
4906
- inFence = false;
4907
- currentLang = "";
4908
- }
4909
- }
4910
- if (inFence) {
4911
- fences.push({ lang: currentLang, closed: false });
4912
- }
4913
- return fences;
4914
- };
4915
- var parseSitemapUrls = (xml) => {
4916
- const urls = [];
4917
- const regex = /<loc>([^<]+)<\/loc>/gi;
4918
- let match;
4919
- while ((match = regex.exec(xml)) !== null) {
4920
- urls.push(match[1].trim());
4921
- }
4922
- return urls;
4923
- };
4924
5102
  var MAX_HTML_CHARS = 5e4;
4925
5103
  var MAX_MD_CHARS = 5e4;
4926
5104
  var renderingStrategy = {
@@ -6239,6 +6417,56 @@ var mcpServerCard = {
6239
6417
  }
6240
6418
  }
6241
6419
  };
6420
+ var mcpToolCount = {
6421
+ id: "mcp-tool-count",
6422
+ name: "MCP Tool Count",
6423
+ category: "agent-protocols",
6424
+ description: "Checks that the MCP server card exposes at least one tool",
6425
+ weight: 0.4,
6426
+ run: async (ctx) => {
6427
+ if (!ctx.mcpServerCard) {
6428
+ return {
6429
+ id: "mcp-tool-count",
6430
+ name: "MCP Tool Count",
6431
+ category: "agent-protocols",
6432
+ status: "skip",
6433
+ message: "Skipped: no MCP server card found"
6434
+ };
6435
+ }
6436
+ let card;
6437
+ try {
6438
+ card = JSON.parse(ctx.mcpServerCard);
6439
+ } catch {
6440
+ return {
6441
+ id: "mcp-tool-count",
6442
+ name: "MCP Tool Count",
6443
+ category: "agent-protocols",
6444
+ status: "skip",
6445
+ message: "Skipped: MCP server card is invalid JSON"
6446
+ };
6447
+ }
6448
+ const toolCount = Array.isArray(card.tools) ? card.tools.length : Array.isArray(card.capabilities?.tools) ? card.capabilities.tools.length : 0;
6449
+ if (toolCount > 0) {
6450
+ return {
6451
+ id: "mcp-tool-count",
6452
+ name: "MCP Tool Count",
6453
+ category: "agent-protocols",
6454
+ status: "pass",
6455
+ message: `MCP server exposes ${toolCount} tool${toolCount === 1 ? "" : "s"}`,
6456
+ metadata: { toolCount }
6457
+ };
6458
+ }
6459
+ return {
6460
+ id: "mcp-tool-count",
6461
+ name: "MCP Tool Count",
6462
+ category: "agent-protocols",
6463
+ status: "warn",
6464
+ message: "MCP server card found but exposes no tools",
6465
+ suggestion: "List your MCP server's tools in the server card so agents know what actions are available before connecting.",
6466
+ metadata: { toolCount }
6467
+ };
6468
+ }
6469
+ };
6242
6470
  var apiCatalog = {
6243
6471
  id: "api-catalog",
6244
6472
  name: "API Catalog (RFC 9727)",
@@ -6312,7 +6540,7 @@ var contentSignals = {
6312
6540
  name: "Content Signals (AI Usage Declarations)",
6313
6541
  category: "agent-protocols",
6314
6542
  status: "info",
6315
- message: "No robots.txt found \u2014 cannot check for content signals",
6543
+ message: "No robots.txt found, cannot check for content signals",
6316
6544
  suggestion: "Add a robots.txt with Content Signals directives to declare how AI agents may use your content (ai-train, ai-input, search)."
6317
6545
  };
6318
6546
  }
@@ -6497,7 +6725,7 @@ var agentsMd = {
6497
6725
  category: "agent-protocols",
6498
6726
  status: "fail",
6499
6727
  message: "No AGENTS.md or AGENT.md found",
6500
- suggestion: "Add an AGENTS.md at the project root. This is the universal agent configuration file \u2014 a README for AI coding agents. Include build/test commands, architecture overview, conventions, and any gotchas. Used by 60k+ open-source projects."
6728
+ suggestion: "Add an AGENTS.md at the project root. This is the universal agent configuration file, a README for AI coding agents. Include build/test commands, architecture overview, conventions, and any gotchas. Used by 60k+ open-source projects."
6501
6729
  };
6502
6730
  }
6503
6731
  const content = ctx.agentsMd;
@@ -6549,6 +6777,7 @@ var agentsMd = {
6549
6777
  };
6550
6778
  var agentProtocolChecks = [
6551
6779
  mcpServerCard,
6780
+ mcpToolCount,
6552
6781
  apiCatalog,
6553
6782
  contentSignals,
6554
6783
  linkHeaders,
@@ -6725,7 +6954,7 @@ var buildRemoteContext = async (targetUrl, config) => {
6725
6954
  const apiCatalog2 = apiCatalogResult.status === "fulfilled" && apiCatalogResult.value?.statusCode === 200 ? apiCatalogResult.value.text : void 0;
6726
6955
  const agentSkillsIndex2 = agentSkillsResult.status === "fulfilled" && agentSkillsResult.value?.statusCode === 200 ? agentSkillsResult.value.text : void 0;
6727
6956
  const agentsMd2 = void 0;
6728
- const sitemapUrls = sitemapXml ? parseSitemapUrls(sitemapXml) : [];
6957
+ let sitemapUrls = sitemapXml ? parseSitemapUrls(sitemapXml) : [];
6729
6958
  if (!sitemapXml && robotsTxt) {
6730
6959
  const sitemapMatch = robotsTxt.match(/Sitemap:\s*(.+)/i);
6731
6960
  if (sitemapMatch) {
@@ -6735,10 +6964,28 @@ var buildRemoteContext = async (targetUrl, config) => {
6735
6964
  }
6736
6965
  }
6737
6966
  }
6967
+ const isSitemapIndex = (sitemapXml ?? "").includes("<sitemapindex");
6968
+ if (isSitemapIndex && sitemapUrls.length > 0) {
6969
+ const nested = await Promise.allSettled(
6970
+ sitemapUrls.slice(0, 20).map((u) => fetchText(u, config))
6971
+ );
6972
+ sitemapUrls = nested.flatMap(
6973
+ (r) => r.status === "fulfilled" && r.value?.statusCode === 200 ? parseSitemapUrls(r.value.text) : []
6974
+ );
6975
+ }
6738
6976
  let pagesToSample = [];
6739
6977
  if (sitemapUrls.length > 0) {
6740
- const shuffled = [...sitemapUrls].sort(() => Math.random() - 0.5);
6741
- pagesToSample = shuffled.slice(0, config.sampleSize);
6978
+ const pathPrefix = baseUrl.pathname.replace(/\/+$/, "");
6979
+ const scoped = pathPrefix.length > 1 ? sitemapUrls.filter((u) => {
6980
+ try {
6981
+ return new URL(u).pathname.startsWith(pathPrefix);
6982
+ } catch {
6983
+ return false;
6984
+ }
6985
+ }) : sitemapUrls;
6986
+ const pool = scoped.length > 0 ? scoped : sitemapUrls;
6987
+ const step = Math.max(1, Math.floor(pool.length / config.sampleSize));
6988
+ pagesToSample = pool.filter((_, i) => i % step === 0).slice(0, config.sampleSize);
6742
6989
  } else {
6743
6990
  const mainPage = await fetchPage(targetUrl, config);
6744
6991
  const linkRegex = /<a[^>]+href=["']([^"'#]+)["']/gi;
@@ -6758,14 +7005,16 @@ var buildRemoteContext = async (targetUrl, config) => {
6758
7005
  if (!pagesToSample.includes(targetUrl)) {
6759
7006
  pagesToSample.unshift(targetUrl);
6760
7007
  }
6761
- const sampledPages = await fetchMany(pagesToSample, config);
7008
+ const sampledPages = await fetchMany(pagesToSample, config, true);
6762
7009
  emit({ type: "context-ready", pageCount: sampledPages.length });
6763
- for (const page of sampledPages) {
6764
- const mdResult = await fetchWithContentNegotiation(page.url, "text/markdown", config);
6765
- if (mdResult && mdResult.statusCode === 200 && (mdResult.contentType.includes("text/markdown") || mdResult.contentType.includes("text/plain"))) {
6766
- page.markdown = mdResult.text;
6767
- }
6768
- }
7010
+ await Promise.allSettled(
7011
+ sampledPages.map(async (page) => {
7012
+ const mdResult = await fetchWithContentNegotiation(page.url, "text/markdown", config);
7013
+ if (mdResult && mdResult.statusCode === 200 && (mdResult.contentType.includes("text/markdown") || mdResult.contentType.includes("text/plain"))) {
7014
+ page.markdown = mdResult.text;
7015
+ }
7016
+ })
7017
+ );
6769
7018
  return {
6770
7019
  mode: "remote",
6771
7020
  targetUrl,
@@ -7421,31 +7670,57 @@ import { Box as Box4, Text as Text4, useApp, useInput } from "ink";
7421
7670
  // src/ui/agent-prompt.ts
7422
7671
  import { execSync } from "child_process";
7423
7672
  import { platform } from "os";
7424
- var CATEGORY_LABELS2 = {
7425
- "content-discoverability": "Content Discoverability",
7426
- "markdown-availability": "Markdown Availability",
7427
- "content-structure": "Content Structure",
7428
- "page-size": "Page Size & Rendering",
7429
- "url-stability": "URL Stability",
7430
- "authentication": "Authentication & Access",
7431
- "geo-signals": "GEO Signals",
7432
- "agent-protocols": "Agent Protocols"
7433
- };
7434
- var statusEmoji = (status) => {
7673
+ var statusMarker = (status) => {
7435
7674
  switch (status) {
7436
7675
  case "pass":
7437
- return "\u2705";
7676
+ return "PASS";
7438
7677
  case "warn":
7439
- return "\u26A0\uFE0F";
7678
+ return "WARN";
7440
7679
  case "fail":
7441
- return "\u274C";
7680
+ return "FAIL";
7442
7681
  case "skip":
7443
- return "\u23ED\uFE0F";
7682
+ return "SKIP";
7444
7683
  case "info":
7445
- return "\u2139\uFE0F";
7446
- }
7447
- };
7448
- var buildIssuesBlock = (result, opts) => {
7684
+ return "INFO";
7685
+ }
7686
+ };
7687
+ var RATIONALE_LEAD = /^(generative engines|ai (agents|engines|crawlers|search)|this |these |without (it|this)|used by|some agents|blocked agents|missing content|each redirect|citing sources|shorter descriptions|the more context)\b/i;
7688
+ var terseSuggestion = (suggestion) => {
7689
+ const sentences = suggestion.split(/(?<=\.)\s+(?=[A-Z])/);
7690
+ const kept = [];
7691
+ const rescuedUrls = [];
7692
+ for (const raw of sentences) {
7693
+ const s = raw.trim();
7694
+ if (!s) continue;
7695
+ if (RATIONALE_LEAD.test(s)) {
7696
+ const url = s.match(/https?:\/\/\S+/)?.[0];
7697
+ if (url && !suggestion.slice(0, suggestion.indexOf(s)).includes(url)) rescuedUrls.push(url.replace(/[.)]+$/, ""));
7698
+ continue;
7699
+ }
7700
+ kept.push(s);
7701
+ }
7702
+ const base = (kept.join(" ") || suggestion).trim();
7703
+ return rescuedUrls.length > 0 ? `${base} ${rescuedUrls.join(" ")}` : base;
7704
+ };
7705
+ var asciiPunct = (s) => s.replace(/[—–]/g, "-").replace(/·/g, "-").replace(/→/g, "->").replace(/;/g, ",");
7706
+ var SUCCESS_TABLE = {
7707
+ "llms-txt-exists": { success: "GET /llms.txt returns 200 with an H1, a blockquote summary, and >=1 ## link section." },
7708
+ "sitemap-exists": { success: "GET /sitemap.xml returns 200 valid XML listing all public pages." },
7709
+ "markdown-url-support": (m) => ({ success: `appending .md to each page URL returns 200 text/markdown (now ${m.supported ?? 0}/${m.total ?? "?"}).` }),
7710
+ "structured-data-coverage": { success: "every sampled page has a valid schema.org JSON-LD block." },
7711
+ "topical-authority-signals": (m) => ({ success: `avg >=5 internal links/page and >=70% of pages have >=3 (now avg ${m.avgLinks ?? 0}/page).` }),
7712
+ "content-freshness": { success: ">=80% of pages expose a machine-readable date (Last-Modified, meta, or JSON-LD)." },
7713
+ "eeat-signals": { success: "each content page names an author with credentials and links to an about/team page." },
7714
+ "canonical-url-consistency": { success: 'every page has a self-referencing <link rel="canonical">.' },
7715
+ "mcp-server-card": { success: "GET /.well-known/mcp/server-card.json returns valid JSON with name + description + >=1 tool." },
7716
+ "section-header-quality": { success: "every page has exactly one H1 and no skipped heading levels." }
7717
+ };
7718
+ var resolveSuccess = (issue) => {
7719
+ const entry = SUCCESS_TABLE[issue.id];
7720
+ if (!entry) return void 0;
7721
+ return (typeof entry === "function" ? entry(issue.metadata ?? {}) : entry).success;
7722
+ };
7723
+ var buildIssuesBlock = (result, opts, terse = false) => {
7449
7724
  const failures = result.checks.filter((c) => c.status === "fail");
7450
7725
  const warnings = result.checks.filter((c) => c.status === "warn");
7451
7726
  const issues = [...failures, ...warnings];
@@ -7454,8 +7729,10 @@ var buildIssuesBlock = (result, opts) => {
7454
7729
  lines.push(`All checks passed! No fixes needed.`);
7455
7730
  return lines;
7456
7731
  }
7457
- lines.push(`Fix the following GEO issues to make this ${opts.mode === "local" ? "project" : "website"} more discoverable by AI agents:`);
7458
- lines.push(``);
7732
+ if (!terse) {
7733
+ lines.push(`Fix the following GEO issues to make this ${opts.mode === "local" ? "project" : "website"} more discoverable by AI agents:`);
7734
+ lines.push(``);
7735
+ }
7459
7736
  const byCategory = /* @__PURE__ */ new Map();
7460
7737
  for (const issue of issues) {
7461
7738
  const existing = byCategory.get(issue.category) ?? [];
@@ -7463,15 +7740,20 @@ var buildIssuesBlock = (result, opts) => {
7463
7740
  byCategory.set(issue.category, existing);
7464
7741
  }
7465
7742
  for (const [cat, catIssues] of byCategory) {
7466
- const label = CATEGORY_LABELS2[cat] ?? cat;
7743
+ const label = CATEGORY_LABELS[cat] ?? cat;
7467
7744
  const catScore = result.categories[cat]?.score ?? "?";
7468
- lines.push(`### ${label} (${catScore}/100)`);
7745
+ lines.push(terse ? label : `### ${label} (${catScore}/100)`);
7469
7746
  lines.push(``);
7470
7747
  for (const issue of catIssues) {
7471
- lines.push(`- ${statusEmoji(issue.status)} **${issue.id}**: ${issue.message}`);
7472
- if (issue.suggestion) {
7473
- lines.push(` - **Fix:** ${issue.suggestion}`);
7748
+ if (terse) {
7749
+ lines.push(`- ${issue.id} (${statusMarker(issue.status)}): ${asciiPunct(issue.message)}`);
7750
+ if (issue.suggestion) lines.push(` -> ${asciiPunct(terseSuggestion(issue.suggestion))}`);
7751
+ continue;
7474
7752
  }
7753
+ lines.push(`- ${statusMarker(issue.status)} **${issue.id}**: ${issue.message}`);
7754
+ if (issue.suggestion) lines.push(` - **Fix:** ${issue.suggestion}`);
7755
+ const success = resolveSuccess(issue);
7756
+ if (success) lines.push(` - **Success:** ${success}`);
7475
7757
  }
7476
7758
  lines.push(``);
7477
7759
  }
@@ -7479,15 +7761,12 @@ var buildIssuesBlock = (result, opts) => {
7479
7761
  };
7480
7762
  var generateClipboardPrompt = (result, opts) => {
7481
7763
  const lines = [];
7482
- lines.push(`# Fix GEO issues \u2014 ${opts.target}`);
7764
+ const subject = opts.mode === "local" ? "project" : "website";
7765
+ lines.push(`Fix these GEO issues on ${opts.target} so AI agents can discover this ${subject}. Fixes are grouped by area. Do FAIL before WARN.`);
7483
7766
  lines.push(``);
7484
- lines.push(`Score: ${result.grade} (${result.overall_score}/100) \xB7 ${result.summary.failed} failed, ${result.summary.warned} warnings`);
7485
- lines.push(``);
7486
- lines.push(...buildIssuesBlock(result, opts));
7767
+ lines.push(...buildIssuesBlock(result, opts, true));
7487
7768
  if (opts.mode === "local") {
7488
7769
  lines.push(`Files are at \`${opts.target}\`. Fix the issues above, then re-run \`agentimization ${opts.target}\` to verify.`);
7489
- } else {
7490
- lines.push(`Prioritize failures (\u274C) over warnings (\u26A0\uFE0F). Suggest specific code changes.`);
7491
7770
  }
7492
7771
  return lines.join("\n");
7493
7772
  };
@@ -7510,7 +7789,7 @@ var generateAgentPrompt = (result, opts) => {
7510
7789
  lines.push(`These checks are already good (don't break them while fixing the issues above):`);
7511
7790
  lines.push(``);
7512
7791
  for (const pass of passes) {
7513
- lines.push(`- \u2705 **${pass.id}**: ${pass.message}`);
7792
+ lines.push(`- PASS **${pass.id}**: ${pass.message}`);
7514
7793
  }
7515
7794
  lines.push(``);
7516
7795
  }
@@ -7523,7 +7802,7 @@ var generateAgentPrompt = (result, opts) => {
7523
7802
  } else {
7524
7803
  lines.push(`This is a remote site audit of ${opts.target}.`);
7525
7804
  lines.push(`Please suggest the specific code changes needed to fix each issue.`);
7526
- lines.push(`Prioritize failures (\u274C) over warnings (\u26A0\uFE0F).`);
7805
+ lines.push(`Prioritize FAIL over WARN.`);
7527
7806
  }
7528
7807
  lines.push(``);
7529
7808
  lines.push(`Focus on the highest-impact fixes first. The goal is to maximize the GEO score so AI agents can discover, parse, and cite this content effectively.`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentimization",
3
- "version": "0.1.3",
3
+ "version": "0.2.1",
4
4
  "description": "GEO audit CLI — check if your website is agent-ready",
5
5
  "license": "MIT",
6
6
  "author": "Anthony Lio <hello@antl.io>",