geo-checker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js ADDED
@@ -0,0 +1,1357 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/cli.ts
4
+ import { cac } from "cac";
5
+ import kleur2 from "kleur";
6
+
7
+ // src/context.ts
8
+ import { load as cheerioLoad } from "cheerio";
9
+
10
+ // src/fetcher/static.ts
11
+ import { request } from "undici";
12
+ var DEFAULT_UA = "geo-checker/0.1.0 (+https://github.com/BaRam-OSS/geo-checker)";
13
/**
 * Build a copy of a header map with lower-cased names.
 *
 * Entries whose value is null or undefined are left out; array values are
 * collapsed into a single ", "-separated string.
 *
 * @param {object} input - Header names mapped to a value or array of values.
 * @returns {object} New object keyed by lower-cased header name.
 */
function normalizeHeaders(input) {
  return Object.entries(input).reduce((normalized, [name, value]) => {
    if (value != null) {
      normalized[name.toLowerCase()] = Array.isArray(value) ? value.join(", ") : value;
    }
    return normalized;
  }, {});
}
21
/**
 * GET a URL with undici's `request`, following redirects by hand.
 *
 * A 3xx response that carries a Location header is followed until
 * `opts.maxRedirects` hops (default 5) have been taken; once the limit is
 * reached the redirect response itself is returned.
 *
 * @param {string} url - Address requested first.
 * @param {object} [opts] - Optional maxRedirects, timeoutMs, userAgent, accept.
 * @returns {Promise<{finalUrl: string, status: number, headers: object, body: string, redirectCount: number}>}
 */
async function fetchStatic(url, opts = {}) {
  const redirectLimit = opts.maxRedirects ?? 5;
  const timeoutMs = opts.timeoutMs ?? 2e4;
  const userAgent = opts.userAgent ?? DEFAULT_UA;
  const accept = opts.accept ?? "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
  let target = url;
  let hops = 0;
  while (true) {
    const response = await request(target, {
      method: "GET",
      headers: {
        "user-agent": userAgent,
        accept,
        "accept-language": "en,*"
      },
      bodyTimeout: timeoutMs,
      headersTimeout: timeoutMs
    });
    const status = response.statusCode;
    const headers = normalizeHeaders(response.headers);
    const isRedirect = status >= 300 && status < 400 && headers.location;
    if (!isRedirect || hops >= redirectLimit) {
      const body = await response.body.text();
      return { finalUrl: target, status, headers, body, redirectCount: hops };
    }
    hops += 1;
    // Location may be relative, so resolve it against the URL just requested.
    target = new URL(headers.location, target).toString();
    // Drain the redirect body before issuing the next request.
    await response.body.dump();
  }
}
51
/**
 * Best-effort fetch of a text resource.
 *
 * @param {string} url - Address to fetch.
 * @param {object} [opts] - Passed through to fetchStatic.
 * @returns {Promise<string|null>} The body for a 2xx response; null for any
 *   other status or when the request throws.
 */
async function fetchText(url, opts = {}) {
  try {
    const { status, body } = await fetchStatic(url, opts);
    return status >= 200 && status < 300 ? body : null;
  } catch {
    // Deliberate: callers treat an unreachable resource as simply absent.
    return null;
  }
}
60
+
61
+ // src/fetcher/rendered.ts
62
/**
 * Load a page in headless Chromium through Playwright and return its
 * rendered HTML.
 *
 * Playwright is imported lazily so it is only needed when this function is
 * called. The browser is always closed, even when navigation fails.
 *
 * @param {string} url - Page to open.
 * @param {object} [opts] - Optional userAgent and timeoutMs (default 30000).
 * @returns {Promise<{finalUrl: string, html: string, status: number, headers: object}>}
 * @throws {Error} When the playwright package cannot be imported.
 */
async function fetchRendered(url, opts = {}) {
  const playwright = await import("playwright").catch(() => {
    throw new Error(
      "Playwright is required for --render. Install with: npm i -D playwright && npx playwright install chromium"
    );
  });
  const userAgent = opts.userAgent ?? DEFAULT_UA;
  const timeout = opts.timeoutMs ?? 3e4;
  const browser = await playwright.chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({ userAgent });
    const page = await context.newPage();
    const response = await page.goto(url, { waitUntil: "networkidle", timeout });
    const html = await page.content();
    const finalUrl = page.url();
    // goto() can resolve without a response object; report 200 and no headers then.
    const status = response?.status() ?? 200;
    const headers = response ? await response.allHeaders() : {};
    return { finalUrl, html, status, headers };
  } finally {
    await browser.close();
  }
}
87
+
88
+ // src/fetcher/robots.ts
89
/**
 * Parse robots.txt text into user-agent groups and sitemap URLs.
 *
 * Each `User-agent` line yields one entry in `groups`. Consecutive
 * `User-agent` lines (no Allow/Disallow in between) belong to the same group,
 * so their entries share the same `allow` and `disallow` arrays and every
 * listed agent receives the rules that follow. Previously only the last agent
 * of such a run was kept.
 *
 * @param {string} raw - robots.txt content.
 * @returns {{raw: string, groups: Array<{userAgent: string, allow: string[], disallow: string[]}>, sitemaps: string[]}}
 */
function parseRobots(raw) {
  const groups = [];
  const sitemaps = [];
  // Rule lists of the group being built; shared by all of its user-agents.
  let rules = null;
  // A leading byte-order mark would otherwise hide the first directive.
  const lines = raw.replace(/^\uFEFF/, "").split(/\r?\n/);
  for (const rawLine of lines) {
    const line = rawLine.replace(/#.*$/, "").trim();
    if (!line) continue;
    const idx = line.indexOf(":");
    if (idx === -1) continue;
    const field = line.slice(0, idx).trim().toLowerCase();
    const value = line.slice(idx + 1).trim();
    if (field === "user-agent") {
      // A user-agent line that follows rules opens a new group.
      if (!rules || rules.allow.length > 0 || rules.disallow.length > 0) {
        rules = { allow: [], disallow: [] };
      }
      groups.push({ userAgent: value, allow: rules.allow, disallow: rules.disallow });
    } else if (field === "allow" && rules) {
      rules.allow.push(value);
    } else if (field === "disallow" && rules) {
      rules.disallow.push(value);
    } else if (field === "sitemap") {
      sitemaps.push(value);
    }
  }
  return { raw, groups, sitemaps };
}
118
/**
 * Pick the robots.txt group that applies to a crawler.
 *
 * @param {{groups: Array<{userAgent: string}>}} robots - Parsed robots.txt.
 * @param {string} userAgent - Crawler token to look up.
 * @returns {object|null} The first group whose user-agent equals the token
 *   (case-insensitive); otherwise the first "*" group; otherwise null.
 */
function matchGroup(robots, userAgent) {
  const wanted = userAgent.toLowerCase();
  let fallback = null;
  for (const group of robots.groups) {
    if (group.userAgent.toLowerCase() === wanted) return group;
    if (fallback === null && group.userAgent === "*") fallback = group;
  }
  return fallback;
}
125
/**
 * Decide whether a URL path may be crawled under a robots.txt group.
 *
 * Patterns are matched from the start of the path. Following the Robots
 * Exclusion Protocol (RFC 9309), `*` matches any run of characters and a
 * trailing `$` anchors the pattern to the end of the path. The original
 * implementation used a plain prefix test, so patterns such as `/*` or
 * `/*.pdf$` never matched. The longest matching pattern wins and Allow wins a
 * tie. An empty pattern matches nothing.
 *
 * Matching is done with indexOf rather than a generated RegExp, so a
 * robots.txt containing many wildcards cannot trigger regex backtracking.
 *
 * @param {{allow: string[], disallow: string[]}|null} group - Applicable group, or null.
 * @param {string} path - URL path to test.
 * @returns {boolean} True when no group applies or the path is allowed.
 */
function isPathAllowed(group, path) {
  if (!group) return true;
  // Returns the pattern length when it matches `path`, otherwise -1.
  const matches = (pattern) => {
    if (pattern === "") return -1;
    const anchored = pattern.endsWith("$");
    const parts = (anchored ? pattern.slice(0, -1) : pattern).split("*");
    const head = parts[0];
    if (!path.startsWith(head)) return -1;
    let pos = head.length;
    if (parts.length === 1) {
      return !anchored || pos === path.length ? pattern.length : -1;
    }
    // Leftmost placement of each middle literal leaves the most room for the rest.
    for (let i = 1; i < parts.length - 1; i += 1) {
      const at = path.indexOf(parts[i], pos);
      if (at === -1) return -1;
      pos = at + parts[i].length;
    }
    const tail = parts[parts.length - 1];
    if (anchored) {
      const fits = path.length - tail.length >= pos && path.endsWith(tail);
      return fits ? pattern.length : -1;
    }
    return path.indexOf(tail, pos) === -1 ? -1 : pattern.length;
  };
  let bestAllow = -1;
  let bestDisallow = -1;
  for (const p of group.allow) bestAllow = Math.max(bestAllow, matches(p));
  for (const p of group.disallow) bestDisallow = Math.max(bestDisallow, matches(p));
  if (bestDisallow === -1) return true;
  return bestAllow >= bestDisallow;
}
139
+
140
+ // src/fetcher/llms-txt.ts
141
// Markdown list item of the form "- [title](url)" with an optional ": description".
var LINK_RE = /^\s*-\s*\[([^\]]+)\]\(([^)]+)\)\s*(?::\s*(.+))?\s*$/;
/**
 * Parse llms.txt markdown into a title, a summary and link sections.
 *
 * The first "# " heading becomes the title. Every non-empty line between the
 * title and the first "## " heading is joined into the summary, with a leading
 * ">" stripped. Each "## " heading opens a section, and list items matching
 * LINK_RE are collected as that section's links.
 *
 * @param {string} raw - llms.txt content.
 * @returns {{raw: string, title: string|null, summary: string|null, sections: Array<{title: string, links: object[]}>}}
 */
function parseLlmsTxt(raw) {
  const headingOne = /^#\s+/;
  const headingTwo = /^##\s+/;
  const sections = [];
  const summaryParts = [];
  let title = null;
  let summary = null;
  let activeSection = null;
  let collectingSummary = false;
  const flushSummary = () => {
    if (summaryParts.length > 0) summary = summaryParts.join(" ").trim();
  };
  raw.split(/\r?\n/).forEach((line) => {
    if (title === null && headingOne.test(line)) {
      title = line.replace(headingOne, "").trim();
      collectingSummary = true;
      return;
    }
    if (headingTwo.test(line)) {
      if (collectingSummary) flushSummary();
      collectingSummary = false;
      activeSection = { title: line.replace(headingTwo, "").trim(), links: [] };
      sections.push(activeSection);
      return;
    }
    if (collectingSummary) {
      const text = line.trim();
      if (text.length > 0) {
        summaryParts.push(text.startsWith(">") ? text.replace(/^>\s*/, "") : text);
      }
      return;
    }
    if (!activeSection) return;
    const found = line.match(LINK_RE);
    if (!found) return;
    const [, label, href, note] = found;
    const link = { title: label.trim(), url: href.trim() };
    if (note) link.description = note.trim();
    activeSection.links.push(link);
  });
  // A file with no "## " heading never flushed its summary inside the loop.
  if (collectingSummary && summary === null) flushSummary();
  return { raw, title, summary, sections };
}
188
/**
 * Check the minimum structure of a parsed llms.txt: a title, at least one
 * section, and at least one link in some section.
 *
 * @param {{title: string|null, sections: Array<{links: object[]}>}} parsed
 * @returns {{ok: true}|{ok: false, reason: string}} The first failed check is reported.
 */
function isLlmsTxtWellFormed(parsed) {
  const reject = (reason) => ({ ok: false, reason });
  if (!parsed.title) return reject("Missing H1 project title");
  if (parsed.sections.length === 0) return reject("No H2 sections found");
  const hasAnyLink = parsed.sections.some((section) => section.links.length > 0);
  return hasAnyLink ? { ok: true } : reject("No link items found under any section");
}
195
+
196
+ // src/fetcher/sitemap.ts
197
// <loc> content, either wrapped in a CDATA section (group 1) or as plain text (group 2).
var LOC_RE = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<\s]+))\s*<\/loc>/gi;
var LASTMOD_RE = /<lastmod>\s*([^<\s]+)\s*<\/lastmod>/i;
/**
 * Extract the URLs and the first <lastmod> value from sitemap XML.
 *
 * This is a regex scan, not an XML parse. <loc> values wrapped in CDATA are
 * accepted, and the five predefined XML entities in plain <loc> text are
 * decoded so that `&amp;` in a query string comes back as `&`. The original
 * skipped CDATA values and returned entities undecoded.
 *
 * @param {string} xml - Sitemap or sitemap-index document.
 * @returns {{urls: string[], lastmod?: string}}
 */
function parseSitemap(xml) {
  const entities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  // One pass, so "&amp;lt;" decodes to "&lt;" rather than "<".
  const decode = (text) => text.replace(/&(amp|lt|gt|quot|apos);/g, (_match, name) => entities[name]);
  const urls = [];
  for (const match of xml.matchAll(LOC_RE)) {
    // CDATA content is literal text and must not be entity-decoded.
    const url = match[1] !== undefined ? match[1].trim() : decode(match[2]);
    if (url) urls.push(url);
  }
  const lastmodMatch = LASTMOD_RE.exec(xml);
  const summary = { urls };
  if (lastmodMatch?.[1]) summary.lastmod = lastmodMatch[1];
  return summary;
}
212
+
213
+ // src/context.ts
214
/**
 * Collect the parsed contents of every application/ld+json script element.
 *
 * A script holding a JSON array contributes each element separately. A script
 * that fails to parse contributes a marker object with `__parseError: true`
 * and the first 200 characters of its text. Empty scripts are ignored.
 *
 * @param {Function} $ - Document query function (loaded with cheerio elsewhere in this file).
 * @returns {Array} Parsed JSON-LD values and parse-error markers, in document order.
 */
function extractJsonLd($) {
  const collected = [];
  $('script[type="application/ld+json"]').each((_index, element) => {
    const source = $(element).contents().text().trim();
    if (!source) return;
    try {
      const data = JSON.parse(source);
      collected.push(...(Array.isArray(data) ? data : [data]));
    } catch {
      collected.push({ __parseError: true, raw: source.slice(0, 200) });
    }
  });
  return collected;
}
229
/**
 * Heuristic: does this HTML look like an empty shell that is filled in by
 * client-side JavaScript?
 *
 * @param {Function} $ - Document query function (loaded with cheerio elsewhere in this file).
 * @returns {boolean} True when the body text, with script/style/noscript
 *   removed and whitespace collapsed, is under 500 characters and a known
 *   framework root element is present.
 */
function detectSpa($) {
  // Work on a clone so the caller's document is left untouched.
  const bodyCopy = $("body").clone();
  bodyCopy.find("script, style, noscript").remove();
  const visibleText = bodyCopy.text().replace(/\s+/g, " ").trim();
  if (visibleText.length >= 500) return false;
  return $("#__next, #root, #app, [data-reactroot], [ng-app], [data-server-rendered]").length > 0;
}
235
/**
 * Fetch a page plus its robots.txt, llms.txt and sitemap, and assemble the
 * context object that the rules inspect.
 *
 * The page is fetched with fetchRendered when `opts.render` is truthy,
 * otherwise with fetchStatic. The sitemap URL is the first `Sitemap:` entry
 * from robots.txt when there is one, else `<origin>/sitemap.xml`.
 *
 * A robots.txt that is reachable but empty is parsed to an empty rule set
 * rather than reported as missing; only a failed fetch (null) yields
 * `robots: null`.
 *
 * @param {string} url - Page to analyse.
 * @param {object} [opts] - Fetch options; `render` selects the rendered fetcher.
 * @returns {Promise<object>} Context with url, finalUrl, html, $, headers,
 *   status, robots, llmsTxt, sitemap, jsonLd, renderMode, fetchedAt, warnings.
 */
async function buildContext(url, opts = {}) {
  const warnings = [];
  const renderMode = opts.render ? "rendered" : "static";
  let finalUrl;
  let html;
  let headers;
  let status;
  if (opts.render) {
    ({ finalUrl, html, headers, status } = await fetchRendered(url, opts));
  } else {
    const page = await fetchStatic(url, opts);
    ({ finalUrl, headers, status } = page);
    html = page.body;
  }
  const $ = cheerioLoad(html);
  const origin = new URL(finalUrl).origin;
  if (renderMode === "static" && detectSpa($)) {
    warnings.push(
      "Site appears to be JS-rendered (sparse body + SPA root element). Re-run with --render for accurate results."
    );
  }
  const [robotsRaw, llmsRaw] = await Promise.all([
    fetchText(`${origin}/robots.txt`, opts),
    fetchText(`${origin}/llms.txt`, opts)
  ]);
  // fetchText returns null on failure; "" means the file exists but is empty.
  const robots = robotsRaw != null ? parseRobots(robotsRaw) : null;
  let sitemapUrl = null;
  if (robots && robots.sitemaps.length > 0) sitemapUrl = robots.sitemaps[0] ?? null;
  if (!sitemapUrl) sitemapUrl = `${origin}/sitemap.xml`;
  const sitemapRaw = await fetchText(sitemapUrl, opts);
  const sitemap = sitemapRaw ? parseSitemap(sitemapRaw) : null;
  return {
    url,
    finalUrl,
    html,
    $,
    headers,
    status,
    robots,
    llmsTxt: llmsRaw ? parseLlmsTxt(llmsRaw) : null,
    sitemap,
    jsonLd: extractJsonLd($),
    renderMode,
    fetchedAt: (/* @__PURE__ */ new Date()).toISOString(),
    warnings
  };
}
290
+
291
+ // src/types.ts
292
/**
 * Identity helper used to declare a rule; the definition is returned unchanged.
 *
 * @param {object} rule - Rule definition.
 * @returns {object} The same object that was passed in.
 */
function defineRule(rule) {
  return rule;
}
295
// Weight of each rule category in the overall score; the four values sum to 100.
var CATEGORY_WEIGHTS = Object.fromEntries([
  ["crawler", 25],
  ["structured-data", 30],
  ["citation", 25],
  ["content", 20]
]);
301
+
302
+ // src/engine.ts
303
var VERSION = "0.1.0";
/**
 * Run rules against a context and aggregate their results into category
 * scores and an overall score.
 *
 * A rule that throws is recorded as "skip" with the error message. Skipped
 * results are left out of their category's score. A category in which nothing
 * was scored (no rules ran, or every rule was skipped) is left out of the
 * overall score instead of counting as 0 at full weight. A rule whose
 * category has no entry in CATEGORY_WEIGHTS is not run and a warning is added,
 * where previously it crashed the whole run.
 *
 * @param {object} ctx - Context produced by buildContext.
 * @param {Array<object>} rules - Rule definitions (id, category, weight, title, run).
 * @param {{only?: Iterable<string>, categories?: Iterable<string>}} [opts] -
 *   Optional filters by rule id and by category.
 * @returns {Promise<object>} Report with overall (0-100), categories, warnings and version.
 */
async function runRules(ctx, rules, opts = {}) {
  const onlySet = opts.only ? new Set(opts.only) : null;
  const catSet = opts.categories ? new Set(opts.categories) : null;
  const warnings = [...ctx.warnings];
  const buckets = {};
  for (const [category, weight] of Object.entries(CATEGORY_WEIGHTS)) {
    buckets[category] = { score: 0, weight, results: [] };
  }
  for (const rule of rules) {
    if (onlySet && !onlySet.has(rule.id)) continue;
    if (catSet && !catSet.has(rule.category)) continue;
    // Own-property check so names like "constructor" are not taken for buckets.
    if (!Object.prototype.hasOwnProperty.call(buckets, rule.category)) {
      warnings.push(`Rule ${rule.id} was not run: unknown category "${rule.category}".`);
      continue;
    }
    let result;
    try {
      result = await rule.run(ctx);
    } catch (err) {
      result = {
        status: "skip",
        score: 0,
        rationale: `Rule crashed: ${err instanceof Error ? err.message : String(err)}`
      };
    }
    buckets[rule.category].results.push({
      id: rule.id,
      title: rule.title,
      weight: rule.weight,
      ...result
    });
  }
  let overallWeighted = 0;
  let overallWeight = 0;
  for (const bucket of Object.values(buckets)) {
    let weighted = 0;
    let totalWeight = 0;
    let scoredCount = 0;
    for (const r of bucket.results) {
      if (r.status === "skip") continue;
      scoredCount += 1;
      weighted += r.score * r.weight;
      totalWeight += r.weight;
    }
    bucket.score = totalWeight === 0 ? 0 : Math.round(weighted / totalWeight * 100);
    // Nothing was measured in this category, so it must not drag the overall down.
    if (scoredCount === 0) continue;
    overallWeighted += bucket.score * bucket.weight;
    overallWeight += bucket.weight;
  }
  const overall = overallWeight === 0 ? 0 : Math.round(overallWeighted / overallWeight);
  return {
    url: ctx.url,
    finalUrl: ctx.finalUrl,
    fetchedAt: ctx.fetchedAt,
    renderMode: ctx.renderMode,
    overall,
    categories: buckets,
    warnings,
    version: VERSION
  };
}
364
+
365
+ // src/rules/crawler/https.ts
366
// Rule: the final (post-redirect) URL must start with "https://".
var httpsRule = defineRule({
  id: "crawler.https",
  category: "crawler",
  weight: 2,
  title: "Site is served over HTTPS",
  description: "AI crawlers treat HTTPS pages as more trustworthy and some skip plain HTTP entirely.",
  run(ctx) {
    if (ctx.finalUrl.startsWith("https://")) {
      return { status: "pass", score: 1, rationale: "Final URL uses HTTPS." };
    }
    return {
      status: "fail",
      score: 0,
      rationale: "Final URL does not use HTTPS. Redirect HTTP \u2192 HTTPS site-wide.",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
382
+
383
+ // src/rules/crawler/robots-reachable.ts
384
// Rule: passes when the context holds a parsed robots.txt, warns otherwise.
var robotsReachableRule = defineRule({
  id: "crawler.robots-reachable",
  category: "crawler",
  weight: 2,
  title: "robots.txt is reachable",
  description: "A reachable robots.txt lets crawlers confirm their permissions; missing file is treated as allow-all but blocks explicit signalling.",
  run(ctx) {
    if (!ctx.robots) {
      return {
        status: "warn",
        score: 0.3,
        rationale: "robots.txt is missing. Add one even if empty to explicitly signal crawl policy.",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    return { status: "pass", score: 1, rationale: "robots.txt returned successfully." };
  }
});
402
+
403
+ // src/rules/crawler/robots-ai-allow.ts
404
// User-agent tokens of the AI crawlers checked by robotsAiAllowRule.
var AI_BOTS = [
  "GPTBot",
  "Google-Extended",
  "ClaudeBot",
  "PerplexityBot",
  "CCBot",
  "Amazonbot",
  "anthropic-ai"
];
// Rule: fails when robots.txt blocks the analysed page for any bot in AI_BOTS,
// warns when robots.txt is missing or names none of them, passes otherwise.
var robotsAiAllowRule = defineRule({
  id: "crawler.robots-ai-allow",
  category: "crawler",
  weight: 5,
  title: "AI crawlers are allowed",
  description: "Major AI search crawlers (GPTBot, Google-Extended, ClaudeBot, PerplexityBot, CCBot, Amazonbot) must be allowed to index the homepage.",
  run(ctx) {
    const { robots } = ctx;
    if (!robots) {
      return {
        status: "warn",
        score: 0.5,
        rationale: "robots.txt missing; AI crawlers default to allow, but explicit allow is recommended."
      };
    }
    const pagePath = new URL(ctx.finalUrl).pathname || "/";
    const mentioned = [];
    const blocked = [];
    AI_BOTS.forEach((bot) => {
      const group = matchGroup(robots, bot);
      // "Mentioned" means a group names this bot itself, not just "*".
      const namedExplicitly = group && group.userAgent.toLowerCase() === bot.toLowerCase();
      if (namedExplicitly) mentioned.push(bot);
      if (!isPathAllowed(group, pagePath)) blocked.push(bot);
    });
    if (blocked.length > 0) {
      return {
        status: "fail",
        score: 0,
        rationale: `Blocked: ${blocked.join(", ")}. Remove the Disallow or add an explicit Allow for these user-agents.`,
        evidence: { blocked, mentioned },
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    if (mentioned.length > 0) {
      return {
        status: "pass",
        score: 1,
        rationale: `All AI crawlers can reach the page; ${mentioned.length} explicitly listed.`,
        evidence: { mentioned }
      };
    }
    return {
      status: "warn",
      score: 0.6,
      rationale: "No AI crawler is explicitly mentioned. Consider adding explicit Allow rules to remove ambiguity."
    };
  }
});
461
+
462
+ // src/rules/crawler/llms-txt-present.ts
463
// Rule: passes when the context holds a parsed llms.txt, warns otherwise.
var llmsTxtPresentRule = defineRule({
  id: "crawler.llms-txt-present",
  category: "crawler",
  weight: 4,
  title: "llms.txt is present",
  description: "An /llms.txt file at the site root gives AI assistants a curated map of the most citation-worthy pages.",
  run(ctx) {
    if (!ctx.llmsTxt) {
      return {
        status: "warn",
        score: 0,
        rationale: "No /llms.txt found. Add one to curate the pages AI assistants should read.",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    return { status: "pass", score: 1, rationale: "llms.txt found at site root." };
  }
});
481
+
482
+ // src/rules/crawler/llms-txt-wellformed.ts
483
// Rule: validates the structure of llms.txt via isLlmsTxtWellFormed; skipped
// when no llms.txt was fetched.
var llmsTxtWellformedRule = defineRule({
  id: "crawler.llms-txt-wellformed",
  category: "crawler",
  weight: 3,
  title: "llms.txt follows the spec",
  description: "Must start with an H1 project title, then a brief summary, then at least one H2 section containing link items.",
  run(ctx) {
    const { llmsTxt } = ctx;
    if (!llmsTxt) {
      return { status: "skip", score: 0, rationale: "No llms.txt to validate." };
    }
    const check = isLlmsTxtWellFormed(llmsTxt);
    if (!check.ok) {
      return {
        status: "warn",
        score: 0.3,
        rationale: `llms.txt does not fully match the spec: ${check.reason}.`,
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    let totalLinks = 0;
    for (const section of llmsTxt.sections) totalLinks += section.links.length;
    return {
      status: "pass",
      score: 1,
      rationale: `Well-formed with ${llmsTxt.sections.length} section(s) and ${totalLinks} link(s).`
    };
  }
});
510
+
511
+ // src/rules/crawler/sitemap-present.ts
512
// Rule: passes when a sitemap was fetched and at least one URL was found in it.
var sitemapPresentRule = defineRule({
  id: "crawler.sitemap-present",
  category: "crawler",
  weight: 4,
  title: "sitemap.xml is present",
  description: "A sitemap helps AI crawlers discover and prioritise pages; many crawlers short-circuit discovery without one.",
  run(ctx) {
    const urlCount = ctx.sitemap ? ctx.sitemap.urls.length : 0;
    if (urlCount > 0) {
      return {
        status: "pass",
        score: 1,
        rationale: `Sitemap found with ${urlCount} URL(s).`
      };
    }
    return {
      status: "warn",
      score: 0.2,
      rationale: "No sitemap.xml found (checked /sitemap.xml and Sitemap: directive in robots.txt).",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
534
+
535
+ // src/rules/crawler/index.ts
536
// All rules of the "crawler" category, in the order they are run and reported.
var crawlerRules = [
  httpsRule,
  robotsReachableRule,
  robotsAiAllowRule,
  llmsTxtPresentRule,
  llmsTxtWellformedRule,
  sitemapPresentRule
];
544
+
545
+ // src/rules/structured-data/jsonld-present.ts
546
// Rule: fails when no JSON-LD entry was extracted from the page. Entries that
// failed to parse are counted too.
var jsonLdPresentRule = defineRule({
  id: "sd.jsonld-present",
  category: "structured-data",
  weight: 5,
  title: "JSON-LD structured data is present",
  description: 'At least one <script type="application/ld+json"> block is the primary way AI engines map your page to an entity.',
  run(ctx) {
    const blockCount = ctx.jsonLd.length;
    if (blockCount > 0) {
      return { status: "pass", score: 1, rationale: `Found ${blockCount} JSON-LD block(s).` };
    }
    return {
      status: "fail",
      score: 0,
      rationale: "No JSON-LD blocks found. Add schema.org structured data.",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
564
+
565
+ // src/rules/util.ts
566
// schema.org @type values that schemaTypeRecognizedRule counts as recognised.
var KNOWN_SCHEMA_TYPES = [
  "Article", "NewsArticle", "BlogPosting",
  "FAQPage", "HowTo",
  "Organization", "Person",
  "BreadcrumbList", "Product",
  "WebSite", "WebPage"
];
579
// Fields that requiredFieldsRule expects on a JSON-LD node, keyed by @type.
var REQUIRED_FIELDS = (() => {
  // A fresh array per type, so the lists stay independent of one another.
  const articleLike = () => ["headline", "author", "datePublished"];
  return {
    Article: articleLike(),
    NewsArticle: articleLike(),
    BlogPosting: articleLike(),
    FAQPage: ["mainEntity"],
    HowTo: ["name", "step"],
    Product: ["name", "offers"],
    Organization: ["name"],
    Person: ["name"],
    BreadcrumbList: ["itemListElement"]
  };
})();
590
/**
 * Read the "@type" of a JSON-LD node as a list of strings.
 *
 * @param {*} node - Candidate node.
 * @returns {string[]} One element for a string "@type", the string members of
 *   an array "@type", and an empty list in every other case.
 */
function getTypes(node) {
  const declared = node && typeof node === "object" ? node["@type"] : undefined;
  if (typeof declared === "string") return [declared];
  return Array.isArray(declared) ? declared.filter((entry) => typeof entry === "string") : [];
}
597
/**
 * Flatten JSON-LD blocks into a list of node objects.
 *
 * Arrays are expanded, every object is emitted, and the members of an
 * object's "@graph" array follow immediately after that object (pre-order).
 * Other nested properties are not descended into.
 *
 * @param {Iterable} blocks - Parsed JSON-LD values.
 * @returns {object[]} Nodes in traversal order.
 */
function flattenJsonLd(blocks) {
  const flat = [];
  // Explicit stack; children are pushed in reverse so they pop in source order.
  const pending = [...blocks].reverse();
  const pushReversed = (items) => {
    for (let i = items.length - 1; i >= 0; i -= 1) pending.push(items[i]);
  };
  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;
    if (Array.isArray(node)) {
      pushReversed(node);
      continue;
    }
    flat.push(node);
    const graph = node["@graph"];
    if (Array.isArray(graph)) pushReversed(graph);
  }
  return flat;
}
612
/**
 * Tell whether any block is an object with a truthy `__parseError` property.
 *
 * @param {Array} blocks - JSON-LD blocks.
 * @returns {boolean}
 */
function hasParseError(blocks) {
  for (const block of blocks) {
    if (block && typeof block === "object" && block.__parseError) return true;
  }
  return false;
}
615
/**
 * Tell whether a node carries a usable value for a field.
 *
 * @param {*} node - Candidate object.
 * @param {string} field - Property name.
 * @returns {boolean} False for non-objects, null/undefined values,
 *   blank strings and empty arrays; true for anything else.
 */
function hasField(node, field) {
  if (!node || typeof node !== "object") return false;
  const value = node[field];
  if (value == null) return false;
  if (typeof value === "string") return value.trim().length > 0;
  return !Array.isArray(value) || value.length > 0;
}
623
+
624
+ // src/rules/structured-data/jsonld-valid-json.ts
625
// Rule: fails when any extracted JSON-LD entry is a parse-error marker;
// skipped when the page has no JSON-LD.
var jsonLdValidJsonRule = defineRule({
  id: "sd.jsonld-valid-json",
  category: "structured-data",
  weight: 3,
  title: "JSON-LD blocks parse as valid JSON",
  description: "Malformed JSON in an ld+json block is silently ignored by most consumers \u2014 a costly silent failure.",
  run(ctx) {
    const blocks = ctx.jsonLd;
    if (blocks.length === 0) {
      return { status: "skip", score: 0, rationale: "No JSON-LD to validate." };
    }
    if (!hasParseError(blocks)) {
      return { status: "pass", score: 1, rationale: "All JSON-LD blocks parse cleanly." };
    }
    return {
      status: "fail",
      score: 0,
      rationale: "One or more JSON-LD blocks failed to parse.",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
646
+
647
+ // src/rules/structured-data/schema-type-recognized.ts
648
// Rule: passes when at least one @type found in the page's JSON-LD is listed
// in KNOWN_SCHEMA_TYPES; skipped when the page has no JSON-LD.
var schemaTypeRecognizedRule = defineRule({
  id: "sd.schema-type-recognized",
  category: "structured-data",
  weight: 4,
  title: "Schema.org @type is a recognised kind",
  description: "AI engines match pages against well-known types (Article, Product, FAQPage...). Obscure types weaken the signal.",
  run(ctx) {
    if (ctx.jsonLd.length === 0) {
      return { status: "skip", score: 0, rationale: "No JSON-LD to analyse." };
    }
    // Distinct types in first-seen order.
    const all = [...new Set(flattenJsonLd(ctx.jsonLd).flatMap((node) => getTypes(node)))];
    const recognized = all.filter((type) => KNOWN_SCHEMA_TYPES.includes(type));
    if (recognized.length === 0) {
      return {
        status: "warn",
        score: 0.3,
        rationale: `No recognised schema.org types. Saw: ${all.join(", ") || "(none)"}.`,
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    return {
      status: "pass",
      score: 1,
      rationale: `Recognised: ${recognized.join(", ")}.`,
      evidence: { recognized, all }
    };
  }
});
683
+
684
+ // src/rules/structured-data/required-fields.ts
685
// Rule: for every JSON-LD node whose @type has an entry in REQUIRED_FIELDS,
// each listed field must be present. Skipped when there is no JSON-LD or no
// node has such a type.
var requiredFieldsRule = defineRule({
  id: "sd.required-fields",
  category: "structured-data",
  weight: 6,
  title: "Required fields for recognised types are set",
  description: "Article needs headline/author/datePublished, FAQPage needs mainEntity, Product needs offers, etc.",
  run(ctx) {
    if (ctx.jsonLd.length === 0) {
      return { status: "skip", score: 0, rationale: "No JSON-LD to analyse." };
    }
    const nodes = flattenJsonLd(ctx.jsonLd);
    const missing = [];
    const checked = [];
    for (const node of nodes) {
      for (const t of getTypes(node)) {
        // @type comes from the analysed page. A plain REQUIRED_FIELDS[t] lookup
        // also resolves inherited keys such as "constructor" or "toString",
        // which made the field loop below throw. Own keys only.
        if (!Object.prototype.hasOwnProperty.call(REQUIRED_FIELDS, t)) continue;
        const required = REQUIRED_FIELDS[t];
        checked.push(t);
        for (const f of required) {
          if (!hasField(node, f)) missing.push({ type: t, field: f });
        }
      }
    }
    if (checked.length === 0) {
      return {
        status: "skip",
        score: 0,
        rationale: "No types with known required fields were found."
      };
    }
    if (missing.length === 0) {
      return {
        status: "pass",
        score: 1,
        rationale: `Required fields set on ${checked.length} node(s).`
      };
    }
    const msg = missing.map((m) => `${m.type}.${m.field}`).join(", ");
    return {
      status: "fail",
      // Each missing field costs half a checked type's share; floored at 0.
      score: Math.max(0, 1 - missing.length / (checked.length * 2)),
      rationale: `Missing required fields: ${msg}.`,
      evidence: missing,
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
732
+
733
+ // src/rules/structured-data/microdata-fallback.ts
734
// Rule: when the page has no JSON-LD, look for microdata or RDFa attributes
// instead; skipped when JSON-LD exists.
var microdataFallbackRule = defineRule({
  id: "sd.microdata-fallback",
  category: "structured-data",
  weight: 2,
  title: "Microdata or RDFa fallback when JSON-LD is missing",
  description: "If JSON-LD is absent, inline microdata (itemscope/itemtype) or RDFa still gives some structured signal.",
  run(ctx) {
    if (ctx.jsonLd.length > 0) {
      return { status: "skip", score: 0, rationale: "JSON-LD is present; fallback not needed." };
    }
    const microdata = ctx.$("[itemscope][itemtype]").length;
    const rdfa = ctx.$("[typeof][vocab], [typeof][property]").length;
    const nothingFound = microdata <= 0 && rdfa <= 0;
    if (nothingFound) {
      return {
        status: "fail",
        score: 0,
        rationale: "No structured data at all (no JSON-LD, no microdata, no RDFa).",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    return {
      status: "pass",
      score: 1,
      rationale: `Found ${microdata} microdata and ${rdfa} RDFa nodes.`
    };
  }
});
761
+
762
+ // src/rules/structured-data/no-duplicate-types.ts
763
// Types that noDuplicateTypesRule expects to appear at most once per page.
var UNIQUE_TYPES = /* @__PURE__ */ new Set(["Article", "NewsArticle", "BlogPosting", "Product", "Organization"]);
// Rule: warns when any type in UNIQUE_TYPES occurs on more than one JSON-LD
// node; skipped when the page has no JSON-LD.
var noDuplicateTypesRule = defineRule({
  id: "sd.no-duplicate-types",
  category: "structured-data",
  weight: 2,
  title: "No conflicting duplicate @types",
  description: "Multiple competing entities of the same primary type (e.g. two Articles) confuse the engine about which one represents the page.",
  run(ctx) {
    if (ctx.jsonLd.length === 0) {
      return { status: "skip", score: 0, rationale: "No JSON-LD to analyse." };
    }
    const tally = /* @__PURE__ */ new Map();
    flattenJsonLd(ctx.jsonLd).forEach((node) => {
      getTypes(node)
        .filter((type) => UNIQUE_TYPES.has(type))
        .forEach((type) => tally.set(type, (tally.get(type) ?? 0) + 1));
    });
    const repeated = [];
    for (const [type, count] of tally) {
      if (count > 1) repeated.push(`${type}\xD7${count}`);
    }
    if (repeated.length === 0) {
      return { status: "pass", score: 1, rationale: "No duplicate primary types." };
    }
    return {
      status: "warn",
      score: 0.4,
      rationale: `Duplicate primary types: ${repeated.join(", ")}.`,
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
792
+
793
+ // src/rules/structured-data/index.ts
794
// All rules of the "structured-data" category, in the order they are run and reported.
var structuredDataRules = [
  jsonLdPresentRule,
  jsonLdValidJsonRule,
  schemaTypeRecognizedRule,
  requiredFieldsRule,
  microdataFallbackRule,
  noDuplicateTypesRule
];
802
+
803
+ // src/rules/citation/title.ts
804
// Rule: the first <title> directly under <head> must exist; lengths below 10
// or above 70 characters only warn.
var titleRule = defineRule({
  id: "cit.title",
  category: "citation",
  weight: 2,
  title: "<title> is set with a reasonable length",
  description: "The document title is the single most-cited piece of text and should be 10\u201370 characters.",
  run(ctx) {
    const length = ctx.$("head > title").first().text().trim().length;
    if (length === 0) {
      return {
        status: "fail",
        score: 0,
        rationale: "Page has no <title>.",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    if (length < 10) {
      return {
        status: "warn",
        score: 0.4,
        rationale: `Title is only ${length} chars; consider a more descriptive one.`
      };
    }
    if (length > 70) {
      return {
        status: "warn",
        score: 0.6,
        rationale: `Title is ${length} chars; search UIs commonly truncate after ~70.`
      };
    }
    return { status: "pass", score: 1, rationale: `Title length ${length} is within range.` };
  }
});
837
+
838
+ // src/rules/citation/meta-description.ts
839
// Rule: the meta description must be set; lengths outside 50-160 characters warn.
var metaDescriptionRule = defineRule({
  id: "cit.meta-description",
  category: "citation",
  weight: 2,
  title: "meta description is set (50\u2013160 chars)",
  description: "AI snippets often quote the meta description verbatim; aim for 50\u2013160 chars.",
  run(ctx) {
    const content = ctx.$('head meta[name="description"]').attr("content");
    const length = (content?.trim() ?? "").length;
    if (length === 0) {
      return {
        status: "warn",
        score: 0,
        rationale: "No meta description set.",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    if (length < 50) {
      return { status: "warn", score: 0.5, rationale: `Only ${length} chars; aim for 50+.` };
    }
    if (length > 160) {
      return { status: "warn", score: 0.7, rationale: `${length} chars; may be truncated after 160.` };
    }
    return { status: "pass", score: 1, rationale: `Description length ${length} is within range.` };
  }
});
864
+
865
+ // src/rules/citation/canonical.ts
866
// Rule: a canonical link must be declared, and its href must resolve to a
// URL against the final page URL.
var canonicalRule = defineRule({
  id: "cit.canonical",
  category: "citation",
  weight: 3,
  title: "Canonical URL is declared",
  description: 'rel="canonical" tells crawlers which URL is the source of truth, preventing duplicate-citation confusion.',
  run(ctx) {
    const href = ctx.$('head link[rel="canonical"]').attr("href")?.trim();
    if (!href) {
      return {
        status: "warn",
        score: 0,
        rationale: 'No <link rel="canonical"> found.',
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    let absolute;
    try {
      absolute = new URL(href, ctx.finalUrl).toString();
    } catch {
      return { status: "fail", score: 0, rationale: `Canonical href is not a valid URL: ${href}` };
    }
    return { status: "pass", score: 1, rationale: `Canonical URL: ${absolute}.` };
  }
});
890
+
891
+ // src/rules/citation/og-tags.ts
892
// Open Graph properties that ogTagsRule requires on a page.
var REQUIRED = ["og:title", "og:type", "og:url", "og:image"];
// Rule: scores the share of REQUIRED Open Graph tags that have non-blank
// content; fails only when all of them are missing.
var ogTagsRule = defineRule({
  id: "cit.og-tags",
  category: "citation",
  weight: 3,
  title: "Open Graph tags are set",
  description: "og:title/type/url/image power rich previews on AI chat, social, and messaging.",
  run(ctx) {
    const missing = REQUIRED.filter((prop) => {
      const content = ctx.$(`head meta[property="${prop}"]`).attr("content")?.trim();
      return !content;
    });
    if (missing.length === 0) {
      return { status: "pass", score: 1, rationale: "All required OG tags present." };
    }
    const noneSet = missing.length === REQUIRED.length;
    return {
      status: noneSet ? "fail" : "warn",
      score: 1 - missing.length / REQUIRED.length,
      rationale: `Missing: ${missing.join(", ")}.`,
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
917
+
918
+ // src/rules/citation/twitter-card.ts
919
/**
 * Citation rule `cit.twitter-card`: checks for `twitter:card` and
 * `twitter:title` meta tags inside `<head>`.
 *
 * - both present   -> pass, score 1
 * - exactly one    -> warn, score 0.5
 * - neither        -> warn, score 0, with a link to the rule docs
 */
var twitterCardRule = defineRule({
  id: "cit.twitter-card",
  category: "citation",
  weight: 2,
  title: "Twitter Card metadata is set",
  description: "twitter:card + twitter:title give better previews on X/Twitter and some AI surfaces that reuse the tags.",
  run(ctx) {
    // Reads the trimmed content of a <head> meta tag selected by its name attribute.
    const readMeta = (name) => ctx.$(`head meta[name="${name}"]`).attr("content")?.trim();
    const card = readMeta("twitter:card");
    const title = readMeta("twitter:title");

    if (!card && !title) {
      return {
        status: "warn",
        score: 0,
        rationale: "No twitter:card metadata.",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    if (!card || !title) {
      return { status: "warn", score: 0.5, rationale: "Partial twitter:* metadata; add the missing tag." };
    }
    return { status: "pass", score: 1, rationale: `Card type: ${card}.` };
  }
});
942
+
943
+ // src/rules/citation/lang-attr.ts
944
/**
 * Citation rule `cit.lang-attr`: passes when the `<html>` element carries a
 * non-blank `lang` attribute, and warns (score 0) otherwise.
 */
var langAttrRule = defineRule({
  id: "cit.lang-attr",
  category: "citation",
  weight: 2,
  title: "<html lang> is set",
  description: "A lang attribute helps AI engines route the page to the right-language search surface (and helps screen readers).",
  run(ctx) {
    const lang = ctx.$("html").attr("lang")?.trim();
    if (lang) {
      return { status: "pass", score: 1, rationale: `lang="${lang}".` };
    }
    return {
      status: "warn",
      score: 0,
      rationale: "No lang attribute on <html>.",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
963
+
964
+ // src/rules/citation/author-visible.ts
965
/**
 * Citation rule `cit.author-visible`: looks for an author signal, trying the
 * sources in this order and stopping at the first hit:
 *
 * 1. an `author` field on any flattened JSON-LD node     (pass, score 1)
 * 2. non-blank `<meta name="author">` inside `<head>`     (pass, score 1)
 * 3. any element with `rel="author"`                     (pass, score 1)
 * 4. an author-like class or `itemprop="author"` element (pass, score 0.8)
 *
 * When none match, the rule warns with score 0.
 */
var authorVisibleRule = defineRule({
  id: "cit.author-visible",
  category: "citation",
  weight: 4,
  title: "Author is declared",
  description: "AI engines prefer citing content with an identifiable author; expose one via JSON-LD, meta[name=author], rel=author, or a .author class.",
  run(ctx) {
    const pass = (score, rationale) => ({ status: "pass", score, rationale });

    for (const entity of flattenJsonLd(ctx.jsonLd)) {
      if (hasField(entity, "author")) {
        return pass(1, "Author found in JSON-LD.");
      }
    }

    const metaAuthor = ctx.$('head meta[name="author"]').attr("content")?.trim();
    if (metaAuthor) {
      return pass(1, `meta[name=author] = "${metaAuthor}".`);
    }

    const relAuthorCount = ctx.$('[rel="author"]').length;
    if (relAuthorCount > 0) {
      return pass(1, 'rel="author" link found.');
    }

    // Class-name / microdata matches are heuristic, hence the reduced score.
    const authorishCount = ctx.$('.author, [class*="author"], [itemprop="author"]').length;
    if (authorishCount > 0) {
      return pass(0.8, "Author-ish DOM selector found (weaker signal).");
    }

    return {
      status: "warn",
      score: 0,
      rationale: "No author signal found (JSON-LD, meta, rel, or .author).",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
993
+
994
+ // src/rules/citation/dates.ts
995
/**
 * Citation rule `cit.dates`: looks for a publish date, trying the sources in
 * this order and stopping at the first hit:
 *
 * 1. a `datePublished` field on any flattened JSON-LD node       (pass, score 1)
 * 2. non-blank `<meta property="article:published_time">` in head (pass, score 1)
 * 3. the first `<time datetime="...">` element in the document    (pass, score 0.8)
 *
 * When none match, the rule warns with score 0.
 */
var datesRule = defineRule({
  id: "cit.dates",
  category: "citation",
  weight: 5,
  title: "Publish / modified date is present",
  description: "AI engines rank recent pages higher; expose datePublished via JSON-LD, <time datetime>, or article:published_time meta.",
  run(ctx) {
    const pass = (score, rationale) => ({ status: "pass", score, rationale });

    for (const entity of flattenJsonLd(ctx.jsonLd)) {
      if (hasField(entity, "datePublished")) {
        return pass(1, "datePublished found in JSON-LD.");
      }
    }

    const publishedMeta = ctx.$('head meta[property="article:published_time"]').attr("content")?.trim();
    if (publishedMeta) {
      return pass(1, `article:published_time = ${publishedMeta}.`);
    }

    // Only the first <time datetime> in document order is inspected.
    const firstDatetime = ctx.$("time[datetime]").first().attr("datetime")?.trim();
    if (firstDatetime) {
      return pass(0.8, `<time datetime="${firstDatetime}"> found.`);
    }

    return {
      status: "warn",
      score: 0,
      rationale: "No publish date found (JSON-LD, meta article:published_time, or <time datetime>).",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
1023
+
1024
+ // src/rules/citation/index.ts
1025
// All rules of the "citation" category, in the order they are listed here.
var citationRules = [
  // Page-level metadata
  titleRule, metaDescriptionRule, canonicalRule,
  // Social / preview metadata
  ogTagsRule, twitterCardRule,
  // Language, authorship and freshness signals
  langAttrRule, authorVisibleRule, datesRule
];
1035
+
1036
+ // src/rules/content/single-h1.ts
1037
/**
 * Content rule `cnt.single-h1`: counts `<h1>` elements on the page.
 *
 * - exactly one -> pass, score 1
 * - none        -> fail, score 0, with a link to the rule docs
 * - several     -> warn, score 1/n but never below 0.3
 */
var singleH1Rule = defineRule({
  id: "cnt.single-h1",
  category: "content",
  weight: 3,
  title: "Exactly one <h1>",
  description: "A single H1 tells AI engines the primary topic of the page without ambiguity.",
  run(ctx) {
    const h1Count = ctx.$("h1").length;
    if (h1Count === 0) {
      return {
        status: "fail",
        score: 0,
        rationale: "No <h1> on the page.",
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules/cnt.single-h1.md"
      };
    }
    if (h1Count > 1) {
      // The penalty grows with each extra <h1>, floored at 0.3.
      const score = Math.max(0.3, 1 / h1Count);
      return {
        status: "warn",
        score,
        rationale: `Found ${h1Count} <h1> tags; prefer one primary heading.`
      };
    }
    return { status: "pass", score: 1, rationale: "Exactly one <h1>." };
  }
});
1061
+
1062
+ // src/rules/content/heading-hierarchy.ts
1063
/**
 * Content rule `cnt.heading-hierarchy`: walks the h1–h6 elements in the order
 * the selector returns them and flags every place where the level increases
 * by more than one compared with the previous heading (e.g. h2 -> h4).
 *
 * - no headings -> skip
 * - no skips    -> pass, score 1
 * - skips       -> warn, score 1 - skips/headings but never below 0.3;
 *                  the rationale quotes the first skip found
 */
var headingHierarchyRule = defineRule({
  id: "cnt.heading-hierarchy",
  category: "content",
  weight: 3,
  title: "Heading levels do not skip",
  description: "Going from H2 directly to H4 breaks the outline AI engines use to segment content.",
  run(ctx) {
    const levels = [];
    ctx.$("h1, h2, h3, h4, h5, h6").each((_index, heading) => {
      // Elements without a tag name are treated as h1.
      const tag = heading.tagName?.toLowerCase() ?? "h1";
      const match = /^h([1-6])$/.exec(tag);
      if (match?.[1]) {
        levels.push(parseInt(match[1], 10));
      }
    });

    if (levels.length === 0) {
      return { status: "skip", score: 0, rationale: "No headings found." };
    }

    // Compare each heading with its predecessor; only downward jumps of 2+ count.
    const skips = [];
    levels.slice(1).forEach((level, offset) => {
      const previous = levels[offset];
      if (level > previous + 1) {
        skips.push({ from: previous, to: level });
      }
    });

    if (skips.length > 0) {
      const [firstSkip] = skips;
      return {
        status: "warn",
        score: Math.max(0.3, 1 - skips.length / levels.length),
        rationale: `${skips.length} heading skip(s) detected (e.g. h${firstSkip.from}\u2192h${firstSkip.to}).`,
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    return { status: "pass", score: 1, rationale: "No heading-level skips." };
  }
});
1096
+
1097
+ // src/rules/content/image-alt.ts
1098
/**
 * Content rule `cnt.image-alt`: measures the share of `<img>` elements whose
 * `alt` attribute contains non-whitespace text.
 *
 * - no images   -> skip
 * - ratio >= 80% -> pass, score 1
 * - otherwise   -> warn, score equal to the ratio
 *
 * Note that an empty `alt=""` is counted as missing by this rule.
 */
var imageAltRule = defineRule({
  id: "cnt.image-alt",
  category: "content",
  weight: 3,
  title: "\u226580% of <img> have alt text",
  description: "Alt text gives AI engines a textual anchor for visual content and improves accessibility.",
  run(ctx) {
    const images = ctx.$("img").toArray();
    const total = images.length;
    if (total === 0) {
      return { status: "skip", score: 0, rationale: "No <img> on the page." };
    }

    let described = 0;
    for (const image of images) {
      const alt = ctx.$(image).attr("alt");
      if (typeof alt === "string" && alt.trim() !== "") {
        described += 1;
      }
    }

    const ratio = described / total;
    if (ratio < 0.8) {
      return {
        status: "warn",
        score: ratio,
        rationale: `Only ${described}/${total} images have alt text (${Math.round(ratio * 100)}%). Aim for \u226580%.`,
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    return { status: "pass", score: 1, rationale: `${described}/${total} images have alt (${Math.round(ratio * 100)}%).` };
  }
});
1125
+
1126
+ // src/rules/content/tldr-or-faq.ts
1127
/**
 * Content rule `cnt.tldr-or-faq`: looks for a summary or FAQ block.
 *
 * 1. A flattened JSON-LD node whose types include "FAQPage" -> pass, score 1.
 * 2. Otherwise, any element matching one of the summary/FAQ selectors
 *    (section ids containing tldr/summary/faq, the .tldr/.summary/.faq
 *    classes, or a data-tldr attribute) -> pass, score 0.85.
 * 3. Otherwise -> warn, score 0.
 */
var tldrOrFaqRule = defineRule({
  id: "cnt.tldr-or-faq",
  category: "content",
  weight: 5,
  title: "TL;DR summary or FAQ block",
  description: 'AI engines strongly prefer content with a quotable summary or FAQ \u2014 it makes the page "citation-ready".',
  run(ctx) {
    for (const entity of flattenJsonLd(ctx.jsonLd)) {
      const types = getTypes(entity);
      if (types.includes("FAQPage")) {
        return { status: "pass", score: 1, rationale: "FAQPage schema present." };
      }
    }

    // The trailing " i" flag makes the id substring matches case-insensitive.
    const sectionIdSelectors = ['section[id*="tldr" i]', 'section[id*="summary" i]', 'section[id*="faq" i]'];
    const classAndDataSelectors = [".tldr", ".summary", ".faq", "[data-tldr]"];
    const selector = [...sectionIdSelectors, ...classAndDataSelectors].join(", ");

    const regionCount = ctx.$(selector).length;
    if (regionCount > 0) {
      return { status: "pass", score: 0.85, rationale: "TL;DR / summary / FAQ region detected by selector." };
    }

    return {
      status: "warn",
      score: 0,
      rationale: "No TL;DR / summary / FAQ found; add one to boost AI citation odds.",
      fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
    };
  }
});
1159
+
1160
+ // src/rules/content/word-count.ts
1161
/**
 * Content rule `cnt.word-count`: counts whitespace-separated words in the
 * body text after dropping script/style/noscript and page-chrome elements
 * (nav, header, footer, aside). The count is done on a clone of `<body>`.
 *
 * - 300+ words    -> pass, score 1
 * - 100–299 words -> warn, score 0.5
 * - under 100     -> fail, score 0
 */
var wordCountRule = defineRule({
  id: "cnt.word-count",
  category: "content",
  weight: 2,
  title: "Page has enough body text",
  description: "Thin pages (under ~100 words) are rarely cited by AI engines. Aim for \u2265300 words of meaningful body copy.",
  run(ctx) {
    // Work on a copy so the removals below do not touch the shared document.
    const bodyCopy = ctx.$("body").clone();
    bodyCopy.find("script, style, noscript, nav, header, footer, aside").remove();

    const normalized = bodyCopy.text().replace(/\s+/g, " ").trim();
    const words = normalized === "" ? 0 : normalized.split(" ").length;

    if (words < 100) {
      return {
        status: "fail",
        score: 0,
        rationale: `Only ${words} words; too thin to be cited.`,
        fixUrl: "https://github.com/BaRam-OSS/geo-checker/blob/main/docs/rules.md"
      };
    }
    if (words < 300) {
      return { status: "warn", score: 0.5, rationale: `Only ${words} words; aim for 300+.` };
    }
    return { status: "pass", score: 1, rationale: `${words} words of body text.` };
  }
});
1183
+
1184
+ // src/rules/content/index.ts
1185
// All rules of the "content" category, in the order they are listed here.
var contentRules = [
  // Heading structure
  singleH1Rule, headingHierarchyRule,
  // Body content
  imageAltRule, tldrOrFaqRule, wordCountRule
];
1192
+
1193
+ // src/rules/index.ts
1194
// Built-in rule set: the four category lists concatenated in this order
// (crawler, structured data, citation, content).
var defaultRules = [...crawlerRules, ...structuredDataRules, ...citationRules, ...contentRules];
1200
+
1201
+ // src/index.ts
1202
/**
 * Audits a URL: builds the page context, then runs the default rules plus
 * any caller-supplied extra rules against it.
 *
 * Only options that are actually set are forwarded, so the callees never see
 * explicit `undefined` values.
 *
 * @param {string} url - Address of the page to audit.
 * @param {object} [options]
 * @param {boolean} [options.render] - Forwarded to buildContext as `render: true` when truthy.
 * @param {string} [options.userAgent] - Forwarded to buildContext when defined.
 * @param {number} [options.timeoutMs] - Forwarded to buildContext when defined.
 * @param {Iterable} [options.extraRules] - Rules appended after the default rules.
 * @param {string[]} [options.only] - Forwarded to runRules when truthy.
 * @param {string[]} [options.categories] - Forwarded to runRules when truthy.
 * @returns {Promise<*>} Whatever runRules returns for the built context.
 */
async function audit(url, options = {}) {
  const contextOptions = {};
  if (options.render) {
    contextOptions.render = true;
  }
  if (options.userAgent !== undefined) {
    contextOptions.userAgent = options.userAgent;
  }
  if (options.timeoutMs !== undefined) {
    contextOptions.timeoutMs = options.timeoutMs;
  }
  const ctx = await buildContext(url, contextOptions);

  const extraRules = options.extraRules ?? [];
  const rules = [...defaultRules, ...extraRules];

  const runOptions = {};
  if (options.only) {
    runOptions.only = options.only;
  }
  if (options.categories) {
    runOptions.categories = options.categories;
  }
  return runRules(ctx, rules, runOptions);
}
1214
+
1215
+ // src/reporters/json.ts
1216
/**
 * Serializes a report to JSON.
 *
 * @param {*} report - Value to serialize.
 * @param {boolean} [pretty=true] - Indent with two spaces when truthy; emit compact JSON otherwise.
 * @returns {string} The JSON text.
 */
function toJson(report, pretty = true) {
  // An undefined indent makes JSON.stringify produce compact output.
  const indent = pretty ? 2 : undefined;
  return JSON.stringify(report, null, indent);
}
1219
+
1220
+ // src/reporters/cli.ts
1221
+ import kleur from "kleur";
1222
+ import Table from "cli-table3";
1223
// Human-readable heading shown in the CLI report for each rule category id.
var CATEGORY_LABELS = {
  "crawler": "AI Crawler Access",
  "structured-data": "Structured Data",
  "citation": "Citation Signals",
  "content": "Content Structure"
};
1229
/**
 * Formats a score as bold, colour-coded text: green at 85 and above,
 * yellow from 60 to 84, red below 60.
 *
 * @param {number} score - Score to display.
 * @returns {string} The score text wrapped by kleur.
 */
function colorScore(score) {
  let painter;
  if (score >= 85) {
    painter = kleur.green();
  } else if (score >= 60) {
    painter = kleur.yellow();
  } else {
    painter = kleur.red();
  }
  return painter.bold(`${score}`);
}
1234
/**
 * Renders a rule status as a coloured label: green "pass", yellow "warn",
 * red "fail". Any other status is shown as a gray "skip".
 *
 * @param {string} status - Status of a rule result.
 * @returns {string} The label wrapped by kleur.
 */
function statusBadge(status) {
  if (status === "pass") {
    return kleur.green("pass");
  }
  if (status === "warn") {
    return kleur.yellow("warn");
  }
  if (status === "fail") {
    return kleur.red("fail");
  }
  return kleur.gray("skip");
}
1246
/**
 * Renders a text progress bar for a 0–100 score: filled cells are coloured
 * with the same thresholds as the score text (green >= 85, yellow >= 60,
 * red otherwise) and the remaining cells are gray.
 *
 * The score is clamped to the 0–100 range (non-finite values count as 0)
 * before the cell counts are computed, so an out-of-range score can no
 * longer make String.prototype.repeat throw a RangeError on a negative count.
 *
 * @param {number} score - Score to visualise; expected to be within 0–100.
 * @param {number} [width=20] - Total number of cells in the bar.
 * @returns {string} The bar, `width` cells long plus colour codes.
 */
function bar(score, width = 20) {
  const clamped = Number.isFinite(score) ? Math.min(100, Math.max(0, score)) : 0;
  const filled = Math.round(clamped / 100 * width);
  const empty = width - filled;
  // The colour is picked from the raw score so it always matches colorScore().
  const color = score >= 85 ? kleur.green : score >= 60 ? kleur.yellow : kleur.red;
  return color("\u2588".repeat(filled)) + kleur.gray("\u2591".repeat(empty));
}
1252
/**
 * Renders an audit report as coloured, human-readable terminal text.
 *
 * Layout, top to bottom: a header (final URL, render mode, fetch time,
 * version), the overall score, any report warnings, one section per
 * non-empty category (label, bar, score and a status/rule/note table), and
 * finally a "How to fix" list of fix URLs for warn/fail results.
 *
 * Fixes over the previous version:
 * - The category score is padded to three characters BEFORE it is coloured.
 *   Padding the coloured string never took effect when colours were on,
 *   because the escape codes already made it longer than three characters.
 * - A category without an entry in CATEGORY_LABELS is shown under its raw id
 *   instead of throwing a TypeError.
 *
 * @param {object} report - Report with finalUrl, renderMode, fetchedAt,
 *   version, overall, warnings and categories (each with score and results).
 * @returns {string} The rendered report, lines joined with "\n".
 */
function toCli(report) {
  const lines = [];
  lines.push("");
  lines.push(
    kleur.bold("geo-checker") + kleur.gray(" \xB7 ") + report.finalUrl + kleur.gray(` (${report.renderMode})`)
  );
  lines.push(
    kleur.gray("fetched ") + report.fetchedAt + kleur.gray(" \xB7 v") + report.version
  );
  lines.push("");
  lines.push(kleur.bold("Overall ") + colorScore(report.overall) + kleur.gray(" / 100"));
  lines.push("");
  for (const w of report.warnings) {
    lines.push(kleur.yellow("! ") + w);
  }
  if (report.warnings.length > 0) lines.push("");
  for (const cat of Object.keys(report.categories)) {
    const b = report.categories[cat];
    // Categories that ran no rules are left out of the report entirely.
    if (b.results.length === 0) continue;
    const label = CATEGORY_LABELS[cat] ?? cat;
    // Compute the padding from the plain digits; ANSI codes would skew the width.
    const scorePad = " ".repeat(Math.max(0, 3 - String(b.score).length));
    lines.push(
      ` ${kleur.bold(label.padEnd(20))} ${bar(b.score)} ${scorePad}${colorScore(b.score)}/100`
    );
    const table = new Table({
      head: [kleur.gray("status"), kleur.gray("rule"), kleur.gray("note")],
      colWidths: [7, 34, 70],
      wordWrap: true,
      style: { head: [], border: ["grey"] }
    });
    for (const r of b.results) {
      table.push([statusBadge(r.status), r.id, r.rationale]);
    }
    lines.push(
      table.toString().split("\n").map((l) => " " + l).join("\n")
    );
    lines.push("");
  }
  // Only warn/fail results that actually carry a fix URL are listed.
  const fixUrls = Object.values(report.categories)
    .flatMap((c) => c.results)
    .filter((r) => (r.status === "fail" || r.status === "warn") && r.fixUrl)
    .map((r) => ` - ${r.id}: ${r.fixUrl}`);
  if (fixUrls.length > 0) {
    lines.push(kleur.bold("How to fix:"));
    lines.push(...fixUrls);
    lines.push("");
  }
  return lines.join("\n");
}
1296
+
1297
+ // src/cli.ts
1298
// Version string reported by the CLI.
var pkgVersion = "0.1.0";

// Category ids accepted by the --category flag.
var VALID_CATEGORIES = ["crawler", "structured-data", "citation", "content"];

/**
 * Parses the value of the --category flag into a list of category ids.
 *
 * Accepts a comma-separated string, or an array of such strings (the shape
 * produced when the flag is passed more than once). Every part is coerced to
 * a string first, so a non-string value produces the "Unknown category"
 * error instead of an opaque "raw.split is not a function" TypeError.
 *
 * @param {string|string[]|undefined} raw - Raw flag value.
 * @returns {string[]|undefined} Trimmed, non-empty category ids in input
 *   order, or undefined when the flag is absent or empty.
 * @throws {Error} When any id is not one of VALID_CATEGORIES.
 */
function parseCategories(raw) {
  if (!raw) return void 0;
  const parts = Array.isArray(raw) ? raw : [raw];
  const items = parts
    .flatMap((part) => String(part).split(","))
    .map((s) => s.trim())
    .filter(Boolean);
  const invalid = items.filter((c) => !VALID_CATEGORIES.includes(c));
  if (invalid.length > 0) {
    throw new Error(`Unknown category: ${invalid.join(", ")}. Valid: ${VALID_CATEGORIES.join(", ")}.`);
  }
  return items;
}
1310
/**
 * Parses the value of the --only flag into a list of rule ids.
 *
 * Accepts a comma-separated string, or an array of such strings (the shape
 * produced when the flag is passed more than once). Every part is coerced to
 * a string first, so a non-string value no longer fails with
 * "raw.split is not a function".
 *
 * @param {string|string[]|undefined} raw - Raw flag value.
 * @returns {string[]|undefined} Trimmed, non-empty rule ids in input order,
 *   or undefined when the flag is absent or empty.
 */
function parseOnly(raw) {
  if (!raw) return void 0;
  const parts = Array.isArray(raw) ? raw : [raw];
  return parts
    .flatMap((part) => String(part).split(","))
    .map((s) => s.trim())
    .filter(Boolean);
}
1314
/**
 * Finds the most severe status among all rule results in a report.
 *
 * Severity order: "fail" > "warn" > "pass"/"skip". "skip" ranks equal to
 * "pass", so it never replaces the initial "pass". A status that is not in
 * the severity table is ignored.
 *
 * @param {{categories: Object<string, {results: Array<{status: string}>}>}} report
 * @returns {string} "pass", "warn" or "fail".
 */
function worstStatus(report) {
  const severity = { pass: 0, skip: 0, warn: 1, fail: 2 };
  return Object.values(report.categories)
    .flatMap((category) => category.results)
    .reduce(
      (worstSoFar, result) => (severity[result.status] > severity[worstSoFar] ? result.status : worstSoFar),
      "pass"
    );
}
1324
// Command-line definition: a single default command taking the URL to audit.
var cli = cac("geo-checker");
cli
  .command("<url>", "Audit a URL for GEO (Generative Engine Optimization) readiness")
  .option("--json", "Output a JSON report to stdout")
  .option("--render", "Use a headless browser (requires optional playwright dependency)")
  .option("--category <names>", "Run only the given categories (comma-separated)")
  .option("--only <ids>", "Run only the given rule IDs (comma-separated)")
  .option("--fail-on <level>", "Exit non-zero when a result is at or above this level (warn|fail)", {
    default: "fail"
  })
  .option("--timeout <ms>", "Per-request timeout in milliseconds", { default: 2e4 })
  .action(async (url, flags) => {
    const categories = parseCategories(flags.category);
    const only = parseOnly(flags.only);
    // The flag value may arrive as a string or as a number; normalise to a number.
    const timeoutMs = typeof flags.timeout === "string" ? parseInt(flags.timeout, 10) : flags.timeout;
    const report = await audit(url, {
      ...flags.render ? { render: true } : {},
      ...categories ? { categories } : {},
      ...only ? { only } : {},
      // A timeout that is not a usable number is dropped rather than forwarded.
      ...typeof timeoutMs === "number" && !Number.isNaN(timeoutMs) ? { timeoutMs } : {}
    });

    const output = flags.json ? toJson(report) : toCli(report);
    // Wait until the report has been handed off before a possible
    // process.exit() below: exiting right after write() can cut the output
    // short where stdout writes are asynchronous.
    await new Promise((resolve) => {
      process.stdout.write(output + "\n", () => resolve());
    });

    const worst = worstStatus(report);
    // Any --fail-on value other than "warn" is treated as "fail".
    const failOn = flags.failOn === "warn" ? "warn" : "fail";
    const shouldFail = worst === "fail" || (failOn === "warn" && worst === "warn");
    if (shouldFail) process.exit(1);
  });
cli.help();
cli.version(pkgVersion);
1349
/**
 * Entry point: parses argv without running the matched command, then runs it
 * explicitly and awaits it, so a rejection from the async action reaches the
 * caller of main().
 */
async function main() {
  const parseOptions = { run: false };
  cli.parse(process.argv, parseOptions);
  await cli.runMatchedCommand();
}

// Prints a one-line crash report and exits with status 2.
function reportCrash(err) {
  const detail = err instanceof Error ? err.message : err;
  console.error(kleur2.red("geo-checker crashed:"), detail);
  process.exit(2);
}

main().catch(reportCrash);
1357
+ //# sourceMappingURL=cli.js.map