aeorank 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -10
- package/dist/browser.js +90 -64
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-RYV25AUV.js → chunk-DW7MPQ4X.js} +188 -30
- package/dist/chunk-DW7MPQ4X.js.map +1 -0
- package/dist/chunk-PYV5JVTC.js +179 -0
- package/dist/chunk-PYV5JVTC.js.map +1 -0
- package/dist/cli.js +83 -59
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-TQ35TB2X.js → full-site-crawler-HAF2X2X3.js} +2 -2
- package/dist/{full-site-crawler-OBECS7AT.js → full-site-crawler-W3WSE6WT.js} +18 -30
- package/dist/full-site-crawler-W3WSE6WT.js.map +1 -0
- package/dist/index.cjs +277 -90
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +90 -64
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-RYV25AUV.js.map +0 -1
- package/dist/full-site-crawler-OBECS7AT.js.map +0 -1
- /package/dist/{full-site-crawler-TQ35TB2X.js.map → full-site-crawler-HAF2X2X3.js.map} +0 -0
package/README.md
CHANGED
|
@@ -96,8 +96,8 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
|
|
|
96
96
|
| Visible Date Signal | 1.5% | Visible publication dates with `<time>` elements |
|
|
97
97
|
| Extraction Friction | 2% | Sentence length, voice-friendly leads, jargon density |
|
|
98
98
|
| Image Context for AI | 0.5% | Figure/figcaption, descriptive alt text, contextual placement |
|
|
99
|
-
| Schema Coverage & Depth | 0% | Schema markup on inner pages, not just homepage |
|
|
100
|
-
| Speakable Schema | 0% | SpeakableSpecification for voice assistants |
|
|
99
|
+
| Schema Coverage & Depth | 0.5% | Schema markup on inner pages, not just homepage |
|
|
100
|
+
| Speakable Schema | 0.5% | SpeakableSpecification for voice assistants |
|
|
101
101
|
|
|
102
102
|
**Pillar 5: AI Discovery (~10%)** - *Whether* AI crawlers can find you:
|
|
103
103
|
|
|
@@ -110,7 +110,7 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
|
|
|
110
110
|
| Content Licensing & AI Permissions | 1% | /ai.txt file, license schema for AI usage |
|
|
111
111
|
| Sitemap Completeness | 1% | sitemap.xml with lastmod dates |
|
|
112
112
|
| Canonical URL Strategy | 0.5% | Self-referencing canonical tags |
|
|
113
|
-
| RSS/Atom Feed | 0% | RSS feed linked from homepage |
|
|
113
|
+
| RSS/Atom Feed | 0.5% | RSS feed linked from homepage |
|
|
114
114
|
|
|
115
115
|
> **Coherence Gate:** Sites with topic coherence below 6/10 are score-capped regardless of technical perfection. A scattered site with perfect robots.txt, llms.txt, and schema will score lower than a focused site with mediocre technical implementation.
|
|
116
116
|
>
|
|
@@ -133,7 +133,7 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
|
|
|
133
133
|
| 10 | Semantic HTML5 & Accessibility | 2% | Technical Foundation |
|
|
134
134
|
| 11 | Content Freshness Signals | 4% | Trust & Authority |
|
|
135
135
|
| 12 | Sitemap Completeness | 1% | AI Discovery |
|
|
136
|
-
| 13 | RSS/Atom Feed | 0% | AI Discovery |
|
|
136
|
+
| 13 | RSS/Atom Feed | 0.5% | AI Discovery |
|
|
137
137
|
| 14 | Table & List Extractability | 3% | Content Structure |
|
|
138
138
|
| 15 | Definition Patterns | 1.5% | Content Structure |
|
|
139
139
|
| 16 | Direct Answer Paragraphs | 5% | Content Structure |
|
|
@@ -142,8 +142,8 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
|
|
|
142
142
|
| 19 | Fact & Data Density | 6% | Answer Readiness |
|
|
143
143
|
| 20 | Canonical URL Strategy | 0.5% | AI Discovery |
|
|
144
144
|
| 21 | Content Publishing Velocity | 2% | AI Discovery |
|
|
145
|
-
| 22 | Schema Coverage & Depth | 0% | Technical Foundation |
|
|
146
|
-
| 23 | Speakable Schema | 0% | Technical Foundation |
|
|
145
|
+
| 22 | Schema Coverage & Depth | 0.5% | Technical Foundation |
|
|
146
|
+
| 23 | Speakable Schema | 0.5% | Technical Foundation |
|
|
147
147
|
| 24 | Query-Answer Alignment | 4% | Content Structure |
|
|
148
148
|
| 25 | Content Cannibalization | 2% | AI Discovery |
|
|
149
149
|
| 26 | Visible Date Signal | 1.5% | Technical Foundation |
|
|
@@ -586,6 +586,14 @@ console.log(result.comparison.tied); // Criteria with equal scores
|
|
|
586
586
|
|
|
587
587
|
## Changelog
|
|
588
588
|
|
|
589
|
+
### v3.2.1 - Security & Release Hardening
|
|
590
|
+
|
|
591
|
+
DNS-aware fetch guards now block hostnames that resolve to private or reserved IP ranges, including sitemap sub-fetch paths and headless rendering requests. The GitHub Action is now deterministic: it runs the bundled CLI from the tagged release instead of installing `aeorank@latest` at runtime, and CI/release workflows now use SHA-pinned actions plus `npm ci`.
|
|
592
|
+
|
|
593
|
+
### v3.2.0 - Helpful Content Criteria
|
|
594
|
+
|
|
595
|
+
Added 4 new criteria: Helpful Purpose Alignment, First-Hand Experience Signals, Creator Transparency, and Methodology Transparency. The model now scores 40 total criteria and 25 page-level criteria while explicitly avoiding any "AI-written" detector.
|
|
596
|
+
|
|
589
597
|
### v3.1.1 - Duplicate Detection False-Positive Fix
|
|
590
598
|
|
|
591
599
|
Duplicate-content detection now ignores short metadata rows like `Deadline:` and `Decision timeline:` so structured guides do not get penalized for repeated timeline labels. Shared duplicate-matching logic is now used by both page scoring and site-wide crawling.
|
|
@@ -594,10 +602,6 @@ Duplicate-content detection now ignores short metadata rows like `Deadline:` and
|
|
|
594
602
|
|
|
595
603
|
2 new criteria (#35-#36): Duplicate Content Blocks (intra-page, 5%) and Cross-Page Duplicate Content (3%). Detects identical text blocks within pages and copy-pasted paragraphs across pages using shingle-based Jaccard similarity. Boilerplate filtering excludes CTAs, signups, and template content from false positives. Duplication gate caps per-page scores when severe duplication is found. CLI now shows duplicate section names inline per page.
|
|
596
604
|
|
|
597
|
-
### v3.2.0 - Helpful Content Criteria
|
|
598
|
-
|
|
599
|
-
Added 4 new criteria: Helpful Purpose Alignment, First-Hand Experience Signals, Creator Transparency, and Methodology Transparency. The model now scores 40 total criteria and 25 page-level criteria while explicitly avoiding any "AI-written" detector.
|
|
600
|
-
|
|
601
605
|
### v3.0.0 - 5-Pillar Framework & 6 New Criteria
|
|
602
606
|
|
|
603
607
|
Scoring Engine v2: 28 → 34 criteria with 5-pillar framework (Answer Readiness, Content Structure, Trust & Authority, Technical Foundation, AI Discovery). 6 new criteria targeting citation quality, evidence packaging, and extraction friction. Per-pillar sub-scores, top-3 fixes, client-friendly names. Single-page score cap at 75.
|
package/dist/browser.js
CHANGED
|
@@ -2,8 +2,12 @@ import {
|
|
|
2
2
|
crawlFullSite,
|
|
3
3
|
extractAllUrlsFromSitemap,
|
|
4
4
|
extractInternalLinks,
|
|
5
|
-
inferCategory
|
|
6
|
-
|
|
5
|
+
inferCategory,
|
|
6
|
+
isSafeFetchTarget,
|
|
7
|
+
isSafePublicUrl,
|
|
8
|
+
normalizeHostname,
|
|
9
|
+
safeFetch
|
|
10
|
+
} from "./chunk-DW7MPQ4X.js";
|
|
7
11
|
|
|
8
12
|
// src/parked-domain.ts
|
|
9
13
|
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
|
|
@@ -317,18 +321,11 @@ function scoreMethodologyTransparency(html, url) {
|
|
|
317
321
|
}
|
|
318
322
|
|
|
319
323
|
// src/site-crawler.ts
|
|
320
|
-
async function fetchText(url) {
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
redirect: "follow"
|
|
326
|
-
});
|
|
327
|
-
const text = await res.text();
|
|
328
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
329
|
-
} catch {
|
|
330
|
-
return null;
|
|
331
|
-
}
|
|
324
|
+
async function fetchText(url, expectedDomain) {
|
|
325
|
+
const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
|
|
326
|
+
if (!res) return null;
|
|
327
|
+
const text = await res.text();
|
|
328
|
+
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
332
329
|
}
|
|
333
330
|
function extractDomain(url) {
|
|
334
331
|
return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
|
|
@@ -369,13 +366,16 @@ function isHtmlResponse(result) {
|
|
|
369
366
|
return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
|
|
370
367
|
}
|
|
371
368
|
async function prefetchSiteData(domain) {
|
|
369
|
+
if (!await isSafeFetchTarget(`https://${domain}`)) {
|
|
370
|
+
return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
|
|
371
|
+
}
|
|
372
372
|
let protocol = null;
|
|
373
373
|
let homepage = null;
|
|
374
|
-
homepage = await fetchText(`https://${domain}
|
|
374
|
+
homepage = await fetchText(`https://${domain}`, domain);
|
|
375
375
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
376
376
|
protocol = "https";
|
|
377
377
|
} else {
|
|
378
|
-
homepage = await fetchText(`http://${domain}
|
|
378
|
+
homepage = await fetchText(`http://${domain}`, domain);
|
|
379
379
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
380
380
|
protocol = "http";
|
|
381
381
|
}
|
|
@@ -395,38 +395,38 @@ async function prefetchSiteData(domain) {
|
|
|
395
395
|
}
|
|
396
396
|
const baseUrl = `${protocol}://${domain}`;
|
|
397
397
|
const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
|
|
398
|
-
fetchText(`${baseUrl}/llms.txt
|
|
399
|
-
fetchText(`${baseUrl}/robots.txt
|
|
400
|
-
fetchText(`${baseUrl}/faq
|
|
398
|
+
fetchText(`${baseUrl}/llms.txt`, domain),
|
|
399
|
+
fetchText(`${baseUrl}/robots.txt`, domain),
|
|
400
|
+
fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
|
|
401
401
|
if (result && result.status === 200) return result;
|
|
402
402
|
for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
|
|
403
|
-
const fallback = await fetchText(`${baseUrl}${path}
|
|
403
|
+
const fallback = await fetchText(`${baseUrl}${path}`, domain);
|
|
404
404
|
if (fallback && fallback.status === 200) return fallback;
|
|
405
405
|
}
|
|
406
406
|
return result;
|
|
407
407
|
}),
|
|
408
|
-
fetchText(`${baseUrl}/sitemap.xml
|
|
409
|
-
fetchText(`${baseUrl}/ai.txt
|
|
408
|
+
fetchText(`${baseUrl}/sitemap.xml`, domain),
|
|
409
|
+
fetchText(`${baseUrl}/ai.txt`, domain)
|
|
410
410
|
]);
|
|
411
411
|
let rssFeed = null;
|
|
412
412
|
if (homepage) {
|
|
413
413
|
const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
|
|
414
414
|
if (rssLinkMatch) {
|
|
415
415
|
const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
|
|
416
|
-
rssFeed = await fetchText(rssUrl);
|
|
416
|
+
rssFeed = await fetchText(rssUrl, domain);
|
|
417
417
|
}
|
|
418
418
|
if (!rssFeed || rssFeed.status !== 200) {
|
|
419
419
|
for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
|
|
420
|
-
rssFeed = await fetchText(`${baseUrl}${path}
|
|
420
|
+
rssFeed = await fetchText(`${baseUrl}${path}`, domain);
|
|
421
421
|
if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
|
|
422
422
|
rssFeed = null;
|
|
423
423
|
}
|
|
424
424
|
}
|
|
425
425
|
}
|
|
426
426
|
if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
|
|
427
|
-
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
|
|
427
|
+
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
|
|
428
428
|
if (subUrls.length > 0) {
|
|
429
|
-
const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
|
|
429
|
+
const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
|
|
430
430
|
for (const sub of subResults) {
|
|
431
431
|
if (sub && sub.status === 200) {
|
|
432
432
|
sitemapXml.text += "\n" + sub.text;
|
|
@@ -439,7 +439,7 @@ async function prefetchSiteData(domain) {
|
|
|
439
439
|
const sitemapForBlog = sitemapXml.text;
|
|
440
440
|
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
441
441
|
if (blogUrls.length > 0) {
|
|
442
|
-
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
442
|
+
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
|
|
443
443
|
blogSample = fetched.filter(
|
|
444
444
|
(r) => r !== null && r.status === 200 && r.text.length > 500
|
|
445
445
|
);
|
|
@@ -1490,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
|
1490
1490
|
});
|
|
1491
1491
|
return candidates.slice(0, limit).map((c) => c.url);
|
|
1492
1492
|
}
|
|
1493
|
-
function extractAllSubSitemapUrls(sitemapText,
|
|
1493
|
+
function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
|
|
1494
1494
|
if (!sitemapText.includes("<sitemapindex")) return [];
|
|
1495
|
+
const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
|
|
1496
|
+
const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
|
|
1495
1497
|
const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
|
|
1496
1498
|
const urls = sitemapLocs.map((block) => {
|
|
1497
1499
|
const match = block.match(/<loc>([^<]+)<\/loc>/i);
|
|
1498
1500
|
return match ? match[1].trim() : "";
|
|
1499
|
-
}).filter(
|
|
1501
|
+
}).filter((url) => !!url && isSafePublicUrl(url, domain));
|
|
1500
1502
|
const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
|
|
1501
1503
|
const rest = urls.filter((u) => !preferred.includes(u));
|
|
1502
1504
|
return [...preferred, ...rest].slice(0, limit);
|
|
@@ -3143,7 +3145,11 @@ function auditSiteFromData(data) {
|
|
|
3143
3145
|
];
|
|
3144
3146
|
}
|
|
3145
3147
|
async function auditSite(targetUrl) {
|
|
3146
|
-
const
|
|
3148
|
+
const normalizedTarget = targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`;
|
|
3149
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
3150
|
+
throw new Error(`Refusing to audit private or local address: ${targetUrl}`);
|
|
3151
|
+
}
|
|
3152
|
+
const url = new URL(normalizedTarget);
|
|
3147
3153
|
const domain = url.hostname.replace(/^www\./, "");
|
|
3148
3154
|
const data = await prefetchSiteData(domain);
|
|
3149
3155
|
return auditSiteFromData(data);
|
|
@@ -3208,9 +3214,9 @@ var WEIGHTS = {
|
|
|
3208
3214
|
content_licensing: 0.01,
|
|
3209
3215
|
sitemap_completeness: 0.01,
|
|
3210
3216
|
canonical_url: 5e-3,
|
|
3211
|
-
rss_feed:
|
|
3212
|
-
schema_coverage:
|
|
3213
|
-
speakable_schema:
|
|
3217
|
+
rss_feed: 5e-3,
|
|
3218
|
+
schema_coverage: 5e-3,
|
|
3219
|
+
speakable_schema: 5e-3,
|
|
3214
3220
|
// ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
|
|
3215
3221
|
// Citation quality, evidence packaging, and extraction friction.
|
|
3216
3222
|
citation_ready_writing: 0.04,
|
|
@@ -3375,8 +3381,8 @@ var PILLAR_WEIGHTS = {
|
|
|
3375
3381
|
visible_date_signal: 0.015,
|
|
3376
3382
|
extraction_friction: 0.02,
|
|
3377
3383
|
image_context_ai: 5e-3,
|
|
3378
|
-
schema_coverage:
|
|
3379
|
-
speakable_schema:
|
|
3384
|
+
schema_coverage: 5e-3,
|
|
3385
|
+
speakable_schema: 5e-3,
|
|
3380
3386
|
content_cannibalization: 0.02,
|
|
3381
3387
|
llms_txt: 0.01,
|
|
3382
3388
|
robots_txt: 0.01,
|
|
@@ -3384,7 +3390,7 @@ var PILLAR_WEIGHTS = {
|
|
|
3384
3390
|
content_licensing: 0.01,
|
|
3385
3391
|
canonical_url: 5e-3,
|
|
3386
3392
|
sitemap_completeness: 0.01,
|
|
3387
|
-
rss_feed:
|
|
3393
|
+
rss_feed: 5e-3
|
|
3388
3394
|
};
|
|
3389
3395
|
var CRITERION_EFFORT = {
|
|
3390
3396
|
topic_coherence: "High",
|
|
@@ -3683,9 +3689,9 @@ var CRITERION_WEIGHTS = {
|
|
|
3683
3689
|
content_licensing: 0.01,
|
|
3684
3690
|
sitemap_completeness: 0.01,
|
|
3685
3691
|
canonical_url: 5e-3,
|
|
3686
|
-
rss_feed:
|
|
3687
|
-
schema_coverage:
|
|
3688
|
-
speakable_schema:
|
|
3692
|
+
rss_feed: 5e-3,
|
|
3693
|
+
schema_coverage: 5e-3,
|
|
3694
|
+
speakable_schema: 5e-3,
|
|
3689
3695
|
// V2 Criteria (~15%)
|
|
3690
3696
|
citation_ready_writing: 0.04,
|
|
3691
3697
|
answer_first_placement: 0.03,
|
|
@@ -4067,20 +4073,12 @@ function formatList(items) {
|
|
|
4067
4073
|
}
|
|
4068
4074
|
|
|
4069
4075
|
// src/multi-page-fetcher.ts
|
|
4070
|
-
async function fetchPage(url, timeoutMs = 1e4) {
|
|
4071
|
-
|
|
4072
|
-
|
|
4073
|
-
|
|
4074
|
-
|
|
4075
|
-
|
|
4076
|
-
});
|
|
4077
|
-
if (res.status !== 200) return null;
|
|
4078
|
-
const text = await res.text();
|
|
4079
|
-
if (text.length < 200) return null;
|
|
4080
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
4081
|
-
} catch {
|
|
4082
|
-
return null;
|
|
4083
|
-
}
|
|
4076
|
+
async function fetchPage(url, domain, timeoutMs = 1e4) {
|
|
4077
|
+
const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
|
|
4078
|
+
if (!res || res.status !== 200) return null;
|
|
4079
|
+
const text = await res.text();
|
|
4080
|
+
if (text.length < 200) return null;
|
|
4081
|
+
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
4084
4082
|
}
|
|
4085
4083
|
var PAGE_VARIANTS = {
|
|
4086
4084
|
about: ["/about", "/about-us", "/company", "/who-we-are"],
|
|
@@ -4236,7 +4234,7 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
4236
4234
|
}
|
|
4237
4235
|
const entries = Array.from(urlsToFetch.entries());
|
|
4238
4236
|
if (entries.length === 0) return 0;
|
|
4239
|
-
const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
|
|
4237
|
+
const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
|
|
4240
4238
|
if (!siteData.blogSample) siteData.blogSample = [];
|
|
4241
4239
|
let added = 0;
|
|
4242
4240
|
for (let i = 0; i < results.length; i++) {
|
|
@@ -5363,9 +5361,9 @@ var CRITERION_WEIGHTS2 = {
|
|
|
5363
5361
|
content_licensing: 0.01,
|
|
5364
5362
|
sitemap_completeness: 0.01,
|
|
5365
5363
|
canonical_url: 5e-3,
|
|
5366
|
-
rss_feed:
|
|
5367
|
-
schema_coverage:
|
|
5368
|
-
speakable_schema:
|
|
5364
|
+
rss_feed: 5e-3,
|
|
5365
|
+
schema_coverage: 5e-3,
|
|
5366
|
+
speakable_schema: 5e-3,
|
|
5369
5367
|
// V2 Criteria (~15%)
|
|
5370
5368
|
citation_ready_writing: 0.04,
|
|
5371
5369
|
answer_first_placement: 0.03,
|
|
@@ -6823,6 +6821,13 @@ function isSpaShell(html) {
|
|
|
6823
6821
|
return SPA_INDICATORS.some((pattern) => pattern.test(html));
|
|
6824
6822
|
}
|
|
6825
6823
|
async function fetchWithHeadless(url, options) {
|
|
6824
|
+
let expectedDomain;
|
|
6825
|
+
try {
|
|
6826
|
+
expectedDomain = normalizeHostname(new URL(url).hostname);
|
|
6827
|
+
} catch {
|
|
6828
|
+
return null;
|
|
6829
|
+
}
|
|
6830
|
+
if (!await isSafeFetchTarget(url, expectedDomain)) return null;
|
|
6826
6831
|
let puppeteer;
|
|
6827
6832
|
try {
|
|
6828
6833
|
const mod = "puppeteer";
|
|
@@ -6849,12 +6854,28 @@ async function fetchWithHeadless(url, options) {
|
|
|
6849
6854
|
const page = await browser.newPage();
|
|
6850
6855
|
await page.setRequestInterception(true);
|
|
6851
6856
|
page.on("request", (req) => {
|
|
6852
|
-
|
|
6853
|
-
|
|
6854
|
-
|
|
6855
|
-
|
|
6856
|
-
|
|
6857
|
-
|
|
6857
|
+
void (async () => {
|
|
6858
|
+
const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
|
|
6859
|
+
if (alreadyHandled) return;
|
|
6860
|
+
if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
|
|
6861
|
+
try {
|
|
6862
|
+
if (!req.isInterceptResolutionHandled?.()) await req.abort();
|
|
6863
|
+
} catch {
|
|
6864
|
+
}
|
|
6865
|
+
return;
|
|
6866
|
+
}
|
|
6867
|
+
const type = req.resourceType();
|
|
6868
|
+
try {
|
|
6869
|
+
if (!req.isInterceptResolutionHandled?.()) {
|
|
6870
|
+
if (["image", "font", "media", "stylesheet"].includes(type)) {
|
|
6871
|
+
await req.abort();
|
|
6872
|
+
} else {
|
|
6873
|
+
await req.continue();
|
|
6874
|
+
}
|
|
6875
|
+
}
|
|
6876
|
+
} catch {
|
|
6877
|
+
}
|
|
6878
|
+
})();
|
|
6858
6879
|
});
|
|
6859
6880
|
await page.setUserAgent("AEO-Visibility-Bot/1.0");
|
|
6860
6881
|
await page.goto(url, { waitUntil: "networkidle2", timeout });
|
|
@@ -6867,6 +6888,7 @@ async function fetchWithHeadless(url, options) {
|
|
|
6867
6888
|
}
|
|
6868
6889
|
const html = await page.content();
|
|
6869
6890
|
const finalUrl = page.url();
|
|
6891
|
+
if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
|
|
6870
6892
|
return {
|
|
6871
6893
|
text: html.slice(0, 5e5),
|
|
6872
6894
|
status: 200,
|
|
@@ -6889,6 +6911,10 @@ function getTextLength(html) {
|
|
|
6889
6911
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
|
|
6890
6912
|
}
|
|
6891
6913
|
async function audit(domain, options) {
|
|
6914
|
+
const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
|
|
6915
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
6916
|
+
throw new Error(`Refusing to audit private or local address: ${domain}`);
|
|
6917
|
+
}
|
|
6892
6918
|
const startTime = Date.now();
|
|
6893
6919
|
let renderedWithHeadless = false;
|
|
6894
6920
|
const siteData = await prefetchSiteData(domain);
|
|
@@ -6921,7 +6947,7 @@ async function audit(domain, options) {
|
|
|
6921
6947
|
}
|
|
6922
6948
|
}
|
|
6923
6949
|
if (options?.fullCrawl) {
|
|
6924
|
-
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-
|
|
6950
|
+
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-HAF2X2X3.js");
|
|
6925
6951
|
const crawlResult = await crawlFullSite2(siteData, {
|
|
6926
6952
|
maxPages: options.maxPages ?? 200,
|
|
6927
6953
|
concurrency: options.concurrency ?? 5
|