aeorank 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -1
- package/dist/chunk-3IJISYWT.js +291 -0
- package/dist/chunk-3IJISYWT.js.map +1 -0
- package/dist/cli.js +38 -6
- package/dist/cli.js.map +1 -1
- package/dist/full-site-crawler-F7J2HRL4.js +292 -0
- package/dist/full-site-crawler-F7J2HRL4.js.map +1 -0
- package/dist/full-site-crawler-VFARFR2C.js +17 -0
- package/dist/full-site-crawler-VFARFR2C.js.map +1 -0
- package/dist/index.cjs +330 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +64 -3
- package/dist/index.d.ts +64 -3
- package/dist/index.js +30 -2
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -35,7 +35,7 @@ interface PitchMetric {
|
|
|
35
35
|
value: string;
|
|
36
36
|
significance: string;
|
|
37
37
|
}
|
|
38
|
-
type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
|
|
38
|
+
type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
|
|
39
39
|
interface PageIssue$1 {
|
|
40
40
|
check: string;
|
|
41
41
|
label: string;
|
|
@@ -84,6 +84,12 @@ interface AuditOptions {
|
|
|
84
84
|
noMultiPage?: boolean;
|
|
85
85
|
/** Fetch timeout in ms (default: 15000) */
|
|
86
86
|
timeout?: number;
|
|
87
|
+
/** Enable full-site BFS crawl (default: false) */
|
|
88
|
+
fullCrawl?: boolean;
|
|
89
|
+
/** Max pages for full crawl (default: 200) */
|
|
90
|
+
maxPages?: number;
|
|
91
|
+
/** Parallel fetch concurrency for full crawl (default: 5) */
|
|
92
|
+
concurrency?: number;
|
|
87
93
|
}
|
|
88
94
|
interface AuditResult extends AuditData {
|
|
89
95
|
/** True if headless browser was used for SPA rendering */
|
|
@@ -111,7 +117,7 @@ interface CriterionResult {
|
|
|
111
117
|
findings: AuditFinding[];
|
|
112
118
|
fix_priority: Priority;
|
|
113
119
|
}
|
|
114
|
-
type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
|
|
120
|
+
type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
|
|
115
121
|
interface FetchResult {
|
|
116
122
|
text: string;
|
|
117
123
|
status: number;
|
|
@@ -134,6 +140,13 @@ interface SiteData {
|
|
|
134
140
|
parkedReason: string | null;
|
|
135
141
|
/** Sampled blog/content pages from sitemap (up to 5) */
|
|
136
142
|
blogSample?: FetchResult[];
|
|
143
|
+
/** Full-crawl statistics (set when --full-crawl is used) */
|
|
144
|
+
crawlStats?: {
|
|
145
|
+
discovered: number;
|
|
146
|
+
fetched: number;
|
|
147
|
+
skipped: number;
|
|
148
|
+
elapsed: number;
|
|
149
|
+
};
|
|
137
150
|
}
|
|
138
151
|
interface RawDataSummary {
|
|
139
152
|
domain: string;
|
|
@@ -203,6 +216,9 @@ interface RawDataSummary {
|
|
|
203
216
|
has_visible_date: boolean;
|
|
204
217
|
has_schema_date_in_ld: boolean;
|
|
205
218
|
date_modified_recency_days: number | null;
|
|
219
|
+
crawl_discovered: number;
|
|
220
|
+
crawl_fetched: number;
|
|
221
|
+
crawl_skipped: number;
|
|
206
222
|
}
|
|
207
223
|
/**
|
|
208
224
|
* Fetches all site data in parallel with HTTPS/HTTP fallback.
|
|
@@ -289,6 +305,51 @@ interface MultiPageOptions {
|
|
|
289
305
|
*/
|
|
290
306
|
declare function fetchMultiPageData(siteData: SiteData, options?: MultiPageOptions): Promise<number>;
|
|
291
307
|
|
|
308
|
+
/**
|
|
309
|
+
* Full-site crawler for deep AEO audits.
|
|
310
|
+
* BFS crawl that discovers all internal pages up to a configurable limit.
|
|
311
|
+
*/
|
|
312
|
+
|
|
313
|
+
interface CrawlOptions {
|
|
314
|
+
/** Maximum pages to fetch (default 200) */
|
|
315
|
+
maxPages?: number;
|
|
316
|
+
/** Per-page fetch timeout in ms (default 10000) */
|
|
317
|
+
timeoutMs?: number;
|
|
318
|
+
/** Parallel fetches (default 5) */
|
|
319
|
+
concurrency?: number;
|
|
320
|
+
/** Honor robots.txt Disallow rules (default true) */
|
|
321
|
+
respectRobots?: boolean;
|
|
322
|
+
/** Include asset files — skipped by default */
|
|
323
|
+
includeAssets?: boolean;
|
|
324
|
+
}
|
|
325
|
+
interface CrawlResult {
|
|
326
|
+
pages: FetchResult[];
|
|
327
|
+
discoveredUrls: string[];
|
|
328
|
+
fetchedUrls: string[];
|
|
329
|
+
skippedUrls: string[];
|
|
330
|
+
elapsed: number;
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Extract all page URLs from sitemap XML (handles sitemapindex with sub-sitemaps).
|
|
334
|
+
* Filters to same domain only, skips resource files.
|
|
335
|
+
*/
|
|
336
|
+
declare function extractAllUrlsFromSitemap(sitemapText: string, domain: string, timeoutMs?: number): Promise<string[]>;
|
|
337
|
+
/**
|
|
338
|
+
* Extract ALL internal links from HTML (not just nav).
|
|
339
|
+
* Returns deduplicated full URLs for the same domain.
|
|
340
|
+
*/
|
|
341
|
+
declare function extractInternalLinks(html: string, domain: string): string[];
|
|
342
|
+
/**
|
|
343
|
+
* Infer PageCategory from URL path patterns.
|
|
344
|
+
*/
|
|
345
|
+
declare function inferCategory(url: string): PageCategory;
|
|
346
|
+
/**
|
|
347
|
+
* BFS crawl of a site, discovering all internal pages up to maxPages.
|
|
348
|
+
* Seeds from sitemap URLs + homepage internal links.
|
|
349
|
+
* Skips URLs already in siteData.blogSample and homepage.
|
|
350
|
+
*/
|
|
351
|
+
declare function crawlFullSite(siteData: SiteData, options?: CrawlOptions): Promise<CrawlResult>;
|
|
352
|
+
|
|
292
353
|
/**
|
|
293
354
|
* SPA detection and headless Chromium rendering for pre-crawl.
|
|
294
355
|
*
|
|
@@ -395,4 +456,4 @@ interface ComparisonResult {
|
|
|
395
456
|
*/
|
|
396
457
|
declare function compare(domainA: string, domainB: string, options?: AuditOptions): Promise<ComparisonResult>;
|
|
397
458
|
|
|
398
|
-
export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, detectParkedDomain, extractContentPagesFromSitemap, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, isSpaShell, prefetchSiteData, scoreToStatus };
|
|
459
|
+
export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, isSpaShell, prefetchSiteData, scoreToStatus };
|
package/dist/index.d.ts
CHANGED
|
@@ -35,7 +35,7 @@ interface PitchMetric {
|
|
|
35
35
|
value: string;
|
|
36
36
|
significance: string;
|
|
37
37
|
}
|
|
38
|
-
type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
|
|
38
|
+
type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
|
|
39
39
|
interface PageIssue$1 {
|
|
40
40
|
check: string;
|
|
41
41
|
label: string;
|
|
@@ -84,6 +84,12 @@ interface AuditOptions {
|
|
|
84
84
|
noMultiPage?: boolean;
|
|
85
85
|
/** Fetch timeout in ms (default: 15000) */
|
|
86
86
|
timeout?: number;
|
|
87
|
+
/** Enable full-site BFS crawl (default: false) */
|
|
88
|
+
fullCrawl?: boolean;
|
|
89
|
+
/** Max pages for full crawl (default: 200) */
|
|
90
|
+
maxPages?: number;
|
|
91
|
+
/** Parallel fetch concurrency for full crawl (default: 5) */
|
|
92
|
+
concurrency?: number;
|
|
87
93
|
}
|
|
88
94
|
interface AuditResult extends AuditData {
|
|
89
95
|
/** True if headless browser was used for SPA rendering */
|
|
@@ -111,7 +117,7 @@ interface CriterionResult {
|
|
|
111
117
|
findings: AuditFinding[];
|
|
112
118
|
fix_priority: Priority;
|
|
113
119
|
}
|
|
114
|
-
type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
|
|
120
|
+
type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
|
|
115
121
|
interface FetchResult {
|
|
116
122
|
text: string;
|
|
117
123
|
status: number;
|
|
@@ -134,6 +140,13 @@ interface SiteData {
|
|
|
134
140
|
parkedReason: string | null;
|
|
135
141
|
/** Sampled blog/content pages from sitemap (up to 5) */
|
|
136
142
|
blogSample?: FetchResult[];
|
|
143
|
+
/** Full-crawl statistics (set when --full-crawl is used) */
|
|
144
|
+
crawlStats?: {
|
|
145
|
+
discovered: number;
|
|
146
|
+
fetched: number;
|
|
147
|
+
skipped: number;
|
|
148
|
+
elapsed: number;
|
|
149
|
+
};
|
|
137
150
|
}
|
|
138
151
|
interface RawDataSummary {
|
|
139
152
|
domain: string;
|
|
@@ -203,6 +216,9 @@ interface RawDataSummary {
|
|
|
203
216
|
has_visible_date: boolean;
|
|
204
217
|
has_schema_date_in_ld: boolean;
|
|
205
218
|
date_modified_recency_days: number | null;
|
|
219
|
+
crawl_discovered: number;
|
|
220
|
+
crawl_fetched: number;
|
|
221
|
+
crawl_skipped: number;
|
|
206
222
|
}
|
|
207
223
|
/**
|
|
208
224
|
* Fetches all site data in parallel with HTTPS/HTTP fallback.
|
|
@@ -289,6 +305,51 @@ interface MultiPageOptions {
|
|
|
289
305
|
*/
|
|
290
306
|
declare function fetchMultiPageData(siteData: SiteData, options?: MultiPageOptions): Promise<number>;
|
|
291
307
|
|
|
308
|
+
/**
|
|
309
|
+
* Full-site crawler for deep AEO audits.
|
|
310
|
+
* BFS crawl that discovers all internal pages up to a configurable limit.
|
|
311
|
+
*/
|
|
312
|
+
|
|
313
|
+
interface CrawlOptions {
|
|
314
|
+
/** Maximum pages to fetch (default 200) */
|
|
315
|
+
maxPages?: number;
|
|
316
|
+
/** Per-page fetch timeout in ms (default 10000) */
|
|
317
|
+
timeoutMs?: number;
|
|
318
|
+
/** Parallel fetches (default 5) */
|
|
319
|
+
concurrency?: number;
|
|
320
|
+
/** Honor robots.txt Disallow rules (default true) */
|
|
321
|
+
respectRobots?: boolean;
|
|
322
|
+
/** Include asset files — skipped by default */
|
|
323
|
+
includeAssets?: boolean;
|
|
324
|
+
}
|
|
325
|
+
interface CrawlResult {
|
|
326
|
+
pages: FetchResult[];
|
|
327
|
+
discoveredUrls: string[];
|
|
328
|
+
fetchedUrls: string[];
|
|
329
|
+
skippedUrls: string[];
|
|
330
|
+
elapsed: number;
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Extract all page URLs from sitemap XML (handles sitemapindex with sub-sitemaps).
|
|
334
|
+
* Filters to same domain only, skips resource files.
|
|
335
|
+
*/
|
|
336
|
+
declare function extractAllUrlsFromSitemap(sitemapText: string, domain: string, timeoutMs?: number): Promise<string[]>;
|
|
337
|
+
/**
|
|
338
|
+
* Extract ALL internal links from HTML (not just nav).
|
|
339
|
+
* Returns deduplicated full URLs for the same domain.
|
|
340
|
+
*/
|
|
341
|
+
declare function extractInternalLinks(html: string, domain: string): string[];
|
|
342
|
+
/**
|
|
343
|
+
* Infer PageCategory from URL path patterns.
|
|
344
|
+
*/
|
|
345
|
+
declare function inferCategory(url: string): PageCategory;
|
|
346
|
+
/**
|
|
347
|
+
* BFS crawl of a site, discovering all internal pages up to maxPages.
|
|
348
|
+
* Seeds from sitemap URLs + homepage internal links.
|
|
349
|
+
* Skips URLs already in siteData.blogSample and homepage.
|
|
350
|
+
*/
|
|
351
|
+
declare function crawlFullSite(siteData: SiteData, options?: CrawlOptions): Promise<CrawlResult>;
|
|
352
|
+
|
|
292
353
|
/**
|
|
293
354
|
* SPA detection and headless Chromium rendering for pre-crawl.
|
|
294
355
|
*
|
|
@@ -395,4 +456,4 @@ interface ComparisonResult {
|
|
|
395
456
|
*/
|
|
396
457
|
declare function compare(domainA: string, domainB: string, options?: AuditOptions): Promise<ComparisonResult>;
|
|
397
458
|
|
|
398
|
-
export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, detectParkedDomain, extractContentPagesFromSitemap, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, isSpaShell, prefetchSiteData, scoreToStatus };
|
|
459
|
+
export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, isSpaShell, prefetchSiteData, scoreToStatus };
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
import {
|
|
2
|
+
crawlFullSite,
|
|
3
|
+
extractAllUrlsFromSitemap,
|
|
4
|
+
extractInternalLinks,
|
|
5
|
+
inferCategory
|
|
6
|
+
} from "./chunk-3IJISYWT.js";
|
|
7
|
+
|
|
1
8
|
// src/parked-domain.ts
|
|
2
9
|
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
|
|
3
10
|
var PARKING_SERVICE_DOMAINS = [
|
|
@@ -1753,7 +1760,11 @@ function extractRawDataSummary(data) {
|
|
|
1753
1760
|
const d = new Date(m[1]);
|
|
1754
1761
|
if (isNaN(d.getTime())) return null;
|
|
1755
1762
|
return Math.floor((Date.now() - d.getTime()) / (1e3 * 60 * 60 * 24));
|
|
1756
|
-
})()
|
|
1763
|
+
})(),
|
|
1764
|
+
// Full-crawl stats
|
|
1765
|
+
crawl_discovered: data.crawlStats?.discovered ?? 0,
|
|
1766
|
+
crawl_fetched: data.crawlStats?.fetched ?? 0,
|
|
1767
|
+
crawl_skipped: data.crawlStats?.skipped ?? 0
|
|
1757
1768
|
};
|
|
1758
1769
|
}
|
|
1759
1770
|
function auditSiteFromData(data) {
|
|
@@ -2749,7 +2760,20 @@ async function audit(domain, options) {
|
|
|
2749
2760
|
}
|
|
2750
2761
|
}
|
|
2751
2762
|
}
|
|
2752
|
-
if (
|
|
2763
|
+
if (options?.fullCrawl) {
|
|
2764
|
+
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-VFARFR2C.js");
|
|
2765
|
+
const crawlResult = await crawlFullSite2(siteData, {
|
|
2766
|
+
maxPages: options.maxPages ?? 200,
|
|
2767
|
+
concurrency: options.concurrency ?? 5
|
|
2768
|
+
});
|
|
2769
|
+
siteData.blogSample = crawlResult.pages;
|
|
2770
|
+
siteData.crawlStats = {
|
|
2771
|
+
discovered: crawlResult.discoveredUrls.length,
|
|
2772
|
+
fetched: crawlResult.fetchedUrls.length,
|
|
2773
|
+
skipped: crawlResult.skippedUrls.length,
|
|
2774
|
+
elapsed: crawlResult.elapsed
|
|
2775
|
+
};
|
|
2776
|
+
} else if (!options?.noMultiPage) {
|
|
2753
2777
|
await fetchMultiPageData(siteData);
|
|
2754
2778
|
}
|
|
2755
2779
|
const results = auditSiteFromData(siteData);
|
|
@@ -3115,8 +3139,11 @@ export {
|
|
|
3115
3139
|
calculateOverallScore,
|
|
3116
3140
|
classifyRendering,
|
|
3117
3141
|
compare,
|
|
3142
|
+
crawlFullSite,
|
|
3118
3143
|
detectParkedDomain,
|
|
3144
|
+
extractAllUrlsFromSitemap,
|
|
3119
3145
|
extractContentPagesFromSitemap,
|
|
3146
|
+
extractInternalLinks,
|
|
3120
3147
|
extractNavLinks,
|
|
3121
3148
|
extractRawDataSummary,
|
|
3122
3149
|
fetchMultiPageData,
|
|
@@ -3127,6 +3154,7 @@ export {
|
|
|
3127
3154
|
generateOpportunities,
|
|
3128
3155
|
generatePitchNumbers,
|
|
3129
3156
|
generateVerdict,
|
|
3157
|
+
inferCategory,
|
|
3130
3158
|
isSpaShell,
|
|
3131
3159
|
prefetchSiteData,
|
|
3132
3160
|
scoreToStatus
|