aeorank 1.3.0 → 1.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -35,7 +35,7 @@ interface PitchMetric {
35
35
  value: string;
36
36
  significance: string;
37
37
  }
38
- type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
38
+ type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
39
39
  interface PageIssue$1 {
40
40
  check: string;
41
41
  label: string;
@@ -84,6 +84,12 @@ interface AuditOptions {
84
84
  noMultiPage?: boolean;
85
85
  /** Fetch timeout in ms (default: 15000) */
86
86
  timeout?: number;
87
+ /** Enable full-site BFS crawl (default: false) */
88
+ fullCrawl?: boolean;
89
+ /** Max pages for full crawl (default: 200) */
90
+ maxPages?: number;
91
+ /** Parallel fetch concurrency for full crawl (default: 5) */
92
+ concurrency?: number;
87
93
  }
88
94
  interface AuditResult extends AuditData {
89
95
  /** True if headless browser was used for SPA rendering */
@@ -111,7 +117,7 @@ interface CriterionResult {
111
117
  findings: AuditFinding[];
112
118
  fix_priority: Priority;
113
119
  }
114
- type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
120
+ type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
115
121
  interface FetchResult {
116
122
  text: string;
117
123
  status: number;
@@ -134,6 +140,13 @@ interface SiteData {
134
140
  parkedReason: string | null;
135
141
  /** Sampled blog/content pages from sitemap (up to 5) */
136
142
  blogSample?: FetchResult[];
143
+ /** Full-crawl statistics (set when --full-crawl is used) */
144
+ crawlStats?: {
145
+ discovered: number;
146
+ fetched: number;
147
+ skipped: number;
148
+ elapsed: number;
149
+ };
137
150
  }
138
151
  interface RawDataSummary {
139
152
  domain: string;
@@ -203,6 +216,9 @@ interface RawDataSummary {
203
216
  has_visible_date: boolean;
204
217
  has_schema_date_in_ld: boolean;
205
218
  date_modified_recency_days: number | null;
219
+ crawl_discovered: number;
220
+ crawl_fetched: number;
221
+ crawl_skipped: number;
206
222
  }
207
223
  /**
208
224
  * Fetches all site data in parallel with HTTPS/HTTP fallback.
@@ -289,6 +305,51 @@ interface MultiPageOptions {
289
305
  */
290
306
  declare function fetchMultiPageData(siteData: SiteData, options?: MultiPageOptions): Promise<number>;
291
307
 
308
+ /**
309
+ * Full-site crawler for deep AEO audits.
310
+ * BFS crawl that discovers all internal pages up to a configurable limit.
311
+ */
312
+
313
+ interface CrawlOptions {
314
+ /** Maximum pages to fetch (default 200) */
315
+ maxPages?: number;
316
+ /** Per-page fetch timeout in ms (default 10000) */
317
+ timeoutMs?: number;
318
+ /** Parallel fetches (default 5) */
319
+ concurrency?: number;
320
+ /** Honor robots.txt Disallow rules (default true) */
321
+ respectRobots?: boolean;
322
+ /** Include asset files — skipped by default */
323
+ includeAssets?: boolean;
324
+ }
325
+ interface CrawlResult {
326
+ pages: FetchResult[];
327
+ discoveredUrls: string[];
328
+ fetchedUrls: string[];
329
+ skippedUrls: string[];
330
+ elapsed: number;
331
+ }
332
+ /**
333
+ * Extract all page URLs from sitemap XML (handles sitemapindex with sub-sitemaps).
334
+ * Filters to same domain only, skips resource files.
335
+ */
336
+ declare function extractAllUrlsFromSitemap(sitemapText: string, domain: string, timeoutMs?: number): Promise<string[]>;
337
+ /**
338
+ * Extract ALL internal links from HTML (not just nav).
339
+ * Returns deduplicated full URLs for the same domain.
340
+ */
341
+ declare function extractInternalLinks(html: string, domain: string): string[];
342
+ /**
343
+ * Infer PageCategory from URL path patterns.
344
+ */
345
+ declare function inferCategory(url: string): PageCategory;
346
+ /**
347
+ * BFS crawl of a site, discovering all internal pages up to maxPages.
348
+ * Seeds from sitemap URLs + homepage internal links.
349
+ * Skips URLs already in siteData.blogSample and homepage.
350
+ */
351
+ declare function crawlFullSite(siteData: SiteData, options?: CrawlOptions): Promise<CrawlResult>;
352
+
292
353
  /**
293
354
  * SPA detection and headless Chromium rendering for pre-crawl.
294
355
  *
@@ -395,4 +456,4 @@ interface ComparisonResult {
395
456
  */
396
457
  declare function compare(domainA: string, domainB: string, options?: AuditOptions): Promise<ComparisonResult>;
397
458
 
398
- export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, detectParkedDomain, extractContentPagesFromSitemap, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, isSpaShell, prefetchSiteData, scoreToStatus };
459
+ export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, isSpaShell, prefetchSiteData, scoreToStatus };
package/dist/index.d.ts CHANGED
@@ -35,7 +35,7 @@ interface PitchMetric {
35
35
  value: string;
36
36
  significance: string;
37
37
  }
38
- type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
38
+ type PageCategory$1 = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
39
39
  interface PageIssue$1 {
40
40
  check: string;
41
41
  label: string;
@@ -84,6 +84,12 @@ interface AuditOptions {
84
84
  noMultiPage?: boolean;
85
85
  /** Fetch timeout in ms (default: 15000) */
86
86
  timeout?: number;
87
+ /** Enable full-site BFS crawl (default: false) */
88
+ fullCrawl?: boolean;
89
+ /** Max pages for full crawl (default: 200) */
90
+ maxPages?: number;
91
+ /** Parallel fetch concurrency for full crawl (default: 5) */
92
+ concurrency?: number;
87
93
  }
88
94
  interface AuditResult extends AuditData {
89
95
  /** True if headless browser was used for SPA rendering */
@@ -111,7 +117,7 @@ interface CriterionResult {
111
117
  findings: AuditFinding[];
112
118
  fix_priority: Priority;
113
119
  }
114
- type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'content';
120
+ type PageCategory = 'homepage' | 'blog' | 'about' | 'pricing' | 'services' | 'contact' | 'team' | 'resources' | 'docs' | 'cases' | 'faq' | 'content';
115
121
  interface FetchResult {
116
122
  text: string;
117
123
  status: number;
@@ -134,6 +140,13 @@ interface SiteData {
134
140
  parkedReason: string | null;
135
141
  /** Sampled blog/content pages from sitemap (up to 5) */
136
142
  blogSample?: FetchResult[];
143
+ /** Full-crawl statistics (set when --full-crawl is used) */
144
+ crawlStats?: {
145
+ discovered: number;
146
+ fetched: number;
147
+ skipped: number;
148
+ elapsed: number;
149
+ };
137
150
  }
138
151
  interface RawDataSummary {
139
152
  domain: string;
@@ -203,6 +216,9 @@ interface RawDataSummary {
203
216
  has_visible_date: boolean;
204
217
  has_schema_date_in_ld: boolean;
205
218
  date_modified_recency_days: number | null;
219
+ crawl_discovered: number;
220
+ crawl_fetched: number;
221
+ crawl_skipped: number;
206
222
  }
207
223
  /**
208
224
  * Fetches all site data in parallel with HTTPS/HTTP fallback.
@@ -289,6 +305,51 @@ interface MultiPageOptions {
289
305
  */
290
306
  declare function fetchMultiPageData(siteData: SiteData, options?: MultiPageOptions): Promise<number>;
291
307
 
308
+ /**
309
+ * Full-site crawler for deep AEO audits.
310
+ * BFS crawl that discovers all internal pages up to a configurable limit.
311
+ */
312
+
313
+ interface CrawlOptions {
314
+ /** Maximum pages to fetch (default 200) */
315
+ maxPages?: number;
316
+ /** Per-page fetch timeout in ms (default 10000) */
317
+ timeoutMs?: number;
318
+ /** Parallel fetches (default 5) */
319
+ concurrency?: number;
320
+ /** Honor robots.txt Disallow rules (default true) */
321
+ respectRobots?: boolean;
322
+ /** Include asset files — skipped by default */
323
+ includeAssets?: boolean;
324
+ }
325
+ interface CrawlResult {
326
+ pages: FetchResult[];
327
+ discoveredUrls: string[];
328
+ fetchedUrls: string[];
329
+ skippedUrls: string[];
330
+ elapsed: number;
331
+ }
332
+ /**
333
+ * Extract all page URLs from sitemap XML (handles sitemapindex with sub-sitemaps).
334
+ * Filters to same domain only, skips resource files.
335
+ */
336
+ declare function extractAllUrlsFromSitemap(sitemapText: string, domain: string, timeoutMs?: number): Promise<string[]>;
337
+ /**
338
+ * Extract ALL internal links from HTML (not just nav).
339
+ * Returns deduplicated full URLs for the same domain.
340
+ */
341
+ declare function extractInternalLinks(html: string, domain: string): string[];
342
+ /**
343
+ * Infer PageCategory from URL path patterns.
344
+ */
345
+ declare function inferCategory(url: string): PageCategory;
346
+ /**
347
+ * BFS crawl of a site, discovering all internal pages up to maxPages.
348
+ * Seeds from sitemap URLs + homepage internal links.
349
+ * Skips URLs already in siteData.blogSample and homepage.
350
+ */
351
+ declare function crawlFullSite(siteData: SiteData, options?: CrawlOptions): Promise<CrawlResult>;
352
+
292
353
  /**
293
354
  * SPA detection and headless Chromium rendering for pre-crawl.
294
355
  *
@@ -395,4 +456,4 @@ interface ComparisonResult {
395
456
  */
396
457
  declare function compare(domainA: string, domainB: string, options?: AuditOptions): Promise<ComparisonResult>;
397
458
 
398
- export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, detectParkedDomain, extractContentPagesFromSitemap, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, isSpaShell, prefetchSiteData, scoreToStatus };
459
+ export { type AuditData, type AuditFinding, type AuditOptions, type AuditResult, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type HeadlessOptions, type ImpactLevel, type PageCategory$1 as PageCategory, type PageIssue$1 as PageIssue, type PageReview$1 as PageReview, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type RenderingMethod, type ScoreCardItem, type Severity, type SiteData, type Status, analyzeAllPages, analyzePage, audit, auditSiteFromData, buildDetailedFindings, buildScorecard, calculateOverallScore, classifyRendering, compare, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractNavLinks, extractRawDataSummary, fetchMultiPageData, fetchWithHeadless, generateBottomLine, generateComparisonHtmlReport, generateHtmlReport, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, isSpaShell, prefetchSiteData, scoreToStatus };
package/dist/index.js CHANGED
@@ -1,3 +1,10 @@
1
+ import {
2
+ crawlFullSite,
3
+ extractAllUrlsFromSitemap,
4
+ extractInternalLinks,
5
+ inferCategory
6
+ } from "./chunk-3IJISYWT.js";
7
+
1
8
  // src/parked-domain.ts
2
9
  var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
3
10
  var PARKING_SERVICE_DOMAINS = [
@@ -1753,7 +1760,11 @@ function extractRawDataSummary(data) {
1753
1760
  const d = new Date(m[1]);
1754
1761
  if (isNaN(d.getTime())) return null;
1755
1762
  return Math.floor((Date.now() - d.getTime()) / (1e3 * 60 * 60 * 24));
1756
- })()
1763
+ })(),
1764
+ // Full-crawl stats
1765
+ crawl_discovered: data.crawlStats?.discovered ?? 0,
1766
+ crawl_fetched: data.crawlStats?.fetched ?? 0,
1767
+ crawl_skipped: data.crawlStats?.skipped ?? 0
1757
1768
  };
1758
1769
  }
1759
1770
  function auditSiteFromData(data) {
@@ -2749,7 +2760,20 @@ async function audit(domain, options) {
2749
2760
  }
2750
2761
  }
2751
2762
  }
2752
- if (!options?.noMultiPage) {
2763
+ if (options?.fullCrawl) {
2764
+ const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-VFARFR2C.js");
2765
+ const crawlResult = await crawlFullSite2(siteData, {
2766
+ maxPages: options.maxPages ?? 200,
2767
+ concurrency: options.concurrency ?? 5
2768
+ });
2769
+ siteData.blogSample = crawlResult.pages;
2770
+ siteData.crawlStats = {
2771
+ discovered: crawlResult.discoveredUrls.length,
2772
+ fetched: crawlResult.fetchedUrls.length,
2773
+ skipped: crawlResult.skippedUrls.length,
2774
+ elapsed: crawlResult.elapsed
2775
+ };
2776
+ } else if (!options?.noMultiPage) {
2753
2777
  await fetchMultiPageData(siteData);
2754
2778
  }
2755
2779
  const results = auditSiteFromData(siteData);
@@ -3115,8 +3139,11 @@ export {
3115
3139
  calculateOverallScore,
3116
3140
  classifyRendering,
3117
3141
  compare,
3142
+ crawlFullSite,
3118
3143
  detectParkedDomain,
3144
+ extractAllUrlsFromSitemap,
3119
3145
  extractContentPagesFromSitemap,
3146
+ extractInternalLinks,
3120
3147
  extractNavLinks,
3121
3148
  extractRawDataSummary,
3122
3149
  fetchMultiPageData,
@@ -3127,6 +3154,7 @@ export {
3127
3154
  generateOpportunities,
3128
3155
  generatePitchNumbers,
3129
3156
  generateVerdict,
3157
+ inferCategory,
3130
3158
  isSpaShell,
3131
3159
  prefetchSiteData,
3132
3160
  scoreToStatus