@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,209 @@
1
+ import * as cheerio from 'cheerio';
2
+ import type { AuditIssue } from '../types.js';
3
+ import { ISSUE_DEFINITIONS } from '../types.js';
4
+
5
/**
 * On-page SEO facts extracted from a single HTML document.
 *
 * Produced by `analyzeOnPage` alongside the list of detected issues.
 */
export interface OnPageData {
  /** Trimmed page title; left undefined when the title is empty or absent. */
  title?: string;
  /** Character count of the trimmed title (0 when there is none). */
  titleLength: number;
  /** Trimmed `content` of the description meta tag, if one exists. */
  description?: string;
  /** Character count of the trimmed description (0 when there is none). */
  descriptionLength: number;
  /** Raw `href` of the canonical link element, if one exists. */
  canonical?: string;
  /** Trimmed text of every `<h1>` in document order. */
  h1s: string[];
  /** Every `<h1>`–`<h6>` in document order, with its numeric level (1–6). */
  headings: Array<{ level: number; text: string }>;
  /** Number of whitespace-separated words in the body text. */
  wordCount: number;
  /** Body text length expressed as a percentage of the raw HTML length. */
  textToHtmlRatio: number;
  /** True when the robots meta tag asks search engines not to index the page. */
  hasNoindex: boolean;
  /** Raw `content` of the robots meta tag, if one exists. */
  metaRobots?: string;
}
18
+
19
/**
 * Runs the on-page SEO checks (title, meta description, canonical, headings,
 * content volume and indexability) against a single HTML document.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL the HTML was fetched from; attached to every reported issue.
 * @returns The issues found plus the extracted on-page data.
 */
export function analyzeOnPage(html: string, url: string): { issues: AuditIssue[]; data: OnPageData } {
  // Thresholds used by the checks below (characters, words, percent).
  const titleMinLength = 30;
  const titleMaxLength = 60;
  const descriptionMinLength = 120;
  const descriptionMaxLength = 160;
  const h2MinWordCount = 100;
  const thinContentWordCount = 300;
  const minTextToHtmlRatio = 10;

  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // ==================== EXTRACT DATA ====================
  // Inline SVGs commonly carry their own <title> children for accessibility.
  // Only the first non-SVG <title> is the page title; reading the text of every
  // <title> in the document would concatenate them.
  const title = $('title').not('svg title').first().text().trim();
  const description = $('meta[name="description"]').attr('content')?.trim();
  const canonical = $('link[rel="canonical"]').attr('href');
  const metaRobots = $('meta[name="robots"]').attr('content');

  // "none" is shorthand for "noindex, nofollow", so it must also count as noindex.
  const robotsValue = metaRobots?.toLowerCase() ?? '';
  const hasNoindex = robotsValue.includes('noindex') || robotsValue.split(/[\s,]+/).includes('none');

  const h1s: string[] = [];
  $('h1').each((_, el) => {
    h1s.push($(el).text().trim());
  });

  const headings: { level: number; text: string }[] = [];
  $('h1, h2, h3, h4, h5, h6').each((_, el) => {
    // Tag names are "h1".."h6"; the second character is the level.
    const level = parseInt(el.tagName.charAt(1), 10);
    headings.push({ level, text: $(el).text().trim() });
  });

  // Word count must reflect readable text only. Depending on the cheerio
  // version, .text() can include the source of inline <script>/<style> blocks,
  // so strip them from a copy of the body before measuring.
  const readableBody = $('body').clone();
  readableBody.find('script, style').remove();
  const bodyText = readableBody.text().replace(/\s+/g, ' ').trim();
  const wordCount = bodyText.split(/\s+/).filter((word) => word.length > 0).length;

  // Text-to-HTML ratio: readable text length as a percentage of the raw markup.
  const textLength = bodyText.length;
  const htmlLength = html.length;
  const textToHtmlRatio = htmlLength > 0 ? (textLength / htmlLength) * 100 : 0;

  const data: OnPageData = {
    title: title || undefined,
    titleLength: title.length,
    description,
    descriptionLength: description?.length ?? 0,
    canonical,
    h1s,
    headings,
    wordCount,
    textToHtmlRatio,
    hasNoindex,
    metaRobots,
  };

  // ==================== TITLE CHECKS ====================
  if (!title) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_MISSING,
      affectedUrls: [url],
    });
  } else if (title.length < titleMinLength) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_TOO_SHORT,
      affectedUrls: [url],
      details: { title, length: title.length },
    });
  } else if (title.length > titleMaxLength) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_TOO_LONG,
      affectedUrls: [url],
      details: { title, length: title.length },
    });
  }

  // ==================== META DESCRIPTION CHECKS ====================
  // An empty content attribute is treated the same as a missing tag.
  if (!description) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_MISSING,
      affectedUrls: [url],
    });
  } else if (description.length < descriptionMinLength) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_TOO_SHORT,
      affectedUrls: [url],
      details: { description, length: description.length },
    });
  } else if (description.length > descriptionMaxLength) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_TOO_LONG,
      affectedUrls: [url],
      details: { description, length: description.length },
    });
  }

  // ==================== CANONICAL CHECKS ====================
  if (!canonical) {
    issues.push({
      ...ISSUE_DEFINITIONS.CANONICAL_MISSING,
      affectedUrls: [url],
    });
  }

  const canonicalCount = $('link[rel="canonical"]').length;
  if (canonicalCount > 1) {
    issues.push({
      ...ISSUE_DEFINITIONS.MULTIPLE_CANONICALS,
      affectedUrls: [url],
      details: { count: canonicalCount },
    });
  }

  // ==================== H1 CHECKS ====================
  if (h1s.length === 0) {
    issues.push({
      ...ISSUE_DEFINITIONS.H1_MISSING,
      affectedUrls: [url],
    });
  } else if (h1s.length > 1) {
    issues.push({
      ...ISSUE_DEFINITIONS.H1_MULTIPLE,
      affectedUrls: [url],
      details: { h1s, count: h1s.length },
    });
  }

  // A single H1 that repeats the title verbatim (case-insensitive) is reported.
  if (h1s.length === 1 && title && h1s[0].toLowerCase() === title.toLowerCase()) {
    issues.push({
      ...ISSUE_DEFINITIONS.H1_DUPLICATE_OF_TITLE,
      affectedUrls: [url],
      details: { h1: h1s[0], title },
    });
  }

  // Missing H2s only matter when there is enough content to warrant sections.
  const h2Count = headings.filter((heading) => heading.level === 2).length;
  if (h2Count === 0 && wordCount > h2MinWordCount) {
    issues.push({
      code: 'H2_MISSING',
      severity: 'warning',
      category: 'on-page',
      title: 'No H2 headings found',
      description: 'No H2 tags were found on the page. H2 headings help structure content and improve readability.',
      impact: 'Without H2 headings, search engines and users may find it harder to understand content structure. H2s are important for featuring in search results.',
      howToFix: 'Break your content into logical sections using H2 headings. Include relevant keywords naturally in your H2 tags.',
      affectedUrls: [url],
    });
  }

  // Heading hierarchy: flag the first place a level is skipped going deeper
  // (e.g. H2 -> H4). The very first heading is never flagged.
  let previousLevel = 0;
  for (const heading of headings) {
    if (previousLevel > 0 && heading.level > previousLevel + 1) {
      issues.push({
        ...ISSUE_DEFINITIONS.HEADING_SKIP,
        affectedUrls: [url],
        details: { from: previousLevel, to: heading.level },
      });
      break; // Only report once
    }
    previousLevel = heading.level;
  }

  // ==================== CONTENT CHECKS ====================
  if (wordCount < thinContentWordCount) {
    issues.push({
      ...ISSUE_DEFINITIONS.THIN_CONTENT,
      affectedUrls: [url],
      details: { wordCount },
    });
  }

  if (textToHtmlRatio < minTextToHtmlRatio) {
    issues.push({
      ...ISSUE_DEFINITIONS.LOW_TEXT_HTML_RATIO,
      affectedUrls: [url],
      details: { ratio: textToHtmlRatio.toFixed(1) + '%' },
    });
  }

  // ==================== NOINDEX CHECK ====================
  if (hasNoindex) {
    issues.push({
      ...ISSUE_DEFINITIONS.NOINDEX_TAG,
      affectedUrls: [url],
      details: { metaRobots },
    });
  }

  return { issues, data };
}
@@ -0,0 +1,285 @@
1
+ /**
2
+ * Page Resources Check (Page Objects Analysis)
3
+ *
4
+ * Analyzes the number and types of embedded resources on a page.
5
+ * Too many HTTP requests slow down page load significantly due to:
6
+ * - Connection overhead (DNS, TCP, TLS)
7
+ * - Browser connection limits per domain
8
+ * - Render blocking
9
+ *
10
+ * Best practices:
11
+ * - Keep total requests under 50 for optimal performance
12
+ * - Minimize third-party resources
13
+ * - Combine/bundle CSS and JS where possible
14
+ */
15
+
16
+ import * as cheerio from 'cheerio';
17
+ import type { AuditIssue } from '../types.js';
18
+
19
/**
 * Summary of the embedded resources referenced by a page's HTML.
 *
 * Produced by `analyzePageResources` alongside the list of detected issues.
 */
export interface PageResourcesData {
  /** Sum of all per-type counts in `byType`. */
  total: number;
  /** Number of resources found for each resource type. */
  byType: {
    stylesheets: number;
    scripts: number;
    images: number;
    fonts: number;
    iframes: number;
    /** Preload/prefetch/modulepreload hints not attributed to another type. */
    other: number;
  };
  /** Resource counts split by whether the host matches the page's host. */
  byOrigin: {
    /** Same hostname as the page, or a subdomain of it. */
    firstParty: number;
    thirdParty: number;
  };
  /** Distinct hostnames of the third-party resources. */
  thirdPartyDomains: string[];
  /** Resource URLs as written in the markup, grouped by type. */
  details: {
    stylesheets: string[];
    scripts: string[];
    images: string[];
    fonts: string[];
    iframes: string[];
  };
}
42
+
43
/**
 * Analyze page resources.
 *
 * Counts the stylesheets, scripts, images, fonts, iframes and resource hints
 * referenced by the markup, classifies each as first- or third-party by
 * hostname, and reports issues when the counts exceed fixed thresholds.
 *
 * Only resources declared in the HTML are seen; anything requested later by
 * scripts or from inside CSS is not counted.
 *
 * @param html - Raw HTML of the page.
 * @param url - Absolute URL of the page; used to resolve relative resource URLs.
 * @returns The issues found plus the resource breakdown.
 * @throws TypeError if `url` is not a valid absolute URL.
 */
export function analyzePageResources(html: string, url: string): { issues: AuditIssue[]; data: PageResourcesData } {
  // Thresholds for the generated issues.
  const excessiveTotal = 100;
  const highTotal = 50;
  const highThirdParty = 20;
  const maxIframes = 3;

  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);
  const baseHostname = new URL(url).hostname;

  const stylesheets: string[] = [];
  const scripts: string[] = [];
  const images: string[] = [];
  const fonts: string[] = [];
  const iframes: string[] = [];
  const thirdPartyDomains = new Set<string>();

  let firstParty = 0;
  let thirdParty = 0;

  /** Resolve a resource URL against the page URL; unparseable input is returned as-is. */
  const toAbsolute = (resourceUrl: string): string => {
    try {
      return new URL(resourceUrl, url).href;
    } catch {
      return resourceUrl;
    }
  };

  /**
   * Count a resource as first- or third-party. The page's own hostname and any
   * subdomain of it are first-party; URLs that cannot be parsed are also
   * treated as first-party.
   */
  const trackOrigin = (resourceUrl: string): void => {
    let hostname: string;
    try {
      hostname = new URL(resourceUrl, url).hostname;
    } catch {
      firstParty++;
      return;
    }

    if (hostname === baseHostname || hostname.endsWith(`.${baseHostname}`)) {
      firstParty++;
    } else {
      thirdParty++;
      thirdPartyDomains.add(hostname);
    }
  };

  /**
   * Extract the URLs from a srcset value ("url1 1x, url2 2x" or
   * "url1 300w, url2 600w").
   *
   * A URL is a run of non-whitespace characters, so commas inside it (data:
   * URIs, CDN transform URLs) belong to the URL. Only trailing commas, or the
   * comma that follows the descriptors, end a candidate. Splitting the whole
   * string on "," would cut such URLs into bogus pieces.
   */
  const parseSrcsetUrls = (srcset: string): string[] => {
    const separator = /[\s,]/;
    const whitespace = /\s/;
    const urls: string[] = [];
    let pos = 0;

    while (pos < srcset.length) {
      while (pos < srcset.length && separator.test(srcset.charAt(pos))) pos++;
      if (pos >= srcset.length) break;

      let end = pos;
      while (end < srcset.length && !whitespace.test(srcset.charAt(end))) end++;
      const token = srcset.slice(pos, end);
      pos = end;

      if (token.endsWith(',')) {
        // No descriptors: the trailing comma(s) terminate this candidate.
        urls.push(token.replace(/,+$/, ''));
      } else {
        urls.push(token);
        // Skip the descriptors up to the comma that ends this candidate.
        const nextComma = srcset.indexOf(',', pos);
        pos = nextComma === -1 ? srcset.length : nextComma + 1;
      }
    }

    return urls;
  };

  // Extract stylesheets
  $('link[rel="stylesheet"][href]').each((_, el) => {
    const href = $(el).attr('href');
    if (href && !href.startsWith('data:')) {
      stylesheets.push(href);
      trackOrigin(href);
    }
  });

  // Extract scripts
  $('script[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.startsWith('data:')) {
      scripts.push(src);
      trackOrigin(src);
    }
  });

  // Extract images. Repeated <img src> values are counted each time; srcset
  // candidates are only added when that exact URL string is not recorded yet.
  const seenImages = new Set<string>();
  $('img[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.startsWith('data:')) {
      images.push(src);
      seenImages.add(src);
      trackOrigin(src);
    }
  });

  // Also check srcset for responsive images
  $('img[srcset], source[srcset]').each((_, el) => {
    const srcset = $(el).attr('srcset');
    if (!srcset) return;

    for (const srcUrl of parseSrcsetUrls(srcset)) {
      if (srcUrl && !srcUrl.startsWith('data:') && !seenImages.has(srcUrl)) {
        images.push(srcUrl);
        seenImages.add(srcUrl);
        trackOrigin(srcUrl);
      }
    }
  });

  // Extract fonts (preloaded or linked)
  $('link[rel="preload"][as="font"], link[href*=".woff"], link[href*=".woff2"], link[href*=".ttf"], link[href*=".otf"]').each(
    (_, el) => {
      const href = $(el).attr('href');
      if (href && !href.startsWith('data:')) {
        fonts.push(href);
        trackOrigin(href);
      }
    }
  );

  // Extract iframes. Inline documents (about:, javascript:, data:) trigger no
  // network request, so they are not counted.
  $('iframe[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.startsWith('about:') && !src.startsWith('javascript:') && !src.startsWith('data:')) {
      iframes.push(src);
      trackOrigin(src);
    }
  });

  // Resource hints (preload / prefetch / modulepreload) count as "other" only
  // when the same URL was not already counted under another type. Comparison
  // uses resolved URLs so "/app.js" and "https://host/app.js" match.
  const alreadyCounted = new Set([...stylesheets, ...scripts, ...images, ...fonts].map(toAbsolute));
  let other = 0;
  $('link[rel="preload"], link[rel="prefetch"], link[rel="modulepreload"]').each((_, el) => {
    const href = $(el).attr('href');
    const as = $(el).attr('as');
    if (!href || href.startsWith('data:')) return;

    // Font hints are collected with the fonts above; style hints are skipped.
    if (as === 'font' || as === 'style') return;
    if (alreadyCounted.has(toAbsolute(href))) return;

    other++;
    trackOrigin(href);
  });

  const byType = {
    stylesheets: stylesheets.length,
    scripts: scripts.length,
    images: images.length,
    fonts: fonts.length,
    iframes: iframes.length,
    other,
  };
  const total = stylesheets.length + scripts.length + images.length + fonts.length + iframes.length + other;

  // Generate issues
  if (total > excessiveTotal) {
    issues.push({
      code: 'PAGE_RESOURCES_EXCESSIVE',
      severity: 'error',
      category: 'performance',
      title: 'Excessive number of page resources',
      description: `Page requests ${total} resources. This significantly impacts load time due to connection overhead and browser limits.`,
      impact:
        'Too many HTTP requests cause slow page loads, poor Core Web Vitals, and high bounce rates. Each request has DNS, TCP, and TLS overhead.',
      howToFix:
        'Combine CSS/JS files, use image sprites or inline small images, lazy-load non-critical resources, remove unused dependencies.',
      affectedUrls: [url],
      details: {
        total,
        breakdown: { ...byType },
        recommendation: 'Aim for under 50 total requests for optimal performance.',
      },
    });
  } else if (total > highTotal) {
    issues.push({
      code: 'PAGE_RESOURCES_HIGH',
      severity: 'warning',
      category: 'performance',
      title: 'High number of page resources',
      description: `Page requests ${total} resources. Consider reducing for better performance.`,
      impact: 'Many HTTP requests increase page load time, especially on mobile networks.',
      howToFix: 'Bundle CSS/JS files, lazy-load images below the fold, consider critical CSS inlining.',
      affectedUrls: [url],
      details: {
        total,
        breakdown: { ...byType },
      },
    });
  }

  // Check third-party resources
  if (thirdParty > highThirdParty) {
    issues.push({
      code: 'PAGE_RESOURCES_THIRD_PARTY_HIGH',
      severity: 'warning',
      category: 'performance',
      title: 'Many third-party resources',
      description: `Page loads ${thirdParty} resources from ${thirdPartyDomains.size} third-party domains.`,
      impact:
        'Third-party resources are outside your control and add latency. They can also be a privacy/security concern.',
      howToFix:
        'Self-host critical third-party resources when possible. Use preconnect hints for remaining third-party origins.',
      affectedUrls: [url],
      details: {
        thirdPartyCount: thirdParty,
        domains: Array.from(thirdPartyDomains).slice(0, 10),
      },
    });
  }

  // Check for too many iframes
  if (iframes.length > maxIframes) {
    issues.push({
      code: 'PAGE_RESOURCES_MANY_IFRAMES',
      severity: 'warning',
      category: 'performance',
      title: 'Multiple iframes detected',
      description: `Page contains ${iframes.length} iframes. Each iframe loads its own document and resources.`,
      impact: 'Iframes significantly increase page weight and can block the main thread.',
      howToFix:
        'Lazy-load iframes that are below the fold. Consider native embeds or facade patterns for video/widget iframes.',
      affectedUrls: [url],
      details: {
        iframeCount: iframes.length,
        iframes: iframes.slice(0, 5),
      },
    });
  }

  return {
    issues,
    data: {
      total,
      byType,
      byOrigin: {
        firstParty,
        thirdParty,
      },
      thirdPartyDomains: Array.from(thirdPartyDomains),
      details: {
        stylesheets,
        scripts,
        images: images.slice(0, 20), // Limit for data size
        fonts,
        iframes,
      },
    },
  };
}
@@ -0,0 +1,180 @@
1
+ import * as cheerio from 'cheerio';
2
+ import type { AuditIssue } from '../types.js';
3
+ import { ISSUE_DEFINITIONS } from '../types.js';
4
+
5
/**
 * Pagination signals detected on a single page.
 *
 * Produced by `analyzePagination` alongside the list of detected issues.
 */
export interface PaginationData {
  /** True when any pagination signal was found (links, rel next/prev, or a page number). */
  hasPagination: boolean;
  /** True when at least one `<a href>` matches a pagination URL pattern. */
  hasHtmlPaginationLinks: boolean;
  /** True when infinite-scroll markers appear in the markup. */
  hasInfiniteScroll: boolean;
  /** True when a "load more"-style control was found. */
  hasLoadMoreButton: boolean;
  /** De-duplicated absolute URLs of the links that look like pagination. */
  paginationLinks: string[];
  /** State of the `<link rel="next">` / `<link rel="prev">` elements. */
  relNextPrev: {
    hasNext: boolean;
    hasPrev: boolean;
    /** `href` of the rel="next" link exactly as written in the markup. */
    nextUrl?: string;
    /** `href` of the rel="prev" link exactly as written in the markup. */
    prevUrl?: string;
  };
  /** Page number taken from the URL or from "Page X of Y" text, when detectable. */
  currentPage?: number;
  /** Total page count taken from "Page X of Y" text, when present. */
  totalPages?: number;
}
20
+
21
/**
 * Detects pagination signals on a page and reports pagination-related issues.
 *
 * Looks at `<link rel="next|prev">`, anchors whose href matches common
 * pagination URL shapes, infinite-scroll markers, "load more" controls, and
 * page numbers in the URL or in "Page X of Y" text.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; used to resolve links and attached to issues.
 * @param canonical - Canonical URL declared by the page, if any.
 * @returns The issues found plus the detected pagination data.
 */
export function analyzePagination(
  html: string,
  url: string,
  canonical?: string
): { issues: AuditIssue[]; data: PaginationData } {
  const $ = cheerio.load(html);
  const issues: AuditIssue[] = [];

  // rel="next" / rel="prev" link elements
  const relNextHref = $('link[rel="next"]').attr('href');
  const relPrevHref = $('link[rel="prev"]').attr('href');

  // URL shapes that are treated as pagination
  const paginationPatterns = [
    /[?&]page=\d+/,
    /[?&]p=\d+/,
    /\/page\/\d+/,
    /\/p\/\d+/,
    /-page-\d+/,
    /\/\d+\/?$/,
  ];
  const looksPaginated = (candidate: string): boolean =>
    paginationPatterns.some((pattern) => pattern.test(candidate));

  // Collect each matching anchor once, as an absolute URL, in document order.
  const uniqueLinks = new Set<string>();
  $('a[href]').each((_, anchor) => {
    const href = $(anchor).attr('href') || '';
    if (!looksPaginated(href)) {
      return;
    }
    try {
      uniqueLinks.add(new URL(href, url).href);
    } catch {
      // Invalid URL, skip
    }
  });
  const paginationLinks = Array.from(uniqueLinks);

  // Infinite scroll: marker strings anywhere in the raw HTML, or marker elements.
  const infiniteScrollMarkers = ['infinite-scroll', 'infiniteScroll', 'infinite_scroll'];
  const infiniteScrollSelectors = ['[data-infinite-scroll]', '[data-infinite]', '.infinite-scroll'];
  const hasInfiniteScroll =
    infiniteScrollMarkers.some((marker) => html.includes(marker)) ||
    infiniteScrollSelectors.some((selector) => $(selector).length > 0);

  // "Load more" controls
  const loadMoreSelectors = [
    'button:contains("Load More")',
    'button:contains("Load more")',
    'button:contains("Show More")',
    'button:contains("Show more")',
    'a:contains("Load More")',
    'a:contains("Load more")',
    '[class*="load-more"]',
    '[class*="loadmore"]',
    '[data-load-more]',
  ];
  const hasLoadMoreButton = loadMoreSelectors.some((selector) => {
    try {
      return $(selector).length > 0;
    } catch {
      // Invalid selector, skip
      return false;
    }
  });

  // Current / total page detection
  let currentPage: number | undefined;
  let totalPages: number | undefined;

  // First source: a page number embedded in the URL.
  const urlPageMatch = url.match(/[?&]page=(\d+)|\/page\/(\d+)|[?&]p=(\d+)/);
  if (urlPageMatch) {
    const [, fromPageParam, fromPathSegment, fromShortParam] = urlPageMatch;
    currentPage = parseInt(fromPageParam || fromPathSegment || fromShortParam, 10);
  }

  // Second source: "Page X of Y" text, which overrides the URL-derived number.
  const pageOfMatch = $('body').text().match(/page\s*(\d+)\s*of\s*(\d+)/i);
  if (pageOfMatch) {
    currentPage = parseInt(pageOfMatch[1], 10);
    totalPages = parseInt(pageOfMatch[2], 10);
  }

  const hasHtmlPaginationLinks = paginationLinks.length > 0;
  const hasPagination =
    hasHtmlPaginationLinks ||
    relNextHref !== undefined ||
    relPrevHref !== undefined ||
    currentPage !== undefined;

  const data: PaginationData = {
    hasPagination,
    hasHtmlPaginationLinks,
    hasInfiniteScroll,
    hasLoadMoreButton,
    paginationLinks,
    relNextPrev: {
      hasNext: !!relNextHref,
      hasPrev: !!relPrevHref,
      nextUrl: relNextHref,
      prevUrl: relPrevHref,
    },
    currentPage,
    totalPages,
  };

  // ==================== Issue Detection ====================

  if (!hasHtmlPaginationLinks) {
    if (hasInfiniteScroll) {
      // Infinite scroll without HTML fallback links
      issues.push({
        ...ISSUE_DEFINITIONS.INFINITE_SCROLL_NO_FALLBACK,
        affectedUrls: [url],
        details: {
          hasLoadMore: hasLoadMoreButton,
          suggestion: 'Add <a href> pagination links alongside infinite scroll for SEO',
        },
      });
    } else if (hasLoadMoreButton) {
      // Load more without HTML links (and no infinite scroll)
      issues.push({
        ...ISSUE_DEFINITIONS.PAGINATION_NO_LINKS,
        affectedUrls: [url],
        details: {
          suggestion: 'Ensure "Load More" button also has underlying <a> links',
        },
      });
    }
  }

  // A page beyond the first whose canonical carries no page marker is assumed
  // to canonicalise to page 1, which is reported when the URL itself is paginated.
  const isDeepPage = hasPagination && !!currentPage && currentPage > 1;
  if (isDeepPage && canonical) {
    const canonicalLacksPageMarker = !looksPaginated(canonical);
    const urlHasPageNumber = looksPaginated(url);

    if (canonicalLacksPageMarker && urlHasPageNumber) {
      issues.push({
        ...ISSUE_DEFINITIONS.PAGINATION_CANONICAL_ISSUE,
        affectedUrls: [url],
        details: {
          currentPage,
          canonical,
          suggestion: 'Paginated pages should self-reference or point to a View All page',
        },
      });
    }
  }

  return { issues, data };
}