@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import type { AuditIssue } from '../types.js';
|
|
3
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
4
|
+
|
|
5
|
+
/**
 * On-page facts extracted from a single HTML document by `analyzeOnPage`.
 */
export interface OnPageData {
  /** Trimmed `<title>` text; omitted when the page has no (or an empty) title. */
  title?: string;
  /** Character length of the trimmed title, 0 when there is none. */
  titleLength: number;
  /** Trimmed `content` of `<meta name="description">`, when the tag exists. */
  description?: string;
  /** Character length of the trimmed description, 0 when there is none. */
  descriptionLength: number;
  /** `href` of the first `<link rel="canonical">`, when present. */
  canonical?: string;
  /** Trimmed text of every `<h1>`, in document order. */
  h1s: string[];
  /** Every `<h1>`-`<h6>` in document order, with its numeric level (1-6). */
  headings: Array<{ level: number; text: string }>;
  /** Number of whitespace-separated tokens in the body text. */
  wordCount: number;
  /** Body text length as a percentage (0-100) of the raw HTML length. */
  textToHtmlRatio: number;
  /** True when the robots meta tag's content contains "noindex" (case-insensitive). */
  hasNoindex: boolean;
  /** Raw `content` of `<meta name="robots">`, when present. */
  metaRobots?: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Run the on-page checks (title, meta description, canonical, headings,
 * content volume, noindex) against one HTML document.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL the HTML belongs to; it is only used to label issues.
 * @returns The issues that were detected plus the extracted on-page data.
 */
export function analyzeOnPage(html: string, url: string): { issues: AuditIssue[]; data: OnPageData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Extract data.
  // Take the first document-level <title> only: a bare `$('title').text()`
  // concatenates every match, including the <title> accessibility labels of
  // inline <svg> icons, which corrupts both the title and its length checks.
  const title = $('title').not('svg title').first().text().trim();
  const description = $('meta[name="description"]').attr('content')?.trim();
  const canonical = $('link[rel="canonical"]').attr('href');
  const metaRobots = $('meta[name="robots"]').attr('content');

  // Check noindex
  const hasNoindex = metaRobots?.toLowerCase().includes('noindex') || false;

  // Extract headings in document order; the H1 list is derived from the same pass.
  const headings: { level: number; text: string }[] = [];
  $('h1, h2, h3, h4, h5, h6').each((_, el) => {
    const level = parseInt(el.tagName.charAt(1), 10);
    headings.push({ level, text: $(el).text().trim() });
  });
  const h1s: string[] = headings.filter((h) => h.level === 1).map((h) => h.text);

  // Calculate word count (text content only).
  // `.text()` can include the source of inline <script>/<style> elements, so
  // they are dropped from a clone of <body> first; otherwise JS/CSS inflates
  // the word count and the text-to-HTML ratio. The clone keeps `$` untouched.
  const $body = $('body').clone();
  $body.find('script, style').remove();
  const bodyText = $body.text().replace(/\s+/g, ' ').trim();
  const wordCount = bodyText.split(/\s+/).filter((w) => w.length > 0).length;

  // Calculate text-to-HTML ratio (percentage of the raw markup that is text)
  const textLength = bodyText.length;
  const htmlLength = html.length;
  const textToHtmlRatio = htmlLength > 0 ? (textLength / htmlLength) * 100 : 0;

  const data: OnPageData = {
    title: title || undefined,
    titleLength: title.length,
    description,
    descriptionLength: description?.length || 0,
    canonical,
    h1s,
    headings,
    wordCount,
    textToHtmlRatio,
    hasNoindex,
    metaRobots,
  };

  // ==================== TITLE CHECKS ====================
  if (!title) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_MISSING,
      affectedUrls: [url],
    });
  } else if (title.length < 30) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_TOO_SHORT,
      affectedUrls: [url],
      details: { title, length: title.length },
    });
  } else if (title.length > 60) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_TOO_LONG,
      affectedUrls: [url],
      details: { title, length: title.length },
    });
  }

  // ==================== META DESCRIPTION CHECKS ====================
  if (!description) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_MISSING,
      affectedUrls: [url],
    });
  } else if (description.length < 120) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_TOO_SHORT,
      affectedUrls: [url],
      details: { description, length: description.length },
    });
  } else if (description.length > 160) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_TOO_LONG,
      affectedUrls: [url],
      details: { description, length: description.length },
    });
  }

  // ==================== CANONICAL CHECKS ====================
  if (!canonical) {
    issues.push({
      ...ISSUE_DEFINITIONS.CANONICAL_MISSING,
      affectedUrls: [url],
    });
  }

  // Check for multiple canonicals
  const canonicalCount = $('link[rel="canonical"]').length;
  if (canonicalCount > 1) {
    issues.push({
      ...ISSUE_DEFINITIONS.MULTIPLE_CANONICALS,
      affectedUrls: [url],
      details: { count: canonicalCount },
    });
  }

  // ==================== H1 CHECKS ====================
  if (h1s.length === 0) {
    issues.push({
      ...ISSUE_DEFINITIONS.H1_MISSING,
      affectedUrls: [url],
    });
  } else if (h1s.length > 1) {
    issues.push({
      ...ISSUE_DEFINITIONS.H1_MULTIPLE,
      affectedUrls: [url],
      details: { h1s, count: h1s.length },
    });
  }

  // Check if H1 duplicates title (only meaningful when there is exactly one H1)
  if (h1s.length === 1 && title && h1s[0].toLowerCase() === title.toLowerCase()) {
    issues.push({
      ...ISSUE_DEFINITIONS.H1_DUPLICATE_OF_TITLE,
      affectedUrls: [url],
      details: { h1: h1s[0], title },
    });
  }

  // Check for missing H2 tags (important for content structure)
  const h2Count = headings.filter((h) => h.level === 2).length;
  if (h2Count === 0 && wordCount > 100) {
    // Only warn if there's enough content to warrant H2 headings
    issues.push({
      code: 'H2_MISSING',
      severity: 'warning',
      category: 'on-page',
      title: 'No H2 headings found',
      description: 'No H2 tags were found on the page. H2 headings help structure content and improve readability.',
      impact: 'Without H2 headings, search engines and users may find it harder to understand content structure. H2s are important for featuring in search results.',
      howToFix: 'Break your content into logical sections using H2 headings. Include relevant keywords naturally in your H2 tags.',
      affectedUrls: [url],
    });
  }

  // Check heading hierarchy: flag the first jump of more than one level
  // (e.g. H2 -> H4). The very first heading is never flagged.
  let previousLevel = 0;
  for (const heading of headings) {
    if (heading.level > previousLevel + 1 && previousLevel > 0) {
      issues.push({
        ...ISSUE_DEFINITIONS.HEADING_SKIP,
        affectedUrls: [url],
        details: { from: previousLevel, to: heading.level },
      });
      break; // Only report once
    }
    previousLevel = heading.level;
  }

  // ==================== CONTENT CHECKS ====================
  if (wordCount < 300) {
    issues.push({
      ...ISSUE_DEFINITIONS.THIN_CONTENT,
      affectedUrls: [url],
      details: { wordCount },
    });
  }

  if (textToHtmlRatio < 10) {
    issues.push({
      ...ISSUE_DEFINITIONS.LOW_TEXT_HTML_RATIO,
      affectedUrls: [url],
      details: { ratio: textToHtmlRatio.toFixed(1) + '%' },
    });
  }

  // ==================== NOINDEX CHECK ====================
  if (hasNoindex) {
    issues.push({
      ...ISSUE_DEFINITIONS.NOINDEX_TAG,
      affectedUrls: [url],
      details: { metaRobots },
    });
  }

  return { issues, data };
}
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Page Resources Check (Page Objects Analysis)
|
|
3
|
+
*
|
|
4
|
+
* Analyzes the number and types of embedded resources on a page.
|
|
5
|
+
* Too many HTTP requests slow down page load significantly due to:
|
|
6
|
+
* - Connection overhead (DNS, TCP, TLS)
|
|
7
|
+
* - Browser connection limits per domain
|
|
8
|
+
* - Render blocking
|
|
9
|
+
*
|
|
10
|
+
* Best practices:
|
|
11
|
+
* - Keep total requests under 50 for optimal performance
|
|
12
|
+
* - Minimize third-party resources
|
|
13
|
+
* - Combine/bundle CSS and JS where possible
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import * as cheerio from 'cheerio';
|
|
17
|
+
import type { AuditIssue } from '../types.js';
|
|
18
|
+
|
|
19
|
+
/**
 * Summary of the sub-resources referenced by a page's HTML, as produced by
 * `analyzePageResources`.
 */
export interface PageResourcesData {
  /** Sum of all `byType` counters. */
  total: number;
  /** Number of referenced resources per resource kind. */
  byType: {
    stylesheets: number;
    scripts: number;
    images: number;
    fonts: number;
    iframes: number;
    /** Preload / prefetch / modulepreload hints that are not counted under another kind. */
    other: number;
  };
  /** Counted resources split by whether they are served from the page's own host (or a subdomain of it). */
  byOrigin: {
    firstParty: number;
    thirdParty: number;
  };
  /** Distinct hostnames of the third-party resources. */
  thirdPartyDomains: string[];
  /** Resource URLs exactly as written in the markup, grouped by kind. */
  details: {
    stylesheets: string[];
    scripts: string[];
    /** Truncated to the first 20 entries to bound the payload size. */
    images: string[];
    fonts: string[];
    iframes: string[];
  };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Analyze page resources.
 *
 * Counts the stylesheets, scripts, images, fonts, iframes and resource hints
 * referenced by the markup, classifies each as first- or third-party relative
 * to `url`, and reports issues when the counts exceed the built-in thresholds
 * (more than 50 / 100 total requests, more than 20 third-party resources,
 * more than 3 iframes).
 *
 * @param html - Raw HTML of the page.
 * @param url - Absolute URL of the page; relative resource URLs are resolved against it.
 * @returns Detected issues and the resource breakdown.
 * @throws TypeError if `url` is not a valid absolute URL (from `new URL(url)`).
 */
export function analyzePageResources(html: string, url: string): { issues: AuditIssue[]; data: PageResourcesData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);
  const baseHostname = new URL(url).hostname;

  const stylesheets: string[] = [];
  const scripts: string[] = [];
  const images: string[] = [];
  const fonts: string[] = [];
  const iframes: string[] = [];
  const thirdPartyDomains = new Set<string>();

  let firstParty = 0;
  let thirdParty = 0;

  /**
   * Resolve a resource URL against the page URL so the same resource written
   * two ways ("/app.js" vs "app.js") compares equal. Unparseable values are
   * returned unchanged.
   */
  function resolveUrl(resourceUrl: string): string {
    try {
      return new URL(resourceUrl, url).href;
    } catch {
      return resourceUrl;
    }
  }

  /**
   * Track resource origin. The page's own host and its subdomains count as
   * first-party; a URL that cannot be parsed is also treated as first-party.
   */
  function trackOrigin(resourceUrl: string): void {
    let hostname: string;
    try {
      hostname = new URL(resourceUrl, url).hostname;
    } catch {
      firstParty++;
      return;
    }
    if (hostname === baseHostname || hostname.endsWith(`.${baseHostname}`)) {
      firstParty++;
    } else {
      thirdParty++;
      thirdPartyDomains.add(hostname);
    }
  }

  /**
   * Extract the candidate URLs from a `srcset` value, following the HTML
   * parsing rules: a URL is a run of non-whitespace characters (so it may
   * itself contain commas, as CDN transform URLs and data: URIs do), and the
   * descriptors that follow run up to the next comma. Splitting the whole
   * attribute on "," would cut such URLs into bogus fragments.
   */
  function parseSrcsetUrls(srcset: string): string[] {
    const found: string[] = [];
    let pos = 0;
    while (pos < srcset.length) {
      // Skip separators (whitespace and commas) before the next candidate.
      while (pos < srcset.length && /[\s,]/.test(srcset.charAt(pos))) {
        pos++;
      }
      if (pos >= srcset.length) {
        break;
      }
      const start = pos;
      while (pos < srcset.length && !/\s/.test(srcset.charAt(pos))) {
        pos++;
      }
      const token = srcset.slice(start, pos);
      if (token.endsWith(',')) {
        // "url, next" - the comma terminates a candidate with no descriptors.
        found.push(token.replace(/,+$/, ''));
      } else {
        found.push(token);
        // Skip the descriptors ("2x", "600w") up to the separating comma.
        const nextComma = srcset.indexOf(',', pos);
        pos = nextComma === -1 ? srcset.length : nextComma;
      }
    }
    return found;
  }

  // Extract stylesheets
  $('link[rel="stylesheet"][href]').each((_, el) => {
    const href = $(el).attr('href');
    if (href && !href.startsWith('data:')) {
      stylesheets.push(href);
      trackOrigin(href);
    }
  });

  // Extract scripts
  $('script[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.startsWith('data:')) {
      scripts.push(src);
      trackOrigin(src);
    }
  });

  // Extract images
  $('img[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.startsWith('data:')) {
      images.push(src);
      trackOrigin(src);
    }
  });

  // Also check srcset for responsive images
  $('img[srcset], source[srcset]').each((_, el) => {
    const srcset = $(el).attr('srcset');
    if (!srcset) {
      return;
    }
    for (const srcUrl of parseSrcsetUrls(srcset)) {
      if (!srcUrl.startsWith('data:') && !images.includes(srcUrl)) {
        images.push(srcUrl);
        trackOrigin(srcUrl);
      }
    }
  });

  // Extract fonts (preloaded or linked)
  $('link[rel="preload"][as="font"], link[href*=".woff"], link[href*=".woff2"], link[href*=".ttf"], link[href*=".otf"]').each(
    (_, el) => {
      const href = $(el).attr('href');
      if (href && !href.startsWith('data:')) {
        fonts.push(href);
        trackOrigin(href);
      }
    }
  );

  // Extract iframes
  $('iframe[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.startsWith('about:') && !src.startsWith('javascript:')) {
      iframes.push(src);
      trackOrigin(src);
    }
  });

  // Calculate other resources (preloads, prefetches that aren't already counted).
  // A hint for a script/image/stylesheet/font that the page also references
  // directly is a single request, so everything counted above is collected
  // (as resolved URLs) and excluded here.
  const alreadyCounted = new Set<string>();
  for (const group of [stylesheets, scripts, images, fonts]) {
    for (const resource of group) {
      alreadyCounted.add(resolveUrl(resource));
    }
  }

  let other = 0;
  $('link[rel="preload"], link[rel="prefetch"], link[rel="modulepreload"]').each((_, el) => {
    const href = $(el).attr('href');
    const as = $(el).attr('as');
    if (!href || href.startsWith('data:')) {
      return;
    }
    // Font and style hints are skipped outright: they are either handled by
    // the font extraction above or belong to a stylesheet.
    if (as === 'font' || as === 'style') {
      return;
    }
    const resolved = resolveUrl(href);
    if (!alreadyCounted.has(resolved)) {
      alreadyCounted.add(resolved); // the same URL hinted twice is still one request
      other++;
      trackOrigin(href);
    }
  });

  const total = stylesheets.length + scripts.length + images.length + fonts.length + iframes.length + other;

  const breakdown = {
    stylesheets: stylesheets.length,
    scripts: scripts.length,
    images: images.length,
    fonts: fonts.length,
    iframes: iframes.length,
    other,
  };

  // Generate issues
  if (total > 100) {
    issues.push({
      code: 'PAGE_RESOURCES_EXCESSIVE',
      severity: 'error',
      category: 'performance',
      title: 'Excessive number of page resources',
      description: `Page requests ${total} resources. This significantly impacts load time due to connection overhead and browser limits.`,
      impact:
        'Too many HTTP requests cause slow page loads, poor Core Web Vitals, and high bounce rates. Each request has DNS, TCP, and TLS overhead.',
      howToFix:
        'Combine CSS/JS files, use image sprites or inline small images, lazy-load non-critical resources, remove unused dependencies.',
      affectedUrls: [url],
      details: {
        total,
        breakdown: { ...breakdown },
        recommendation: 'Aim for under 50 total requests for optimal performance.',
      },
    });
  } else if (total > 50) {
    issues.push({
      code: 'PAGE_RESOURCES_HIGH',
      severity: 'warning',
      category: 'performance',
      title: 'High number of page resources',
      description: `Page requests ${total} resources. Consider reducing for better performance.`,
      impact: 'Many HTTP requests increase page load time, especially on mobile networks.',
      howToFix: 'Bundle CSS/JS files, lazy-load images below the fold, consider critical CSS inlining.',
      affectedUrls: [url],
      details: {
        total,
        breakdown: { ...breakdown },
      },
    });
  }

  // Check third-party resources
  if (thirdParty > 20) {
    issues.push({
      code: 'PAGE_RESOURCES_THIRD_PARTY_HIGH',
      severity: 'warning',
      category: 'performance',
      title: 'Many third-party resources',
      description: `Page loads ${thirdParty} resources from ${thirdPartyDomains.size} third-party domains.`,
      impact:
        'Third-party resources are outside your control and add latency. They can also be a privacy/security concern.',
      howToFix:
        'Self-host critical third-party resources when possible. Use preconnect hints for remaining third-party origins.',
      affectedUrls: [url],
      details: {
        thirdPartyCount: thirdParty,
        domains: Array.from(thirdPartyDomains).slice(0, 10),
      },
    });
  }

  // Check for too many iframes
  if (iframes.length > 3) {
    issues.push({
      code: 'PAGE_RESOURCES_MANY_IFRAMES',
      severity: 'warning',
      category: 'performance',
      title: 'Multiple iframes detected',
      description: `Page contains ${iframes.length} iframes. Each iframe loads its own document and resources.`,
      impact: 'Iframes significantly increase page weight and can block the main thread.',
      howToFix:
        'Lazy-load iframes that are below the fold. Consider native embeds or facade patterns for video/widget iframes.',
      affectedUrls: [url],
      details: {
        iframeCount: iframes.length,
        iframes: iframes.slice(0, 5),
      },
    });
  }

  return {
    issues,
    data: {
      total,
      byType: breakdown,
      byOrigin: {
        firstParty,
        thirdParty,
      },
      thirdPartyDomains: Array.from(thirdPartyDomains),
      details: {
        stylesheets,
        scripts,
        images: images.slice(0, 20), // Limit for data size
        fonts,
        iframes,
      },
    },
  };
}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import type { AuditIssue } from '../types.js';
|
|
3
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Pagination signals detected on a page by `analyzePagination`.
 */
export interface PaginationData {
  /** True when any pagination signal was found (links, rel next/prev, or a page number). */
  hasPagination: boolean;
  /** True when at least one `<a href>` matching a pagination URL pattern was found. */
  hasHtmlPaginationLinks: boolean;
  /** True when infinite-scroll markers were found in the markup or scripts. */
  hasInfiniteScroll: boolean;
  /** True when a "Load more" / "Show more" style control was found. */
  hasLoadMoreButton: boolean;
  /** Absolute, de-duplicated URLs of the detected pagination links. */
  paginationLinks: string[];
  /** State of the `<link rel="next">` / `<link rel="prev">` hints. */
  relNextPrev: {
    hasNext: boolean;
    hasPrev: boolean;
    /** `href` of `<link rel="next">` exactly as written in the markup. */
    nextUrl?: string;
    /** `href` of `<link rel="prev">` exactly as written in the markup. */
    prevUrl?: string;
  };
  /** Page number taken from the URL or from a "Page X of Y" label, when detectable. */
  currentPage?: number;
  /** Total page count taken from a "Page X of Y" label, when present. */
  totalPages?: number;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Detect pagination on a page (HTML links, rel next/prev hints, infinite
 * scroll, "load more" controls) and report crawlability problems with it.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; relative links are resolved against it.
 * @param canonical - The page's canonical URL, if known; enables the
 *   paginated-canonical check.
 * @returns Detected issues and the pagination data.
 */
export function analyzePagination(
  html: string,
  url: string,
  canonical?: string
): { issues: AuditIssue[]; data: PaginationData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Check for rel="next" and rel="prev"
  const nextLink = $('link[rel="next"]').attr('href');
  const prevLink = $('link[rel="prev"]').attr('href');

  // URL shapes that are treated as pagination
  const paginationPatterns = [
    /[?&]page=\d+/,
    /[?&]p=\d+/,
    /\/page\/\d+/,
    /\/p\/\d+/,
    /-page-\d+/,
    /\/\d+\/?$/,
  ];

  // Host of the audited page, ignoring a leading "www.". Left undefined when
  // `url` cannot be parsed, in which case no host filtering is applied.
  const stripWww = (hostname: string): string => hostname.replace(/^www\./, '');
  let pageHost: string | undefined;
  try {
    pageHost = stripWww(new URL(url).hostname);
  } catch {
    pageHost = undefined;
  }

  // Detect pagination patterns in links.
  // Only links that stay on the audited site qualify: the patterns are loose
  // (any URL ending in a number matches), so an off-site link such as a social
  // post ".../status/123" would otherwise be mistaken for pagination and hide
  // the "no crawlable fallback" issues below.
  const seenLinks = new Set<string>();
  const paginationLinks: string[] = [];
  $('a[href]').each((_, el) => {
    const href = $(el).attr('href') || '';
    if (!paginationPatterns.some((pattern) => pattern.test(href))) {
      return;
    }
    let resolved: URL;
    try {
      resolved = new URL(href, url);
    } catch {
      return; // Invalid URL, skip
    }
    if (pageHost !== undefined && stripWww(resolved.hostname) !== pageHost) {
      return;
    }
    if (!seenLinks.has(resolved.href)) {
      seenLinks.add(resolved.href);
      paginationLinks.push(resolved.href);
    }
  });

  // Detect infinite scroll patterns (library names in the source, or marker attributes/classes)
  const hasInfiniteScrollScript = html.includes('infinite-scroll') ||
    html.includes('infiniteScroll') ||
    html.includes('infinite_scroll') ||
    $('[data-infinite-scroll]').length > 0 ||
    $('[data-infinite]').length > 0 ||
    $('.infinite-scroll').length > 0;

  // Detect "load more" buttons
  const loadMoreSelectors = [
    'button:contains("Load More")',
    'button:contains("Load more")',
    'button:contains("Show More")',
    'button:contains("Show more")',
    'a:contains("Load More")',
    'a:contains("Load more")',
    '[class*="load-more"]',
    '[class*="loadmore"]',
    '[data-load-more]',
  ];

  const hasLoadMoreButton = loadMoreSelectors.some((selector) => {
    try {
      return $(selector).length > 0;
    } catch {
      // Selector rejected by the selector engine - treat as no match
      return false;
    }
  });

  // Try to detect current page number
  let currentPage: number | undefined;
  let totalPages: number | undefined;

  // Check URL for page number
  const pageMatch = url.match(/[?&]page=(\d+)|\/page\/(\d+)|[?&]p=(\d+)/);
  if (pageMatch) {
    currentPage = parseInt(pageMatch[1] || pageMatch[2] || pageMatch[3], 10);
  }

  // Check for "Page X of Y" pattern; when present it overrides the URL-derived number
  const pageOfPattern = /page\s*(\d+)\s*of\s*(\d+)/i;
  const pageOfMatch = $('body').text().match(pageOfPattern);
  if (pageOfMatch) {
    currentPage = parseInt(pageOfMatch[1], 10);
    totalPages = parseInt(pageOfMatch[2], 10);
  }

  const hasHtmlPaginationLinks = paginationLinks.length > 0;

  const hasPagination = hasHtmlPaginationLinks ||
    nextLink !== undefined ||
    prevLink !== undefined ||
    currentPage !== undefined;

  const data: PaginationData = {
    hasPagination,
    hasHtmlPaginationLinks,
    hasInfiniteScroll: hasInfiniteScrollScript,
    hasLoadMoreButton,
    paginationLinks,
    relNextPrev: {
      hasNext: !!nextLink,
      hasPrev: !!prevLink,
      nextUrl: nextLink,
      prevUrl: prevLink,
    },
    currentPage,
    totalPages,
  };

  // ==================== Issue Detection ====================

  // Infinite scroll without HTML fallback links
  if (hasInfiniteScrollScript && !hasHtmlPaginationLinks) {
    issues.push({
      ...ISSUE_DEFINITIONS.INFINITE_SCROLL_NO_FALLBACK,
      affectedUrls: [url],
      details: {
        hasLoadMore: hasLoadMoreButton,
        suggestion: 'Add <a href> pagination links alongside infinite scroll for SEO',
      },
    });
  }

  // Load more without HTML links (not reported on top of the infinite-scroll issue)
  if (hasLoadMoreButton && !hasHtmlPaginationLinks && !hasInfiniteScrollScript) {
    issues.push({
      ...ISSUE_DEFINITIONS.PAGINATION_NO_LINKS,
      affectedUrls: [url],
      details: {
        suggestion: 'Ensure "Load More" button also has underlying <a> links',
      },
    });
  }

  // Check for improper canonical on paginated pages
  if (hasPagination && currentPage && currentPage > 1 && canonical) {
    // A canonical without any page marker is taken to point at page 1
    const isCanonicalToPage1 = !paginationPatterns.some((p) => p.test(canonical));
    const urlHasPageNumber = paginationPatterns.some((p) => p.test(url));

    if (isCanonicalToPage1 && urlHasPageNumber) {
      issues.push({
        ...ISSUE_DEFINITIONS.PAGINATION_CANONICAL_ISSUE,
        affectedUrls: [url],
        details: {
          currentPage,
          canonical,
          suggestion: 'Paginated pages should self-reference or point to a View All page',
        },
      });
    }
  }

  return { issues, data };
}
|