@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,275 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { analyzeSocialMeta } from './social-meta.js';
3
+
4
/**
 * Behavioural tests for `analyzeSocialMeta`: Open Graph tags, Twitter Card
 * tags, favicon links and the `<html lang>` attribute, plus two end-to-end
 * documents (fully compliant / completely bare).
 */
describe('analyzeSocialMeta', () => {
  const testUrl = 'https://example.com';

  // Smallest document used by every "missing X" case.
  const bareHtml = `<html><head></head></html>`;

  /** Runs the analyzer and returns the first issue carrying `code`, if any. */
  const issueWithCode = (html: string, code: string) =>
    analyzeSocialMeta(html, testUrl).issues.find((issue) => issue.code === code);

  describe('Open Graph analysis', () => {
    it('detects all Open Graph tags when present', () => {
      const html = `
        <html>
          <head>
            <meta property="og:title" content="Test Title" />
            <meta property="og:description" content="Test Description" />
            <meta property="og:image" content="https://example.com/og.png" />
            <meta property="og:url" content="https://example.com" />
            <meta property="og:type" content="website" />
            <meta property="og:site_name" content="Test Site" />
          </head>
        </html>
      `;

      const result = analyzeSocialMeta(html, testUrl);

      expect(result.data.openGraph).toMatchObject({
        title: 'Test Title',
        description: 'Test Description',
        image: 'https://example.com/og.png',
        url: 'https://example.com',
        type: 'website',
        siteName: 'Test Site',
      });

      // Should not have OG-related issues
      const openGraphIssues = result.issues.filter((issue) => issue.code.startsWith('OG_'));
      expect(openGraphIssues).toHaveLength(0);
    });

    it('reports missing og:title', () => {
      const found = issueWithCode(bareHtml, 'OG_TITLE_MISSING');

      expect(found).toBeDefined();
      expect(found?.severity).toBe('warning');
      expect(found?.affectedUrls).toContain(testUrl);
    });

    it.each([
      { tag: 'og:description', code: 'OG_DESCRIPTION_MISSING' },
      { tag: 'og:image', code: 'OG_IMAGE_MISSING' },
      { tag: 'og:url', code: 'OG_URL_MISSING' },
      { tag: 'og:type', code: 'OG_TYPE_MISSING' },
    ])('reports missing $tag', ({ code }) => {
      expect(issueWithCode(bareHtml, code)).toBeDefined();
    });
  });

  describe('Twitter Card analysis', () => {
    it('detects all Twitter Card tags when present', () => {
      const html = `
        <html>
          <head>
            <meta name="twitter:card" content="summary_large_image" />
            <meta name="twitter:title" content="Twitter Title" />
            <meta name="twitter:description" content="Twitter Description" />
            <meta name="twitter:image" content="https://example.com/twitter.png" />
            <meta name="twitter:site" content="@example" />
            <meta name="twitter:creator" content="@author" />
          </head>
        </html>
      `;

      const result = analyzeSocialMeta(html, testUrl);

      expect(result.data.twitter).toMatchObject({
        card: 'summary_large_image',
        title: 'Twitter Title',
        description: 'Twitter Description',
        image: 'https://example.com/twitter.png',
        site: '@example',
        creator: '@author',
      });

      // Should not have Twitter-related issues
      const twitterIssues = result.issues.filter((issue) => issue.code.startsWith('TWITTER_'));
      expect(twitterIssues).toHaveLength(0);
    });

    it('reports missing twitter:card', () => {
      const found = issueWithCode(bareHtml, 'TWITTER_CARD_MISSING');

      expect(found).toBeDefined();
      // Twitter card is a notice since it's less critical than OG tags
      expect(found?.severity).toBe('notice');
    });

    it('does not report missing twitter:image if og:image exists', () => {
      const html = `
        <html>
          <head>
            <meta property="og:image" content="https://example.com/og.png" />
          </head>
        </html>
      `;

      expect(issueWithCode(html, 'TWITTER_IMAGE_MISSING')).toBeUndefined();
    });

    it('reports missing twitter:image if no og:image', () => {
      expect(issueWithCode(bareHtml, 'TWITTER_IMAGE_MISSING')).toBeDefined();
    });
  });

  describe('Favicon analysis', () => {
    it('detects standard favicon', () => {
      const html = `
        <html>
          <head>
            <link rel="icon" href="/favicon.ico" />
          </head>
        </html>
      `;

      const result = analyzeSocialMeta(html, testUrl);

      expect(result.data.hasFavicon).toBe(true);
      expect(result.issues.find((issue) => issue.code === 'FAVICON_MISSING')).toBeUndefined();
    });

    it('detects shortcut icon', () => {
      const html = `
        <html>
          <head>
            <link rel="shortcut icon" href="/favicon.ico" />
          </head>
        </html>
      `;

      expect(analyzeSocialMeta(html, testUrl).data.hasFavicon).toBe(true);
    });

    it('detects apple-touch-icon', () => {
      const html = `
        <html>
          <head>
            <link rel="apple-touch-icon" href="/apple-icon.png" />
          </head>
        </html>
      `;

      expect(analyzeSocialMeta(html, testUrl).data.hasFavicon).toBe(true);
    });

    it('reports missing favicon', () => {
      const result = analyzeSocialMeta(bareHtml, testUrl);

      expect(result.data.hasFavicon).toBe(false);
      expect(result.issues.find((issue) => issue.code === 'FAVICON_MISSING')).toBeDefined();
    });
  });

  describe('Lang attribute analysis', () => {
    it('detects lang attribute', () => {
      const result = analyzeSocialMeta(`<html lang="en"><head></head></html>`, testUrl);

      expect(result.data.langAttribute).toBe('en');
      expect(result.issues.find((issue) => issue.code === 'LANG_ATTR_MISSING')).toBeUndefined();
    });

    it('reports missing lang attribute', () => {
      const result = analyzeSocialMeta(bareHtml, testUrl);

      expect(result.data.langAttribute).toBeUndefined();
      expect(result.issues.find((issue) => issue.code === 'LANG_ATTR_MISSING')).toBeDefined();
    });

    it('handles different lang formats', () => {
      const result = analyzeSocialMeta(`<html lang="en-US"><head></head></html>`, testUrl);

      expect(result.data.langAttribute).toBe('en-US');
    });
  });

  describe('Complete HTML analysis', () => {
    it('analyzes fully compliant HTML', () => {
      const html = `
        <!DOCTYPE html>
        <html lang="en">
          <head>
            <meta property="og:title" content="Test Page" />
            <meta property="og:description" content="A description" />
            <meta property="og:image" content="https://example.com/og.png" />
            <meta property="og:url" content="https://example.com" />
            <meta property="og:type" content="website" />
            <meta name="twitter:card" content="summary_large_image" />
            <meta name="twitter:image" content="https://example.com/twitter.png" />
            <link rel="icon" href="/favicon.ico" />
          </head>
          <body></body>
        </html>
      `;

      // Should have no issues for a fully compliant page
      expect(analyzeSocialMeta(html, testUrl).issues).toHaveLength(0);
    });

    it('analyzes completely non-compliant HTML', () => {
      const html = `<html><head></head><body></body></html>`;

      const { issues } = analyzeSocialMeta(html, testUrl);

      // Should have multiple issues
      expect(issues.length).toBeGreaterThan(5);

      // Check for specific issues
      const reportedCodes = issues.map((issue) => issue.code);
      const expectedCodes = [
        'OG_TITLE_MISSING',
        'OG_DESCRIPTION_MISSING',
        'OG_IMAGE_MISSING',
        'TWITTER_CARD_MISSING',
        'FAVICON_MISSING',
        'LANG_ATTR_MISSING',
      ];
      for (const code of expectedCodes) {
        expect(reportedCodes).toContain(code);
      }
    });
  });
});
@@ -0,0 +1,134 @@
1
+ import * as cheerio from 'cheerio';
2
+ import type { AuditIssue } from '../types.js';
3
+ import { ISSUE_DEFINITIONS } from '../types.js';
4
+
5
/**
 * Social-sharing metadata extracted from one HTML document by
 * `analyzeSocialMeta`.
 *
 * Each optional string is the `content` (or `lang`) attribute value read from
 * the document, and is `undefined` when the tag or attribute is absent.
 */
export interface SocialMetaData {
  /** Open Graph values read from `<meta property="og:*">` tags. */
  openGraph: {
    /** `og:title` */
    title?: string;
    /** `og:description` */
    description?: string;
    /** `og:image` */
    image?: string;
    /** `og:url` */
    url?: string;
    /** `og:type` */
    type?: string;
    /** `og:site_name` */
    siteName?: string;
  };
  /** Twitter Card values read from `<meta name="twitter:*">` tags. */
  twitter: {
    /** `twitter:card` */
    card?: string;
    /** `twitter:title` */
    title?: string;
    /** `twitter:description` */
    description?: string;
    /** `twitter:image` */
    image?: string;
    /** `twitter:site` */
    site?: string;
    /** `twitter:creator` */
    creator?: string;
  };
  /** True when the document declares an icon via a `<link rel>` tag. */
  hasFavicon: boolean;
  /** Value of the `lang` attribute on the `<html>` element. */
  langAttribute?: string;
}
25
+
26
/**
 * Extracts social-sharing metadata from an HTML document and reports what is
 * missing.
 *
 * Reads Open Graph `<meta property="og:*">` tags, Twitter Card
 * `<meta name="twitter:*">` tags, favicon `<link>` declarations and the
 * `<html lang>` attribute. One issue is pushed per missing item, in this
 * order: og:title, og:description, og:image, og:url, og:type, twitter:card,
 * twitter:image, favicon, lang.
 *
 * @param html - Raw HTML of the page.
 * @param url - Page URL; copied into `affectedUrls` of every reported issue.
 * @returns The extracted `data` (raw attribute values) and the list of `issues`.
 */
export function analyzeSocialMeta(html: string, url: string): { issues: AuditIssue[]; data: SocialMetaData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Raw `content` attribute of the first matching <meta>; undefined when absent.
  const metaContent = (attribute: 'property' | 'name', key: string): string | undefined =>
    $(`meta[${attribute}="${key}"]`).attr('content');

  // A value that is absent, empty, or whitespace-only carries no information
  // for a link preview, so all three are treated as "missing".
  const isBlank = (value: string | undefined): boolean =>
    value === undefined || value.trim() === '';

  // Extract Open Graph data
  const openGraph: SocialMetaData['openGraph'] = {
    title: metaContent('property', 'og:title'),
    description: metaContent('property', 'og:description'),
    image: metaContent('property', 'og:image'),
    url: metaContent('property', 'og:url'),
    type: metaContent('property', 'og:type'),
    siteName: metaContent('property', 'og:site_name'),
  };

  // Extract Twitter Card data
  const twitter: SocialMetaData['twitter'] = {
    card: metaContent('name', 'twitter:card'),
    title: metaContent('name', 'twitter:title'),
    description: metaContent('name', 'twitter:description'),
    image: metaContent('name', 'twitter:image'),
    site: metaContent('name', 'twitter:site'),
    creator: metaContent('name', 'twitter:creator'),
  };

  // Check favicon. `rel` is a space-separated, case-insensitive token list, so
  // compare individual tokens instead of the whole attribute string: this
  // accepts `rel="icon"`, `rel="shortcut icon"`, `rel="icon shortcut"`,
  // `rel="Shortcut Icon"` and `rel="apple-touch-icon"` alike.
  const faviconRelTokens = new Set(['icon', 'apple-touch-icon']);
  const hasFavicon = $('link[rel]')
    .toArray()
    .some((link) =>
      ($(link).attr('rel') ?? '')
        .toLowerCase()
        .split(/\s+/)
        .some((token) => faviconRelTokens.has(token))
    );

  // Check lang attribute
  const langAttribute = $('html').attr('lang');

  const data: SocialMetaData = {
    openGraph,
    twitter,
    hasFavicon,
    langAttribute,
  };

  // Each entry pairs "is this item missing?" with the issue to report.
  // Array order is the order in which issues are emitted.
  const checks = [
    // ==================== Open Graph Checks ====================
    [isBlank(openGraph.title), ISSUE_DEFINITIONS.OG_TITLE_MISSING],
    [isBlank(openGraph.description), ISSUE_DEFINITIONS.OG_DESCRIPTION_MISSING],
    [isBlank(openGraph.image), ISSUE_DEFINITIONS.OG_IMAGE_MISSING],
    [isBlank(openGraph.url), ISSUE_DEFINITIONS.OG_URL_MISSING],
    [isBlank(openGraph.type), ISSUE_DEFINITIONS.OG_TYPE_MISSING],
    // ==================== Twitter Card Checks ====================
    [isBlank(twitter.card), ISSUE_DEFINITIONS.TWITTER_CARD_MISSING],
    // Only flag if no OG image either (Twitter falls back to OG)
    [isBlank(twitter.image) && isBlank(openGraph.image), ISSUE_DEFINITIONS.TWITTER_IMAGE_MISSING],
    // ==================== Favicon Check ====================
    [!hasFavicon, ISSUE_DEFINITIONS.FAVICON_MISSING],
    // ==================== Lang Attribute Check ====================
    [isBlank(langAttribute), ISSUE_DEFINITIONS.LANG_ATTR_MISSING],
  ] as const;

  for (const [isMissing, definition] of checks) {
    if (isMissing) {
      issues.push({
        ...definition,
        affectedUrls: [url],
      });
    }
  }

  return { issues, data };
}
@@ -0,0 +1,151 @@
1
+ import * as cheerio from 'cheerio';
2
+ import type { AuditIssue } from '../types.js';
3
+ import { ISSUE_DEFINITIONS } from '../types.js';
4
+
5
/** Outcome of the soft-404 heuristic computed by `detectSoft404`. */
export interface Soft404Data {
  /** True when `confidence` reached the detection threshold. */
  isSoft404: boolean;
  /** Accumulated heuristic score, capped at 100. */
  confidence: number;
  /** Human-readable descriptions of the heuristics that fired. */
  signals: string[];
}
10
+
11
+ // Common error page phrases
12
+ const ERROR_PHRASES = [
13
+ 'page not found',
14
+ '404',
15
+ 'not found',
16
+ 'page doesn\'t exist',
17
+ 'page does not exist',
18
+ 'no longer available',
19
+ 'has been removed',
20
+ 'has been deleted',
21
+ 'couldn\'t find',
22
+ 'could not find',
23
+ 'doesn\'t exist',
24
+ 'does not exist',
25
+ 'no results found',
26
+ 'nothing found',
27
+ 'sorry, we couldn\'t find',
28
+ 'oops',
29
+ 'error occurred',
30
+ 'something went wrong',
31
+ 'page you requested',
32
+ 'page you were looking for',
33
+ 'this page isn\'t available',
34
+ 'this page is not available',
35
+ 'we can\'t find',
36
+ 'we cannot find',
37
+ ];
38
+
39
+ // Common error page title patterns
40
+ const ERROR_TITLE_PATTERNS = [
41
+ /404/i,
42
+ /not found/i,
43
+ /page.*not.*found/i,
44
+ /error/i,
45
+ /oops/i,
46
+ ];
47
+
48
/**
 * Heuristically decides whether a page served with HTTP 200 is really an
 * error page (a "soft 404").
 *
 * Points are accumulated from independent signals:
 * - document title matches an error pattern: +30
 * - first `<h1>` matches an error pattern: +25
 * - error phrases found in the body text: +10 each, at most +30
 * - fewer than 100 words of body text: +15
 * - a search form is present and the score is already above 20: +10
 * - an element whose class contains an error-related fragment: +10
 *
 * The score is capped at 100 and reported as `confidence`; the page is
 * flagged, and a `SOFT_404` issue emitted, when confidence is at least 50.
 *
 * @param html - Raw HTML of the page.
 * @param url - Page URL; copied into `affectedUrls` of the reported issue.
 * @param statusCode - HTTP status the page was served with. Anything other
 *   than 200 is never reported as a soft 404.
 * @returns The heuristic result in `data` and zero or one entries in `issues`.
 */
export function detectSoft404(
  html: string,
  url: string,
  statusCode: number = 200
): { issues: AuditIssue[]; data: Soft404Data } {
  const issues: AuditIssue[] = [];

  // Only check pages that return 200. Bail out before parsing so non-200
  // responses do not pay for building a DOM that is never inspected.
  if (statusCode !== 200) {
    return {
      issues,
      data: { isSoft404: false, confidence: 0, signals: [] },
    };
  }

  const $ = cheerio.load(html);
  const signals: string[] = [];
  let score = 0;

  // Use only the first <title>. Selecting every <title> would also pick up
  // inline-SVG <title> elements (e.g. an icon labelled "Error") and
  // concatenate them into the document title.
  const title = $('title').first().text().toLowerCase();

  // Measure readable text only. Depending on the cheerio version, `.text()`
  // can include inline <script>/<style> source, which would add bogus phrase
  // matches and inflate the word count. Work on a clone so the parsed
  // document itself is left untouched for the selector checks below.
  const readableBody = $('body').clone();
  readableBody.find('script, style').remove();
  const bodyText = readableBody.text().toLowerCase().replace(/\s+/g, ' ');

  const h1 = $('h1').first().text().toLowerCase();

  // Purely numeric phrases (i.e. "404") must not match inside a longer number
  // such as "14045"; every other phrase is a plain substring match.
  const bodyContains = (phrase: string): boolean =>
    /^\d+$/.test(phrase)
      ? new RegExp(`(?<!\\d)${phrase}(?!\\d)`).test(bodyText)
      : bodyText.includes(phrase);

  // Check title for error patterns
  for (const pattern of ERROR_TITLE_PATTERNS) {
    if (pattern.test(title)) {
      score += 30;
      signals.push(`Title matches error pattern: "${title.substring(0, 50)}"`);
      break;
    }
  }

  // Check H1 for error patterns
  for (const pattern of ERROR_TITLE_PATTERNS) {
    if (pattern.test(h1)) {
      score += 25;
      signals.push(`H1 matches error pattern: "${h1.substring(0, 50)}"`);
      break;
    }
  }

  // Check body text for error phrases. Every hit counts toward the score, but
  // only the first three are listed as signals to keep the output short.
  let errorPhraseCount = 0;
  for (const phrase of ERROR_PHRASES) {
    if (bodyContains(phrase)) {
      errorPhraseCount++;
      if (errorPhraseCount <= 3) {
        signals.push(`Body contains error phrase: "${phrase}"`);
      }
    }
  }
  score += Math.min(errorPhraseCount * 10, 30);

  // Check for very thin content (common in error pages)
  const wordCount = bodyText.split(/\s+/).filter((word) => word.length > 0).length;
  if (wordCount < 100) {
    score += 15;
    signals.push(`Very thin content: ${wordCount} words`);
  }

  // Check for search form on error-like pages (common pattern)
  const hasSearchForm =
    $('form[action*="search"]').length > 0 ||
    $('input[type="search"]').length > 0 ||
    $('input[name="q"]').length > 0 ||
    $('input[name="search"]').length > 0;

  // Search form + error signals = higher soft 404 confidence. A search form
  // on its own is not counted; it only reinforces an existing suspicion.
  if (hasSearchForm && score > 20) {
    score += 10;
    signals.push('Page has search form (common on 404 pages)');
  }

  // Check for common 404 page CSS classes
  const errorClasses = ['error', '404', 'not-found', 'notfound', 'page-error'];
  for (const cls of errorClasses) {
    if ($(`[class*="${cls}"]`).length > 0) {
      score += 10;
      signals.push(`Found error-related CSS class containing "${cls}"`);
      break;
    }
  }

  // Calculate confidence (0-100)
  const confidence = Math.min(score, 100);
  const isSoft404 = confidence >= 50;

  const data: Soft404Data = {
    isSoft404,
    confidence,
    signals,
  };

  if (isSoft404) {
    issues.push({
      ...ISSUE_DEFINITIONS.SOFT_404,
      affectedUrls: [url],
      details: {
        confidence: `${confidence}%`,
        signals: signals.slice(0, 5),
      },
    });
  }

  return { issues, data };
}