@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,251 @@
1
+ // SERP Preview and Pixel-based Meta Analysis
2
+ // Calculates pixel widths for title and description for accurate SERP display prediction
3
+
4
+ import * as cheerio from 'cheerio';
5
+ import type { AuditIssue } from '../types.js';
6
+ import { ISSUE_DEFINITIONS } from '../types.js';
7
+
8
/**
 * Approximate per-character widths in pixels, modelled on the font Google
 * uses for search results. This is a simplification: real glyph widths vary,
 * and any character not listed here falls back to DEFAULT_CHAR_WIDTH.
 */
const CHAR_WIDTHS: Record<string, number> = {
  // a-z
  a: 9, b: 9, c: 8, d: 9, e: 9, f: 5, g: 9, h: 9, i: 4, j: 4, k: 8, l: 4, m: 14,
  n: 9, o: 9, p: 9, q: 9, r: 6, s: 8, t: 5, u: 9, v: 8, w: 12, x: 8, y: 8, z: 7,

  // A-Z
  A: 11, B: 10, C: 10, D: 11, E: 9, F: 9, G: 11, H: 11, I: 4, J: 8, K: 10, L: 8, M: 13,
  N: 11, O: 12, P: 10, Q: 12, R: 10, S: 10, T: 9, U: 11, V: 10, W: 14, X: 10, Y: 10, Z: 9,

  // 0-9 (uniform width)
  '0': 9, '1': 9, '2': 9, '3': 9, '4': 9,
  '5': 9, '6': 9, '7': 9, '8': 9, '9': 9,

  // Space and sentence punctuation
  ' ': 4, '.': 4, ',': 4, '!': 4, '?': 8, '-': 5, '_': 8, ':': 4, ';': 4,

  // Quotes, brackets and separators
  "'": 3, '"': 6, '(': 5, ')': 5, '[': 5, ']': 5, '/': 5, '|': 4, '&': 10,

  // Symbols
  '@': 14, '#': 9, '$': 9, '%': 12, '^': 7, '*': 6, '+': 9, '=': 9,
};

/** Width assumed for any character that has no entry in CHAR_WIDTHS. */
const DEFAULT_CHAR_WIDTH = 9;

/** Pixel budget for a title in the SERP. */
const TITLE_MAX_PIXELS = 580;

/** Pixel budget for a description on desktop. */
const DESCRIPTION_MAX_PIXELS_DESKTOP = 920;

/** Pixel budget for a description on mobile. */
const DESCRIPTION_MAX_PIXELS_MOBILE = 680;
34
+
35
/**
 * Result of a SERP preview analysis: measured title/description, the URL as
 * it would be displayed, and text renderings of the result for two layouts.
 */
export interface SERPPreviewData {
  /** Measurements for the page title. */
  title: {
    /** Title text as extracted from the page. */
    text: string;
    /** `text.length` (UTF-16 code units). */
    charLength: number;
    /** Estimated rendered width of `text` in pixels. */
    pixelWidth: number;
    /** Whether the title had to be shortened to fit the title pixel budget. */
    truncated: boolean;
    /** Title as it would be shown (shortened with "..." when truncated). */
    displayText: string;
  };
  /** Measurements for the meta description. */
  description: {
    /** Description text as extracted from the page. */
    text: string;
    /** `text.length` (UTF-16 code units). */
    charLength: number;
    /** Estimated rendered width in pixels, reported for desktop. */
    pixelWidthDesktop: number;
    /** Estimated rendered width in pixels, reported for mobile. */
    pixelWidthMobile: number;
    /** Whether the description exceeds the desktop pixel budget. */
    truncatedDesktop: boolean;
    /** Whether the description exceeds the mobile pixel budget. */
    truncatedMobile: boolean;
    /** Description as shown on desktop. */
    displayTextDesktop: string;
    /** Description as shown on mobile. */
    displayTextMobile: string;
  };
  /** URL formatted for display. */
  url: {
    /** Breadcrumb segments joined into a single display string. */
    display: string;
    /** Individual breadcrumb segments (host first, then path parts). */
    breadcrumbs: string[];
  };
  /** Plain-text box renderings of the search result. */
  preview: {
    desktop: string;
    mobile: string;
  };
}
62
+
63
/**
 * Estimate how wide `text` renders, in pixels.
 *
 * Sums the CHAR_WIDTHS entry of every character (iterating by Unicode code
 * point); characters without an entry count as DEFAULT_CHAR_WIDTH.
 *
 * @param text - String to measure.
 * @returns Total estimated width; 0 for an empty string.
 */
function calculatePixelWidth(text: string): number {
  return Array.from(text).reduce(
    (total, glyph) => total + (CHAR_WIDTHS[glyph] ?? DEFAULT_CHAR_WIDTH),
    0
  );
}
73
+
74
/**
 * Shorten `text` so it fits within `maxPixels`, appending "..." when cut.
 *
 * Text whose full estimated width is at most `maxPixels` is returned
 * unchanged. Only text that actually overflows is cut, and in that case
 * 20px are held back from the budget to leave room for the ellipsis.
 *
 * Characters are measured per Unicode code point (matching
 * `calculatePixelWidth`), so surrogate pairs are never split or counted twice.
 *
 * @param text - String to fit.
 * @param maxPixels - Pixel budget for the rendered string.
 * @returns The display text and whether it was shortened.
 */
function truncateToPixels(text: string, maxPixels: number): { text: string; truncated: boolean } {
  // Room reserved for the trailing "..." once we know the text overflows.
  const ellipsisReservePixels = 20;

  const glyphs = Array.from(text);
  const widths = glyphs.map((glyph) => CHAR_WIDTHS[glyph] ?? DEFAULT_CHAR_WIDTH);
  const totalWidth = widths.reduce((sum, w) => sum + w, 0);

  // Fits as-is: do not penalise text that lands inside the ellipsis reserve.
  if (totalWidth <= maxPixels) {
    return { text, truncated: false };
  }

  const budget = maxPixels - ellipsisReservePixels;
  let used = 0;
  let keep = 0;
  for (const w of widths) {
    if (used + w > budget) break;
    used += w;
    keep += 1;
  }

  return { text: glyphs.slice(0, keep).join('') + '...', truncated: true };
}
93
+
94
/**
 * Turn a URL into the breadcrumb form shown in search results.
 *
 * The host (without a leading "www.") becomes the first crumb and each
 * non-empty path segment a further crumb; crumbs are joined with " › ".
 * If `url` cannot be parsed it is returned verbatim as the only crumb.
 *
 * @param url - URL to format.
 * @returns Display string plus the individual crumbs.
 */
function formatSERPUrl(url: string): { display: string; breadcrumbs: string[] } {
  let location: URL;
  try {
    location = new URL(url);
  } catch {
    return { display: url, breadcrumbs: [url] };
  }

  const host = location.hostname.replace(/^www\./, '');
  const segments = location.pathname.split('/').filter((part) => part.length > 0);
  const breadcrumbs = [host].concat(segments);

  return { display: breadcrumbs.join(' › '), breadcrumbs };
}
114
+
115
/**
 * Analyze how a page's title, description and URL would appear in a SERP.
 *
 * The title is taken from the first `<title>` element that is not nested in
 * an `<svg>` (inline SVGs carry their own `<title>` elements, and reading all
 * of them would concatenate unrelated text). The description comes from
 * `meta[name="description"]`, falling back to `og:description`.
 *
 * Issues are raised when the title is truncated, and when the description is
 * truncated on desktop. Mobile truncation is reported in `data` only.
 *
 * @param html - Page HTML.
 * @param url - Page URL, used for the breadcrumb display and as the affected URL.
 * @returns Pixel-based issues and the full preview data.
 */
export function analyzeSERPPreview(html: string, url: string): { issues: AuditIssue[]; data: SERPPreviewData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Document title only: skip <title> elements that belong to inline SVGs.
  const rawTitle = $('title')
    .filter((_, el) => $(el).closest('svg').length === 0)
    .first()
    .text()
    .trim();
  const titlePixels = calculatePixelWidth(rawTitle);
  const titleTruncated = truncateToPixels(rawTitle, TITLE_MAX_PIXELS);

  // Meta description, falling back to Open Graph.
  const rawDescription =
    $('meta[name="description"]').attr('content')?.trim() ||
    $('meta[property="og:description"]').attr('content')?.trim() ||
    '';
  // The width estimate does not depend on the layout; only the budget does.
  const descPixels = calculatePixelWidth(rawDescription);
  const descTruncatedDesktop = truncateToPixels(rawDescription, DESCRIPTION_MAX_PIXELS_DESKTOP);
  const descTruncatedMobile = truncateToPixels(rawDescription, DESCRIPTION_MAX_PIXELS_MOBILE);

  const urlData = formatSERPUrl(url);

  if (titleTruncated.truncated) {
    issues.push({
      ...ISSUE_DEFINITIONS.TITLE_TRUNCATED_SERP,
      affectedUrls: [url],
      details: {
        pixelWidth: titlePixels,
        maxPixels: TITLE_MAX_PIXELS,
        charLength: rawTitle.length,
        displayPreview: titleTruncated.text,
      },
    });
  }

  if (descTruncatedDesktop.truncated) {
    issues.push({
      ...ISSUE_DEFINITIONS.META_DESC_TRUNCATED_SERP,
      affectedUrls: [url],
      details: {
        pixelWidth: descPixels,
        maxPixels: DESCRIPTION_MAX_PIXELS_DESKTOP,
        charLength: rawDescription.length,
        displayPreview: descTruncatedDesktop.text,
      },
    });
  }

  const data: SERPPreviewData = {
    title: {
      text: rawTitle,
      charLength: rawTitle.length,
      pixelWidth: titlePixels,
      truncated: titleTruncated.truncated,
      displayText: titleTruncated.text,
    },
    description: {
      text: rawDescription,
      charLength: rawDescription.length,
      pixelWidthDesktop: descPixels,
      pixelWidthMobile: descPixels,
      truncatedDesktop: descTruncatedDesktop.truncated,
      truncatedMobile: descTruncatedMobile.truncated,
      displayTextDesktop: descTruncatedDesktop.text,
      displayTextMobile: descTruncatedMobile.text,
    },
    url: urlData,
    preview: {
      desktop: generateDesktopPreview(titleTruncated.text, descTruncatedDesktop.text, urlData.display),
      mobile: generateMobilePreview(titleTruncated.text, descTruncatedMobile.text, urlData.display),
    },
  };

  return { issues, data };
}
195
+
196
/**
 * Render a desktop-width (60 column) text box showing title, URL and the
 * wrapped description, one per row, framed with box-drawing characters.
 *
 * Title and URL are cut to the box width with a trailing "..." when too long;
 * every description line returned by `wrapText` is shown.
 */
function generateDesktopPreview(title: string, description: string, url: string): string {
  const maxWidth = 60;

  const clip = (value: string): string =>
    value.length > maxWidth ? value.substring(0, maxWidth - 3) + '...' : value;
  const row = (content: string): string => '│ ' + content.padEnd(maxWidth) + ' │';
  const rule = '─'.repeat(maxWidth + 2);

  const body = [clip(title), clip(url), ...wrapText(description, maxWidth)].map(row);

  return ['┌' + rule + '┐', ...body, '└' + rule + '┘'].join('\n');
}
213
+
214
/**
 * Render a mobile-width (40 column) text box showing title, URL and at most
 * the first two wrapped description lines, framed with box-drawing characters.
 *
 * Title and URL are cut to the box width with a trailing "..." when too long.
 */
function generateMobilePreview(title: string, description: string, url: string): string {
  const maxWidth = 40;

  const clip = (value: string): string =>
    value.length > maxWidth ? value.substring(0, maxWidth - 3) + '...' : value;
  const row = (content: string): string => '│ ' + content.padEnd(maxWidth) + ' │';
  const rule = '─'.repeat(maxWidth + 2);

  // Mobile shows only the first two description lines.
  const descriptionRows = wrapText(description, maxWidth).slice(0, 2);
  const body = [clip(title), clip(url), ...descriptionRows].map(row);

  return ['┌' + rule + '┐', ...body, '└' + rule + '┘'].join('\n');
}
231
+
232
/**
 * Greedily wrap `text` into lines of at most `maxWidth` characters.
 *
 * Words are separated by any run of whitespace (spaces, tabs, newlines), so
 * line breaks inside the input never leak into the output lines. A word that
 * is itself longer than `maxWidth` is hard-split into `maxWidth`-sized
 * pieces so that no returned line exceeds the limit; callers pad lines to a
 * fixed box width and rely on that bound.
 *
 * Note: lengths are measured in UTF-16 code units, as `String.prototype.padEnd`
 * does, so a hard split may fall inside a surrogate pair.
 *
 * @param text - Text to wrap.
 * @param maxWidth - Maximum line length; values below 1 are treated as 1.
 * @returns The wrapped lines; empty for empty or whitespace-only input.
 */
function wrapText(text: string, maxWidth: number): string[] {
  const limit = Math.max(1, Math.floor(maxWidth));
  const lines: string[] = [];
  let currentLine = '';

  for (const rawWord of text.split(/\s+/)) {
    if (!rawWord) continue;
    let word = rawWord;

    // Hard-split words that can never fit on a single line.
    while (word.length > limit) {
      if (currentLine) {
        lines.push(currentLine);
        currentLine = '';
      }
      lines.push(word.slice(0, limit));
      word = word.slice(limit);
    }

    const candidate = currentLine ? currentLine + ' ' + word : word;
    if (candidate.length <= limit) {
      currentLine = candidate;
    } else {
      lines.push(currentLine);
      currentLine = word;
    }
  }

  if (currentLine) lines.push(currentLine);
  return lines;
}
@@ -0,0 +1,444 @@
1
+ // Site Maturity Analysis - Different SEO Strategies for New vs Established Sites
2
+ // Based on Nathan Gotch's advice: New sites should focus on authority building, not content proliferation
3
+ // Reference: "How to do SEO when your website is new?"
4
+
5
import * as https from 'node:https';
import type { TLSSocket } from 'node:tls';

import * as cheerio from 'cheerio';

import { httpGet } from '../../utils/http.js';
import type { AuditIssue } from '../types.js';
8
+
9
/** Coarse lifecycle stage assigned to a site. */
export type SiteMaturity = 'new' | 'growing' | 'established' | 'mature';

/** High-level SEO approach recommended for a site. */
export type SEOStrategy = 'authority-first' | 'content-expansion' | 'optimization' | 'maintenance';

/** Everything the site-maturity analysis reports about a site. */
export interface SiteMaturityData {
  /** Best available estimate of how old the site is. */
  estimatedAge: {
    /** Age in days, or `null` when no signal was available. */
    days: number | null;
    /** Which signal the estimate came from. */
    source: 'wayback' | 'whois' | 'ssl' | 'sitemap' | 'estimated';
    /** How much the estimate can be trusted. */
    confidence: 'high' | 'medium' | 'low';
  };

  /** Lifecycle stage derived from the signals. */
  maturityLevel: SiteMaturity;

  /** Proxies for the site's authority. */
  authorityIndicators: {
    hasBacklinks: boolean;
    estimatedBacklinks: number;
    hasIndexedPages: boolean;
    indexedPagesEstimate: number;
    hasBrandMentions: boolean;
    /** Age of the TLS certificate in days, or `null` if unknown. */
    sslAge: number | null;
  };

  /** Proxies for how much content the site has. */
  contentMetrics: {
    pageCount: number;
    hasMultipleContentTypes: boolean;
    hasBlog: boolean;
    /** Whether content volume and authority signals are in proportion. */
    contentToAuthorityRatio: 'balanced' | 'content-heavy' | 'authority-heavy';
  };

  /** Strategy chosen for this site. */
  recommendedStrategy: SEOStrategy;

  /** Human-readable recommendations that accompany the strategy. */
  strategicRecommendations: string[];
}
36
+
37
+ /**
38
+ * Estimate domain age from SSL certificate issuance date
39
+ */
40
+ export async function getSSLCertificateAge(url: string): Promise<number | null> {
41
+ return new Promise((resolve) => {
42
+ try {
43
+ const parsedUrl = new URL(url);
44
+ const options = {
45
+ host: parsedUrl.hostname,
46
+ port: 443,
47
+ method: 'GET',
48
+ rejectUnauthorized: false, // Accept self-signed for checking
49
+ };
50
+
51
+ const req = https.request(options, (res) => {
52
+ const cert = (res.socket as any).getPeerCertificate?.();
53
+ if (cert && cert.valid_from) {
54
+ const validFrom = new Date(cert.valid_from);
55
+ const now = new Date();
56
+ const ageDays = Math.floor((now.getTime() - validFrom.getTime()) / (1000 * 60 * 60 * 24));
57
+ resolve(ageDays);
58
+ } else {
59
+ resolve(null);
60
+ }
61
+ });
62
+
63
+ req.on('error', () => resolve(null));
64
+ req.setTimeout(5000, () => {
65
+ req.destroy();
66
+ resolve(null);
67
+ });
68
+ req.end();
69
+ } catch {
70
+ resolve(null);
71
+ }
72
+ });
73
+ }
74
+
75
+ /**
76
+ * Check Wayback Machine for earliest snapshot (approximate domain age)
77
+ */
78
+ export async function checkWaybackMachine(domain: string): Promise<{ firstSeen: Date | null; snapshots: number }> {
79
+ try {
80
+ // Wayback CDX API - get earliest snapshot
81
+ const response = await httpGet<string>(
82
+ `https://web.archive.org/cdx/search/cdx?url=${domain}&output=json&limit=1&from=1990`,
83
+ {
84
+ timeout: 10000,
85
+ validateStatus: () => true,
86
+ }
87
+ );
88
+
89
+ if (response.status === 200 && Array.isArray(response.data) && response.data.length > 1) {
90
+ // First row is headers, second is earliest snapshot
91
+ const timestamp = response.data[1]?.[1];
92
+ if (timestamp) {
93
+ // Wayback timestamp format: YYYYMMDDHHmmss
94
+ const year = parseInt(timestamp.substring(0, 4));
95
+ const month = parseInt(timestamp.substring(4, 6)) - 1;
96
+ const day = parseInt(timestamp.substring(6, 8));
97
+ return { firstSeen: new Date(year, month, day), snapshots: response.data.length - 1 };
98
+ }
99
+ }
100
+
101
+ return { firstSeen: null, snapshots: 0 };
102
+ } catch {
103
+ return { firstSeen: null, snapshots: 0 };
104
+ }
105
+ }
106
+
107
+ /**
108
+ * Estimate number of indexed pages using site: query approximation
109
+ * (Without Google API, we use heuristics from sitemap)
110
+ */
111
+ export async function estimateIndexedPages(url: string): Promise<number> {
112
+ const origin = new URL(url).origin;
113
+
114
+ try {
115
+ // Check sitemap for page count
116
+ const sitemapUrls = [
117
+ `${origin}/sitemap.xml`,
118
+ `${origin}/sitemap_index.xml`,
119
+ `${origin}/sitemap-index.xml`,
120
+ ];
121
+
122
+ for (const sitemapUrl of sitemapUrls) {
123
+ try {
124
+ const response = await httpGet<string>(sitemapUrl, {
125
+
126
+ timeout: 10000,
127
+ validateStatus: () => true,
128
+ });
129
+
130
+ if (response.status === 200) {
131
+ const $ = cheerio.load(response.data, { xmlMode: true });
132
+
133
+ // Check if it's a sitemap index
134
+ const sitemaps = $('sitemap').length;
135
+ if (sitemaps > 0) {
136
+ // Estimate: each sub-sitemap typically has ~1000 URLs
137
+ return sitemaps * 500; // Conservative estimate
138
+ }
139
+
140
+ // Regular sitemap - count URLs
141
+ const urls = $('url').length;
142
+ if (urls > 0) {
143
+ return urls;
144
+ }
145
+ }
146
+ } catch {
147
+ // Try next sitemap URL
148
+ }
149
+ }
150
+
151
+ return 0; // Unable to estimate
152
+ } catch {
153
+ return 0;
154
+ }
155
+ }
156
+
157
+ /**
158
+ * Analyze HTML for site maturity signals
159
+ */
160
+ export function analyzeHTMLMaturitySignals(html: string, url: string): {
161
+ hasBlog: boolean;
162
+ hasMultipleContentTypes: boolean;
163
+ hasEstablishedBrand: boolean;
164
+ contentIndicators: string[];
165
+ } {
166
+ const $ = cheerio.load(html);
167
+ const text = $('body').text().toLowerCase();
168
+ const htmlLower = html.toLowerCase();
169
+
170
+ // Blog detection
171
+ const hasBlog =
172
+ $('a[href*="/blog"]').length > 0 ||
173
+ $('a[href*="/posts"]').length > 0 ||
174
+ $('a[href*="/articles"]').length > 0 ||
175
+ htmlLower.includes('/blog') ||
176
+ $('[class*="blog"]').length > 0;
177
+
178
+ // Multiple content types
179
+ const contentIndicators: string[] = [];
180
+ if (hasBlog) contentIndicators.push('blog');
181
+ if ($('a[href*="/products"], [class*="product"]').length > 0) contentIndicators.push('products');
182
+ if ($('a[href*="/services"], [class*="service"]').length > 0) contentIndicators.push('services');
183
+ if ($('a[href*="/portfolio"], [class*="portfolio"]').length > 0) contentIndicators.push('portfolio');
184
+ if ($('a[href*="/case-stud"], [class*="case"]').length > 0) contentIndicators.push('case-studies');
185
+ if ($('a[href*="/testimonial"], [class*="testimonial"]').length > 0) contentIndicators.push('testimonials');
186
+ if ($('a[href*="/resource"], [class*="resource"]').length > 0) contentIndicators.push('resources');
187
+
188
+ const hasMultipleContentTypes = contentIndicators.length >= 3;
189
+
190
+ // Established brand signals
191
+ const hasEstablishedBrand =
192
+ // Social proof
193
+ ($('[class*="trust"], [class*="partner"], [class*="client"]').length > 0 ||
194
+ // Press/media mentions
195
+ text.includes('featured in') ||
196
+ text.includes('as seen') ||
197
+ text.includes('trusted by') ||
198
+ // Awards/certifications
199
+ text.includes('award') ||
200
+ text.includes('certified') ||
201
+ // Copyright with year range
202
+ /©\s*\d{4}\s*-\s*\d{4}/.test(text) ||
203
+ // Multiple social links
204
+ $('a[href*="linkedin.com"], a[href*="twitter.com"], a[href*="facebook.com"]').length >= 2);
205
+
206
+ return {
207
+ hasBlog,
208
+ hasMultipleContentTypes,
209
+ hasEstablishedBrand,
210
+ contentIndicators,
211
+ };
212
+ }
213
+
214
+ /**
215
+ * Determine recommended SEO strategy based on site maturity
216
+ */
217
+ export function determineStrategy(
218
+ maturity: SiteMaturity,
219
+ authorityIndicators: SiteMaturityData['authorityIndicators'],
220
+ contentMetrics: SiteMaturityData['contentMetrics']
221
+ ): { strategy: SEOStrategy; recommendations: string[] } {
222
+ const recommendations: string[] = [];
223
+
224
+ if (maturity === 'new') {
225
+ // Nathan Gotch's advice: Focus on authority first
226
+ recommendations.push(
227
+ '🎯 PRIORITY: Build site authority before creating more content',
228
+ '📝 Create ONE exceptional linkable asset (ultimate guide, original research, tool)',
229
+ '🔗 Focus 80% of effort on link acquisition to your best page',
230
+ '⏳ Wait until you have 10+ quality backlinks before content expansion',
231
+ '❌ AVOID: Publishing lots of content that won\'t rank without authority'
232
+ );
233
+ return { strategy: 'authority-first', recommendations };
234
+ }
235
+
236
+ if (maturity === 'growing') {
237
+ if (contentMetrics.contentToAuthorityRatio === 'content-heavy') {
238
+ recommendations.push(
239
+ '⚠️ Content-to-authority imbalance detected',
240
+ '🔗 Pause content creation and focus on link building',
241
+ '📊 Your content won\'t rank well without more domain authority',
242
+ '🎯 Build 20-50 quality backlinks before adding more pages',
243
+ '💡 Consider "Reverse Silo": Drive links to content assets, then internal link to money pages'
244
+ );
245
+ return { strategy: 'authority-first', recommendations };
246
+ }
247
+
248
+ recommendations.push(
249
+ '🚀 You\'re in the growth phase - balance content and links',
250
+ '📝 Create pillar content around 3-5 main topics',
251
+ '🔗 Build 5-10 quality backlinks per month',
252
+ '🏗️ Establish topic clusters with internal linking',
253
+ '📈 Monitor rankings - optimize pages stuck on page 2'
254
+ );
255
+ return { strategy: 'content-expansion', recommendations };
256
+ }
257
+
258
+ if (maturity === 'established') {
259
+ recommendations.push(
260
+ '✅ Site has established authority - focus on optimization',
261
+ '🔍 Audit existing content for improvement opportunities',
262
+ '📊 Use data to find underperforming pages to refresh',
263
+ '🎯 Target more competitive keywords',
264
+ '💼 Consider merger technique: Acquire relevant expired domains',
265
+ '🔗 Focus on earning links naturally through quality content'
266
+ );
267
+ return { strategy: 'optimization', recommendations };
268
+ }
269
+
270
+ // Mature sites
271
+ recommendations.push(
272
+ '👑 Mature site - maintain and defend rankings',
273
+ '🔄 Regularly refresh top-performing content',
274
+ '🛡️ Monitor for ranking drops and algorithm impacts',
275
+ '📈 Expand into adjacent topics and markets',
276
+ '🏆 Focus on brand building and E-E-A-T signals',
277
+ '⚡ Optimize for Core Web Vitals and user experience'
278
+ );
279
+ return { strategy: 'maintenance', recommendations };
280
+ }
281
+
282
+ /**
283
+ * Main function: Analyze site maturity and recommend strategy
284
+ */
285
+ export async function analyzeSiteMaturity(
286
+ html: string,
287
+ url: string
288
+ ): Promise<{ issues: AuditIssue[]; data: SiteMaturityData }> {
289
+ const issues: AuditIssue[] = [];
290
+ const domain = new URL(url).hostname;
291
+
292
+ // Gather maturity signals
293
+ const [sslAge, wayback, indexedPages] = await Promise.all([
294
+ getSSLCertificateAge(url),
295
+ checkWaybackMachine(domain),
296
+ estimateIndexedPages(url),
297
+ ]);
298
+
299
+ const htmlSignals = analyzeHTMLMaturitySignals(html, url);
300
+
301
+ // Estimate age (prefer Wayback, fallback to SSL)
302
+ let estimatedAgeDays: number | null = null;
303
+ let ageSource: 'wayback' | 'ssl' | 'estimated' = 'estimated';
304
+ let ageConfidence: 'high' | 'medium' | 'low' = 'low';
305
+
306
+ if (wayback.firstSeen) {
307
+ estimatedAgeDays = Math.floor((Date.now() - wayback.firstSeen.getTime()) / (1000 * 60 * 60 * 24));
308
+ ageSource = 'wayback';
309
+ ageConfidence = wayback.snapshots > 10 ? 'high' : 'medium';
310
+ } else if (sslAge !== null && sslAge > 0) {
311
+ estimatedAgeDays = sslAge;
312
+ ageSource = 'ssl';
313
+ ageConfidence = 'low'; // SSL can be renewed, not reliable for age
314
+ }
315
+
316
+ // Determine maturity level
317
+ let maturityLevel: SiteMaturity;
318
+ if (estimatedAgeDays === null || estimatedAgeDays < 90) {
319
+ maturityLevel = 'new';
320
+ } else if (estimatedAgeDays < 365) {
321
+ maturityLevel = 'growing';
322
+ } else if (estimatedAgeDays < 730) {
323
+ maturityLevel = 'established';
324
+ } else {
325
+ maturityLevel = 'mature';
326
+ }
327
+
328
+ // Override based on other signals
329
+ if (htmlSignals.hasEstablishedBrand && indexedPages > 100) {
330
+ if (maturityLevel === 'new') maturityLevel = 'growing';
331
+ if (maturityLevel === 'growing') maturityLevel = 'established';
332
+ }
333
+
334
+ // Authority indicators (simplified without external API)
335
+ const authorityIndicators = {
336
+ hasBacklinks: wayback.snapshots > 5, // Rough proxy
337
+ estimatedBacklinks: wayback.snapshots * 2, // Very rough estimate
338
+ hasIndexedPages: indexedPages > 0,
339
+ indexedPagesEstimate: indexedPages,
340
+ hasBrandMentions: htmlSignals.hasEstablishedBrand,
341
+ sslAge,
342
+ };
343
+
344
+ // Content metrics
345
+ const contentToAuthorityRatio: 'balanced' | 'content-heavy' | 'authority-heavy' =
346
+ indexedPages > 50 && !authorityIndicators.hasBacklinks
347
+ ? 'content-heavy'
348
+ : indexedPages < 10 && authorityIndicators.hasBacklinks
349
+ ? 'authority-heavy'
350
+ : 'balanced';
351
+
352
+ const contentMetrics: SiteMaturityData['contentMetrics'] = {
353
+ pageCount: indexedPages,
354
+ hasMultipleContentTypes: htmlSignals.hasMultipleContentTypes,
355
+ hasBlog: htmlSignals.hasBlog,
356
+ contentToAuthorityRatio,
357
+ };
358
+
359
+ // Get strategy recommendations
360
+ const { strategy, recommendations } = determineStrategy(maturityLevel, authorityIndicators, contentMetrics);
361
+
362
+ // Generate issues based on maturity
363
+ if (maturityLevel === 'new') {
364
+ issues.push({
365
+ code: 'NEW_SITE_STRATEGY',
366
+ severity: 'notice',
367
+ category: 'content',
368
+ title: 'New website detected - Authority-first strategy recommended',
369
+ description:
370
+ 'Your site appears to be new (<90 days). Standard SEO advice won\'t work well for new sites.',
371
+ impact:
372
+ 'Creating lots of content without authority is wasted effort. Content from new sites rarely ranks.',
373
+ howToFix:
374
+ 'Focus on building 10+ quality backlinks to ONE great page before expanding content. See strategic recommendations.',
375
+ affectedUrls: [url],
376
+ details: {
377
+ estimatedAge: estimatedAgeDays,
378
+ recommendedStrategy: strategy,
379
+ recommendations,
380
+ },
381
+ });
382
+ }
383
+
384
+ if (contentToAuthorityRatio === 'content-heavy') {
385
+ issues.push({
386
+ code: 'CONTENT_AUTHORITY_IMBALANCE',
387
+ severity: 'warning',
388
+ category: 'content',
389
+ title: 'Content-to-authority imbalance',
390
+ description: `Site has ~${indexedPages} pages but limited backlink signals. Content is unlikely to rank.`,
391
+ impact: 'Most of your content may be invisible to search engines due to low domain authority.',
392
+ howToFix:
393
+ 'Stop publishing new content. Focus 100% on link building until you see ranking improvements.',
394
+ affectedUrls: [url],
395
+ details: {
396
+ indexedPages,
397
+ contentToAuthorityRatio,
398
+ maturityLevel,
399
+ },
400
+ });
401
+ }
402
+
403
+ if (maturityLevel === 'growing' && !htmlSignals.hasBlog) {
404
+ issues.push({
405
+ code: 'GROWING_SITE_NO_BLOG',
406
+ severity: 'notice',
407
+ category: 'content',
408
+ title: 'Growing site without blog/content hub',
409
+ description: 'Site is in growth phase but lacks a blog or content section.',
410
+ impact: 'Blog content is essential for attracting backlinks and building topical authority.',
411
+ howToFix: 'Create a blog section with linkable content assets (guides, research, tools).',
412
+ affectedUrls: [url],
413
+ });
414
+ }
415
+
416
+ return {
417
+ issues,
418
+ data: {
419
+ estimatedAge: {
420
+ days: estimatedAgeDays,
421
+ source: ageSource,
422
+ confidence: ageConfidence,
423
+ },
424
+ maturityLevel,
425
+ authorityIndicators,
426
+ contentMetrics,
427
+ recommendedStrategy: strategy,
428
+ strategicRecommendations: recommendations,
429
+ },
430
+ };
431
+ }
432
+
433
+ /**
434
+ * Quick check for site age category (for conditional audit logic)
435
+ */
436
+ export function getSiteAgeCategory(
437
+ estimatedAgeDays: number | null
438
+ ): 'brand-new' | 'new' | 'adolescent' | 'established' | 'veteran' {
439
+ if (estimatedAgeDays === null || estimatedAgeDays < 30) return 'brand-new';
440
+ if (estimatedAgeDays < 180) return 'new';
441
+ if (estimatedAgeDays < 365) return 'adolescent';
442
+ if (estimatedAgeDays < 1095) return 'established'; // 3 years
443
+ return 'veteran';
444
+ }