@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,682 @@
1
+ /**
2
+ * URL Safety Check (Local Hash Database)
3
+ *
4
+ * Implements a Google Safe Browsing-style architecture:
5
+ * 1. Maintains a local database of hash prefixes
6
+ * 2. All URL checks happen locally against the hash database
7
+ * 3. Database can be updated from open threat feeds (URLhaus, etc.)
8
+ * 4. No external API calls during audit - fully offline capable
9
+ *
10
+ * Hash Database Format:
11
+ * - URLs are canonicalized and hashed with FNV-1a
12
+ * - 8-character hex prefixes are stored for space efficiency
13
+ * - Prefixes are stored in a Set for O(1) lookup
14
+ *
15
+ * Data Sources for updates:
16
+ * - URLhaus (abuse.ch) - https://urlhaus.abuse.ch/downloads/csv/
17
+ * - PhishTank - https://data.phishtank.com/
18
+ * - OpenPhish - https://openphish.com/
19
+ */
20
+
21
+ import type { AuditIssue } from '../types.js';
22
+
23
+ // ============================================================================
24
+ // CROSS-PLATFORM HASHING (FNV-1a - Works in Node.js, Deno, and browsers)
25
+ // ============================================================================
26
+
27
/**
 * 32-bit FNV-1a hash of a string, rendered as 8 lowercase hex digits.
 *
 * The input is consumed one UTF-16 code unit at a time (`charCodeAt`), and the
 * multiplication uses `Math.imul` so the arithmetic wraps at 32 bits on every
 * JavaScript runtime.
 *
 * @param str - Text to hash.
 * @returns The hash as a zero-padded, 8-character hexadecimal string.
 */
function fnv1aHash(str: string): string {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  let acc = OFFSET_BASIS;
  let index = 0;
  while (index < str.length) {
    // XOR the next code unit in, then multiply by the FNV prime (mod 2^32).
    acc = Math.imul(acc ^ str.charCodeAt(index), PRIME);
    index += 1;
  }

  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  const unsigned = acc >>> 0;
  return unsigned.toString(16).padStart(8, '0');
}
43
+
44
+ // ============================================================================
45
+ // TYPES
46
+ // ============================================================================
47
+
48
/** A URL that matched an entry in the local threat database. */
interface UrlMatch {
  /** The URL exactly as it was passed in for checking. */
  url: string;
  /** The stored hash value that the URL (or its hostname) matched. */
  hashPrefix: string;
  /** Which kind of lookup produced the match. */
  matchType: 'exact' | 'domain' | 'pattern';
  /** Optional threat classification; not populated by the code in this file. */
  threatType?: string;
}

/** A URL flagged by the heuristic (pattern-based) checks. */
interface PatternMatch {
  /** The URL exactly as it was passed in for checking. */
  url: string;
  /** Human-readable explanations, one per heuristic that fired. */
  reasons: Array<string>;
  /** Overall severity assigned to the combination of reasons. */
  riskLevel: 'low' | 'medium' | 'high';
}

/** Raw data returned alongside the audit issues by `analyzeUrlSafety`. */
export interface UrlSafetyData {
  /** Number of URLs examined (the page URL plus every external link). */
  checkedUrls: number;
  /** URLs found in the threat database. */
  matchedUrls: Array<UrlMatch>;
  /** Snapshot of the threat database state at the time of the check. */
  databaseInfo: {
    prefixCount: number;
    lastUpdated?: string;
    sources: Array<string>;
  };
  /** URLs flagged by the heuristic checks, at any risk level. */
  patternMatches: Array<PatternMatch>;
}
71
+
72
+ // ============================================================================
73
+ // HASH DATABASE
74
+ // ============================================================================
75
+
76
/**
 * Local, in-memory threat database.
 *
 * Entries are stored as strings in two Sets: one for hashed canonical URLs and
 * one for hashed (lowercased) hostnames. Hashes are produced by `fnv1aHash`
 * (32-bit FNV-1a, 8 hex characters) — not SHA-256.
 *
 * The built-in snapshot is currently empty; real data is expected to be loaded
 * at runtime through `loadFromUrlhausCsv`, `addUrls`, `addDomains` or
 * `loadHashPrefixes`.
 */
class ThreatDatabase {
  private hashPrefixes: Set<string> = new Set();
  private domainPrefixes: Set<string> = new Set();
  private lastUpdated: string = new Date().toISOString();
  private sources: string[] = ['builtin-patterns'];

  constructor() {
    this.initializeBuiltinData();
  }

  /**
   * Seed the database with the bundled hash list.
   * The list is intentionally empty here and serves as a placeholder for the
   * expected format (hash of a canonical URL).
   */
  private initializeBuiltinData(): void {
    const knownBadPatterns: string[] = [
      // Example pattern hashes - in production, load from file or Supabase
      // Format: FNV-1a hash prefix of canonical URL
    ];

    for (const prefix of knownBadPatterns) {
      this.hashPrefixes.add(prefix);
    }
  }

  /**
   * Split one CSV record into fields, honouring double-quoted fields.
   * Inside quotes, commas are literal and `""` stands for a single `"`.
   */
  private static parseCsvFields(line: string): string[] {
    const fields: string[] = [];
    let current = '';
    let inQuotes = false;

    for (let i = 0; i < line.length; i++) {
      const ch = line[i];
      if (inQuotes) {
        if (ch !== '"') {
          current += ch;
        } else if (line[i + 1] === '"') {
          current += '"';
          i++; // skip the second quote of the escaped pair
        } else {
          inQuotes = false;
        }
      } else if (ch === '"') {
        inQuotes = true;
      } else if (ch === ',') {
        fields.push(current);
        current = '';
      } else {
        current += ch;
      }
    }

    fields.push(current);
    return fields;
  }

  /**
   * Compute the canonical form of a URL used as hash input:
   * scheme + `//` + lowercased hostname, the port unless it is the default for
   * http/https, the path with runs of `/` collapsed, and the query string.
   * The fragment is dropped.
   *
   * @returns The canonical string, or `null` when `urlString` cannot be parsed.
   */
  canonicalizeUrl(urlString: string): string | null {
    try {
      const url = new URL(urlString);

      let canonical = url.protocol + '//' + url.hostname.toLowerCase();

      const isDefaultPort =
        (url.protocol === 'http:' && url.port === '80') ||
        (url.protocol === 'https:' && url.port === '443');
      if (url.port && !isDefaultPort) {
        canonical += ':' + url.port;
      }

      // The root path is kept as "/"; any other path has duplicate slashes collapsed.
      canonical += url.pathname === '/' ? '/' : url.pathname.replace(/\/+/g, '/');

      if (url.search) {
        canonical += url.search;
      }

      return canonical;
    } catch {
      return null;
    }
  }

  /**
   * Hash a (canonical) URL string.
   * Uses FNV-1a for cross-platform compatibility (Node.js, Deno, browser).
   */
  computeHashPrefix(url: string): string {
    return fnv1aHash(url);
  }

  /**
   * Hash a hostname after lowercasing it.
   */
  computeDomainHashPrefix(hostname: string): string {
    return fnv1aHash(hostname.toLowerCase());
  }

  /**
   * Look a URL up in the database: first by the hash of its canonical form,
   * then by the hash of its hostname.
   *
   * @returns `{ matched: false }` for unparseable or unknown URLs; otherwise
   *          the matching hash and whether the URL or the domain matched.
   */
  checkUrl(urlString: string): { matched: boolean; prefix?: string; matchType?: 'exact' | 'domain' } {
    const canonical = this.canonicalizeUrl(urlString);
    if (!canonical) {
      return { matched: false };
    }

    const urlPrefix = this.computeHashPrefix(canonical);
    if (this.hashPrefixes.has(urlPrefix)) {
      return { matched: true, prefix: urlPrefix, matchType: 'exact' };
    }

    try {
      const { hostname } = new URL(urlString);
      const domainPrefix = this.computeDomainHashPrefix(hostname);
      if (this.domainPrefixes.has(domainPrefix)) {
        return { matched: true, prefix: domainPrefix, matchType: 'domain' };
      }
    } catch {
      // Unparseable URL: nothing to match against.
    }

    return { matched: false };
  }

  /**
   * Add hashes from URLhaus CSV data.
   * Format: id,dateadded,url,url_status,last_online,threat,tags,urlhaus_link,reporter
   *
   * Fields may be double-quoted; quoted fields are parsed properly so that a
   * URL containing a comma is hashed in full instead of being truncated at
   * the first comma. Lines starting with `#` or `id,` are skipped, as are
   * rows with fewer than three fields or an unparseable URL.
   *
   * @returns The number of rows ingested (rows repeating an already known URL
   *          are still counted).
   */
  loadFromUrlhausCsv(csvData: string): number {
    let added = 0;

    for (const line of csvData.split(/\r?\n/)) {
      if (line.startsWith('#') || line.startsWith('id,')) continue;

      const fields = ThreatDatabase.parseCsvFields(line);
      if (fields.length < 3) continue;

      const canonical = this.canonicalizeUrl(fields[2].trim());
      if (canonical) {
        this.hashPrefixes.add(this.computeHashPrefix(canonical));
        added++;
      }
    }

    this.lastUpdated = new Date().toISOString();
    if (!this.sources.includes('urlhaus')) {
      this.sources.push('urlhaus');
    }

    return added;
  }

  /**
   * Add a list of URLs to the database.
   *
   * @returns The number of hashes that were not already present.
   */
  addUrls(urls: string[]): number {
    let added = 0;
    for (const url of urls) {
      const canonical = this.canonicalizeUrl(url);
      if (!canonical) continue;

      const prefix = this.computeHashPrefix(canonical);
      if (!this.hashPrefixes.has(prefix)) {
        this.hashPrefixes.add(prefix);
        added++;
      }
    }
    return added;
  }

  /**
   * Add domains to the blocklist.
   *
   * Surrounding whitespace is trimmed and blank entries are ignored: hashing
   * an empty hostname would make every URL without a host (e.g. `mailto:`)
   * match the blocklist.
   *
   * @returns The number of hashes that were not already present.
   */
  addDomains(domains: string[]): number {
    let added = 0;
    for (const domain of domains) {
      const hostname = domain.trim();
      if (hostname === '') continue;

      const prefix = this.computeDomainHashPrefix(hostname);
      if (!this.domainPrefixes.has(prefix)) {
        this.domainPrefixes.add(prefix);
        added++;
      }
    }
    return added;
  }

  /**
   * Bulk load precomputed hashes (for Supabase integration).
   * This is used when loading from the threat_hashes table.
   *
   * @returns The number of hashes that were not already present.
   */
  loadHashPrefixes(hashes: Array<{ hash_prefix: string; hash_type: 'url' | 'domain' }>): number {
    let added = 0;
    for (const hash of hashes) {
      if (hash.hash_type === 'url') {
        if (!this.hashPrefixes.has(hash.hash_prefix)) {
          this.hashPrefixes.add(hash.hash_prefix);
          added++;
        }
      } else if (hash.hash_type === 'domain') {
        if (!this.domainPrefixes.has(hash.hash_prefix)) {
          this.domainPrefixes.add(hash.hash_prefix);
          added++;
        }
      }
    }

    this.lastUpdated = new Date().toISOString();
    if (!this.sources.includes('supabase')) {
      this.sources.push('supabase');
    }

    return added;
  }

  /**
   * Remove every hash and forget all recorded sources (useful before reloading).
   */
  clear(): void {
    this.hashPrefixes.clear();
    this.domainPrefixes.clear();
    this.sources = [];
  }

  /**
   * Check whether the database holds at least one URL or domain hash.
   */
  isPopulated(): boolean {
    return this.hashPrefixes.size > 0 || this.domainPrefixes.size > 0;
  }

  /**
   * Get database statistics.
   * `sources` is a copy, so callers cannot mutate the database's own list.
   */
  getStats(): { prefixCount: number; lastUpdated: string; sources: string[] } {
    return {
      prefixCount: this.hashPrefixes.size + this.domainPrefixes.size,
      lastUpdated: this.lastUpdated,
      sources: [...this.sources],
    };
  }
}

// Module-wide database instance shared by every check in this file.
const threatDb = new ThreatDatabase();
322
+
323
+ // ============================================================================
324
+ // PATTERN-BASED DETECTION (Local, no external calls)
325
+ // ============================================================================
326
+
327
// Well-known brand names (second-level domain labels) that are frequent
// typosquatting targets. Compared against the main label of each hostname.
const POPULAR_DOMAINS = [
  'google',
  'facebook',
  'amazon',
  'apple',
  'microsoft',
  'paypal',
  'netflix',
  'instagram',
  'twitter',
  'linkedin',
  'youtube',
  'github',
  'dropbox',
  'adobe',
  'salesforce',
  'stripe',
  'shopify',
  'wordpress',
  'cloudflare',
  'aws',
  'azure',
  'slack',
  'zoom',
  'docusign',
];
334
+
335
// Top-level domains that show up disproportionately in phishing and malware
// campaigns. Each entry includes the leading dot so it can be used directly
// with `endsWith`.
const SUSPICIOUS_TLDS = [
  // Free TLDs abused for phishing
  '.tk',
  '.ml',
  '.ga',
  '.cf',
  '.gq',
  '.xyz',
  '.top',
  '.work',
  '.click',
  '.link',
  '.download',
  // New TLDs that can be confused with file extensions
  '.zip',
  '.mov',
];
341
+
342
// Path suffixes that indicate a directly executable or script file.
// Each entry includes the leading dot so it can be used directly with `endsWith`.
const SUSPICIOUS_EXTENSIONS = [
  '.exe',
  '.msi',
  '.bat',
  '.cmd',
  '.ps1',
  '.vbs',
  '.jar',
  '.scr',
  '.pif',
  '.application',
  '.hta',
  '.cpl',
  '.msc',
  '.wsf',
];
348
+
349
// Look-alike (homograph) characters, keyed by the ASCII letter they imitate.
const HOMOGRAPH_CHARS: Record<string, string[]> = {
  a: ['а', 'ɑ', 'α'], // Cyrillic а, Latin alpha, Greek alpha
  c: ['с', 'ϲ'], // Cyrillic с, Greek lunate sigma
  e: ['е', 'ё'], // Cyrillic е, Cyrillic ё
  o: ['о', 'ο'], // Cyrillic о, Greek omicron
  p: ['р'], // Cyrillic р
  x: ['х'], // Cyrillic х
  y: ['у'], // Cyrillic у
};

/**
 * Report whether a string contains any of the look-alike characters listed in
 * `HOMOGRAPH_CHARS`.
 *
 * @param str - Text to scan (typically a hostname).
 * @returns `true` as soon as one look-alike character is found, else `false`.
 */
function containsHomographs(str: string): boolean {
  return Object.entries(HOMOGRAPH_CHARS).some(([, lookalikes]) =>
    lookalikes.some(lookalike => str.includes(lookalike)),
  );
}
371
+
372
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions needed to turn one
 * into the other. Characters are compared as UTF-16 code units.
 *
 * Only two rows of the dynamic-programming table are kept at a time.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance (0 when the strings are equal).
 */
function levenshteinDistance(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Row 0: turning the empty prefix of `b` into each prefix of `a`.
  let previousRow: number[] = [];
  for (let col = 0; col <= a.length; col++) {
    previousRow.push(col);
  }

  for (let row = 1; row <= b.length; row++) {
    const bChar = b.charAt(row - 1);
    const currentRow: number[] = [row];

    for (let col = 1; col <= a.length; col++) {
      if (bChar === a.charAt(col - 1)) {
        currentRow[col] = previousRow[col - 1];
      } else {
        const substitution = previousRow[col - 1];
        const insertion = currentRow[col - 1];
        const deletion = previousRow[col];
        currentRow[col] = 1 + Math.min(substitution, insertion, deletion);
      }
    }

    previousRow = currentRow;
  }

  return previousRow[a.length];
}
398
+
399
/**
 * Check whether a hostname looks like an imitation of a popular brand.
 *
 * The label compared is the second-to-last dot-separated part of the hostname
 * (or the only part when there is just one). For each brand, in list order:
 *   1. an edit distance of 1 or 2 yields `Possible typosquat of "<brand>"`;
 *   2. otherwise, a label that contains the brand together with a hyphen or
 *      one of "secure" / "login" / "account" yields
 *      `Suspicious variation of "<brand>"`.
 *
 * NOTE(review): the distance threshold is a fixed 2 regardless of brand
 * length, so legitimate names close to a short brand are flagged too
 * (e.g. "gitlab" is at distance 2 from "github").
 *
 * @param hostname - Hostname to inspect.
 * @returns A description of the first finding, or `null` when nothing matches.
 */
function checkTyposquatting(hostname: string): string | null {
  const labels = hostname.toLowerCase().split('.');
  const candidate = labels.length >= 2 ? labels[labels.length - 2] : labels[0];

  // Independent of the brand being compared, so evaluate it once up front.
  const hasLureMarker = ['-', 'secure', 'login', 'account'].some(marker =>
    candidate.includes(marker),
  );

  for (const brand of POPULAR_DOMAINS) {
    if (candidate === brand) {
      continue;
    }

    const edits = levenshteinDistance(candidate, brand);
    if (edits >= 1 && edits <= 2) {
      return `Possible typosquat of "${brand}"`;
    }

    if (hasLureMarker && candidate.includes(brand)) {
      return `Suspicious variation of "${brand}"`;
    }
  }

  return null;
}
425
+
426
/**
 * Extract the host portion of a URL string exactly as written (lowercased),
 * before any URL-parser normalization. Returns '' when the string has no
 * `scheme://authority` shape.
 *
 * Needed because `new URL(...).hostname` converts internationalized names to
 * Punycode ("xn--..."), which hides the original Unicode characters.
 */
function extractRawHost(urlString: string): string {
  const match = /^\s*[a-z][a-z0-9+.-]*:\/\/(?:[^/?#]*@)?([^/?#]*)/i.exec(urlString);
  return match ? match[1].toLowerCase() : '';
}

/**
 * Analyze a URL for suspicious patterns (local analysis only).
 *
 * Heuristics, in the order their reasons are reported:
 *   - IPv4 address used as host
 *   - hostname ends with a TLD from `SUSPICIOUS_TLDS`
 *   - more than four dot-separated hostname labels
 *   - typosquatting of a popular brand (`checkTyposquatting`)
 *   - look-alike Unicode characters in the host, checked on both the parsed
 *     hostname and the host as originally written (the parsed one is Punycode)
 *   - path ends with an extension from `SUSPICIOUS_EXTENSIONS`
 *   - credentials (`user@` / `user:pass@`) embedded before the host; an `@`
 *     in the path or query is not flagged because it cannot hide the host
 *   - known URL shortener host
 * A URL that cannot be parsed gets the single reason 'Malformed URL'.
 *
 * Risk is 'high' when a look-alike, IP address, typosquat or dangerous file
 * reason is present, or when three or more reasons fired; otherwise 'medium'.
 *
 * NOTE(review): a host that arrives already Punycode-encoded cannot be
 * inspected for look-alikes without decoding it; that case is not covered.
 *
 * @param urlString - URL to analyze.
 * @returns The findings, or `null` when no heuristic fired.
 */
function analyzeUrlPatterns(urlString: string): PatternMatch | null {
  const reasons: string[] = [];

  try {
    const url = new URL(urlString);
    const hostname = url.hostname.toLowerCase();
    const pathname = url.pathname.toLowerCase();

    // IP address instead of domain
    if (/^(\d{1,3}\.){3}\d{1,3}$/.test(hostname)) {
      reasons.push('Uses IP address instead of domain');
    }

    // Suspicious TLDs (first match only)
    const suspiciousTld = SUSPICIOUS_TLDS.find(tld => hostname.endsWith(tld));
    if (suspiciousTld !== undefined) {
      reasons.push(`Suspicious TLD: ${suspiciousTld}`);
    }

    // Excessive subdomains
    if (hostname.split('.').length > 4) {
      reasons.push('Excessive subdomains');
    }

    // Typosquatting
    const typosquat = checkTyposquatting(hostname);
    if (typosquat) reasons.push(typosquat);

    // Homograph attack: inspect the host as written too, since the parsed
    // hostname has already been converted to ASCII (Punycode).
    if (containsHomographs(hostname) || containsHomographs(extractRawHost(urlString))) {
      reasons.push('Contains look-alike Unicode characters');
    }

    // Suspicious file extensions (first match only)
    const dangerousExt = SUSPICIOUS_EXTENSIONS.find(ext => pathname.endsWith(ext));
    if (dangerousExt !== undefined) {
      reasons.push(`Dangerous file type: ${dangerousExt}`);
    }

    // Embedded credentials: "https://trusted.example@evil.example/" really
    // points at evil.example.
    if (url.username !== '' || url.password !== '') {
      reasons.push('Contains @ symbol (may obscure destination)');
    }

    // URL shorteners
    const shorteners = ['bit.ly', 'tinyurl.com', 't.co', 'goo.gl', 'ow.ly', 'is.gd'];
    if (shorteners.some(s => hostname === s || hostname.endsWith('.' + s))) {
      reasons.push('URL shortener (destination hidden)');
    }
  } catch {
    reasons.push('Malformed URL');
  }

  if (reasons.length === 0) return null;

  // Determine risk level. Keywords are matched case-insensitively against the
  // reason texts above; 'look-alike' corresponds to the homograph reason.
  const highRiskKeywords = ['look-alike', 'ip address', 'typosquat', 'dangerous file'];
  const hasHighRiskReason = reasons.some(reason => {
    const lowered = reason.toLowerCase();
    return highRiskKeywords.some(keyword => lowered.includes(keyword));
  });
  const riskLevel: 'low' | 'medium' | 'high' =
    hasHighRiskReason || reasons.length >= 3 ? 'high' : 'medium';

  return { url: urlString, reasons, riskLevel };
}
499
+
500
+ // ============================================================================
501
+ // MAIN EXPORT
502
+ // ============================================================================
503
+
504
/**
 * Run the URL safety audit for a page and its outbound links.
 *
 * Every URL (the page URL first, then each external link) goes through:
 *   1. a lookup in the local threat database, and
 *   2. the heuristic pattern analysis.
 *
 * Issues are emitted in a fixed order: known threat on the page URL, known
 * threats among external links, high-risk patterns on the page URL, high-risk
 * patterns among external links, and finally a notice for URL shorteners.
 *
 * No external API calls are made - all checks are local.
 *
 * @param url - The audited page URL.
 * @param externalLinks - Outbound links found on the page.
 * @returns The generated issues plus the raw match data.
 */
export function analyzeUrlSafety(
  url: string,
  externalLinks: string[] = []
): { issues: AuditIssue[]; data: UrlSafetyData } {
  const allUrls = [url, ...externalLinks];
  const matchedUrls: UrlMatch[] = [];
  const patternMatches: PatternMatch[] = [];

  // Collect raw findings for every URL.
  allUrls.forEach(candidate => {
    const lookup = threatDb.checkUrl(candidate);
    if (lookup.matched) {
      matchedUrls.push({
        url: candidate,
        hashPrefix: lookup.prefix!,
        matchType: lookup.matchType!,
      });
    }

    const heuristics = analyzeUrlPatterns(candidate);
    if (heuristics) {
      patternMatches.push(heuristics);
    }
  });

  const issues: AuditIssue[] = [];
  const isPageUrl = (entry: { url: string }): boolean => entry.url === url;
  const isOtherUrl = (entry: { url: string }): boolean => entry.url !== url;

  // Known threats (hash matches).
  const pageHashMatch = matchedUrls.find(m => isPageUrl(m));
  if (pageHashMatch) {
    issues.push({
      code: 'URL_SAFETY_KNOWN_THREAT',
      severity: 'error',
      category: 'security',
      title: 'Website URL matches known threat database',
      description: 'Your website URL matches entries in the threat database. This indicates your site may have been compromised or flagged.',
      impact: 'Browsers and security tools will block access to your site. Search engines will remove you from results.',
      howToFix: 'Scan your site for malware, remove any malicious content, and request removal from threat databases.',
      affectedUrls: [url],
      details: {
        matchedHash: pageHashMatch.hashPrefix,
      },
    });
  }

  const externalMatches = matchedUrls.filter(m => isOtherUrl(m));
  if (externalMatches.length > 0) {
    issues.push({
      code: 'URL_SAFETY_EXTERNAL_THREAT',
      severity: 'error',
      category: 'security',
      title: 'External links to known malicious URLs',
      description: `${externalMatches.length} external link(s) point to URLs in the threat database.`,
      impact: 'Linking to malicious sites harms visitors and damages your reputation and rankings.',
      howToFix: 'Remove all links to flagged URLs immediately.',
      affectedUrls: externalMatches.map(m => m.url),
    });
  }

  // Suspicious characteristics (high-risk pattern matches only).
  const highRiskPatterns = patternMatches.filter(p => p.riskLevel === 'high');

  const mainPattern = highRiskPatterns.find(p => isPageUrl(p));
  if (mainPattern) {
    issues.push({
      code: 'URL_SAFETY_SUSPICIOUS_DOMAIN',
      severity: 'warning',
      category: 'security',
      title: 'Website URL has suspicious characteristics',
      description: `Your URL shows patterns associated with malicious sites: ${mainPattern.reasons.join('; ')}`,
      impact: 'Users and security tools may distrust your site.',
      howToFix: 'Use a trustworthy domain structure. Avoid patterns that mimic other brands.',
      affectedUrls: [url],
      details: { reasons: mainPattern.reasons },
    });
  }

  const suspiciousExternal = highRiskPatterns.filter(p => isOtherUrl(p));
  if (suspiciousExternal.length > 0) {
    issues.push({
      code: 'URL_SAFETY_SUSPICIOUS_EXTERNAL',
      severity: 'warning',
      category: 'security',
      title: 'External links with suspicious characteristics',
      description: `${suspiciousExternal.length} external link(s) show suspicious patterns.`,
      impact: 'Linking to suspicious sites can harm visitors and rankings.',
      howToFix: 'Review and remove or replace suspicious external links.',
      affectedUrls: suspiciousExternal.map(p => p.url),
      details: {
        suspiciousLinks: suspiciousExternal.map(({ url: linkUrl, reasons }) => ({
          url: linkUrl,
          reasons,
        })),
      },
    });
  }

  // URL shortener notice (any risk level).
  const shortenerMatches = patternMatches.filter(p =>
    p.reasons.some(reason => reason.includes('shortener'))
  );
  if (shortenerMatches.length > 0) {
    issues.push({
      code: 'URL_SAFETY_SHORTENERS',
      severity: 'notice',
      category: 'security',
      title: 'External links use URL shorteners',
      description: `${shortenerMatches.length} link(s) use URL shorteners, hiding destinations.`,
      impact: 'URL shorteners reduce trust and SEO link value.',
      howToFix: 'Replace shortened URLs with direct links.',
      affectedUrls: shortenerMatches.map(p => p.url),
    });
  }

  const data: UrlSafetyData = {
    checkedUrls: allUrls.length,
    matchedUrls,
    databaseInfo: threatDb.getStats(),
    patternMatches,
  };

  return { issues, data };
}
641
+
642
/**
 * Public facade over the module's shared threat database, so that callers can
 * load or refresh threat data without touching the instance directly.
 * Every member forwards to the method of the same name on that instance.
 */
export const urlSafetyDatabase = {
  /** Ingest URLs from a URLhaus CSV dump; returns the count reported by the database. */
  loadFromUrlhausCsv(csv: string): number {
    return threatDb.loadFromUrlhausCsv(csv);
  },

  /** Add URLs to the blocklist; returns the count reported by the database. */
  addUrls(urls: string[]): number {
    return threatDb.addUrls(urls);
  },

  /** Add domains to the blocklist; returns the count reported by the database. */
  addDomains(domains: string[]): number {
    return threatDb.addDomains(domains);
  },

  /**
   * Bulk load precomputed hashes (for Supabase integration).
   * Call this with data from:
   * SELECT hash_prefix, hash_type FROM threat_hashes WHERE is_active = TRUE
   */
  loadHashPrefixes(hashes: Array<{ hash_prefix: string; hash_type: 'url' | 'domain' }>): number {
    return threatDb.loadHashPrefixes(hashes);
  },

  /** Remove all hashes (useful before reloading from fresh data). */
  clear(): void {
    return threatDb.clear();
  },

  /** Report whether any threat data has been loaded. */
  isPopulated(): boolean {
    return threatDb.isPopulated();
  },

  /** Return the database statistics (entry count, last update, sources). */
  getStats(): { prefixCount: number; lastUpdated: string; sources: string[] } {
    return threatDb.getStats();
  },
};