@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,682 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Safety Check (Local Hash Database)
|
|
3
|
+
*
|
|
4
|
+
* Implements a Google Safe Browsing-style architecture:
|
|
5
|
+
* 1. Maintains a local database of hash prefixes
|
|
6
|
+
* 2. All URL checks happen locally against the hash database
|
|
7
|
+
* 3. Database can be updated from open threat feeds (URLhaus, etc.)
|
|
8
|
+
* 4. No external API calls during audit - fully offline capable
|
|
9
|
+
*
|
|
10
|
+
* Hash Database Format:
|
|
11
|
+
* - URLs are canonicalized and hashed with FNV-1a
|
|
12
|
+
* - 8-character hex prefixes are stored for space efficiency
|
|
13
|
+
* - Prefixes are stored in a Set for O(1) lookup
|
|
14
|
+
*
|
|
15
|
+
* Data Sources for updates:
|
|
16
|
+
* - URLhaus (abuse.ch) - https://urlhaus.abuse.ch/downloads/csv/
|
|
17
|
+
* - PhishTank - https://data.phishtank.com/
|
|
18
|
+
* - OpenPhish - https://openphish.com/
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import type { AuditIssue } from '../types.js';
|
|
22
|
+
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// CROSS-PLATFORM HASHING (FNV-1a - Works in Node.js, Deno, and browsers)
|
|
25
|
+
// ============================================================================
|
|
26
|
+
|
|
27
|
+
/**
 * 32-bit FNV-1a hash of a string, rendered as hex.
 *
 * The input is consumed one UTF-16 code unit at a time (`charCodeAt`), and the
 * multiplication is done with `Math.imul` so the arithmetic stays within
 * 32 bits on every JavaScript runtime.
 *
 * @param str - Text to hash; the empty string yields the offset basis.
 * @returns The unsigned 32-bit hash as exactly 8 lowercase hex characters.
 */
function fnv1aHash(str: string): string {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  let acc = OFFSET_BASIS;
  let index = 0;
  while (index < str.length) {
    // XOR the next code unit in, then multiply by the FNV prime (mod 2^32).
    acc = Math.imul(acc ^ str.charCodeAt(index), PRIME);
    index += 1;
  }

  // `>>> 0` reinterprets the signed 32-bit accumulator as unsigned before formatting.
  const unsigned = acc >>> 0;
  return unsigned.toString(16).padStart(8, '0');
}
|
|
43
|
+
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// TYPES
|
|
46
|
+
// ============================================================================
|
|
47
|
+
|
|
48
|
+
/**
 * Result payload returned alongside the issues by `analyzeUrlSafety`.
 */
export interface UrlSafetyData {
  /** Number of URLs examined: the audited URL plus every external link passed in. */
  checkedUrls: number;
  /** URLs whose hash was found in the local threat database. */
  matchedUrls: Array<UrlMatch>;
  /** Snapshot of the threat database statistics at the time of the check. */
  databaseInfo: {
    /** Combined count of stored URL hashes and domain hashes. */
    prefixCount: number;
    /** ISO-8601 timestamp reported by the database, when available. */
    lastUpdated?: string;
    /** Names of the feeds that have populated the database. */
    sources: Array<string>;
  };
  /** Every URL that triggered at least one heuristic, regardless of risk level. */
  patternMatches: Array<PatternMatch>;
}
|
|
58
|
+
|
|
59
|
+
/**
 * One URL that matched an entry in the local threat database.
 */
interface UrlMatch {
  /** The URL exactly as it was supplied to the check. */
  url: string;
  /** The 8-character hex hash that was found in the database. */
  hashPrefix: string;
  /**
   * Which lookup produced the hit. The database lookup in this file only
   * reports 'exact' (full URL) or 'domain' (hostname); 'pattern' is declared
   * but not produced by it.
   */
  matchType: 'exact' | 'domain' | 'pattern';
  /** Optional threat classification; not populated by the code in this file. */
  threatType?: string;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Outcome of the heuristic (non-database) analysis of a single URL.
 */
interface PatternMatch {
  /** The URL exactly as it was supplied to the analysis. */
  url: string;
  /** Human-readable explanation for each heuristic that fired. */
  reasons: Array<string>;
  /** Overall severity derived from the reasons. */
  riskLevel: 'low' | 'medium' | 'high';
}
|
|
71
|
+
|
|
72
|
+
// ============================================================================
|
|
73
|
+
// HASH DATABASE
|
|
74
|
+
// ============================================================================
|
|
75
|
+
|
|
76
|
+
/**
 * Local, in-memory threat database.
 *
 * Format: two Sets of 8-character lowercase hex strings. Each entry is the
 * full 32-bit FNV-1a hash (as produced by `fnv1aHash`) of either a
 * canonicalized URL or a lowercased hostname. Because only 32 bits are kept,
 * a hit means "probably listed", not "certainly listed".
 *
 * This is a bundled snapshot. In production, this would be:
 * 1. Loaded from a local file that gets periodic updates
 * 2. Updated via a background process from URLhaus/PhishTank
 *
 * The built-in list is currently empty; entries arrive through the loader
 * methods below.
 */
class ThreatDatabase {
  /** Hashes of canonicalized full URLs. */
  private readonly hashPrefixes = new Set<string>();
  /** Hashes of lowercased hostnames. */
  private readonly domainPrefixes = new Set<string>();
  /** ISO-8601 timestamp; set at construction and refreshed by the bulk loaders. */
  private lastUpdated: string = new Date().toISOString();
  /** Names of the feeds that have contributed entries. */
  private sources: string[] = ['builtin-patterns'];

  constructor() {
    this.initializeBuiltinData();
  }

  /**
   * Seed the database with the bundled hashes.
   * The bundled list is empty today; it exists as the hook for a shipped snapshot.
   */
  private initializeBuiltinData(): void {
    const knownBadPatterns: string[] = [
      // Example pattern hashes - in production, load from file or Supabase
      // Format: FNV-1a hash prefix of canonical URL
    ];

    for (const prefix of knownBadPatterns) {
      this.hashPrefixes.add(prefix);
    }
  }

  /**
   * Split one CSV record into fields, honouring double-quoted fields.
   * Inside quotes, commas are literal and `""` is an escaped quote.
   */
  private static parseCsvLine(line: string): string[] {
    const fields: string[] = [];
    let current = '';
    let inQuotes = false;

    for (let i = 0; i < line.length; i++) {
      const ch = line[i];
      if (inQuotes) {
        if (ch !== '"') {
          current += ch;
        } else if (line[i + 1] === '"') {
          current += '"';
          i++; // consume the second quote of the escaped pair
        } else {
          inQuotes = false;
        }
      } else if (ch === '"') {
        inQuotes = true;
      } else if (ch === ',') {
        fields.push(current);
        current = '';
      } else {
        current += ch;
      }
    }

    fields.push(current);
    return fields;
  }

  /**
   * Compute canonical form of URL (similar to Google Safe Browsing)
   * - Lowercase hostname
   * - Remove default ports
   * - Collapse repeated slashes in the path
   * - Keep the query string, drop the fragment
   *
   * @param urlString - Absolute URL to canonicalize.
   * @returns The canonical string, or `null` when `urlString` cannot be parsed.
   */
  canonicalizeUrl(urlString: string): string | null {
    try {
      const url = new URL(urlString);

      let canonical = url.protocol + '//' + url.hostname.toLowerCase();

      // Keep the port only when it is not the scheme's default.
      const isDefaultPort =
        (url.protocol === 'http:' && url.port === '80') ||
        (url.protocol === 'https:' && url.port === '443');
      if (url.port && !isDefaultPort) {
        canonical += ':' + url.port;
      }

      // Root stays "/"; any other path has runs of slashes collapsed to one.
      const path = url.pathname;
      canonical += path === '/' ? '/' : path.replace(/\/+/g, '/');

      // Include query string but not fragment
      if (url.search) {
        canonical += url.search;
      }

      return canonical;
    } catch {
      return null;
    }
  }

  /**
   * Hash a (canonicalized) URL.
   * Uses FNV-1a for cross-platform compatibility (Node.js, Deno, browser).
   *
   * @returns 8 lowercase hex characters.
   */
  computeHashPrefix(url: string): string {
    return fnv1aHash(url);
  }

  /**
   * Hash a hostname after lowercasing it.
   *
   * @returns 8 lowercase hex characters.
   */
  computeDomainHashPrefix(hostname: string): string {
    return fnv1aHash(hostname.toLowerCase());
  }

  /**
   * Look a URL up in the database.
   *
   * The canonicalized full URL is tried first; if that misses, the exact
   * hostname is tried against the domain list (parent domains are not tried).
   *
   * @returns `{ matched: false }` on a miss or an unparseable URL; otherwise
   *          the matching hash and whether it was an 'exact' or 'domain' hit.
   */
  checkUrl(urlString: string): { matched: boolean; prefix?: string; matchType?: 'exact' | 'domain' } {
    const canonical = this.canonicalizeUrl(urlString);
    if (!canonical) {
      return { matched: false };
    }

    // Check full URL hash
    const urlPrefix = this.computeHashPrefix(canonical);
    if (this.hashPrefixes.has(urlPrefix)) {
      return { matched: true, prefix: urlPrefix, matchType: 'exact' };
    }

    // Check domain hash
    try {
      const url = new URL(urlString);
      const domainPrefix = this.computeDomainHashPrefix(url.hostname);
      if (this.domainPrefixes.has(domainPrefix)) {
        return { matched: true, prefix: domainPrefix, matchType: 'domain' };
      }
    } catch {
      // Invalid URL - treated as a miss.
    }

    return { matched: false };
  }

  /**
   * Add hashes from URLhaus CSV data
   * Format: id,dateadded,url,url_status,last_online,threat,tags,urlhaus_link,reporter
   *
   * Fields may be double-quoted, so a comma inside a quoted URL or tag list
   * does not split the field. Blank lines, `#` comments and the header row
   * are skipped, as are rows whose URL cannot be parsed.
   *
   * @param csvData - Raw CSV text (LF or CRLF line endings).
   * @returns Number of hashes that were not already present.
   */
  loadFromUrlhausCsv(csvData: string): number {
    let added = 0;

    for (const rawLine of csvData.split(/\r?\n/)) {
      const line = rawLine.trim();
      // Skip blanks, comments and header
      if (line === '' || line.startsWith('#') || line.startsWith('id,')) continue;

      const rawUrl = ThreatDatabase.parseCsvLine(line)[2];
      if (rawUrl === undefined) continue; // fewer than three columns

      const canonical = this.canonicalizeUrl(rawUrl.trim());
      if (!canonical) continue;

      const prefix = this.computeHashPrefix(canonical);
      if (!this.hashPrefixes.has(prefix)) {
        this.hashPrefixes.add(prefix);
        added++;
      }
    }

    this.lastUpdated = new Date().toISOString();
    if (!this.sources.includes('urlhaus')) {
      this.sources.push('urlhaus');
    }

    return added;
  }

  /**
   * Add a list of URLs to the database. Unparseable URLs are ignored.
   *
   * @returns Number of hashes that were not already present.
   */
  addUrls(urls: string[]): number {
    let added = 0;
    for (const url of urls) {
      const canonical = this.canonicalizeUrl(url);
      if (!canonical) continue;

      const prefix = this.computeHashPrefix(canonical);
      if (!this.hashPrefixes.has(prefix)) {
        this.hashPrefixes.add(prefix);
        added++;
      }
    }
    return added;
  }

  /**
   * Add domains to the blocklist. Surrounding whitespace is removed and
   * empty entries are ignored.
   *
   * @returns Number of hashes that were not already present.
   */
  addDomains(domains: string[]): number {
    let added = 0;
    for (const domain of domains) {
      const hostname = domain.trim();
      if (hostname === '') continue;

      const prefix = this.computeDomainHashPrefix(hostname);
      if (!this.domainPrefixes.has(prefix)) {
        this.domainPrefixes.add(prefix);
        added++;
      }
    }
    return added;
  }

  /**
   * Bulk load hash prefixes directly (for Supabase integration)
   * This is used when loading from the threat_hashes table
   *
   * Prefixes are trimmed and lowercased before storage so that they can match
   * the lowercase hex produced by the hash function.
   *
   * @returns Number of hashes that were not already present.
   */
  loadHashPrefixes(hashes: Array<{ hash_prefix: string; hash_type: 'url' | 'domain' }>): number {
    let added = 0;
    for (const hash of hashes) {
      const prefix = hash.hash_prefix.trim().toLowerCase();
      if (prefix === '') continue;

      if (hash.hash_type === 'url') {
        if (!this.hashPrefixes.has(prefix)) {
          this.hashPrefixes.add(prefix);
          added++;
        }
      } else if (hash.hash_type === 'domain') {
        if (!this.domainPrefixes.has(prefix)) {
          this.domainPrefixes.add(prefix);
          added++;
        }
      }
    }

    this.lastUpdated = new Date().toISOString();
    if (!this.sources.includes('supabase')) {
      this.sources.push('supabase');
    }

    return added;
  }

  /**
   * Clear all hashes and the source list (useful before reloading).
   * The last-updated timestamp is left untouched.
   */
  clear(): void {
    this.hashPrefixes.clear();
    this.domainPrefixes.clear();
    this.sources = [];
  }

  /**
   * Check if database has been populated
   *
   * @returns `true` when at least one URL hash or domain hash is stored.
   */
  isPopulated(): boolean {
    return this.hashPrefixes.size > 0 || this.domainPrefixes.size > 0;
  }

  /**
   * Get database statistics
   *
   * @returns Combined entry count, last-updated timestamp, and a copy of the
   *          source list (mutating it does not affect the database).
   */
  getStats(): { prefixCount: number; lastUpdated: string; sources: string[] } {
    return {
      prefixCount: this.hashPrefixes.size + this.domainPrefixes.size,
      lastUpdated: this.lastUpdated,
      sources: [...this.sources],
    };
  }
}
|
|
319
|
+
|
|
320
|
+
// Module-level singleton: the one ThreatDatabase instance used by
// analyzeUrlSafety and exposed for updates through urlSafetyDatabase.
const threatDb = new ThreatDatabase();
|
|
322
|
+
|
|
323
|
+
// ============================================================================
|
|
324
|
+
// PATTERN-BASED DETECTION (Local, no external calls)
|
|
325
|
+
// ============================================================================
|
|
326
|
+
|
|
327
|
+
// Brand names (registrable label only, no TLD) that typosquatters commonly
// imitate. Order matters: the first brand that matches is the one reported.
const POPULAR_DOMAINS: string[] = [
  'google', 'facebook', 'amazon', 'apple',
  'microsoft', 'paypal', 'netflix', 'instagram',
  'twitter', 'linkedin', 'youtube', 'github',
  'dropbox', 'adobe', 'salesforce', 'stripe',
  'shopify', 'wordpress', 'cloudflare', 'aws',
  'azure', 'slack', 'zoom', 'docusign',
];
|
|
334
|
+
|
|
335
|
+
// Hostname suffixes (leading dot included) treated as a phishing/malware signal.
const SUSPICIOUS_TLDS: string[] = [
  // Free TLDs abused for phishing
  '.tk', '.ml', '.ga', '.cf', '.gq',
  '.xyz', '.top', '.work',
  '.click', '.link', '.download',
  // New TLDs that can be confusing
  '.zip', '.mov',
];
|
|
341
|
+
|
|
342
|
+
// Path suffixes (leading dot included, lowercase) flagged as dangerous file types.
const SUSPICIOUS_EXTENSIONS: string[] = [
  '.exe', '.msi', '.bat', '.cmd',
  '.ps1', '.vbs', '.jar', '.scr',
  '.pif', '.application', '.hta', '.cpl',
  '.msc', '.wsf',
];
|
|
348
|
+
|
|
349
|
+
// Look-alike (homograph) characters: each ASCII letter maps to the non-ASCII
// glyphs that can be mistaken for it in a hostname.
const HOMOGRAPH_CHARS: Record<string, string[]> = {
  a: ['а', 'ɑ', 'α'], // Cyrillic а, Latin alpha, Greek alpha
  c: ['с', 'ϲ'],      // Cyrillic с, plus a second look-alike
  e: ['е', 'ё'],      // Cyrillic е, Cyrillic ё
  o: ['о', 'ο'],      // Cyrillic о, Greek omicron
  p: ['р'],           // Cyrillic р
  x: ['х'],           // Cyrillic х
  y: ['у'],           // Cyrillic у
};
|
|
359
|
+
|
|
360
|
+
/**
 * Report whether `str` contains any of the look-alike glyphs listed in
 * HOMOGRAPH_CHARS.
 *
 * @param str - Text to scan (typically a hostname).
 * @returns `true` as soon as one listed glyph is found, otherwise `false`.
 */
function containsHomographs(str: string): boolean {
  return Object.values(HOMOGRAPH_CHARS).some(group =>
    group.some(glyph => str.includes(glyph))
  );
}
|
|
371
|
+
|
|
372
|
+
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions that turn `a`
 * into `b`. Characters are compared as UTF-16 code units.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance; the other string's length when one is empty.
 */
function levenshteinDistance(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Only the previous row of the DP table is needed to compute the next one.
  let previous: number[] = Array.from({ length: a.length + 1 }, (_, col) => col);

  for (let row = 1; row <= b.length; row++) {
    const current: number[] = [row];
    const target = b.charAt(row - 1);

    for (let col = 1; col <= a.length; col++) {
      current[col] =
        target === a.charAt(col - 1)
          ? previous[col - 1]
          : 1 + Math.min(previous[col - 1], current[col - 1], previous[col]);
    }

    previous = current;
  }

  return previous[a.length];
}
|
|
398
|
+
|
|
399
|
+
// Second-level labels that, under a two-letter country-code TLD, belong to
// the public suffix rather than to the registrant (e.g. "co" in "co.uk").
// This is a small heuristic list, not the full Public Suffix List.
const COMPOUND_SUFFIX_LABELS = new Set(['co', 'com', 'org', 'net', 'gov', 'edu', 'ac']);

/**
 * Check for typosquatting of popular domains.
 *
 * The registrant-chosen label of `hostname` is compared with every entry in
 * POPULAR_DOMAINS:
 * - a small edit distance is reported as a possible typosquat;
 * - a label that embeds the brand together with '-', 'secure', 'login' or
 *   'account' is reported as a suspicious variation.
 *
 * Compared with a plain "second-to-last label" rule this skips compound
 * suffixes such as ".com.au", so that "example.com.au" is judged on "example"
 * rather than on "com" (which is two edits from "zoom"). The allowed edit
 * distance also shrinks to 1 for brands of four letters or fewer, because a
 * distance of 2 matches a large share of unrelated short names.
 *
 * @param hostname - Hostname to inspect (any case).
 * @returns A human-readable reason, or `null` when nothing looks suspicious
 *          (including when the label is exactly one of the listed brands).
 */
function checkTyposquatting(hostname: string): string | null {
  const labels = hostname.toLowerCase().split('.').filter(label => label.length > 0);
  if (labels.length === 0) return null;

  // Default: the label immediately left of the TLD.
  let mainIndex = labels.length >= 2 ? labels.length - 2 : 0;
  const tld = labels[labels.length - 1];
  if (labels.length >= 3 && tld.length === 2 && COMPOUND_SUFFIX_LABELS.has(labels[mainIndex])) {
    mainIndex -= 1; // step over the "co"/"com"/... part of a compound suffix
  }
  const mainDomain = labels[mainIndex];

  // The genuine brand label is never a typosquat of another brand.
  if (POPULAR_DOMAINS.includes(mainDomain)) return null;

  for (const popular of POPULAR_DOMAINS) {
    // Levenshtein distance check, stricter for short brand names.
    const maxDistance = popular.length <= 4 ? 1 : 2;
    const distance = levenshteinDistance(mainDomain, popular);
    if (distance > 0 && distance <= maxDistance) {
      return `Possible typosquat of "${popular}"`;
    }

    // Suspicious variations: brand embedded next to a lure word or hyphen.
    if (mainDomain.includes(popular)) {
      if (mainDomain.includes('-') || mainDomain.includes('secure') ||
          mainDomain.includes('login') || mainDomain.includes('account')) {
        return `Suspicious variation of "${popular}"`;
      }
    }
  }
  return null;
}
|
|
425
|
+
|
|
426
|
+
/**
 * Analyze URL for suspicious patterns (local analysis only).
 *
 * Heuristics applied to a parseable URL:
 * - host is an IPv4 or bracketed IPv6 literal;
 * - host ends in one of SUSPICIOUS_TLDS;
 * - host has more than four dot-separated labels;
 * - host looks like a typosquat of a popular brand;
 * - host contains look-alike Unicode characters;
 * - path ends in one of SUSPICIOUS_EXTENSIONS;
 * - the authority carries userinfo (`user@host`), which can hide the real host;
 * - host is a known URL shortener.
 * A URL that cannot be parsed gets the single reason 'Malformed URL'.
 *
 * Notes on two checks:
 * - `URL.hostname` returns internationalized hosts in ASCII (punycode) form,
 *   so the look-alike check also inspects the host as written in `urlString`.
 *   A link that is already written in `xn--` form is not decoded here and is
 *   therefore not checked for look-alikes.
 * - Only '@' inside the authority counts; '@' in a path or query (for example
 *   "/@username") is ordinary and is not flagged.
 *
 * @param urlString - Absolute URL to analyze.
 * @returns `null` when no heuristic fired; otherwise the reasons and a risk
 *          level: 'high' when an IP-address, typosquat, look-alike or
 *          dangerous-file reason is present or when three or more reasons
 *          fired, and 'medium' in every other case.
 */
function analyzeUrlPatterns(urlString: string): PatternMatch | null {
  const reasons: string[] = [];

  try {
    const url = new URL(urlString);
    const hostname = url.hostname.toLowerCase();
    const pathname = url.pathname.toLowerCase();

    // Authority exactly as written (before the parser's punycode conversion).
    const authorityMatch = /^[a-z][a-z0-9+.-]*:\/\/([^\/?#]*)/i.exec(urlString.trim());
    const rawAuthority = authorityMatch ? authorityMatch[1] : '';
    const rawHost = rawAuthority.slice(rawAuthority.lastIndexOf('@') + 1).toLowerCase();

    // IP address instead of domain (IPv6 literals are serialized in brackets)
    if (/^(\d{1,3}\.){3}\d{1,3}$/.test(hostname) || hostname.startsWith('[')) {
      reasons.push('Uses IP address instead of domain');
    }

    // Suspicious TLDs
    const suspiciousTld = SUSPICIOUS_TLDS.find(tld => hostname.endsWith(tld));
    if (suspiciousTld !== undefined) {
      reasons.push(`Suspicious TLD: ${suspiciousTld}`);
    }

    // Excessive subdomains
    if (hostname.split('.').length > 4) {
      reasons.push('Excessive subdomains');
    }

    // Typosquatting
    const typosquat = checkTyposquatting(hostname);
    if (typosquat) reasons.push(typosquat);

    // Homograph attack: check both the parsed and the as-written host.
    if (containsHomographs(hostname) || containsHomographs(rawHost)) {
      reasons.push('Contains look-alike Unicode characters');
    }

    // Suspicious file extensions
    const dangerousExt = SUSPICIOUS_EXTENSIONS.find(ext => pathname.endsWith(ext));
    if (dangerousExt !== undefined) {
      reasons.push(`Dangerous file type: ${dangerousExt}`);
    }

    // @ symbol in the authority (userinfo), not merely anywhere in the URL
    if (url.username !== '' || url.password !== '' || rawAuthority.includes('@')) {
      reasons.push('Contains @ symbol (may obscure destination)');
    }

    // URL shorteners
    const shorteners = ['bit.ly', 'tinyurl.com', 't.co', 'goo.gl', 'ow.ly', 'is.gd'];
    if (shorteners.some(s => hostname === s || hostname.endsWith('.' + s))) {
      reasons.push('URL shortener (destination hidden)');
    }
  } catch {
    reasons.push('Malformed URL');
  }

  if (reasons.length === 0) return null;

  // Determine risk level. The keywords are matched case-insensitively against
  // the reason texts pushed above, so they must stay in sync with them.
  let riskLevel: 'low' | 'medium' | 'high' = 'medium';
  const highRisk = ['look-alike', 'IP address', 'typosquat', 'Dangerous file'];
  if (reasons.some(r => highRisk.some(h => r.toLowerCase().includes(h.toLowerCase())))) {
    riskLevel = 'high';
  }
  if (reasons.length >= 3) riskLevel = 'high';

  return { url: urlString, reasons, riskLevel };
}
|
|
499
|
+
|
|
500
|
+
// ============================================================================
|
|
501
|
+
// MAIN EXPORT
|
|
502
|
+
// ============================================================================
|
|
503
|
+
|
|
504
|
+
/**
 * Run the URL safety audit for a page and the external links found on it.
 *
 * Every URL (the page itself first, then each external link) goes through:
 * 1. a lookup in the local threat hash database, and
 * 2. the heuristic pattern analysis.
 *
 * Issues are emitted in this order: known-threat hit on the page, known-threat
 * hits on links, high-risk patterns on the page, high-risk patterns on links,
 * and finally a notice listing any URL-shortener links.
 *
 * No external API calls are made - all checks are local.
 *
 * @param url - The audited page URL.
 * @param externalLinks - Outbound links found on the page (defaults to none).
 * @returns The generated issues plus the raw match data and database stats.
 */
export function analyzeUrlSafety(
  url: string,
  externalLinks: string[] = []
): { issues: AuditIssue[]; data: UrlSafetyData } {
  const candidates = [url, ...externalLinks];
  const matchedUrls: UrlMatch[] = [];
  const patternMatches: PatternMatch[] = [];

  candidates.forEach(candidate => {
    // Hash-based check
    const lookup = threatDb.checkUrl(candidate);
    if (lookup.matched) {
      matchedUrls.push({
        url: candidate,
        hashPrefix: lookup.prefix!,
        matchType: lookup.matchType!,
      });
    }

    // Pattern-based check
    const suspicious = analyzeUrlPatterns(candidate);
    if (suspicious) {
      patternMatches.push(suspicious);
    }
  });

  const issues: AuditIssue[] = [];
  const isPageUrl = (entry: { url: string }): boolean => entry.url === url;

  // --- Known threats (hash database hits) ---
  const pageHit = matchedUrls.find(isPageUrl);
  if (pageHit) {
    issues.push({
      code: 'URL_SAFETY_KNOWN_THREAT',
      severity: 'error',
      category: 'security',
      title: 'Website URL matches known threat database',
      description: 'Your website URL matches entries in the threat database. This indicates your site may have been compromised or flagged.',
      impact: 'Browsers and security tools will block access to your site. Search engines will remove you from results.',
      howToFix: 'Scan your site for malware, remove any malicious content, and request removal from threat databases.',
      affectedUrls: [url],
      details: {
        matchedHash: pageHit.hashPrefix,
      },
    });
  }

  const linkedHits = matchedUrls.filter(entry => !isPageUrl(entry));
  if (linkedHits.length > 0) {
    issues.push({
      code: 'URL_SAFETY_EXTERNAL_THREAT',
      severity: 'error',
      category: 'security',
      title: 'External links to known malicious URLs',
      description: `${linkedHits.length} external link(s) point to URLs in the threat database.`,
      impact: 'Linking to malicious sites harms visitors and damages your reputation and rankings.',
      howToFix: 'Remove all links to flagged URLs immediately.',
      affectedUrls: linkedHits.map(entry => entry.url),
    });
  }

  // --- Suspicious characteristics (high-risk pattern matches only) ---
  const highRisk = patternMatches.filter(entry => entry.riskLevel === 'high');

  const pageRisk = highRisk.find(isPageUrl);
  if (pageRisk) {
    issues.push({
      code: 'URL_SAFETY_SUSPICIOUS_DOMAIN',
      severity: 'warning',
      category: 'security',
      title: 'Website URL has suspicious characteristics',
      description: `Your URL shows patterns associated with malicious sites: ${pageRisk.reasons.join('; ')}`,
      impact: 'Users and security tools may distrust your site.',
      howToFix: 'Use a trustworthy domain structure. Avoid patterns that mimic other brands.',
      affectedUrls: [url],
      details: { reasons: pageRisk.reasons },
    });
  }

  const linkedRisk = highRisk.filter(entry => !isPageUrl(entry));
  if (linkedRisk.length > 0) {
    issues.push({
      code: 'URL_SAFETY_SUSPICIOUS_EXTERNAL',
      severity: 'warning',
      category: 'security',
      title: 'External links with suspicious characteristics',
      description: `${linkedRisk.length} external link(s) show suspicious patterns.`,
      impact: 'Linking to suspicious sites can harm visitors and rankings.',
      howToFix: 'Review and remove or replace suspicious external links.',
      affectedUrls: linkedRisk.map(entry => entry.url),
      details: {
        suspiciousLinks: linkedRisk.map(entry => ({
          url: entry.url,
          reasons: entry.reasons,
        })),
      },
    });
  }

  // --- URL shortener notice (any risk level) ---
  const viaShortener = patternMatches.filter(entry =>
    entry.reasons.some(reason => reason.includes('shortener'))
  );
  if (viaShortener.length > 0) {
    issues.push({
      code: 'URL_SAFETY_SHORTENERS',
      severity: 'notice',
      category: 'security',
      title: 'External links use URL shorteners',
      description: `${viaShortener.length} link(s) use URL shorteners, hiding destinations.`,
      impact: 'URL shorteners reduce trust and SEO link value.',
      howToFix: 'Replace shortened URLs with direct links.',
      affectedUrls: viaShortener.map(entry => entry.url),
    });
  }

  const data: UrlSafetyData = {
    checkedUrls: candidates.length,
    matchedUrls,
    databaseInfo: threatDb.getStats(),
    patternMatches,
  };
  return { issues, data };
}
|
|
641
|
+
|
|
642
|
+
/**
 * Public facade over the module's shared threat database, so callers can
 * load or reset threat data without touching the instance directly.
 * Every member simply forwards to the shared instance.
 */
export const urlSafetyDatabase = {
  /** Load URLs from URLhaus CSV text; returns the count reported by the database. */
  loadFromUrlhausCsv(csv: string): number {
    return threatDb.loadFromUrlhausCsv(csv);
  },

  /** Add full URLs to the blocklist; returns how many were new. */
  addUrls(urls: string[]): number {
    return threatDb.addUrls(urls);
  },

  /** Add hostnames to the domain blocklist; returns how many were new. */
  addDomains(domains: string[]): number {
    return threatDb.addDomains(domains);
  },

  /**
   * Bulk load precomputed hash prefixes (for Supabase integration).
   * Call this with data from: SELECT hash_prefix, hash_type FROM threat_hashes WHERE is_active = TRUE
   */
  loadHashPrefixes(hashes: Array<{ hash_prefix: string; hash_type: 'url' | 'domain' }>): number {
    return threatDb.loadHashPrefixes(hashes);
  },

  /** Remove all stored hashes (useful before reloading from fresh data). */
  clear(): void {
    threatDb.clear();
  },

  /** Whether the database currently holds any threat data. */
  isPopulated(): boolean {
    return threatDb.isPopulated();
  },

  /** Current database statistics. */
  getStats(): { prefixCount: number; lastUpdated: string; sources: string[] } {
    return threatDb.getStats();
  },
};
|