@q32/signal-scanner 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/dist/dynamic.d.ts +43 -0
  2. package/dist/dynamic.d.ts.map +1 -0
  3. package/{src/dynamic.ts → dist/dynamic.js} +133 -156
  4. package/dist/dynamic.js.map +1 -0
  5. package/dist/feeds.d.ts +66 -0
  6. package/dist/feeds.d.ts.map +1 -0
  7. package/dist/feeds.js +259 -0
  8. package/dist/feeds.js.map +1 -0
  9. package/dist/index.d.ts +110 -0
  10. package/dist/index.d.ts.map +1 -0
  11. package/dist/index.js +1251 -0
  12. package/dist/index.js.map +1 -0
  13. package/dist/intel.d.ts +72 -0
  14. package/dist/intel.d.ts.map +1 -0
  15. package/dist/intel.js +480 -0
  16. package/dist/intel.js.map +1 -0
  17. package/dist/node-tls.d.ts +8 -0
  18. package/dist/node-tls.d.ts.map +1 -0
  19. package/dist/node-tls.js +48 -0
  20. package/dist/node-tls.js.map +1 -0
  21. package/dist/render.d.ts +26 -0
  22. package/dist/render.d.ts.map +1 -0
  23. package/dist/render.js +248 -0
  24. package/dist/render.js.map +1 -0
  25. package/dist/rules/packs/binary.d.ts +4 -0
  26. package/dist/rules/packs/binary.d.ts.map +1 -0
  27. package/dist/rules/packs/binary.js +101 -0
  28. package/dist/rules/packs/binary.js.map +1 -0
  29. package/dist/rules/packs/css.d.ts +3 -0
  30. package/dist/rules/packs/css.d.ts.map +1 -0
  31. package/dist/rules/packs/css.js +43 -0
  32. package/dist/rules/packs/css.js.map +1 -0
  33. package/dist/rules/packs/decoders.d.ts +3 -0
  34. package/dist/rules/packs/decoders.d.ts.map +1 -0
  35. package/dist/rules/packs/decoders.js +46 -0
  36. package/dist/rules/packs/decoders.js.map +1 -0
  37. package/dist/rules/packs/html.d.ts +4 -0
  38. package/dist/rules/packs/html.d.ts.map +1 -0
  39. package/dist/rules/packs/html.js +227 -0
  40. package/dist/rules/packs/html.js.map +1 -0
  41. package/dist/rules/packs/index.d.ts +24 -0
  42. package/dist/rules/packs/index.d.ts.map +1 -0
  43. package/dist/rules/packs/index.js +75 -0
  44. package/dist/rules/packs/index.js.map +1 -0
  45. package/dist/rules/packs/script-risk.d.ts +4 -0
  46. package/dist/rules/packs/script-risk.d.ts.map +1 -0
  47. package/dist/rules/packs/script-risk.js +231 -0
  48. package/dist/rules/packs/script-risk.js.map +1 -0
  49. package/dist/rules/packs/source-code.d.ts +3 -0
  50. package/dist/rules/packs/source-code.d.ts.map +1 -0
  51. package/dist/rules/packs/source-code.js +179 -0
  52. package/dist/rules/packs/source-code.js.map +1 -0
  53. package/dist/rules/packs/urls.d.ts +3 -0
  54. package/dist/rules/packs/urls.d.ts.map +1 -0
  55. package/dist/rules/packs/urls.js +123 -0
  56. package/dist/rules/packs/urls.js.map +1 -0
  57. package/dist/rules/types.d.ts +34 -0
  58. package/dist/rules/types.d.ts.map +1 -0
  59. package/dist/rules/types.js +2 -0
  60. package/dist/rules/types.js.map +1 -0
  61. package/package.json +18 -14
  62. package/src/feeds.ts +0 -334
  63. package/src/index.ts +0 -1366
  64. package/src/intel.ts +0 -561
  65. package/src/node-tls.ts +0 -55
  66. package/src/render.ts +0 -233
  67. package/src/rules/packs/binary.ts +0 -103
  68. package/src/rules/packs/css.ts +0 -44
  69. package/src/rules/packs/decoders.ts +0 -47
  70. package/src/rules/packs/html.ts +0 -255
  71. package/src/rules/packs/index.ts +0 -76
  72. package/src/rules/packs/script-risk.ts +0 -236
  73. package/src/rules/packs/source-code.ts +0 -180
  74. package/src/rules/packs/urls.ts +0 -138
  75. package/src/rules/types.ts +0 -56
package/dist/index.js ADDED
@@ -0,0 +1,1251 @@
1
+ import { Parser } from "htmlparser2";
2
+ import { binaryRules, binaryStringRules, cssRules, decodedArtifactRules, htmlRules, htmlTechnologyRules, scriptCompositeRules, scriptRiskRules, sourceCodeRules, urlRules } from "./rules/packs/index.js";
3
// Default cap for the rolling text window; used when options.maxWindowChars is not supplied.
+ const DEFAULT_WINDOW_CHARS = 64 * 1024;
4
// Length the carry-over tail is trimmed to after each feed() call, so the next scan window overlaps the previous one.
+ const DEFAULT_CARRY_CHARS = 4096;
5
// Default for options.maxDecodedBytes, which is handed to decodeCandidate() for every decode candidate.
+ const DEFAULT_MAX_DECODED_BYTES = 128 * 1024;
6
// Default for options.maxDecodeDepth; scanText() only calls decodeAndRescan() while its depth is below this value.
+ const DEFAULT_MAX_DECODE_DEPTH = 2;
7
/**
 * Create a streaming scanner for one piece of content.
 *
 * The returned object exposes:
 *  - `feed(chunk)`: scans one chunk of bytes and returns only the findings that
 *    were appended to the state while that chunk was processed (an empty array
 *    for a zero-length chunk);
 *  - `finish()`: runs the aggregate rules and returns the final report
 *    (content kind, findings, URLs, artifacts, score, disposition, counters).
 *
 * @param {object} [options]
 * @param {object} [options.source] Source metadata. The fields read here are
 *   `url`, `finalUrl`, `filename` and `contentType`.
 * @param {number} [options.maxWindowChars] Cap for the rolling text window.
 * @param {number} [options.maxDecodedBytes] Limit passed down to the decode pass.
 * @param {number} [options.maxDecodeDepth] Depth limit for decode-and-rescan.
 * @returns {{ feed: Function, finish: Function }}
 */
export function createScanner(options = {}) {
    const source = options.source ?? {};
    const maxWindowChars = options.maxWindowChars ?? DEFAULT_WINDOW_CHARS;
    const maxDecodedBytes = options.maxDecodedBytes ?? DEFAULT_MAX_DECODED_BYTES;
    const maxDecodeDepth = options.maxDecodeDepth ?? DEFAULT_MAX_DECODE_DEPTH;

    const state = {
        source,
        // Provisional guess from metadata only; refined once the first bytes arrive.
        contentKind: detectContentKind({
            contentType: source.contentType ?? null,
            filename: source.filename ?? source.url,
            firstBytes: new Uint8Array()
        }),
        textWindow: "",
        scanCarry: "",
        absoluteOffset: 0,
        line: 1,
        column: 1,
        findings: [],
        findingKeys: new Set(),
        urls: new Map(),
        artifacts: [],
        counters: {},
        forms: [],
        externalScripts: [],
        inScript: false,
        currentScript: "",
        binaryHeaderScanned: false
    };

    // Seed the URL table with the request URL and, when different, the final URL.
    const { url: requestUrl, finalUrl } = state.source;
    if (requestUrl) {
        addUrl(state, requestUrl);
    }
    if (finalUrl && finalUrl !== requestUrl) {
        addUrl(state, finalUrl);
    }
    scanRedirectContext(state);
    scanTlsContext(state);

    function feed(chunk) {
        if (!chunk.byteLength) {
            return [];
        }
        const isFirstChunk = state.absoluteOffset === 0;
        if (isFirstChunk) {
            // Re-detect the content kind now that real leading bytes are available.
            state.contentKind = detectContentKind({
                contentType: state.source.contentType ?? null,
                filename: state.source.filename ?? state.source.url,
                firstBytes: chunk
            });
            scanBinaryHeader(state, chunk);
        }
        const findingsBefore = state.findings.length;
        const decoded = decodeText(chunk);
        // The scan window is the carried tail of the previous window plus the new text.
        const windowText = state.scanCarry + decoded;
        state.textWindow = trimWindow(state.textWindow + decoded, maxWindowChars);
        const windowOffset = state.absoluteOffset - byteLength(state.scanCarry);
        scanText(state, windowText, windowOffset, 0, maxDecodedBytes, maxDecodeDepth);
        updatePosition(state, decoded);
        state.scanCarry = trimWindow(windowText, DEFAULT_CARRY_CHARS);
        state.absoluteOffset += chunk.byteLength;
        state.counters.bytes_seen = state.absoluteOffset;
        return state.findings.slice(findingsBefore);
    }

    function finish() {
        finalizeAggregateRules(state);
        const score = scoreFindings(state.findings);
        const report = {
            contentKind: state.contentKind,
            findings: dedupeFindings(state.findings),
            urls: [...state.urls.values()],
            artifacts: state.artifacts,
            score,
            disposition: dispositionForScore(score),
            counters: { ...state.counters }
        };
        return report;
    }

    return { feed, finish };
}
78
/**
 * Classify content from its declared content type, its filename and its
 * leading bytes.
 *
 * Checks run in a fixed order and the first match wins: ELF magic, then the
 * declared content type, then the filename extension, then archive magic
 * numbers, then a sniff of the first 512 bytes decoded as text.
 *
 * @param {object} input
 * @param {string|null} [input.contentType] Declared content type; parameters after ";" are ignored.
 * @param {string} [input.filename] File name or URL; anything after "?" is ignored.
 * @param {Uint8Array} [input.firstBytes] Leading bytes of the content.
 * @returns {string} One of "executable", "html", "javascript", "css", "json",
 *   "svg", "text", "archive" or "unknown".
 */
export function detectContentKind(input) {
    const first = input.firstBytes ?? new Uint8Array();
    if (hasElfMagic(first)) {
        return "executable";
    }

    // 1) Declared content type.
    const contentType = (input.contentType ?? "").toLowerCase().split(";")[0].trim();
    const contentTypeChecks = [
        [() => contentType.includes("html"), "html"],
        [() => contentType.includes("javascript") || contentType.includes("ecmascript"), "javascript"],
        [() => contentType === "text/css", "css"],
        [() => contentType.includes("json"), "json"],
        [() => contentType.includes("svg"), "svg"],
        [() => contentType.startsWith("text/"), "text"],
        [() => ["zip", "tar", "gzip", "x-7z", "rar"].some((needle) => contentType.includes(needle)), "archive"]
    ];
    for (const [matches, kind] of contentTypeChecks) {
        if (matches()) {
            return kind;
        }
    }

    // 2) Filename extension.
    const filename = (input.filename ?? "").toLowerCase().split("?")[0];
    const extensionKinds = [
        [/\.(html?|xhtml)$/, "html"],
        [/\.(mjs|cjs|js|jsx|ts|tsx)$/, "javascript"],
        [/\.css$/, "css"],
        [/\.json$/, "json"],
        [/\.svg$/, "svg"],
        [/\.(zip|jar|war|tar|tgz|gz|7z|rar)$/, "archive"]
    ];
    for (const [pattern, kind] of extensionKinds) {
        if (pattern.test(filename)) {
            return kind;
        }
    }

    // 3) Archive magic numbers ("PK", 1f 8b, and the six-byte "7z" signature).
    const startsWithBytes = (minLength, signature) => first.length >= minLength && signature.every((byte, position) => first[position] === byte);
    const looksLikeArchive = startsWithBytes(4, [0x50, 0x4b]) ||
        startsWithBytes(2, [0x1f, 0x8b]) ||
        startsWithBytes(6, [0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c]);
    if (looksLikeArchive) {
        return "archive";
    }

    // 4) Sniff the decoded head of the content.
    const text = decodeText(first.slice(0, 512)).trimStart();
    const textKinds = [
        [/^<!doctype html/i, "html"],
        [/^<html[\s>]/i, "html"],
        [/^<svg[\s>]/i, "svg"],
        [/^\s*(?:import|export|const|let|var|function)\b/, "javascript"],
        [/^\s*(?:@import|[.#]?[a-z0-9_-]+\s*\{[^}]+:)/i, "css"],
        [/^[\[{]/, "json"]
    ];
    for (const [pattern, kind] of textKinds) {
        if (pattern.test(text)) {
            return kind;
        }
    }
    if (text) {
        return "text";
    }
    return "unknown";
}
129
/**
 * Resolve `raw` against `base`, drop the fragment, and describe the result.
 *
 * @param {string} raw URL text as found in the content.
 * @param {string} [base] Base URL used to resolve relative references and to
 *   compute the relation to the page.
 * @returns {object|null} `{ raw, normalized, registrableDomain, relation,
 *   scheme, destinationType, flags }`, or `null` when anything in the
 *   normalization throws (for example an unparsable URL or base).
 */
export function normalizeUrl(raw, base) {
    try {
        const url = new URL(raw, base);
        url.hash = "";
        const normalized = url.toString();
        const host = url.hostname.toLowerCase();
        const registrableDomain = registrableDomainFor(host);
        const baseHost = base ? new URL(base).hostname.toLowerCase() : "";
        const baseDomain = baseHost ? registrableDomainFor(baseHost) : null;
        const path = url.pathname;

        // Each entry is [flag, did-it-trigger]; the order here is the order the
        // flags appear in the result.
        const flagChecks = [
            ["punycode", host.startsWith("xn--") || host.includes(".xn--")],
            ["ip_literal", isIpLiteral(host)],
            ["private_or_localhost", isPrivateHost(host)],
            ["url_shortener", isUrlShortener(host)],
            // Credential/account/banking lure terms in the path (multilingual plus a
            // few leetspeak spellings). They only convict when the host is also
            // suspicious (see credential_path_on_suspicious_host), so the breadth is safe.
            ["suspicious_path_terms", /(?:log[i1]n|sign[\s_-]?[i1]n|signon|account|verify|verif|wallet|checkout|payment|download|payload|secure|update|confirm|recover|unlock|billing|webscr|kunden|compte|cliente?s|conta|codigo|banking)/i.test(path)],
            ["suspicious_tld", isSuspiciousTld(host)],
            ["download_like_path", /(?:\/|^)(?:payload|installer|setup|invoice|verify|wallet|checkout|payment)(?:[\/_.-]|$)|\.(?:exe|scr|msi|dmg|pkg|apk|zip)$/i.test(path)],
            ["malware_download_like_path", isMalwareDownloadLikePath(path)],
            ["shared_hosting_subdomain", isSharedHostingSubdomain(host, registrableDomain)],
            ["generated_host_label", isGeneratedHostLabel(host, registrableDomain)]
        ];
        const flags = flagChecks.filter(([, triggered]) => triggered).map(([flag]) => flag);

        const relation = relationFor(host, registrableDomain, baseHost, baseDomain);
        const scheme = url.protocol.replace(":", "");
        const destinationType = destinationTypeFor(url, host);
        return { raw, normalized, registrableDomain, relation, scheme, destinationType, flags };
    }
    catch {
        return null;
    }
}
177
/**
 * Run every applicable scanner over one text window.
 *
 * URL collection and page-intent checks always run. The HTML, JavaScript, CSS,
 * binary-string and source-text scanners run when the detected content kind or
 * a cheap pattern check on the text calls for them. While `depth` is below
 * `maxDecodeDepth`, encoded payloads are decoded and rescanned.
 *
 * @param {object} state Scanner state.
 * @param {string} text Window text to scan.
 * @param {number} offset Offset attributed to the start of `text`.
 * @param {number} depth Current decode depth (0 for the original stream).
 * @param {number} maxDecodedBytes Limit passed to the decode pass.
 * @param {number} maxDecodeDepth Depth at which decoding stops.
 */
function scanText(state, text, offset, depth, maxDecodedBytes, maxDecodeDepth) {
    const htmlMarkup = /<html|<script|<form|<iframe/i;
    const scriptTag = /<script\b/i;
    const cssHints = /(?:display\s*:\s*none|opacity\s*:\s*0|@import|url\()/i;

    collectUrls(state, text);
    scanPageIntentSignals(state, text);

    if (state.contentKind === "html" || htmlMarkup.test(text)) {
        scanHtml(state, text);
    }
    // state.inScript is read after the HTML pass on purpose: that pass updates it.
    if (state.contentKind === "javascript" || state.inScript || scriptTag.test(text)) {
        scanJavaScript(state, text);
    }
    if (state.contentKind === "css" || cssHints.test(text)) {
        scanCss(state, text);
    }
    if (state.contentKind === "executable" || likelyBinaryStrings(text)) {
        scanBinaryStrings(state, text);
    }
    if (shouldScanSourceText(state)) {
        scanSourceText(state, text);
    }
    if (depth >= maxDecodeDepth) {
        return;
    }
    decodeAndRescan(state, text, offset, depth, maxDecodedBytes, maxDecodeDepth);
}
194
/**
 * Scan one text window as HTML.
 *
 * The window is tokenized with htmlparser2 rather than hand-rolled regexes: it
 * copes with malformed markup, entity-encoded attribute values (e.g.
 * href="java&#115;cript:…"), quoting tricks and oddly split tags, all of which
 * evade `<tag ...>` regexes. Windows overlap by the carry, so per-tag counters
 * can be inflated by the overlap.
 *
 * Every open tag is forwarded to handleOpenTag(); inline script bodies are
 * passed to scanJavaScript(). After parsing, technology fingerprints,
 * login/payment language and brand mentions are checked on the raw text.
 *
 * @param {object} state Scanner state; `state.inScript` is updated here.
 * @param {string} text Window text.
 */
function scanHtml(state, text) {
    let scriptBody = "";
    let scriptDepth = 0;
    const parser = new Parser({
        onopentag(name, attribs) {
            const attrs = new Map();
            for (const key of Object.keys(attribs))
                attrs.set(key.toLowerCase(), attribs[key]);
            if (name === "script") {
                scriptDepth += 1;
                scriptBody = "";
            }
            handleOpenTag(state, name, attrs);
        },
        ontext(chunk) {
            if (scriptDepth > 0)
                scriptBody += chunk;
        },
        onclosetag(name, isImplied) {
            if (name !== "script" || scriptDepth <= 0)
                return;
            // parser.end() makes htmlparser2 emit a synthetic close event for every
            // element that is still open, flagged through `isImplied`. Such a
            // <script> was NOT really closed inside this window, so leave
            // scriptDepth/scriptBody alone and let the post-parse branch below mark
            // the scanner as mid-script. Previously the synthetic event was treated
            // as a real </script>, which made that branch unreachable and always
            // cleared state.inScript. If the installed htmlparser2 does not pass
            // the flag, `isImplied` is undefined and behaviour is as before.
            if (isImplied)
                return;
            scriptDepth -= 1;
            state.inScript = false;
            if (scriptBody)
                scanJavaScript(state, scriptBody);
            scriptBody = "";
        }
    }, { decodeEntities: true, lowerCaseTags: true, lowerCaseAttributeNames: true });
    parser.write(text);
    parser.end();
    // A <script> whose closing tag falls beyond this window: still scan what we
    // captured (regexes would have missed the whole block), and remember we're
    // mid-script so the next chunk keeps scanning JS.
    if (scriptDepth > 0 && scriptBody) {
        scanJavaScript(state, scriptBody);
        state.inScript = true;
    }
    if (/wp-content|wp-includes/i.test(text)) {
        addRuleFinding(state, htmlTechnologyRules.wordpress_surface_reference, pageUrl(state) ?? "html", {});
    }
    scanTechnologyFingerprint(state, text, pageUrl(state) ?? "html");
    if (/(?:login|sign in|password|account|verify|checkout|payment)/i.test(text))
        increment(state, "brand_login_or_payment_language");
    recordContentBrandMentions(state, text);
}
245
/**
 * Per-tag dispatch used by the HTML parser's open-tag callback.
 *
 * @param {object} state Scanner state (counters, forms, external scripts, findings).
 * @param {string} name Tag name, already lowercased by the caller.
 * @param {Map<string, string>} attrs Attributes keyed by lowercased name.
 */
function handleOpenTag(state, name, attrs) {
    // Shared handling for <a>, <link>, <img> and <iframe>: count the tag, record
    // its target URL, and flag hidden iframes that point at a risky off-site URL.
    const recordLinkLikeTag = () => {
        increment(state, `html.${name}`);
        const target = attrs.get("href") ?? attrs.get("src");
        if (target) {
            addUrl(state, target);
        }
        if (name !== "iframe" || !target || !hiddenAttrs(attrs)) {
            return;
        }
        const resolved = normalizeUrl(target, pageUrl(state));
        if (resolved?.relation === "off-site" && hasRiskyUrlFlags(resolved)) {
            addRuleFinding(state, htmlRules.hidden_iframe_off_origin, resolved.normalized, {});
        }
    };

    switch (name) {
        case "script": {
            const scriptSrc = attrs.get("src");
            if (!scriptSrc) {
                increment(state, "inline_script");
            }
            else {
                increment(state, "html.script_src");
                addUrl(state, scriptSrc);
                const resolved = normalizeUrl(scriptSrc, pageUrl(state));
                // Ad/analytics/tag-manager scripts are expected on ordinary ad-funded
                // sites and are not counted as suspicious external scripts.
                const isOffSite = resolved?.relation === "off-site";
                if (isOffSite && !isAdOrAnalyticsHost(resolved.normalized)) {
                    state.externalScripts.push(resolved);
                }
                // An http:// script on an https:// page is mixed content.
                if (pageUrl(state)?.startsWith("https://") && resolved?.scheme === "http") {
                    addRuleFinding(state, htmlRules.mixed_content_script, resolved.normalized, {});
                }
                scanTechnologyFingerprint(state, scriptSrc, resolved?.normalized ?? scriptSrc);
            }
            state.inScript = true;
            break;
        }
        case "form": {
            increment(state, "html.form");
            const inlineStyle = attrs.get("style") ?? "";
            state.forms.push({
                action: attrs.get("action") ?? null,
                method: attrs.get("method")?.toLowerCase() ?? "get",
                hasPassword: false,
                hasPayment: false,
                hiddenTarget: /display\s*:\s*none|visibility\s*:\s*hidden|opacity\s*:\s*0/i.test(inlineStyle)
            });
            break;
        }
        case "input": {
            const type = (attrs.get("type") ?? "").toLowerCase();
            const field = `${attrs.get("name") ?? ""} ${attrs.get("autocomplete") ?? ""}`.toLowerCase();
            const isPassword = type === "password" || field.includes("password");
            // A password field counts as credential capture even outside any
            // <form>: PIN/OTP grids and JS-submit kits place inputs outside forms.
            if (isPassword) {
                increment(state, "page_password_input");
            }
            if (!state.forms.length) {
                break;
            }
            increment(state, "html.input");
            // Inputs are attributed to the most recently opened form.
            const currentForm = state.forms[state.forms.length - 1];
            if (isPassword) {
                currentForm.hasPassword = true;
            }
            if (/(?:cc-|card|cvv|cvc|expiry|payment)/.test(`${type} ${field}`)) {
                currentForm.hasPayment = true;
            }
            break;
        }
        case "a":
        case "img":
        case "iframe":
            recordLinkLikeTag();
            break;
        case "link":
            recordLinkLikeTag();
            if (/canonical/i.test(attrs.get("rel") ?? "")) {
                increment(state, "html.canonical");
            }
            break;
        case "base": {
            const baseHref = attrs.get("href");
            if (baseHref) {
                increment(state, "html.base_href");
                addUrl(state, baseHref);
            }
            break;
        }
        case "meta": {
            const metaContent = attrs.get("content") ?? "";
            if (/generator/i.test(attrs.get("name") ?? "") && /wordpress/i.test(metaContent)) {
                addRuleFinding(state, htmlTechnologyRules.wordpress_surface_reference, pageUrl(state) ?? "html", { generator: metaContent });
            }
            if (/refresh/i.test(attrs.get("http-equiv") ?? "")) {
                increment(state, "html.meta_refresh");
                const refreshTarget = metaContent.match(/url\s*=\s*([^;]+)/i)?.[1]?.trim();
                if (refreshTarget) {
                    const resolved = normalizeUrl(refreshTarget, pageUrl(state));
                    if (resolved?.relation === "off-site") {
                        addRuleFinding(state, htmlRules.meta_refresh_external, resolved.normalized, {});
                    }
                }
            }
            break;
        }
        default:
            break;
    }
}
331
/**
 * Count how often each known brand is named in the window, and note brands
 * that appear in the page <title>.
 *
 * For every brand in PHISH_BRANDS, each keyword of four or more characters is
 * matched as a whole word, case-insensitively. The total number of matches is
 * added to `counters["content_brand:<brand>"]`; a match inside the first
 * <title> element (up to 200 characters of it) sets
 * `counters["title_brand:<brand>"]` to 1. The counters are consumed by the
 * aggregate rules (see finalizeAggregateRules).
 *
 * @param {object} state Scanner state whose `counters` are updated.
 * @param {string} text Window text.
 */
function recordContentBrandMentions(state, text) {
    const titleMatch = text.match(/<title\b[^>]*>([\s\S]{0,200}?)<\/title>/i);
    const title = titleMatch?.[1] ?? "";
    for (const brand of PHISH_BRANDS) {
        const titleKey = `title_brand:${brand.brand}`;
        const contentKey = `content_brand:${brand.brand}`;
        let hits = 0;
        for (const keyword of brand.keywords) {
            // Very short keywords are skipped.
            if (keyword.length < 4) {
                continue;
            }
            const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            const re = new RegExp("\\b" + escaped + "\\b", "gi");
            hits += text.match(re)?.length ?? 0;
            if (title && re.test(title)) {
                state.counters[titleKey] = 1;
            }
        }
        if (hits) {
            state.counters[contentKey] = (state.counters[contentKey] ?? 0) + hits;
        }
    }
}
355
/**
 * Bump the page-intent counters that apply to this window.
 *
 * Escaped slashes (`\/`) are unescaped first, then each detector is run in
 * turn and its counter incremented when it reports a hit.
 *
 * @param {object} state Scanner state.
 * @param {string} text Window text.
 */
function scanPageIntentSignals(state, text) {
    const unescaped = text.replace(/\\\//g, "/");
    const detectors = [
        [hasCryptoWalletLoginLanguage, "content.crypto_wallet_login_language"],
        [hasCryptoTradingLandingLanguage, "content.crypto_trading_landing_language"],
        [hasLoginUiImageReference, "content.login_ui_image_reference"],
        [hasSeoTrademarkStuffing, "content.seo_trademark_stuffing"]
    ];
    for (const [detect, counter] of detectors) {
        if (detect(unescaped)) {
            increment(state, counter);
        }
    }
}
366
/**
 * Apply the script risk rules and the composite script rules to a piece of
 * JavaScript text.
 *
 * Every matching rule in scriptRiskRules bumps its counter; rules that are not
 * primitive signals (see isPrimitiveJavaScriptSignal) also produce a finding.
 * The composite checks then look for combinations of behaviours in the text.
 *
 * @param {object} state Scanner state.
 * @param {string} text JavaScript text (an inline script body or a whole window).
 */
function scanJavaScript(state, text) {
    for (const rule of scriptRiskRules) {
        if (!rule.pattern.test(text)) {
            continue;
        }
        increment(state, rule.counter ?? rule.id);
        if (isPrimitiveJavaScriptSignal(rule.id)) {
            continue;
        }
        addRuleFinding(state, rule, pageUrl(state) ?? "inline-script", {});
    }

    const hasExternalRequestApi = /\b(?:fetch|XMLHttpRequest|sendBeacon|WebSocket)\b/.test(text);

    // A network API plus an off-site URL near credential/storage access.
    const sensitiveData = /(?:password|FormData|localStorage|sessionStorage|document\.cookie|navigator\.clipboard)/i;
    if (hasExternalRequestApi && hasNearbyOffSiteUrlWith(text, pageUrl(state), sensitiveData)) {
        addRuleFinding(state, scriptCompositeRules.credential_exfil_candidate, pageUrl(state) ?? "script", {});
    }

    // Dynamic execution close to a decoder call.
    const decodesThenExecutes = hasNearbyRegexPair(text, /(?:eval|Function)\s*\(/g, /\b(?:atob|String\.fromCharCode|unescape)\b/g, 320);
    if (decodesThenExecutes) {
        addRuleFinding(state, scriptCompositeRules.decoded_dynamic_execution, pageUrl(state) ?? "script", {});
    }

    // Script that rewrites a form's action.
    const rewritesFormAction = /\.action\s*=|setAttribute\s*\(\s*['"]action['"]/.test(text);
    if (rewritesFormAction) {
        addRuleFinding(state, scriptCompositeRules.form_action_changed_by_javascript, pageUrl(state) ?? "script", {});
    }

    // Wallet API usage together with a network API and a nearby off-site URL.
    const walletOrBeacon = /\b(?:window\.ethereum|WalletConnect|ethereum\.request|sendBeacon|fetch|XMLHttpRequest|WebSocket)\b|\.(?:approve|permit)\s*\(|\bmethod\s*:\s*['"]eth_/i;
    if (hasWalletSignal(text) && hasExternalRequestApi && hasNearbyOffSiteUrlWith(text, pageUrl(state), walletOrBeacon)) {
        addRuleFinding(state, scriptCompositeRules.wallet_api_plus_external_beacon, pageUrl(state) ?? "script", {});
    }

    // Payment-card field IDENTIFIERS only: bare "card"/"payment" match UI card
    // components and nav links on ordinary sites, a major false-positive source.
    const cardFieldIdentifier = /(?:cc-number|cc-exp|cc-csc|cardnumber|card-number|card_number|card-expiry|cardexpiry|cvv|cvc|security-?code)/i;
    const inputListener = /addEventListener\s*\(\s*['"](?:input|change|keyup|keydown)['"]/;
    if (cardFieldIdentifier.test(text) && inputListener.test(text)) {
        addRuleFinding(state, scriptCompositeRules.payment_input_event_hooks, pageUrl(state) ?? "script", {});
    }
}
394
/**
 * Apply the CSS rules to a text window.
 *
 * Covers URLs referenced from `@import`/`url(...)`, element-hiding
 * declarations, invisible overlays on pages that already have a password or
 * payment form, and `unicode-bidi: bidi-override`.
 *
 * @param {object} state Scanner state.
 * @param {string} text Window text.
 */
function scanCss(state, text) {
    const referencesUrls = /@import|url\(/i.test(text);
    if (referencesUrls) {
        for (const cssUrl of extractCssUrls(text)) {
            addUrl(state, cssUrl);
            const resolved = normalizeUrl(cssUrl, pageUrl(state));
            const isRiskyOffSite = resolved?.relation === "off-site" && hasRiskyUrlFlags(resolved);
            if (isRiskyOffSite) {
                addRuleFinding(state, cssRules.css_imports_suspicious_domain, resolved.normalized, {});
            }
        }
    }

    const hidingDeclaration = /(?:display\s*:\s*none|visibility\s*:\s*hidden|opacity\s*:\s*0|position\s*:\s*absolute[^}]+left\s*:\s*-\d+)/i;
    if (hidingDeclaration.test(text)) {
        increment(state, "hidden_css");
        addRuleFinding(state, cssRules.hidden_link_cluster, pageUrl(state) ?? "css", {});
    }

    // Overlay check: only relevant once a password or payment form has been seen.
    const hasSensitiveForm = state.forms.some((form) => form.hasPassword || form.hasPayment);
    if (hasSensitiveForm) {
        const mentionsFormTerms = /\b(?:form|input|password|card|cc-|checkout|payment)\b/i.test(text);
        if (mentionsFormTerms) {
            const invisibleOverlay = /(?:position\s*:\s*(?:fixed|absolute)[^}]+(?:opacity\s*:\s*0|z-index\s*:\s*9\d{2,}|pointer-events\s*:\s*auto)|(?:opacity\s*:\s*0[^}]+position\s*:\s*(?:fixed|absolute)))/i;
            if (invisibleOverlay.test(text)) {
                increment(state, "invisible_form_overlay");
            }
        }
    }

    if (/unicode-bidi\s*:\s*bidi-override/i.test(text)) {
        addRuleFinding(state, cssRules.unicode_bidi_trick, pageUrl(state) ?? "css", {});
    }
}
417
/**
 * Add a finding for every source-code rule whose pattern matches the text.
 * Findings are attributed to the source filename, else the source URL, else
 * the literal "source".
 *
 * @param {object} state Scanner state.
 * @param {string} text Window text.
 */
function scanSourceText(state, text) {
    for (const rule of sourceCodeRules) {
        if (!rule.pattern.test(text)) {
            continue;
        }
        const { filename, url } = state.source;
        addRuleFinding(state, rule, filename ?? url ?? "source", {});
    }
}
424
/**
 * Decide whether the source-code rules apply to this scan.
 *
 * @param {object} state Scanner state.
 * @returns {boolean} `true` when the source has a (non-empty) filename, or when
 *   the detected content kind is "javascript", "json" or "text".
 */
function shouldScanSourceText(state) {
    const hasFilename = Boolean(state.source.filename);
    return hasFilename || ["javascript", "json", "text"].includes(state.contentKind);
}
429
/**
 * Tell whether a script rule id is a "primitive" signal. scanJavaScript counts
 * primitive signals but does not turn them into findings on their own.
 *
 * @param {string} ruleId Rule identifier.
 * @returns {boolean} `true` for the ids listed below, `false` otherwise.
 */
function isPrimitiveJavaScriptSignal(ruleId) {
    switch (ruleId) {
        case "document_write_script":
        case "innerhtml_script_injection":
        case "insert_adjacent_html":
        case "dynamic_script_src":
        case "script_src_assignment":
        case "append_child_script":
        case "external_request_api_seen":
        case "js_location_external":
        case "decoder_seen":
        case "charcodeat_decoder_loop":
        case "browser_storage_or_clipboard_seen":
            return true;
        default:
            return false;
    }
}
444
/**
 * Inspect the leading bytes of the stream for ELF executables. Runs at most
 * once per scanner; later calls return immediately.
 *
 * When the chunk starts with ELF magic, an ELF finding is added, plus a
 * mismatch finding if the declared content type claims a non-executable
 * binary, plus a finding if the ELF has a writable and executable stack.
 *
 * @param {object} state Scanner state.
 * @param {Uint8Array} chunk First chunk of the stream.
 */
function scanBinaryHeader(state, chunk) {
    if (state.binaryHeaderScanned) {
        return;
    }
    state.binaryHeaderScanned = true;
    if (!hasElfMagic(chunk)) {
        return;
    }
    // Findings are attributed to the URL, else the filename, else "stream".
    const subject = state.source.url ?? state.source.filename ?? "stream";
    addRuleFinding(state, binaryRules.elf_executable_magic, subject, {});
    const declaredType = state.source.contentType;
    if (declaredNonExecutableBinary(declaredType)) {
        addRuleFinding(state, binaryRules.content_type_magic_mismatch, subject, {
            content_type: declaredType ?? ""
        });
    }
    if (elfHasWritableExecutableStack(chunk)) {
        addRuleFinding(state, binaryRules.elf_writable_executable_stack, subject, {});
    }
}
460
/**
 * Apply the binary-string rules to the text. Each matching rule bumps its
 * counter (the rule's `counter`, else its `id`) and adds a finding attributed
 * to the source URL, else the filename, else the literal "binary".
 *
 * @param {object} state Scanner state.
 * @param {string} text Window text.
 */
function scanBinaryStrings(state, text) {
    for (const rule of binaryStringRules) {
        if (!rule.pattern.test(text)) {
            continue;
        }
        increment(state, rule.counter ?? rule.id);
        const { url, filename } = state.source;
        addRuleFinding(state, rule, url ?? filename ?? "binary", {});
    }
}
468
/**
 * Find encoded payloads in the text, decode them, record them as artifacts and
 * rescan the decoded text.
 *
 * Candidates are collected in this order: base64 runs of at least 32
 * characters that have a base64-decoding call within 80 characters, runs of
 * `\xNN` escapes, runs of `\uNNNN` escapes, and `String.fromCharCode(...)`
 * argument lists. Only the first eight candidates are processed; decoded
 * results shorter than eight characters are ignored.
 *
 * @param {object} state Scanner state.
 * @param {string} text Text to search.
 * @param {number} offset Offset attributed to the start of `text`.
 * @param {number} depth Decode depth of `text`.
 * @param {number} maxDecodedBytes Limit passed to decodeCandidate.
 * @param {number} maxDecodeDepth Decoded text is rescanned only while `depth + 1` is below this.
 */
function decodeAndRescan(state, text, offset, depth, maxDecodedBytes, maxDecodeDepth) {
    const candidates = [];

    // Base64 runs only count when a decoding call appears close by.
    const base64Decoder = /\batob\s*\(|fromBase64|Buffer\.from\s*\([^)]*base64/i;
    for (const hit of text.matchAll(/[A-Za-z0-9+/]{32,}={0,2}/g)) {
        const at = hit.index ?? 0;
        const from = Math.max(0, at - 80);
        const to = Math.min(text.length, at + hit[0].length + 80);
        if (base64Decoder.test(text.slice(from, to))) {
            candidates.push(["base64_decoded_string", hit[0], at]);
        }
    }

    // [artifact type, pattern, capture group holding the value to decode]
    const escapePatterns = [
        ["javascript_hex_escapes", /(?:\\x[0-9a-fA-F]{2}){8,}/g, 0],
        ["javascript_unicode_escapes", /(?:\\u[0-9a-fA-F]{4}){6,}/g, 0],
        ["fromcharcode_decoded_string", /String\.fromCharCode\s*\(([\d,\s]+)\)/g, 1]
    ];
    for (const [artifactType, pattern, group] of escapePatterns) {
        for (const hit of text.matchAll(pattern)) {
            candidates.push([artifactType, hit[group], hit.index ?? 0]);
        }
    }

    const nextDepth = depth + 1;
    for (const [artifactType, value, index] of candidates.slice(0, 8)) {
        const decoded = decodeCandidate(artifactType, value, maxDecodedBytes);
        if (!decoded || decoded.length < 8) {
            continue;
        }
        const parentOffset = offset + index;
        state.artifacts.push({
            source: state.source.filename ?? state.source.url ?? "stream",
            artifactType,
            parentOffset,
            depth: nextDepth,
            text: decoded.slice(0, 4096)
        });
        increment(state, artifactType);
        // Base64 artifacts are reported under the "large_base64_blob" rule.
        const ruleKey = artifactType === "base64_decoded_string" ? "large_base64_blob" : artifactType;
        const rule = decodedArtifactRules[ruleKey];
        addRuleFinding(state, rule, state.source.filename ?? state.source.url ?? "stream", { depth: nextDepth });
        if (nextDepth < maxDecodeDepth) {
            scanText(state, decoded, parentOffset, nextDepth, maxDecodedBytes, maxDecodeDepth);
        }
    }
}
500
/**
 * Evaluate the rules that need the whole page rather than a single window:
 * per-form credential/payment rules, formless credential capture, brand
 * impersonation, external-script rules, punycode links on login-like pages,
 * and the content-intent counters.
 *
 * Called from the scanner's finish(); it only adds findings to `state`.
 *
 * @param {object} state Scanner state accumulated by the streaming passes.
 */
function finalizeAggregateRules(state) {
    for (const form of state.forms) {
        const action = form.action ? normalizeUrl(form.action, pageUrl(state)) : null;
        if (form.hasPassword && pageUrl(state)?.startsWith("http://")) {
            addRuleFinding(state, htmlRules.password_form_without_https, pageUrl(state) ?? "form", {});
        }
        if (form.hasPassword && action?.relation === "off-site") {
            addRuleFinding(state, htmlRules.credential_form_posts_off_origin, action.normalized, {});
        }
        if (form.hasPayment && [...state.urls.values()].some((url) => url.relation === "off-site")) {
            addRuleFinding(state, htmlRules.card_fields_plus_external_script, pageUrl(state) ?? "payment-form", {});
        }
        if (form.hasPassword && hasSuspiciousTargetContext(state)) {
            addRuleFinding(state, htmlRules.credential_form_on_suspicious_host, pageUrl(state) ?? "form", {});
        }
    }
    // Formless credential capture (PIN/OTP grid, JS-submit) on a suspicious host.
    if (incremented(state, "page_password_input") && hasSuspiciousTargetContext(state)) {
        addRuleFinding(state, htmlRules.credential_form_on_suspicious_host, pageUrl(state) ?? "form", {});
    }
    // Brand impersonation in CONTENT: the page prominently names a brand and
    // captures credentials, but is not served from that brand's own domain. This
    // is the durable phishing signal — it doesn't depend on the URL or where the
    // form posts (kits collect to same-host PHP just as often as off-origin).
    if (state.forms.some((form) => form.hasPassword) || incremented(state, "page_password_input")) {
        const host = pageHost(state);
        const pageFlags = host ? normalizeUrl(pageUrl(state))?.flags ?? [] : [];
        const throwawayHost = pageFlags.some((flag) => ["shared_hosting_subdomain", "generated_host_label", "suspicious_tld", "punycode", "ip_literal"].includes(flag));
        if (host) {
            for (const brand of PHISH_BRANDS) {
                if (brand.allowed.test(host))
                    continue; // the brand's own domain — not impersonation
                const inTitle = (state.counters["title_brand:" + brand.brand] ?? 0) > 0;
                const mentions = state.counters["content_brand:" + brand.brand] ?? 0;
                // Convict when the page CLAIMS to be the brand (brand in <title>), or the
                // brand dominates the content on a throwaway host (where no legitimate
                // brand login lives). Reputable hosts that merely reference other brands
                // (app-store/social links) don't qualify.
                if (inTitle || (mentions >= 3 && throwawayHost)) {
                    addRuleFinding(state, htmlRules.brand_impersonation_content, pageUrl(state) ?? "site", { brand: brand.brand, mentions, in_title: inTitle });
                    break;
                }
            }
        }
    }
    // External-script rules only apply on pages with a password or payment form.
    const hasSensitivePageContext = state.forms.some((form) => form.hasPassword || form.hasPayment);
    if (hasSensitivePageContext) {
        for (const script of state.externalScripts) {
            addRuleFinding(state, htmlRules.external_script_from_unrelated_domain, script.normalized, { relation: script.relation });
        }
        // state.externalScripts receives one entry per parsed <script src> tag, and
        // scan windows overlap by the carry, so the same tag can be recorded more
        // than once. Count distinct URLs so the overlap cannot push a page over the
        // threshold on its own.
        const riskyExternalScripts = new Set(state.externalScripts.map((script) => script.normalized)).size;
        if (riskyExternalScripts >= 5) {
            addRuleFinding(state, htmlRules.excessive_external_scripts_on_login_page, pageUrl(state) ?? "site", { external_scripts: riskyExternalScripts });
        }
    }
    if ([...state.urls.values()].some((url) => url.flags.includes("punycode")) && incremented(state, "brand_login_or_payment_language")) {
        addRuleFinding(state, htmlRules.login_page_with_punycode_links, pageUrl(state) ?? "site", {});
    }
    if (incremented(state, "content.login_ui_image_reference")) {
        addRuleFinding(state, htmlRules.credential_ui_rendered_as_image, pageUrl(state) ?? "site", {});
    }
    // Crypto trigger-word signals only count on an already-suspicious host. They
    // were built for shared-hosted crypto phishing; on reputable hosts (e.g. a
    // LinkedIn login page that merely contains "wallet"/"swap" in bundled JS) they
    // are pure noise.
    if (hasSuspiciousTargetContext(state)) {
        if (incremented(state, "content.crypto_wallet_login_language")) {
            addRuleFinding(state, htmlRules.crypto_wallet_login_language, pageUrl(state) ?? "site", {});
        }
        if (incremented(state, "content.crypto_trading_landing_language")) {
            addRuleFinding(state, htmlRules.crypto_trading_landing_language, pageUrl(state) ?? "site", {});
        }
    }
    if (incremented(state, "content.seo_trademark_stuffing")) {
        addRuleFinding(state, htmlRules.seo_trademark_stuffing, pageUrl(state) ?? "site", {});
    }
}
578
/**
 * Turn a list of findings into a score between 0 and 100.
 *
 * Findings are grouped by `ruleId`. Each rule contributes its `base` score
 * plus a bonus for repeats (capped by `maxRepeats`, scaled by
 * `repeatMultiplier`), using the score model of the rule's first finding.
 * Rules that share a `maxGroup` contribute only the strongest member of that
 * group, so rules observing the same behaviour in different ways do not stack.
 * The sum is scaled by scoreMultiplier() over all tags seen, rounded, and
 * clamped to [0, 100].
 *
 * @param {Array<object>} findings Findings carrying `ruleId` and `scoreModel`.
 * @returns {number} Integer score from 0 to 100.
 */
export function scoreFindings(findings) {
    const findingsByRule = new Map();
    const tags = new Set();
    for (const finding of findings) {
        if (!findingsByRule.has(finding.ruleId)) {
            findingsByRule.set(finding.ruleId, []);
        }
        findingsByRule.get(finding.ruleId).push(finding);
        for (const tag of finding.scoreModel.tags) {
            tags.add(tag);
        }
    }

    let score = 0;
    const strongestPerGroup = new Map();
    for (const ruleFindings of findingsByRule.values()) {
        const model = ruleFindings[0].scoreModel;
        const repeats = Math.min(ruleFindings.length - 1, model.maxRepeats ?? 0);
        const ruleScore = model.base + repeats * model.base * (model.repeatMultiplier ?? 0);
        if (!model.maxGroup) {
            score += ruleScore;
            continue;
        }
        const strongestSoFar = strongestPerGroup.get(model.maxGroup) ?? 0;
        strongestPerGroup.set(model.maxGroup, Math.max(strongestSoFar, ruleScore));
    }
    for (const groupScore of strongestPerGroup.values()) {
        score += groupScore;
    }

    score *= scoreMultiplier(tags);
    const rounded = Math.round(score);
    return Math.max(0, Math.min(100, rounded));
}
611
/**
 * Compute the score multiplier earned by risky tag combinations.
 * Each matching combination multiplies the running factor; with no matching
 * combination the result is 1.
 *
 * @param {Set<string>} tags all tags seen across the findings
 * @returns {number} multiplicative factor (>= 1)
 */
function scoreMultiplier(tags) {
    const anyOf = (...names) => names.some((name) => tags.has(name));
    const boosts = [
        [anyOf("credential") && anyOf("hosting", "redirect", "url"), 1.2],
        [anyOf("payment", "wallet") && anyOf("exfiltration", "redirect"), 1.15],
        [anyOf("decoded") && anyOf("script", "exfiltration"), 1.15],
        [anyOf("binary") && anyOf("url"), 1.1]
    ];
    let factor = 1;
    for (const [applies, boost] of boosts) {
        if (applies) {
            factor *= boost;
        }
    }
    return factor;
}
623
/**
 * Map a numeric score onto a disposition label.
 *
 * @param {number} score risk score
 * @returns {"block"|"review"|"warn"|"allow"} first band whose floor the score reaches
 */
export function dispositionForScore(score) {
    const bands = [
        [75, "block"],
        [50, "review"],
        [25, "warn"]
    ];
    for (const [floor, label] of bands) {
        if (score >= floor) {
            return label;
        }
    }
    return "allow";
}
632
/**
 * Register every absolute http(s) URL found in `text` with the scan state.
 * Trailing sentence punctuation (. , ; :) is stripped from each match first.
 */
function collectUrls(state, text) {
    const absoluteUrl = /\bhttps?:\/\/[^\s"'<>`\\)]+/gi;
    for (const [candidate] of text.matchAll(absoluteUrl)) {
        const cleaned = candidate.replace(/[.,;:]+$/, "");
        addUrl(state, cleaned);
    }
}
636
/**
 * Extract and normalize every absolute http(s) URL in `text`.
 * Matches that `normalizeUrl` rejects (falsy result) are dropped.
 *
 * @returns {Array} normalized URL records in order of appearance
 */
function urlsInText(text, base) {
    const absoluteUrl = /\bhttps?:\/\/[^\s"'<>`\\)]+/gi;
    const found = [];
    for (const [candidate] of text.matchAll(absoluteUrl)) {
        const record = normalizeUrl(candidate.replace(/[.,;:]+$/, ""), base);
        if (!record) {
            continue;
        }
        found.push(record);
    }
    return found;
}
645
/**
 * Report whether any off-site URL in `text` sits within 160 characters of both
 * a network-send API name (fetch / XMLHttpRequest / sendBeacon / WebSocket) and
 * a match of `signal`.
 *
 * A URL qualifies when its relation is "off-site", or when the relation is
 * "unknown" but a registrable domain was still resolved.
 */
function hasNearbyOffSiteUrlWith(text, base, signal) {
    const absoluteUrl = /\bhttps?:\/\/[^\s"'<>`\\)]+/gi;
    const radius = 160;
    for (const hit of text.matchAll(absoluteUrl)) {
        const record = normalizeUrl(hit[0].replace(/[.,;:]+$/, ""), base);
        if (!record) {
            continue;
        }
        const isOffSite = record.relation === "off-site";
        const isUnknownWithDomain = record.relation === "unknown" && !!record.registrableDomain;
        if (!isOffSite && !isUnknownWithDomain) {
            continue;
        }
        const start = hit.index ?? 0;
        const from = Math.max(0, start - radius);
        const to = Math.min(text.length, start + hit[0].length + radius);
        const context = text.slice(from, to);
        if (/\b(?:fetch|XMLHttpRequest|sendBeacon|WebSocket)\b/.test(context) && signal.test(context)) {
            return true;
        }
    }
    return false;
}
657
/**
 * Detect wallet-interaction code: an injected-provider reference, an
 * `.approve(` / `.permit(` call, or an `eth_*` JSON-RPC method literal.
 */
function hasWalletSignal(text) {
    const probes = [
        /\b(?:window\.ethereum|WalletConnect|ethereum\.request)\b/i,
        /\.(?:approve|permit)\s*\(/i,
        /\bmethod\s*:\s*['"]eth_/i
    ];
    return probes.some((probe) => probe.test(text));
}
660
/**
 * Report whether some match of `left` starts within `distance` characters of
 * the start of some match of `right`.
 *
 * `String.prototype.matchAll` throws a TypeError for a RegExp without the `g`
 * flag, so each RegExp is re-created with `g` added when missing. Re-creating
 * the pattern also means a caller's stale `lastIndex` cannot skip matches.
 * Non-RegExp patterns are passed to `matchAll` as-is.
 *
 * @param {string} text haystack
 * @param {RegExp|string} left first pattern
 * @param {RegExp|string} right second pattern
 * @param {number} distance maximum allowed gap between match start offsets
 * @returns {boolean}
 */
function hasNearbyRegexPair(text, left, right, distance) {
    const startOffsets = (pattern) => {
        let matcher = pattern;
        if (pattern instanceof RegExp) {
            const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
            matcher = new RegExp(pattern.source, flags);
        }
        return Array.from(text.matchAll(matcher), (match) => match.index ?? 0);
    };
    const leftPositions = startOffsets(left);
    const rightPositions = startOffsets(right);
    // Both lists are ascending (matchAll yields in text order), so a single
    // forward sweep replaces the all-pairs comparison.
    let cursor = 0;
    for (const leftIndex of leftPositions) {
        while (cursor < rightPositions.length && rightPositions[cursor] < leftIndex - distance) {
            cursor += 1;
        }
        if (cursor < rightPositions.length && rightPositions[cursor] <= leftIndex + distance) {
            return true;
        }
    }
    return false;
}
665
/**
 * True when the URL record carries at least one flag from the risky set.
 */
function hasRiskyUrlFlags(url) {
    const riskyFlags = new Set([
        "punycode",
        "ip_literal",
        "private_or_localhost",
        "url_shortener",
        "suspicious_tld",
        "suspicious_path_terms",
        "malware_download_like_path"
    ]);
    for (const flag of url.flags) {
        if (riskyFlags.has(flag)) {
            return true;
        }
    }
    return false;
}
668
/**
 * Detect crypto-wallet login wording: a strongly crypto-specific term AND a
 * credential / wallet-connect intent word must both appear somewhere in `text`.
 * Bare "crypto" or "ledger" are deliberately not in the first list because
 * they collide with ordinary sites.
 */
function hasCryptoWalletLoginLanguage(text) {
    const cryptoTerm = /\b(?:metamask|walletconnect|usdt|tether|trust\s+wallet|seed\s+phrase|connect\s+wallet|coinbase|binance|web3)\b/i;
    const intentTerm = /\b(?:login|log\s*in|sign\s*in|connect|password|securely|access|restore|import)\b/i;
    if (!cryptoTerm.test(text)) {
        return false;
    }
    return intentTerm.test(text);
}
674
/**
 * Detect crypto trading-landing wording: at least two DISTINCT crypto-native
 * terms (compared case-insensitively) must occur in `text`.
 *
 * Generic finance words (token, exchange, trade, market, liquidity) are not in
 * the vocabulary because they show up on ordinary sites and in minified JS.
 */
function hasCryptoTradingLandingLanguage(text) {
    const vocabulary = /\b(?:crypto|defi|dexs?|solana|swap|blockchain|wallet|web3|metamask|walletconnect|usdt|tether|coinbase|binance|jupiter|airdrop|staking|seed\s+phrase)\b/gi;
    const distinctTerms = new Set();
    for (const [term] of text.matchAll(vocabulary)) {
        distinctTerms.add(term.toLowerCase());
    }
    return distinctTerms.size >= 2;
}
683
/**
 * Detect title strings stuffed with trademark symbols.
 * Title-like values are pulled from `<title>` elements, JSON `"title"` /
 * `"children"` properties and og:title / twitter:title content pairs; the page
 * is flagged when any single value contains two or more ® / ™ characters.
 */
function hasSeoTrademarkStuffing(text) {
    const titleSources = [
        /<title[^>]*>([^<]{0,240})<\/title>/gis,
        /"(?:title|children)"\s*:\s*"([^"]{0,240})"/gis,
        /"(?:og:title|twitter:title)"\s*,\s*"content"\s*:\s*"([^"]{0,240})"/gis
    ];
    const titles = titleSources.flatMap((source) => Array.from(text.matchAll(source), (match) => match[1]));
    return titles.some((title) => {
        const marks = title.match(/[®™]/g) ?? [];
        return marks.length >= 2;
    });
}
691
/**
 * Detect references to a screenshot / screen capture used as login UI:
 * either an image-ish attribute or key followed by a screenshot term, or a
 * screenshot term within 160 quote-free characters of a login/account word
 * (in either order).
 */
function hasLoginUiImageReference(text) {
    const patterns = [
        /(?:imageData|alt|name|src|media|filename|fileName|url)["':\s{,[\]\\]*(?:[^"'<>]{0,160})?(?:screencapture|screenshot|screen[-_ ]?capture)/i,
        /(?:screencapture|screenshot|screen[-_ ]?capture)[^"'<>]{0,160}\b(?:login|log[-_ ]?in|signin|sign[-_ ]?in|password|account)\b/i,
        /\b(?:login|log[-_ ]?in|signin|sign[-_ ]?in|password|account)\b[^"'<>]{0,160}(?:screencapture|screenshot|screen[-_ ]?capture)/i
    ];
    for (const pattern of patterns) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
696
/**
 * Decide whether the scanned target itself looks suspicious: either an
 * off-site final redirect was recorded, or the source/final URL carries a
 * host-level suspicion flag.
 *
 * `suspicious_path_terms` is intentionally absent from the flag list: a
 * login-intent path such as "/login" exists on every legitimate login page.
 */
function hasSuspiciousTargetContext(state) {
    if (incremented(state, "redirect.final_url_offsite")) {
        return true;
    }
    const hostSuspicionFlags = ["shared_hosting_subdomain", "generated_host_label", "suspicious_tld", "punycode", "ip_literal"];
    for (const url of state.urls.values()) {
        if (!isSourceOrFinalUrl(state, url.normalized)) {
            continue;
        }
        if (url.flags.some((flag) => hostSuspicionFlags.includes(flag))) {
            return true;
        }
    }
    return false;
}
705
/**
 * Record an off-site redirect when the requested URL and the final URL resolve
 * to different registrable domains. Does nothing when either URL is missing or
 * both are the same string.
 */
function scanRedirectContext(state) {
    const { url: requestedUrl, finalUrl } = state.source;
    if (!requestedUrl || !finalUrl || requestedUrl === finalUrl) {
        return;
    }
    const source = normalizeUrl(requestedUrl);
    const final = normalizeUrl(finalUrl, requestedUrl);
    const sourceDomain = source?.registrableDomain;
    const finalDomain = final?.registrableDomain;
    if (!sourceDomain || !finalDomain || sourceDomain === finalDomain) {
        return;
    }
    increment(state, "redirect.final_url_offsite");
    addRuleFinding(state, urlRules.final_url_offsite_redirect, final.normalized, { source_url: source.normalized });
}
715
/**
 * Turn the TLS summary attached to the source (if any) into counters:
 * unauthorized certificate, free DV issuer, organization-validated subject
 * (excluding a list of infrastructure providers), and a per-issuer counter
 * keyed by a slug of the issuer string.
 */
function scanTlsContext(state) {
    const { tls } = state.source;
    if (!tls) {
        return;
    }
    const issuer = tls.issuer ?? "";
    const subject = tls.subject ?? "";
    if (tls.authorized === false) {
        increment(state, "tls.unauthorized_certificate");
    }
    if (/(?:let'?s encrypt|zerossl|buypass|ssl\.com)/i.test(issuer)) {
        increment(state, "tls.free_dv_certificate");
    }
    // The O= component of the subject, when present.
    const organization = subject.match(/(?:^|,\s*)O\s*=\s*([^,]+)/i)?.[1]?.trim();
    if (organization) {
        const isInfrastructureProvider = /^(?:cloudflare|google trust services|amazon|fastly|akamai|wix|netlify|vercel)\b/i.test(organization);
        if (!isInfrastructureProvider) {
            increment(state, "tls.organization_validated_certificate");
        }
    }
    if (issuer) {
        const issuerSlug = issuer
            .toLowerCase()
            .replace(/[^a-z0-9]+/g, "_")
            .replace(/^_|_$/g, "")
            .slice(0, 80);
        increment(state, `tls.issuer.${issuerSlug}`);
    }
}
732
/**
 * Collect absolute http(s) URLs referenced by `@import` rules and `url(...)`
 * tokens in CSS text. Relative references are ignored.
 *
 * @param {string} text CSS source
 * @returns {string[]} raw URLs in order of appearance
 */
function extractCssUrls(text) {
    const reference = /@import\s+(?:url\(\s*)?["']?([^"')\s;]+)|url\(\s*["']?([^"')]+)["']?\s*\)/gi;
    return Array.from(text.matchAll(reference), ([, imported, referenced]) => (imported ?? referenced ?? "").trim().replace(/[.,;:]+$/, ""))
        .filter((candidate) => /^https?:\/\//i.test(candidate));
}
741
/**
 * Normalize `raw` against the page URL, store it in `state.urls`, bump one
 * `url.<flag>` counter per flag, and emit the URL-level rule findings.
 *
 * Several rules only fire when the URL is the scanned page itself (source or
 * final URL) rather than a link found in its content — e.g. shorteners are
 * common in ordinary content, so only a page that IS (or redirects through) a
 * shortener is reported.
 */
function addUrl(state, raw) {
    const normalized = normalizeUrl(raw, pageUrl(state));
    if (!normalized) {
        return;
    }
    const href = normalized.normalized;
    const has = (flag) => normalized.flags.includes(flag);
    const isScannedPage = () => isSourceOrFinalUrl(state, href);
    const report = (rule, metadata = {}) => addRuleFinding(state, rule, href, metadata);
    state.urls.set(href, normalized);
    for (const flag of normalized.flags) {
        increment(state, `url.${flag}`);
    }
    if (has("punycode") && /login|signin|account|verify/i.test(href)) {
        report(urlRules.punycode_login_url);
    }
    // Cloaking only: the page itself is a shortener, not merely linking to one.
    if (normalized.destinationType === "url-shortener" && isScannedPage()) {
        report(urlRules.redirect_to_url_shortener);
    }
    if (has("private_or_localhost") && isScannedPage()) {
        report(urlRules.private_ip_url);
    }
    if (has("ip_literal") && !has("private_or_localhost")) {
        report(urlRules.ip_literal_url);
    }
    if (has("suspicious_tld")) {
        report(urlRules.suspicious_tld_url);
    }
    if (has("download_like_path") && normalized.relation === "off-site") {
        report(urlRules.download_like_external_url);
    }
    if (has("malware_download_like_path") && isScannedPage()) {
        report(urlRules.malware_download_like_url);
    }
    if (has("shared_hosting_subdomain") && isScannedPage()) {
        report(urlRules.shared_hosting_subdomain_url);
    }
    const brand = unrelatedBrandInUrl(normalized);
    if (brand && isScannedPage()) {
        report(urlRules.brand_impersonation_url, { brand });
    }
    if (isScannedPage() && isCredentialPathOnSuspiciousHost(normalized)) {
        report(urlRules.credential_path_on_suspicious_host);
    }
    if (isScannedPage() && isGeneratedSuspiciousLandingUrl(normalized)) {
        report(urlRules.generated_landing_url);
    }
}
786
/**
 * True when `normalizedUrl` equals the normalized form of the scan's source
 * URL or of its final (post-redirect) URL.
 */
function isSourceOrFinalUrl(state, normalizedUrl) {
    const canonical = (rawUrl) => (rawUrl ? normalizeUrl(rawUrl)?.normalized : null);
    const sourceHref = canonical(state.source.url);
    const finalHref = canonical(state.source.finalUrl);
    return [sourceHref, finalHref].some((href) => normalizedUrl === href);
}
791
/**
 * Emit a finding described by a rule definition, tagging its metadata with the
 * rule pack the rule came from.
 */
function addRuleFinding(state, rule, locationValue, metadata) {
    const { id, severity, confidence, score, title, description, locationType, pack } = rule;
    const taggedMetadata = { ...metadata, rule_pack: pack };
    addFinding(state, id, severity, confidence, score, title, description, locationType, locationValue, taggedMetadata);
}
794
/**
 * Append a finding to `state.findings` unless one with the same
 * rule / location-type / location-value triple was already recorded.
 * The current line and column are stored in the metadata; keys supplied in
 * `metadata` take precedence over them.
 */
function addFinding(state, ruleId, severity, confidence, scoreModel, title, description, locationType, locationValue, metadata) {
    const { findingKeys, findings } = state;
    const dedupeKey = `${ruleId}:${locationType}:${locationValue}`;
    if (findingKeys.has(dedupeKey)) {
        return;
    }
    findingKeys.add(dedupeKey);
    const position = { line: state.line, column: state.column };
    const finding = {
        id: `${ruleId}:${findings.length}`,
        ruleId,
        severity,
        confidence,
        score: scoreModel.base,
        scoreModel,
        title,
        description,
        locationType,
        locationValue,
        metadata: { ...position, ...metadata }
    };
    findings.push(finding);
}
813
/**
 * Decide whether an element's attributes make it effectively invisible.
 *
 * An element counts as hidden when its `width` or `height` attribute is <= 1,
 * or when its inline style sets `display:none`, `visibility:hidden` or a zero
 * opacity.
 *
 * The opacity check accepts only a numerically zero value (0, 0.0, .0, 0%).
 * A bare `opacity\s*:\s*0` prefix test would also match `opacity: 0.5`, which
 * is a visible, merely translucent element.
 *
 * NOTE(review): a missing width/height defaults to "1" and therefore satisfies
 * `<= 1`, so elements without explicit dimensions are reported as hidden.
 * Left unchanged here — confirm against callers whether that is intended.
 *
 * @param {Map<string, string>} attrs attribute name -> value
 * @returns {boolean}
 */
function hiddenAttrs(attrs) {
    const width = Number(attrs.get("width") ?? "1");
    const height = Number(attrs.get("height") ?? "1");
    if (width <= 1 || height <= 1) {
        return true;
    }
    const style = attrs.get("style") ?? "";
    return /display\s*:\s*none|visibility\s*:\s*hidden|opacity\s*:\s*(?:0+(?:\.0*)?|\.0+)(?![\d.])/i.test(style);
}
818
/**
 * Decode an obfuscated string candidate according to `kind`.
 *
 * Base64 candidates are rejected when the decoded bytes exceed `maxBytes` or
 * the decoded text is not mostly printable; the escape-based kinds are
 * truncated to `maxBytes` characters instead. Unknown kinds and any decoding
 * error yield `null`.
 *
 * @returns {string|null}
 */
function decodeCandidate(kind, value, maxBytes) {
    const fromHex = (_, hex) => String.fromCharCode(Number.parseInt(hex, 16));
    try {
        switch (kind) {
            case "base64_decoded_string": {
                const bytes = base64Decode(value);
                if (!bytes || bytes.byteLength > maxBytes) {
                    return null;
                }
                const decoded = decodeText(bytes);
                return isMostlyPrintable(decoded) ? decoded : null;
            }
            case "javascript_hex_escapes":
                return value.replace(/\\x([0-9a-fA-F]{2})/g, fromHex).slice(0, maxBytes);
            case "javascript_unicode_escapes":
                return value.replace(/\\u([0-9a-fA-F]{4})/g, fromHex).slice(0, maxBytes);
            case "fromcharcode_decoded_string": {
                const codes = value.split(",").map((part) => Number(part.trim()));
                return codes.map((code) => String.fromCharCode(code)).join("").slice(0, maxBytes);
            }
            default:
                return null;
        }
    }
    catch {
        return null;
    }
}
839
/**
 * Decode a base64 string to bytes, preferring the global `atob` and falling
 * back to `Buffer` when `atob` is unavailable. Returns `null` when neither
 * exists. Errors raised by `atob` for malformed input propagate to the caller.
 */
function base64Decode(value) {
    if (typeof atob !== "function") {
        const bufferCtor = globalThis.Buffer;
        return bufferCtor?.from(value, "base64") ?? null;
    }
    const binary = atob(value);
    const bytes = new Uint8Array(binary.length);
    for (let index = 0; index < binary.length; index += 1) {
        bytes[index] = binary.charCodeAt(index);
    }
    return bytes;
}
847
/**
 * Keep only the first finding for each (ruleId, locationValue) pair,
 * preserving the original order.
 */
function dedupeFindings(findings) {
    const firstByKey = new Map();
    for (const finding of findings) {
        const key = `${finding.ruleId}:${finding.locationValue}`;
        if (!firstByKey.has(key)) {
            firstByKey.set(key, finding);
        }
    }
    return [...firstByKey.values()];
}
857
/**
 * Classify how `host` relates to the base page host.
 *
 * @returns {"unknown"|"same-origin"|"subdomain"|"same-site"|"off-site"}
 *   "unknown" when the base host, base domain or the URL's own domain is
 *   missing; "subdomain" when the host sits beneath the base host itself;
 *   "same-site" for any other host sharing the registrable domain.
 */
function relationFor(host, domain, baseHost, baseDomain) {
    const haveContext = Boolean(baseHost) && Boolean(baseDomain) && Boolean(domain);
    if (!haveContext) {
        return "unknown";
    }
    if (host === baseHost) {
        return "same-origin";
    }
    if (domain !== baseDomain) {
        return "off-site";
    }
    const isBelowBaseHost = host.endsWith(`.${baseHost}`);
    return isBelowBaseHost ? "subdomain" : "same-site";
}
866
/**
 * Classify a URL's destination. Host-based categories (localhost / private /
 * ip / url-shortener) take precedence over the protocol-based ones.
 */
function destinationTypeFor(url, host) {
    if (isPrivateHost(host)) {
        return host === "localhost" ? "localhost" : "private";
    }
    if (isIpLiteral(host)) {
        return "ip";
    }
    if (isUrlShortener(host)) {
        return "url-shortener";
    }
    switch (url.protocol) {
        case "http:":
            return "http";
        case "https:":
            return "https";
        default:
            return "other";
    }
}
879
/**
 * Approximate the registrable domain of `host` without a public-suffix list.
 *
 * Returns `null` for empty hosts, IP literals and "localhost"; returns the
 * host unchanged when it has fewer than two labels. Otherwise the last two
 * labels are used, or the last three when the final two look like a
 * country-code second-level suffix (co.uk, com.au, ...).
 */
export function registrableDomainFor(host) {
    if (!host || isIpLiteral(host) || host === "localhost") {
        return null;
    }
    const labels = host.toLowerCase().split(".").filter(Boolean);
    if (labels.length < 2) {
        return host;
    }
    const suffix = labels.slice(-2).join(".");
    const hasCountrySecondLevel = /^(?:co|com|net|org|gov|ac)\.[a-z]{2}$/.test(suffix) && labels.length >= 3;
    return hasCountrySecondLevel ? labels.slice(-3).join(".") : suffix;
}
891
/**
 * True for dotted-quad IPv4 text (octet ranges are not validated) and for any
 * host containing a colon, which is treated as an IPv6 literal.
 */
function isIpLiteral(host) {
    const dottedQuad = /^(?:\d{1,3}\.){3}\d{1,3}$/;
    if (dottedQuad.test(host)) {
        return true;
    }
    return host.includes(":");
}
894
/**
 * True for "localhost" and for IPv4 literals in the loopback (127/8) or
 * RFC 1918 private ranges (10/8, 172.16/12, 192.168/16).
 *
 * The whole host must be a dotted quad. A prefix-only test would classify
 * ordinary DNS names whose first label happens to be numeric
 * (`10.example.com`, `127.evil.top`) as private addresses.
 *
 * @param {string} host lower-cased hostname
 * @returns {boolean}
 */
function isPrivateHost(host) {
    if (host === "localhost") {
        return true;
    }
    return /^(?:(?:127|10)\.\d{1,3}|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.\d{1,3}\.\d{1,3}$/.test(host);
}
897
/**
 * True when `host` is exactly one of the known URL-shortener hostnames
 * (case-sensitive, no subdomains).
 */
function isUrlShortener(host) {
    const shortenerHost = /^(?:bit\.ly|t\.co|tinyurl\.com|goo\.gl|ow\.ly|is\.gd|buff\.ly|cutt\.ly)$/;
    return shortenerHost.test(host);
}
900
/**
 * True when `host` is a subdomain (not the apex) of a registrable domain that
 * belongs to the shared-hosting / site-builder list below.
 */
function isSharedHostingSubdomain(host, registrableDomain) {
    const sharedHostingDomains = new Set([
        "wixstudio.com",
        "wixsite.com",
        "webflow.io",
        "netlify.app",
        "vercel.app",
        "github.io",
        "pages.dev",
        "workers.dev",
        "edgeone.app",
        "edgeone.dev",
        "firebaseapp.com",
        "web.app",
        "herokuapp.com",
        "render.com",
        "glitch.me",
        "replit.app",
        "replit.dev",
        "wordpress.com",
        "blogspot.com",
        "weebly.com",
        "myshopify.com",
        "godaddysites.com",
        "zapier.app",
        "fwh.is",
        "infinityfreeapp.com",
        "000webhostapp.com",
        "fly.dev",
        "onrender.com",
        "surge.sh",
        "site.je"
    ]);
    const isSubdomain = Boolean(registrableDomain) && host !== registrableDomain;
    return isSubdomain && sharedHostingDomains.has(registrableDomain);
}
936
/**
 * Heuristically decide whether the first label of `host` looks machine
 * generated. Returns false when the label is empty or equals
 * `registrableDomain`.
 */
function isGeneratedHostLabel(host, registrableDomain) {
    const label = host.split(".")[0] ?? "";
    if (!label || label === registrableDomain) {
        return false;
    }
    const digitCount = label.replace(/[^0-9]/g, "").length;
    const heuristics = [
        // Credential-ish keyword followed by a long number.
        () => /(?:client|account|secure|manager|payment|support|verify|login|area)[-_]?\d{5,}/i.test(label),
        // word-word-word-1234 style slugs.
        () => /^[a-z]+(?:-[a-z]+){2,}-\d{4,}$/.test(label),
        // Long unbroken alphanumeric run.
        () => /^[a-z0-9]{16,}$/.test(label),
        // A long hex run anywhere in the label (e.g. pub-de59803496c8489585895b6917266e7c.r2.dev).
        () => /[a-f0-9]{12,}/i.test(label),
        // A short all-hex label that includes a digit (0efbd9f, 0ed8a96, 0c4d4e6).
        () => label.length >= 7 && /^[a-f0-9]+$/i.test(label) && /\d/.test(label),
        // A short label that is ~half digits — the bulk-phishing naming scheme
        // (000p4en, 000ogwl, 000o5eh).
        () => label.length >= 6 && digitCount / label.length >= 0.4
    ];
    return heuristics.some((matches) => matches());
}
951
// Well-known ad, analytics, and tag-manager networks. Scripts from these are
// ubiquitous on legitimate ad-funded sites, so they should not raise the
// external-script signals that target credential-harvest kits.
// Entries may be registrable domains or deeper hostnames (adservice.google.com).
const AD_ANALYTICS_DOMAINS = new Set([
    "doubleclick.net",
    "googlesyndication.com",
    "googletagmanager.com",
    "googletagservices.com",
    "google-analytics.com",
    "googleadservices.com",
    "adservice.google.com",
    "gstatic.com",
    "scorecardresearch.com",
    "quantserve.com",
    "quantcount.com",
    "criteo.com",
    "criteo.net",
    "taboola.com",
    "outbrain.com",
    "adnxs.com",
    "rubiconproject.com",
    "pubmatic.com",
    "casalemedia.com",
    "amazon-adsystem.com",
    "adsrvr.org",
    "moatads.com",
    "indexww.com",
    "openx.net",
    "3lift.com",
    "sharethrough.com",
    "permutive.com",
    "permutive.app",
    "cloudflareinsights.com",
    "newrelic.com",
    "nr-data.net",
    "segment.com",
    "segment.io",
    "optimizely.com",
    "hotjar.com",
    "chartbeat.com",
    "parsely.com",
    "branch.io",
    "onetrust.com",
    "cookielaw.org",
    "fbcdn.net",
    "facebook.net"
]);
/**
 * Report whether a URL is served from a listed ad / analytics network.
 *
 * The host matches when it equals a listed entry or is a subdomain of one.
 * Every dot-suffix of the host with at least two labels is checked. Looking
 * up only the registrable domain would make multi-label entries such as
 * `adservice.google.com` unreachable, because that host reduces to
 * `google.com`.
 *
 * @param {string} normalizedUrl absolute URL
 * @returns {boolean} false for unparsable URLs
 */
export function isAdOrAnalyticsHost(normalizedUrl) {
    let host;
    try {
        host = new URL(normalizedUrl).hostname.toLowerCase();
    }
    catch {
        return false;
    }
    // filter(Boolean) drops the empty label left by a trailing-dot FQDN.
    const labels = host.split(".").filter(Boolean);
    for (let start = 0; start < labels.length - 1; start += 1) {
        if (AD_ANALYTICS_DOMAINS.has(labels.slice(start).join("."))) {
            return true;
        }
    }
    return false;
}
1008
/**
 * True when the last dot-separated label of `host` is on the list of
 * abuse-prone TLDs (case-sensitive).
 */
function isSuspiciousTld(host) {
    const lastLabel = host.slice(host.lastIndexOf(".") + 1);
    const abuseProneTld = /^(?:zip|mov|top|xyz|click|country|gq|tk|ml|cf|ga|work|quest|cam|cfd|icu|buzz)$/;
    return abuseProneTld.test(lastLabel);
}
1012
/**
 * True when a URL path looks like a malware payload drop: a dropper-style path
 * segment (bins, payload, loader, mirai, ...), a shell/ELF/architecture file
 * extension, or a bare CPU-architecture segment.
 */
function isMalwareDownloadLikePath(pathname) {
    const payloadPath = /(?:\/|^)(?:bin|bins|payload|update|loader|bot|mozi|mirai|gafgyt|boatnet|dvr)(?:[./_-]|$)|\.(?:sh|bash|elf|bin|mips|mpsl|arm\d?|x86|x86_64|i686|ppc|sparc)(?:$|[?#])|(?:\/|^)(?:mips|arm\d?|x86|x86_64|i686|ppc|sparc)(?:$|[./_-])/i;
    return payloadPath.test(pathname);
}
1015
// Brand table: each entry lists the keywords that identify the brand and a
// pattern for the hosts the brand legitimately uses. Keywords are matched
// against HOST LABELS only (never the path/query — google.com/search?q=paypal
// is safe), either as an exact label or, for keywords of 6+ characters, as a
// label prefix so concatenated lookalikes such as
// "scotiawealthmanagement.com.evil.tld" are caught.
const PHISH_BRANDS = [
    {
        brand: "google",
        keywords: ["google", "gmail"],
        allowed: /(?:^|\.)(?:google|gmail)\.(?:com|[a-z]{2})$/i
    },
    {
        brand: "microsoft",
        keywords: ["microsoft", "office365", "outlook", "onedrive"],
        allowed: /(?:^|\.)(?:microsoft|microsoftonline|live|office|outlook|sharepoint)\.com$/i
    },
    {
        brand: "apple",
        keywords: ["icloud", "appleid"],
        allowed: /(?:^|\.)(?:apple|icloud)\.com$/i
    },
    {
        brand: "paypal",
        keywords: ["paypal", "paypa1"],
        allowed: /(?:^|\.)paypal\.(?:com|[a-z]{2})$/i
    },
    {
        brand: "amazon",
        keywords: ["amazon"],
        allowed: /(?:^|\.)(?:amazon\.[a-z.]{2,6}|amazonaws\.com|aws\.amazon\.com)$/i
    },
    {
        brand: "netflix",
        keywords: ["netflix"],
        allowed: /(?:^|\.)netflix\.com$/i
    },
    {
        brand: "facebook",
        keywords: ["facebook"],
        allowed: /(?:^|\.)(?:facebook|meta)\.com$/i
    },
    {
        brand: "instagram",
        keywords: ["instagram"],
        allowed: /(?:^|\.)instagram\.com$/i
    },
    {
        brand: "whatsapp",
        keywords: ["whatsapp"],
        allowed: /(?:^|\.)whatsapp\.com$/i
    },
    {
        brand: "linkedin",
        keywords: ["linkedin"],
        allowed: /(?:^|\.)linkedin\.com$/i
    },
    {
        brand: "dropbox",
        keywords: ["dropbox"],
        allowed: /(?:^|\.)dropbox\.com$/i
    },
    {
        brand: "docusign",
        keywords: ["docusign"],
        allowed: /(?:^|\.)docusign\.(?:com|net)$/i
    },
    {
        brand: "wetransfer",
        keywords: ["wetransfer"],
        allowed: /(?:^|\.)wetransfer\.com$/i
    },
    {
        brand: "dhl",
        keywords: ["dhl"],
        allowed: /(?:^|\.)dhl\.(?:com|[a-z]{2})$/i
    },
    {
        brand: "fedex",
        keywords: ["fedex"],
        allowed: /(?:^|\.)fedex\.com$/i
    },
    {
        brand: "usps",
        keywords: ["usps"],
        allowed: /(?:^|\.)usps\.com$/i
    },
    {
        brand: "roblox",
        keywords: ["roblox"],
        allowed: /(?:^|\.)roblox\.com$/i
    },
    {
        brand: "steam",
        keywords: ["steamcommunity", "steampowered"],
        allowed: /(?:^|\.)steam(?:community|powered)\.com$/i
    },
    {
        brand: "scotiabank",
        keywords: ["scotiabank", "scotiawealth", "scotiaonline"],
        allowed: /(?:^|\.)scotiabank\.com$/i
    },
    {
        brand: "wellsfargo",
        keywords: ["wellsfargo"],
        allowed: /(?:^|\.)wellsfargo\.com$/i
    },
    {
        brand: "chase",
        keywords: ["chase"],
        allowed: /(?:^|\.)chase\.com$/i
    },
    {
        brand: "bankofamerica",
        keywords: ["bankofamerica"],
        allowed: /(?:^|\.)bankofamerica\.com$/i
    },
    {
        brand: "citi",
        keywords: ["citibank", "citigroup"],
        allowed: /(?:^|\.)citi\.com$/i
    },
    {
        brand: "coinbase",
        keywords: ["coinbase"],
        allowed: /(?:^|\.)coinbase\.com$/i
    },
    {
        brand: "binance",
        keywords: ["binance"],
        allowed: /(?:^|\.)binance\.(?:com|us)$/i
    },
    {
        brand: "kraken",
        keywords: ["kraken"],
        allowed: /(?:^|\.)kraken\.com$/i
    },
    {
        brand: "metamask",
        keywords: ["metamask"],
        allowed: /(?:^|\.)metamask\.io$/i
    },
    {
        brand: "ledger",
        keywords: ["ledger"],
        allowed: /(?:^|\.)ledger\.com$/i
    },
    {
        brand: "tangem",
        keywords: ["tangem"],
        allowed: /(?:^|\.)tangem\.com$/i
    },
    {
        brand: "etoro",
        keywords: ["etoro"],
        allowed: /(?:^|\.)etoro\.com$/i
    },
    {
        brand: "ionos",
        keywords: ["ionos"],
        allowed: /(?:^|\.)ionos\.(?:com|de|co\.uk)$/i
    },
    {
        brand: "allegro",
        keywords: ["allegro"],
        allowed: /(?:^|\.)allegro\.(?:pl|com)$/i
    }
];
1053
/**
 * Undo leetspeak / homoglyph substitutions in a lower-case host label so that
 * g00gle, paypa1 or micr0s0ft collapse onto the brand spelling.
 *
 * "1" is ambiguous (i or l), so the caller supplies its replacement via `one`
 * and is expected to try both. Substitutions run in a fixed order and anything
 * that is still not a-z afterwards is dropped.
 */
function deleet(label, one) {
    const substitutions = [
        [/0/g, "o"],
        [/1/g, one],
        [/3/g, "e"],
        [/4/g, "a"],
        [/5/g, "s"],
        [/7/g, "t"],
        [/8/g, "b"],
        [/9/g, "g"],
        [/\$/g, "s"],
        [/@/g, "a"],
        [/!/g, "i"],
        [/[^a-z]/g, ""]
    ];
    let result = label;
    for (const [pattern, replacement] of substitutions) {
        result = result.replace(pattern, replacement);
    }
    return result;
}
1071
/**
 * Return the brand a URL's host appears to impersonate, or `null`.
 *
 * A brand is reported when the host is not on that brand's allow-list and
 * either (a) a subdomain label equals a brand keyword, or starts with one of
 * 6+ characters, before or after de-leeting; or (b) the apex label is a
 * leet/homoglyph variant of a keyword. An apex label that is EXACTLY the
 * keyword (google.co.uk) is treated as the brand's own domain and not flagged.
 */
function unrelatedBrandInUrl(url) {
    let host;
    try {
        host = new URL(url.normalized).hostname.toLowerCase();
    }
    catch {
        return null;
    }
    const registrable = registrableDomainFor(host) ?? host;
    // Everything left of the registrable domain, without the joining dot.
    let subPart = host;
    if (host.endsWith(registrable)) {
        subPart = host.slice(0, host.length - registrable.length).replace(/\.$/, "");
    }
    const subLabels = subPart ? subPart.split(/[.\-_]/).filter(Boolean) : [];
    const subVariants = [...new Set(subLabels.flatMap((label) => [label, deleet(label, "i"), deleet(label, "l")]))];
    const mainLabel = registrable.split(".")[0] ?? "";
    const mainVariants = [deleet(mainLabel, "i"), deleet(mainLabel, "l")];
    const inSubdomain = (kw) => subVariants.some((label) => label === kw || (kw.length >= 6 && label.startsWith(kw)));
    const isApexTyposquat = (kw) => mainLabel !== kw && mainVariants.includes(kw);
    for (const entry of PHISH_BRANDS) {
        if (entry.allowed.test(host)) {
            continue;
        }
        if (entry.keywords.some((kw) => inSubdomain(kw) || isApexTyposquat(kw))) {
            return entry.brand;
        }
    }
    return null;
}
1105
// URL flags that mark the HOST itself (not the path) as suspicious.
const SUSPICIOUS_HOST_FLAGS = [
    "shared_hosting_subdomain",
    "generated_host_label",
    "suspicious_tld",
    "punycode",
    "ip_literal",
    "url_shortener"
];
1106
/**
 * Answer "did this redirect leave the site, and is the destination itself
 * sketchy?".
 *
 * A hop within one registrable domain (google.com -> www.google.com) is not
 * off-site. An off-site hop is only marked suspicious when the destination
 * carries a suspicious-host flag (shortener, punycode, IP, shared host, ...),
 * so google.com -> google.de is off-site but not suspicious.
 *
 * @param {string} requestedUrl URL originally requested
 * @param {string} finalUrl URL after redirects
 * @returns {object|null} assessment, or null when either URL cannot be parsed
 */
export function assessRedirect(requestedUrl, finalUrl) {
    let requested;
    let final;
    try {
        requested = new URL(requestedUrl);
        final = new URL(finalUrl);
    }
    catch {
        return null;
    }
    const registrableOf = (parsed) => registrableDomainFor(parsed.hostname) ?? parsed.hostname;
    const requestedRegistrable = registrableOf(requested);
    const finalRegistrable = registrableOf(final);
    const offSite = requestedRegistrable !== finalRegistrable;
    let destinationFlags = [];
    if (offSite) {
        destinationFlags = normalizeUrl(final.href)?.flags ?? [];
    }
    const destinationSuspicious = destinationFlags.some((flag) => SUSPICIOUS_HOST_FLAGS.includes(flag));
    return { offSite, destinationSuspicious, requestedRegistrable, finalRegistrable, destinationFlags };
}
1128
/**
 * True when a URL has a login/account/verify-style path AND its host carries
 * a suspicious-host flag. Works from the URL record alone, so it can fire
 * before any page content is loaded.
 */
function isCredentialPathOnSuspiciousHost(url) {
    if (!url.flags.includes("suspicious_path_terms")) {
        return false;
    }
    return url.flags.some((flag) => SUSPICIOUS_HOST_FLAGS.includes(flag));
}
1133
/**
 * Detect generated landing URLs: a random-looking first host label, a UUID
 * path segment, and a host on a gambling / throwaway TLD or containing a
 * betting keyword. All three conditions must hold.
 */
function isGeneratedSuspiciousLandingUrl(url) {
    const { hostname, pathname } = new URL(url.normalized);
    const host = hostname.toLowerCase();
    const path = pathname.toLowerCase();
    const firstLabel = host.split(".")[0] ?? "";
    const looksGenerated = /^[a-z]{6,10}$/.test(firstLabel) || /^[a-z0-9]{8,18}$/.test(firstLabel);
    if (!looksGenerated) {
        return false;
    }
    const hasUuidSegment = /\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(?:\/|$)/i.test(path);
    if (!hasUuidSegment) {
        return false;
    }
    return /\.(?:casino|sbs|xyz|top|click|app|co)$/.test(host) || /(?:bet|casino|poker|winx|winsport|perfectgame|parspoker|venusbet)/i.test(host);
}
1143
/**
 * True when `bytes` begins with the 4-byte ELF magic number (0x7F 'E' 'L' 'F').
 */
function hasElfMagic(bytes) {
    const magic = [0x7f, 0x45, 0x4c, 0x46];
    if (bytes.length < magic.length) {
        return false;
    }
    return magic.every((expected, index) => bytes[index] === expected);
}
1146
/**
 * True when a Content-Type is present and its media type (parameters removed)
 * does not mention an executable / octet-stream type. Missing or empty
 * content types return false.
 */
function declaredNonExecutableBinary(contentType) {
    const [mediaType] = (contentType ?? "").toLowerCase().split(";");
    const value = mediaType.trim();
    if (!value) {
        return false;
    }
    const executableHint = /(?:elf|executable|x-executable|x-pie-executable|octet-stream)/;
    return !executableHint.test(value);
}
1150
/**
 * True when text extracted from a binary contains strings typical of IoT
 * botnet payloads (shell paths, iptables/busybox, C2 markers, Mozi DHT
 * markers).
 */
function likelyBinaryStrings(text) {
    const botnetMarker = /(?:\/bin\/sh|\/dev\/shm|\/proc\/net\/route|iptables|busybox|cfgtool|sendcmd|\[cnc\]|1:q9:find_node|Mozi\.)/i;
    return botnetMarker.test(text);
}
1153
/**
 * Inspect an ELF image's program headers and report whether PT_GNU_STACK is
 * marked both writable and executable.
 *
 * Every read is bounds-checked against the buffer, so a truncated or malformed
 * file yields `false` rather than a DataView RangeError:
 *  - ELF64 keeps e_phentsize / e_phnum at offsets 54 / 56, so 58 header bytes
 *    are required (52 is enough for ELF32).
 *  - p_flags sits at +4 in a 64-bit program header and at +24 in a 32-bit one,
 *    so the per-entry check must cover the flags word, not just the first 8
 *    bytes.
 *
 * @param {Uint8Array} bytes start of the file
 * @returns {boolean}
 */
function elfHasWritableExecutableStack(bytes) {
    // ELF magic, checked inline so the bounds logic below is self-contained.
    const hasMagic = bytes.length >= 4 && bytes[0] === 0x7f && bytes[1] === 0x45 && bytes[2] === 0x4c && bytes[3] === 0x46;
    if (!hasMagic) {
        return false;
    }
    const is64Bit = bytes[4] === 2;
    if (bytes.length < (is64Bit ? 58 : 52)) {
        return false;
    }
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    // EI_DATA: 2 means big-endian; anything else is read as little-endian.
    const littleEndian = bytes[5] !== 2;
    const programHeaderOffset = is64Bit
        ? Number(dataView.getBigUint64(32, littleEndian))
        : dataView.getUint32(28, littleEndian);
    const programHeaderEntrySize = dataView.getUint16(is64Bit ? 54 : 42, littleEndian);
    const programHeaderCount = dataView.getUint16(is64Bit ? 56 : 44, littleEndian);
    if (!programHeaderOffset || !programHeaderEntrySize || !programHeaderCount) {
        return false;
    }
    const PT_GNU_STACK = 0x6474e551;
    const PF_X = 0x1;
    const PF_W = 0x2;
    const flagsFieldOffset = is64Bit ? 4 : 24;
    for (let index = 0; index < programHeaderCount; index += 1) {
        const offset = programHeaderOffset + index * programHeaderEntrySize;
        // p_type is the first word and p_flags the last one read in either
        // layout, so covering the flags word covers both reads.
        if (offset + flagsFieldOffset + 4 > bytes.length) {
            return false;
        }
        const type = dataView.getUint32(offset, littleEndian);
        const flags = dataView.getUint32(offset + flagsFieldOffset, littleEndian);
        if (type === PT_GNU_STACK && (flags & PF_X) && (flags & PF_W)) {
            return true;
        }
    }
    return false;
}
1182
/**
 * Emit technology-fingerprint findings for library and platform references
 * found in `text`. Each pattern is checked independently, in a fixed order,
 * and reports against `locationValue`.
 */
function scanTechnologyFingerprint(state, text, locationValue) {
    const fingerprints = [
        [/\bjquery[-.]1\.\d+(?:\.\d+)?(?:\.min)?\.js\b|jQuery v1\./i, "legacy_jquery_reference"],
        [/\bangular(?:\.min)?\.js\b|angularjs|AngularJS v1\.|angular\.version/i, "legacy_angularjs_reference"],
        [/\bbootstrap(?:\.min)?\.js\b|bootstrap[-.]3\.\d+(?:\.\d+)?(?:\.min)?\.js\b|Bootstrap v3\./i, "legacy_bootstrap_reference"],
        [/\blodash[-.]4\.17\.(?:[0-9]|1[0-9]|20)(?:\.min)?\.js\b|lodash v4\.17\.(?:[0-9]|1[0-9]|20)/i, "legacy_lodash_reference"],
        [/(?:sites\/default\/files|drupal-settings-json|Drupal\.settings|\/core\/misc\/drupal\.js)/i, "drupal_surface_reference"],
        [/\b(?:phpMyAdmin|pma_navigation|\/phpmyadmin\/|\/pma\/)\b/i, "phpmyadmin_surface_reference"]
    ];
    for (const [pattern, ruleKey] of fingerprints) {
        if (pattern.test(text)) {
            addRuleFinding(state, htmlTechnologyRules[ruleKey], locationValue, {});
        }
    }
}
1202
/**
 * Best available URL for the scanned page: the final URL, else the requested
 * URL, else the origin URL (first one that is not null/undefined).
 */
function pageUrl(state) {
    const { finalUrl, url, originUrl } = state.source;
    return finalUrl ?? url ?? originUrl;
}
1205
/**
 * Lower-cased hostname of the scanned page, or `null` when no page URL is
 * known or it cannot be parsed.
 */
function pageHost(state) {
    const href = pageUrl(state);
    if (!href) {
        return null;
    }
    let parsed;
    try {
        parsed = new URL(href);
    }
    catch {
        return null;
    }
    return parsed.hostname.toLowerCase();
}
1216
/**
 * Decode bytes as UTF-8 in non-fatal mode: malformed sequences become U+FFFD
 * instead of throwing.
 */
function decodeText(bytes) {
    const lenientUtf8 = new TextDecoder("utf-8", { fatal: false });
    return lenientUtf8.decode(bytes);
}
1219
/**
 * Keep at most the last `max` characters of `value`; shorter values are
 * returned unchanged.
 */
function trimWindow(value, max) {
    if (value.length <= max) {
        return value;
    }
    const start = value.length - max;
    return value.slice(start);
}
1222
/**
 * Advance the line/column cursor over `text` and mirror the current line and
 * absolute offset into the counters. A newline starts a new line with the
 * column reset to 1; every other character moves the column forward by one.
 */
function updatePosition(state, text) {
    for (const char of text) {
        const isNewline = char === "\n";
        if (isNewline) {
            state.line += 1;
        }
        state.column = isNewline ? 1 : state.column + 1;
    }
    const { counters } = state;
    counters.lines_seen = state.line;
    counters.bytes_seen = state.absoluteOffset;
}
1235
/**
 * Number of bytes `text` occupies when encoded as UTF-8.
 */
function byteLength(text) {
    const encoded = new TextEncoder().encode(text);
    return encoded.byteLength;
}
1238
/**
 * Decide whether decoded text is plausibly human-readable: at least 85% of the
 * characters in the first 4096 UTF-16 units must be printable. Newline,
 * carriage return and tab count as printable; other control characters and
 * U+FFFD (the decoder's replacement character) do not.
 *
 * Both the printable count and the total are measured in code points.
 * Dividing a code-point count by the UTF-16 length would halve the ratio for
 * astral-plane text (emoji, some CJK) and reject legitimate strings.
 *
 * @param {string} text decoded candidate
 * @returns {boolean} false for empty input
 */
function isMostlyPrintable(text) {
    if (!text) {
        return false;
    }
    const chars = Array.from(text.slice(0, 4096));
    const printable = chars.filter((char) => char === "\n" || char === "\r" || char === "\t" || (char >= " " && char !== "\uFFFD")).length;
    return printable / chars.length >= 0.85;
}
1245
/**
 * Add one to the named counter, creating it at 1 when it does not exist yet.
 */
function increment(state, key) {
    const { counters } = state;
    const previous = counters[key] ?? 0;
    counters[key] = previous + 1;
}
1248
/**
 * True when the named counter exists and is greater than zero.
 */
function incremented(state, key) {
    const count = state.counters[key] ?? 0;
    return count > 0;
}
1251
+ //# sourceMappingURL=index.js.map