@q32/signal-scanner 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +201 -0
- package/package.json +62 -0
- package/scripts/check-coverage.ts +33 -0
- package/scripts/eval.ts +311 -0
- package/scripts/render-isolate/entry.ts +2 -0
- package/scripts/render-isolate/polyfills.ts +33 -0
- package/scripts/render-isolate/run.ts +63 -0
- package/scripts/scan.ts +612 -0
- package/src/dynamic.ts +273 -0
- package/src/feeds.ts +334 -0
- package/src/index.ts +1366 -0
- package/src/intel.ts +561 -0
- package/src/node-tls.ts +55 -0
- package/src/render.ts +233 -0
- package/src/rules/packs/binary.ts +103 -0
- package/src/rules/packs/css.ts +44 -0
- package/src/rules/packs/decoders.ts +47 -0
- package/src/rules/packs/html.ts +255 -0
- package/src/rules/packs/index.ts +76 -0
- package/src/rules/packs/script-risk.ts +236 -0
- package/src/rules/packs/source-code.ts +180 -0
- package/src/rules/packs/urls.ts +138 -0
- package/src/rules/types.ts +56 -0
package/src/intel.ts
ADDED
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
// URL threat-intelligence correlation for the signal scanner.
|
|
2
|
+
//
|
|
3
|
+
// Runtime-agnostic: depends only on the WHATWG `fetch`, `URL`, and standard
|
|
4
|
+
// timers, so it runs unchanged in Node and in Cloudflare Workers. Everything
|
|
5
|
+
// environment-specific (the fetch implementation, API keys, bounds) is injected
|
|
6
|
+
// through `UrlIntelConfig` — there are no `process.env` or node imports here.
|
|
7
|
+
//
|
|
8
|
+
// It takes the URLs/hosts a scan discovered, correlates them against open and
|
|
9
|
+
// keyed reputation sources, and returns both normalized scanner `Finding`s and a
|
|
10
|
+
// per-source result so callers can show which feeds ran, matched, or failed.
|
|
11
|
+
|
|
12
|
+
import type { Finding, Severity, Confidence } from "./index";
|
|
13
|
+
import type { RuleScoreModel } from "./rules/types";
|
|
14
|
+
import { matchCachedFeeds, type IntelStorage } from "./feeds";
|
|
15
|
+
|
|
16
|
+
export type { IntelStorage } from "./feeds";
|
|
17
|
+
|
|
18
|
+
export interface UrlIntelConfig {
|
|
19
|
+
/** Fetch implementation. Defaults to the global `fetch`. */
|
|
20
|
+
fetchImpl?: typeof fetch;
|
|
21
|
+
/** Google Safe Browsing API key. When absent, that source reports an error. */
|
|
22
|
+
googleSafeBrowsingKey?: string;
|
|
23
|
+
/** abuse.ch Auth-Key (required by URLhaus + ThreatFox). When absent, those calls go unauthenticated. */
|
|
24
|
+
abuseChAuthKey?: string;
|
|
25
|
+
/** Storage backing the cached blocklist feeds. When present, the cached-feed source runs. */
|
|
26
|
+
storage?: IntelStorage;
|
|
27
|
+
/** Skip all network calls and return no sources (e.g. non-production). */
|
|
28
|
+
disabled?: boolean;
|
|
29
|
+
/** Max distinct hosts queried per host-based source. Default 100. */
|
|
30
|
+
maxHosts?: number;
|
|
31
|
+
/** Max distinct URLs queried per URL-based source. Default 100. */
|
|
32
|
+
maxUrls?: number;
|
|
33
|
+
/** Per-request timeout in milliseconds. Default 4000. */
|
|
34
|
+
timeoutMs?: number;
|
|
35
|
+
/** User-Agent sent with feed requests. */
|
|
36
|
+
userAgent?: string;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
export type IntelSourceStatus = "match" | "clean" | "error";
|
|
40
|
+
|
|
41
|
+
export interface IntelMatch {
|
|
42
|
+
/** Stable source id, e.g. "urlhaus", "threatfox", "google-safebrowsing", "cached-feeds". */
|
|
43
|
+
source: string;
|
|
44
|
+
/** Human-readable provider name. */
|
|
45
|
+
provider: string;
|
|
46
|
+
url?: string;
|
|
47
|
+
host?: string;
|
|
48
|
+
/** Evidence strength (0-100). Live API sources are high; cached feeds carry their band. */
|
|
49
|
+
score: number;
|
|
50
|
+
detail: Record<string, unknown>;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
export interface IntelSourceResult {
|
|
54
|
+
source: string;
|
|
55
|
+
provider: string;
|
|
56
|
+
status: IntelSourceStatus;
|
|
57
|
+
reason?: string;
|
|
58
|
+
urlsChecked: number;
|
|
59
|
+
hostsChecked: number;
|
|
60
|
+
matches: IntelMatch[];
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
export interface UrlIntelReport {
|
|
64
|
+
sources: IntelSourceResult[];
|
|
65
|
+
matches: IntelMatch[];
|
|
66
|
+
findings: Finding[];
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export interface UrlIntelInput {
|
|
70
|
+
urls?: string[];
|
|
71
|
+
hosts?: string[];
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
interface IntelContext {
|
|
75
|
+
fetch: typeof fetch;
|
|
76
|
+
timeoutMs: number;
|
|
77
|
+
userAgent: string;
|
|
78
|
+
googleSafeBrowsingKey?: string;
|
|
79
|
+
abuseChAuthKey?: string;
|
|
80
|
+
storage?: IntelStorage;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Live API hits are treated as strong evidence; cached feeds carry their own band.
|
|
84
|
+
const LIVE_INTEL_SCORE = 95;
|
|
85
|
+
|
|
86
|
+
interface IntelSource {
|
|
87
|
+
source: string;
|
|
88
|
+
provider: string;
|
|
89
|
+
run(input: { urls: string[]; hosts: string[] }, ctx: IntelContext): Promise<IntelSourceResult>;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const DEFAULT_MAX_HOSTS = 100;
|
|
93
|
+
const DEFAULT_MAX_URLS = 100;
|
|
94
|
+
const DEFAULT_TIMEOUT_MS = 4000;
|
|
95
|
+
const DEFAULT_USER_AGENT = "q32-signal-scanner/0.1";
|
|
96
|
+
|
|
97
|
+
/**
 * Correlate discovered URLs/hosts against threat-intelligence sources.
 *
 * Never throws for individual source failures — a failed source is reported
 * with `status: "error"` so callers can surface it instead of treating a feed
 * outage as a clean result.
 *
 * @param input  URLs and/or hosts discovered by a scan. Hosts are also derived
 *               from the (capped) URL list.
 * @param config Injected fetch implementation, API keys, storage and bounds.
 * @returns Per-source results, the flattened match list, and one `Finding`
 *          per match. When `config.disabled` is set the report is empty and
 *          no other work is done.
 */
export async function checkUrlIntel(input: UrlIntelInput, config: UrlIntelConfig = {}): Promise<UrlIntelReport> {
  // Short-circuit first: a disabled run must not touch the input at all.
  if (config.disabled) {
    return { sources: [], matches: [], findings: [] };
  }

  const maxHosts = config.maxHosts ?? DEFAULT_MAX_HOSTS;
  const maxUrls = config.maxUrls ?? DEFAULT_MAX_URLS;
  const urls = dedupe(input.urls ?? []).slice(0, maxUrls);
  // Hostnames are case-insensitive and URL-derived hosts are already lowercase,
  // so normalize caller-supplied hosts the same way. Otherwise "Example.com"
  // and "example.com" survive dedupe as two targets and are queried twice.
  const callerHosts = (input.hosts ?? []).map((host) => host.trim().toLowerCase());
  const hosts = dedupe([...callerHosts, ...hostsFromUrls(urls)]).slice(0, maxHosts);

  const ctx: IntelContext = {
    // Must be bound to globalThis: calling it as ctx.fetch(...) otherwise makes
    // `this` the context object, which Cloudflare rejects (Illegal Invocation).
    fetch: config.fetchImpl ?? globalThis.fetch.bind(globalThis),
    timeoutMs: config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    userAgent: config.userAgent ?? DEFAULT_USER_AGENT,
    googleSafeBrowsingKey: config.googleSafeBrowsingKey,
    abuseChAuthKey: config.abuseChAuthKey,
    storage: config.storage
  };

  // The cached-feed source only joins when storage is wired — we don't list a
  // source we can't query.
  const activeSources = ctx.storage ? [...INTEL_SOURCES, CACHED_FEEDS_SOURCE] : INTEL_SOURCES;
  const sources = await Promise.all(activeSources.map((source) => source.run({ urls, hosts }, ctx)));
  const matches = sources.flatMap((source) => source.matches);
  const findings = matches.map((match, index) => findingForMatch(match, index));
  return { sources, matches, findings };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Build intel lookup targets from a scanner report's URL inventory: the
 * distinct non-empty normalized URLs, plus the distinct hostnames parsed
 * from them.
 */
export function intelTargetsFromUrls(urls: Array<{ normalized: string }>): { urls: string[]; hosts: string[] } {
  const usable: string[] = [];
  for (const entry of urls) {
    if (entry.normalized) usable.push(entry.normalized);
  }
  const distinctUrls = dedupe(usable);
  const distinctHosts = dedupe(hostsFromUrls(usable));
  return { urls: distinctUrls, hosts: distinctHosts };
}
|
|
138
|
+
|
|
139
|
+
/**
 * Run `lookup` over `targets` strictly one at a time, collecting every hit and
 * remembering the message of the most recent failed lookup (if any).
 */
async function lookupEach(
  targets: string[],
  lookup: (target: string) => Promise<IntelMatch | null>
): Promise<{ hits: IntelMatch[]; lastError: string | undefined }> {
  const hits: IntelMatch[] = [];
  let lastError: string | undefined;
  for (const target of targets) {
    const outcome = await guarded(() => lookup(target));
    if (outcome.error) {
      lastError = outcome.error;
    } else if (outcome.value) {
      hits.push(outcome.value);
    }
  }
  return { hits, lastError };
}

// Live reputation sources that always participate in a run.
const INTEL_SOURCES: IntelSource[] = [
  {
    source: "urlhaus",
    provider: "URLhaus",
    // Host lookups run first, then exact-URL lookups; an error from the URL
    // pass takes precedence over one from the host pass.
    async run(input, ctx) {
      const byHost = await lookupEach(input.hosts, (host) => queryUrlhausHost(host, ctx));
      const byUrl = await lookupEach(input.urls, (url) => queryUrlhausUrl(url, ctx));
      return settle(
        { source: "urlhaus", provider: "URLhaus" },
        input.urls.length,
        input.hosts.length,
        [...byHost.hits, ...byUrl.hits],
        byUrl.lastError ?? byHost.lastError
      );
    }
  },
  {
    source: "threatfox",
    provider: "ThreatFox",
    // Host-only source: no URLs are checked.
    async run(input, ctx) {
      const byHost = await lookupEach(input.hosts, (host) => queryThreatFoxHost(host, ctx));
      return settle({ source: "threatfox", provider: "ThreatFox" }, 0, input.hosts.length, byHost.hits, byHost.lastError);
    }
  },
  {
    source: "google-safebrowsing",
    provider: "Google Safe Browsing",
    // URL-only source, queried with a single batched request. A missing key is
    // reported as an error rather than as a clean result.
    async run(input, ctx) {
      const identity = { source: "google-safebrowsing", provider: "Google Safe Browsing" };
      const urlsChecked = input.urls.length;
      const failed = (reason: string): IntelSourceResult => ({
        ...identity,
        status: "error",
        reason,
        urlsChecked,
        hostsChecked: 0,
        matches: []
      });

      if (!ctx.googleSafeBrowsingKey) return failed("Google Safe Browsing key not configured");
      const outcome = await guarded(() => queryGoogleSafeBrowsing(input.urls, ctx));
      if (outcome.error) return failed(outcome.error);
      return settle(identity, urlsChecked, 0, outcome.value ?? [], undefined);
    }
  }
];
|
|
192
|
+
|
|
193
|
+
// Cached blocklist source: looks the crawled hosts up in the feed index held
// in storage. It only joins a run when config.storage is provided.
const CACHED_FEEDS_SOURCE: IntelSource = {
  source: "cached-feeds",
  provider: "Blocklist feeds",
  async run(input, ctx) {
    const identity = { source: "cached-feeds", provider: "Blocklist feeds" };
    const storage = ctx.storage;
    if (!storage) {
      return { ...identity, status: "clean", urlsChecked: 0, hostsChecked: 0, matches: [] };
    }

    const hostsChecked = input.hosts.length;
    const outcome = await guarded(() => matchCachedFeeds(storage, input.hosts));
    if (outcome.error) {
      return { ...identity, status: "error", reason: outcome.error, urlsChecked: 0, hostsChecked, matches: [] };
    }

    // Re-shape each feed hit into the common IntelMatch form, keeping the
    // feed's own score.
    const matches: IntelMatch[] = [];
    for (const entry of outcome.value ?? []) {
      matches.push({
        source: "cached-feeds",
        provider: entry.source ? `Blocklist: ${entry.source}` : "Blocklist feeds",
        host: entry.host,
        score: entry.score,
        detail: { feed: entry.feedId, feed_source: entry.source }
      });
    }
    return settle(identity, 0, hostsChecked, matches, undefined);
  }
};
|
|
215
|
+
|
|
216
|
+
/**
 * Fold the outcome of a source's lookups into a single `IntelSourceResult`.
 *
 * - any matches            -> `status: "match"` (with `reason` when some
 *                             lookups also failed, so a partial outage is
 *                             still visible to the caller)
 * - no matches, an error   -> `status: "error"` with the error as `reason`
 * - neither                -> `status: "clean"`
 *
 * @param base         Source id and provider name copied onto the result.
 * @param urlsChecked  Number of URLs the source was asked about.
 * @param hostsChecked Number of hosts the source was asked about.
 * @param matches      Matches the source produced.
 * @param errorReason  Message of a failed lookup, if any lookup failed.
 */
function settle(
  base: { source: string; provider: string },
  urlsChecked: number,
  hostsChecked: number,
  matches: IntelMatch[],
  errorReason: string | undefined
): IntelSourceResult {
  if (matches.length) {
    // Matches win the status, but do not hide that other lookups failed.
    return errorReason
      ? { ...base, status: "match", reason: errorReason, urlsChecked, hostsChecked, matches }
      : { ...base, status: "match", urlsChecked, hostsChecked, matches };
  }
  if (errorReason) return { ...base, status: "error", reason: errorReason, urlsChecked, hostsChecked, matches: [] };
  return { ...base, status: "clean", urlsChecked, hostsChecked, matches: [] };
}
|
|
227
|
+
|
|
228
|
+
/**
 * Run a lookup and convert a thrown error into a value instead of a rejection.
 *
 * @param query The lookup to run.
 * @returns `{ value }` when the lookup resolves, otherwise `{ error }` with a
 *          non-empty message. Callers test `error` for truthiness, so an
 *          `Error` with an empty message (or a non-Error throw) is reported
 *          as "lookup failed" rather than as an empty string that would be
 *          mistaken for success.
 */
async function guarded<T>(query: () => Promise<T>): Promise<{ value?: T; error?: string }> {
  try {
    return { value: await query() };
  } catch (error) {
    const message = error instanceof Error ? error.message : "";
    return { error: message || "lookup failed" };
  }
}
|
|
235
|
+
|
|
236
|
+
// ---- Sources -------------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
/**
 * Ask URLhaus whether it lists `host`. Returns a host-level match graded by
 * `urlhausHostStrength`, or null when the host is skipped or not listed.
 */
async function queryUrlhausHost(host: string, ctx: IntelContext): Promise<IntelMatch | null> {
  // Private/local hosts are never sent to the feed.
  if (isPrivateOrLocalHost(host)) return null;

  // Multi-tenant hosts (cloud buckets, CDNs, app-hosting platforms) accumulate
  // feed entries for OTHER tenants' malicious objects. Loading an asset from
  // such a host must not transfer that reputation to the scanned site — only
  // an exact-URL match (see queryUrlhausUrl, which still runs against the
  // crawled URL inventory) convicts.
  if (isMultiTenantHost(host)) return null;

  const response = await postForm("https://urlhaus-api.abuse.ch/v1/host/", { host }, ctx);
  if (response === null || response.query_status !== "ok") return null;

  const listed: any[] = Array.isArray(response.urls) ? response.urls : [];

  // Being listed is not proof of malware infrastructure: popular hosts (open
  // redirectors, file hosts) pick up entries when one URL is abused. The score
  // therefore reflects how live and how recent the evidence is.
  const strength = urlhausHostStrength(listed);

  return {
    source: "urlhaus",
    provider: "URLhaus",
    host,
    score: strength.score,
    detail: {
      query_status: response.query_status,
      url_count: listed.length,
      online_count: strength.onlineCount,
      score_basis: strength.basis,
      sample: listed.slice(0, 5)
    }
  };
}
|
|
269
|
+
|
|
270
|
+
const RECENT_INTEL_DAYS = 90;
|
|
271
|
+
|
|
272
|
+
/**
 * Score a URLhaus host listing from the liveness (`url_status`) and recency
 * (`date_added`) of its URL entries.
 *
 * Bands, strongest first: an online entry added within the recent window (95),
 * any online entry (75), a recent offline entry (45), otherwise (20).
 */
function urlhausHostStrength(urls: any[]): { score: number; basis: string; onlineCount: number } {
  let onlineCount = 0;
  let sawRecentOnline = false;
  let sawRecentOffline = false;

  for (const entry of urls) {
    const isOnline = String(entry?.url_status ?? "").toLowerCase() === "online";
    const ageDays = daysSince(entry?.date_added);
    const isRecent = ageDays !== null && ageDays <= RECENT_INTEL_DAYS;

    if (isOnline) onlineCount += 1;
    if (!isRecent) continue;
    if (isOnline) {
      sawRecentOnline = true;
    } else {
      sawRecentOffline = true;
    }
  }

  let score = 20;
  let basis = "offline_aged";
  if (sawRecentOnline) {
    score = 95;
    basis = "online_recent";
  } else if (onlineCount > 0) {
    score = 75;
    basis = "online_aged";
  } else if (sawRecentOffline) {
    score = 45;
    basis = "offline_recent";
  }
  return { score, basis, onlineCount };
}
|
|
295
|
+
|
|
296
|
+
/**
 * Ask URLhaus about one exact URL. Returns a full-strength match when the feed
 * answers `query_status: "ok"`, or null when the URL has no usable public host
 * or is not listed.
 */
async function queryUrlhausUrl(url: string, ctx: IntelContext): Promise<IntelMatch | null> {
  const host = hostOf(url);
  if (!host) return null;
  if (isPrivateOrLocalHost(host)) return null;

  const response = await postForm("https://urlhaus-api.abuse.ch/v1/url/", { url }, ctx);
  if (response === null || response.query_status !== "ok") return null;

  const tags = Array.isArray(response.tags) ? response.tags : [];
  return {
    source: "urlhaus",
    provider: "URLhaus",
    url,
    host,
    score: LIVE_INTEL_SCORE,
    detail: {
      query_status: response.query_status,
      threat: response.threat,
      url_status: response.url_status,
      tags
    }
  };
}
|
|
315
|
+
|
|
316
|
+
/**
 * Search ThreatFox for `host` and return a match only when a host-level IOC
 * (domain / ip / ip:port) names exactly that host.
 */
async function queryThreatFoxHost(host: string, ctx: IntelContext): Promise<IntelMatch | null> {
  if (isPrivateOrLocalHost(host)) return null;

  const response = await postJson("https://threatfox-api.abuse.ch/api/v1/", { query: "search_ioc", search_term: host }, ctx);
  if (response === null || response.query_status !== "ok") return null;

  const iocs: any[] = Array.isArray(response.data) ? response.data : [];
  const wanted = host.toLowerCase();

  // ThreatFox `search_ioc` is a SUBSTRING search: a query for "google.com"
  // also returns "guard-google.com", "google.com-x18-...sslip.io", a malware
  // file on "drive.google.com", and so on. None of those convict google.com
  // itself. Only domain/IP IOCs whose host EXACTLY equals the queried host
  // count; URL IOCs flag one path on a (possibly shared) host and are not a
  // host-level verdict.
  const hostLevelTypes = ["domain", "ip:port", "ip"];
  const exact = iocs.filter(
    (ioc) => hostLevelTypes.includes(String(ioc?.ioc_type ?? "")) && iocHost(ioc?.ioc) === wanted
  );
  if (exact.length === 0) return null;

  return {
    source: "threatfox",
    provider: "ThreatFox",
    host,
    score: LIVE_INTEL_SCORE,
    detail: {
      query_status: response.query_status,
      ioc_count: exact.length,
      sample: exact.slice(0, 5)
    }
  };
}
|
|
346
|
+
|
|
347
|
+
/**
 * Reduce a ThreatFox IOC string to its lowercase host. Accepts a full URL, a
 * bare domain, or `host:port` (optionally followed by a path). Returns null
 * for empty input or when no host part remains.
 */
function iocHost(ioc: unknown): string | null {
  if (!ioc) return null;
  const text = String(ioc).trim();
  if (text.includes("://")) return hostOf(text);

  // Keep everything before the first "/" or ":" — that drops path and port.
  const cut = text.search(/[/:]/);
  const bare = cut === -1 ? text : text.slice(0, cut);
  return bare === "" ? null : bare.toLowerCase();
}
|
|
355
|
+
|
|
356
|
+
/**
 * Age, in (fractional) days, of an abuse.ch-style timestamp such as
 * "2024-11-12 06:08:05 UTC". Returns null for a missing or unparseable value.
 */
function daysSince(value: unknown): number | null {
  if (!value) return null;

  const msPerDay = 86_400_000;
  // Rewrite "YYYY-MM-DD HH:MM:SS UTC" into ISO-8601 before parsing.
  const iso = String(value).replace(" UTC", "Z").replace(" ", "T");
  const parsedAt = Date.parse(iso);
  return Number.isNaN(parsedAt) ? null : (Date.now() - parsedAt) / msPerDay;
}
|
|
363
|
+
|
|
364
|
+
/**
 * Check the discovered URLs against Google Safe Browsing with one batched
 * `threatMatches:find` request. URLs without a usable public host are left
 * out, and at most 500 entries are sent. Returns one match per reported
 * threat, or an empty list when nothing is queried or nothing matches.
 */
async function queryGoogleSafeBrowsing(urls: string[], ctx: IntelContext): Promise<IntelMatch[]> {
  const entries: string[] = [];
  for (const url of urls) {
    const host = hostOf(url);
    if (!host) continue;
    if (isPrivateOrLocalHost(host)) continue;
    entries.push(url);
  }
  if (entries.length === 0) return [];

  const endpoint = `https://safebrowsing.googleapis.com/v4/threatMatches:find?key=${encodeURIComponent(ctx.googleSafeBrowsingKey ?? "")}`;
  const payload = {
    client: { clientId: "q32-signal-scanner", clientVersion: "0.1" },
    threatInfo: {
      threatTypes: ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION"],
      platformTypes: ["ANY_PLATFORM"],
      threatEntryTypes: ["URL"],
      threatEntries: entries.slice(0, 500).map((url) => ({ url }))
    }
  };

  const response = await postJson(endpoint, payload, ctx);
  if (response === null || !Array.isArray(response.matches)) return [];

  return response.matches.map((match: any) => {
    const threatUrl = match?.threat?.url;
    return {
      source: "google-safebrowsing",
      provider: "Google Safe Browsing",
      url: typeof threatUrl === "string" ? threatUrl : undefined,
      host: hostOf(String(threatUrl ?? "")) ?? undefined,
      score: LIVE_INTEL_SCORE,
      detail: {
        threat_type: match?.threatType,
        platform_type: match?.platformType,
        cache_duration: match?.cacheDuration
      }
    };
  });
}
|
|
398
|
+
|
|
399
|
+
// ---- Findings ------------------------------------------------------------
|
|
400
|
+
|
|
401
|
+
/**
 * Translate an evidence score (0-100) into a severity bucket:
 * 85+ is "high", 60+ is "medium", 40+ is "low", anything else is "info".
 */
export function severityForScore(score: number): Severity {
  const bands: Array<[number, Severity]> = [
    [85, "high"],
    [60, "medium"],
    [40, "low"]
  ];
  for (const [floor, severity] of bands) {
    if (score >= floor) return severity;
  }
  return "info";
}
|
|
408
|
+
|
|
409
|
+
/**
 * Convert one intel match into a scanner `Finding`.
 *
 * @param match The intel match to report.
 * @param index Position of the match in the report; it makes the finding id
 *              unique within a rule.
 */
function findingForMatch(match: IntelMatch, index: number): Finding {
  const { source, provider, url, host, score, detail } = match;
  const subject = url ? "URL" : "host";
  const ruleId = `intel.${source}`;
  const confidence: Confidence = score >= 80 ? "high" : "medium";
  const scoreModel: RuleScoreModel = { base: score, tags: ["hosting", "url"] };

  return {
    id: `${ruleId}:${index}`,
    ruleId,
    severity: severityForScore(score),
    confidence,
    score,
    scoreModel,
    title: `Known-bad ${subject} flagged by ${provider}`,
    description: `${provider} threat intelligence matched a crawled ${subject} (score ${score}).`,
    locationType: "url",
    // Prefer the exact URL, fall back to the host.
    locationValue: url ?? host ?? "unknown",
    // Source-specific detail is spread last, after the common fields.
    metadata: { intel_source: source, provider, host, url, score, ...detail }
  };
}
|
|
427
|
+
|
|
428
|
+
// ---- HTTP + host helpers -------------------------------------------------
|
|
429
|
+
|
|
430
|
+
// abuse.ch services (URLhaus, ThreatFox) authenticate with an `Auth-Key`
// header. It is attached only to requests whose host ends in ".abuse.ch", so
// the key never reaches any other endpoint (e.g. Safe Browsing).
function abuseChHeaders(url: string, ctx: IntelContext): Record<string, string> {
  const key = ctx.abuseChAuthKey;
  if (!key) return {};
  const isAbuseCh = /\.abuse\.ch$/i.test(safeHost(url));
  return isAbuseCh ? { "Auth-Key": key } : {};
}
|
|
436
|
+
|
|
437
|
+
/** Lowercase hostname of `url`, or the empty string when `url` does not parse. */
function safeHost(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return "";
  }
  return parsed.hostname.toLowerCase();
}
|
|
444
|
+
|
|
445
|
+
/**
 * POST `body` as `application/x-www-form-urlencoded` and parse the JSON reply.
 *
 * @returns The parsed reply when it is a plain JSON object, otherwise null.
 * @throws When the response status is not OK, or on any fetch/parse failure
 *         (including the per-request timeout).
 */
async function postForm(url: string, body: Record<string, string>, ctx: IntelContext): Promise<Record<string, any> | null> {
  const headers = {
    "content-type": "application/x-www-form-urlencoded",
    "user-agent": ctx.userAgent,
    ...abuseChHeaders(url, ctx)
  };
  const response = await ctx.fetch(url, {
    method: "POST",
    headers,
    body: new URLSearchParams(body),
    signal: AbortSignal.timeout(ctx.timeoutMs)
  });
  if (!response.ok) {
    throw new Error(`${url} responded ${response.status}`);
  }

  const payload: unknown = await response.json();
  const isPlainObject = payload !== null && typeof payload === "object" && !Array.isArray(payload);
  return isPlainObject ? (payload as Record<string, any>) : null;
}
|
|
456
|
+
|
|
457
|
+
/**
 * POST `body` as JSON and parse the JSON reply.
 *
 * @returns The parsed reply when it is a plain JSON object, otherwise null.
 * @throws When the response status is not OK, or on any fetch/parse failure
 *         (including the per-request timeout). The error message names the
 *         endpoint WITHOUT its query string or fragment.
 */
async function postJson(url: string, body: Record<string, unknown>, ctx: IntelContext): Promise<Record<string, any> | null> {
  const response = await ctx.fetch(url, {
    method: "POST",
    headers: { "content-type": "application/json", "user-agent": ctx.userAgent, ...abuseChHeaders(url, ctx) },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(ctx.timeoutMs)
  });
  if (!response.ok) {
    // The Safe Browsing endpoint carries the API key as `?key=...`. This
    // message is surfaced to callers as the source's `reason`, so strip the
    // query/fragment to keep the secret out of reports and logs.
    const endpoint = url.split(/[?#]/)[0];
    throw new Error(`${endpoint} responded ${response.status}`);
  }
  const parsed = await response.json();
  return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? (parsed as Record<string, any>) : null;
}
|
|
468
|
+
|
|
469
|
+
/** Lowercase hostname of `url`, or null when `url` does not parse as a URL. */
function hostOf(url: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  return parsed.hostname.toLowerCase();
}
|
|
476
|
+
|
|
477
|
+
/**
 * Hostnames of the given URLs, in input order. URLs that do not parse, or
 * that have no host, contribute nothing. Duplicates are kept.
 */
function hostsFromUrls(urls: string[]): string[] {
  return urls.flatMap((url) => {
    const host = hostOf(url);
    return host ? [host] : [];
  });
}
|
|
485
|
+
|
|
486
|
+
/** Distinct non-empty values, in first-seen order. */
function dedupe(values: string[]): string[] {
  const seen = new Set<string>();
  for (const value of values) {
    if (value) seen.add(value);
  }
  return Array.from(seen);
}
|
|
489
|
+
|
|
490
|
+
// Hosts that serve content for many independent tenants under one name: cloud
|
|
491
|
+
// object storage, file sharing, CDNs that publish arbitrary packages/content,
|
|
492
|
+
// and app/site-hosting platforms (one tenant per subdomain). One tenant's
|
|
493
|
+
// malicious object is not evidence about a different tenant, so these are never
|
|
494
|
+
// convicted at the host level — only an exact-URL match counts.
|
|
495
|
+
const MULTI_TENANT_HOST_EXACT = new Set([
|
|
496
|
+
"storage.googleapis.com",
|
|
497
|
+
"firebasestorage.googleapis.com",
|
|
498
|
+
"drive.google.com",
|
|
499
|
+
"docs.google.com",
|
|
500
|
+
"s3.amazonaws.com",
|
|
501
|
+
"raw.githubusercontent.com",
|
|
502
|
+
"gist.githubusercontent.com",
|
|
503
|
+
"objects.githubusercontent.com",
|
|
504
|
+
"cdn.jsdelivr.net",
|
|
505
|
+
"unpkg.com",
|
|
506
|
+
"files.catbox.moe",
|
|
507
|
+
"cdn.discordapp.com",
|
|
508
|
+
"media.discordapp.net"
|
|
509
|
+
]);
|
|
510
|
+
|
|
511
|
+
const MULTI_TENANT_HOST_SUFFIXES = [
|
|
512
|
+
// Cloud object storage
|
|
513
|
+
".amazonaws.com",
|
|
514
|
+
".blob.core.windows.net",
|
|
515
|
+
".r2.dev",
|
|
516
|
+
".r2.cloudflarestorage.com",
|
|
517
|
+
".digitaloceanspaces.com",
|
|
518
|
+
".googleusercontent.com",
|
|
519
|
+
".dropboxusercontent.com",
|
|
520
|
+
// CDNs serving arbitrary tenant/package content
|
|
521
|
+
".cloudfront.net",
|
|
522
|
+
".akamaihd.net",
|
|
523
|
+
".b-cdn.net",
|
|
524
|
+
".fastly.net",
|
|
525
|
+
// App / site hosting platforms (one tenant per subdomain)
|
|
526
|
+
".web.app",
|
|
527
|
+
".firebaseapp.com",
|
|
528
|
+
".netlify.app",
|
|
529
|
+
".vercel.app",
|
|
530
|
+
".github.io",
|
|
531
|
+
".herokuapp.com",
|
|
532
|
+
".pages.dev",
|
|
533
|
+
".workers.dev",
|
|
534
|
+
".azurewebsites.net",
|
|
535
|
+
".appspot.com",
|
|
536
|
+
".glitch.me",
|
|
537
|
+
".repl.co",
|
|
538
|
+
".surge.sh"
|
|
539
|
+
];
|
|
540
|
+
|
|
541
|
+
/**
 * True when `host` (compared case-insensitively) is one of the known
 * multi-tenant hosts, or ends with one of the known multi-tenant suffixes.
 */
export function isMultiTenantHost(host: string): boolean {
  const name = host.toLowerCase();
  if (MULTI_TENANT_HOST_EXACT.has(name)) {
    return true;
  }
  for (const suffix of MULTI_TENANT_HOST_SUFFIXES) {
    if (name.endsWith(suffix)) return true;
  }
  return false;
}
|
|
546
|
+
|
|
547
|
+
/**
 * True when `host` names something that must never be sent to an external
 * reputation service: localhost / `.localhost` / `.local` names, private,
 * loopback or link-local IPv4 literals, and loopback, unique-local
 * (fc00::/7) or link-local (fe80::/10) IPv6 literals.
 *
 * @param host A hostname or IP literal. IPv6 literals may be bare (`::1`) or
 *             bracketed as `URL.hostname` returns them (`[::1]`).
 */
export function isPrivateOrLocalHost(host: string): boolean {
  // URL.hostname wraps IPv6 literals in brackets; compare on the bare address.
  const lower = host.toLowerCase().replace(/^\[|\]$/g, "");
  if (lower === "localhost" || lower.endsWith(".localhost") || lower.endsWith(".local")) return true;

  if (/^\d+\.\d+\.\d+\.\d+$/.test(lower)) {
    const [first = NaN, second = NaN] = lower.split(".").map(Number);
    return (
      first === 10 ||
      first === 127 ||
      (first === 172 && second >= 16 && second <= 31) ||
      (first === 192 && second === 168) ||
      (first === 169 && second === 254)
    );
  }

  // Only strings containing ":" can be IPv6 literals. Without this guard a
  // prefix test would misclassify public DNS names such as "fcbarcelona.com"
  // or "fdic.gov" as private and silently exclude them from intel lookups.
  if (!lower.includes(":")) return false;
  if (lower === "::1") return true;
  // fc00::/7 -> first hextet fc00-fdff; fe80::/10 -> first hextet fe80-febf.
  return /^f[cd][0-9a-f]{2}:/.test(lower) || /^fe[89ab][0-9a-f]:/.test(lower);
}
|
package/src/node-tls.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { connect } from "node:tls";
|
|
2
|
+
import type { PeerCertificate } from "node:tls";
|
|
3
|
+
import type { TlsMetadata } from "./index";
|
|
4
|
+
|
|
5
|
+
export interface CollectTlsMetadataOptions {
|
|
6
|
+
port?: number;
|
|
7
|
+
timeoutMs?: number;
|
|
8
|
+
rejectUnauthorized?: boolean;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/**
 * Open a TLS connection to `target` and report its peer-certificate metadata.
 *
 * This is best-effort: every failure mode resolves to `undefined` rather than
 * rejecting — a malformed target, a non-HTTPS target, a connection error, a
 * timeout, or a peer that presents no certificate.
 *
 * @param target  A URL, or a string that is either a full URL or a bare host
 *                (which is treated as `https://<host>`).
 * @param options Port override, timeout (default 5000 ms) and whether to
 *                reject unauthorized certificates (default false, so
 *                metadata can still be collected for invalid certificates).
 */
export async function collectTlsMetadata(target: string | URL, options: CollectTlsMetadataOptions = {}): Promise<TlsMetadata | undefined> {
  let url: URL;
  try {
    url = typeof target === "string" ? new URL(target.includes("://") ? target : `https://${target}`) : target;
  } catch {
    // A target that does not parse is just another "no metadata" outcome.
    return undefined;
  }
  if (url.protocol !== "https:") return undefined;
  const port = options.port ?? (url.port ? Number(url.port) : 443);
  const timeoutMs = options.timeoutMs ?? 5_000;

  return await new Promise<TlsMetadata | undefined>((resolve) => {
    const socket = connect({
      host: url.hostname,
      port,
      servername: url.hostname,
      rejectUnauthorized: options.rejectUnauthorized ?? false,
      timeout: timeoutMs
    });
    const done = (metadata?: TlsMetadata): void => {
      socket.removeAllListeners();
      // Defensive: keep a no-op error listener so an error emitted while the
      // socket is being torn down cannot surface as an unhandled 'error'.
      socket.on("error", () => {});
      socket.destroy();
      resolve(metadata);
    };
    socket.once("secureConnect", () => {
      const certificate = socket.getPeerCertificate();
      if (!certificate || Object.keys(certificate).length === 0) return done(undefined);
      done({
        authorized: socket.authorized,
        authorizationError: socket.authorizationError ? String(socket.authorizationError) : null,
        issuer: distinguishedName(certificate.issuer),
        subject: distinguishedName(certificate.subject),
        validFrom: certificate.valid_from,
        validTo: certificate.valid_to,
        fingerprint256: certificate.fingerprint256,
        serialNumber: certificate.serialNumber
      });
    });
    socket.once("timeout", () => done(undefined));
    socket.once("error", () => done(undefined));
  });
}
|
|
48
|
+
|
|
49
|
+
/**
 * Render a certificate name as "KEY=value, KEY=value". Only non-empty string
 * attributes are included; returns undefined when there is nothing to show.
 */
function distinguishedName(value: PeerCertificate["issuer"]): string | undefined {
  if (!value) return undefined;
  const parts: string[] = [];
  for (const [key, item] of Object.entries(value)) {
    if (typeof item === "string" && item) {
      parts.push(`${key}=${item}`);
    }
  }
  return parts.length > 0 ? parts.join(", ") : undefined;
}
|