cipher-security 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,24 +3,68 @@
3
3
  // CIPHER is a trademark of defconxt.
4
4
 
5
5
  /**
6
- * CIPHER OSINT Pipeline — structured OSINT workflows.
6
+ * CIPHER OSINT Pipeline — comprehensive open-source intelligence.
7
7
  *
8
- * - Domain intelligence and WHOIS enrichment
9
- * - IP reputation and classification
10
- * - Document metadata extraction (EXIF, PDF)
11
- * - Investigation orchestration
8
+ * Integrates Bellingcat toolkit methodology with programmatic tool wrappers:
12
9
  *
13
- * Ported from pipeline/osint.py (322 LOC Python).
10
+ * Domain intelligence:
11
+ * - DNS (A/AAAA/MX/NS/TXT/CNAME/SOA via dig or node:dns)
12
+ * - WHOIS (registration, registrar, dates, name servers)
13
+ * - Certificate Transparency (crt.sh subdomain discovery)
14
+ * - Wayback Machine (archive snapshots)
15
+ * - Web technology fingerprinting (headers + HTML analysis)
16
+ * - URL reputation (urlscan.io when API key available)
17
+ *
18
+ * IP intelligence:
19
+ * - Reverse DNS, classification (private/public/multicast/loopback)
20
+ * - IP geolocation (ip-api.com — free, no key)
21
+ * - Abuse contact lookup (via WHOIS)
22
+ *
23
+ * People/username:
24
+ * - Sherlock (username → 400+ social platforms)
25
+ * - Holehe (email → account existence on 120+ services)
26
+ *
27
+ * Document metadata:
28
+ * - EXIF extraction (exiftool)
29
+ * - PDF metadata (pdfinfo)
30
+ *
31
+ * Archive/history:
32
+ * - Wayback Machine CDX API (snapshot history)
33
+ * - archive.today availability check
34
+ *
35
+ * All external tool calls degrade gracefully when tools aren't installed.
36
+ * All network calls use configurable timeouts. No API keys required for
37
+ * core functionality — optional keys unlock deeper queries.
38
+ *
39
+ * @module pipeline/osint
14
40
  */
15
41
 
16
- import { execFileSync } from 'node:child_process';
42
+ import { execFileSync, execSync } from 'node:child_process';
17
43
  import dns from 'node:dns';
18
44
  import net from 'node:net';
19
- import { promisify } from 'node:util';
45
+ import { resolve, dirname } from 'node:path';
46
+ import { existsSync } from 'node:fs';
47
+ import { fileURLToPath } from 'node:url';
20
48
 
21
- const resolve4 = promisify(dns.resolve4);
22
- const resolve6 = promisify(dns.resolve6);
23
- const reversePromise = promisify(dns.reverse);
49
+ const __dirname = dirname(fileURLToPath(import.meta.url));
50
+ const HTTP_TIMEOUT = 15000;
51
+
52
+ /**
53
+ * Find the Python venv path for OSINT tools (sherlock, holehe).
54
+ * Walks up from cli/lib/pipeline/ to find .venv/bin/.
55
+ * @returns {string|null}
56
+ */
57
+ function findVenvBin() {
58
+ let dir = resolve(__dirname, '..', '..', '..');
59
+ for (let i = 0; i < 5; i++) {
60
+ const venvBin = resolve(dir, '.venv', 'bin');
61
+ if (existsSync(venvBin)) return venvBin;
62
+ const parent = dirname(dir);
63
+ if (parent === dir) break;
64
+ dir = parent;
65
+ }
66
+ return null;
67
+ }
24
68
 
25
69
  // ---------------------------------------------------------------------------
26
70
  // IP classification helper
@@ -28,53 +72,37 @@ const reversePromise = promisify(dns.reverse);
28
72
 
29
73
  /**
30
74
  * Check if an IP address is private (RFC 1918 / RFC 4193).
31
- *
32
- * IPv4: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 127.0.0.0/8
33
- * IPv6: ::1, fc00::/7, fe80::/10
34
- *
35
75
  * @param {string} ip
36
76
  * @returns {boolean}
37
77
  */
38
78
  function isPrivateIP(ip) {
39
79
  const version = net.isIP(ip);
40
80
  if (version === 0) return false;
41
-
42
81
  if (version === 4) {
43
82
  const parts = ip.split('.').map(Number);
44
- if (parts[0] === 10) return true; // 10.0.0.0/8
45
- if (parts[0] === 172 && parts[1] >= 16 && parts[1] <= 31) return true; // 172.16.0.0/12
46
- if (parts[0] === 192 && parts[1] === 168) return true; // 192.168.0.0/16
47
- if (parts[0] === 127) return true; // 127.0.0.0/8
83
+ if (parts[0] === 10) return true;
84
+ if (parts[0] === 172 && parts[1] >= 16 && parts[1] <= 31) return true;
85
+ if (parts[0] === 192 && parts[1] === 168) return true;
86
+ if (parts[0] === 127) return true;
48
87
  return false;
49
88
  }
50
-
51
- // IPv6
52
89
  const lower = ip.toLowerCase();
53
90
  if (lower === '::1') return true;
54
-
55
- // Expand the first 16 bits for fc00::/7 and fe80::/10 checks
56
- // fc00::/7 covers fc00:: - fdff::
57
- // fe80::/10 covers fe80:: - febf::
58
- const expanded = _expandIPv6Prefix(lower);
59
- if (expanded !== null) {
60
- if (expanded >= 0xfc00 && expanded <= 0xfdff) return true; // fc00::/7
61
- if (expanded >= 0xfe80 && expanded <= 0xfebf) return true; // fe80::/10
62
- }
63
-
91
+ const parts = lower.split(':');
92
+ if (!parts[0]) return false;
93
+ const val = parseInt(parts[0], 16);
94
+ if (isNaN(val)) return false;
95
+ if (val >= 0xfc00 && val <= 0xfdff) return true;
96
+ if (val >= 0xfe80 && val <= 0xfebf) return true;
64
97
  return false;
65
98
  }
66
99
 
67
- /**
68
- * Extract the first 16-bit group from an IPv6 address.
69
- * @param {string} ip lowercased IPv6 address
70
- * @returns {number|null}
71
- */
72
- function _expandIPv6Prefix(ip) {
73
- // Handle :: expansion — we only need the first group
74
- const parts = ip.split(':');
75
- if (!parts[0]) return 0; // starts with :: (e.g. ::1)
76
- const val = parseInt(parts[0], 16);
77
- return isNaN(val) ? null : val;
100
+ function _isMulticast(ip, version) {
101
+ if (version === 4) {
102
+ const first = parseInt(ip.split('.')[0], 10);
103
+ return first >= 224 && first <= 239;
104
+ }
105
+ return ip.toLowerCase().startsWith('ff');
78
106
  }
79
107
 
80
108
  // ---------------------------------------------------------------------------
@@ -82,15 +110,6 @@ function _expandIPv6Prefix(ip) {
82
110
  // ---------------------------------------------------------------------------
83
111
 
84
112
  class OSINTResult {
85
- /**
86
- * @param {object} opts
87
- * @param {string} opts.source
88
- * @param {string} opts.query
89
- * @param {object} [opts.data={}]
90
- * @param {string} [opts.confidence='medium'] high | medium | low
91
- * @param {string} [opts.timestamp]
92
- * @param {string} [opts.collectionMethod='passive'] passive | active
93
- */
94
113
  constructor(opts) {
95
114
  this.source = opts.source;
96
115
  this.query = opts.query;
@@ -100,15 +119,8 @@ class OSINTResult {
100
119
  this.collectionMethod = opts.collectionMethod ?? 'passive';
101
120
  }
102
121
 
103
- /** @type {boolean} Quick success check — true unless data contains an error. */
104
- get success() {
105
- return !this.data.error;
106
- }
107
-
108
- /** @type {string|null} Error message if present. */
109
- get error() {
110
- return this.data.error ?? null;
111
- }
122
+ get success() { return !this.data.error; }
123
+ get error() { return this.data.error ?? null; }
112
124
 
113
125
  toDict() {
114
126
  return {
@@ -122,16 +134,47 @@ class OSINTResult {
122
134
  }
123
135
  }
124
136
 
137
+ // ---------------------------------------------------------------------------
138
+ // HTTP helper (uses global fetch, available in Node 18+)
139
+ // ---------------------------------------------------------------------------
140
+
141
+ async function fetchJSON(url, opts = {}) {
142
+ const controller = new AbortController();
143
+ const timer = setTimeout(() => controller.abort(), opts.timeout || HTTP_TIMEOUT);
144
+ try {
145
+ const resp = await fetch(url, {
146
+ signal: controller.signal,
147
+ headers: opts.headers || {},
148
+ });
149
+ if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
150
+ return await resp.json();
151
+ } finally {
152
+ clearTimeout(timer);
153
+ }
154
+ }
155
+
156
+ async function fetchText(url, opts = {}) {
157
+ const controller = new AbortController();
158
+ const timer = setTimeout(() => controller.abort(), opts.timeout || HTTP_TIMEOUT);
159
+ try {
160
+ const resp = await fetch(url, {
161
+ signal: controller.signal,
162
+ headers: opts.headers || { 'User-Agent': 'CIPHER-OSINT/2.0' },
163
+ });
164
+ if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
165
+ return await resp.text();
166
+ } finally {
167
+ clearTimeout(timer);
168
+ }
169
+ }
170
+
125
171
  // ---------------------------------------------------------------------------
126
172
  // Domain Intelligence
127
173
  // ---------------------------------------------------------------------------
128
174
 
129
175
  class DomainIntelligence {
130
176
  /**
131
- * Resolve DNS records for a domain (passive).
132
- * Uses `dig` subprocess with `dns.resolve` fallback.
133
- * @param {string} domain
134
- * @returns {OSINTResult}
177
+ * DNS record lookup via dig with node:dns fallback.
135
178
  */
136
179
  static dnsLookup(domain) {
137
180
  const data = { domain, records: {} };
@@ -142,71 +185,37 @@ class DomainIntelligence {
142
185
  if (hasDig) {
143
186
  try {
144
187
  const out = execFileSync('dig', ['+short', domain, rtype], {
145
- encoding: 'utf-8',
146
- timeout: 10000,
147
- stdio: ['pipe', 'pipe', 'pipe'],
188
+ encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'],
148
189
  });
149
- const records = out
150
- .split('\n')
151
- .map((r) => r.trim())
152
- .filter(Boolean);
153
- if (records.length > 0) {
154
- data.records[rtype] = records;
155
- }
190
+ const records = out.split('\n').map(r => r.trim()).filter(Boolean);
191
+ if (records.length > 0) data.records[rtype] = records;
156
192
  } catch (err) {
157
- // If dig doesn't exist (ENOENT), fall through to socket-based fallback
158
- if (err.code === 'ENOENT') {
159
- hasDig = false;
160
- // Fall through to fallback below
161
- } else {
162
- continue; // timeout or other error — skip this record type
163
- }
193
+ if (err.code === 'ENOENT') { hasDig = false; } else { continue; }
164
194
  }
165
195
  }
166
-
167
- if (!hasDig) {
168
- // Fallback: only A records via dns.resolve (sync not available, use socket)
169
- if (rtype === 'A') {
170
- try {
171
- const { address } = dns.lookup
172
- ? (() => {
173
- // Synchronous-ish: use execFileSync to call node
174
- const out = execFileSync(
175
- process.execPath,
176
- ['-e', `const dns=require('dns');dns.resolve4('${domain}',(e,a)=>console.log(JSON.stringify(a||[])))`],
177
- { encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] },
178
- );
179
- const ips = JSON.parse(out.trim());
180
- if (ips.length > 0) data.records.A = ips;
181
- return {};
182
- })()
183
- : {};
184
- } catch {
185
- // Can't resolve — skip
186
- }
187
- }
188
- break; // No dig = only A records
196
+ if (!hasDig && rtype === 'A') {
197
+ try {
198
+ const out = execFileSync(process.execPath,
199
+ ['-e', `const dns=require('dns');dns.resolve4('${domain}',(e,a)=>console.log(JSON.stringify(a||[])))`],
200
+ { encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] });
201
+ const ips = JSON.parse(out.trim());
202
+ if (ips.length > 0) data.records.A = ips;
203
+ } catch { /* skip */ }
204
+ break;
189
205
  }
190
206
  }
191
-
192
207
  return new OSINTResult({ source: 'dns', query: domain, data, confidence: 'high' });
193
208
  }
194
209
 
195
210
  /**
196
- * WHOIS lookup for domain registration info (passive).
197
- * @param {string} domain
198
- * @returns {OSINTResult}
211
+ * WHOIS registration lookup.
199
212
  */
200
213
  static whoisLookup(domain) {
201
214
  try {
202
215
  const out = execFileSync('whois', [domain], {
203
- encoding: 'utf-8',
204
- timeout: 15000,
205
- stdio: ['pipe', 'pipe', 'pipe'],
216
+ encoding: 'utf-8', timeout: 15000, stdio: ['pipe', 'pipe', 'pipe'],
206
217
  });
207
-
208
218
  const data = { domain, raw_length: out.length };
209
-
210
219
  const patterns = {
211
220
  registrar: /Registrar:\s*(.+)/i,
212
221
  creation_date: /Creat(?:ion|ed) Date:\s*(.+)/i,
@@ -216,25 +225,155 @@ class DomainIntelligence {
216
225
  registrant_org: /Registrant Organi[sz]ation:\s*(.+)/i,
217
226
  registrant_country: /Registrant Country:\s*(.+)/i,
218
227
  };
219
-
220
228
  for (const [key, pattern] of Object.entries(patterns)) {
221
- // Use matchAll for patterns with /g, exec for single match
222
229
  if (pattern.global) {
223
- const matches = [...out.matchAll(pattern)].map((m) => m[1]);
230
+ const matches = [...out.matchAll(pattern)].map(m => m[1]);
224
231
  if (matches.length > 0) data[key] = matches.length > 1 ? matches : matches[0];
225
232
  } else {
226
233
  const m = pattern.exec(out);
227
234
  if (m) data[key] = m[1];
228
235
  }
229
236
  }
230
-
231
237
  return new OSINTResult({ source: 'whois', query: domain, data, confidence: 'high' });
232
238
  } catch (err) {
233
- const errMsg = err.code === 'ENOENT' ? 'whois binary not found' : String(err.message || err);
234
239
  return new OSINTResult({
235
- source: 'whois',
236
- query: domain,
237
- data: { error: errMsg },
240
+ source: 'whois', query: domain,
241
+ data: { error: err.code === 'ENOENT' ? 'whois binary not found' : err.message },
242
+ confidence: 'low',
243
+ });
244
+ }
245
+ }
246
+
247
+ /**
248
+ * Certificate Transparency search via crt.sh for subdomain discovery.
249
+ */
250
+ static async certTransparency(domain) {
251
+ try {
252
+ const data = await fetchJSON(`https://crt.sh/?q=%25.${encodeURIComponent(domain)}&output=json`, { timeout: 20000 });
253
+ const subdomains = [...new Set(
254
+ (data || [])
255
+ .flatMap(entry => (entry.name_value || '').split('\n'))
256
+ .map(name => name.trim().toLowerCase().replace(/^\*\./, ''))
257
+ .filter(name => name.endsWith(domain) && name !== domain)
258
+ )].sort();
259
+ return new OSINTResult({
260
+ source: 'cert_transparency', query: domain,
261
+ data: { domain, subdomains, count: subdomains.length },
262
+ confidence: 'high',
263
+ });
264
+ } catch (err) {
265
+ return new OSINTResult({
266
+ source: 'cert_transparency', query: domain,
267
+ data: { error: `crt.sh query failed: ${err.message}` },
268
+ confidence: 'low',
269
+ });
270
+ }
271
+ }
272
+
273
+ /**
274
+ * Wayback Machine snapshot history via CDX API.
275
+ */
276
+ static async waybackHistory(domain) {
277
+ try {
278
+ const url = `https://web.archive.org/cdx/search/cdx?url=${encodeURIComponent(domain)}&output=json&limit=20&fl=timestamp,original,statuscode,mimetype`;
279
+ const data = await fetchJSON(url, { timeout: 20000 });
280
+ if (!data || data.length < 2) {
281
+ return new OSINTResult({
282
+ source: 'wayback_machine', query: domain,
283
+ data: { domain, snapshots: [], count: 0, first_seen: null, last_seen: null },
284
+ confidence: 'medium',
285
+ });
286
+ }
287
+ // First row is headers, rest are data
288
+ const snapshots = data.slice(1).map(row => ({
289
+ timestamp: row[0],
290
+ url: row[1],
291
+ status: row[2],
292
+ mime: row[3],
293
+ archive_url: `https://web.archive.org/web/${row[0]}/${row[1]}`,
294
+ }));
295
+ return new OSINTResult({
296
+ source: 'wayback_machine', query: domain,
297
+ data: {
298
+ domain,
299
+ snapshots,
300
+ count: snapshots.length,
301
+ first_seen: snapshots[0]?.timestamp || null,
302
+ last_seen: snapshots[snapshots.length - 1]?.timestamp || null,
303
+ },
304
+ confidence: 'high',
305
+ });
306
+ } catch (err) {
307
+ return new OSINTResult({
308
+ source: 'wayback_machine', query: domain,
309
+ data: { error: `Wayback Machine query failed: ${err.message}` },
310
+ confidence: 'low',
311
+ });
312
+ }
313
+ }
314
+
315
+ /**
316
+ * Web technology fingerprinting via HTTP headers and HTML meta analysis.
317
+ */
318
+ static async webTechFingerprint(domain) {
319
+ try {
320
+ const controller = new AbortController();
321
+ const timer = setTimeout(() => controller.abort(), HTTP_TIMEOUT);
322
+ const resp = await fetch(`https://${domain}`, {
323
+ signal: controller.signal,
324
+ redirect: 'follow',
325
+ headers: { 'User-Agent': 'CIPHER-OSINT/2.0' },
326
+ });
327
+ clearTimeout(timer);
328
+
329
+ const headers = Object.fromEntries(resp.headers.entries());
330
+ const html = await resp.text();
331
+ const techs = [];
332
+
333
+ // Server header
334
+ if (headers.server) techs.push({ name: headers.server, category: 'server', source: 'header' });
335
+ // X-Powered-By
336
+ if (headers['x-powered-by']) techs.push({ name: headers['x-powered-by'], category: 'framework', source: 'header' });
337
+ // CDN detection
338
+ if (headers['cf-ray']) techs.push({ name: 'Cloudflare', category: 'cdn', source: 'header' });
339
+ if (headers['x-amz-cf-id'] || headers['x-cache']) techs.push({ name: 'AWS CloudFront', category: 'cdn', source: 'header' });
340
+ if (headers['x-vercel-id']) techs.push({ name: 'Vercel', category: 'hosting', source: 'header' });
341
+ if (headers['x-netlify-request-id']) techs.push({ name: 'Netlify', category: 'hosting', source: 'header' });
342
+ // Security headers
343
+ const securityHeaders = {};
344
+ for (const h of ['strict-transport-security', 'content-security-policy', 'x-frame-options',
345
+ 'x-content-type-options', 'x-xss-protection', 'permissions-policy',
346
+ 'referrer-policy', 'cross-origin-opener-policy']) {
347
+ if (headers[h]) securityHeaders[h] = headers[h];
348
+ }
349
+ // HTML-based detection (limited, fast)
350
+ const htmlLower = html.slice(0, 50000).toLowerCase();
351
+ if (htmlLower.includes('wp-content') || htmlLower.includes('wordpress')) techs.push({ name: 'WordPress', category: 'cms', source: 'html' });
352
+ if (htmlLower.includes('drupal')) techs.push({ name: 'Drupal', category: 'cms', source: 'html' });
353
+ if (htmlLower.includes('joomla')) techs.push({ name: 'Joomla', category: 'cms', source: 'html' });
354
+ if (htmlLower.includes('shopify')) techs.push({ name: 'Shopify', category: 'ecommerce', source: 'html' });
355
+ if (htmlLower.includes('squarespace')) techs.push({ name: 'Squarespace', category: 'cms', source: 'html' });
356
+ if (htmlLower.includes('wix.com')) techs.push({ name: 'Wix', category: 'cms', source: 'html' });
357
+ if (htmlLower.includes('next/') || htmlLower.includes('__next')) techs.push({ name: 'Next.js', category: 'framework', source: 'html' });
358
+ if (htmlLower.includes('react')) techs.push({ name: 'React', category: 'framework', source: 'html' });
359
+ if (htmlLower.includes('vue')) techs.push({ name: 'Vue.js', category: 'framework', source: 'html' });
360
+ if (htmlLower.includes('angular')) techs.push({ name: 'Angular', category: 'framework', source: 'html' });
361
+ if (htmlLower.includes('bootstrap')) techs.push({ name: 'Bootstrap', category: 'css', source: 'html' });
362
+ if (htmlLower.includes('tailwind')) techs.push({ name: 'Tailwind CSS', category: 'css', source: 'html' });
363
+ if (htmlLower.includes('jquery')) techs.push({ name: 'jQuery', category: 'library', source: 'html' });
364
+ // Meta generator
365
+ const genMatch = html.match(/<meta[^>]+name=["']generator["'][^>]+content=["']([^"']+)["']/i);
366
+ if (genMatch) techs.push({ name: genMatch[1], category: 'generator', source: 'meta' });
367
+
368
+ return new OSINTResult({
369
+ source: 'web_tech', query: domain,
370
+ data: { domain, technologies: techs, security_headers: securityHeaders, status: resp.status, final_url: resp.url },
371
+ confidence: 'medium',
372
+ });
373
+ } catch (err) {
374
+ return new OSINTResult({
375
+ source: 'web_tech', query: domain,
376
+ data: { error: `Web tech scan failed: ${err.message}` },
238
377
  confidence: 'low',
239
378
  });
240
379
  }
@@ -246,153 +385,226 @@ class DomainIntelligence {
246
385
  // ---------------------------------------------------------------------------
247
386
 
248
387
  class IPIntelligence {
249
- /**
250
- * Reverse DNS lookup for an IP address.
251
- * @param {string} ip
252
- * @returns {OSINTResult}
253
- */
254
388
  static reverseDns(ip) {
255
389
  try {
256
- // Synchronous approach: use execFileSync with node subprocess
257
- const out = execFileSync(
258
- process.execPath,
390
+ const out = execFileSync(process.execPath,
259
391
  ['-e', `const dns=require('dns');dns.reverse('${ip}',(e,h)=>console.log(JSON.stringify(e?null:h[0])))`],
260
- { encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] },
261
- );
392
+ { encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] });
262
393
  const hostname = JSON.parse(out.trim());
263
394
  return new OSINTResult({
264
- source: 'reverse_dns',
265
- query: ip,
266
- data: { ip, hostname },
267
- confidence: hostname ? 'high' : 'low',
395
+ source: 'reverse_dns', query: ip,
396
+ data: { ip, hostname }, confidence: hostname ? 'high' : 'low',
268
397
  });
269
398
  } catch {
270
- return new OSINTResult({
271
- source: 'reverse_dns',
272
- query: ip,
273
- data: { ip, hostname: null },
274
- confidence: 'low',
275
- });
399
+ return new OSINTResult({ source: 'reverse_dns', query: ip, data: { ip, hostname: null }, confidence: 'low' });
276
400
  }
277
401
  }
278
402
 
279
- /**
280
- * Aggregate IP intelligence from available local tools.
281
- * @param {string} ip
282
- * @returns {OSINTResult}
283
- */
284
403
  static ipInfo(ip) {
285
404
  const data = { ip };
286
405
  const version = net.isIP(ip);
287
-
288
406
  if (version === 0) {
289
- data.error = 'Invalid IP address';
290
- return new OSINTResult({ source: 'ip_info', query: ip, data, confidence: 'low' });
407
+ return new OSINTResult({ source: 'ip_info', query: ip, data: { error: 'Invalid IP address' }, confidence: 'low' });
291
408
  }
292
-
293
409
  data.version = version;
294
410
  data.is_private = isPrivateIP(ip);
295
411
  data.is_loopback = (version === 4 && ip.startsWith('127.')) || ip === '::1';
296
412
  data.is_multicast = _isMulticast(ip, version);
297
413
 
298
- // Reverse DNS (best effort)
299
414
  try {
300
- const out = execFileSync(
301
- process.execPath,
415
+ const out = execFileSync(process.execPath,
302
416
  ['-e', `const dns=require('dns');dns.reverse('${ip}',(e,h)=>console.log(JSON.stringify(e?null:h?h[0]:null)))`],
303
- { encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] },
304
- );
417
+ { encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] });
305
418
  data.hostname = JSON.parse(out.trim());
306
- } catch {
307
- data.hostname = null;
308
- }
419
+ } catch { data.hostname = null; }
309
420
 
310
421
  return new OSINTResult({ source: 'ip_info', query: ip, data, confidence: 'high' });
311
422
  }
423
+
424
+ /**
425
+ * IP geolocation via ip-api.com (free, no key, 45 req/min).
426
+ */
427
+ static async geolocate(ip) {
428
+ if (isPrivateIP(ip)) {
429
+ return new OSINTResult({ source: 'ip_geo', query: ip, data: { ip, note: 'Private IP — no geolocation' }, confidence: 'low' });
430
+ }
431
+ try {
432
+ const data = await fetchJSON(`http://ip-api.com/json/${encodeURIComponent(ip)}?fields=status,message,country,countryCode,region,regionName,city,zip,lat,lon,timezone,isp,org,as,asname,query`);
433
+ if (data.status === 'fail') {
434
+ return new OSINTResult({ source: 'ip_geo', query: ip, data: { error: data.message }, confidence: 'low' });
435
+ }
436
+ return new OSINTResult({ source: 'ip_geo', query: ip, data, confidence: 'high' });
437
+ } catch (err) {
438
+ return new OSINTResult({ source: 'ip_geo', query: ip, data: { error: `Geolocation failed: ${err.message}` }, confidence: 'low' });
439
+ }
440
+ }
312
441
  }
313
442
 
314
- /**
315
- * Check if IP is multicast.
316
- * IPv4: 224.0.0.0 - 239.255.255.255
317
- * IPv6: ff00::/8
318
- * @param {string} ip
319
- * @param {number} version
320
- * @returns {boolean}
321
- */
322
- function _isMulticast(ip, version) {
323
- if (version === 4) {
324
- const first = parseInt(ip.split('.')[0], 10);
325
- return first >= 224 && first <= 239;
443
+ // ---------------------------------------------------------------------------
444
+ // Username/People Intelligence
445
+ // ---------------------------------------------------------------------------
446
+
447
+ class PeopleIntelligence {
448
+ /**
449
+ * Username search across 400+ platforms via Sherlock.
450
+ * Returns found accounts with URLs.
451
+ */
452
+ static usernameSearch(username) {
453
+ const venvBin = findVenvBin();
454
+ const sherlock = venvBin ? resolve(venvBin, 'sherlock') : 'sherlock';
455
+
456
+ try {
457
+ const out = execFileSync(sherlock, [username, '--print-found', '--timeout', '10', '--output', '/dev/null'], {
458
+ encoding: 'utf-8', timeout: 120000, stdio: ['pipe', 'pipe', 'pipe'],
459
+ });
460
+ const accounts = [];
461
+ for (const line of out.split('\n')) {
462
+ // Sherlock output: [+] SiteName: URL
463
+ const match = line.match(/^\[\+\]\s+(.+?):\s+(https?:\/\/.+)/);
464
+ if (match) {
465
+ accounts.push({ platform: match[1].trim(), url: match[2].trim() });
466
+ }
467
+ }
468
+ return new OSINTResult({
469
+ source: 'sherlock', query: username,
470
+ data: { username, accounts, count: accounts.length },
471
+ confidence: 'high', collectionMethod: 'active',
472
+ });
473
+ } catch (err) {
474
+ const msg = err.code === 'ENOENT' ? 'sherlock not installed (pip install sherlock-project)' : err.message;
475
+ return new OSINTResult({
476
+ source: 'sherlock', query: username,
477
+ data: { error: msg }, confidence: 'low',
478
+ });
479
+ }
480
+ }
481
+
482
+ /**
483
+ * Email account existence check across 120+ services via Holehe.
484
+ */
485
+ static emailOsint(email) {
486
+ const venvBin = findVenvBin();
487
+ const holehe = venvBin ? resolve(venvBin, 'holehe') : 'holehe';
488
+
489
+ try {
490
+ const out = execFileSync(holehe, [email, '--no-color', '--only-used', '-NP'], {
491
+ encoding: 'utf-8', timeout: 120000, stdio: ['pipe', 'pipe', 'pipe'],
492
+ });
493
+ const services = [];
494
+ for (const line of out.split('\n')) {
495
+ // Holehe output: [+] service.com (skip legend line)
496
+ const match = line.match(/^\[\+\]\s+(.+)/);
497
+ if (match && !match[1].includes('Email used') && !match[1].includes('Email not used')) {
498
+ services.push(match[1].trim());
499
+ }
500
+ }
501
+ return new OSINTResult({
502
+ source: 'holehe', query: email,
503
+ data: { email, services, count: services.length },
504
+ confidence: 'medium', collectionMethod: 'active',
505
+ });
506
+ } catch (err) {
507
+ const msg = err.code === 'ENOENT' ? 'holehe not installed (pip install holehe)' : err.message;
508
+ return new OSINTResult({
509
+ source: 'holehe', query: email,
510
+ data: { error: msg }, confidence: 'low',
511
+ });
512
+ }
326
513
  }
327
- return ip.toLowerCase().startsWith('ff');
328
514
  }
329
515
 
330
516
  // ---------------------------------------------------------------------------
331
- // Document Metadata
517
+ // Archive Intelligence
332
518
  // ---------------------------------------------------------------------------
333
519
 
334
- class DocumentMetadata {
520
+ class ArchiveIntelligence {
521
+ /**
522
+ * Check if a URL has been archived on archive.today.
523
+ */
524
+ static async archiveTodayCheck(url) {
525
+ try {
526
+ const resp = await fetchText(`https://archive.ph/newest/${url}`, { timeout: 15000 });
527
+ const hasArchive = resp.includes('id="CONTENT"') || resp.includes('archived');
528
+ return new OSINTResult({
529
+ source: 'archive_today', query: url,
530
+ data: { url, archived: hasArchive, check_url: `https://archive.ph/newest/${url}` },
531
+ confidence: 'medium',
532
+ });
533
+ } catch (err) {
534
+ return new OSINTResult({
535
+ source: 'archive_today', query: url,
536
+ data: { error: `archive.today check failed: ${err.message}` },
537
+ confidence: 'low',
538
+ });
539
+ }
540
+ }
541
+
335
542
  /**
336
- * Extract EXIF metadata from an image file.
337
- * @param {string} filepath
338
- * @returns {OSINTResult}
543
+ * Wayback Machine availability check for a specific URL.
339
544
  */
545
+ static async waybackCheck(url) {
546
+ try {
547
+ const data = await fetchJSON(`https://archive.org/wayback/available?url=${encodeURIComponent(url)}`);
548
+ const snapshot = data?.archived_snapshots?.closest;
549
+ return new OSINTResult({
550
+ source: 'wayback_check', query: url,
551
+ data: {
552
+ url,
553
+ available: !!snapshot,
554
+ closest_snapshot: snapshot ? { url: snapshot.url, timestamp: snapshot.timestamp, status: snapshot.status } : null,
555
+ },
556
+ confidence: snapshot ? 'high' : 'medium',
557
+ });
558
+ } catch (err) {
559
+ return new OSINTResult({
560
+ source: 'wayback_check', query: url,
561
+ data: { error: `Wayback check failed: ${err.message}` },
562
+ confidence: 'low',
563
+ });
564
+ }
565
+ }
566
+ }
567
+
568
+ // ---------------------------------------------------------------------------
569
+ // Document Metadata
570
+ // ---------------------------------------------------------------------------
571
+
572
+ class DocumentMetadata {
340
573
  static extractExif(filepath) {
341
574
  try {
342
575
  const out = execFileSync('exiftool', ['-json', filepath], {
343
- encoding: 'utf-8',
344
- timeout: 15000,
345
- stdio: ['pipe', 'pipe', 'pipe'],
576
+ encoding: 'utf-8', timeout: 15000, stdio: ['pipe', 'pipe', 'pipe'],
346
577
  });
347
578
  const metadata = JSON.parse(out);
348
579
  return new OSINTResult({
349
- source: 'exif',
350
- query: filepath,
580
+ source: 'exif', query: filepath,
351
581
  data: { metadata: Array.isArray(metadata) ? metadata[0] : metadata },
352
582
  confidence: 'high',
353
583
  });
354
584
  } catch (err) {
355
- const errMsg = err.code === 'ENOENT' ? 'exiftool not found' : 'Failed to extract EXIF';
356
585
  return new OSINTResult({
357
- source: 'exif',
358
- query: filepath,
359
- data: { error: errMsg },
586
+ source: 'exif', query: filepath,
587
+ data: { error: err.code === 'ENOENT' ? 'exiftool not found' : 'Failed to extract EXIF' },
360
588
  confidence: 'low',
361
589
  });
362
590
  }
363
591
  }
364
592
 
365
- /**
366
- * Extract metadata from a PDF file using pdfinfo.
367
- * @param {string} filepath
368
- * @returns {OSINTResult}
369
- */
370
593
  static extractPdfMetadata(filepath) {
371
594
  try {
372
595
  const out = execFileSync('pdfinfo', [filepath], {
373
- encoding: 'utf-8',
374
- timeout: 15000,
375
- stdio: ['pipe', 'pipe', 'pipe'],
596
+ encoding: 'utf-8', timeout: 15000, stdio: ['pipe', 'pipe', 'pipe'],
376
597
  });
377
598
  const data = {};
378
599
  for (const line of out.split('\n')) {
379
600
  const idx = line.indexOf(':');
380
- if (idx >= 0) {
381
- data[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
382
- }
601
+ if (idx >= 0) data[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
383
602
  }
384
- return new OSINTResult({
385
- source: 'pdf_metadata',
386
- query: filepath,
387
- data,
388
- confidence: 'high',
389
- });
603
+ return new OSINTResult({ source: 'pdf_metadata', query: filepath, data, confidence: 'high' });
390
604
  } catch (err) {
391
- const errMsg = err.code === 'ENOENT' ? 'pdfinfo not found' : 'Failed to extract PDF metadata';
392
605
  return new OSINTResult({
393
- source: 'pdf_metadata',
394
- query: filepath,
395
- data: { error: errMsg },
606
+ source: 'pdf_metadata', query: filepath,
607
+ data: { error: err.code === 'ENOENT' ? 'pdfinfo not found' : 'Failed to extract PDF metadata' },
396
608
  confidence: 'low',
397
609
  });
398
610
  }
@@ -400,7 +612,7 @@ class DocumentMetadata {
400
612
  }
401
613
 
402
614
  // ---------------------------------------------------------------------------
403
- // OSINT Pipeline — orchestrator
615
+ // OSINT Pipeline — comprehensive orchestrator
404
616
  // ---------------------------------------------------------------------------
405
617
 
406
618
  class OSINTPipeline {
@@ -409,20 +621,36 @@ class OSINTPipeline {
409
621
  }
410
622
 
411
623
  /**
412
- * Run full domain investigation pipeline.
624
+ * Full domain investigation — DNS, WHOIS, cert transparency,
625
+ * Wayback Machine, web tech, and IP pivot with geolocation.
413
626
  * @param {string} domain
414
- * @returns {OSINTResult[]}
627
+ * @returns {Promise<OSINTResult[]>}
415
628
  */
416
- investigateDomain(domain) {
629
+ async investigateDomain(domain) {
417
630
  const results = [];
631
+
632
+ // Synchronous lookups
418
633
  results.push(DomainIntelligence.dnsLookup(domain));
419
634
  results.push(DomainIntelligence.whoisLookup(domain));
420
635
 
421
- // Pivot: resolve IPs and investigate them
636
+ // Async lookups run in parallel
637
+ const asyncResults = await Promise.allSettled([
638
+ DomainIntelligence.certTransparency(domain),
639
+ DomainIntelligence.waybackHistory(domain),
640
+ DomainIntelligence.webTechFingerprint(domain),
641
+ ]);
642
+
643
+ for (const r of asyncResults) {
644
+ if (r.status === 'fulfilled') results.push(r.value);
645
+ }
646
+
647
+ // IP pivot — resolve IPs and geolocate them
422
648
  const dnsResult = results[0];
423
649
  const aRecords = dnsResult.data?.records?.A ?? [];
424
- for (const ip of aRecords.slice(0, 5)) {
650
+ for (const ip of aRecords.slice(0, 3)) {
425
651
  results.push(IPIntelligence.ipInfo(ip));
652
+ const geo = await IPIntelligence.geolocate(ip);
653
+ results.push(geo);
426
654
  }
427
655
 
428
656
  this._results.push(...results);
@@ -430,20 +658,57 @@ class OSINTPipeline {
430
658
  }
431
659
 
432
660
  /**
433
- * Run IP investigation pipeline.
661
+ * Full IP investigation — info, reverse DNS, geolocation.
434
662
  * @param {string} ip
435
- * @returns {OSINTResult[]}
663
+ * @returns {Promise<OSINTResult[]>}
436
664
  */
437
- investigateIp(ip) {
665
+ async investigateIp(ip) {
438
666
  const results = [IPIntelligence.ipInfo(ip), IPIntelligence.reverseDns(ip)];
667
+ const geo = await IPIntelligence.geolocate(ip);
668
+ results.push(geo);
669
+ this._results.push(...results);
670
+ return results;
671
+ }
672
+
673
+ /**
674
+ * Username OSINT — search across 400+ platforms via Sherlock.
675
+ * @param {string} username
676
+ * @returns {OSINTResult[]}
677
+ */
678
+ investigateUsername(username) {
679
+ const results = [PeopleIntelligence.usernameSearch(username)];
680
+ this._results.push(...results);
681
+ return results;
682
+ }
683
+
684
+ /**
685
+ * Email OSINT — check account existence across 120+ services.
686
+ * @param {string} email
687
+ * @returns {OSINTResult[]}
688
+ */
689
+ investigateEmail(email) {
690
+ const results = [PeopleIntelligence.emailOsint(email)];
439
691
  this._results.push(...results);
440
692
  return results;
441
693
  }
442
694
 
443
695
  /**
444
- * Extract metadata from a file.
445
- * @param {string} filepath
446
- * @returns {OSINTResult}
696
+ * URL archive investigation — Wayback Machine + archive.today.
697
+ * @param {string} url
698
+ * @returns {Promise<OSINTResult[]>}
699
+ */
700
+ async investigateUrl(url) {
701
+ const results = await Promise.allSettled([
702
+ ArchiveIntelligence.waybackCheck(url),
703
+ ArchiveIntelligence.archiveTodayCheck(url),
704
+ ]);
705
+ const settled = results.filter(r => r.status === 'fulfilled').map(r => r.value);
706
+ this._results.push(...settled);
707
+ return settled;
708
+ }
709
+
710
+ /**
711
+ * Extract metadata from a file (EXIF for images, pdfinfo for PDFs).
447
712
  */
448
713
  extractMetadata(filepath) {
449
714
  const result = filepath.toLowerCase().endsWith('.pdf')
@@ -453,18 +718,8 @@ class OSINTPipeline {
453
718
  return result;
454
719
  }
455
720
 
456
- /**
457
- * Return all investigation results.
458
- * @returns {object[]}
459
- */
460
- getAllResults() {
461
- return this._results.map((r) => r.toDict());
462
- }
721
+ getAllResults() { return this._results.map(r => r.toDict()); }
463
722
 
464
- /**
465
- * Investigation summary statistics.
466
- * @returns {object}
467
- */
468
723
  summary() {
469
724
  const bySource = {};
470
725
  const byConfidence = {};
@@ -472,11 +727,7 @@ class OSINTPipeline {
472
727
  bySource[r.source] = (bySource[r.source] ?? 0) + 1;
473
728
  byConfidence[r.confidence] = (byConfidence[r.confidence] ?? 0) + 1;
474
729
  }
475
- return {
476
- total_results: this._results.length,
477
- by_source: bySource,
478
- by_confidence: byConfidence,
479
- };
730
+ return { total_results: this._results.length, by_source: bySource, by_confidence: byConfidence };
480
731
  }
481
732
  }
482
733
 
@@ -485,14 +736,12 @@ class OSINTPipeline {
485
736
  // ---------------------------------------------------------------------------
486
737
 
487
738
  export {
488
- // Helper
489
739
  isPrivateIP,
490
- // Data class
491
740
  OSINTResult,
492
- // Intelligence modules
493
741
  DomainIntelligence,
494
742
  IPIntelligence,
743
+ PeopleIntelligence,
744
+ ArchiveIntelligence,
495
745
  DocumentMetadata,
496
- // Pipeline
497
746
  OSINTPipeline,
498
747
  };