osintkit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/README.md +86 -0
  2. package/bin/osintkit.js +7 -0
  3. package/osintkit/__init__.py +3 -0
  4. package/osintkit/__pycache__/__init__.cpython-311.pyc +0 -0
  5. package/osintkit/__pycache__/cli.cpython-311.pyc +0 -0
  6. package/osintkit/__pycache__/config.cpython-311.pyc +0 -0
  7. package/osintkit/__pycache__/profiles.cpython-311.pyc +0 -0
  8. package/osintkit/__pycache__/risk.cpython-311.pyc +0 -0
  9. package/osintkit/__pycache__/scanner.cpython-311.pyc +0 -0
  10. package/osintkit/cli.py +613 -0
  11. package/osintkit/config.py +51 -0
  12. package/osintkit/modules/__init__.py +6 -0
  13. package/osintkit/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  14. package/osintkit/modules/__pycache__/breach.cpython-311.pyc +0 -0
  15. package/osintkit/modules/__pycache__/brokers.cpython-311.pyc +0 -0
  16. package/osintkit/modules/__pycache__/certs.cpython-311.pyc +0 -0
  17. package/osintkit/modules/__pycache__/dark_web.cpython-311.pyc +0 -0
  18. package/osintkit/modules/__pycache__/gravatar.cpython-311.pyc +0 -0
  19. package/osintkit/modules/__pycache__/harvester.cpython-311.pyc +0 -0
  20. package/osintkit/modules/__pycache__/hibp.cpython-311.pyc +0 -0
  21. package/osintkit/modules/__pycache__/hibp_kanon.cpython-311.pyc +0 -0
  22. package/osintkit/modules/__pycache__/holehe.cpython-311.pyc +0 -0
  23. package/osintkit/modules/__pycache__/libphonenumber_info.cpython-311.pyc +0 -0
  24. package/osintkit/modules/__pycache__/paste.cpython-311.pyc +0 -0
  25. package/osintkit/modules/__pycache__/phone.cpython-311.pyc +0 -0
  26. package/osintkit/modules/__pycache__/sherlock.cpython-311.pyc +0 -0
  27. package/osintkit/modules/__pycache__/social.cpython-311.pyc +0 -0
  28. package/osintkit/modules/__pycache__/wayback.cpython-311.pyc +0 -0
  29. package/osintkit/modules/breach.py +82 -0
  30. package/osintkit/modules/brokers.py +56 -0
  31. package/osintkit/modules/certs.py +42 -0
  32. package/osintkit/modules/dark_web.py +51 -0
  33. package/osintkit/modules/gravatar.py +50 -0
  34. package/osintkit/modules/harvester.py +56 -0
  35. package/osintkit/modules/hibp.py +40 -0
  36. package/osintkit/modules/hibp_kanon.py +66 -0
  37. package/osintkit/modules/holehe.py +39 -0
  38. package/osintkit/modules/libphonenumber_info.py +79 -0
  39. package/osintkit/modules/paste.py +55 -0
  40. package/osintkit/modules/phone.py +32 -0
  41. package/osintkit/modules/sherlock.py +48 -0
  42. package/osintkit/modules/social.py +58 -0
  43. package/osintkit/modules/stage2/__init__.py +1 -0
  44. package/osintkit/modules/stage2/github_api.py +65 -0
  45. package/osintkit/modules/stage2/hunter.py +64 -0
  46. package/osintkit/modules/stage2/leakcheck.py +58 -0
  47. package/osintkit/modules/stage2/numverify.py +62 -0
  48. package/osintkit/modules/stage2/securitytrails.py +65 -0
  49. package/osintkit/modules/wayback.py +70 -0
  50. package/osintkit/output/__init__.py +1 -0
  51. package/osintkit/output/__pycache__/__init__.cpython-311.pyc +0 -0
  52. package/osintkit/output/__pycache__/html_writer.cpython-311.pyc +0 -0
  53. package/osintkit/output/__pycache__/json_writer.cpython-311.pyc +0 -0
  54. package/osintkit/output/__pycache__/md_writer.cpython-311.pyc +0 -0
  55. package/osintkit/output/html_writer.py +36 -0
  56. package/osintkit/output/json_writer.py +31 -0
  57. package/osintkit/output/md_writer.py +115 -0
  58. package/osintkit/output/templates/report.html +74 -0
  59. package/osintkit/profiles.py +116 -0
  60. package/osintkit/risk.py +42 -0
  61. package/osintkit/scanner.py +240 -0
  62. package/osintkit/setup.py +157 -0
  63. package/package.json +25 -0
  64. package/pyproject.toml +44 -0
  65. package/requirements.txt +9 -0
@@ -0,0 +1,82 @@
1
+ """Breach exposure check via multiple sources."""
2
+
3
+ import httpx
4
+ from typing import Any, Dict, List
5
+
6
+ from osintkit.config import APIKeys
7
+ from osintkit.modules import ModuleError
8
+
9
+
10
async def run_breach_exposure(inputs: Dict[str, Any], api_keys: APIKeys) -> List[Dict]:
    """Look up breach records for the email address in ``inputs``.

    Providers are tried in a fixed order: HIBP, BreachDirectory, LeakCheck.
    A provider is skipped when its API key is not configured. The first
    provider that answers without raising ``ModuleError`` supplies the result,
    even when that result is an empty list.

    Returns ``[]`` when no email is given or no configured provider succeeds.
    """
    email = inputs.get("email")
    if not email:
        return []

    # (APIKeys attribute name, lookup coroutine) in fallback order.
    providers = (
        ("hibp", _check_hibp),
        ("breachdirectory", _check_breachdirectory),
        ("leakcheck", _check_leakcheck),
    )
    for key_attr, lookup in providers:
        key = getattr(api_keys, key_attr)
        if not key:
            continue
        try:
            return await lookup(email, key)
        except ModuleError:
            # This provider failed; fall through to the next one.
            continue
    return []
35
+
36
+
37
async def _check_hibp(email: str, api_key: str) -> List[Dict]:
    """Fetch the breaches recorded for ``email`` from Have I Been Pwned (API v3).

    Returns one finding per breach, or ``[]`` when HIBP answers 404 (the
    account is not in any breach).

    Raises:
        ModuleError: on any status other than 200 or 404.
    """
    from urllib.parse import quote

    # The per-account lookup is /breachedaccount/{account}. The /breaches
    # endpoint lists every breach in the system and is not an account search.
    # The account must be URL-encoded in the path.
    url = (
        "https://haveibeenpwned.com/api/v3/breachedaccount/"
        f"{quote(email.strip(), safe='')}"
    )
    headers = {"hibp-api-key": api_key, "user-agent": "osintkit-CLI"}
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30, headers=headers) as client:
        # truncateResponse=false asks for the full breach model (Domain,
        # BreachDate, DataClasses) rather than breach names only.
        resp = await client.get(url, params={"truncateResponse": "false"})
    if resp.status_code == 404:
        return []
    if resp.status_code != 200:
        raise ModuleError(f"HIBP error: {resp.status_code}")
    return [
        {
            "source": "hibp",
            "type": "breach",
            "data": {
                "breach": b.get("Name"),
                "domain": b.get("Domain"),
                "date": b.get("BreachDate"),
                "classes": b.get("DataClasses", []),
            },
            "confidence": 0.95,
            "url": f"https://haveibeenpwned.com/breach/{b.get('Name')}",
        }
        for b in resp.json()
    ]
52
+
53
+
54
async def _check_breachdirectory(email: str, api_key: str) -> List[Dict]:
    """Query the BreachDirectory RapidAPI endpoint for ``email``.

    Returns one finding per entry of the response's ``result`` list.

    Raises:
        ModuleError: on any non-200 status.
    """
    url = "https://breachdirectory.p.rapidapi.com/"
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "breachdirectory.p.rapidapi.com",
    }
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30, headers=headers) as client:
        resp = await client.get(url, params={"func": "auto", "term": email})
    if resp.status_code != 200:
        raise ModuleError(f"BreachDirectory error: {resp.status_code}")
    # `or []` also covers a response that carries an explicit null result.
    entries = resp.json().get("result") or []
    return [
        {
            "source": "breachdirectory",
            "type": "breach",
            "data": {"breach": b.get("breach"), "fields": b.get("fields", [])},
            "confidence": 0.85,
            "url": None,
        }
        for b in entries
    ]
66
+
67
+
68
async def _check_leakcheck(email: str, api_key: str) -> List[Dict]:
    """Query the LeakCheck public API for ``email``.

    Returns one finding per entry of the response's ``sources`` list, or
    ``[]`` when the response's ``found`` field is falsy.

    Raises:
        ModuleError: on any non-200 status.
    """
    url = "https://leakcheck.io/api/public"
    headers = {"X-API-Key": api_key}
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30, headers=headers) as client:
        # Passing the address via params URL-encodes it; interpolated raw,
        # a "+" in the local part would be read by the server as a space.
        resp = await client.get(url, params={"check": email})
    if resp.status_code != 200:
        raise ModuleError(f"LeakCheck error: {resp.status_code}")
    data = resp.json()
    if not data.get("found"):
        return []
    return [
        {
            "source": "leakcheck",
            "type": "breach",
            "data": {"breach": s.get("name"), "date": s.get("date")},
            "confidence": 0.80,
            "url": None,
        }
        for s in data.get("sources") or []
    ]
@@ -0,0 +1,56 @@
1
+ """Data broker search via Google CSE or direct HTTP."""
2
+
3
+ import httpx
4
+ from typing import Any, Dict, List
5
+
6
+ from osintkit.config import APIKeys
7
+ from osintkit.modules import ModuleError
8
+
9
# Data-broker domains that the searches below iterate over, in this order.
BROKER_SITES = [
    "whitepages.com",
    "spokeo.com",
    "instantcheckmate.com",
    "truepeoplesearch.com",
    "fastpeoplesearch.com",
    "familytreenow.com",
]
11
+
12
+
13
async def run_data_brokers(inputs: Dict[str, Any], api_keys: APIKeys) -> List[Dict]:
    """Look for the person named in ``inputs`` on data-broker sites.

    Google Custom Search is used when both its key and engine id (cx) are
    configured. If it is not configured, or it raises ``ModuleError``, each
    broker site is probed directly over HTTP instead.

    Returns ``[]`` when ``inputs`` has no name.
    """
    name = inputs.get("name")
    if not name:
        return []

    cse_configured = api_keys.google_cse_key and api_keys.google_cse_cx
    if not cse_configured:
        return await _search_direct_http(name)

    try:
        return await _search_google_cse(
            name, api_keys.google_cse_key, api_keys.google_cse_cx
        )
    except ModuleError:
        # CSE failed; fall back to the direct probe.
        return await _search_direct_http(name)
26
+
27
+
28
async def _search_google_cse(name: str, api_key: str, cx: str) -> List[Dict]:
    """Search the first three broker sites for ``name`` via Google Custom Search.

    Returns one finding per search result item.

    Raises:
        ModuleError: when none of the per-site requests succeeded, so the
            caller's fallback to the direct HTTP probe can take over.
    """
    url = "https://www.googleapis.com/customsearch/v1"
    findings: List[Dict] = []
    succeeded = 0
    last_error = "no request was made"
    # Only the first three sites are queried.
    # NOTE(review): presumably to limit API quota use — confirm.
    # Scope the client to the loop so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30) as client:
        for site in BROKER_SITES[:3]:
            query = {"key": api_key, "cx": cx, "q": f"{name} site:{site}"}
            try:
                resp = await client.get(url, params=query)
            except httpx.HTTPError as exc:
                last_error = str(exc) or type(exc).__name__
                continue
            if resp.status_code != 200:
                last_error = f"HTTP {resp.status_code}"
                continue
            succeeded += 1
            for item in resp.json().get("items", []):
                findings.append({
                    "source": "google_cse",
                    "type": "data_broker",
                    "data": {
                        "site": site,
                        "title": item.get("title"),
                        "snippet": item.get("snippet"),
                    },
                    "confidence": 0.85,
                    "url": item.get("link"),
                })
    if not succeeded:
        # Without this the function returned [] on total failure and the
        # caller never reached its fallback.
        raise ModuleError(f"Google CSE error: {last_error}")
    return findings
40
+
41
+
42
async def _search_direct_http(name: str) -> List[Dict]:
    """Probe ``https://<site>/person/<slug>`` on every broker site.

    The slug is ``name`` lower-cased with spaces turned into hyphens. A site
    yields a low-confidence finding when the request ends in HTTP 200;
    sites that fail at the HTTP layer are skipped.
    """
    from urllib.parse import quote

    # Percent-encode anything that is not valid in a path segment.
    slug = quote(name.replace(" ", "-").lower(), safe="-")
    findings: List[Dict] = []
    # Scope the client to the loop so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
        for site in BROKER_SITES:
            url = f"https://{site}/person/{slug}"
            try:
                resp = await client.get(url)
            except httpx.HTTPError:
                # Best-effort probe: an unreachable site is not a finding.
                continue
            if resp.status_code == 200:
                findings.append({
                    "source": "direct_http",
                    "type": "data_broker_potential",
                    "data": {"site": site, "name": name, "url": url},
                    "confidence": 0.4,
                    "url": url,
                })
    return findings
@@ -0,0 +1,42 @@
1
+ """Certificate transparency check via crt.sh."""
2
+
3
+ import httpx
4
+ from typing import Any, Dict, List
5
+
6
+ from osintkit.modules import ModuleError
7
+
8
# Public mailbox providers: their domains say nothing about the individual,
# so certificate lookups are skipped for them.
COMMON_DOMAINS = ["gmail.com", "yahoo.com", "hotmail.com", "outlook.com", "icloud.com"]


async def run_cert_transparency(inputs: Dict[str, Any]) -> List[Dict]:
    """Query certificate transparency logs via crt.sh for the email's domain.

    Returns one finding per distinct name found in the certificates'
    ``name_value`` fields. Returns ``[]`` when there is no usable email, when
    the domain is in ``COMMON_DOMAINS``, or when crt.sh answers 404.

    Raises:
        ModuleError: on any other non-200 status, on a transport failure, or
            when the response body is not valid JSON.
    """
    email = inputs.get("email")
    if not email or "@" not in email:
        return []

    # Domains are case-insensitive; normalise so "User@GMAIL.com" is still
    # recognised as a common provider. rsplit takes the part after the last "@".
    domain = email.rsplit("@", 1)[-1].strip().lower()
    if not domain or domain in COMMON_DOMAINS:
        return []

    try:
        # Scope the client to the request so its connection pool is closed.
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.get(
                "https://crt.sh/", params={"q": domain, "output": "json"}
            )
    except httpx.HTTPError as e:
        raise ModuleError(f"crt.sh failed: {e}") from e

    if resp.status_code == 404:
        return []
    if resp.status_code != 200:
        raise ModuleError(f"crt.sh error: {resp.status_code}")

    try:
        certs = resp.json()
    except ValueError as e:
        # A body that is not JSON (json.JSONDecodeError is a ValueError).
        raise ModuleError(f"crt.sh failed: invalid JSON response ({e})") from e

    findings: List[Dict] = []
    seen = set()
    for cert in certs:
        # name_value can hold several names separated by newlines.
        for name in cert.get("name_value", "").split("\n"):
            name = name.strip()
            if not name or name in seen:
                continue
            seen.add(name)
            findings.append({
                "source": "crtsh",
                "type": "ssl_cert",
                "data": {"domain": name, "issuer": cert.get("issuer_name")},
                "confidence": 0.8,
                "url": f"https://crt.sh/?q={name}",
            })
    return findings
@@ -0,0 +1,51 @@
1
+ """Dark web search via Intelbase or Ahmia."""
2
+
3
+ import httpx
4
+ from typing import Any, Dict, List
5
+
6
+ from osintkit.config import APIKeys
7
+ from osintkit.modules import ModuleError
8
+
9
+
10
async def run_dark_web(inputs: Dict[str, Any], api_keys: APIKeys) -> List[Dict]:
    """Search dark-web indexes for the subject described by ``inputs``.

    The query is the first truthy value among ``email``, ``username`` and
    ``name``. Intelbase is used when its key is configured; if it is not, or
    it raises ``ModuleError``, Ahmia is tried. Any exception from the Ahmia
    search results in ``[]``.
    """
    query = None
    for field in ("email", "username", "name"):
        query = inputs.get(field)
        if query:
            break
    if not query:
        return []

    intelbase_key = api_keys.intelbase
    if intelbase_key:
        try:
            return await _search_intelbase(query, intelbase_key)
        except ModuleError:
            pass  # fall through to Ahmia

    try:
        findings = await _search_ahmia(query)
    except Exception:
        # Best-effort fallback: every failure is reported as "nothing found".
        findings = []
    return findings
27
+
28
+
29
async def _search_intelbase(query: str, api_key: str) -> List[Dict]:
    """POST ``query`` to the Intelbase dark-web search endpoint.

    Returns one finding per entry of the response's ``results`` list.

    Raises:
        ModuleError: on any non-200 status.
    """
    url = "https://api.intelbase.is/v1/darkweb/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=60, headers=headers) as client:
        resp = await client.post(url, json={"query": query})
    if resp.status_code != 200:
        raise ModuleError(f"Intelbase error: {resp.status_code}")
    return [
        {
            "source": "intelbase",
            "type": "dark_web",
            "data": {
                "title": r.get("title"),
                "url": r.get("url"),
                "snippet": r.get("snippet"),
            },
            "confidence": 0.7,
            "url": r.get("url"),
        }
        for r in resp.json().get("results") or []
    ]
41
+
42
+
43
async def _search_ahmia(query: str) -> List[Dict]:
    """Check that an Ahmia search for ``query`` loads and return a pointer to it.

    The HTML result page is not parsed. The single finding only records the
    search URL so it can be reviewed manually.

    Raises:
        ModuleError: on any non-200 status.
    """
    from urllib.parse import quote_plus

    # Encode the query: names contain spaces and emails may contain "+" or
    # "&", which would otherwise corrupt the query string.
    url = f"https://ahmia.fi/search/?q={quote_plus(query)}"
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.get(url)
    if resp.status_code != 200:
        raise ModuleError(f"Ahmia error: {resp.status_code}")
    return [{
        "source": "ahmia",
        "type": "dark_web_search",
        "data": {
            "query": query,
            "search_url": url,
            "note": "Manual review recommended",
        },
        "confidence": 0.5,
        "url": url,
    }]
@@ -0,0 +1,50 @@
1
+ """Gravatar profile lookup via MD5 email hash."""
2
+
3
+ import hashlib
4
+ from typing import Any, Dict, List
5
+
6
+ import httpx
7
+
8
+
9
async def run_gravatar(inputs: Dict[str, Any]) -> List[Dict]:
    """Look up the Gravatar profile belonging to ``inputs["email"]``.

    The address is stripped and lower-cased, MD5-hashed, and the hash is used
    to request ``https://www.gravatar.com/<hash>.json``.

    Returns a single ``email_profile`` finding, or ``[]`` when there is no
    email, the response status is not 200, or anything fails along the way.
    """
    address = inputs.get("email")
    if not address:
        return []

    digest = hashlib.md5(address.strip().lower().encode()).hexdigest()

    try:
        limits = httpx.Timeout(10.0, connect=5.0)
        async with httpx.AsyncClient(timeout=limits) as session:
            reply = await session.get(f"https://www.gravatar.com/{digest}.json")

        if reply.status_code != 200:
            return []

        # The first element of "entry" is the profile; an empty list raises
        # IndexError, which the handler below turns into [].
        profile = reply.json().get("entry", [{}])[0]

        raw_name = profile.get("name", {})
        shown_as = profile.get("displayName", "")
        if isinstance(raw_name, dict):
            formatted = raw_name.get("formatted", "")
        else:
            formatted = str(raw_name)
        link = profile.get("profileUrl", f"https://www.gravatar.com/{digest}")

        finding = {
            "source": "gravatar",
            "type": "email_profile",
            "data": {
                "hash": digest,
                "display_name": shown_as,
                "formatted_name": formatted,
            },
            "url": link,
        }
    except Exception:
        return []
    return [finding]
@@ -0,0 +1,56 @@
1
+ """Web presence enumeration via theHarvester."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import shutil
6
+ import tempfile
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List
10
+
11
+ from osintkit.modules import ModuleError
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
# Public mailbox providers: enumerating their domains says nothing about the
# individual, so theHarvester is not run for them.
COMMON_PROVIDERS = ["gmail.com", "yahoo.com", "hotmail.com", "outlook.com", "icloud.com"]


async def run_web_presence(inputs: Dict[str, Any], timeout_seconds: int) -> List[Dict]:
    """Run theHarvester against the email's domain and collect what it reports.

    Returns ``associated_email`` and ``associated_host`` findings read from
    theHarvester's JSON report. Returns ``[]`` when there is no usable email,
    when the domain is in ``COMMON_PROVIDERS``, or when no report was written.

    Raises:
        ModuleError: when theHarvester is not installed, when it does not
            finish within ``timeout_seconds + 30`` seconds, or when the
            report cannot be read.
    """
    email = inputs.get("email")
    if not email or "@" not in email:
        return []

    # Domains are case-insensitive; normalise so "A@Yahoo.COM" is still
    # recognised as a common provider.
    domain = email.rsplit("@", 1)[-1].strip().lower()
    if not domain or domain in COMMON_PROVIDERS:
        return []

    if not shutil.which("theHarvester"):
        raise ModuleError("theHarvester not installed")

    tmp_base = tempfile.mkdtemp(prefix="osintkit_harvester_")
    output_file = Path(tmp_base) / f"harvester_{domain}.json"
    try:
        proc = await asyncio.create_subprocess_exec(
            "theHarvester", "-d", domain, "-b", "all", "-f", str(output_file),
            stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
        )
        try:
            await asyncio.wait_for(proc.communicate(), timeout=timeout_seconds + 30)
        except asyncio.TimeoutError:
            # wait_for only cancels our wait; the child keeps running unless
            # it is killed explicitly.
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # already exited
            await proc.wait()
            raise ModuleError("theHarvester timed out") from None

        if not output_file.exists():
            return []

        findings: List[Dict] = []
        try:
            with open(output_file, encoding="utf-8") as f:
                data = json.load(f)
            for e in data.get("emails", []):
                findings.append({
                    "source": "harvester", "type": "associated_email",
                    "data": {"email": e, "domain": domain},
                    "confidence": 0.7, "url": None,
                })
            for h in data.get("hosts", []):
                findings.append({
                    "source": "harvester", "type": "associated_host",
                    "data": {"host": h},
                    "confidence": 0.6, "url": f"http://{h}",
                })
        except Exception as exc:
            raise ModuleError(str(exc)) from exc
        return findings
    finally:
        # Remove the scratch directory on every exit path, not only success.
        shutil.rmtree(tmp_base, ignore_errors=True)
@@ -0,0 +1,40 @@
1
+ """Password exposure check via HIBP PwnedPasswords."""
2
+
3
+ import hashlib
4
+ import httpx
5
+ from typing import Any, Dict, List
6
+
7
+ from osintkit.modules import ModuleError
8
+
9
+
10
async def run_password_exposure(inputs: Dict[str, Any]) -> List[Dict]:
    """Placeholder for the HIBP email breach lookup; always returns ``[]``.

    The HIBP email breach lookup needs a paid API key (the ``hibp`` field in
    config.yaml), and the k-anonymity PwnedPasswords endpoint accepts only
    password hashes, not email addresses. This module therefore performs no
    request: it logs an informational hint and returns an empty list so the
    scanner does not report a failure.
    """
    import logging

    logging.getLogger(__name__).info(
        "HIBP email lookup requires API key — configure hibp key for results"
    )
    return []
24
+
25
+
26
async def check_password_hash(password: str) -> int:
    """Return how many times ``password`` appears in the Pwned Passwords corpus.

    Only the first five hex characters of the password's SHA-1 are sent to
    the range endpoint; the remaining suffix is matched locally against the
    returned ``SUFFIX:COUNT`` lines. Returns 0 when the suffix is not listed.

    Raises:
        ModuleError: on a transport failure or an HTTP error status.
    """
    sha1_hash = hashlib.sha1(password.encode()).hexdigest().upper()
    prefix, suffix = sha1_hash[:5], sha1_hash[5:]

    url = f"https://api.pwnedpasswords.com/range/{prefix}"
    try:
        # Scope the client to the request so its connection pool is closed.
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.get(url)
        # Do not try to parse an error page as hash lines.
        resp.raise_for_status()
    except httpx.HTTPError as e:
        raise ModuleError(f"HIBP failed: {e}") from e

    for line in resp.text.splitlines():
        # partition never raises, unlike unpacking split(":") on an odd line.
        candidate, _, count = line.strip().partition(":")
        count = count.strip()
        if candidate.upper() == suffix and count.isdigit():
            return int(count)
    return 0
@@ -0,0 +1,66 @@
1
+ """HIBP k-anonymity password exposure check using SHA1 prefix API."""
2
+
3
+ import hashlib
4
+ import logging
5
+ from typing import Any, Dict, List
6
+
7
+ import httpx
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
async def run_hibp_kanon(inputs: Dict[str, Any]) -> List[Dict]:
    """Look the email's SHA-1 up in the Pwned Passwords range endpoint.

    This is a demonstration of the k-anonymity prefix pattern rather than a
    real password check: the stripped, lower-cased email address is hashed as
    a stand-in value, the first five hex characters are sent to the range
    endpoint, and the rest of the hash is matched locally against the reply.

    Returns a single ``password_exposure`` finding carrying the matched
    count, or ``[]`` when there is no email, the status is not 200, no
    non-zero match exists, or anything fails along the way.
    """
    address = inputs.get("email")
    if not address:
        return []

    try:
        digest = hashlib.sha1(address.strip().lower().encode()).hexdigest().upper()
        head, tail = digest[:5], digest[5:]

        limits = httpx.Timeout(10.0, connect=5.0)
        async with httpx.AsyncClient(timeout=limits) as session:
            reply = await session.get(
                f"https://api.pwnedpasswords.com/range/{head}",
                headers={"Add-Padding": "true"},
            )

        if reply.status_code != 200:
            return []

        hits = 0
        for row in reply.text.splitlines():
            fields = row.strip().split(":")
            if len(fields) != 2 or fields[0].upper() != tail:
                continue
            # First line with our suffix decides; an unparsable count stays 0.
            try:
                hits = int(fields[1])
            except ValueError:
                pass
            break

        if hits == 0:
            return []

        logger.info(f"hibp_kanon: email SHA1 prefix {head} found {hits} times in pwned passwords")
        finding = {
            "source": "hibp_kanon",
            "type": "password_exposure",
            "data": {
                "hash_prefix": head,
                "count": hits,
                "note": "Email SHA1 hash matched in pwnedpasswords range (non-standard pattern)",
            },
        }
        return [finding]
    except Exception:
        return []
@@ -0,0 +1,39 @@
1
+ """Email account enumeration via Holehe."""
2
+
3
+ import asyncio
4
+ import shutil
5
+ from typing import Any, Dict, List
6
+
7
+ from osintkit.modules import ModuleError
8
+
9
+
10
async def run_email_accounts(inputs: Dict[str, Any], timeout_seconds: int) -> List[Dict]:
    """Enumerate the sites where ``inputs["email"]`` is registered, using Holehe.

    Returns one ``email_account`` finding per ``[+] <site>`` line of Holehe's
    output, or ``[]`` when ``inputs`` has no email.

    Raises:
        ModuleError: when the ``holehe`` executable is not on PATH, or when it
            does not finish within ``timeout_seconds + 10`` seconds.
    """
    email = inputs.get("email")
    if not email:
        return []

    if not shutil.which("holehe"):
        raise ModuleError("Holehe not installed. Install with: pip install holehe")

    # Holehe takes the address as a positional argument; it has no --email
    # flag. --only-used limits output to hits, and --no-color / --no-clear
    # keep terminal control sequences out of the captured text.
    proc = await asyncio.create_subprocess_exec(
        "holehe", email, "--only-used", "--no-color", "--no-clear",
        stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, _stderr = await asyncio.wait_for(
            proc.communicate(), timeout=timeout_seconds + 10
        )
    except asyncio.TimeoutError:
        # wait_for only cancels our wait; the child keeps running unless it
        # is killed explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # already exited
        await proc.wait()
        raise ModuleError("Holehe timed out") from None

    findings: List[Dict] = []
    for line in stdout.decode(errors="replace").splitlines():
        marker, _, rest = line.strip().partition(" ")
        # Match the "[+]" marker exactly. A bare "+" test also fires on
        # plus-addressed emails echoed in Holehe's header.
        if marker != "[+]":
            continue
        tokens = rest.split()
        # Skip the legend line ("[+] Email used, [-] Email not used, ...").
        if not tokens or rest.lower().startswith("email used"):
            continue
        findings.append({
            "source": "holehe", "type": "email_account",
            # The site name is the first token after the marker.
            "data": {"platform": tokens[0], "email": email},
            "confidence": 0.8, "url": None,
        })
    return findings
@@ -0,0 +1,79 @@
1
+ """Offline phone number analysis using the phonenumbers library."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
async def run_libphonenumber(inputs: Dict[str, Any]) -> List[Dict]:
    """Describe ``inputs["phone"]`` offline with the ``phonenumbers`` library.

    The number is parsed without a region first and, if that fails, again
    with "US" as the default region. The single finding reports carrier,
    region, number type, validity and the E.164 form.

    Returns ``[]`` when there is no phone input, when ``phonenumbers`` is not
    installed, or when the number cannot be parsed either way.
    """
    phone = inputs.get("phone")
    if not phone:
        return []

    try:
        import phonenumbers
        from phonenumbers import carrier, geocoder, number_type, is_valid_number, format_number
        from phonenumbers import PhoneNumberFormat, PhoneNumberType
    except ImportError:
        logger.warning("phonenumbers library not installed; skipping phone analysis")
        return []

    # Try without a region hint, then with "US"; give up if both fail.
    for region_hint in (None, "US"):
        try:
            parsed = phonenumbers.parse(phone, region_hint)
        except phonenumbers.NumberParseException:
            continue
        break
    else:
        logger.warning(f"Could not parse phone number: {phone!r}")
        return []

    def _or_unknown(lookup):
        # Metadata lookups are best-effort: any failure or empty answer
        # is reported as "unknown".
        try:
            return lookup() or "unknown"
        except Exception:
            return "unknown"

    valid = is_valid_number(parsed)
    e164 = format_number(parsed, PhoneNumberFormat.E164)
    carrier_name = _or_unknown(lambda: carrier.name_for_number(parsed, "en"))
    region = _or_unknown(lambda: geocoder.description_for_number(parsed, "en"))

    # Each label is the lower-case form of its PhoneNumberType attribute name.
    labels = (
        "mobile", "fixed_line", "fixed_line_or_mobile", "toll_free",
        "premium_rate", "shared_cost", "voip", "personal_number",
        "pager", "uan", "voicemail", "unknown",
    )
    type_names = {getattr(PhoneNumberType, label.upper()): label for label in labels}
    number_type_str = type_names.get(number_type(parsed), "unknown")

    details = {
        "carrier": carrier_name,
        "region": region,
        "number_type": number_type_str,
        "is_valid": valid,
        "e164_format": e164,
    }
    return [{
        "source": "libphonenumber",
        "type": "phone_info",
        "data": details,
        "url": None,
    }]
@@ -0,0 +1,55 @@
1
+ """Paste site search via Intelbase or psbdmp."""
2
+
3
+ import httpx
4
+ from typing import Any, Dict, List
5
+
6
+ from osintkit.config import APIKeys
7
+ from osintkit.modules import ModuleError
8
+
9
+
10
async def run_paste_sites(inputs: Dict[str, Any], api_keys: APIKeys) -> List[Dict]:
    """Search paste sites for the email address in ``inputs``.

    Intelbase is used when its key is configured; if it is not, or it raises
    ``ModuleError``, psbdmp.ws is tried. Any exception from the psbdmp search
    results in ``[]``, as does a missing email.
    """
    email = inputs.get("email")
    if not email:
        return []

    intelbase_key = api_keys.intelbase
    if intelbase_key:
        try:
            return await _search_intelbase_paste(email, intelbase_key)
        except ModuleError:
            pass  # fall through to psbdmp

    try:
        findings = await _search_psbdmp(email)
    except Exception:
        # Best-effort fallback: every failure is reported as "nothing found".
        findings = []
    return findings
27
+
28
+
29
async def _search_intelbase_paste(email: str, api_key: str) -> List[Dict]:
    """POST ``email`` to the Intelbase paste search endpoint.

    Returns one finding per entry of the response's ``results`` list.

    Raises:
        ModuleError: on any non-200 status.
    """
    url = "https://api.intelbase.is/v1/paste/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=60, headers=headers) as client:
        resp = await client.post(url, json={"query": email})
    if resp.status_code != 200:
        raise ModuleError(f"Intelbase error: {resp.status_code}")
    return [
        {
            "source": "intelbase",
            "type": "paste",
            "data": {
                "paste_id": r.get("id"),
                "site": r.get("site"),
                "date": r.get("date"),
            },
            "confidence": 0.75,
            "url": r.get("url"),
        }
        for r in resp.json().get("results") or []
    ]
41
+
42
+
43
async def _search_psbdmp(email: str) -> List[Dict]:
    """Search psbdmp.ws for pastes mentioning ``email``.

    Returns one finding per entry of the response's ``data`` list, or ``[]``
    when the service answers 404.

    Raises:
        ModuleError: on any other non-200 status.
    """
    from urllib.parse import quote

    # The address is a path segment here, so it must be percent-encoded.
    url = f"https://psbdmp.ws/api/v3/search/{quote(email, safe='')}"
    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.get(url)
    if resp.status_code == 404:
        return []
    if resp.status_code != 200:
        raise ModuleError(f"psbdmp error: {resp.status_code}")
    return [
        {
            "source": "psbdmp",
            "type": "paste",
            "data": {"paste_id": p.get("id"), "date": p.get("date")},
            "confidence": 0.65,
            "url": f"https://psbdmp.ws/{p.get('id')}",
        }
        for p in resp.json().get("data") or []
    ]
@@ -0,0 +1,32 @@
1
+ """Phone validation via NumVerify."""
2
+
3
+ import httpx
4
+ from typing import Any, Dict, List
5
+
6
+ from osintkit.config import APIKeys
7
+ from osintkit.modules import ModuleError
8
+
9
+
10
async def run_phone(inputs: Dict[str, Any], api_keys: APIKeys) -> List[Dict]:
    """Validate ``inputs["phone"]`` with the NumVerify API.

    Returns a single ``phone_info`` finding when NumVerify marks the number
    valid. Returns ``[]`` when there is no phone input, no NumVerify key is
    configured, or the number is reported as not valid.

    Raises:
        ModuleError: on a non-200 status, or when the response body carries
            an ``error`` object.
    """
    phone = inputs.get("phone")
    if not phone or not api_keys.numverify:
        return []

    # Drop the leading "+" and the usual separators before sending.
    clean = phone.lstrip("+")
    for separator in (" ", "-", "(", ")"):
        clean = clean.replace(separator, "")
    url = "https://apilayer.net/api/validate"

    # Scope the client to the request so its connection pool is always closed.
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.get(
            url,
            params={"access_key": api_keys.numverify, "number": clean, "format": 1},
        )

    if resp.status_code != 200:
        raise ModuleError(f"NumVerify error: {resp.status_code}")

    data = resp.json()
    # An error payload must not be mistaken for "number is not valid".
    # NOTE(review): assumes apilayer reports API errors (bad key, quota) in
    # an "error" object with "type"/"code" on an HTTP 200 reply — confirm.
    error = data.get("error")
    if error:
        detail = error.get("type") or error.get("code") if isinstance(error, dict) else error
        raise ModuleError(f"NumVerify error: {detail}")
    if not data.get("valid"):
        return []

    return [{
        "source": "numverify",
        "type": "phone_info",
        "data": {
            "number": phone,
            "country": data.get("country_name"),
            "carrier": data.get("carrier"),
            "line_type": data.get("line_type"),
        },
        "confidence": 0.9,
        "url": None,
    }]
+ "confidence": 0.9, "url": None}]