osintkit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +86 -0
  2. package/bin/osintkit.js +7 -0
  3. package/osintkit/__init__.py +3 -0
  4. package/osintkit/__pycache__/__init__.cpython-311.pyc +0 -0
  5. package/osintkit/__pycache__/cli.cpython-311.pyc +0 -0
  6. package/osintkit/__pycache__/config.cpython-311.pyc +0 -0
  7. package/osintkit/__pycache__/profiles.cpython-311.pyc +0 -0
  8. package/osintkit/__pycache__/risk.cpython-311.pyc +0 -0
  9. package/osintkit/__pycache__/scanner.cpython-311.pyc +0 -0
  10. package/osintkit/cli.py +613 -0
  11. package/osintkit/config.py +51 -0
  12. package/osintkit/modules/__init__.py +6 -0
  13. package/osintkit/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  14. package/osintkit/modules/__pycache__/breach.cpython-311.pyc +0 -0
  15. package/osintkit/modules/__pycache__/brokers.cpython-311.pyc +0 -0
  16. package/osintkit/modules/__pycache__/certs.cpython-311.pyc +0 -0
  17. package/osintkit/modules/__pycache__/dark_web.cpython-311.pyc +0 -0
  18. package/osintkit/modules/__pycache__/gravatar.cpython-311.pyc +0 -0
  19. package/osintkit/modules/__pycache__/harvester.cpython-311.pyc +0 -0
  20. package/osintkit/modules/__pycache__/hibp.cpython-311.pyc +0 -0
  21. package/osintkit/modules/__pycache__/hibp_kanon.cpython-311.pyc +0 -0
  22. package/osintkit/modules/__pycache__/holehe.cpython-311.pyc +0 -0
  23. package/osintkit/modules/__pycache__/libphonenumber_info.cpython-311.pyc +0 -0
  24. package/osintkit/modules/__pycache__/paste.cpython-311.pyc +0 -0
  25. package/osintkit/modules/__pycache__/phone.cpython-311.pyc +0 -0
  26. package/osintkit/modules/__pycache__/sherlock.cpython-311.pyc +0 -0
  27. package/osintkit/modules/__pycache__/social.cpython-311.pyc +0 -0
  28. package/osintkit/modules/__pycache__/wayback.cpython-311.pyc +0 -0
  29. package/osintkit/modules/breach.py +82 -0
  30. package/osintkit/modules/brokers.py +56 -0
  31. package/osintkit/modules/certs.py +42 -0
  32. package/osintkit/modules/dark_web.py +51 -0
  33. package/osintkit/modules/gravatar.py +50 -0
  34. package/osintkit/modules/harvester.py +56 -0
  35. package/osintkit/modules/hibp.py +40 -0
  36. package/osintkit/modules/hibp_kanon.py +66 -0
  37. package/osintkit/modules/holehe.py +39 -0
  38. package/osintkit/modules/libphonenumber_info.py +79 -0
  39. package/osintkit/modules/paste.py +55 -0
  40. package/osintkit/modules/phone.py +32 -0
  41. package/osintkit/modules/sherlock.py +48 -0
  42. package/osintkit/modules/social.py +58 -0
  43. package/osintkit/modules/stage2/__init__.py +1 -0
  44. package/osintkit/modules/stage2/github_api.py +65 -0
  45. package/osintkit/modules/stage2/hunter.py +64 -0
  46. package/osintkit/modules/stage2/leakcheck.py +58 -0
  47. package/osintkit/modules/stage2/numverify.py +62 -0
  48. package/osintkit/modules/stage2/securitytrails.py +65 -0
  49. package/osintkit/modules/wayback.py +70 -0
  50. package/osintkit/output/__init__.py +1 -0
  51. package/osintkit/output/__pycache__/__init__.cpython-311.pyc +0 -0
  52. package/osintkit/output/__pycache__/html_writer.cpython-311.pyc +0 -0
  53. package/osintkit/output/__pycache__/json_writer.cpython-311.pyc +0 -0
  54. package/osintkit/output/__pycache__/md_writer.cpython-311.pyc +0 -0
  55. package/osintkit/output/html_writer.py +36 -0
  56. package/osintkit/output/json_writer.py +31 -0
  57. package/osintkit/output/md_writer.py +115 -0
  58. package/osintkit/output/templates/report.html +74 -0
  59. package/osintkit/profiles.py +116 -0
  60. package/osintkit/risk.py +42 -0
  61. package/osintkit/scanner.py +240 -0
  62. package/osintkit/setup.py +157 -0
  63. package/package.json +25 -0
  64. package/pyproject.toml +44 -0
  65. package/requirements.txt +9 -0
@@ -0,0 +1,48 @@
1
+ """Sherlock social profile enumeration module."""
2
+
3
+ import asyncio
4
+ import shutil
5
+ from typing import Any, Dict, List
6
+
7
+
8
async def run_sherlock(inputs: Dict[str, Any], timeout_seconds: int) -> List[Dict]:
    """Run the ``sherlock`` CLI for a username and collect the profiles it reports.

    Args:
        inputs: Scan inputs; only the ``"username"`` key is read.
        timeout_seconds: Base time budget. The subprocess is allowed
            ``timeout_seconds + 15`` seconds before it is abandoned.

    Returns:
        One ``social_profile`` finding per ``[+]`` line printed by sherlock.
        Returns ``[]`` when no username is given, when ``sherlock`` is not on
        PATH, when the process cannot be started, or when it times out.
    """
    username = inputs.get("username")
    if not username:
        return []

    if not shutil.which("sherlock"):
        return []

    try:
        proc = await asyncio.create_subprocess_exec(
            "sherlock", username, "--print-found", "--timeout", "10",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except (OSError, ValueError, TypeError):
        # Best effort: a binary that vanished or an unusable argument means
        # "no findings", not a failed scan.
        return []

    try:
        stdout, _ = await asyncio.wait_for(
            proc.communicate(), timeout=timeout_seconds + 15
        )
    except (asyncio.TimeoutError, OSError):
        return []
    finally:
        # wait_for() only cancels communicate(); without an explicit kill the
        # child keeps running after a timeout or cancellation.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # exited between the check and the kill
            await proc.wait()

    findings = []
    for raw_line in stdout.decode(errors="replace").splitlines():
        line = raw_line.strip()
        if not line.startswith("[+]"):
            continue
        # Expected shape: "[+] Platform: https://..."
        platform, sep, rest = line[3:].strip().partition(":")
        findings.append({
            "source": "sherlock",
            "type": "social_profile",
            "data": {"platform": platform.strip(), "username": username},
            "url": rest.strip() if sep else None,
        })
    return findings
@@ -0,0 +1,58 @@
1
+ """Social profile enumeration via Maigret."""
2
+
3
+ import asyncio
4
+ import shutil
5
+ import json
6
+ from typing import Any, Dict, List
7
+
8
+ from osintkit.modules import ModuleError
9
+
10
+
11
def _parse_maigret_report(report_path, username: str) -> List[Dict]:
    """Turn a Maigret "simple" JSON report file into social_profile findings.

    ``report_path`` is a ``pathlib.Path``. An unreadable, missing or malformed
    report yields ``[]``; entries that are not shaped as expected are skipped
    individually instead of aborting the whole parse.
    """
    try:
        raw = json.loads(report_path.read_text())
    except (OSError, ValueError):
        # Missing file, decode failure or invalid JSON: nothing to report.
        return []
    if not isinstance(raw, dict):
        return []

    findings = []
    for site, data in raw.items():
        if not isinstance(data, dict):
            continue
        status = data.get("status")
        if not isinstance(status, dict):
            continue
        if status.get("id") in ("claimed", "exists"):
            findings.append({
                "source": "maigret", "type": "social_profile",
                "data": {"platform": site, "username": username},
                "confidence": 0.9, "url": data.get("url_user"),
            })
    return findings


async def run_social_profiles(inputs: Dict[str, Any], timeout_seconds: int) -> List[Dict]:
    """Run social profile enumeration using Maigret.

    Args:
        inputs: Scan inputs; only the ``"username"`` key is read.
        timeout_seconds: Passed to Maigret as ``--timeout``; the subprocess
            itself is allowed ``timeout_seconds + 30`` seconds.

    Returns:
        One ``social_profile`` finding per report entry whose status id is
        ``"claimed"`` or ``"exists"``; ``[]`` when no username is given or the
        report is missing or unreadable.

    Raises:
        ModuleError: if ``maigret`` is not on PATH or the run times out.
    """
    import pathlib
    import tempfile

    username = inputs.get("username")
    if not username:
        return []

    if not shutil.which("maigret"):
        raise ModuleError("Maigret not installed. Install with: pip install maigret")

    tmpdir = tempfile.mkdtemp(prefix="osintkit_maigret_")
    try:
        proc = await asyncio.create_subprocess_exec(
            "maigret", username,
            "-J", "simple",
            "--folderoutput", tmpdir,
            "--timeout", str(timeout_seconds),
            "--no-progressbar",
            # Only the report file is consumed, so both streams are discarded.
            stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.DEVNULL,
        )
        try:
            await asyncio.wait_for(proc.communicate(), timeout=timeout_seconds + 30)
        except asyncio.TimeoutError:
            proc.kill()
            await proc.wait()
            raise ModuleError("Maigret timed out") from None

        report_path = pathlib.Path(tmpdir) / f"report_{username}_simple.json"
        return _parse_maigret_report(report_path, username)
    finally:
        # Runs on every exit path, so a timeout or error no longer leaves the
        # temporary report directory behind.
        shutil.rmtree(tmpdir, ignore_errors=True)
@@ -0,0 +1 @@
1
+ """Stage 2 OSINT modules — require API keys."""
@@ -0,0 +1,65 @@
1
+ """GitHub user lookup via GitHub API (Stage 2 — requires API token)."""
2
+
3
+ from typing import Dict, List
4
+
5
+ import httpx
6
+
7
+
8
async def run(inputs: dict, api_key: str) -> List[Dict]:
    """Look up a GitHub user profile via the GitHub REST API.

    Args:
        inputs: dict with at least a 'username' key (handle).
        api_key: GitHub personal access token.

    Returns:
        List with a single GitHub profile finding, or [] when no username is
        given, the request fails, the response is not 2xx (including 404), or
        the body is not a JSON object.

    Raises:
        Exception: "429 rate limited" on HTTP 429, "401 invalid key" on
            HTTP 401/403.
    """
    from urllib.parse import quote

    username = inputs.get("username")
    if not username:
        return []

    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as client:
            response = await client.get(
                # Escape the handle so characters such as "/" or "?" cannot
                # alter the request path.
                f"https://api.github.com/users/{quote(str(username), safe='')}",
                headers={
                    "Authorization": f"token {api_key}",
                    "Accept": "application/vnd.github.v3+json",
                },
            )
    except (httpx.HTTPError, httpx.InvalidURL):
        # Transport problems are treated as "no result" (best effort).
        return []

    # Status handling sits outside the try block, so the two signalling
    # exceptions no longer need to be recognised by their message text.
    if response.status_code == 429:
        raise Exception("429 rate limited")
    if response.status_code in (401, 403):
        raise Exception("401 invalid key")
    if not 200 <= response.status_code < 300:
        return []

    try:
        data = response.json()
    except ValueError:
        return []
    if not isinstance(data, dict):
        return []

    profile_fields = (
        "login", "name", "bio", "company", "location", "email",
        "public_repos", "followers", "following", "created_at",
    )
    return [{
        "source": "github_api",
        "type": "social_profile",
        "data": {field: data.get(field) for field in profile_fields},
        "url": data.get("html_url"),
    }]
@@ -0,0 +1,64 @@
1
+ """Hunter.io email verifier (Stage 2 — requires API key)."""
2
+
3
+ from typing import Dict, List
4
+
5
+ import httpx
6
+
7
+
8
async def run(inputs: dict, api_key: str) -> List[Dict]:
    """Verify an email address via the Hunter.io email-verifier endpoint.

    Args:
        inputs: dict with at least an 'email' key.
        api_key: Hunter.io API key.

    Returns:
        List with a single email verification finding, or [] when no email is
        given, the request fails, the response is not 2xx, or the body carries
        no usable ``data`` object.

    Raises:
        Exception: "429 rate limited" on HTTP 429, "401 invalid key" on
            HTTP 401/403.
    """
    email = inputs.get("email")
    if not email:
        return []

    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as client:
            response = await client.get(
                "https://api.hunter.io/v2/email-verifier",
                params={"email": email, "api_key": api_key},
            )
    except (httpx.HTTPError, httpx.InvalidURL):
        # Transport problems are treated as "no result" (best effort).
        return []

    # Status handling sits outside the try block, so the two signalling
    # exceptions no longer need to be recognised by their message text.
    if response.status_code == 429:
        raise Exception("429 rate limited")
    if response.status_code in (401, 403):
        raise Exception("401 invalid key")
    if not 200 <= response.status_code < 300:
        return []

    try:
        data = response.json()
    except ValueError:
        return []
    result = data.get("data") if isinstance(data, dict) else None
    if not isinstance(result, dict) or not result:
        return []

    verifier_fields = (
        "status", "score", "regexp", "gibberish", "disposable", "webmail",
        "mx_records", "smtp_server", "smtp_check", "accept_all",
    )
    details = {field: result.get(field) for field in verifier_fields}
    details["email"] = email
    return [{
        "source": "hunter",
        "type": "email_verification",
        "data": details,
        "url": None,
    }]
@@ -0,0 +1,58 @@
1
+ """LeakCheck.io email breach lookup (Stage 2 — requires API key)."""
2
+
3
+ from typing import Dict, List
4
+
5
+ import httpx
6
+
7
+
8
async def run(inputs: dict, api_key: str) -> List[Dict]:
    """Query LeakCheck.io for email breach records.

    Args:
        inputs: dict with at least an 'email' key.
        api_key: LeakCheck API key.

    Returns:
        List of breach finding dicts, or [] when no email is given, the
        request fails, the response is not 2xx, or the body reports no
        successful result.

    Raises:
        Exception: "429 rate limited" on HTTP 429, "401 invalid key" on
            HTTP 401/403.
    """
    email = inputs.get("email")
    if not email:
        return []

    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as client:
            response = await client.get(
                "https://leakcheck.io/api/public",
                params={"key": api_key, "check": email},
            )
    except (httpx.HTTPError, httpx.InvalidURL):
        # Transport problems are treated as "no result" (best effort).
        return []

    # Status handling sits outside the try block, so the two signalling
    # exceptions no longer need to be recognised by their message text.
    if response.status_code == 429:
        raise Exception("429 rate limited")
    if response.status_code in (401, 403):
        raise Exception("401 invalid key")
    if not 200 <= response.status_code < 300:
        return []

    try:
        data = response.json()
    except ValueError:
        return []
    if not isinstance(data, dict) or not data.get("success"):
        return []
    records = data.get("result")
    if not isinstance(records, list) or not records:
        return []

    findings = []
    for record in records:
        if not isinstance(record, dict):
            # Skip a malformed entry instead of discarding every finding.
            continue
        source = record.get("source", {})
        source_name = source.get("name", "unknown") if isinstance(source, dict) else "unknown"
        findings.append({
            "source": "leakcheck",
            "type": "breach_record",
            "data": {
                "source_name": source_name,
                "fields": record.get("fields", []),
                "email": email,
            },
            "url": None,
        })
    return findings
@@ -0,0 +1,62 @@
1
+ """NumVerify phone validation (Stage 2 — requires API key)."""
2
+
3
+ from typing import Dict, List
4
+
5
+ import httpx
6
+
7
+
8
async def run(inputs: dict, api_key: str) -> List[Dict]:
    """Validate a phone number via the NumVerify/apilayer API.

    Args:
        inputs: dict with at least a 'phone' key.
        api_key: NumVerify/apilayer access key.

    Returns:
        List with a single phone validation finding, or [] when no phone is
        given, the request fails, the response is not 2xx, the body is not a
        JSON object, or the body is not valid and carries an ``error`` entry.

    Raises:
        Exception: "429 rate limited" on HTTP 429, "401 invalid key" on
            HTTP 401/403.
    """
    phone = inputs.get("phone")
    if not phone:
        return []

    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as client:
            response = await client.get(
                "https://apilayer.net/api/validate",
                params={"access_key": api_key, "number": phone},
            )
    except (httpx.HTTPError, httpx.InvalidURL):
        # Transport problems are treated as "no result" (best effort).
        return []

    # Status handling sits outside the try block, so the two signalling
    # exceptions no longer need to be recognised by their message text.
    if response.status_code == 429:
        raise Exception("429 rate limited")
    if response.status_code in (401, 403):
        raise Exception("401 invalid key")
    if not 200 <= response.status_code < 300:
        return []

    try:
        data = response.json()
    except ValueError:
        return []
    if not isinstance(data, dict):
        return []
    if not data.get("valid") and data.get("error"):
        return []

    validation_fields = (
        "valid", "number", "local_format", "international_format",
        "country_prefix", "country_code", "country_name", "location",
        "carrier", "line_type",
    )
    return [{
        "source": "numverify",
        "type": "phone_validation",
        "data": {field: data.get(field) for field in validation_fields},
        "url": None,
    }]
@@ -0,0 +1,65 @@
1
+ """SecurityTrails subdomain enumeration (Stage 2 — requires API key)."""
2
+
3
+ from typing import Dict, List
4
+
5
+ import httpx
6
+
7
+
8
async def run(inputs: dict, api_key: str) -> List[Dict]:
    """Enumerate subdomains for the target's domain via the SecurityTrails API.

    Args:
        inputs: dict with a 'domain' key, or an 'email' key from which the
            part after the first "@" is taken as the domain.
        api_key: SecurityTrails API key.

    Returns:
        List of subdomain findings, or [] when no domain can be determined,
        the request fails, the response is not 2xx, or the body lists no
        subdomains.

    Raises:
        Exception: "429 rate limited" on HTTP 429, "401 invalid key" on
            HTTP 401/403.
    """
    from urllib.parse import quote

    domain = inputs.get("domain")
    if not domain:
        email = inputs.get("email")
        if email and "@" in email:
            domain = email.split("@", 1)[1].strip()

    if not domain:
        return []

    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as client:
            response = await client.get(
                # Escape the domain so characters such as "/" or "?" cannot
                # alter the request path.
                f"https://api.securitytrails.com/v1/domain/{quote(str(domain), safe='')}/subdomains",
                headers={"APIKEY": api_key},
            )
    except (httpx.HTTPError, httpx.InvalidURL):
        # Transport problems are treated as "no result" (best effort).
        return []

    # Status handling sits outside the try block, so the two signalling
    # exceptions no longer need to be recognised by their message text.
    if response.status_code == 429:
        raise Exception("429 rate limited")
    if response.status_code in (401, 403):
        raise Exception("401 invalid key")
    if not 200 <= response.status_code < 300:
        return []

    try:
        data = response.json()
    except ValueError:
        return []
    subdomains = data.get("subdomains", []) if isinstance(data, dict) else []
    if not isinstance(subdomains, list) or not subdomains:
        return []

    findings = []
    for sub in subdomains:
        fqdn = f"{sub}.{domain}"
        findings.append({
            "source": "securitytrails",
            "type": "subdomain",
            "data": {
                "subdomain": sub,
                "domain": domain,
                "fqdn": fqdn,
            },
            "url": f"https://{fqdn}",
        })
    return findings
@@ -0,0 +1,70 @@
1
+ """Wayback Machine CDX API lookup for email domain and username."""
2
+
3
+ from typing import Any, Dict, List
4
+
5
+ import httpx
6
+
7
+
8
async def run_wayback(inputs: Dict[str, Any]) -> List[Dict]:
    """Query the Wayback CDX API for archived URLs related to the target inputs.

    The domain part of ``inputs["email"]`` and ``inputs["username"]`` are each
    looked up as ``*.<target>`` with at most five rows requested per target.

    Args:
        inputs: Scan inputs; the ``"email"`` and ``"username"`` keys are read.

    Returns:
        A list of ``web_archive`` findings. A target whose lookup fails or
        returns an unexpected payload contributes nothing; the remaining
        targets are still processed.
    """
    targets = []

    email = inputs.get("email")
    if email and "@" in email:
        domain = email.split("@", 1)[1].strip()
        if domain:
            targets.append(domain)

    username = inputs.get("username")
    if username:
        # NOTE(review): the username is queried as "*.<username>", the same
        # pattern used for domains -- confirm this is the intended CDX query.
        targets.append(username)

    if not targets:
        return []

    findings = []

    async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as client:
        for target in targets:
            try:
                # HTTPS keeps the looked-up domain/username off the wire in
                # clear text.
                response = await client.get(
                    "https://web.archive.org/cdx/search/cdx",
                    params={
                        "url": f"*.{target}",
                        "output": "json",
                        "limit": "5",
                        "fl": "original,timestamp",
                    },
                )
                if response.status_code != 200:
                    continue
                rows = response.json()
            except (httpx.HTTPError, httpx.InvalidURL, ValueError):
                # Best effort per target: a failed lookup must not abort the rest.
                continue

            # First row is the header when output=json.
            if not isinstance(rows, list) or len(rows) < 2:
                continue

            for row in rows[1:]:
                if not isinstance(row, (list, tuple)) or len(row) < 2:
                    continue
                archived_url, timestamp = row[0], row[1]
                findings.append({
                    "source": "wayback",
                    "type": "web_archive",
                    "data": {
                        "url": archived_url,
                        "timestamp": timestamp,
                        "target": target,
                    },
                    "url": f"https://web.archive.org/web/{timestamp}/{archived_url}",
                })

    return findings
@@ -0,0 +1 @@
1
+ """osintkit output writers."""
@@ -0,0 +1,36 @@
1
+ """HTML report writer using Jinja2."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Optional
5
+ from jinja2 import Environment, FileSystemLoader
6
+
7
+
8
def _scrub_keys(content: str, api_keys: Optional[Dict[str, str]] = None) -> str:
    """Replace any API key values found in content with [REDACTED].

    Args:
        content: Rendered text to clean.
        api_keys: Mapping of key name to secret value. Only string values
            longer than six characters are scrubbed; others are ignored.

    Returns:
        ``content`` with every occurrence of each qualifying value replaced.
    """
    if not api_keys:
        return content

    secrets = {
        value for value in api_keys.values()
        if isinstance(value, str) and len(value) > 6
    }
    # Longest first: if one key is a prefix/substring of another, replacing the
    # shorter one first would leave the rest of the longer key in the output.
    for secret in sorted(secrets, key=lambda s: (-len(s), s)):
        content = content.replace(secret, "[REDACTED]")
    return content
16
+
17
+
18
def write_html(
    findings: Dict[str, Any],
    output_dir: Path,
    api_keys: Optional[Dict[str, str]] = None,
) -> Path:
    """Render the HTML report via the Jinja2 ``report.html`` template.

    Args:
        findings: Template context; its items are passed to the template as
            keyword arguments.
        output_dir: Directory that receives ``report.html``.
        api_keys: Secret values to scrub from the rendered output before
            writing.

    Returns:
        Path of the written report file.
    """
    templates_dir = Path(__file__).parent / "templates"
    # Findings carry text gathered from third-party sources, so values are
    # HTML-escaped on output instead of being rendered as raw markup.
    env = Environment(loader=FileSystemLoader(templates_dir), autoescape=True)
    template = env.get_template("report.html")

    html_content = template.render(**findings)
    html_content = _scrub_keys(html_content, api_keys)

    output_file = output_dir / "report.html"
    output_file.write_text(html_content, encoding="utf-8")
    return output_file
@@ -0,0 +1,31 @@
1
+ """JSON output writer."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Optional
6
+
7
+
8
def _scrub_keys(content: str, api_keys: Optional[Dict[str, str]] = None) -> str:
    """Replace any API key values found in content with [REDACTED].

    Both the raw value and its JSON-escaped spelling are replaced, because the
    content is JSON text in which quotes, backslashes and non-ASCII characters
    appear escaped.

    Args:
        content: Serialized text to clean.
        api_keys: Mapping of key name to secret value. Only string values
            longer than six characters are scrubbed; others are ignored.

    Returns:
        ``content`` with every occurrence of each qualifying value replaced.
    """
    if not api_keys:
        return content

    secrets = {
        value for value in api_keys.values()
        if isinstance(value, str) and len(value) > 6
    }
    # Longest first: if one key is a prefix/substring of another, replacing the
    # shorter one first would leave the rest of the longer key in the output.
    for secret in sorted(secrets, key=lambda s: (-len(s), s)):
        escaped = json.dumps(secret)[1:-1]  # the value as it appears inside JSON
        if escaped != secret:
            content = content.replace(escaped, "[REDACTED]")
        content = content.replace(secret, "[REDACTED]")
    return content
16
+
17
+
18
def write_json(
    findings: Dict[str, Any],
    output_dir: Path,
    api_keys: Optional[Dict[str, str]] = None,
) -> Path:
    """Serialize findings to ``findings.json`` inside ``output_dir``.

    The data is dumped with a 2-space indent, values that are not natively
    JSON-serializable are converted with ``str``, and API key values are
    scrubbed from the text before it is written as UTF-8.

    Returns:
        Path of the written file.
    """
    target = output_dir / "findings.json"
    serialized = _scrub_keys(
        json.dumps(findings, indent=2, default=str),
        api_keys,
    )
    target.write_text(serialized, encoding="utf-8")
    return target