emlpeek 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emlpeek-1.0.0/PKG-INFO +15 -0
- emlpeek-1.0.0/README.md +2 -0
- emlpeek-1.0.0/pyproject.toml +25 -0
- emlpeek-1.0.0/setup.cfg +4 -0
- emlpeek-1.0.0/src/emlpeek.egg-info/PKG-INFO +15 -0
- emlpeek-1.0.0/src/emlpeek.egg-info/SOURCES.txt +11 -0
- emlpeek-1.0.0/src/emlpeek.egg-info/dependency_links.txt +1 -0
- emlpeek-1.0.0/src/emlpeek.egg-info/top_level.txt +1 -0
- emlpeek-1.0.0/src/soc_email_investigator/__init__.py +0 -0
- emlpeek-1.0.0/src/soc_email_investigator/extract.py +125 -0
- emlpeek-1.0.0/src/soc_email_investigator/main.py +62 -0
- emlpeek-1.0.0/src/soc_email_investigator/report.py +59 -0
- emlpeek-1.0.0/src/soc_email_investigator/rules.py +63 -0
emlpeek-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: emlpeek
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Email investigation tool
|
|
5
|
+
Author-email: Your Name <you@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: email,investigation,forensics
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# emlpeek
|
|
15
|
+
Email investigation tool
|
emlpeek-1.0.0/README.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "emlpeek"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Email investigation tool"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Your Name", email = "you@example.com" }]
|
|
13
|
+
keywords = ["email", "investigation", "forensics"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[tool.setuptools]
|
|
21
|
+
package-dir = {"" = "src"}
|
|
22
|
+
|
|
23
|
+
[tool.setuptools.packages.find]
|
|
24
|
+
where = ["src"]
|
|
25
|
+
include = ["soc_email_investigator*"]
|
emlpeek-1.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: emlpeek
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Email investigation tool
|
|
5
|
+
Author-email: Your Name <you@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: email,investigation,forensics
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# emlpeek
|
|
15
|
+
Email investigation tool
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/emlpeek.egg-info/PKG-INFO
|
|
4
|
+
src/emlpeek.egg-info/SOURCES.txt
|
|
5
|
+
src/emlpeek.egg-info/dependency_links.txt
|
|
6
|
+
src/emlpeek.egg-info/top_level.txt
|
|
7
|
+
src/soc_email_investigator/__init__.py
|
|
8
|
+
src/soc_email_investigator/extract.py
|
|
9
|
+
src/soc_email_investigator/main.py
|
|
10
|
+
src/soc_email_investigator/report.py
|
|
11
|
+
src/soc_email_investigator/rules.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
soc_email_investigator
|
|
File without changes
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from email import policy
|
|
3
|
+
from email.parser import BytesParser
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
# Matches an http/https URL up to the first whitespace, quote, angle bracket
# or parenthesis; the scheme is matched case-insensitively.
URL_RE = re.compile(
    r"https?://[^\s\"'<>()]+",
    re.IGNORECASE,
)

# Matches a line that consists solely of one e-mail-like token; the token is
# captured in group 1.
EMAIL_LINE_RE = re.compile(
    r"^\s*([A-Z0-9._%+\-]+@[^>\s]+)\s*$",
    re.IGNORECASE,
)
|
|
9
|
+
|
|
10
|
+
def _extract_address_domain(address: "str | None") -> "str | None":
    """Return the lower-cased domain of the first mailbox in an address header.

    Args:
        address: Raw value of an address header such as ``From`` or
            ``Reply-To`` (for example ``"Name <user@domain.com>"``), or
            ``None`` when the header is absent.

    Returns:
        The part after the last ``@`` of the first parsed mailbox, lower-cased,
        or ``None`` when no mailbox with a non-empty domain can be found.
    """
    # Annotations are quoted so the module still imports on Python 3.9, where
    # ``str | None`` cannot be evaluated at function-definition time.
    if not address:
        return None

    # Local import keeps this block self-contained; the standard-library
    # parser copes with quoted display names, comments and address lists,
    # which naive "<" / ">" splitting does not.
    from email.utils import getaddresses

    for _display_name, addr_spec in getaddresses([str(address)]):
        _local_part, at_sign, domain = addr_spec.rpartition("@")
        domain = domain.strip().lower()
        if at_sign and domain:
            return domain
    return None
|
|
21
|
+
|
|
22
|
+
def _extract_urls(text: str) -> list[str]:
    """Return every http/https URL found in *text*, in order of appearance.

    Matches come from the module-level ``URL_RE`` pattern; punctuation that
    commonly trails a URL in running text is trimmed from the right-hand end
    of each match. Duplicates are kept.
    """
    trailing_punctuation = ").,;:!?\"'"
    if text:
        return [found.rstrip(trailing_punctuation) for found in URL_RE.findall(text)]
    return []
|
|
32
|
+
|
|
33
|
+
def _url_host(url: str) -> "str | None":
    """Return the host component of *url*, or ``None`` if it has none.

    Args:
        url: URL string to inspect.

    Returns:
        The hostname as reported by ``urllib.parse.urlparse`` (lower-cased,
        without port or IPv6 brackets), or ``None`` when the URL has no host
        or cannot be parsed.
    """
    # The return annotation is quoted so the module still imports on
    # Python 3.9, where ``str | None`` cannot be evaluated at definition time.
    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced "[")
        # with ValueError; such a URL simply has no usable host.
        return None
    return host or None
|
|
42
|
+
|
|
43
|
+
def _message_part_text(part) -> str:
    """Return the content of one message part as text (best-effort)."""
    try:
        return str(part.get_content())
    except Exception:
        # Deliberate best-effort: a malformed part must not abort the whole
        # investigation, so fall back to a lossy manual decode.
        payload = part.get_payload(decode=True) or b""
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # The declared charset is unknown to Python; decode as UTF-8
            # rather than crash on a bogus label.
            return payload.decode("utf-8", errors="replace")


def _classify_host(host: "str | None") -> str:
    """Classify a URL host as ip_literal, punycode, hostname or unknown."""
    if not host:
        return "unknown"

    # Local import keeps this block self-contained.
    import ipaddress

    # Dotted-quad shape (kept from the original check, so out-of-range
    # quads such as 999.1.1.1 are still flagged).
    if re.fullmatch(r"\d{1,3}(\.\d{1,3}){3}", host):
        return "ip_literal"
    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass
    else:
        # Valid literal that is not a dotted quad, i.e. IPv6.
        return "ip_literal"

    # An IDN label can appear anywhere in the name (e.g. "www.xn--...com"),
    # so inspect every label rather than only the start of the host.
    if any(label.startswith("xn--") for label in host.split(".")):
        return "punycode"
    return "hostname"


def extract_from_eml(eml_path: Path) -> dict:
    """Parse an .eml file and collect the indicators used by the rules.

    Args:
        eml_path: Path of the message file to read.

    Returns:
        ``{"indicators": {...}}`` where the inner dict holds:
        ``"email"`` (From/Reply-To/To/Subject/Date/Authentication-Results
        header values plus the From and Reply-To domains),
        ``"received_lines"`` (stripped values of every Received header) and
        ``"urls"`` (one ``{"url", "host", "host_type"}`` dict per URL found
        in the body text or the header dump).

    Raises:
        OSError: If the file cannot be read.
    """
    raw = eml_path.read_bytes()
    msg = BytesParser(policy=policy.default).parsebytes(raw)

    # Keep the Received chain separately and a dump of all headers so URLs
    # that appear only in headers are found as well.
    received_lines = []
    header_dump = []
    for name, value in msg.raw_items():
        if name is not None and name.lower() == "received":
            received_lines.append(str(value).strip())
        header_dump.append(f"{name}: {value}\n" if name else f"{value}\n")
    headers_text = "".join(header_dump)

    # Body extraction (best-effort): inline text parts only.
    if msg.is_multipart():
        body_parts = []
        for part in msg.walk():
            disposition = str(part.get("Content-Disposition") or "").lower()
            is_text = part.get_content_type() in ("text/plain", "text/html")
            if is_text and "attachment" not in disposition:
                body_parts.append(_message_part_text(part))
    else:
        body_parts = [_message_part_text(msg)]
    body_text = "\n".join(body_parts)

    from_hdr = msg.get("From")
    reply_to_hdr = msg.get("Reply-To")

    url_indicators = []
    for url in _extract_urls(body_text + "\n" + headers_text):
        host = _url_host(url)
        url_indicators.append({
            "url": url,
            "host": host,
            "host_type": _classify_host(host),
        })

    indicators = {
        "email": {
            "from": from_hdr,
            "from_domain": _extract_address_domain(from_hdr),
            "reply_to": reply_to_hdr,
            "reply_to_domain": _extract_address_domain(reply_to_hdr),
            "to": msg.get("To"),
            "subject": msg.get("Subject"),
            "date": msg.get("Date"),
            "authentication_results": msg.get("Authentication-Results"),
        },
        "received_lines": received_lines,
        "urls": url_indicators,
    }

    return {"indicators": indicators}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
import json
|
|
5
|
+
import hashlib
|
|
6
|
+
|
|
7
|
+
from .extract import extract_from_eml
|
|
8
|
+
from .rules import run_rules
|
|
9
|
+
from .report import write_outputs
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Version string recorded in the evidence manifest.
TOOL_VERSION = "v1.0"
# Tool name recorded in the evidence manifest; no trailing whitespace, so
# consumers comparing against the package name "emlpeek" get an exact match.
TOOL_NAME = "emlpeek"
|
|
14
|
+
|
|
15
|
+
def sha256_file(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *p*.

    The file is read in 1 MiB pieces so large inputs are never held in
    memory as a whole.
    """
    piece_size = 1024 * 1024
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if piece == b"":
                break
            digest.update(piece)
    return digest.hexdigest()
|
|
21
|
+
|
|
22
|
+
def main():
    """Command-line entry point: analyse one .eml file and write the outputs.

    Parses ``input_path`` and ``--out`` from the command line, extracts the
    indicators, runs the rules, builds an evidence manifest (tool identity,
    input path/name/size/SHA-256 and the UTC run time) and hands everything
    to ``write_outputs``.

    Exits through ``argparse`` with a usage error when the input file does
    not exist, instead of failing later with a traceback.
    """
    ap = argparse.ArgumentParser(
        description="Analyse an .eml file and write indicators, signals and a report."
    )
    ap.add_argument("input_path", help="path of the .eml file to analyse")
    ap.add_argument("--out", required=True, help="directory that receives the output files")
    args = ap.parse_args()

    in_path = Path(args.input_path)
    if not in_path.is_file():
        ap.error(f"input file not found: {in_path}")

    eml_data = extract_from_eml(in_path)
    signals = run_rules(eml_data)

    evidence_manifest = {
        "tool_name": TOOL_NAME,
        "tool_version": TOOL_VERSION,
        "input": {
            "path": str(in_path),
            "filename": in_path.name,
            "size_bytes": in_path.stat().st_size,
            "sha256": sha256_file(in_path),
        },
        "run_utc": datetime.now(timezone.utc).isoformat(),
    }

    # Create the output directory only once the analysis has succeeded, so a
    # bad input does not leave an empty directory tree behind.
    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)

    write_outputs(
        out_dir=out_dir,
        indicators=eml_data["indicators"],
        signals=signals,
        verdict=signals["verdict"],
        confidence=signals["confidence"],
        report_context=signals["context"],
        evidence_manifest=evidence_manifest,
    )
|
|
56
|
+
|
|
57
|
+
if __name__ == "__main__":
|
|
58
|
+
main()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
|
|
4
|
+
def write_outputs(out_dir, indicators, signals, verdict, confidence, report_context, evidence_manifest):
    """Write the JSON artefacts and the Markdown report into *out_dir*.

    Files written (all UTF-8): ``indicators.json``, ``signals.json``
    (verdict, confidence and context only), ``evidence_manifest.json`` and
    ``report.md``.

    Args:
        out_dir: Target directory, as a ``str`` or path object; it is
            created if it does not exist yet.
        indicators: Indicator dict; must contain an ``"email"`` mapping and
            may contain a ``"urls"`` list of ``{"url", "host", "host_type"}``
            dicts.
        signals: Full rule result. Not read here; kept so existing callers
            keep working.
        verdict: Verdict label shown in the report.
        confidence: Numeric confidence, rendered with two decimals.
        report_context: Iterable of explanation strings (may be empty).
        evidence_manifest: Manifest dict; its optional ``"run_utc"`` value
            is echoed in the report.
    """
    # Local import keeps this block self-contained.
    from pathlib import Path

    # Accept plain strings as well as Path objects.
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    def _dump_json(filename, payload):
        (out_dir / filename).write_text(
            json.dumps(payload, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )

    _dump_json("indicators.json", indicators)
    _dump_json(
        "signals.json",
        {"verdict": verdict, "confidence": confidence, "context": report_context},
    )
    _dump_json("evidence_manifest.json", evidence_manifest)

    run_utc = evidence_manifest.get("run_utc", "")
    from_domain = indicators["email"].get("from_domain")
    urls = indicators.get("urls", [])

    lines = [
        "# Email Investigation Report",
        "",
        f"- **Verdict:** {verdict}",
        f"- **Confidence:** {confidence:.2f}",
    ]
    if run_utc:
        lines.append(f"- **Run (UTC):** {run_utc}")
    lines += [
        "",
        "## Key evidence",
        f"- From domain: {from_domain}",
        f"- URLs found: {len(urls)}",
    ]
    # Only the first ten URLs are listed to keep the report short.
    for i, u in enumerate(urls[:10], start=1):
        lines.append(f" {i}. {u.get('url')} (host: {u.get('host')}, type: {u.get('host_type')})")
    lines += ["", "## Why this verdict"]
    if report_context:
        lines.extend(f"- {c}" for c in report_context)
    else:
        lines.append("- (no context recorded)")
    lines += [
        "",
        "## Notes",
        "- This is a rule-based system for v1 (it does not guarantee guilt/innocence).",
    ]

    (out_dir / "report.md").write_text("\n".join(lines), encoding="utf-8")
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
def _hosts_related(host: str, base: str) -> bool:
    """Return True when the names are equal or one is a subdomain of the other."""
    return host == base or host.endswith("." + base) or base.endswith("." + host)


def run_rules(eml_data: dict) -> dict:
    """Apply the v1 rule set to extracted indicators and return a verdict.

    Args:
        eml_data: Dict with an ``"indicators"`` entry holding an ``"email"``
            mapping (``from_domain``, ``reply_to_domain``,
            ``authentication_results``) and an optional ``"urls"`` list of
            ``{"url", "host", "host_type"}`` dicts.

    Returns:
        ``{"verdict", "confidence", "context"}`` where verdict is
        ``"phishing"``, ``"legit"`` or ``"uncertain"``, confidence is a float
        and context is a list of explanation strings. The last context entry
        always states whether an Authentication-Results header was present.
    """
    indicators = eml_data["indicators"]
    email = indicators["email"]
    urls = indicators.get("urls", [])

    # The From domain is preferred; Reply-To is only a fallback.
    base_domain = email.get("from_domain") or email.get("reply_to_domain")
    context = []

    ip_hosts = [u.get("host") for u in urls if u.get("host_type") == "ip_literal"]

    if not urls and not base_domain:
        verdict, confidence = "uncertain", 0.2
        context.append("No URLs and no From/Reply-To domains found.")
    elif ip_hosts:
        # Rule 1: IP literal in URL => phishing (first hit is reported).
        verdict, confidence = "phishing", 0.95
        context.append(f"Found URL with IP-literal host: {ip_hosts[0]}")
    else:
        # Rule 2: a URL host unrelated to the sender domain. Names are
        # compared on label boundaries, so look-alikes such as
        # "evil-example.com" or "example.com.evil.net" are not mistaken for
        # "example.com". Sibling subdomains (mail.x.com vs www.x.com) are
        # still treated as unrelated; resolving that needs a public-suffix
        # list, which the standard library does not provide.
        base = base_domain.lower().rstrip(".") if base_domain else ""
        unrelated_host = None
        if base:
            for u in urls:
                host = (u.get("host") or "").lower().rstrip(".")
                if host and not _hosts_related(host, base):
                    unrelated_host = host
                    break

        if unrelated_host:
            verdict, confidence = "phishing", 0.75
            context.append(f"URL host {unrelated_host} does not relate to sender domain {base}.")
        elif urls and base:
            # Rule 3: URLs present, sender known, nothing suspicious matched.
            verdict, confidence = "legit", 0.65
            context.append("URLs found and no strong phishing indicators matched.")
        else:
            verdict, confidence = "uncertain", 0.45
            context.append("Not enough evidence to label as phishing/legit confidently.")

    # Always record authentication-results presence (no guessing); this now
    # also happens on the paths that used to return early.
    if email.get("authentication_results"):
        context.append("Authentication-Results header is present (no pass/fail inferred).")
    else:
        context.append("Authentication-Results header not found (may reduce confidence).")

    return {"verdict": verdict, "confidence": confidence, "context": context}
|