phinet-secure 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
phinet/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ from .model import PHINetBoost
2
+ from .feature_engine import PHINetFeatureEngine
3
+ from .bert_detector import BERTPhishingDetector
4
+
5
+ __all__ = [
6
+ "PHINetBoost",
7
+ "PHINetFeatureEngine",
8
+ "BERTPhishingDetector"
9
+ ]
@@ -0,0 +1,46 @@
1
+ from transformers import BertTokenizer, BertForSequenceClassification
2
+ import torch
3
+
4
+
5
class BERTPhishingDetector:
    """Binary text classifier built on a pretrained BERT encoder.

    NOTE(review): the classification head of ``bert-base-uncased`` is
    randomly initialised by ``from_pretrained(..., num_labels=2)``, so
    predictions are not meaningful until the model is fine-tuned on
    labelled phishing data — confirm intended usage with the authors.
    """

    def __init__(self):
        # Tokenizer and model share the same generic checkpoint name.
        checkpoint = "bert-base-uncased"
        self.tokenizer = BertTokenizer.from_pretrained(checkpoint)
        self.model = BertForSequenceClassification.from_pretrained(
            checkpoint,
            num_labels=2,
        )
        # Inference only: disable dropout etc.
        self.model.eval()

    def preprocess(self, text):
        """Tokenize *text* into model-ready tensors (truncated/padded, max 256 tokens)."""
        return self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=256,
        )

    def predict(self, text):
        """Return the argmax class index (int, 0 or 1) for *text*."""
        encoded = self.preprocess(text)
        # No gradients needed for a forward-only pass.
        with torch.no_grad():
            logits = self.model(**encoded).logits
        return torch.argmax(logits, dim=1).item()
phinet/cli.py ADDED
@@ -0,0 +1,44 @@
1
+ import argparse
2
+ import pandas as pd
3
+
4
+ from phinet import PHINetBoost, PHINetFeatureEngine
5
+
6
+
7
def main():
    """CLI entry point: classify one email supplied via command-line flags.

    NOTE(review): this builds a *fresh* PHINetBoost and fits it on the single
    input row with label [1] before predicting on that same row.  There is no
    pre-trained model involved, so the printed verdict is not a real
    classification; fitting a GradientBoostingClassifier on a single-class y
    is also expected to raise in scikit-learn — TODO: replace with a
    persisted, pre-trained model (PHINetBoost.load) before release.
    """

    parser = argparse.ArgumentParser()

    parser.add_argument("--email_body", required=True)

    parser.add_argument("--url", default="")

    parser.add_argument("--attachment", default="")

    parser.add_argument("--email_id", default="")

    args = parser.parse_args()

    # Single-row frame in the schema PHINetFeatureEngine.transform expects.
    df = pd.DataFrame([{
        "email_id": args.email_id,
        "email_body": args.email_body,
        "urls": args.url,
        "attachments": args.attachment
    }])

    engine = PHINetFeatureEngine()

    X = engine.transform(df)

    model = PHINetBoost()

    # See NOTE above: trains on the very sample it is about to score.
    model.fit(X, [1])

    prediction = model.predict(X)

    if prediction[0] == 1:

        print("⚠ Phishing Email Detected")

    else:

        print("✓ Legitimate Email")
@@ -0,0 +1,48 @@
1
+ from .utils import count_suspicious_words, urgency_score
2
+
3
+
4
class EmailAnalyzer:
    """Extracts simple scalar features from the parts of a raw email record."""

    def body_length(self, text):
        """Return the body length in characters; 0 for None/empty input."""
        if not text:
            return 0

        return len(text)

    def suspicious_word_count(self, text):
        """Return the count of known phishing keywords in *text* (see utils)."""
        return count_suspicious_words(text)

    def urgency_score(self, text):
        """Return the count of urgency phrases in *text* (see utils)."""
        return urgency_score(text)

    def attachment_risk(self, attachments):
        """Return 1 if any risky extension occurs in the attachment string, else 0.

        Matching is by substring, so e.g. "evil.exe.txt" is also flagged.
        """
        if not attachments:
            return 0

        risky = (".exe", ".zip", ".bat", ".scr")
        lowered = attachments.lower()

        return 1 if any(ext in lowered for ext in risky) else 0

    def extract_domain(self, email):
        """Return the domain part of an address, or "unknown" if unparseable.

        Fix: the original bare ``except:`` swallowed everything (including
        SystemExit/KeyboardInterrupt); only AttributeError (non-string input)
        and IndexError (no "@" present) can legitimately occur here.
        """
        try:
            return email.split("@")[1]
        except (AttributeError, IndexError):
            return "unknown"
@@ -0,0 +1,84 @@
1
+ import pandas as pd
2
+
3
+ from sklearn.preprocessing import LabelEncoder
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+
6
+ from .url_analyzer import URLAnalyzer
7
+ from .email_analyzer import EmailAnalyzer
8
+
9
+
10
class PHINetFeatureEngine:
    """Turns a raw email DataFrame into a numeric feature matrix.

    Expected input columns per row: "email_id", "email_body", "urls",
    "attachments".  Output columns: hand-crafted scalar features, an
    integer-encoded sender domain, and up to 200 TF-IDF columns over the
    email body.
    """

    def __init__(self):

        # Helper analyzers producing the scalar features below.
        self.url_analyzer = URLAnalyzer()

        self.email_analyzer = EmailAnalyzer()

        # Integer-encodes sender domains.
        self.domain_encoder = LabelEncoder()

        # TF-IDF over the body text, capped at 200 terms.
        self.vectorizer = TfidfVectorizer(
            max_features=200,
            stop_words="english"
        )


    def transform(self, df):
        """Return a feature DataFrame with one row per row of *df*.

        NOTE(review): fit_transform is called on BOTH the LabelEncoder and
        the TfidfVectorizer on every invocation, so the meaning — and even
        the set — of the output columns differs between calls/datasets.
        A proper fit()/transform() split is required before train-time and
        predict-time feature matrices can be compared.  Also appears to
        assume *df* has a default 0..n-1 index (the concat below aligns by
        index) — TODO confirm with callers.
        """

        features = []

        domains = []

        for _, row in df.iterrows():

            email_body = row.get("email_body", "")

            urls = row.get("urls", "")

            attachments = row.get("attachments", "")

            email_id = row.get("email_id", "")

            # Sender domains are collected and encoded after the loop.
            domain = self.email_analyzer.extract_domain(email_id)

            domains.append(domain)

            # Hand-crafted scalar features for this row.
            feature_row = {

                "body_length": self.email_analyzer.body_length(email_body),

                "suspicious_words": self.email_analyzer.suspicious_word_count(email_body),

                "urgency_score": self.email_analyzer.urgency_score(email_body),

                "url_count": self.url_analyzer.url_count(urls),

                "https_count": self.url_analyzer.https_count(urls),

                "url_suspicious": self.url_analyzer.suspicious_pattern_score(urls),

                "has_ip": self.url_analyzer.has_ip_address(urls),

                "attachment_risk": self.email_analyzer.attachment_risk(attachments)
            }

            features.append(feature_row)

        feature_df = pd.DataFrame(features)

        # See NOTE above: re-fits the encoder on every call.
        feature_df["sender_domain"] = self.domain_encoder.fit_transform(domains)

        text_data = df["email_body"].fillna("")

        # See NOTE above: re-fits the vectorizer on every call.
        tfidf_matrix = self.vectorizer.fit_transform(text_data)

        tfidf_df = pd.DataFrame(

            tfidf_matrix.toarray(),

            columns=self.vectorizer.get_feature_names_out()
        )

        # Scalars + TF-IDF columns side by side, aligned by index.
        final_features = pd.concat([feature_df, tfidf_df], axis=1)

        return final_features
phinet/model.py ADDED
@@ -0,0 +1,129 @@
1
+ import numpy as np
2
+ import joblib
3
+
4
+ from sklearn.ensemble import GradientBoostingClassifier
5
+ from sklearn.ensemble import RandomForestClassifier
6
+
7
+ from .bert_detector import BERTPhishingDetector
8
+
9
+
10
class PHINetBoost:
    """Ensemble phishing classifier: gradient boosting + random forest,
    optionally combined with a BERT text model for AI-enhanced decisions.
    """

    def __init__(self, n_estimators=150):
        # Traditional ML models
        self.model1 = GradientBoostingClassifier(
            n_estimators=n_estimators
        )

        self.model2 = RandomForestClassifier(
            n_estimators=n_estimators
        )

        # AI Deep Learning Model.
        # NOTE(review): constructed eagerly, so instantiating PHINetBoost
        # loads BERT weights even when predict_with_ai is never used.
        self.bert = BERTPhishingDetector()

        self.is_trained = False

    def _require_trained(self):
        # RuntimeError is a subclass of Exception, so existing callers
        # catching Exception still work; the message is unchanged.
        if not self.is_trained:
            raise RuntimeError("Model must be trained before prediction")

    # -----------------------------
    # Train ML Models
    # -----------------------------
    def fit(self, X, y):
        """Fit both tree ensembles on feature matrix X and labels y."""
        self.model1.fit(X, y)
        self.model2.fit(X, y)
        self.is_trained = True

    # -----------------------------
    # Basic Ensemble Prediction
    # -----------------------------
    def predict(self, X):
        """Return hard labels: 1 where EITHER model predicts 1 (logical OR).

        Raises RuntimeError if called before fit()/load().
        """
        self._require_trained()

        p1 = self.model1.predict(X)
        p2 = self.model2.predict(X)

        # Vectorised OR of the two models, replacing the manual index loop.
        return ((np.asarray(p1) + np.asarray(p2)) >= 1).astype(int)

    # -----------------------------
    # Prediction Probability
    # -----------------------------
    def predict_proba(self, X):
        """Return the mean of the two models' class-probability estimates.

        Fix: this method previously skipped the trained-state guard that
        predict() and predict_with_ai() enforce; it now raises the same
        RuntimeError when called before training.
        """
        self._require_trained()

        prob1 = self.model1.predict_proba(X)
        prob2 = self.model2.predict_proba(X)

        return (prob1 + prob2) / 2

    # -----------------------------
    # AI Enhanced Prediction
    # -----------------------------
    def predict_with_ai(self, X, email_text):
        """Return 1 if either the ML ensemble (row 0 of X) or BERT flags the email."""
        self._require_trained()

        ml_prediction = self.predict(X)[0]
        bert_prediction = self.bert.predict(email_text)

        # Ensemble decision: OR of the two detectors.
        return 1 if ml_prediction + bert_prediction >= 1 else 0

    # -----------------------------
    # Save Model
    # -----------------------------
    def save(self, path="phinet_model.pkl"):
        """Persist the two sklearn models to *path* (the BERT component is NOT saved)."""
        joblib.dump(
            {
                "model1": self.model1,
                "model2": self.model2
            },
            path
        )

    # -----------------------------
    # Load Model
    # -----------------------------
    def load(self, path="phinet_model.pkl"):
        """Restore the sklearn models written by save() and mark the ensemble trained."""
        data = joblib.load(path)

        self.model1 = data["model1"]
        self.model2 = data["model2"]
        self.is_trained = True
phinet/test_model.py ADDED
@@ -0,0 +1,29 @@
1
+ import pandas as pd
2
+
3
+ from phinet import PHINetBoost, PHINetFeatureEngine
4
+
5
+
6
def test_prediction():
    """Train on one phishing and one legitimate email; the phishing row must be flagged.

    Fix: the original fitted on a single row with y=[1].  A single-class y
    makes GradientBoostingClassifier raise, and even if it fit, predicting
    the lone training sample proves nothing.  Two contrasting rows keep the
    fit valid and give the assertion meaning.
    """

    df = pd.DataFrame([
        {
            "email_id": "scammer@fakebank.com",
            "email_body": "Urgent verify your bank account immediately",
            "urls": "http://fakebank-login.com",
            "attachments": "dangerous.zip",
        },
        {
            "email_id": "friend@example.com",
            "email_body": "Lunch tomorrow at noon sounds great",
            "urls": "",
            "attachments": "",
        },
    ])

    engine = PHINetFeatureEngine()

    X = engine.transform(df)

    model = PHINetBoost()

    # One positive and one negative label: a valid two-class fit.
    model.fit(X, [1, 0])

    prediction = model.predict(X)

    assert prediction[0] == 1
phinet/url_analyzer.py ADDED
@@ -0,0 +1,62 @@
1
+ from urllib.parse import urlparse
2
+ from .utils import contains_ip
3
+
4
+
5
+ class URLAnalyzer:
6
+
7
+ def url_count(self, urls):
8
+
9
+ if not urls:
10
+ return 0
11
+
12
+ return len(urls.split())
13
+
14
+
15
+ def https_count(self, urls):
16
+
17
+ if not urls:
18
+ return 0
19
+
20
+ return urls.count("https")
21
+
22
+
23
+ def suspicious_pattern_score(self, urls):
24
+
25
+ if not urls:
26
+ return 0
27
+
28
+ patterns = [
29
+ "login",
30
+ "verify",
31
+ "secure",
32
+ "update",
33
+ "bank"
34
+ ]
35
+
36
+ score = 0
37
+
38
+ for p in patterns:
39
+
40
+ if p in urls.lower():
41
+
42
+ score += 1
43
+
44
+ return score
45
+
46
+
47
+ def has_ip_address(self, urls):
48
+
49
+ return contains_ip(urls)
50
+
51
+
52
+ def extract_domain(self, url):
53
+
54
+ try:
55
+
56
+ parsed = urlparse(url)
57
+
58
+ return parsed.netloc
59
+
60
+ except:
61
+
62
+ return "unknown"
phinet/utils.py ADDED
@@ -0,0 +1,41 @@
1
+ import re
2
+
3
# Keyword lists used as crude lexical phishing signals.  Matching below is
# by substring, so e.g. "click" also fires on "clicked".
SUSPICIOUS_WORDS = [
    "login", "verify", "update", "secure",
    "bank", "account", "password", "click", "confirm"
]

URGENCY_WORDS = [
    "urgent", "immediately", "action required",
    "verify now", "suspend"
]


def count_suspicious_words(text):
    """Return how many SUSPICIOUS_WORDS occur (as substrings) in *text*."""
    if not text:
        return 0

    lowered = text.lower()

    hits = 0
    for word in SUSPICIOUS_WORDS:
        if word in lowered:
            hits += 1
    return hits


def urgency_score(text):
    """Return how many URGENCY_WORDS occur (as substrings) in *text*."""
    if not text:
        return 0

    lowered = text.lower()

    return len([phrase for phrase in URGENCY_WORDS if phrase in lowered])


def contains_ip(url):
    """Return 1 if *url* contains a dotted-quad-looking number run, else 0."""
    if not url:
        return 0

    # Any four dot-separated digit groups count; octet ranges are not checked.
    match = re.search(r'\d+\.\d+\.\d+\.\d+', url)
    return 1 if match else 0
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: phinet-secure
3
+ Version: 1.0.0
4
+ Summary: Advanced phishing email detection package using ML and AI
5
+ Author: Vasant Lohar
6
+ Requires-Python: >=3.8
7
+ License-File: LICENSE
8
+ Requires-Dist: pandas
9
+ Requires-Dist: numpy
10
+ Requires-Dist: scikit-learn
11
+ Requires-Dist: transformers
12
+ Requires-Dist: torch
13
+ Requires-Dist: joblib
14
+ Dynamic: author
15
+ Dynamic: license-file
16
+ Dynamic: requires-dist
17
+ Dynamic: requires-python
18
+ Dynamic: summary
@@ -0,0 +1,15 @@
1
+ phinet/__init__.py,sha256=maakAkMv-vS6DB-dNm2H-O4C8d4cdItpCJDGS6Ay-gc,222
2
+ phinet/bert_detector.py,sha256=2mJ9FZCY_jH0y933ij85ty07oVpbQwh6a4HwZAWKOso,942
3
+ phinet/cli.py,sha256=lWa7JFH7lzAHZxfdgDBrDo7f8HM2LGbOGndNsAeVwfM,871
4
+ phinet/email_analyzer.py,sha256=hN1cETYEJp9EFcGeD-UkMyw7ZZV-1fhYLAwGi5T6fqg,793
5
+ phinet/feature_engine.py,sha256=b7Ep4Tw3lH7Dzo2W1o5PorqvlpryJIKEkxLbtGJVIRk,2197
6
+ phinet/model.py,sha256=tVThhEY5aFM1jtoUccdHuGsM2lG3zwxMZxEPhUGZ6ak,2709
7
+ phinet/test_model.py,sha256=KuCHS7ZMuIoFi0zV5IyyrE8FGLmGSVw8VengdetfN_E,542
8
+ phinet/url_analyzer.py,sha256=4vg8f-lM-IUN3dsnnVrdTmfPljRwHRFENGyGGqcelu8,970
9
+ phinet/utils.py,sha256=WuspT1R1xSS7YsEFXdDuqqGZyIQ6_oY0iReVGhIuvy0,708
10
+ phinet_secure-1.0.0.dist-info/licenses/LICENSE,sha256=kzjkCfyFFOtP2UAq4WYs--vVYt_lIGW2kqAiQ8NSN_A,189
11
+ phinet_secure-1.0.0.dist-info/METADATA,sha256=5w4Q0WUaHNnjxJUI5tO7jgq88VZEk-2AbgybRtvDicE,453
12
+ phinet_secure-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
13
+ phinet_secure-1.0.0.dist-info/entry_points.txt,sha256=hBsAwe6WxuLsAW-LuIVYAXOSVH3PlWQisi6Z3IRLstA,48
14
+ phinet_secure-1.0.0.dist-info/top_level.txt,sha256=UOZ_EtqnEH5sL86cNZlrLLzDd92ysmHWCRyyJK4YclE,7
15
+ phinet_secure-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ phinet-scan = phinet.cli:main
@@ -0,0 +1,6 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vasant Lohar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software to deal in the Software without restriction.
@@ -0,0 +1 @@
1
+ phinet