phinet-secure 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vasant Lohar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software to deal in the Software without restriction.
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: phinet-secure
3
+ Version: 1.0.0
4
+ Summary: Advanced phishing email detection package using ML and AI
5
+ Author: Vasant Lohar
6
+ Requires-Python: >=3.8
7
+ License-File: LICENSE
8
+ Requires-Dist: pandas
9
+ Requires-Dist: numpy
10
+ Requires-Dist: scikit-learn
11
+ Requires-Dist: transformers
12
+ Requires-Dist: torch
13
+ Requires-Dist: joblib
14
+ Dynamic: author
15
+ Dynamic: license-file
16
+ Dynamic: requires-dist
17
+ Dynamic: requires-python
18
+ Dynamic: summary
@@ -0,0 +1,20 @@
1
+ # PHINet
2
+
3
+ PHINet is an advanced phishing email detection package using machine learning and AI.
4
+
5
+ Features
6
+
7
+ - Email feature analysis
8
+ - URL phishing detection
9
+ - Attachment risk detection
10
+ - NLP email analysis
11
+ - BERT deep learning model
12
+ - Ensemble ML model
13
+
14
+ Installation
15
+
16
+ pip install phinet-secure
17
+
18
+ Usage
19
+
20
+ from phinet import PHINetBoost, PHINetFeatureEngine
@@ -0,0 +1,9 @@
1
from .model import PHINetBoost
from .feature_engine import PHINetFeatureEngine
from .bert_detector import BERTPhishingDetector

# Public API of the phinet package: the ensemble model, the feature
# extractor, and the BERT-based detector.
__all__ = [
    "PHINetBoost",
    "PHINetFeatureEngine",
    "BERTPhishingDetector"
]
@@ -0,0 +1,46 @@
1
+ from transformers import BertTokenizer, BertForSequenceClassification
2
+ import torch
3
+
4
+
5
class BERTPhishingDetector:
    """Binary text classifier built on a pretrained BERT encoder.

    NOTE(review): the sequence-classification head of
    ``bert-base-uncased`` is randomly initialised and no fine-tuning is
    performed anywhere in this package, so predictions are effectively
    arbitrary — confirm whether trained weights were meant to be loaded.
    """

    def __init__(self):
        # First call downloads and caches the pretrained assets.
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        self.model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased",
            num_labels=2,
        )
        # Inference-only: switch off dropout / training-mode layers.
        self.model.eval()

    def preprocess(self, text):
        """Tokenize *text* into model-ready tensors, truncated to 256 tokens."""
        return self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=256,
        )

    def predict(self, text):
        """Return the argmax label index for *text* (int 0 or 1)."""
        encoded = self.preprocess(text)
        with torch.no_grad():
            logits = self.model(**encoded).logits
        return torch.argmax(logits, dim=1).item()
@@ -0,0 +1,44 @@
1
+ import argparse
2
+ import pandas as pd
3
+
4
+ from phinet import PHINetBoost, PHINetFeatureEngine
5
+
6
+
7
def main():
    """CLI entry point: score a single email passed via command-line flags.

    NOTE(review): this trains a brand-new model on the one row being
    scored, with a hard-coded label of 1, so the "prediction" merely
    echoes the training label — and GradientBoostingClassifier raises
    ValueError on single-class training data, so this likely crashes.
    A pre-trained model should presumably be loaded via
    ``PHINetBoost.load`` instead — confirm intended design.
    """

    parser = argparse.ArgumentParser()

    parser.add_argument("--email_body", required=True)

    parser.add_argument("--url", default="")

    parser.add_argument("--attachment", default="")

    parser.add_argument("--email_id", default="")

    args = parser.parse_args()

    # One-row DataFrame in the shape the feature engine expects.
    df = pd.DataFrame([{
        "email_id": args.email_id,
        "email_body": args.email_body,
        "urls": args.url,
        "attachments": args.attachment
    }])

    engine = PHINetFeatureEngine()

    X = engine.transform(df)

    model = PHINetBoost()

    # Fits on the very sample being scored, labelled phishing (see NOTE).
    model.fit(X, [1])

    prediction = model.predict(X)

    if prediction[0] == 1:

        print("⚠ Phishing Email Detected")

    else:

        print("✓ Legitimate Email")
@@ -0,0 +1,48 @@
1
+ from .utils import count_suspicious_words, urgency_score
2
+
3
+
4
class EmailAnalyzer:
    """Derives simple phishing-risk features from a single email.

    All methods are stateless; the keyword-based helpers delegate to
    the shared ``count_suspicious_words`` / ``urgency_score`` utilities.
    """

    def body_length(self, text):
        """Return the character length of *text*, or 0 for None/empty."""
        return len(text) if text else 0

    def suspicious_word_count(self, text):
        """Count known phishing vocabulary occurring in *text*."""
        return count_suspicious_words(text)

    def urgency_score(self, text):
        """Score urgency-inducing language ("urgent", "immediately", ...)."""
        return urgency_score(text)

    def attachment_risk(self, attachments):
        """Return 1 if any risky file extension appears in *attachments*.

        The check is a case-insensitive substring match, so a name like
        "notes.zip.txt" is also flagged.
        """
        if not attachments:
            return 0
        risky_extensions = (".exe", ".zip", ".bat", ".scr")
        lowered = attachments.lower()
        return 1 if any(ext in lowered for ext in risky_extensions) else 0

    def extract_domain(self, email):
        """Return the domain part of an email address, or "unknown".

        Fix: the original used a bare ``except:``, which also swallows
        SystemExit/KeyboardInterrupt; catch only the errors a malformed
        or non-string address can actually raise here.
        """
        try:
            return email.split("@")[1]
        except (AttributeError, IndexError):
            return "unknown"
@@ -0,0 +1,84 @@
1
+ import pandas as pd
2
+
3
+ from sklearn.preprocessing import LabelEncoder
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+
6
+ from .url_analyzer import URLAnalyzer
7
+ from .email_analyzer import EmailAnalyzer
8
+
9
+
10
class PHINetFeatureEngine:
    """Turns a DataFrame of raw emails into a numeric feature matrix.

    Expected input columns: "email_id", "email_body", "urls",
    "attachments" (string-valued; missing keys tolerated via row.get).
    """

    def __init__(self):

        self.url_analyzer = URLAnalyzer()

        self.email_analyzer = EmailAnalyzer()

        # Integer-encodes sender domains.
        self.domain_encoder = LabelEncoder()

        # Bag-of-words features on the email body, capped at 200 terms.
        self.vectorizer = TfidfVectorizer(
            max_features=200,
            stop_words="english"
        )

    def transform(self, df):
        """Build hand-crafted + TF-IDF features for every row of *df*.

        NOTE(review): despite its name, this method calls
        ``fit_transform`` on both the LabelEncoder and the
        TfidfVectorizer, so the produced columns and domain codes are
        re-derived from each batch.  Features computed at training time
        are therefore not comparable with features computed at
        prediction time — confirm whether a separate fit()/transform()
        split was intended.
        """

        features = []

        domains = []

        for _, row in df.iterrows():

            email_body = row.get("email_body", "")

            urls = row.get("urls", "")

            attachments = row.get("attachments", "")

            email_id = row.get("email_id", "")

            domain = self.email_analyzer.extract_domain(email_id)

            domains.append(domain)

            feature_row = {

                "body_length": self.email_analyzer.body_length(email_body),

                "suspicious_words": self.email_analyzer.suspicious_word_count(email_body),

                "urgency_score": self.email_analyzer.urgency_score(email_body),

                "url_count": self.url_analyzer.url_count(urls),

                "https_count": self.url_analyzer.https_count(urls),

                "url_suspicious": self.url_analyzer.suspicious_pattern_score(urls),

                "has_ip": self.url_analyzer.has_ip_address(urls),

                "attachment_risk": self.email_analyzer.attachment_risk(attachments)
            }

            features.append(feature_row)

        feature_df = pd.DataFrame(features)

        # Re-fits the encoder on this batch's domains (see NOTE above).
        feature_df["sender_domain"] = self.domain_encoder.fit_transform(domains)

        text_data = df["email_body"].fillna("")

        tfidf_matrix = self.vectorizer.fit_transform(text_data)

        tfidf_df = pd.DataFrame(

            tfidf_matrix.toarray(),

            columns=self.vectorizer.get_feature_names_out()
        )

        final_features = pd.concat([feature_df, tfidf_df], axis=1)

        return final_features
@@ -0,0 +1,129 @@
1
+ import numpy as np
2
+ import joblib
3
+
4
+ from sklearn.ensemble import GradientBoostingClassifier
5
+ from sklearn.ensemble import RandomForestClassifier
6
+
7
+ from .bert_detector import BERTPhishingDetector
8
+
9
+
10
class PHINetBoost:
    """Ensemble phishing classifier: gradient boosting + random forest,
    optionally OR-combined with a BERT deep-learning detector.

    The two sklearn models vote with a logical OR: an input is flagged
    as phishing (1) when either base model predicts class 1.
    """

    def __init__(self, n_estimators=150):
        # Traditional ML models.
        self.model1 = GradientBoostingClassifier(n_estimators=n_estimators)
        self.model2 = RandomForestClassifier(n_estimators=n_estimators)
        # AI deep-learning model (loads pretrained BERT immediately).
        self.bert = BERTPhishingDetector()
        self.is_trained = False

    def fit(self, X, y):
        """Train both sklearn models on features X and labels y.

        NOTE(review): GradientBoostingClassifier raises ValueError when
        *y* contains a single class, so callers must supply examples of
        both classes.
        """
        self.model1.fit(X, y)
        self.model2.fit(X, y)
        self.is_trained = True

    def predict(self, X):
        """Return 0/1 predictions; 1 when either base model votes phishing.

        Raises:
            RuntimeError: if called before fit()/load().
        """
        if not self.is_trained:
            raise RuntimeError("Model must be trained before prediction")
        p1 = self.model1.predict(X)
        p2 = self.model2.predict(X)
        # Vectorised logical-OR vote (replaces the original Python loop).
        return ((np.asarray(p1) + np.asarray(p2)) >= 1).astype(int)

    def predict_proba(self, X):
        """Return the mean of both base models' class probabilities.

        Fix: the original omitted the is_trained guard here (unlike
        predict), so an untrained model surfaced an opaque sklearn
        NotFittedError instead of the package's own error.
        """
        if not self.is_trained:
            raise RuntimeError("Model must be trained before prediction")
        prob1 = self.model1.predict_proba(X)
        prob2 = self.model2.predict_proba(X)
        return (prob1 + prob2) / 2

    def predict_with_ai(self, X, email_text):
        """OR-combine the ML ensemble's first prediction with BERT's.

        Raises:
            RuntimeError: if called before fit()/load().
        """
        if not self.is_trained:
            raise RuntimeError("Model must be trained before prediction")
        ml_prediction = self.predict(X)[0]
        bert_prediction = self.bert.predict(email_text)
        # Either detector saying "phishing" wins.
        return 1 if ml_prediction + bert_prediction >= 1 else 0

    def save(self, path="phinet_model.pkl"):
        """Persist the two sklearn models (the BERT model is NOT saved)."""
        joblib.dump(
            {
                "model1": self.model1,
                "model2": self.model2
            },
            path
        )

    def load(self, path="phinet_model.pkl"):
        """Restore the sklearn models saved by save() and mark as trained."""
        data = joblib.load(path)
        self.model1 = data["model1"]
        self.model2 = data["model2"]
        self.is_trained = True
@@ -0,0 +1,29 @@
1
+ import pandas as pd
2
+
3
+ from phinet import PHINetBoost, PHINetFeatureEngine
4
+
5
+
6
def test_prediction():
    """Smoke test: a phishing-like email is classified as phishing.

    NOTE(review): the model is fit on the very row it then predicts,
    with label [1], so the assertion is tautological — and
    GradientBoostingClassifier raises ValueError on single-class
    training data, so this test likely errors outright.  Confirm
    whether a fixture containing both classes was intended.
    """

    df = pd.DataFrame([{

        "email_id": "scammer@fakebank.com",

        "email_body": "Urgent verify your bank account immediately",

        "urls": "http://fakebank-login.com",

        "attachments": "dangerous.zip"
    }])

    engine = PHINetFeatureEngine()

    X = engine.transform(df)

    model = PHINetBoost()

    model.fit(X, [1])

    prediction = model.predict(X)

    assert prediction[0] == 1
@@ -0,0 +1,62 @@
1
+ from urllib.parse import urlparse
2
+ from .utils import contains_ip
3
+
4
+
5
class URLAnalyzer:
    """Heuristic URL-based features for phishing detection.

    Methods take *urls* as a single whitespace-separated string (as
    produced by the feature engine) and tolerate empty/None input.
    """

    def url_count(self, urls):
        """Number of whitespace-separated URL tokens in the string."""
        if not urls:
            return 0
        return len(urls.split())

    def https_count(self, urls):
        """Number of occurrences of the substring "https"."""
        if not urls:
            return 0
        return urls.count("https")

    def suspicious_pattern_score(self, urls):
        """Count how many known phishing keywords appear in the URLs."""
        if not urls:
            return 0
        patterns = [
            "login",
            "verify",
            "secure",
            "update",
            "bank"
        ]
        lowered = urls.lower()
        return sum(1 for keyword in patterns if keyword in lowered)

    def has_ip_address(self, urls):
        """Return 1 if a dotted-quad IP appears anywhere in *urls*, else 0."""
        return contains_ip(urls)

    def extract_domain(self, url):
        """Return the network location of *url*, or "unknown" on failure.

        Fix: the original used a bare ``except:`` (which also swallows
        SystemExit/KeyboardInterrupt); catch only the errors that
        malformed or non-string input can raise from urlparse.
        """
        try:
            return urlparse(url).netloc
        except (ValueError, TypeError, AttributeError):
            return "unknown"
@@ -0,0 +1,41 @@
1
+ import re
2
+
3
# Keyword lists shared by the email/URL analyzers.  Matching is done by
# case-insensitive substring containment, so "clicked" also matches
# the keyword "click".
SUSPICIOUS_WORDS = [
    "login", "verify", "update", "secure",
    "bank", "account", "password", "click", "confirm"
]

URGENCY_WORDS = [
    "urgent", "immediately", "action required",
    "verify now", "suspend"
]


def count_suspicious_words(text):
    """Return how many SUSPICIOUS_WORDS occur (as substrings) in *text*."""
    if not text:
        return 0
    lowered = text.lower()
    total = 0
    for word in SUSPICIOUS_WORDS:
        if word in lowered:
            total += 1
    return total


def urgency_score(text):
    """Return how many URGENCY_WORDS occur (as substrings) in *text*."""
    if not text:
        return 0
    lowered = text.lower()
    total = 0
    for phrase in URGENCY_WORDS:
        if phrase in lowered:
            total += 1
    return total


def contains_ip(url):
    """Return 1 if *url* contains a dotted-quad-looking number, else 0."""
    if not url:
        return 0
    return 1 if re.search(r'\d+\.\d+\.\d+\.\d+', url) else 0
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: phinet-secure
3
+ Version: 1.0.0
4
+ Summary: Advanced phishing email detection package using ML and AI
5
+ Author: Vasant Lohar
6
+ Requires-Python: >=3.8
7
+ License-File: LICENSE
8
+ Requires-Dist: pandas
9
+ Requires-Dist: numpy
10
+ Requires-Dist: scikit-learn
11
+ Requires-Dist: transformers
12
+ Requires-Dist: torch
13
+ Requires-Dist: joblib
14
+ Dynamic: author
15
+ Dynamic: license-file
16
+ Dynamic: requires-dist
17
+ Dynamic: requires-python
18
+ Dynamic: summary
@@ -0,0 +1,19 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ phinet/__init__.py
5
+ phinet/bert_detector.py
6
+ phinet/cli.py
7
+ phinet/email_analyzer.py
8
+ phinet/feature_engine.py
9
+ phinet/model.py
10
+ phinet/test_model.py
11
+ phinet/url_analyzer.py
12
+ phinet/utils.py
13
+ phinet_secure.egg-info/PKG-INFO
14
+ phinet_secure.egg-info/SOURCES.txt
15
+ phinet_secure.egg-info/dependency_links.txt
16
+ phinet_secure.egg-info/entry_points.txt
17
+ phinet_secure.egg-info/requires.txt
18
+ phinet_secure.egg-info/top_level.txt
19
+ tests/test_model.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ phinet-scan = phinet.cli:main
@@ -0,0 +1,6 @@
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ transformers
5
+ torch
6
+ joblib
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,23 @@
1
+ from setuptools import setup, find_packages
2
+
3
setup(
    name="phinet-secure",
    version="1.0.0",
    description="Advanced phishing email detection package using ML and AI",
    author="Vasant Lohar",
    packages=find_packages(),
    # Heavy dependencies: transformers/torch pull in the full BERT stack.
    install_requires=[
        "pandas",
        "numpy",
        "scikit-learn",
        "transformers",
        "torch",
        "joblib"
    ],
    # Exposes phinet/cli.py:main as the `phinet-scan` console command.
    entry_points={
        "console_scripts": [
            "phinet-scan=phinet.cli:main"
        ]
    },
    python_requires=">=3.8",
)
@@ -0,0 +1,21 @@
1
+ import pandas as pd
2
+ from phinet import PHINetBoost, PHINetFeatureEngine
3
+
4
# NOTE(review): demo script — fits the model on the single example row
# with hard-coded label [1], so the printed prediction simply echoes
# that label (and GradientBoostingClassifier raises ValueError on
# single-class training data).  Confirm whether a pre-trained model
# was meant to be loaded instead.
df = pd.DataFrame([{
    "email_id": "scammer@fakebank.com",
    "email_body": "Verify your account immediately",
    "urls": "http://fakebank-login.com",
    "attachments": "dangerous.zip"
}])

engine = PHINetFeatureEngine()

X = engine.transform(df)

model = PHINetBoost()

model.fit(X, [1])

prediction = model.predict(X)

print(prediction)