phinet-secure 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phinet_secure-1.0.0/LICENSE +6 -0
- phinet_secure-1.0.0/PKG-INFO +18 -0
- phinet_secure-1.0.0/README.md +20 -0
- phinet_secure-1.0.0/phinet/__init__.py +9 -0
- phinet_secure-1.0.0/phinet/bert_detector.py +46 -0
- phinet_secure-1.0.0/phinet/cli.py +44 -0
- phinet_secure-1.0.0/phinet/email_analyzer.py +48 -0
- phinet_secure-1.0.0/phinet/feature_engine.py +84 -0
- phinet_secure-1.0.0/phinet/model.py +129 -0
- phinet_secure-1.0.0/phinet/test_model.py +29 -0
- phinet_secure-1.0.0/phinet/url_analyzer.py +62 -0
- phinet_secure-1.0.0/phinet/utils.py +41 -0
- phinet_secure-1.0.0/phinet_secure.egg-info/PKG-INFO +18 -0
- phinet_secure-1.0.0/phinet_secure.egg-info/SOURCES.txt +19 -0
- phinet_secure-1.0.0/phinet_secure.egg-info/dependency_links.txt +1 -0
- phinet_secure-1.0.0/phinet_secure.egg-info/entry_points.txt +2 -0
- phinet_secure-1.0.0/phinet_secure.egg-info/requires.txt +6 -0
- phinet_secure-1.0.0/phinet_secure.egg-info/top_level.txt +1 -0
- phinet_secure-1.0.0/setup.cfg +4 -0
- phinet_secure-1.0.0/setup.py +23 -0
- phinet_secure-1.0.0/tests/test_model.py +21 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phinet-secure
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Advanced phishing email detection package using ML and AI
|
|
5
|
+
Author: Vasant Lohar
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: pandas
|
|
9
|
+
Requires-Dist: numpy
|
|
10
|
+
Requires-Dist: scikit-learn
|
|
11
|
+
Requires-Dist: transformers
|
|
12
|
+
Requires-Dist: torch
|
|
13
|
+
Requires-Dist: joblib
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
Dynamic: requires-dist
|
|
17
|
+
Dynamic: requires-python
|
|
18
|
+
Dynamic: summary
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# PHINet
|
|
2
|
+
|
|
3
|
+
PHINet is an advanced phishing email detection package using machine learning and AI.
|
|
4
|
+
|
|
5
|
+
Features
|
|
6
|
+
|
|
7
|
+
- Email feature analysis
|
|
8
|
+
- URL phishing detection
|
|
9
|
+
- Attachment risk detection
|
|
10
|
+
- NLP email analysis
|
|
11
|
+
- BERT deep learning model
|
|
12
|
+
- Ensemble ML model
|
|
13
|
+
|
|
14
|
+
Installation
|
|
15
|
+
|
|
16
|
+
pip install phinet-secure
|
|
17
|
+
|
|
18
|
+
Usage
|
|
19
|
+
|
|
20
|
+
from phinet import PHINetBoost, PHINetFeatureEngine
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from transformers import BertTokenizer, BertForSequenceClassification
|
|
2
|
+
import torch
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BERTPhishingDetector:
    """Binary phishing classifier backed by a pretrained BERT encoder.

    NOTE(review): this loads the stock ``bert-base-uncased`` checkpoint with a
    freshly initialized 2-label classification head, so predictions are
    effectively untrained — confirm whether a fine-tuned checkpoint was
    intended here.
    """

    # Tokenizer and model must come from the same checkpoint.
    _CHECKPOINT = "bert-base-uncased"

    def __init__(self):
        self.tokenizer = BertTokenizer.from_pretrained(self._CHECKPOINT)
        self.model = BertForSequenceClassification.from_pretrained(
            self._CHECKPOINT,
            num_labels=2,
        )
        # Inference only: disable dropout and other training-mode behavior.
        self.model.eval()

    def preprocess(self, text):
        """Tokenize *text* into PyTorch tensors, truncated/padded to 256 tokens."""
        return self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=256,
        )

    def predict(self, text):
        """Return the predicted class index (0 or 1) for *text*."""
        encoded = self.preprocess(text)
        with torch.no_grad():
            output = self.model(**encoded)
        return torch.argmax(output.logits, dim=1).item()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
from phinet import PHINetBoost, PHINetFeatureEngine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main():
    """CLI entry point: score a single email for phishing indicators.

    Reads the email fields from command-line flags, extracts features, and
    prints a verdict.

    NOTE(review): no pre-trained model ships with the package, so this demo
    fits the ensemble on the fly against a benign reference email; the verdict
    is illustrative rather than production quality.
    """
    parser = argparse.ArgumentParser(description="PHINet phishing email scanner")
    parser.add_argument("--email_body", required=True)
    parser.add_argument("--url", default="")
    parser.add_argument("--attachment", default="")
    parser.add_argument("--email_id", default="")
    args = parser.parse_args()

    # Row 0: the email under inspection.  Row 1: a benign reference sample so
    # the classifiers see both classes — GradientBoostingClassifier raises
    # ValueError when fit on a single-class target, which made the original
    # `model.fit(X, [1])` crash on every invocation.
    df = pd.DataFrame([
        {
            "email_id": args.email_id,
            "email_body": args.email_body,
            "urls": args.url,
            "attachments": args.attachment,
        },
        {
            "email_id": "newsletter@example.com",
            "email_body": "Monthly newsletter with product updates and tips",
            "urls": "https://example.com",
            "attachments": "",
        },
    ])

    engine = PHINetFeatureEngine()
    X = engine.transform(df)

    model = PHINetBoost()
    model.fit(X, [1, 0])

    # Score only the user-supplied row (index 0).
    prediction = model.predict(X.iloc[[0]])

    if prediction[0] == 1:
        print("⚠ Phishing Email Detected")
    else:
        print("✓ Legitimate Email")
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from .utils import count_suspicious_words, urgency_score
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class EmailAnalyzer:
    """Extracts simple phishing-related features from a raw email record."""

    # Attachment extensions commonly used to deliver malware.
    RISKY_EXTENSIONS = (".exe", ".zip", ".bat", ".scr")

    def body_length(self, text):
        """Return the character length of *text*; 0 for empty or None."""
        return len(text) if text else 0

    def suspicious_word_count(self, text):
        """Count phishing-trigger words in *text* (delegates to utils)."""
        return count_suspicious_words(text)

    def urgency_score(self, text):
        """Count urgency phrases in *text* (delegates to utils)."""
        return urgency_score(text)

    def attachment_risk(self, attachments):
        """Return 1 if any risky extension appears in *attachments*, else 0."""
        if not attachments:
            return 0
        lowered = attachments.lower()
        return int(any(ext in lowered for ext in self.RISKY_EXTENSIONS))

    def extract_domain(self, email):
        """Return the domain part of *email*, or "unknown" if absent.

        Fixes two defects in the original: a bare ``except:`` that swallowed
        every exception (including KeyboardInterrupt), and ``split("@")[1]``
        which returned the wrong piece for addresses containing more than
        one '@' — the domain is everything after the LAST '@'.
        """
        try:
            return email.rsplit("@", 1)[1]
        except (AttributeError, IndexError):
            # Not a string, or no '@' present.
            return "unknown"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from sklearn.preprocessing import LabelEncoder
|
|
4
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
5
|
+
|
|
6
|
+
from .url_analyzer import URLAnalyzer
|
|
7
|
+
from .email_analyzer import EmailAnalyzer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PHINetFeatureEngine:
    """Turns a raw email DataFrame into a numeric feature matrix.

    Expected input columns: ``email_id``, ``email_body``, ``urls``,
    ``attachments``.

    NOTE(review): ``transform`` re-fits the label encoder and the TF-IDF
    vectorizer on every call, so feature columns are NOT stable across calls —
    confirm whether a separate fit/transform split is required for real use.
    """

    def __init__(self):
        self.url_analyzer = URLAnalyzer()
        self.email_analyzer = EmailAnalyzer()
        self.domain_encoder = LabelEncoder()
        self.vectorizer = TfidfVectorizer(
            max_features=200,
            stop_words="english",
        )

    def transform(self, df):
        """Return a DataFrame of handcrafted plus TF-IDF features for *df*."""
        url_an = self.url_analyzer
        mail_an = self.email_analyzer

        rows = []
        sender_domains = []
        for _, record in df.iterrows():
            body = record.get("email_body", "")
            links = record.get("urls", "")
            files = record.get("attachments", "")
            sender = record.get("email_id", "")

            sender_domains.append(mail_an.extract_domain(sender))
            rows.append({
                "body_length": mail_an.body_length(body),
                "suspicious_words": mail_an.suspicious_word_count(body),
                "urgency_score": mail_an.urgency_score(body),
                "url_count": url_an.url_count(links),
                "https_count": url_an.https_count(links),
                "url_suspicious": url_an.suspicious_pattern_score(links),
                "has_ip": url_an.has_ip_address(links),
                "attachment_risk": mail_an.attachment_risk(files),
            })

        handcrafted = pd.DataFrame(rows)
        # Sender domains become a single integer-coded column.
        handcrafted["sender_domain"] = self.domain_encoder.fit_transform(sender_domains)

        # Dense TF-IDF columns over the raw body text.
        tfidf = self.vectorizer.fit_transform(df["email_body"].fillna(""))
        tfidf_frame = pd.DataFrame(
            tfidf.toarray(),
            columns=self.vectorizer.get_feature_names_out(),
        )

        return pd.concat([handcrafted, tfidf_frame], axis=1)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import joblib
|
|
3
|
+
|
|
4
|
+
from sklearn.ensemble import GradientBoostingClassifier
|
|
5
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
6
|
+
|
|
7
|
+
from .bert_detector import BERTPhishingDetector
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PHINetBoost:
    """Two-model tree ensemble (gradient boosting + random forest) with an
    optional BERT-based second opinion.

    Voting rule is a logical OR: an email is flagged if EITHER model flags it.
    ``fit`` (or ``load``) must be called before any prediction method.
    """

    def __init__(self, n_estimators=150):
        # Traditional ML models; *n_estimators* sizes both ensembles.
        self.model1 = GradientBoostingClassifier(n_estimators=n_estimators)
        self.model2 = RandomForestClassifier(n_estimators=n_estimators)
        # AI deep-learning model.
        # NOTE(review): constructing this downloads the BERT checkpoint, so
        # __init__ is network/IO heavy — consider lazy instantiation.
        self.bert = BERTPhishingDetector()
        self.is_trained = False

    # -----------------------------
    # Train ML Models
    # -----------------------------
    def fit(self, X, y):
        """Train both tree models on features *X* and labels *y*."""
        self.model1.fit(X, y)
        self.model2.fit(X, y)
        self.is_trained = True

    def _check_trained(self):
        # Shared guard so every prediction path fails consistently.
        # RuntimeError (a subclass of Exception) replaces the original
        # generic `raise Exception`, so existing callers still catch it.
        if not self.is_trained:
            raise RuntimeError("Model must be trained before prediction")

    # -----------------------------
    # Basic Ensemble Prediction
    # -----------------------------
    def predict(self, X):
        """Return 0/1 labels: positive when EITHER model votes positive.

        Vectorized equivalent of the original per-row "p1 + p2 >= 1" loop.
        """
        self._check_trained()
        p1 = np.asarray(self.model1.predict(X))
        p2 = np.asarray(self.model2.predict(X))
        return ((p1 + p2) >= 1).astype(int)

    # -----------------------------
    # Prediction Probability
    # -----------------------------
    def predict_proba(self, X):
        """Return the element-wise mean of both models' probability matrices."""
        # Guard was missing here in the original; added for consistency
        # with predict() — unfitted sklearn models raise anyway.
        self._check_trained()
        return (self.model1.predict_proba(X) + self.model2.predict_proba(X)) / 2

    # -----------------------------
    # AI Enhanced Prediction
    # -----------------------------
    def predict_with_ai(self, X, email_text):
        """Combine the ML vote on *X* with BERT's verdict on *email_text*.

        Returns 1 when either detector flags the email, else 0.
        """
        self._check_trained()
        ml_prediction = self.predict(X)[0]
        bert_prediction = self.bert.predict(email_text)
        return 1 if ml_prediction + bert_prediction >= 1 else 0

    # -----------------------------
    # Save Model
    # -----------------------------
    def save(self, path="phinet_model.pkl"):
        """Persist only the fitted tree models (BERT is reloaded from the hub)."""
        joblib.dump(
            {
                "model1": self.model1,
                "model2": self.model2,
            },
            path,
        )

    # -----------------------------
    # Load Model
    # -----------------------------
    def load(self, path="phinet_model.pkl"):
        """Restore tree models saved by :meth:`save` and mark as trained."""
        data = joblib.load(path)
        self.model1 = data["model1"]
        self.model2 = data["model2"]
        self.is_trained = True
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from phinet import PHINetBoost, PHINetFeatureEngine
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_prediction():
    """End-to-end smoke test: a phishing-looking email is flagged.

    Trains on one phishing and one benign sample: the original fit call
    ``model.fit(X, [1])`` crashed, because GradientBoostingClassifier
    raises ValueError on a single-class target.
    """
    df = pd.DataFrame([
        {
            "email_id": "scammer@fakebank.com",
            "email_body": "Urgent verify your bank account immediately",
            "urls": "http://fakebank-login.com",
            "attachments": "dangerous.zip",
        },
        {
            "email_id": "newsletter@example.com",
            "email_body": "Here is our monthly product newsletter",
            "urls": "https://example.com",
            "attachments": "",
        },
    ])

    engine = PHINetFeatureEngine()
    X = engine.transform(df)

    model = PHINetBoost()
    model.fit(X, [1, 0])

    prediction = model.predict(X)
    assert prediction[0] == 1
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from urllib.parse import urlparse
|
|
2
|
+
from .utils import contains_ip
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class URLAnalyzer:
|
|
6
|
+
|
|
7
|
+
def url_count(self, urls):
|
|
8
|
+
|
|
9
|
+
if not urls:
|
|
10
|
+
return 0
|
|
11
|
+
|
|
12
|
+
return len(urls.split())
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def https_count(self, urls):
|
|
16
|
+
|
|
17
|
+
if not urls:
|
|
18
|
+
return 0
|
|
19
|
+
|
|
20
|
+
return urls.count("https")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def suspicious_pattern_score(self, urls):
|
|
24
|
+
|
|
25
|
+
if not urls:
|
|
26
|
+
return 0
|
|
27
|
+
|
|
28
|
+
patterns = [
|
|
29
|
+
"login",
|
|
30
|
+
"verify",
|
|
31
|
+
"secure",
|
|
32
|
+
"update",
|
|
33
|
+
"bank"
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
score = 0
|
|
37
|
+
|
|
38
|
+
for p in patterns:
|
|
39
|
+
|
|
40
|
+
if p in urls.lower():
|
|
41
|
+
|
|
42
|
+
score += 1
|
|
43
|
+
|
|
44
|
+
return score
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def has_ip_address(self, urls):
|
|
48
|
+
|
|
49
|
+
return contains_ip(urls)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def extract_domain(self, url):
|
|
53
|
+
|
|
54
|
+
try:
|
|
55
|
+
|
|
56
|
+
parsed = urlparse(url)
|
|
57
|
+
|
|
58
|
+
return parsed.netloc
|
|
59
|
+
|
|
60
|
+
except:
|
|
61
|
+
|
|
62
|
+
return "unknown"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
# Vocabulary that commonly appears in credential-phishing messages.
SUSPICIOUS_WORDS = [
    "login", "verify", "update", "secure",
    "bank", "account", "password", "click", "confirm"
]

# Phrases used to pressure the recipient into acting without thinking.
URGENCY_WORDS = [
    "urgent", "immediately", "action required",
    "verify now", "suspend"
]


def count_suspicious_words(text):
    """Number of SUSPICIOUS_WORDS that occur (as substrings) in *text*."""
    if not text:
        return 0
    lowered = text.lower()
    return sum(1 for word in SUSPICIOUS_WORDS if word in lowered)


def urgency_score(text):
    """Number of URGENCY_WORDS phrases that occur (as substrings) in *text*."""
    if not text:
        return 0
    lowered = text.lower()
    return sum(1 for phrase in URGENCY_WORDS if phrase in lowered)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Compiled once at import; matching is purely syntactic (octet ranges are
# not validated, so e.g. "999.1.1.1" still matches).
_IP_PATTERN = re.compile(r"\d+\.\d+\.\d+\.\d+")


def contains_ip(url):
    """Return 1 if *url* contains a dotted-quad IP-like token, else 0."""
    if not url:
        return 0
    return 1 if _IP_PATTERN.search(url) else 0
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phinet-secure
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Advanced phishing email detection package using ML and AI
|
|
5
|
+
Author: Vasant Lohar
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: pandas
|
|
9
|
+
Requires-Dist: numpy
|
|
10
|
+
Requires-Dist: scikit-learn
|
|
11
|
+
Requires-Dist: transformers
|
|
12
|
+
Requires-Dist: torch
|
|
13
|
+
Requires-Dist: joblib
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
Dynamic: requires-dist
|
|
17
|
+
Dynamic: requires-python
|
|
18
|
+
Dynamic: summary
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
phinet/__init__.py
|
|
5
|
+
phinet/bert_detector.py
|
|
6
|
+
phinet/cli.py
|
|
7
|
+
phinet/email_analyzer.py
|
|
8
|
+
phinet/feature_engine.py
|
|
9
|
+
phinet/model.py
|
|
10
|
+
phinet/test_model.py
|
|
11
|
+
phinet/url_analyzer.py
|
|
12
|
+
phinet/utils.py
|
|
13
|
+
phinet_secure.egg-info/PKG-INFO
|
|
14
|
+
phinet_secure.egg-info/SOURCES.txt
|
|
15
|
+
phinet_secure.egg-info/dependency_links.txt
|
|
16
|
+
phinet_secure.egg-info/entry_points.txt
|
|
17
|
+
phinet_secure.egg-info/requires.txt
|
|
18
|
+
phinet_secure.egg-info/top_level.txt
|
|
19
|
+
tests/test_model.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
phinet
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from setuptools import setup, find_packages

# Packaging metadata for the phinet-secure distribution.
# Installs the `phinet` package and exposes a `phinet-scan` console script
# that dispatches to phinet.cli:main.
setup(
    name="phinet-secure",
    version="1.0.0",
    description="Advanced phishing email detection package using ML and AI",
    author="Vasant Lohar",
    packages=find_packages(),
    # Runtime dependencies; transformers/torch are heavyweight and only
    # needed for the BERT detector.
    install_requires=[
        "pandas",
        "numpy",
        "scikit-learn",
        "transformers",
        "torch",
        "joblib"
    ],
    entry_points={
        "console_scripts": [
            "phinet-scan=phinet.cli:main"
        ]
    },
    python_requires=">=3.8",
)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import pandas as pd

from phinet import PHINetBoost, PHINetFeatureEngine


def test_prediction():
    """Smoke test (mirrors phinet/test_model.py): a phishing sample is flagged.

    The original file ran at import time, printed the prediction instead of
    asserting anything (so pytest collected no test), and crashed on
    ``model.fit(X, [1])`` because GradientBoostingClassifier cannot fit a
    single-class target. Fixed by wrapping it in a proper test function and
    training on one phishing and one benign sample.
    """
    df = pd.DataFrame([
        {
            "email_id": "scammer@fakebank.com",
            "email_body": "Verify your account immediately",
            "urls": "http://fakebank-login.com",
            "attachments": "dangerous.zip",
        },
        {
            "email_id": "newsletter@example.com",
            "email_body": "Here is our monthly product newsletter",
            "urls": "https://example.com",
            "attachments": "",
        },
    ])

    engine = PHINetFeatureEngine()
    X = engine.transform(df)

    model = PHINetBoost()
    model.fit(X, [1, 0])

    prediction = model.predict(X)
    assert prediction[0] == 1
|