aiwaf 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/middleware.py +101 -32
- aiwaf/models.py +8 -0
- aiwaf/trainer.py +83 -55
- aiwaf-0.1.5.dist-info/METADATA +195 -0
- {aiwaf-0.1.2.dist-info → aiwaf-0.1.5.dist-info}/RECORD +8 -7
- aiwaf-0.1.5.dist-info/licenses/LICENSE +21 -0
- aiwaf-0.1.2.dist-info/METADATA +0 -187
- {aiwaf-0.1.2.dist-info → aiwaf-0.1.5.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.2.dist-info → aiwaf-0.1.5.dist-info}/top_level.txt +0 -0
aiwaf/middleware.py
CHANGED
|
@@ -1,54 +1,104 @@
|
|
|
1
|
+
# aiwaf/middleware.py
|
|
2
|
+
|
|
1
3
|
import time
|
|
4
|
+
import re
|
|
5
|
+
import os
|
|
2
6
|
import numpy as np
|
|
3
7
|
import joblib
|
|
8
|
+
|
|
4
9
|
from collections import defaultdict
|
|
5
10
|
from django.utils.deprecation import MiddlewareMixin
|
|
6
11
|
from django.http import JsonResponse
|
|
7
12
|
from django.conf import settings
|
|
8
13
|
from django.core.cache import cache
|
|
9
|
-
from django.
|
|
14
|
+
from django.db.models import F
|
|
10
15
|
from django.apps import apps
|
|
11
|
-
from .
|
|
16
|
+
from django.urls import get_resolver
|
|
12
17
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
except AttributeError:
|
|
16
|
-
import importlib.resources
|
|
17
|
-
MODEL_PATH = importlib.resources.files("aiwaf").joinpath("resources/model.pkl")
|
|
18
|
+
from .blacklist_manager import BlacklistManager
|
|
19
|
+
from .models import DynamicKeyword
|
|
18
20
|
|
|
21
|
+
MODEL_PATH = getattr(
|
|
22
|
+
settings,
|
|
23
|
+
"AIWAF_MODEL_PATH",
|
|
24
|
+
os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
|
|
25
|
+
)
|
|
19
26
|
MODEL = joblib.load(MODEL_PATH)
|
|
20
27
|
|
|
28
|
+
STATIC_KW = getattr(
|
|
29
|
+
settings,
|
|
30
|
+
"AIWAF_MALICIOUS_KEYWORDS",
|
|
31
|
+
[
|
|
32
|
+
".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
|
|
33
|
+
"conflg", "shell", "filemanager"
|
|
34
|
+
]
|
|
35
|
+
)
|
|
36
|
+
|
|
21
37
|
def get_ip(request):
|
|
22
38
|
xff = request.META.get("HTTP_X_FORWARDED_FOR")
|
|
23
39
|
if xff:
|
|
24
40
|
return xff.split(",")[0].strip()
|
|
25
41
|
return request.META.get("REMOTE_ADDR", "")
|
|
26
42
|
|
|
27
|
-
|
|
28
|
-
class IPBlockMiddleware:
|
|
43
|
+
class IPAndKeywordBlockMiddleware:
|
|
29
44
|
def __init__(self, get_response):
|
|
30
45
|
self.get_response = get_response
|
|
46
|
+
self.url_patterns = self._collect_view_paths()
|
|
47
|
+
|
|
48
|
+
def _collect_view_paths(self):
|
|
49
|
+
resolver = get_resolver()
|
|
50
|
+
patterns = set()
|
|
51
|
+
|
|
52
|
+
def extract(patterns_list, prefix=""):
|
|
53
|
+
for p in patterns_list:
|
|
54
|
+
if hasattr(p, "url_patterns"):
|
|
55
|
+
extract(p.url_patterns, prefix + str(p.pattern))
|
|
56
|
+
else:
|
|
57
|
+
pat = (prefix + str(p.pattern)).strip("^$")
|
|
58
|
+
patterns.add(pat)
|
|
59
|
+
extract(resolver.url_patterns)
|
|
60
|
+
return patterns
|
|
31
61
|
|
|
32
62
|
def __call__(self, request):
|
|
33
63
|
ip = get_ip(request)
|
|
64
|
+
path = request.path.lower()
|
|
34
65
|
if BlacklistManager.is_blocked(ip):
|
|
35
66
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
67
|
+
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
68
|
+
for seg in segments:
|
|
69
|
+
obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
|
|
70
|
+
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
|
|
71
|
+
dynamic_top = list(
|
|
72
|
+
DynamicKeyword.objects
|
|
73
|
+
.order_by("-count")
|
|
74
|
+
.values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
|
|
75
|
+
)
|
|
76
|
+
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
77
|
+
safe_kw = {kw for kw in all_kw if any(kw in pat for pat in self.url_patterns)}
|
|
78
|
+
suspicious_kw = all_kw - safe_kw
|
|
79
|
+
for seg in segments:
|
|
80
|
+
if seg in suspicious_kw:
|
|
81
|
+
BlacklistManager.block(ip, f"Keyword block: {seg}")
|
|
82
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
36
83
|
return self.get_response(request)
|
|
37
84
|
|
|
38
85
|
|
|
39
86
|
class RateLimitMiddleware:
|
|
40
|
-
WINDOW =
|
|
41
|
-
MAX
|
|
42
|
-
FLOOD
|
|
87
|
+
WINDOW = 10
|
|
88
|
+
MAX = 20
|
|
89
|
+
FLOOD = 10
|
|
90
|
+
|
|
43
91
|
def __init__(self, get_response):
|
|
44
92
|
self.get_response = get_response
|
|
45
93
|
self.logs = defaultdict(list)
|
|
94
|
+
|
|
46
95
|
def __call__(self, request):
|
|
47
|
-
ip
|
|
96
|
+
ip = get_ip(request)
|
|
48
97
|
now = time.time()
|
|
49
98
|
recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
|
|
50
99
|
recs.append(now)
|
|
51
100
|
self.logs[ip] = recs
|
|
101
|
+
|
|
52
102
|
if len(recs) > self.MAX:
|
|
53
103
|
return JsonResponse({"error": "too_many_requests"}, status=429)
|
|
54
104
|
if len(recs) > self.FLOOD:
|
|
@@ -59,40 +109,57 @@ class RateLimitMiddleware:
|
|
|
59
109
|
|
|
60
110
|
|
|
61
111
|
class AIAnomalyMiddleware(MiddlewareMixin):
|
|
62
|
-
|
|
112
|
+
WINDOW = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
|
|
113
|
+
TOP_N = getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)
|
|
114
|
+
|
|
63
115
|
def process_request(self, request):
|
|
64
116
|
ip = get_ip(request)
|
|
65
117
|
if BlacklistManager.is_blocked(ip):
|
|
66
118
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
119
|
+
|
|
67
120
|
now = time.time()
|
|
68
121
|
key = f"aiwaf:{ip}"
|
|
69
122
|
data = cache.get(key, [])
|
|
123
|
+
# TODO: you may want to capture real status & response_time in process_response
|
|
70
124
|
data.append((now, request.path, 0, 0.0))
|
|
71
|
-
data = [d for d in data if now - d[0] < self.
|
|
72
|
-
cache.set(key, data, timeout=self.
|
|
125
|
+
data = [d for d in data if now - d[0] < self.WINDOW]
|
|
126
|
+
cache.set(key, data, timeout=self.WINDOW)
|
|
127
|
+
|
|
128
|
+
# update dynamic‐keyword counts
|
|
129
|
+
for seg in re.split(r"\W+", request.path.lower()):
|
|
130
|
+
if len(seg) > 3:
|
|
131
|
+
obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
|
|
132
|
+
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
|
|
133
|
+
|
|
73
134
|
if len(data) < 5:
|
|
74
135
|
return None
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
136
|
+
|
|
137
|
+
# pull top‐N dynamic tokens
|
|
138
|
+
top_dynamic = list(
|
|
139
|
+
DynamicKeyword.objects
|
|
140
|
+
.order_by("-count")
|
|
141
|
+
.values_list("keyword", flat=True)[: self.TOP_N]
|
|
80
142
|
)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
143
|
+
ALL_KW = set(STATIC_KW) | set(top_dynamic)
|
|
144
|
+
|
|
145
|
+
total = len(data)
|
|
146
|
+
ratio404 = sum(1 for (_, _, st, _) in data if st == 404) / total
|
|
147
|
+
hits = sum(any(kw in path.lower() for kw in ALL_KW) for (_, path, _, _) in data)
|
|
148
|
+
avg_rt = np.mean([rt for (_, _, _, rt) in data]) if data else 0.0
|
|
149
|
+
ivs = [data[i][0] - data[i - 1][0] for i in range(1, total)]
|
|
150
|
+
avg_iv = np.mean(ivs) if ivs else 0.0
|
|
151
|
+
|
|
152
|
+
X = np.array([[total, ratio404, hits, avg_rt, avg_iv]], dtype=float)
|
|
87
153
|
if MODEL.predict(X)[0] == -1:
|
|
88
154
|
BlacklistManager.block(ip, "AI anomaly")
|
|
89
155
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
156
|
+
|
|
90
157
|
return None
|
|
91
158
|
|
|
92
159
|
|
|
93
160
|
class HoneypotMiddleware(MiddlewareMixin):
|
|
94
161
|
def process_view(self, request, view_func, view_args, view_kwargs):
|
|
95
|
-
trap = request.POST.get(settings
|
|
162
|
+
trap = request.POST.get(getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field"), "")
|
|
96
163
|
if trap:
|
|
97
164
|
ip = get_ip(request)
|
|
98
165
|
BlacklistManager.block(ip, "HONEYPOT triggered")
|
|
@@ -105,11 +172,13 @@ class UUIDTamperMiddleware(MiddlewareMixin):
|
|
|
105
172
|
uid = view_kwargs.get("uuid")
|
|
106
173
|
if not uid:
|
|
107
174
|
return None
|
|
175
|
+
|
|
108
176
|
ip = get_ip(request)
|
|
109
|
-
app_label =
|
|
110
|
-
|
|
111
|
-
for Model in
|
|
177
|
+
app_label = view_func.__module__.split(".")[0]
|
|
178
|
+
app_cfg = apps.get_app_config(app_label)
|
|
179
|
+
for Model in app_cfg.get_models():
|
|
112
180
|
if Model.objects.filter(pk=uid).exists():
|
|
113
181
|
return None
|
|
182
|
+
|
|
114
183
|
BlacklistManager.block(ip, "UUID tampering")
|
|
115
|
-
return JsonResponse({"error": "blocked"}, status=403)
|
|
184
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
aiwaf/models.py
CHANGED
|
@@ -26,3 +26,11 @@ class BlacklistEntry(models.Model):
|
|
|
26
26
|
|
|
27
27
|
def __str__(self):
|
|
28
28
|
return f"{self.ip_address} ({self.reason})"
|
|
29
|
+
|
|
30
|
+
class DynamicKeyword(models.Model):
|
|
31
|
+
keyword = models.CharField(max_length=100, unique=True)
|
|
32
|
+
count = models.PositiveIntegerField(default=0)
|
|
33
|
+
last_updated = models.DateTimeField(auto_now=True)
|
|
34
|
+
|
|
35
|
+
class Meta:
|
|
36
|
+
ordering = ['-count']
|
aiwaf/trainer.py
CHANGED
|
@@ -1,44 +1,48 @@
|
|
|
1
|
-
# aiwaf/trainer.py
|
|
2
|
-
|
|
3
1
|
import os
|
|
4
2
|
import glob
|
|
5
3
|
import gzip
|
|
6
4
|
import re
|
|
7
|
-
import joblib
|
|
8
5
|
from datetime import datetime
|
|
9
|
-
from collections import defaultdict
|
|
10
|
-
|
|
6
|
+
from collections import defaultdict, Counter
|
|
7
|
+
|
|
11
8
|
import pandas as pd
|
|
12
9
|
from sklearn.ensemble import IsolationForest
|
|
10
|
+
import joblib
|
|
11
|
+
|
|
13
12
|
from django.conf import settings
|
|
14
13
|
from django.apps import apps
|
|
14
|
+
from django.db.models import F
|
|
15
|
+
|
|
16
|
+
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
17
|
+
MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
|
|
18
|
+
|
|
19
|
+
STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
|
|
20
|
+
STATUS_IDX = ["200", "403", "404", "500"]
|
|
15
21
|
|
|
16
|
-
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
17
|
-
MODEL_PATH = os.path.join(
|
|
18
|
-
os.path.dirname(__file__),
|
|
19
|
-
"resources",
|
|
20
|
-
"model.pkl"
|
|
21
|
-
)
|
|
22
|
-
MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
|
|
23
|
-
STATUS_CODES = ["200", "403", "404", "500"]
|
|
24
22
|
_LOG_RX = re.compile(
|
|
25
|
-
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?"
|
|
23
|
+
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
|
|
24
|
+
r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
|
|
26
25
|
)
|
|
27
|
-
|
|
26
|
+
|
|
27
|
+
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
28
|
+
DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
|
|
29
|
+
|
|
30
|
+
|
|
28
31
|
def _read_all_logs():
|
|
29
32
|
lines = []
|
|
30
33
|
if LOG_PATH and os.path.exists(LOG_PATH):
|
|
31
34
|
with open(LOG_PATH, "r", errors="ignore") as f:
|
|
32
|
-
lines
|
|
33
|
-
for path in sorted(glob.glob(LOG_PATH
|
|
35
|
+
lines.extend(f.readlines())
|
|
36
|
+
for path in sorted(glob.glob(f"{LOG_PATH}.*")):
|
|
34
37
|
opener = gzip.open if path.endswith(".gz") else open
|
|
35
38
|
try:
|
|
36
39
|
with opener(path, "rt", errors="ignore") as f:
|
|
37
|
-
lines
|
|
40
|
+
lines.extend(f.readlines())
|
|
38
41
|
except OSError:
|
|
39
42
|
continue
|
|
40
43
|
return lines
|
|
41
44
|
|
|
45
|
+
|
|
42
46
|
def _parse(line):
|
|
43
47
|
m = _LOG_RX.search(line)
|
|
44
48
|
if not m:
|
|
@@ -59,14 +63,14 @@ def _parse(line):
|
|
|
59
63
|
|
|
60
64
|
|
|
61
65
|
def train():
|
|
62
|
-
|
|
63
|
-
if not
|
|
64
|
-
print("No log lines found – check AIWAF_ACCESS_LOG")
|
|
66
|
+
raw_lines = _read_all_logs()
|
|
67
|
+
if not raw_lines:
|
|
68
|
+
print(" No log lines found – check AIWAF_ACCESS_LOG setting.")
|
|
65
69
|
return
|
|
66
70
|
parsed = []
|
|
67
|
-
ip_404
|
|
71
|
+
ip_404 = defaultdict(int)
|
|
68
72
|
ip_times = defaultdict(list)
|
|
69
|
-
for ln in
|
|
73
|
+
for ln in raw_lines:
|
|
70
74
|
rec = _parse(ln)
|
|
71
75
|
if not rec:
|
|
72
76
|
continue
|
|
@@ -74,7 +78,7 @@ def train():
|
|
|
74
78
|
ip_times[rec["ip"]].append(rec["timestamp"])
|
|
75
79
|
if rec["status"] == "404":
|
|
76
80
|
ip_404[rec["ip"]] += 1
|
|
77
|
-
|
|
81
|
+
blocked_404 = []
|
|
78
82
|
for ip, count in ip_404.items():
|
|
79
83
|
if count >= 6:
|
|
80
84
|
obj, created = BlacklistEntry.objects.get_or_create(
|
|
@@ -82,42 +86,66 @@ def train():
|
|
|
82
86
|
defaults={"reason": "Excessive 404s (≥6)"}
|
|
83
87
|
)
|
|
84
88
|
if created:
|
|
85
|
-
|
|
86
|
-
if
|
|
87
|
-
print(f"
|
|
88
|
-
|
|
89
|
+
blocked_404.append(ip)
|
|
90
|
+
if blocked_404:
|
|
91
|
+
print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
|
|
92
|
+
feature_dicts = []
|
|
89
93
|
for r in parsed:
|
|
90
|
-
ip
|
|
91
|
-
burst
|
|
94
|
+
ip = r["ip"]
|
|
95
|
+
burst = sum(
|
|
92
96
|
1 for t in ip_times[ip]
|
|
93
97
|
if (r["timestamp"] - t).total_seconds() <= 10
|
|
94
98
|
)
|
|
95
|
-
total404
|
|
96
|
-
kw_hits
|
|
97
|
-
status_idx =
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
99
|
+
total404 = ip_404[ip]
|
|
100
|
+
kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
|
|
101
|
+
status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
|
|
102
|
+
feature_dicts.append({
|
|
103
|
+
"ip": ip,
|
|
104
|
+
"path_len": len(r["path"]),
|
|
105
|
+
"kw_hits": kw_hits,
|
|
106
|
+
"resp_time": r["response_time"],
|
|
107
|
+
"status_idx": status_idx,
|
|
108
|
+
"burst_count": burst,
|
|
109
|
+
"total_404": total404,
|
|
110
|
+
})
|
|
111
|
+
|
|
112
|
+
if not feature_dicts:
|
|
113
|
+
print("⚠️ Nothing to train on – no valid log entries.")
|
|
109
114
|
return
|
|
110
115
|
|
|
111
|
-
df = pd.DataFrame(
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
]
|
|
117
|
-
).fillna(0).astype(float)
|
|
118
|
-
clf = IsolationForest(contamination=0.01, random_state=42)
|
|
119
|
-
clf.fit(df.values)
|
|
116
|
+
df = pd.DataFrame(feature_dicts)
|
|
117
|
+
feature_cols = [c for c in df.columns if c != "ip"]
|
|
118
|
+
X = df[feature_cols].astype(float).values
|
|
119
|
+
model = IsolationForest(contamination=0.01, random_state=42)
|
|
120
|
+
model.fit(X)
|
|
120
121
|
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
|
|
121
|
-
joblib.dump(
|
|
122
|
-
print(f"Model trained on {len(
|
|
122
|
+
joblib.dump(model, MODEL_PATH)
|
|
123
|
+
print(f"✅ Model trained on {len(X)} samples → {MODEL_PATH}")
|
|
124
|
+
preds = model.predict(X) # -1 for outliers
|
|
125
|
+
anomalous_ips = set(df.loc[preds == -1, 'ip'])
|
|
126
|
+
blocked_anom = []
|
|
127
|
+
for ip in anomalous_ips:
|
|
128
|
+
obj, created = BlacklistEntry.objects.get_or_create(
|
|
129
|
+
ip_address=ip,
|
|
130
|
+
defaults={"reason": "Anomalous behavior"}
|
|
131
|
+
)
|
|
132
|
+
if created:
|
|
133
|
+
blocked_anom.append(ip)
|
|
134
|
+
if blocked_anom:
|
|
135
|
+
print(f" Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
|
|
136
|
+
|
|
137
|
+
tokens = Counter()
|
|
138
|
+
for r in parsed:
|
|
139
|
+
if r["status"].startswith(("4", "5")):
|
|
140
|
+
for seg in re.split(r"\W+", r["path"].lower()):
|
|
141
|
+
if len(seg) > 3 and seg not in STATIC_KW:
|
|
142
|
+
tokens[seg] += 1
|
|
143
|
+
top_tokens = tokens.most_common(10)
|
|
144
|
+
for kw, cnt in top_tokens:
|
|
145
|
+
obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
|
|
146
|
+
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
|
|
147
|
+
print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
|
|
148
|
+
|
|
123
149
|
|
|
150
|
+
if __name__ == "__main__":
|
|
151
|
+
train()
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiwaf
|
|
3
|
+
Version: 0.1.5
|
|
4
|
+
Summary: AI-powered Web Application Firewall
|
|
5
|
+
Home-page: https://github.com/aayushgauba/aiwaf
|
|
6
|
+
Author: Aayush Gauba
|
|
7
|
+
Author-email: Aayush Gauba <gauba.aayush@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Dynamic: author
|
|
13
|
+
Dynamic: home-page
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
Dynamic: requires-python
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# AI‑WAF
|
|
19
|
+
|
|
20
|
+
> A self‑learning, Django‑friendly Web Application Firewall
|
|
21
|
+
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Package Structure
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
aiwaf/
|
|
29
|
+
├── __init__.py
|
|
30
|
+
├── blacklist_manager.py
|
|
31
|
+
├── middleware.py
|
|
32
|
+
├── trainer.py # exposes train()
|
|
33
|
+
├── utils.py
|
|
34
|
+
├── template_tags/
|
|
35
|
+
│ └── aiwaf_tags.py
|
|
36
|
+
├── resources/
|
|
37
|
+
│ ├── model.pkl # pre‑trained base model
|
|
38
|
+
│ └── dynamic_keywords.json # evolves daily
|
|
39
|
+
├── management/
|
|
40
|
+
│ └── commands/
|
|
41
|
+
│ └── detect_and_train.py # `python manage.py detect_and_train`
|
|
42
|
+
└── LICENSE
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Features
|
|
48
|
+
|
|
49
|
+
- **IP Blocklist**
|
|
50
|
+
Instantly blocks suspicious IPs (supports CSV fallback or Django model).
|
|
51
|
+
|
|
52
|
+
- **Rate Limiting**
|
|
53
|
+
Sliding‑window blocks flooders (> `AIWAF_RATE_MAX` per `AIWAF_RATE_WINDOW`), then blacklists them.
|
|
54
|
+
|
|
55
|
+
- **AI Anomaly Detection**
|
|
56
|
+
IsolationForest on features:
|
|
57
|
+
- Path length
|
|
58
|
+
- Keyword hits (static + dynamic)
|
|
59
|
+
- Response time
|
|
60
|
+
- Status‑code index
|
|
61
|
+
- Burst count
|
|
62
|
+
- Total 404s
|
|
63
|
+
|
|
64
|
+
- **Dynamic Keyword Extraction**
|
|
65
|
+
Every retrain: top 10 most frequent “words” from 4xx/5xx paths are appended to your malicious keyword set.
|
|
66
|
+
|
|
67
|
+
- **File‑Extension Probing Detection**
|
|
68
|
+
Tracks repeated 404s on common web‑extensions (e.g. `.php`, `.asp`) and auto‑blocks after a burst.
|
|
69
|
+
|
|
70
|
+
- **Honeypot Field**
|
|
71
|
+
Hidden form field (via template tag) that bots fill → instant block.
|
|
72
|
+
|
|
73
|
+
- **UUID Tampering Protection**
|
|
74
|
+
Any `<uuid:…>` URL that doesn’t map to **any** model in its Django app gets blocked.
|
|
75
|
+
|
|
76
|
+
- **Daily Retraining**
|
|
77
|
+
Reads rotated/gzipped logs, auto‑blocks 404 floods (≥6), retrains the model, and updates `model.pkl` plus the `DynamicKeyword` database table.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# From PyPI
|
|
85
|
+
pip install aiwaf
|
|
86
|
+
|
|
87
|
+
# Or for local development
|
|
88
|
+
git clone https://github.com/aayushgauba/aiwaf.git
|
|
89
|
+
cd aiwaf
|
|
90
|
+
pip install -e .
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## ⚙️ Configuration (`settings.py`)
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
INSTALLED_APPS += ["aiwaf"]
|
|
99
|
+
|
|
100
|
+
### Database Setup
|
|
101
|
+
|
|
102
|
+
After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
python manage.py makemigrations aiwaf
|
|
106
|
+
python manage.py migrate
|
|
107
|
+
|
|
108
|
+
# Required
|
|
109
|
+
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
110
|
+
|
|
111
|
+
# Optional (defaults shown)
|
|
112
|
+
AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
|
|
113
|
+
AIWAF_HONEYPOT_FIELD = "hp_field"
|
|
114
|
+
AIWAF_RATE_WINDOW = 10 # seconds
|
|
115
|
+
AIWAF_RATE_MAX = 20 # max reqs/window
|
|
116
|
+
AIWAF_RATE_FLOOD = 10 # flood threshold
|
|
117
|
+
AIWAF_WINDOW_SECONDS = 60 # anomaly window
|
|
118
|
+
AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"] # 404‑burst tracked extensions
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
> **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` in your settings — they’re built in and evolve dynamically.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Middleware Setup
|
|
126
|
+
|
|
127
|
+
Add in **this** order to your `MIDDLEWARE` list:
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
MIDDLEWARE = [
|
|
131
|
+
"aiwaf.middleware.IPAndKeywordBlockMiddleware",
|
|
132
|
+
"aiwaf.middleware.RateLimitMiddleware",
|
|
133
|
+
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
134
|
+
"aiwaf.middleware.HoneypotMiddleware",
|
|
135
|
+
"aiwaf.middleware.UUIDTamperMiddleware",
|
|
136
|
+
# ... other middleware ...
|
|
137
|
+
]
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Honeypot Field (in your template)
|
|
143
|
+
|
|
144
|
+
```django
|
|
145
|
+
{% load aiwaf_tags %}
|
|
146
|
+
|
|
147
|
+
<form method="post">
|
|
148
|
+
{% csrf_token %}
|
|
149
|
+
{% honeypot_field %}
|
|
150
|
+
<!-- your real fields -->
|
|
151
|
+
</form>
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
> Renders a hidden `<input name="hp_field" style="display:none">`.
|
|
155
|
+
> Any non‑empty submission → IP blacklisted.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Running Detection & Training
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
python manage.py detect_and_train
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**What happens:**
|
|
166
|
+
1. Read access logs
|
|
167
|
+
2. Auto‑block IPs with ≥ 6 total 404s
|
|
168
|
+
3. Extract features & train IsolationForest
|
|
169
|
+
4. Save `model.pkl`
|
|
170
|
+
5. Extract top 10 dynamic keywords from 4xx/5xx
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## How It Works
|
|
175
|
+
|
|
176
|
+
| Middleware | Purpose |
|
|
177
|
+
|------------------------------------|-----------------------------------------------------------------|
|
|
178
|
+
| IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
|
|
179
|
+
| RateLimitMiddleware | Enforces burst & flood thresholds |
|
|
180
|
+
| AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
|
|
181
|
+
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
182
|
+
| UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## License
|
|
187
|
+
|
|
188
|
+
This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## Credits
|
|
193
|
+
|
|
194
|
+
**AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
195
|
+
> “Let your firewall learn and evolve — keep your site a fortress.”
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
|
|
2
2
|
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
3
|
aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
|
|
4
|
-
aiwaf/middleware.py,sha256=
|
|
5
|
-
aiwaf/models.py,sha256=
|
|
4
|
+
aiwaf/middleware.py,sha256=04AbNgkwLMaYSiuEtw59A-O02tt4cqaKmP7XDNlkIG0,6359
|
|
5
|
+
aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
|
|
6
6
|
aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
|
|
7
|
-
aiwaf/trainer.py,sha256=
|
|
7
|
+
aiwaf/trainer.py,sha256=TKWJZzWTg892vdoSGWdCA0i-dKof2b29buWqJUrkr6k,4820
|
|
8
8
|
aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
|
|
9
9
|
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -12,7 +12,8 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
|
|
|
12
12
|
aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
|
|
13
13
|
aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
|
|
15
|
-
aiwaf-0.1.
|
|
16
|
-
aiwaf-0.1.
|
|
17
|
-
aiwaf-0.1.
|
|
18
|
-
aiwaf-0.1.
|
|
15
|
+
aiwaf-0.1.5.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
16
|
+
aiwaf-0.1.5.dist-info/METADATA,sha256=g1hwdQBSJX1JBBnBim_TFtzjVMI5Ixl0WVrPPlnQCPg,5405
|
|
17
|
+
aiwaf-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
18
|
+
aiwaf-0.1.5.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
19
|
+
aiwaf-0.1.5.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Aayush Gauba
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
aiwaf-0.1.2.dist-info/METADATA
DELETED
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: aiwaf
|
|
3
|
-
Version: 0.1.2
|
|
4
|
-
Summary: AI-powered Web Application Firewall
|
|
5
|
-
Author: Aayush Gauba
|
|
6
|
-
Author-email: Aayush Gauba <gauba.aayush@gmail.com>
|
|
7
|
-
License: MIT
|
|
8
|
-
Requires-Python: >=3.8
|
|
9
|
-
Description-Content-Type: text/markdown
|
|
10
|
-
Dynamic: author
|
|
11
|
-
|
|
12
|
-
# AI‑WAF
|
|
13
|
-
|
|
14
|
-
> A self-learning, Django-friendly Web Application Firewall
|
|
15
|
-
> with rate-limiting, anomaly detection, honeypots, UUID-tamper protection, and daily retraining.
|
|
16
|
-
|
|
17
|
-
---
|
|
18
|
-
|
|
19
|
-
## Package Structure
|
|
20
|
-
|
|
21
|
-
```
|
|
22
|
-
aiwaf/
|
|
23
|
-
├── __init__.py
|
|
24
|
-
├── blacklist_manager.py
|
|
25
|
-
├── middleware.py
|
|
26
|
-
├── trainer.py # exposes detect_and_train()
|
|
27
|
-
├── utils.py
|
|
28
|
-
├── template_tags/
|
|
29
|
-
│ └── aiwaf_tags.py
|
|
30
|
-
├── resources/
|
|
31
|
-
│ └── model.pkl # pre-trained base model
|
|
32
|
-
├── management/
|
|
33
|
-
│ └── commands/
|
|
34
|
-
│ └── detect_and_train.py # python manage.py detect_and_train
|
|
35
|
-
└── LICENSE
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
---
|
|
39
|
-
|
|
40
|
-
## Features
|
|
41
|
-
|
|
42
|
-
- **IP Blocklist**
|
|
43
|
-
Automatically blocks suspicious IPs; optionally backed by CSV or Django model.
|
|
44
|
-
|
|
45
|
-
- **Rate Limiting**
|
|
46
|
-
Sliding window logic blocks IPs exceeding a threshold of requests per second.
|
|
47
|
-
|
|
48
|
-
- **AI Anomaly Detection**
|
|
49
|
-
IsolationForest trained on real logs with features like:
|
|
50
|
-
- Path length
|
|
51
|
-
- Keyword hits
|
|
52
|
-
- Response time
|
|
53
|
-
- Status code index
|
|
54
|
-
- Burst count
|
|
55
|
-
- Total 404s
|
|
56
|
-
|
|
57
|
-
- **Honeypot Field**
|
|
58
|
-
Hidden form field that bots are likely to fill — if triggered, the IP is blocked.
|
|
59
|
-
|
|
60
|
-
- **UUID Tampering Protection**
|
|
61
|
-
Detects if someone is probing by injecting random/nonexistent UUIDs into URLs.
|
|
62
|
-
|
|
63
|
-
- **Daily Retraining**
|
|
64
|
-
A single command retrains your model every day based on your logs.
|
|
65
|
-
|
|
66
|
-
---
|
|
67
|
-
|
|
68
|
-
## Installation
|
|
69
|
-
|
|
70
|
-
Install locally or from PyPI:
|
|
71
|
-
|
|
72
|
-
```bash
|
|
73
|
-
pip install aiwaf
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
Or for local dev:
|
|
77
|
-
|
|
78
|
-
```bash
|
|
79
|
-
git clone https://github.com/aayushgauba/aiwaf.git
|
|
80
|
-
cd aiwaf
|
|
81
|
-
pip install -e .
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
---
|
|
85
|
-
|
|
86
|
-
## ⚙️ Configuration (`settings.py`)
|
|
87
|
-
|
|
88
|
-
```python
|
|
89
|
-
INSTALLED_APPS += [
|
|
90
|
-
"aiwaf",
|
|
91
|
-
]
|
|
92
|
-
|
|
93
|
-
# Required
|
|
94
|
-
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
95
|
-
|
|
96
|
-
# Optional (defaults included)
|
|
97
|
-
AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
|
|
98
|
-
AIWAF_MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
|
|
99
|
-
AIWAF_STATUS_CODES = ["200", "403", "404", "500"]
|
|
100
|
-
AIWAF_HONEYPOT_FIELD = "hp_field"
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
---
|
|
104
|
-
|
|
105
|
-
## Middleware Setup
|
|
106
|
-
|
|
107
|
-
Add to `MIDDLEWARE` in order:
|
|
108
|
-
|
|
109
|
-
```python
|
|
110
|
-
MIDDLEWARE = [
|
|
111
|
-
"aiwaf.middleware.IPBlockMiddleware",
|
|
112
|
-
"aiwaf.middleware.RateLimitMiddleware",
|
|
113
|
-
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
114
|
-
"aiwaf.middleware.HoneypotMiddleware",
|
|
115
|
-
"aiwaf.middleware.UUIDTamperMiddleware",
|
|
116
|
-
...
|
|
117
|
-
]
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
---
|
|
121
|
-
|
|
122
|
-
## Honeypot Field (in template)
|
|
123
|
-
|
|
124
|
-
```html
|
|
125
|
-
{% load aiwaf_tags %}
|
|
126
|
-
|
|
127
|
-
<form method="post">
|
|
128
|
-
{% csrf_token %}
|
|
129
|
-
{% honeypot_field %}
|
|
130
|
-
<!-- other fields -->
|
|
131
|
-
</form>
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
The hidden field will be `<input type="hidden" name="hp_field">`.
|
|
135
|
-
If it’s ever filled → IP gets blocked.
|
|
136
|
-
|
|
137
|
-
---
|
|
138
|
-
|
|
139
|
-
## Run Detection + Training
|
|
140
|
-
|
|
141
|
-
```bash
|
|
142
|
-
python manage.py detect_and_train
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
What it does:
|
|
146
|
-
|
|
147
|
-
- Reads logs (supports `.gz` and rotated logs).
|
|
148
|
-
- Detects excessive 404s (≥6) → instant block.
|
|
149
|
-
- Builds feature vectors from logs.
|
|
150
|
-
- Trains IsolationForest and saves `model.pkl`.
|
|
151
|
-
|
|
152
|
-
Schedule it to run daily via `cron`, `Celery beat`, or systemd timer.
|
|
153
|
-
|
|
154
|
-
---
|
|
155
|
-
|
|
156
|
-
## How It Works (Simplified)
|
|
157
|
-
|
|
158
|
-
| Middleware | Functionality |
|
|
159
|
-
|------------------------|--------------------------------------------------------------|
|
|
160
|
-
| IPBlockMiddleware | Blocks requests from known blacklisted IPs |
|
|
161
|
-
| RateLimitMiddleware | Blocks flooders (>20/10s) and blacklists them (>10/10s) |
|
|
162
|
-
| AIAnomalyMiddleware | Uses ML to detect suspicious behavior in request patterns |
|
|
163
|
-
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
164
|
-
| UUIDTamperMiddleware | Detects guessing/probing by checking invalid UUID access |
|
|
165
|
-
|
|
166
|
-
---
|
|
167
|
-
|
|
168
|
-
## Development Roadmap
|
|
169
|
-
|
|
170
|
-
- [ ] Add CSV blocklist fallback
|
|
171
|
-
- [ ] Admin dashboard integration
|
|
172
|
-
- [ ] Auto-pruning of old block entries
|
|
173
|
-
- [ ] Real-time log streaming compatibility
|
|
174
|
-
- [ ] Docker/Helm deployment guide
|
|
175
|
-
|
|
176
|
-
---
|
|
177
|
-
|
|
178
|
-
## License
|
|
179
|
-
|
|
180
|
-
This project is licensed under the **MIT License** — see `LICENSE` for details.
|
|
181
|
-
|
|
182
|
-
---
|
|
183
|
-
|
|
184
|
-
## Credits
|
|
185
|
-
|
|
186
|
-
**AIWAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
187
|
-
> "Let your firewall learn and evolve with your logs. Make your site a fortress."
|
|
File without changes
|
|
File without changes
|