aiwaf 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/middleware.py +69 -30
- aiwaf/models.py +8 -0
- aiwaf/trainer.py +75 -23
- aiwaf-0.1.3.dist-info/METADATA +181 -0
- {aiwaf-0.1.0.dist-info → aiwaf-0.1.3.dist-info}/RECORD +7 -8
- aiwaf-0.1.0.dist-info/METADATA +0 -13
- aiwaf-0.1.0.dist-info/entry_points.txt +0 -2
- {aiwaf-0.1.0.dist-info → aiwaf-0.1.3.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.0.dist-info → aiwaf-0.1.3.dist-info}/top_level.txt +0 -0
aiwaf/middleware.py
CHANGED
|
@@ -1,23 +1,40 @@
|
|
|
1
|
+
# aiwaf/middleware.py
|
|
2
|
+
|
|
1
3
|
import time
|
|
4
|
+
import re
|
|
5
|
+
import os
|
|
2
6
|
import numpy as np
|
|
3
7
|
import joblib
|
|
8
|
+
|
|
4
9
|
from collections import defaultdict
|
|
5
10
|
from django.utils.deprecation import MiddlewareMixin
|
|
6
11
|
from django.http import JsonResponse
|
|
7
12
|
from django.conf import settings
|
|
8
13
|
from django.core.cache import cache
|
|
9
|
-
from django.
|
|
14
|
+
from django.db.models import F
|
|
10
15
|
from django.apps import apps
|
|
11
|
-
from .blacklist_manager import BlacklistManager
|
|
12
16
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
except AttributeError:
|
|
16
|
-
import importlib.resources
|
|
17
|
-
MODEL_PATH = importlib.resources.files("aiwaf").joinpath("resources/model.pkl")
|
|
17
|
+
from .blacklist_manager import BlacklistManager
|
|
18
|
+
from .models import DynamicKeyword
|
|
18
19
|
|
|
20
|
+
# ─── Model loading with fallback ────────────────────────────────────────────
|
|
21
|
+
MODEL_PATH = getattr(
|
|
22
|
+
settings,
|
|
23
|
+
"AIWAF_MODEL_PATH",
|
|
24
|
+
os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
|
|
25
|
+
)
|
|
19
26
|
MODEL = joblib.load(MODEL_PATH)
|
|
20
27
|
|
|
28
|
+
# ─── Static keywords default ────────────────────────────────────────────────
|
|
29
|
+
STATIC_KW = getattr(
|
|
30
|
+
settings,
|
|
31
|
+
"AIWAF_MALICIOUS_KEYWORDS",
|
|
32
|
+
[
|
|
33
|
+
".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
|
|
34
|
+
"conflg", "shell", "filemanager"
|
|
35
|
+
]
|
|
36
|
+
)
|
|
37
|
+
|
|
21
38
|
def get_ip(request):
|
|
22
39
|
xff = request.META.get("HTTP_X_FORWARDED_FOR")
|
|
23
40
|
if xff:
|
|
@@ -37,18 +54,21 @@ class IPBlockMiddleware:
|
|
|
37
54
|
|
|
38
55
|
|
|
39
56
|
class RateLimitMiddleware:
|
|
40
|
-
WINDOW =
|
|
41
|
-
MAX
|
|
42
|
-
FLOOD
|
|
57
|
+
WINDOW = 10
|
|
58
|
+
MAX = 20
|
|
59
|
+
FLOOD = 10
|
|
60
|
+
|
|
43
61
|
def __init__(self, get_response):
|
|
44
62
|
self.get_response = get_response
|
|
45
63
|
self.logs = defaultdict(list)
|
|
64
|
+
|
|
46
65
|
def __call__(self, request):
|
|
47
|
-
ip
|
|
66
|
+
ip = get_ip(request)
|
|
48
67
|
now = time.time()
|
|
49
68
|
recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
|
|
50
69
|
recs.append(now)
|
|
51
70
|
self.logs[ip] = recs
|
|
71
|
+
|
|
52
72
|
if len(recs) > self.MAX:
|
|
53
73
|
return JsonResponse({"error": "too_many_requests"}, status=429)
|
|
54
74
|
if len(recs) > self.FLOOD:
|
|
@@ -59,40 +79,57 @@ class RateLimitMiddleware:
|
|
|
59
79
|
|
|
60
80
|
|
|
61
81
|
class AIAnomalyMiddleware(MiddlewareMixin):
|
|
62
|
-
|
|
82
|
+
WINDOW = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
|
|
83
|
+
TOP_N = getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)
|
|
84
|
+
|
|
63
85
|
def process_request(self, request):
|
|
64
86
|
ip = get_ip(request)
|
|
65
87
|
if BlacklistManager.is_blocked(ip):
|
|
66
88
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
89
|
+
|
|
67
90
|
now = time.time()
|
|
68
91
|
key = f"aiwaf:{ip}"
|
|
69
92
|
data = cache.get(key, [])
|
|
93
|
+
# TODO: you may want to capture real status & response_time in process_response
|
|
70
94
|
data.append((now, request.path, 0, 0.0))
|
|
71
|
-
data = [d for d in data if now - d[0] < self.
|
|
72
|
-
cache.set(key, data, timeout=self.
|
|
95
|
+
data = [d for d in data if now - d[0] < self.WINDOW]
|
|
96
|
+
cache.set(key, data, timeout=self.WINDOW)
|
|
97
|
+
|
|
98
|
+
# update dynamic‐keyword counts
|
|
99
|
+
for seg in re.split(r"\W+", request.path.lower()):
|
|
100
|
+
if len(seg) > 3:
|
|
101
|
+
obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
|
|
102
|
+
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
|
|
103
|
+
|
|
73
104
|
if len(data) < 5:
|
|
74
105
|
return None
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
106
|
+
|
|
107
|
+
# pull top‐N dynamic tokens
|
|
108
|
+
top_dynamic = list(
|
|
109
|
+
DynamicKeyword.objects
|
|
110
|
+
.order_by("-count")
|
|
111
|
+
.values_list("keyword", flat=True)[: self.TOP_N]
|
|
80
112
|
)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
113
|
+
ALL_KW = set(STATIC_KW) | set(top_dynamic)
|
|
114
|
+
|
|
115
|
+
total = len(data)
|
|
116
|
+
ratio404 = sum(1 for (_, _, st, _) in data if st == 404) / total
|
|
117
|
+
hits = sum(any(kw in path.lower() for kw in ALL_KW) for (_, path, _, _) in data)
|
|
118
|
+
avg_rt = np.mean([rt for (_, _, _, rt) in data]) if data else 0.0
|
|
119
|
+
ivs = [data[i][0] - data[i - 1][0] for i in range(1, total)]
|
|
120
|
+
avg_iv = np.mean(ivs) if ivs else 0.0
|
|
121
|
+
|
|
122
|
+
X = np.array([[total, ratio404, hits, avg_rt, avg_iv]], dtype=float)
|
|
87
123
|
if MODEL.predict(X)[0] == -1:
|
|
88
124
|
BlacklistManager.block(ip, "AI anomaly")
|
|
89
125
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
126
|
+
|
|
90
127
|
return None
|
|
91
128
|
|
|
92
129
|
|
|
93
130
|
class HoneypotMiddleware(MiddlewareMixin):
|
|
94
131
|
def process_view(self, request, view_func, view_args, view_kwargs):
|
|
95
|
-
trap = request.POST.get(settings
|
|
132
|
+
trap = request.POST.get(getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field"), "")
|
|
96
133
|
if trap:
|
|
97
134
|
ip = get_ip(request)
|
|
98
135
|
BlacklistManager.block(ip, "HONEYPOT triggered")
|
|
@@ -105,11 +142,13 @@ class UUIDTamperMiddleware(MiddlewareMixin):
|
|
|
105
142
|
uid = view_kwargs.get("uuid")
|
|
106
143
|
if not uid:
|
|
107
144
|
return None
|
|
145
|
+
|
|
108
146
|
ip = get_ip(request)
|
|
109
|
-
app_label =
|
|
110
|
-
|
|
111
|
-
for Model in
|
|
147
|
+
app_label = view_func.__module__.split(".")[0]
|
|
148
|
+
app_cfg = apps.get_app_config(app_label)
|
|
149
|
+
for Model in app_cfg.get_models():
|
|
112
150
|
if Model.objects.filter(pk=uid).exists():
|
|
113
151
|
return None
|
|
152
|
+
|
|
114
153
|
BlacklistManager.block(ip, "UUID tampering")
|
|
115
|
-
return JsonResponse({"error": "blocked"}, status=403)
|
|
154
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
aiwaf/models.py
CHANGED
|
@@ -26,3 +26,11 @@ class BlacklistEntry(models.Model):
|
|
|
26
26
|
|
|
27
27
|
def __str__(self):
|
|
28
28
|
return f"{self.ip_address} ({self.reason})"
|
|
29
|
+
|
|
30
|
+
class DynamicKeyword(models.Model):
|
|
31
|
+
keyword = models.CharField(max_length=100, unique=True)
|
|
32
|
+
count = models.PositiveIntegerField(default=0)
|
|
33
|
+
last_updated = models.DateTimeField(auto_now=True)
|
|
34
|
+
|
|
35
|
+
class Meta:
|
|
36
|
+
ordering = ['-count']
|
aiwaf/trainer.py
CHANGED
|
@@ -1,30 +1,50 @@
|
|
|
1
|
-
# aiwaf/trainer.py
|
|
2
|
-
|
|
3
1
|
import os
|
|
4
2
|
import glob
|
|
5
3
|
import gzip
|
|
6
4
|
import re
|
|
5
|
+
import json
|
|
7
6
|
import joblib
|
|
7
|
+
|
|
8
8
|
from datetime import datetime
|
|
9
|
-
from collections import defaultdict
|
|
10
|
-
|
|
9
|
+
from collections import defaultdict, Counter
|
|
10
|
+
|
|
11
11
|
import pandas as pd
|
|
12
12
|
from sklearn.ensemble import IsolationForest
|
|
13
|
+
|
|
13
14
|
from django.conf import settings
|
|
14
15
|
from django.apps import apps
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
# ─── CONFIG ────────────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
# Where to read your access logs (and rotated/.gz siblings)
|
|
20
|
+
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
21
|
+
|
|
22
|
+
# Where we save our trained model
|
|
17
23
|
MODEL_PATH = os.path.join(
|
|
18
24
|
os.path.dirname(__file__),
|
|
19
25
|
"resources",
|
|
20
26
|
"model.pkl"
|
|
21
27
|
)
|
|
22
|
-
|
|
23
|
-
|
|
28
|
+
|
|
29
|
+
# Static “malicious” path keywords & file extensions
|
|
30
|
+
MALICIOUS_KEYWORDS = [
|
|
31
|
+
".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
|
|
32
|
+
"conflg", "shell", "filemanager"
|
|
33
|
+
]
|
|
34
|
+
STATUS_CODES = ["200", "403", "404", "500"]
|
|
35
|
+
|
|
36
|
+
# Regex for combined log with response-time=…
|
|
24
37
|
_LOG_RX = re.compile(
|
|
25
|
-
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?"
|
|
38
|
+
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
|
|
39
|
+
r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
|
|
26
40
|
)
|
|
27
|
-
|
|
41
|
+
|
|
42
|
+
# Your Django model for storing blocked IPs
|
|
43
|
+
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ─── READ & PARSE LOG LINES ─────────────────────────────────────────────────
|
|
47
|
+
|
|
28
48
|
def _read_all_logs():
|
|
29
49
|
lines = []
|
|
30
50
|
if LOG_PATH and os.path.exists(LOG_PATH):
|
|
@@ -58,14 +78,19 @@ def _parse(line):
|
|
|
58
78
|
}
|
|
59
79
|
|
|
60
80
|
|
|
81
|
+
# ─── TRAIN ENTRYPOINT ───────────────────────────────────────────────────────
|
|
82
|
+
|
|
61
83
|
def train():
|
|
62
84
|
raw = _read_all_logs()
|
|
63
85
|
if not raw:
|
|
64
|
-
print("No log lines found – check AIWAF_ACCESS_LOG")
|
|
86
|
+
print("❌ No log lines found – check settings.AIWAF_ACCESS_LOG")
|
|
65
87
|
return
|
|
66
|
-
|
|
88
|
+
|
|
89
|
+
parsed = []
|
|
67
90
|
ip_404 = defaultdict(int)
|
|
68
91
|
ip_times = defaultdict(list)
|
|
92
|
+
|
|
93
|
+
# parse + accumulate timestamps & 404 counts
|
|
69
94
|
for ln in raw:
|
|
70
95
|
rec = _parse(ln)
|
|
71
96
|
if not rec:
|
|
@@ -74,27 +99,32 @@ def train():
|
|
|
74
99
|
ip_times[rec["ip"]].append(rec["timestamp"])
|
|
75
100
|
if rec["status"] == "404":
|
|
76
101
|
ip_404[rec["ip"]] += 1
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
102
|
+
|
|
103
|
+
# auto-block IPs with >=6 total 404s
|
|
104
|
+
newly_blocked = []
|
|
105
|
+
for ip, cnt in ip_404.items():
|
|
106
|
+
if cnt >= 6:
|
|
80
107
|
obj, created = BlacklistEntry.objects.get_or_create(
|
|
81
108
|
ip_address=ip,
|
|
82
109
|
defaults={"reason": "Excessive 404s (≥6)"}
|
|
83
110
|
)
|
|
84
111
|
if created:
|
|
85
|
-
|
|
86
|
-
if
|
|
87
|
-
print(f"
|
|
112
|
+
newly_blocked.append(ip)
|
|
113
|
+
if newly_blocked:
|
|
114
|
+
print(f"🔒 Blocked {len(newly_blocked)} IPs for 404 flood: {newly_blocked}")
|
|
115
|
+
|
|
116
|
+
# build feature vectors
|
|
88
117
|
rows = []
|
|
89
118
|
for r in parsed:
|
|
90
|
-
ip
|
|
91
|
-
burst
|
|
119
|
+
ip = r["ip"]
|
|
120
|
+
burst = sum(
|
|
92
121
|
1 for t in ip_times[ip]
|
|
93
122
|
if (r["timestamp"] - t).total_seconds() <= 10
|
|
94
123
|
)
|
|
95
|
-
total404
|
|
96
|
-
kw_hits
|
|
124
|
+
total404 = ip_404[ip]
|
|
125
|
+
kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
|
|
97
126
|
status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
|
|
127
|
+
|
|
98
128
|
rows.append([
|
|
99
129
|
len(r["path"]),
|
|
100
130
|
kw_hits,
|
|
@@ -105,7 +135,7 @@ def train():
|
|
|
105
135
|
])
|
|
106
136
|
|
|
107
137
|
if not rows:
|
|
108
|
-
print("No entries to train on
|
|
138
|
+
print("⚠️ No entries to train on.")
|
|
109
139
|
return
|
|
110
140
|
|
|
111
141
|
df = pd.DataFrame(
|
|
@@ -115,9 +145,31 @@ def train():
|
|
|
115
145
|
"status_idx", "burst_count", "total_404"
|
|
116
146
|
]
|
|
117
147
|
).fillna(0).astype(float)
|
|
148
|
+
|
|
149
|
+
# train & save
|
|
118
150
|
clf = IsolationForest(contamination=0.01, random_state=42)
|
|
119
151
|
clf.fit(df.values)
|
|
120
152
|
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
|
|
121
153
|
joblib.dump(clf, MODEL_PATH)
|
|
122
|
-
print(f"Model trained on {len(df)} samples
|
|
154
|
+
print(f"✅ Model trained on {len(df)} samples → {MODEL_PATH}")
|
|
155
|
+
|
|
156
|
+
# extract top‑10 dynamic keywords from 4xx/5xx paths
|
|
157
|
+
tokens = Counter()
|
|
158
|
+
for r in parsed:
|
|
159
|
+
if r["status"].startswith(("4", "5")):
|
|
160
|
+
segs = re.split(r"\W+", r["path"].lower())
|
|
161
|
+
for seg in segs:
|
|
162
|
+
if len(seg) > 3 and seg not in MALICIOUS_KEYWORDS:
|
|
163
|
+
tokens[seg] += 1
|
|
164
|
+
|
|
165
|
+
new_kw = [kw for kw, _ in tokens.most_common(10)]
|
|
166
|
+
DK_FILE = os.path.join(os.path.dirname(__file__), "resources", "dynamic_keywords.json")
|
|
167
|
+
try:
|
|
168
|
+
existing = set(json.load(open(DK_FILE)))
|
|
169
|
+
except FileNotFoundError:
|
|
170
|
+
existing = set()
|
|
171
|
+
updated = sorted(existing | set(new_kw))
|
|
172
|
+
with open(DK_FILE, "w") as f:
|
|
173
|
+
json.dump(updated, f, indent=2)
|
|
123
174
|
|
|
175
|
+
print(f"📝 Updated dynamic keywords: {new_kw}")
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiwaf
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: AI-powered Web Application Firewall
|
|
5
|
+
Author: Aayush Gauba
|
|
6
|
+
Author-email: Aayush Gauba <gauba.aayush@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Dynamic: author
|
|
11
|
+
|
|
12
|
+
# AI‑WAF
|
|
13
|
+
|
|
14
|
+
> A self‑learning, Django‑friendly Web Application Firewall
|
|
15
|
+
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Package Structure
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
aiwaf/
|
|
23
|
+
├── __init__.py
|
|
24
|
+
├── blacklist_manager.py
|
|
25
|
+
├── middleware.py
|
|
26
|
+
├── trainer.py # exposes train()
|
|
27
|
+
├── utils.py
|
|
28
|
+
├── template_tags/
|
|
29
|
+
│ └── aiwaf_tags.py
|
|
30
|
+
├── resources/
|
|
31
|
+
│ ├── model.pkl # pre‑trained base model
|
|
32
|
+
│ └── dynamic_keywords.json # evolves daily
|
|
33
|
+
├── management/
|
|
34
|
+
│ └── commands/
|
|
35
|
+
│ └── detect_and_train.py # `python manage.py detect_and_train`
|
|
36
|
+
└── LICENSE
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Features
|
|
42
|
+
|
|
43
|
+
- **IP Blocklist**
|
|
44
|
+
Instantly blocks suspicious IPs (supports CSV fallback or Django model).
|
|
45
|
+
|
|
46
|
+
- **Rate Limiting**
|
|
47
|
+
A sliding‑window limiter blocks flooders (more than `AIWAF_RATE_MAX` requests per `AIWAF_RATE_WINDOW` seconds), then blacklists them.
|
|
48
|
+
|
|
49
|
+
- **AI Anomaly Detection**
|
|
50
|
+
IsolationForest on features:
|
|
51
|
+
- Path length
|
|
52
|
+
- Keyword hits (static + dynamic)
|
|
53
|
+
- Response time
|
|
54
|
+
- Status‑code index
|
|
55
|
+
- Burst count
|
|
56
|
+
- Total 404s
|
|
57
|
+
|
|
58
|
+
- **Dynamic Keyword Extraction**
|
|
59
|
+
On every retrain, the 10 most frequent “words” (path segments longer than three characters) found in the paths of 4xx/5xx requests are appended to your malicious keyword set.
|
|
60
|
+
|
|
61
|
+
- **File‑Extension Probing Detection**
|
|
62
|
+
Tracks repeated 404s on common web‑extensions (e.g. `.php`, `.asp`) and auto‑blocks after a burst.
|
|
63
|
+
|
|
64
|
+
- **Honeypot Field**
|
|
65
|
+
Hidden form field (via template tag) that bots fill → instant block.
|
|
66
|
+
|
|
67
|
+
- **UUID Tampering Protection**
|
|
68
|
+
Any `<uuid:…>` URL that doesn’t map to **any** model in its Django app gets blocked.
|
|
69
|
+
|
|
70
|
+
- **Daily Retraining**
|
|
71
|
+
Reads rotated/gzipped logs, auto‑blocks 404 floods (IPs with ≥ 6 total 404s), retrains the model, and updates `model.pkl` + `dynamic_keywords.json`.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# From PyPI
|
|
79
|
+
pip install aiwaf
|
|
80
|
+
|
|
81
|
+
# Or for local development
|
|
82
|
+
git clone https://github.com/aayushgauba/aiwaf.git
|
|
83
|
+
cd aiwaf
|
|
84
|
+
pip install -e .
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## ⚙️ Configuration (`settings.py`)
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
INSTALLED_APPS += ["aiwaf"]
|
|
93
|
+
|
|
94
|
+
# Required
|
|
95
|
+
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
96
|
+
|
|
97
|
+
# Optional (defaults shown)
|
|
98
|
+
AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
|
|
99
|
+
AIWAF_HONEYPOT_FIELD = "hp_field"
|
|
100
|
+
AIWAF_RATE_WINDOW = 10 # seconds
|
|
101
|
+
AIWAF_RATE_MAX = 20 # max reqs/window
|
|
102
|
+
AIWAF_RATE_FLOOD = 10 # flood threshold
|
|
103
|
+
AIWAF_WINDOW_SECONDS = 60 # anomaly window
|
|
104
|
+
AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"] # 404‑burst tracked extensions
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
> **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` in your settings — they’re built in and evolve dynamically.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Middleware Setup
|
|
112
|
+
|
|
113
|
+
Add in **this** order to your `MIDDLEWARE` list:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
MIDDLEWARE = [
|
|
117
|
+
"aiwaf.middleware.IPBlockMiddleware",
|
|
118
|
+
"aiwaf.middleware.RateLimitMiddleware",
|
|
119
|
+
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
120
|
+
"aiwaf.middleware.HoneypotMiddleware",
|
|
121
|
+
"aiwaf.middleware.UUIDTamperMiddleware",
|
|
122
|
+
# ... other middleware ...
|
|
123
|
+
]
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Honeypot Field (in your template)
|
|
129
|
+
|
|
130
|
+
```django
|
|
131
|
+
{% load aiwaf_tags %}
|
|
132
|
+
|
|
133
|
+
<form method="post">
|
|
134
|
+
{% csrf_token %}
|
|
135
|
+
{% honeypot_field %}
|
|
136
|
+
<!-- your real fields -->
|
|
137
|
+
</form>
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
> Renders a hidden `<input name="hp_field" style="display:none">`.
|
|
141
|
+
> Any non‑empty submission → IP blacklisted.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Running Detection & Training
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
python manage.py detect_and_train
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
**What happens:**
|
|
152
|
+
1. Read access logs
|
|
153
|
+
2. Auto‑block IPs with ≥ 6 total 404s
|
|
154
|
+
3. Extract features & train IsolationForest
|
|
155
|
+
4. Save `model.pkl`
|
|
156
|
+
5. Extract the top 10 dynamic keywords from the paths of 4xx/5xx requests
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## How It Works
|
|
161
|
+
|
|
162
|
+
| Middleware | Purpose |
|
|
163
|
+
|--------------------------|------------------------------------------------------------------|
|
|
164
|
+
| IPBlockMiddleware | Blocks requests from known blacklisted IPs |
|
|
165
|
+
| RateLimitMiddleware | Enforces burst & flood thresholds |
|
|
166
|
+
| AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
|
|
167
|
+
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
168
|
+
| UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Credits
|
|
179
|
+
|
|
180
|
+
**AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
181
|
+
> “Let your firewall learn and evolve — keep your site a fortress.”
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
|
|
2
2
|
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
3
|
aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
|
|
4
|
-
aiwaf/middleware.py,sha256=
|
|
5
|
-
aiwaf/models.py,sha256=
|
|
4
|
+
aiwaf/middleware.py,sha256=UIJ-1kA-NjKwpt3JS3vvsuhjaBXGliGt_4VKuL_OGq8,5254
|
|
5
|
+
aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
|
|
6
6
|
aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
|
|
7
|
-
aiwaf/trainer.py,sha256=
|
|
7
|
+
aiwaf/trainer.py,sha256=8hU9k3bF_9QIkGix3TqFl7YuNeQV9dPriY2WhLo6s40,5411
|
|
8
8
|
aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
|
|
9
9
|
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -12,8 +12,7 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
|
|
|
12
12
|
aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
|
|
13
13
|
aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
|
|
15
|
-
aiwaf-0.1.
|
|
16
|
-
aiwaf-0.1.
|
|
17
|
-
aiwaf-0.1.
|
|
18
|
-
aiwaf-0.1.
|
|
19
|
-
aiwaf-0.1.0.dist-info/RECORD,,
|
|
15
|
+
aiwaf-0.1.3.dist-info/METADATA,sha256=zgcejLdSfeE_bcqAvuebUJHN2ynKxtE24wVWdRdA_EA,4977
|
|
16
|
+
aiwaf-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
17
|
+
aiwaf-0.1.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
18
|
+
aiwaf-0.1.3.dist-info/RECORD,,
|
aiwaf-0.1.0.dist-info/METADATA
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: aiwaf
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)
|
|
5
|
-
Author: Aayush Gauba
|
|
6
|
-
Requires-Dist: django>=3.0
|
|
7
|
-
Requires-Dist: scikit-learn
|
|
8
|
-
Requires-Dist: numpy
|
|
9
|
-
Requires-Dist: pandas
|
|
10
|
-
Requires-Dist: joblib
|
|
11
|
-
Dynamic: author
|
|
12
|
-
Dynamic: requires-dist
|
|
13
|
-
Dynamic: summary
|
|
File without changes
|
|
File without changes
|