aiwaf 0.1.3__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf-0.1.6/LICENSE +21 -0
- {aiwaf-0.1.3/aiwaf.egg-info → aiwaf-0.1.6}/PKG-INFO +24 -11
- aiwaf-0.1.3/PKG-INFO → aiwaf-0.1.6/README.md +18 -21
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/middleware.py +34 -4
- aiwaf-0.1.6/aiwaf/trainer.py +151 -0
- aiwaf-0.1.3/README.md → aiwaf-0.1.6/aiwaf.egg-info/PKG-INFO +35 -11
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/SOURCES.txt +1 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/pyproject.toml +2 -2
- aiwaf-0.1.6/setup.py +43 -0
- aiwaf-0.1.3/aiwaf/trainer.py +0 -175
- aiwaf-0.1.3/setup.py +0 -31
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/__init__.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/apps.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/blacklist_manager.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/management/__init__.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/management/commands/__init__.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/management/commands/detect_and_train.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/models.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/resources/model.pkl +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/storage.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/template_tags/__init__.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/template_tags/aiwaf_tags.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/utils.py +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/dependency_links.txt +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/top_level.txt +0 -0
- {aiwaf-0.1.3 → aiwaf-0.1.6}/setup.cfg +0 -0
aiwaf-0.1.6/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Aayush Gauba
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiwaf
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: AI-powered Web Application Firewall
|
|
5
|
+
Home-page: https://github.com/aayushgauba/aiwaf
|
|
5
6
|
Author: Aayush Gauba
|
|
6
7
|
Author-email: Aayush Gauba <gauba.aayush@gmail.com>
|
|
7
8
|
License: MIT
|
|
8
9
|
Requires-Python: >=3.8
|
|
9
10
|
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
10
12
|
Dynamic: author
|
|
13
|
+
Dynamic: home-page
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
Dynamic: requires-python
|
|
11
16
|
|
|
12
|
-
|
|
17
|
+
|
|
18
|
+
# AI‑WAF
|
|
13
19
|
|
|
14
20
|
> A self‑learning, Django‑friendly Web Application Firewall
|
|
15
21
|
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
|
|
@@ -91,6 +97,14 @@ pip install -e .
|
|
|
91
97
|
```python
|
|
92
98
|
INSTALLED_APPS += ["aiwaf"]
|
|
93
99
|
|
|
100
|
+
### Database Setup
|
|
101
|
+
|
|
102
|
+
After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
python manage.py makemigrations aiwaf
|
|
106
|
+
python manage.py migrate
|
|
107
|
+
|
|
94
108
|
# Required
|
|
95
109
|
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
96
110
|
|
|
@@ -114,7 +128,7 @@ Add in **this** order to your `MIDDLEWARE` list:
|
|
|
114
128
|
|
|
115
129
|
```python
|
|
116
130
|
MIDDLEWARE = [
|
|
117
|
-
"aiwaf.middleware.IPBlockMiddleware",
|
|
131
|
+
"aiwaf.middleware.IPAndKeywordBlockMiddleware",
|
|
118
132
|
"aiwaf.middleware.RateLimitMiddleware",
|
|
119
133
|
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
120
134
|
"aiwaf.middleware.HoneypotMiddleware",
|
|
@@ -159,14 +173,13 @@ python manage.py detect_and_train
|
|
|
159
173
|
|
|
160
174
|
## How It Works
|
|
161
175
|
|
|
162
|
-
| Middleware
|
|
163
|
-
|
|
164
|
-
|
|
|
165
|
-
| RateLimitMiddleware
|
|
166
|
-
| AIAnomalyMiddleware
|
|
167
|
-
| HoneypotMiddleware
|
|
168
|
-
| UUIDTamperMiddleware
|
|
169
|
-
|
|
176
|
+
| Middleware | Purpose |
|
|
177
|
+
|------------------------------------|-----------------------------------------------------------------|
|
|
178
|
+
| IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
|
|
179
|
+
| RateLimitMiddleware | Enforces burst & flood thresholds |
|
|
180
|
+
| AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
|
|
181
|
+
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
182
|
+
| UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
|
|
170
183
|
---
|
|
171
184
|
|
|
172
185
|
## License
|
|
@@ -1,15 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
Version: 0.1.3
|
|
4
|
-
Summary: AI-powered Web Application Firewall
|
|
5
|
-
Author: Aayush Gauba
|
|
6
|
-
Author-email: Aayush Gauba <gauba.aayush@gmail.com>
|
|
7
|
-
License: MIT
|
|
8
|
-
Requires-Python: >=3.8
|
|
9
|
-
Description-Content-Type: text/markdown
|
|
10
|
-
Dynamic: author
|
|
11
|
-
|
|
12
|
-
# AI‑WAF
|
|
1
|
+
|
|
2
|
+
# AI‑WAF
|
|
13
3
|
|
|
14
4
|
> A self‑learning, Django‑friendly Web Application Firewall
|
|
15
5
|
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
|
|
@@ -91,6 +81,14 @@ pip install -e .
|
|
|
91
81
|
```python
|
|
92
82
|
INSTALLED_APPS += ["aiwaf"]
|
|
93
83
|
|
|
84
|
+
### Database Setup
|
|
85
|
+
|
|
86
|
+
After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
python manage.py makemigrations aiwaf
|
|
90
|
+
python manage.py migrate
|
|
91
|
+
|
|
94
92
|
# Required
|
|
95
93
|
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
96
94
|
|
|
@@ -114,7 +112,7 @@ Add in **this** order to your `MIDDLEWARE` list:
|
|
|
114
112
|
|
|
115
113
|
```python
|
|
116
114
|
MIDDLEWARE = [
|
|
117
|
-
"aiwaf.middleware.IPBlockMiddleware",
|
|
115
|
+
"aiwaf.middleware.IPAndKeywordBlockMiddleware",
|
|
118
116
|
"aiwaf.middleware.RateLimitMiddleware",
|
|
119
117
|
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
120
118
|
"aiwaf.middleware.HoneypotMiddleware",
|
|
@@ -159,14 +157,13 @@ python manage.py detect_and_train
|
|
|
159
157
|
|
|
160
158
|
## How It Works
|
|
161
159
|
|
|
162
|
-
| Middleware
|
|
163
|
-
|
|
164
|
-
|
|
|
165
|
-
| RateLimitMiddleware
|
|
166
|
-
| AIAnomalyMiddleware
|
|
167
|
-
| HoneypotMiddleware
|
|
168
|
-
| UUIDTamperMiddleware
|
|
169
|
-
|
|
160
|
+
| Middleware | Purpose |
|
|
161
|
+
|------------------------------------|-----------------------------------------------------------------|
|
|
162
|
+
| IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
|
|
163
|
+
| RateLimitMiddleware | Enforces burst & flood thresholds |
|
|
164
|
+
| AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
|
|
165
|
+
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
166
|
+
| UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
|
|
170
167
|
---
|
|
171
168
|
|
|
172
169
|
## License
|
|
@@ -13,11 +13,11 @@ from django.conf import settings
|
|
|
13
13
|
from django.core.cache import cache
|
|
14
14
|
from django.db.models import F
|
|
15
15
|
from django.apps import apps
|
|
16
|
+
from django.urls import get_resolver
|
|
16
17
|
|
|
17
18
|
from .blacklist_manager import BlacklistManager
|
|
18
19
|
from .models import DynamicKeyword
|
|
19
20
|
|
|
20
|
-
# ─── Model loading with fallback ────────────────────────────────────────────
|
|
21
21
|
MODEL_PATH = getattr(
|
|
22
22
|
settings,
|
|
23
23
|
"AIWAF_MODEL_PATH",
|
|
@@ -25,7 +25,6 @@ MODEL_PATH = getattr(
|
|
|
25
25
|
)
|
|
26
26
|
MODEL = joblib.load(MODEL_PATH)
|
|
27
27
|
|
|
28
|
-
# ─── Static keywords default ────────────────────────────────────────────────
|
|
29
28
|
STATIC_KW = getattr(
|
|
30
29
|
settings,
|
|
31
30
|
"AIWAF_MALICIOUS_KEYWORDS",
|
|
@@ -41,15 +40,46 @@ def get_ip(request):
|
|
|
41
40
|
return xff.split(",")[0].strip()
|
|
42
41
|
return request.META.get("REMOTE_ADDR", "")
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
class IPBlockMiddleware:
|
|
43
|
+
class IPAndKeywordBlockMiddleware:
|
|
46
44
|
def __init__(self, get_response):
|
|
47
45
|
self.get_response = get_response
|
|
46
|
+
self.url_patterns = self._collect_view_paths()
|
|
47
|
+
|
|
48
|
+
def _collect_view_paths(self):
|
|
49
|
+
resolver = get_resolver()
|
|
50
|
+
patterns = set()
|
|
51
|
+
|
|
52
|
+
def extract(patterns_list, prefix=""):
|
|
53
|
+
for p in patterns_list:
|
|
54
|
+
if hasattr(p, "url_patterns"):
|
|
55
|
+
extract(p.url_patterns, prefix + str(p.pattern))
|
|
56
|
+
else:
|
|
57
|
+
pat = (prefix + str(p.pattern)).strip("^$")
|
|
58
|
+
patterns.add(pat)
|
|
59
|
+
extract(resolver.url_patterns)
|
|
60
|
+
return patterns
|
|
48
61
|
|
|
49
62
|
def __call__(self, request):
|
|
50
63
|
ip = get_ip(request)
|
|
64
|
+
path = request.path.lower()
|
|
51
65
|
if BlacklistManager.is_blocked(ip):
|
|
52
66
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
67
|
+
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
68
|
+
for seg in segments:
|
|
69
|
+
obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
|
|
70
|
+
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
|
|
71
|
+
dynamic_top = list(
|
|
72
|
+
DynamicKeyword.objects
|
|
73
|
+
.order_by("-count")
|
|
74
|
+
.values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
|
|
75
|
+
)
|
|
76
|
+
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
77
|
+
safe_kw = {kw for kw in all_kw if any(kw in pat for pat in self.url_patterns)}
|
|
78
|
+
suspicious_kw = all_kw - safe_kw
|
|
79
|
+
for seg in segments:
|
|
80
|
+
if seg in suspicious_kw:
|
|
81
|
+
BlacklistManager.block(ip, f"Keyword block: {seg}")
|
|
82
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
53
83
|
return self.get_response(request)
|
|
54
84
|
|
|
55
85
|
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import gzip
|
|
4
|
+
import re
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from collections import defaultdict, Counter
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from sklearn.ensemble import IsolationForest
|
|
10
|
+
import joblib
|
|
11
|
+
|
|
12
|
+
from django.conf import settings
|
|
13
|
+
from django.apps import apps
|
|
14
|
+
from django.db.models import F
|
|
15
|
+
|
|
16
|
+
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
17
|
+
MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
|
|
18
|
+
|
|
19
|
+
STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
|
|
20
|
+
STATUS_IDX = ["200", "403", "404", "500"]
|
|
21
|
+
|
|
22
|
+
_LOG_RX = re.compile(
|
|
23
|
+
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
|
|
24
|
+
r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
28
|
+
DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _read_all_logs():
|
|
32
|
+
lines = []
|
|
33
|
+
if LOG_PATH and os.path.exists(LOG_PATH):
|
|
34
|
+
with open(LOG_PATH, "r", errors="ignore") as f:
|
|
35
|
+
lines.extend(f.readlines())
|
|
36
|
+
for path in sorted(glob.glob(f"{LOG_PATH}.*")):
|
|
37
|
+
opener = gzip.open if path.endswith(".gz") else open
|
|
38
|
+
try:
|
|
39
|
+
with opener(path, "rt", errors="ignore") as f:
|
|
40
|
+
lines.extend(f.readlines())
|
|
41
|
+
except OSError:
|
|
42
|
+
continue
|
|
43
|
+
return lines
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse(line):
|
|
47
|
+
m = _LOG_RX.search(line)
|
|
48
|
+
if not m:
|
|
49
|
+
return None
|
|
50
|
+
ip, ts_str, path, status, ref, ua, rt = m.groups()
|
|
51
|
+
try:
|
|
52
|
+
ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
|
|
53
|
+
except ValueError:
|
|
54
|
+
return None
|
|
55
|
+
return {
|
|
56
|
+
"ip": ip,
|
|
57
|
+
"timestamp": ts,
|
|
58
|
+
"path": path,
|
|
59
|
+
"status": status,
|
|
60
|
+
"ua": ua,
|
|
61
|
+
"response_time": float(rt),
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def train():
|
|
66
|
+
raw_lines = _read_all_logs()
|
|
67
|
+
if not raw_lines:
|
|
68
|
+
print(" No log lines found – check AIWAF_ACCESS_LOG setting.")
|
|
69
|
+
return
|
|
70
|
+
parsed = []
|
|
71
|
+
ip_404 = defaultdict(int)
|
|
72
|
+
ip_times = defaultdict(list)
|
|
73
|
+
for ln in raw_lines:
|
|
74
|
+
rec = _parse(ln)
|
|
75
|
+
if not rec:
|
|
76
|
+
continue
|
|
77
|
+
parsed.append(rec)
|
|
78
|
+
ip_times[rec["ip"]].append(rec["timestamp"])
|
|
79
|
+
if rec["status"] == "404":
|
|
80
|
+
ip_404[rec["ip"]] += 1
|
|
81
|
+
blocked_404 = []
|
|
82
|
+
for ip, count in ip_404.items():
|
|
83
|
+
if count >= 6:
|
|
84
|
+
obj, created = BlacklistEntry.objects.get_or_create(
|
|
85
|
+
ip_address=ip,
|
|
86
|
+
defaults={"reason": "Excessive 404s (≥6)"}
|
|
87
|
+
)
|
|
88
|
+
if created:
|
|
89
|
+
blocked_404.append(ip)
|
|
90
|
+
if blocked_404:
|
|
91
|
+
print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
|
|
92
|
+
feature_dicts = []
|
|
93
|
+
for r in parsed:
|
|
94
|
+
ip = r["ip"]
|
|
95
|
+
burst = sum(
|
|
96
|
+
1 for t in ip_times[ip]
|
|
97
|
+
if (r["timestamp"] - t).total_seconds() <= 10
|
|
98
|
+
)
|
|
99
|
+
total404 = ip_404[ip]
|
|
100
|
+
kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
|
|
101
|
+
status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
|
|
102
|
+
feature_dicts.append({
|
|
103
|
+
"ip": ip,
|
|
104
|
+
"path_len": len(r["path"]),
|
|
105
|
+
"kw_hits": kw_hits,
|
|
106
|
+
"resp_time": r["response_time"],
|
|
107
|
+
"status_idx": status_idx,
|
|
108
|
+
"burst_count": burst,
|
|
109
|
+
"total_404": total404,
|
|
110
|
+
})
|
|
111
|
+
|
|
112
|
+
if not feature_dicts:
|
|
113
|
+
print("⚠️ Nothing to train on – no valid log entries.")
|
|
114
|
+
return
|
|
115
|
+
|
|
116
|
+
df = pd.DataFrame(feature_dicts)
|
|
117
|
+
feature_cols = [c for c in df.columns if c != "ip"]
|
|
118
|
+
X = df[feature_cols].astype(float).values
|
|
119
|
+
model = IsolationForest(contamination=0.01, random_state=42)
|
|
120
|
+
model.fit(X)
|
|
121
|
+
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
|
|
122
|
+
joblib.dump(model, MODEL_PATH)
|
|
123
|
+
print(f"✅ Model trained on {len(X)} samples → {MODEL_PATH}")
|
|
124
|
+
preds = model.predict(X) # -1 for outliers
|
|
125
|
+
anomalous_ips = set(df.loc[preds == -1, 'ip'])
|
|
126
|
+
blocked_anom = []
|
|
127
|
+
for ip in anomalous_ips:
|
|
128
|
+
obj, created = BlacklistEntry.objects.get_or_create(
|
|
129
|
+
ip_address=ip,
|
|
130
|
+
defaults={"reason": "Anomalous behavior"}
|
|
131
|
+
)
|
|
132
|
+
if created:
|
|
133
|
+
blocked_anom.append(ip)
|
|
134
|
+
if blocked_anom:
|
|
135
|
+
print(f" Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
|
|
136
|
+
|
|
137
|
+
tokens = Counter()
|
|
138
|
+
for r in parsed:
|
|
139
|
+
if r["status"].startswith(("4", "5")):
|
|
140
|
+
for seg in re.split(r"\W+", r["path"].lower()):
|
|
141
|
+
if len(seg) > 3 and seg not in STATIC_KW:
|
|
142
|
+
tokens[seg] += 1
|
|
143
|
+
top_tokens = tokens.most_common(10)
|
|
144
|
+
for kw, cnt in top_tokens:
|
|
145
|
+
obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
|
|
146
|
+
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
|
|
147
|
+
print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
if __name__ == "__main__":
|
|
151
|
+
train()
|
|
@@ -1,4 +1,21 @@
|
|
|
1
|
-
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiwaf
|
|
3
|
+
Version: 0.1.6
|
|
4
|
+
Summary: AI-powered Web Application Firewall
|
|
5
|
+
Home-page: https://github.com/aayushgauba/aiwaf
|
|
6
|
+
Author: Aayush Gauba
|
|
7
|
+
Author-email: Aayush Gauba <gauba.aayush@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Dynamic: author
|
|
13
|
+
Dynamic: home-page
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
Dynamic: requires-python
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# AI‑WAF
|
|
2
19
|
|
|
3
20
|
> A self‑learning, Django‑friendly Web Application Firewall
|
|
4
21
|
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
|
|
@@ -80,6 +97,14 @@ pip install -e .
|
|
|
80
97
|
```python
|
|
81
98
|
INSTALLED_APPS += ["aiwaf"]
|
|
82
99
|
|
|
100
|
+
### Database Setup
|
|
101
|
+
|
|
102
|
+
After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
python manage.py makemigrations aiwaf
|
|
106
|
+
python manage.py migrate
|
|
107
|
+
|
|
83
108
|
# Required
|
|
84
109
|
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
85
110
|
|
|
@@ -103,7 +128,7 @@ Add in **this** order to your `MIDDLEWARE` list:
|
|
|
103
128
|
|
|
104
129
|
```python
|
|
105
130
|
MIDDLEWARE = [
|
|
106
|
-
"aiwaf.middleware.IPBlockMiddleware",
|
|
131
|
+
"aiwaf.middleware.IPAndKeywordBlockMiddleware",
|
|
107
132
|
"aiwaf.middleware.RateLimitMiddleware",
|
|
108
133
|
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
109
134
|
"aiwaf.middleware.HoneypotMiddleware",
|
|
@@ -148,14 +173,13 @@ python manage.py detect_and_train
|
|
|
148
173
|
|
|
149
174
|
## How It Works
|
|
150
175
|
|
|
151
|
-
| Middleware
|
|
152
|
-
|
|
153
|
-
|
|
|
154
|
-
| RateLimitMiddleware
|
|
155
|
-
| AIAnomalyMiddleware
|
|
156
|
-
| HoneypotMiddleware
|
|
157
|
-
| UUIDTamperMiddleware
|
|
158
|
-
|
|
176
|
+
| Middleware | Purpose |
|
|
177
|
+
|------------------------------------|-----------------------------------------------------------------|
|
|
178
|
+
| IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
|
|
179
|
+
| RateLimitMiddleware | Enforces burst & flood thresholds |
|
|
180
|
+
| AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
|
|
181
|
+
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
182
|
+
| UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
|
|
159
183
|
---
|
|
160
184
|
|
|
161
185
|
## License
|
|
@@ -167,4 +191,4 @@ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) f
|
|
|
167
191
|
## Credits
|
|
168
192
|
|
|
169
193
|
**AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
170
|
-
> “Let your firewall learn and evolve — keep your site a fortress.”
|
|
194
|
+
> “Let your firewall learn and evolve — keep your site a fortress.”
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "aiwaf"
|
|
3
|
-
version = "0.1.3"
|
|
3
|
+
version = "0.1.6"
|
|
4
4
|
description = "AI-powered Web Application Firewall"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.8"
|
|
7
7
|
license = {text = "MIT"}
|
|
8
8
|
authors = [{ name = "Aayush Gauba", email = "gauba.aayush@gmail.com" }]
|
|
9
|
-
dependencies = [ ]
|
|
9
|
+
dependencies = [ ]
|
aiwaf-0.1.6/setup.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# setup.py
|
|
2
|
+
from setuptools import setup, find_packages
|
|
3
|
+
import pathlib
|
|
4
|
+
|
|
5
|
+
HERE = pathlib.Path(__file__).parent
|
|
6
|
+
|
|
7
|
+
# read the long description from your README
|
|
8
|
+
long_description = (HERE / "README.md").read_text(encoding="utf-8")
|
|
9
|
+
|
|
10
|
+
setup(
|
|
11
|
+
name="aiwaf",
|
|
12
|
+
version="0.1.6",
|
|
13
|
+
description="AI‑driven, self‑learning Web Application Firewall for Django",
|
|
14
|
+
long_description=long_description,
|
|
15
|
+
long_description_content_type="text/markdown",
|
|
16
|
+
author="Aayush Gauba",
|
|
17
|
+
url="https://github.com/aayushgauba/aiwaf",
|
|
18
|
+
license="MIT",
|
|
19
|
+
packages=find_packages(exclude=["tests*", "docs*"]),
|
|
20
|
+
python_requires=">=3.8",
|
|
21
|
+
install_requires=[
|
|
22
|
+
"Django>=3.2",
|
|
23
|
+
"numpy>=1.21",
|
|
24
|
+
"pandas>=1.3",
|
|
25
|
+
"scikit-learn>=1.0",
|
|
26
|
+
"joblib>=1.1",
|
|
27
|
+
],
|
|
28
|
+
include_package_data=True,
|
|
29
|
+
package_data={
|
|
30
|
+
# include your pretrained model and any JSON resources
|
|
31
|
+
"aiwaf": ["resources/*.pkl", "resources/*.json"]
|
|
32
|
+
},
|
|
33
|
+
entry_points={
|
|
34
|
+
"console_scripts": [
|
|
35
|
+
"aiwaf-detect=aiwaf.trainer:train",
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
classifiers=[
|
|
39
|
+
"Framework :: Django",
|
|
40
|
+
"Programming Language :: Python :: 3",
|
|
41
|
+
"License :: MIT License",
|
|
42
|
+
],
|
|
43
|
+
)
|
aiwaf-0.1.3/aiwaf/trainer.py
DELETED
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import glob
|
|
3
|
-
import gzip
|
|
4
|
-
import re
|
|
5
|
-
import json
|
|
6
|
-
import joblib
|
|
7
|
-
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
from collections import defaultdict, Counter
|
|
10
|
-
|
|
11
|
-
import pandas as pd
|
|
12
|
-
from sklearn.ensemble import IsolationForest
|
|
13
|
-
|
|
14
|
-
from django.conf import settings
|
|
15
|
-
from django.apps import apps
|
|
16
|
-
|
|
17
|
-
# ─── CONFIG ────────────────────────────────────────────────────────────────
|
|
18
|
-
|
|
19
|
-
# Where to read your access logs (and rotated/.gz siblings)
|
|
20
|
-
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
21
|
-
|
|
22
|
-
# Where we save our trained model
|
|
23
|
-
MODEL_PATH = os.path.join(
|
|
24
|
-
os.path.dirname(__file__),
|
|
25
|
-
"resources",
|
|
26
|
-
"model.pkl"
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
# Static “malicious” path keywords & file extensions
|
|
30
|
-
MALICIOUS_KEYWORDS = [
|
|
31
|
-
".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
|
|
32
|
-
"conflg", "shell", "filemanager"
|
|
33
|
-
]
|
|
34
|
-
STATUS_CODES = ["200", "403", "404", "500"]
|
|
35
|
-
|
|
36
|
-
# Regex for combined log with response-time=…
|
|
37
|
-
_LOG_RX = re.compile(
|
|
38
|
-
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
|
|
39
|
-
r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
# Your Django model for storing blocked IPs
|
|
43
|
-
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# ─── READ & PARSE LOG LINES ─────────────────────────────────────────────────
|
|
47
|
-
|
|
48
|
-
def _read_all_logs():
|
|
49
|
-
lines = []
|
|
50
|
-
if LOG_PATH and os.path.exists(LOG_PATH):
|
|
51
|
-
with open(LOG_PATH, "r", errors="ignore") as f:
|
|
52
|
-
lines += f.readlines()
|
|
53
|
-
for path in sorted(glob.glob(LOG_PATH + ".*")):
|
|
54
|
-
opener = gzip.open if path.endswith(".gz") else open
|
|
55
|
-
try:
|
|
56
|
-
with opener(path, "rt", errors="ignore") as f:
|
|
57
|
-
lines += f.readlines()
|
|
58
|
-
except OSError:
|
|
59
|
-
continue
|
|
60
|
-
return lines
|
|
61
|
-
|
|
62
|
-
def _parse(line):
|
|
63
|
-
m = _LOG_RX.search(line)
|
|
64
|
-
if not m:
|
|
65
|
-
return None
|
|
66
|
-
ip, ts_str, path, status, ref, ua, rt = m.groups()
|
|
67
|
-
try:
|
|
68
|
-
ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
|
|
69
|
-
except ValueError:
|
|
70
|
-
return None
|
|
71
|
-
return {
|
|
72
|
-
"ip": ip,
|
|
73
|
-
"timestamp": ts,
|
|
74
|
-
"path": path,
|
|
75
|
-
"status": status,
|
|
76
|
-
"ua": ua,
|
|
77
|
-
"response_time": float(rt),
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# ─── TRAIN ENTRYPOINT ───────────────────────────────────────────────────────
|
|
82
|
-
|
|
83
|
-
def train():
|
|
84
|
-
raw = _read_all_logs()
|
|
85
|
-
if not raw:
|
|
86
|
-
print("❌ No log lines found – check settings.AIWAF_ACCESS_LOG")
|
|
87
|
-
return
|
|
88
|
-
|
|
89
|
-
parsed = []
|
|
90
|
-
ip_404 = defaultdict(int)
|
|
91
|
-
ip_times = defaultdict(list)
|
|
92
|
-
|
|
93
|
-
# parse + accumulate timestamps & 404 counts
|
|
94
|
-
for ln in raw:
|
|
95
|
-
rec = _parse(ln)
|
|
96
|
-
if not rec:
|
|
97
|
-
continue
|
|
98
|
-
parsed.append(rec)
|
|
99
|
-
ip_times[rec["ip"]].append(rec["timestamp"])
|
|
100
|
-
if rec["status"] == "404":
|
|
101
|
-
ip_404[rec["ip"]] += 1
|
|
102
|
-
|
|
103
|
-
# auto-block IPs with >=6 total 404s
|
|
104
|
-
newly_blocked = []
|
|
105
|
-
for ip, cnt in ip_404.items():
|
|
106
|
-
if cnt >= 6:
|
|
107
|
-
obj, created = BlacklistEntry.objects.get_or_create(
|
|
108
|
-
ip_address=ip,
|
|
109
|
-
defaults={"reason": "Excessive 404s (≥6)"}
|
|
110
|
-
)
|
|
111
|
-
if created:
|
|
112
|
-
newly_blocked.append(ip)
|
|
113
|
-
if newly_blocked:
|
|
114
|
-
print(f"🔒 Blocked {len(newly_blocked)} IPs for 404 flood: {newly_blocked}")
|
|
115
|
-
|
|
116
|
-
# build feature vectors
|
|
117
|
-
rows = []
|
|
118
|
-
for r in parsed:
|
|
119
|
-
ip = r["ip"]
|
|
120
|
-
burst = sum(
|
|
121
|
-
1 for t in ip_times[ip]
|
|
122
|
-
if (r["timestamp"] - t).total_seconds() <= 10
|
|
123
|
-
)
|
|
124
|
-
total404 = ip_404[ip]
|
|
125
|
-
kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
|
|
126
|
-
status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
|
|
127
|
-
|
|
128
|
-
rows.append([
|
|
129
|
-
len(r["path"]),
|
|
130
|
-
kw_hits,
|
|
131
|
-
r["response_time"],
|
|
132
|
-
status_idx,
|
|
133
|
-
burst,
|
|
134
|
-
total404
|
|
135
|
-
])
|
|
136
|
-
|
|
137
|
-
if not rows:
|
|
138
|
-
print("⚠️ No entries to train on.")
|
|
139
|
-
return
|
|
140
|
-
|
|
141
|
-
df = pd.DataFrame(
|
|
142
|
-
rows,
|
|
143
|
-
columns=[
|
|
144
|
-
"path_len", "kw_hits", "resp_time",
|
|
145
|
-
"status_idx", "burst_count", "total_404"
|
|
146
|
-
]
|
|
147
|
-
).fillna(0).astype(float)
|
|
148
|
-
|
|
149
|
-
# train & save
|
|
150
|
-
clf = IsolationForest(contamination=0.01, random_state=42)
|
|
151
|
-
clf.fit(df.values)
|
|
152
|
-
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
|
|
153
|
-
joblib.dump(clf, MODEL_PATH)
|
|
154
|
-
print(f"✅ Model trained on {len(df)} samples → {MODEL_PATH}")
|
|
155
|
-
|
|
156
|
-
# extract top‑10 dynamic keywords from 4xx/5xx paths
|
|
157
|
-
tokens = Counter()
|
|
158
|
-
for r in parsed:
|
|
159
|
-
if r["status"].startswith(("4", "5")):
|
|
160
|
-
segs = re.split(r"\W+", r["path"].lower())
|
|
161
|
-
for seg in segs:
|
|
162
|
-
if len(seg) > 3 and seg not in MALICIOUS_KEYWORDS:
|
|
163
|
-
tokens[seg] += 1
|
|
164
|
-
|
|
165
|
-
new_kw = [kw for kw, _ in tokens.most_common(10)]
|
|
166
|
-
DK_FILE = os.path.join(os.path.dirname(__file__), "resources", "dynamic_keywords.json")
|
|
167
|
-
try:
|
|
168
|
-
existing = set(json.load(open(DK_FILE)))
|
|
169
|
-
except FileNotFoundError:
|
|
170
|
-
existing = set()
|
|
171
|
-
updated = sorted(existing | set(new_kw))
|
|
172
|
-
with open(DK_FILE, "w") as f:
|
|
173
|
-
json.dump(updated, f, indent=2)
|
|
174
|
-
|
|
175
|
-
print(f"📝 Updated dynamic keywords: {new_kw}")
|
aiwaf-0.1.3/setup.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
from setuptools import setup, find_packages
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
this_directory = Path(__file__).parent
|
|
5
|
-
long_description = (this_directory / "README.md").read_text(encoding="utf-8")
|
|
6
|
-
|
|
7
|
-
setup(
|
|
8
|
-
name="aiwaf",
|
|
9
|
-
version="0.1.3",
|
|
10
|
-
description="AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)",
|
|
11
|
-
long_description=long_description,
|
|
12
|
-
long_description_content_type="text/markdown", # <- required for markdown support
|
|
13
|
-
author="Aayush Gauba",
|
|
14
|
-
packages=find_packages(),
|
|
15
|
-
package_data={
|
|
16
|
-
"aiwaf": ["resources/*.pkl"],
|
|
17
|
-
},
|
|
18
|
-
include_package_data=True,
|
|
19
|
-
install_requires=[
|
|
20
|
-
"django>=3.0",
|
|
21
|
-
"scikit-learn",
|
|
22
|
-
"numpy",
|
|
23
|
-
"pandas",
|
|
24
|
-
"joblib",
|
|
25
|
-
],
|
|
26
|
-
entry_points={
|
|
27
|
-
"console_scripts": [
|
|
28
|
-
"aiwaf-detect=aiwaf.trainer:detect_and_train",
|
|
29
|
-
]
|
|
30
|
-
},
|
|
31
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|