aiwaf 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf-0.1.0/PKG-INFO +13 -0
- aiwaf-0.1.0/README.md +176 -0
- aiwaf-0.1.0/aiwaf/__init__.py +1 -0
- aiwaf-0.1.0/aiwaf/apps.py +5 -0
- aiwaf-0.1.0/aiwaf/blacklist_manager.py +14 -0
- aiwaf-0.1.0/aiwaf/management/__init__.py +0 -0
- aiwaf-0.1.0/aiwaf/management/commands/__init__.py +0 -0
- aiwaf-0.1.0/aiwaf/management/commands/detect_and_train.py +10 -0
- aiwaf-0.1.0/aiwaf/middleware.py +115 -0
- aiwaf-0.1.0/aiwaf/models.py +28 -0
- aiwaf-0.1.0/aiwaf/resources/model.pkl +0 -0
- aiwaf-0.1.0/aiwaf/storage.py +61 -0
- aiwaf-0.1.0/aiwaf/template_tags/__init__.py +0 -0
- aiwaf-0.1.0/aiwaf/template_tags/aiwaf_tags.py +14 -0
- aiwaf-0.1.0/aiwaf/trainer.py +123 -0
- aiwaf-0.1.0/aiwaf/utils.py +50 -0
- aiwaf-0.1.0/aiwaf.egg-info/PKG-INFO +13 -0
- aiwaf-0.1.0/aiwaf.egg-info/SOURCES.txt +22 -0
- aiwaf-0.1.0/aiwaf.egg-info/dependency_links.txt +1 -0
- aiwaf-0.1.0/aiwaf.egg-info/entry_points.txt +2 -0
- aiwaf-0.1.0/aiwaf.egg-info/requires.txt +5 -0
- aiwaf-0.1.0/aiwaf.egg-info/top_level.txt +1 -0
- aiwaf-0.1.0/setup.cfg +4 -0
- aiwaf-0.1.0/setup.py +25 -0
aiwaf-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiwaf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)
|
|
5
|
+
Author: Aayush Gauba
|
|
6
|
+
Requires-Dist: django>=3.0
|
|
7
|
+
Requires-Dist: scikit-learn
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: pandas
|
|
10
|
+
Requires-Dist: joblib
|
|
11
|
+
Dynamic: author
|
|
12
|
+
Dynamic: requires-dist
|
|
13
|
+
Dynamic: summary
|
aiwaf-0.1.0/README.md
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# AI‑WAF
|
|
2
|
+
|
|
3
|
+
> A self-learning, Django-friendly Web Application Firewall
|
|
4
|
+
> with rate-limiting, anomaly detection, honeypots, UUID-tamper protection, and daily retraining.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Package Structure
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
aiwaf/
|
|
12
|
+
├── __init__.py
|
|
13
|
+
├── blacklist_manager.py
|
|
14
|
+
├── middleware.py
|
|
15
|
+
├── trainer.py # exposes train(), invoked by the detect_and_train command
|
|
16
|
+
├── utils.py
|
|
17
|
+
├── template_tags/
|
|
18
|
+
│ └── aiwaf_tags.py
|
|
19
|
+
├── resources/
|
|
20
|
+
│ └── model.pkl # pre-trained base model
|
|
21
|
+
├── management/
|
|
22
|
+
│ └── commands/
|
|
23
|
+
│ └── detect_and_train.py # python manage.py detect_and_train
|
|
24
|
+
└── LICENSE
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
|
|
31
|
+
- **IP Blocklist**
|
|
32
|
+
Automatically blocks suspicious IPs; optionally backed by CSV or Django model.
|
|
33
|
+
|
|
34
|
+
- **Rate Limiting**
|
|
35
|
+
Sliding-window logic throttles and blocks IPs that exceed a request threshold within a configurable time window (default: 10 seconds).
|
|
36
|
+
|
|
37
|
+
- **AI Anomaly Detection**
|
|
38
|
+
IsolationForest trained on real logs with features like:
|
|
39
|
+
- Path length
|
|
40
|
+
- Keyword hits
|
|
41
|
+
- Response time
|
|
42
|
+
- Status code index
|
|
43
|
+
- Burst count
|
|
44
|
+
- Total 404s
|
|
45
|
+
|
|
46
|
+
- **Honeypot Field**
|
|
47
|
+
Hidden form field that bots are likely to fill — if triggered, the IP is blocked.
|
|
48
|
+
|
|
49
|
+
- **UUID Tampering Protection**
|
|
50
|
+
Detects if someone is probing by injecting random/nonexistent UUIDs into URLs.
|
|
51
|
+
|
|
52
|
+
- **Daily Retraining**
|
|
53
|
+
A single command retrains your model every day based on your logs.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
Install locally or from PyPI:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install aiwaf
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Or for local dev:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
git clone https://github.com/aayushgauba/aiwaf.git
|
|
69
|
+
cd aiwaf
|
|
70
|
+
pip install -e .
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## ⚙️ Configuration (`settings.py`)
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
INSTALLED_APPS += [
|
|
79
|
+
"aiwaf",
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
# Required
|
|
83
|
+
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
84
|
+
|
|
85
|
+
# Optional (defaults included)
|
|
86
|
+
AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
|
|
87
|
+
AIWAF_MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
|
|
88
|
+
AIWAF_STATUS_CODES = ["200", "403", "404", "500"]
|
|
89
|
+
AIWAF_HONEYPOT_FIELD = "hp_field"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Middleware Setup
|
|
95
|
+
|
|
96
|
+
Add to `MIDDLEWARE` in order:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
MIDDLEWARE = [
|
|
100
|
+
"aiwaf.middleware.IPBlockMiddleware",
|
|
101
|
+
"aiwaf.middleware.RateLimitMiddleware",
|
|
102
|
+
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
103
|
+
"aiwaf.middleware.HoneypotMiddleware",
|
|
104
|
+
"aiwaf.middleware.UUIDTamperMiddleware",
|
|
105
|
+
...
|
|
106
|
+
]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Honeypot Field (in template)
|
|
112
|
+
|
|
113
|
+
```html
|
|
114
|
+
{% load aiwaf_tags %}
|
|
115
|
+
|
|
116
|
+
<form method="post">
|
|
117
|
+
{% csrf_token %}
|
|
118
|
+
{% honeypot_field %}
|
|
119
|
+
<!-- other fields -->
|
|
120
|
+
</form>
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
The honeypot is rendered as `<input type="text" name="hp_field" hidden autocomplete="off" tabindex="-1" />`.
|
|
124
|
+
If it’s ever filled → IP gets blocked.
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Run Detection + Training
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
python manage.py detect_and_train
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
What it does:
|
|
135
|
+
|
|
136
|
+
- Reads logs (supports `.gz` and rotated logs).
|
|
137
|
+
- Detects excessive 404s (≥6) → instant block.
|
|
138
|
+
- Builds feature vectors from logs.
|
|
139
|
+
- Trains IsolationForest and saves `model.pkl`.
|
|
140
|
+
|
|
141
|
+
Schedule it to run daily via `cron`, `Celery beat`, or systemd timer.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## How It Works (Simplified)
|
|
146
|
+
|
|
147
|
+
| Middleware | Functionality |
|
|
148
|
+
|------------------------|--------------------------------------------------------------|
|
|
149
|
+
| IPBlockMiddleware | Blocks requests from known blacklisted IPs |
|
|
150
|
+
| RateLimitMiddleware | Blacklists IPs above 10 requests/10 s; returns 429 above 20/10 s |
|
|
151
|
+
| AIAnomalyMiddleware | Uses ML to detect suspicious behavior in request patterns |
|
|
152
|
+
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
153
|
+
| UUIDTamperMiddleware | Detects guessing/probing by checking invalid UUID access |
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Development Roadmap
|
|
158
|
+
|
|
159
|
+
- [ ] Add CSV blocklist fallback
|
|
160
|
+
- [ ] Admin dashboard integration
|
|
161
|
+
- [ ] Auto-pruning of old block entries
|
|
162
|
+
- [ ] Real-time log streaming compatibility
|
|
163
|
+
- [ ] Docker/Helm deployment guide
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## License
|
|
168
|
+
|
|
169
|
+
This project is licensed under the **MIT License** — see `LICENSE` for details.
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## Credits
|
|
174
|
+
|
|
175
|
+
**AIWAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
176
|
+
> "Let your firewall learn and evolve with your logs. Make your site a fortress."
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Dotted path of the AppConfig class Django should use for this app.
# NOTE(review): Django 3.2+ discovers AppConfig classes automatically and
# deprecates this variable — confirm against the supported Django range.
default_app_config = "aiwaf.apps.AiwafConfig"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .models import BlacklistEntry
|
|
2
|
+
|
|
3
|
+
class BlacklistManager:
    """Thin facade over the ``BlacklistEntry`` model for blocking and querying IPs."""

    @staticmethod
    def block(ip, reason):
        """Record ``ip`` as blocked; an already-stored entry keeps its reason."""
        lookup = {"ip_address": ip}
        BlacklistEntry.objects.get_or_create(defaults={"reason": reason}, **lookup)

    @staticmethod
    def is_blocked(ip):
        """Return ``True`` when a blacklist entry exists for ``ip``."""
        matches = BlacklistEntry.objects.filter(ip_address=ip)
        return matches.exists()

    @staticmethod
    def all_blocked():
        """Return a queryset containing every blacklist entry."""
        manager = BlacklistEntry.objects
        return manager.all()
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import numpy as np
|
|
3
|
+
import joblib
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from django.utils.deprecation import MiddlewareMixin
|
|
6
|
+
from django.http import JsonResponse
|
|
7
|
+
from django.conf import settings
|
|
8
|
+
from django.core.cache import cache
|
|
9
|
+
from django.urls import resolve
|
|
10
|
+
from django.apps import apps
|
|
11
|
+
from .blacklist_manager import BlacklistManager
|
|
12
|
+
|
|
13
|
+
# Resolve the model location: an explicit AIWAF_MODEL_PATH setting wins,
# otherwise fall back to the model file bundled inside the package.
_MODEL_PATH_UNSET = object()
MODEL_PATH = getattr(settings, "AIWAF_MODEL_PATH", _MODEL_PATH_UNSET)
if MODEL_PATH is _MODEL_PATH_UNSET:
    import importlib.resources
    MODEL_PATH = importlib.resources.files("aiwaf").joinpath("resources/model.pkl")

# Loaded once at import time and shared by every request.
# NOTE(review): joblib.load unpickles the file, so MODEL_PATH must point at a
# trusted location.
MODEL = joblib.load(MODEL_PATH)
|
|
20
|
+
|
|
21
|
+
def get_ip(request):
    """Return the client IP address for ``request``.

    The first entry of the ``X-Forwarded-For`` header is preferred;
    ``REMOTE_ADDR`` (or an empty string) is used when the header is missing
    or empty.

    NOTE(review): the header is client-controlled unless a trusted proxy
    overwrites it — confirm the deployment before relying on it for blocking.
    """
    meta = request.META
    forwarded = meta.get("HTTP_X_FORWARDED_FOR")
    if not forwarded:
        return meta.get("REMOTE_ADDR", "")
    first_hop, _, _ = forwarded.partition(",")
    return first_hop.strip()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class IPBlockMiddleware:
    """Reject every request whose client IP is already blacklisted."""

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        """Return a 403 JSON response for blacklisted IPs, else pass through."""
        client_ip = get_ip(request)
        if not BlacklistManager.is_blocked(client_ip):
            return self.get_response(request)
        return JsonResponse({"error": "blocked"}, status=403)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class RateLimitMiddleware:
    """In-process sliding-window rate limiter keyed by client IP.

    For each request the timestamps seen from the IP within ``WINDOW``
    seconds are counted:

    * more than ``MAX`` requests     -> 429 response;
    * more than ``FLOOD`` requests   -> the IP is blacklisted and gets a 403.

    The ``MAX`` check runs first, exactly as before. Histories live in the
    memory of the current process only.
    """

    WINDOW = getattr(settings, "AIWAF_RATE_WINDOW", 10)
    MAX = getattr(settings, "AIWAF_RATE_MAX", 20)
    FLOOD = getattr(settings, "AIWAF_RATE_FLOOD", 10)

    def __init__(self, get_response):
        self.get_response = get_response
        self.logs = defaultdict(list)
        # Time of the last stale-entry sweep; see _sweep().
        self._last_sweep = time.time()

    def _sweep(self, now):
        """Drop histories whose newest timestamp fell out of the window.

        Without this, ``self.logs`` keeps one list per distinct client IP
        forever and grows without bound.
        """
        # Snapshot the items so concurrent inserts cannot break iteration.
        for ip, stamps in list(self.logs.items()):
            if not stamps or now - stamps[-1] >= self.WINDOW:
                self.logs.pop(ip, None)
        self._last_sweep = now

    def __call__(self, request):
        ip = get_ip(request)
        now = time.time()
        # Sweep at most once per window to keep the per-request cost low.
        if now - self._last_sweep >= self.WINDOW:
            self._sweep(now)
        recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
        recs.append(now)
        self.logs[ip] = recs
        if len(recs) > self.MAX:
            return JsonResponse({"error": "too_many_requests"}, status=429)
        if len(recs) > self.FLOOD:
            BlacklistManager.block(ip, "Flood pattern")
            return JsonResponse({"error": "blocked"}, status=403)

        return self.get_response(request)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class AIAnomalyMiddleware(MiddlewareMixin):
    """Score each client's recent request pattern with the loaded model.

    Recent requests per IP are kept in the Django cache for
    ``WINDOW_SECONDS``. Once at least five are available, a feature vector is
    built and passed to ``MODEL.predict``; a prediction of ``-1`` blacklists
    the IP.
    """

    WINDOW_SECONDS = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
    # Used when AIWAF_MALICIOUS_KEYWORDS is not configured, so the optional
    # setting no longer raises AttributeError at request time.
    DEFAULT_KEYWORDS = (
        ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
        "conflg", "shell", "filemanager",
    )

    def process_request(self, request):
        """Return a 403 response for blocked/anomalous IPs, otherwise ``None``."""
        ip = get_ip(request)
        if BlacklistManager.is_blocked(ip):
            return JsonResponse({"error": "blocked"}, status=403)
        now = time.time()
        key = f"aiwaf:{ip}"
        data = cache.get(key, [])
        # Status and response time are unknown before the view runs, so they
        # are stored as 0 / 0.0; ratio_404 and avg_rt below are therefore
        # always zero.
        data.append((now, request.path, 0, 0.0))
        data = [d for d in data if now - d[0] < self.WINDOW_SECONDS]
        cache.set(key, data, timeout=self.WINDOW_SECONDS)
        if len(data) < 5:
            return None
        keywords = getattr(settings, "AIWAF_MALICIOUS_KEYWORDS", self.DEFAULT_KEYWORDS)
        total = len(data)
        ratio_404 = sum(1 for (_, _, st, _) in data if st == 404) / total
        hits = sum(
            any(k in path.lower() for k in keywords)
            for (_, path, _, _) in data
        )
        # data holds at least five samples here, so both means are defined.
        avg_rt = np.mean([rt for (_, _, _, rt) in data])
        intervals = [later[0] - earlier[0] for earlier, later in zip(data, data[1:])]
        avg_iv = np.mean(intervals)
        # NOTE(review): five features are passed here — confirm the loaded
        # model was trained on the same five columns.
        X = np.array([[total, ratio_404, hits, avg_rt, avg_iv]], dtype=float)
        if MODEL.predict(X)[0] == -1:
            BlacklistManager.block(ip, "AI anomaly")
            return JsonResponse({"error": "blocked"}, status=403)
        return None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class HoneypotMiddleware(MiddlewareMixin):
    """Blacklist clients that submit a value in the hidden honeypot field."""

    def process_view(self, request, view_func, view_args, view_kwargs):
        """Return a 403 response when the honeypot is filled, else ``None``."""
        # Same fallback as the honeypot_field template tag, so the optional
        # setting may be omitted without raising AttributeError.
        field_name = getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field")
        trap = request.POST.get(field_name, "")
        if trap:
            ip = get_ip(request)
            BlacklistManager.block(ip, "HONEYPOT triggered")
            return JsonResponse({"error": "bot_detected"}, status=403)
        return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class UUIDTamperMiddleware(MiddlewareMixin):
    """Blacklist clients requesting a ``uuid`` URL kwarg that matches no object."""

    def process_view(self, request, view_func, view_args, view_kwargs):
        """Check the ``uuid`` view kwarg against the primary keys of the app's models.

        Returns ``None`` when there is nothing to check or a match exists,
        and a 403 response (after blacklisting the IP) when no model of the
        app has an object with that primary key.
        """
        from django.core.exceptions import ValidationError

        uid = view_kwargs.get("uuid")
        if not uid:
            return None
        app_label = view_kwargs.get("app_label") or view_func.__module__.split('.')[0]
        try:
            app_config = apps.get_app_config(app_label)
        except LookupError:
            # The view's top-level module is not an installed app label;
            # nothing can be verified, so do not punish the client.
            return None

        checked_any = False
        for Model in app_config.get_models():
            try:
                if Model.objects.filter(pk=uid).exists():
                    return None
            except (ValueError, TypeError, ValidationError):
                # This model's primary key cannot hold the value (for
                # example an integer pk); it cannot match, skip it.
                continue
            checked_any = True
        if not checked_any:
            # No model could be queried with this value, so a missing match
            # proves nothing.
            return None

        ip = get_ip(request)
        BlacklistManager.block(ip, "UUID tampering")
        return JsonResponse({"error": "blocked"}, status=403)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from django.db import models
|
|
2
|
+
|
|
3
|
+
class FeatureSample(models.Model):
    """One per-request feature row used to train the anomaly model."""

    # Indexed through Meta.indexes below; db_index=True on the field as well
    # would create a second, identical index on the same column.
    ip = models.GenericIPAddressField()
    path_len = models.IntegerField()
    kw_hits = models.IntegerField()
    resp_time = models.FloatField()
    status_idx = models.IntegerField()
    burst_count = models.IntegerField()
    total_404 = models.IntegerField()
    label = models.CharField(max_length=20, default="unlabeled")
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        verbose_name = "WAF Feature Sample"
        verbose_name_plural = "WAF Feature Samples"
        indexes = [
            models.Index(fields=["ip"]),
            models.Index(fields=["created_at"]),
        ]
|
|
21
|
+
|
|
22
|
+
class BlacklistEntry(models.Model):
    """A blocked client IP together with the reason it was blocked."""

    # unique=True already implies an index, so db_index=True is not needed.
    ip_address = models.GenericIPAddressField(unique=True)
    reason = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"{self.ip_address} ({self.reason})"
|
|
Binary file
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import os, csv, gzip, glob
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from django.conf import settings
|
|
5
|
+
from .models import FeatureSample
|
|
6
|
+
|
|
7
|
+
# CSV file that stores feature rows; overridable through AIWAF_CSV_PATH.
DATA_FILE = getattr(settings, "AIWAF_CSV_PATH", "access_samples.csv")
# Column order of the CSV file: ip, six numeric features, then the label.
CSV_HEADER = [
    "ip", "path_len", "kw_hits", "resp_time",
    "status_idx", "burst_count", "total_404", "label",
]


class CsvFeatureStore:
    """Feature store backed by the flat CSV file ``DATA_FILE``."""

    @staticmethod
    def persist_rows(rows):
        """Append ``rows`` to the CSV file, writing the header first if needed.

        Each row is expected to follow ``CSV_HEADER``.
        """
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as f:
            # Decide from the opened file itself: a missing *or empty* file
            # needs the header, otherwise load_matrix (skiprows=1) would
            # discard the first data row.
            f.seek(0, os.SEEK_END)
            needs_header = f.tell() == 0
            w = csv.writer(f)
            if needs_header:
                w.writerow(CSV_HEADER)
            w.writerows(rows)

    @staticmethod
    def load_matrix():
        """Return the six numeric feature columns as a 2-D array.

        A missing file yields an empty ``(0, 6)`` array. Malformed lines are
        skipped and non-numeric cells become 0.
        """
        if not os.path.exists(DATA_FILE):
            return np.empty((0, 6))
        df = pd.read_csv(
            DATA_FILE,
            names=CSV_HEADER,
            skiprows=1,  # skip the header line written by persist_rows
            engine="python",
            on_bad_lines="skip",
        )
        feature_cols = CSV_HEADER[1:7]
        df[feature_cols] = df[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)
        return df[feature_cols].to_numpy()
|
|
37
|
+
|
|
38
|
+
class DbFeatureStore:
    """Feature store backed by the ``FeatureSample`` model."""

    @staticmethod
    def persist_rows(rows):
        """Bulk-insert ``rows``.

        Each row is ``(ip, path_len, kw_hits, resp_time, status_idx,
        burst_count, total_404, label)``.
        """
        objs = [
            FeatureSample(
                ip=ip, path_len=pl, kw_hits=kw,
                resp_time=rt, status_idx=si,
                burst_count=bc, total_404=t404,
                label=label,
            )
            for ip, pl, kw, rt, si, bc, t404, label in rows
        ]
        FeatureSample.objects.bulk_create(objs, ignore_conflicts=True)

    @staticmethod
    def load_matrix():
        """Return all stored samples as an ``(n, 6)`` float array."""
        qs = FeatureSample.objects.all().values_list(
            "path_len", "kw_hits", "resp_time", "status_idx", "burst_count", "total_404"
        )
        # reshape keeps the result two-dimensional when the table is empty,
        # matching CsvFeatureStore.load_matrix's (0, 6) result.
        return np.array(list(qs), dtype=float).reshape(-1, 6)
|
|
57
|
+
|
|
58
|
+
def get_store():
    """Return the feature-store class selected by ``AIWAF_FEATURE_STORE``.

    ``"db"`` selects ``DbFeatureStore``; any other value, or no setting at
    all, selects ``CsvFeatureStore``.
    """
    backend = getattr(settings, "AIWAF_FEATURE_STORE", "csv")
    return DbFeatureStore if backend == "db" else CsvFeatureStore
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from django import template
|
|
2
|
+
from django.utils.html import format_html
|
|
3
|
+
from django.conf import settings
|
|
4
|
+
|
|
5
|
+
register = template.Library()
|
|
6
|
+
|
|
7
|
+
@register.simple_tag
def honeypot_field(field_name=None):
    """Render the hidden honeypot ``<input>`` element.

    The field is named ``field_name`` when given, otherwise
    ``AIWAF_HONEYPOT_FIELD`` from settings, falling back to ``"hp_field"``.
    """
    if field_name:
        name = field_name
    else:
        name = getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field")
    markup = '<input type="text" name="{}" hidden autocomplete="off" tabindex="-1" />'
    return format_html(markup, name)
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# aiwaf/trainer.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import glob
|
|
5
|
+
import gzip
|
|
6
|
+
import re
|
|
7
|
+
import joblib
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from .models import BlacklistEntry
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from sklearn.ensemble import IsolationForest
|
|
13
|
+
from django.conf import settings
|
|
14
|
+
from django.apps import apps
|
|
15
|
+
|
|
16
|
+
# Access log to learn from (required setting).
LOG_PATH = settings.AIWAF_ACCESS_LOG
# Honour AIWAF_MODEL_PATH so the retrained model is written where the
# middleware loads it from; fall back to the model bundled with the package.
MODEL_PATH = os.fspath(
    getattr(settings, "AIWAF_MODEL_PATH", None)
    or os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
)
# Substrings counted as suspicious when found in a request path.
# NOTE(review): "conflg" looks like a typo for "config" — left unchanged
# because it affects the computed features; confirm intent.
MALICIOUS_KEYWORDS = list(getattr(
    settings,
    "AIWAF_MALICIOUS_KEYWORDS",
    [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"],
))
# Status codes that map to a feature index; any other code maps to -1.
STATUS_CODES = list(getattr(settings, "AIWAF_STATUS_CODES", ["200", "403", "404", "500"]))
# Captures: ip, timestamp, path, status, referer, user agent, response time.
_LOG_RX = re.compile(
    r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
)
# Lazy queryset of all blacklist entries (not used in this module).
BlacklistedIP = BlacklistEntry.objects.all()
|
|
28
|
+
def _read_all_logs():
    """Return every line of the access log and its rotated siblings.

    Reads ``LOG_PATH`` itself and then every ``LOG_PATH.*`` file in sorted
    order; files ending in ``.gz`` are read through gzip. Unreadable rotated
    files are skipped. Returns an empty list when ``LOG_PATH`` is unset.
    """
    if not LOG_PATH:
        # Nothing configured: previously ``LOG_PATH + ".*"`` raised TypeError.
        return []
    base = os.fspath(LOG_PATH)  # also accepts pathlib.Path settings
    lines = []
    if os.path.exists(base):
        with open(base, "r", errors="ignore") as f:
            lines += f.readlines()
    # Escape glob metacharacters so a path such as "logs[1]/access.log"
    # still finds its rotated files.
    for path in sorted(glob.glob(glob.escape(base) + ".*")):
        opener = gzip.open if path.endswith(".gz") else open
        try:
            with opener(path, "rt", errors="ignore") as f:
                lines += f.readlines()
        except (OSError, EOFError):
            # EOFError: truncated gzip archive, e.g. one still being written.
            continue
    return lines
|
|
41
|
+
|
|
42
|
+
def _parse(line):
    """Parse one access-log line into a record dict.

    Returns ``None`` when the line does not match ``_LOG_RX`` or its
    timestamp cannot be parsed.
    """
    match = _LOG_RX.search(line)
    if match is None:
        return None
    ip, raw_ts, path, status, _referer, ua, rt = match.groups()
    # Only the part before the first whitespace is parsed (the UTC offset,
    # if present, is dropped).
    day_and_time = raw_ts.split()[0]
    try:
        ts = datetime.strptime(day_and_time, "%d/%b/%Y:%H:%M:%S")
    except ValueError:
        return None
    return dict(
        ip=ip,
        timestamp=ts,
        path=path,
        status=status,
        ua=ua,
        response_time=float(rt),
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def train():
    """Blacklist noisy IPs, then retrain and save the IsolationForest model.

    Steps:
      1. read and parse all access-log lines;
      2. blacklist every IP with six or more 404 responses;
      3. build one six-column feature row per parsed request;
      4. fit an ``IsolationForest`` and dump it to ``MODEL_PATH``.

    Progress and problems are reported with ``print``; nothing is returned.
    """
    from bisect import bisect_left, bisect_right
    from datetime import timedelta

    raw = _read_all_logs()
    if not raw:
        print("No log lines found – check AIWAF_ACCESS_LOG")
        return

    parsed = []
    ip_404 = defaultdict(int)
    ip_times = defaultdict(list)
    for ln in raw:
        rec = _parse(ln)
        if not rec:
            continue
        parsed.append(rec)
        ip_times[rec["ip"]].append(rec["timestamp"])
        if rec["status"] == "404":
            ip_404[rec["ip"]] += 1

    blocked = []
    for ip, count in ip_404.items():
        if count >= 6:
            _, created = BlacklistEntry.objects.get_or_create(
                ip_address=ip,
                defaults={"reason": "Excessive 404s (≥6)"}
            )
            if created:
                blocked.append(ip)
    if blocked:
        print(f"Auto‑blocked {len(blocked)} IPs for ≥6 404s: {', '.join(blocked)}")

    # Sort once so each burst count is two binary searches, not a full scan.
    for stamps in ip_times.values():
        stamps.sort()
    burst_window = timedelta(seconds=10)

    rows = []
    for r in parsed:
        ip = r["ip"]
        stamps = ip_times[ip]
        # Requests from this IP in the 10 s up to and including this one.
        # The previous ``(ts - t) <= 10`` test also counted every later
        # request, because negative differences satisfy it.
        burst = (
            bisect_right(stamps, r["timestamp"])
            - bisect_left(stamps, r["timestamp"] - burst_window)
        )
        total404 = ip_404[ip]
        kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
        status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
        rows.append([
            len(r["path"]),
            kw_hits,
            r["response_time"],
            status_idx,
            burst,
            total404,
        ])

    if not rows:
        print("No entries to train on!")
        return

    df = pd.DataFrame(
        rows,
        columns=[
            "path_len", "kw_hits", "resp_time",
            "status_idx", "burst_count", "total_404",
        ],
    ).fillna(0).astype(float)
    clf = IsolationForest(contamination=0.01, random_state=42)
    clf.fit(df.values)
    model_dir = os.path.dirname(MODEL_PATH)
    if model_dir:  # a bare filename has no directory to create
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(clf, MODEL_PATH)
    print(f"Model trained on {len(df)} samples and saved to {MODEL_PATH}")
|
|
123
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import glob
|
|
4
|
+
import gzip
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
# Captures: ip, timestamp, method, path, status, referer, user agent.
_LOG_RX = re.compile(
    r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)"'
)


def get_ip(request):
    """Return the client IP address for ``request``.

    Uses the first ``X-Forwarded-For`` entry when the header is non-empty,
    otherwise ``REMOTE_ADDR`` (empty string when that is missing too).
    """
    meta = request.META
    forwarded = meta.get("HTTP_X_FORWARDED_FOR", "")
    if not forwarded:
        return meta.get("REMOTE_ADDR", "")
    first_hop, _, _ = forwarded.partition(",")
    return first_hop.strip()
|
|
16
|
+
|
|
17
|
+
def read_rotated_logs(base_path):
    """Return all lines of ``base_path`` and its rotated siblings.

    The base file is read first, followed by every ``base_path.*`` file in
    sorted order; names ending in ``.gz`` are read through gzip. Rotated
    files that cannot be read are skipped.

    Args:
        base_path: path of the live log file (``str`` or path-like).

    Returns:
        A list of lines, each including its trailing newline.
    """
    base = os.fspath(base_path)
    lines = []
    if os.path.exists(base):
        with open(base, "r", encoding="utf-8", errors="ignore") as f:
            lines.extend(f.readlines())
    # Escape glob metacharacters so directories such as "logs[1]" do not
    # hide the rotated files.
    for path in sorted(glob.glob(glob.escape(base) + ".*")):
        opener = gzip.open if path.endswith(".gz") else open
        try:
            with opener(path, "rt", encoding="utf-8", errors="ignore") as f:
                lines.extend(f.readlines())
        except (OSError, EOFError):
            # EOFError: truncated gzip archive, e.g. one still being written.
            continue
    return lines
|
|
30
|
+
|
|
31
|
+
def parse_log_line(line):
    """Parse one access-log line into a record dict.

    Returns ``None`` when the line does not match ``_LOG_RX`` or its
    timestamp cannot be parsed. ``response_time`` is taken from a
    ``response-time=<float>`` token and defaults to ``0.0`` when absent.
    """
    match = _LOG_RX.search(line)
    if match is None:
        return None
    ip, raw_ts, _method, path, status, referer, agent = match.groups()
    day_and_time = raw_ts.split()[0]
    try:
        ts = datetime.strptime(day_and_time, "%d/%b/%Y:%H:%M:%S")
    except ValueError:
        return None

    rt_match = re.search(r'response-time=(\d+\.\d+)', line)
    if rt_match:
        rt = float(rt_match.group(1))
    else:
        rt = 0.0
    return dict(
        ip=ip,
        timestamp=ts,
        path=path,
        status=status,
        referer=referer,
        user_agent=agent,
        response_time=rt,
    )
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiwaf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)
|
|
5
|
+
Author: Aayush Gauba
|
|
6
|
+
Requires-Dist: django>=3.0
|
|
7
|
+
Requires-Dist: scikit-learn
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: pandas
|
|
10
|
+
Requires-Dist: joblib
|
|
11
|
+
Dynamic: author
|
|
12
|
+
Dynamic: requires-dist
|
|
13
|
+
Dynamic: summary
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
aiwaf/__init__.py
|
|
4
|
+
aiwaf/apps.py
|
|
5
|
+
aiwaf/blacklist_manager.py
|
|
6
|
+
aiwaf/middleware.py
|
|
7
|
+
aiwaf/models.py
|
|
8
|
+
aiwaf/storage.py
|
|
9
|
+
aiwaf/trainer.py
|
|
10
|
+
aiwaf/utils.py
|
|
11
|
+
aiwaf.egg-info/PKG-INFO
|
|
12
|
+
aiwaf.egg-info/SOURCES.txt
|
|
13
|
+
aiwaf.egg-info/dependency_links.txt
|
|
14
|
+
aiwaf.egg-info/entry_points.txt
|
|
15
|
+
aiwaf.egg-info/requires.txt
|
|
16
|
+
aiwaf.egg-info/top_level.txt
|
|
17
|
+
aiwaf/management/__init__.py
|
|
18
|
+
aiwaf/management/commands/__init__.py
|
|
19
|
+
aiwaf/management/commands/detect_and_train.py
|
|
20
|
+
aiwaf/resources/model.pkl
|
|
21
|
+
aiwaf/template_tags/__init__.py
|
|
22
|
+
aiwaf/template_tags/aiwaf_tags.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
aiwaf
|
aiwaf-0.1.0/setup.cfg
ADDED
aiwaf-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
setup(
    name="aiwaf",
    version="0.1.0",
    description="AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)",
    author="Aayush Gauba",
    packages=find_packages(),
    # Ship the pre-trained model together with the code.
    package_data={
        "aiwaf": ["resources/*.pkl"],
    },
    include_package_data=True,
    install_requires=[
        "django>=3.0",
        "scikit-learn",
        "numpy",
        "pandas",
        "joblib",
    ],
    entry_points={
        "console_scripts": [
            # aiwaf/trainer.py defines train(); the previous target
            # "aiwaf.trainer:detect_and_train" does not exist in that
            # module, so the generated script failed on start-up.
            # NOTE(review): train() reads Django settings — presumably
            # DJANGO_SETTINGS_MODULE must be set when running this script.
            "aiwaf-detect=aiwaf.trainer:train",
        ]
    },
)
|