aiwaf 0.1.7__py3-none-any.whl → 0.1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/middleware.py +20 -1
- aiwaf/trainer.py +22 -4
- {aiwaf-0.1.7.dist-info → aiwaf-0.1.7.1.dist-info}/METADATA +55 -41
- {aiwaf-0.1.7.dist-info → aiwaf-0.1.7.1.dist-info}/RECORD +7 -7
- {aiwaf-0.1.7.dist-info → aiwaf-0.1.7.1.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.7.dist-info → aiwaf-0.1.7.1.dist-info}/licenses/LICENSE +0 -0
- {aiwaf-0.1.7.dist-info → aiwaf-0.1.7.1.dist-info}/top_level.txt +0 -0
aiwaf/middleware.py
CHANGED
|
@@ -18,6 +18,14 @@ from django.urls import get_resolver
|
|
|
18
18
|
from .blacklist_manager import BlacklistManager
|
|
19
19
|
from .models import DynamicKeyword
|
|
20
20
|
|
|
21
|
+
def is_exempt_path(path):
    """Return True when *path* is covered by ``settings.AIWAF_EXEMPT_PATHS``.

    Matching is case-insensitive and treats each configured entry as either
    an exact path or a directory-style prefix: an entry ``"/static/"`` (or
    ``"/static"``) exempts ``/static``, ``/static/`` and anything below
    ``/static/``, but not ``/staticfiles``.

    Args:
        path: Request path to test, e.g. ``request.path``.

    Returns:
        bool: True if any configured entry matches, otherwise False. When
        the setting is absent it defaults to an empty list, so nothing is
        exempt.
    """
    path = path.lower()
    # NOTE(review): entries are assumed to be strings - confirm against the
    # project's settings conventions.
    for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
        # The request path is lower-cased above, so the configured entry must
        # be lower-cased too; otherwise an entry such as "/API/" never matches.
        exempt = exempt.lower()
        if not exempt:
            # An empty entry would become the prefix "/" and exempt every
            # request, silently switching the checks off.
            continue
        base = exempt.rstrip("/")
        # Compare against the entry with and without its trailing slash so
        # that "/health/" also exempts "/health".
        if path == exempt or path == base or path.startswith(base + "/"):
            return True
    return False
|
|
28
|
+
|
|
21
29
|
MODEL_PATH = getattr(
|
|
22
30
|
settings,
|
|
23
31
|
"AIWAF_MODEL_PATH",
|
|
@@ -64,8 +72,11 @@ class IPAndKeywordBlockMiddleware:
|
|
|
64
72
|
return prefixes
|
|
65
73
|
|
|
66
74
|
def __call__(self, request):
|
|
75
|
+
raw_path = request.path.lower()
|
|
76
|
+
if is_exempt_path(raw_path):
|
|
77
|
+
return self.get_response(request)
|
|
67
78
|
ip = get_ip(request)
|
|
68
|
-
path =
|
|
79
|
+
path = raw_path.lstrip("/")
|
|
69
80
|
if BlacklistManager.is_blocked(ip):
|
|
70
81
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
71
82
|
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
@@ -99,6 +110,8 @@ class RateLimitMiddleware:
|
|
|
99
110
|
self.logs = defaultdict(list)
|
|
100
111
|
|
|
101
112
|
def __call__(self, request):
|
|
113
|
+
if is_exempt_path(request.path):
|
|
114
|
+
return self.get_response(request)
|
|
102
115
|
ip = get_ip(request)
|
|
103
116
|
now = time.time()
|
|
104
117
|
recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
|
|
@@ -119,6 +132,8 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
119
132
|
TOP_N = getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)
|
|
120
133
|
|
|
121
134
|
def process_request(self, request):
|
|
135
|
+
if is_exempt_path(request.path):
|
|
136
|
+
return None
|
|
122
137
|
ip = get_ip(request)
|
|
123
138
|
if BlacklistManager.is_blocked(ip):
|
|
124
139
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
@@ -160,6 +175,8 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
160
175
|
|
|
161
176
|
class HoneypotMiddleware(MiddlewareMixin):
|
|
162
177
|
def process_view(self, request, view_func, view_args, view_kwargs):
|
|
178
|
+
if is_exempt_path(request.path):
|
|
179
|
+
return None
|
|
163
180
|
trap = request.POST.get(getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field"), "")
|
|
164
181
|
if trap:
|
|
165
182
|
ip = get_ip(request)
|
|
@@ -170,6 +187,8 @@ class HoneypotMiddleware(MiddlewareMixin):
|
|
|
170
187
|
|
|
171
188
|
class UUIDTamperMiddleware(MiddlewareMixin):
|
|
172
189
|
def process_view(self, request, view_func, view_args, view_kwargs):
|
|
190
|
+
if is_exempt_path(request.path):
|
|
191
|
+
return None
|
|
173
192
|
uid = view_kwargs.get("uuid")
|
|
174
193
|
if not uid:
|
|
175
194
|
return None
|
aiwaf/trainer.py
CHANGED
|
@@ -14,7 +14,6 @@ from django.apps import apps
|
|
|
14
14
|
from django.db.models import F
|
|
15
15
|
from django.urls import get_resolver
|
|
16
16
|
|
|
17
|
-
# ─── CONFIG ────────────────────────────────────────────────────────────────
|
|
18
17
|
|
|
19
18
|
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
20
19
|
MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
|
|
@@ -30,7 +29,13 @@ _LOG_RX = re.compile(
|
|
|
30
29
|
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
31
30
|
DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
|
|
32
31
|
|
|
33
|
-
|
|
32
|
+
def is_exempt_path(path):
    """Return True when *path* is covered by ``settings.AIWAF_EXEMPT_PATHS``.

    Matching is case-insensitive and treats each configured entry as either
    an exact path or a directory-style prefix: an entry ``"/static/"`` (or
    ``"/static"``) exempts ``/static``, ``/static/`` and anything below
    ``/static/``, but not ``/staticfiles``.

    Args:
        path: Path to test, e.g. the path field of a parsed log record.

    Returns:
        bool: True if any configured entry matches, otherwise False. When
        the setting is absent it defaults to an empty list, so nothing is
        exempt.
    """
    path = path.lower()
    # NOTE(review): entries are assumed to be strings - confirm against the
    # project's settings conventions.
    for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
        # The path is lower-cased above, so the configured entry must be
        # lower-cased too; otherwise an entry such as "/API/" never matches.
        exempt = exempt.lower()
        if not exempt:
            # An empty entry would become the prefix "/" and exempt every
            # path, silently excluding everything from keyword scoring.
            continue
        base = exempt.rstrip("/")
        # Compare against the entry with and without its trailing slash so
        # that "/health/" also exempts "/health".
        if path == exempt or path == base or path.startswith(base + "/"):
            return True
    return False
|
|
34
39
|
|
|
35
40
|
def path_exists_in_django(path):
|
|
36
41
|
from django.urls import get_resolver
|
|
@@ -51,6 +56,18 @@ def path_exists_in_django(path):
|
|
|
51
56
|
return True
|
|
52
57
|
return False
|
|
53
58
|
|
|
59
|
+
def remove_exempt_keywords():
    """Delete ``DynamicKeyword`` rows whose keyword comes from an exempt path.

    Each entry of ``settings.AIWAF_EXEMPT_PATHS`` (empty list when the setting
    is absent) is stripped of leading/trailing slashes, lower-cased and split
    on runs of non-word characters; segments longer than three characters are
    collected as tokens. If any tokens were collected, matching rows are
    deleted and a summary line is printed. With no tokens the function does
    nothing.
    """
    exempt_tokens = {
        piece
        for raw in getattr(settings, "AIWAF_EXEMPT_PATHS", [])
        for piece in re.split(r"\W+", raw.strip("/").lower())
        if len(piece) > 3
    }

    # Nothing configured (or only short segments): skip the query entirely.
    if not exempt_tokens:
        return

    matching = DynamicKeyword.objects.filter(keyword__in=exempt_tokens)
    # delete() returns a pair; only the first element, the count, is used here.
    deleted_count, _ = matching.delete()
    print(f"Removed {deleted_count} dynamic keywords that are now exempt: {list(exempt_tokens)}")
|
|
54
71
|
|
|
55
72
|
def _read_all_logs():
|
|
56
73
|
lines = []
|
|
@@ -88,6 +105,7 @@ def _parse(line):
|
|
|
88
105
|
|
|
89
106
|
|
|
90
107
|
def train():
|
|
108
|
+
remove_exempt_keywords()
|
|
91
109
|
raw_lines = _read_all_logs()
|
|
92
110
|
if not raw_lines:
|
|
93
111
|
print("No log lines found – check AIWAF_ACCESS_LOG setting.")
|
|
@@ -125,7 +143,7 @@ def train():
|
|
|
125
143
|
total404 = ip_404[ip]
|
|
126
144
|
is_known_path = path_exists_in_django(r["path"])
|
|
127
145
|
kw_hits = 0
|
|
128
|
-
if not is_known_path:
|
|
146
|
+
if not is_known_path and not is_exempt_path(r["path"]):
|
|
129
147
|
kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
|
|
130
148
|
status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
|
|
131
149
|
feature_dicts.append({
|
|
@@ -178,4 +196,4 @@ def train():
|
|
|
178
196
|
|
|
179
197
|
|
|
180
198
|
if __name__ == "__main__":
|
|
181
|
-
train()
|
|
199
|
+
train()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiwaf
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.7.1
|
|
4
4
|
Summary: AI-powered Web Application Firewall
|
|
5
5
|
Home-page: https://github.com/aayushgauba/aiwaf
|
|
6
6
|
Author: Aayush Gauba
|
|
@@ -15,14 +15,14 @@ Dynamic: license-file
|
|
|
15
15
|
Dynamic: requires-python
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
# AI‑WAF
|
|
18
|
+
# AI‑WAF
|
|
19
19
|
|
|
20
20
|
> A self‑learning, Django‑friendly Web Application Firewall
|
|
21
|
-
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
|
|
21
|
+
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, exempt path awareness, and daily retraining.
|
|
22
22
|
|
|
23
23
|
---
|
|
24
24
|
|
|
25
|
-
## Package Structure
|
|
25
|
+
## 📁 Package Structure
|
|
26
26
|
|
|
27
27
|
```
|
|
28
28
|
aiwaf/
|
|
@@ -44,7 +44,7 @@ aiwaf/
|
|
|
44
44
|
|
|
45
45
|
---
|
|
46
46
|
|
|
47
|
-
## Features
|
|
47
|
+
## 🚀 Features
|
|
48
48
|
|
|
49
49
|
- **IP Blocklist**
|
|
50
50
|
Instantly blocks suspicious IPs (supports CSV fallback or Django model).
|
|
@@ -53,7 +53,7 @@ aiwaf/
|
|
|
53
53
|
Sliding‑window blocks flooders (> `AIWAF_RATE_MAX` per `AIWAF_RATE_WINDOW`), then blacklists them.
|
|
54
54
|
|
|
55
55
|
- **AI Anomaly Detection**
|
|
56
|
-
IsolationForest on
|
|
56
|
+
IsolationForest trained on:
|
|
57
57
|
- Path length
|
|
58
58
|
- Keyword hits (static + dynamic)
|
|
59
59
|
- Response time
|
|
@@ -61,34 +61,28 @@ aiwaf/
|
|
|
61
61
|
- Burst count
|
|
62
62
|
- Total 404s
|
|
63
63
|
|
|
64
|
-
- **Dynamic Keyword Extraction**
|
|
65
|
-
Every retrain
|
|
64
|
+
- **Dynamic Keyword Extraction & Cleanup**
|
|
65
|
+
- Every retrain adds top 10 keyword segments from 4xx/5xx paths
|
|
66
|
+
- **If a path is added to `AIWAF_EXEMPT_PATHS`, its keywords are automatically removed from the database**
|
|
66
67
|
|
|
67
68
|
- **File‑Extension Probing Detection**
|
|
68
|
-
Tracks repeated 404s on common
|
|
69
|
+
Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
|
|
69
70
|
|
|
70
71
|
- **Honeypot Field**
|
|
71
|
-
Hidden
|
|
72
|
+
Hidden field for bot detection → IP blacklisted on fill.
|
|
72
73
|
|
|
73
74
|
- **UUID Tampering Protection**
|
|
74
|
-
|
|
75
|
+
Blocks guessed or invalid UUIDs that don’t resolve to real models.
|
|
75
76
|
|
|
76
|
-
- **
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
```bash
|
|
84
|
-
# From PyPI
|
|
85
|
-
pip install aiwaf
|
|
77
|
+
- **Exempt Path Awareness**
|
|
78
|
+
Fully respects `AIWAF_EXEMPT_PATHS` across all modules — exempt paths are:
|
|
79
|
+
- Skipped from keyword learning
|
|
80
|
+
- Immune to AI blocking
|
|
81
|
+
- Ignored in log training
|
|
82
|
+
- Cleaned from `DynamicKeyword` model automatically
|
|
86
83
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
cd aiwaf
|
|
90
|
-
pip install -e .
|
|
91
|
-
```
|
|
84
|
+
- **Daily Retraining**
|
|
85
|
+
Reads rotated logs, auto‑blocks 404 floods, retrains the IsolationForest, updates `model.pkl`, and evolves the keyword DB.
|
|
92
86
|
|
|
93
87
|
---
|
|
94
88
|
|
|
@@ -96,33 +90,51 @@ pip install -e .
|
|
|
96
90
|
|
|
97
91
|
```python
|
|
98
92
|
INSTALLED_APPS += ["aiwaf"]
|
|
93
|
+
```
|
|
99
94
|
|
|
100
95
|
### Database Setup
|
|
101
96
|
|
|
102
|
-
After adding `aiwaf` to your `INSTALLED_APPS`,
|
|
97
|
+
After adding `aiwaf` to your `INSTALLED_APPS`, run the following to create the necessary tables:
|
|
103
98
|
|
|
104
99
|
```bash
|
|
105
100
|
python manage.py makemigrations aiwaf
|
|
106
101
|
python manage.py migrate
|
|
102
|
+
```
|
|
107
103
|
|
|
108
|
-
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
### Required
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
109
|
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
### Optional (defaults shown)
|
|
110
115
|
|
|
111
|
-
|
|
116
|
+
```python
|
|
112
117
|
AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
|
|
113
118
|
AIWAF_HONEYPOT_FIELD = "hp_field"
|
|
114
119
|
AIWAF_RATE_WINDOW = 10 # seconds
|
|
115
|
-
AIWAF_RATE_MAX = 20 # max
|
|
120
|
+
AIWAF_RATE_MAX = 20 # max requests per window
|
|
116
121
|
AIWAF_RATE_FLOOD = 10 # flood threshold
|
|
117
|
-
AIWAF_WINDOW_SECONDS = 60 # anomaly window
|
|
118
|
-
AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"]
|
|
122
|
+
AIWAF_WINDOW_SECONDS = 60 # anomaly detection window
|
|
123
|
+
AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"]
|
|
124
|
+
AIWAF_EXEMPT_PATHS = [ # optional but highly recommended
|
|
125
|
+
"/favicon.ico",
|
|
126
|
+
"/robots.txt",
|
|
127
|
+
"/static/",
|
|
128
|
+
"/media/",
|
|
129
|
+
"/health/",
|
|
130
|
+
]
|
|
119
131
|
```
|
|
120
132
|
|
|
121
|
-
> **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES`
|
|
133
|
+
> **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` — they evolve dynamically.
|
|
122
134
|
|
|
123
135
|
---
|
|
124
136
|
|
|
125
|
-
## Middleware Setup
|
|
137
|
+
## 🧱 Middleware Setup
|
|
126
138
|
|
|
127
139
|
Add in **this** order to your `MIDDLEWARE` list:
|
|
128
140
|
|
|
@@ -139,7 +151,7 @@ MIDDLEWARE = [
|
|
|
139
151
|
|
|
140
152
|
---
|
|
141
153
|
|
|
142
|
-
## Honeypot Field (in your template)
|
|
154
|
+
## 🕵️ Honeypot Field (in your template)
|
|
143
155
|
|
|
144
156
|
```django
|
|
145
157
|
{% load aiwaf_tags %}
|
|
@@ -156,22 +168,23 @@ MIDDLEWARE = [
|
|
|
156
168
|
|
|
157
169
|
---
|
|
158
170
|
|
|
159
|
-
## Running Detection & Training
|
|
171
|
+
## 🔁 Running Detection & Training
|
|
160
172
|
|
|
161
173
|
```bash
|
|
162
174
|
python manage.py detect_and_train
|
|
163
175
|
```
|
|
164
176
|
|
|
165
|
-
|
|
166
|
-
1. Read access logs
|
|
177
|
+
### What happens:
|
|
178
|
+
1. Read access logs (incl. rotated or gzipped)
|
|
167
179
|
2. Auto‑block IPs with ≥ 6 total 404s
|
|
168
180
|
3. Extract features & train IsolationForest
|
|
169
181
|
4. Save `model.pkl`
|
|
170
182
|
5. Extract top 10 dynamic keywords from 4xx/5xx
|
|
183
|
+
6. Remove any keywords associated with newly exempt paths
|
|
171
184
|
|
|
172
185
|
---
|
|
173
186
|
|
|
174
|
-
## How It Works
|
|
187
|
+
## 🧠 How It Works
|
|
175
188
|
|
|
176
189
|
| Middleware | Purpose |
|
|
177
190
|
|------------------------------------|-----------------------------------------------------------------|
|
|
@@ -180,15 +193,16 @@ python manage.py detect_and_train
|
|
|
180
193
|
| AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
|
|
181
194
|
| HoneypotMiddleware | Detects bots filling hidden inputs in forms |
|
|
182
195
|
| UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
|
|
196
|
+
|
|
183
197
|
---
|
|
184
198
|
|
|
185
|
-
## License
|
|
199
|
+
## 📄 License
|
|
186
200
|
|
|
187
201
|
This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
|
|
188
202
|
|
|
189
203
|
---
|
|
190
204
|
|
|
191
|
-
## Credits
|
|
205
|
+
## 👤 Credits
|
|
192
206
|
|
|
193
207
|
**AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
194
208
|
> “Let your firewall learn and evolve — keep your site a fortress.”
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
|
|
2
2
|
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
3
|
aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
|
|
4
|
-
aiwaf/middleware.py,sha256=
|
|
4
|
+
aiwaf/middleware.py,sha256=LTLHmQYIQ36WwfR9FEPLrmTbYgqxIh4X5Aen4VJ-vN0,7350
|
|
5
5
|
aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
|
|
6
6
|
aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
|
|
7
|
-
aiwaf/trainer.py,sha256=
|
|
7
|
+
aiwaf/trainer.py,sha256=ir5kFTeLQuhMd2h094ct03Wr-rNZsX-mZHwjLx29F54,6422
|
|
8
8
|
aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
|
|
9
9
|
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -12,8 +12,8 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
|
|
|
12
12
|
aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
|
|
13
13
|
aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
|
|
15
|
-
aiwaf-0.1.7.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
16
|
-
aiwaf-0.1.7.dist-info/METADATA,sha256=
|
|
17
|
-
aiwaf-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
18
|
-
aiwaf-0.1.7.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
19
|
-
aiwaf-0.1.7.dist-info/RECORD,,
|
|
15
|
+
aiwaf-0.1.7.1.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
16
|
+
aiwaf-0.1.7.1.dist-info/METADATA,sha256=aO_1D_qSP_s4vKUj60a8VmsFcCLCyhBZii1tpbo3HqE,5790
|
|
17
|
+
aiwaf-0.1.7.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
18
|
+
aiwaf-0.1.7.1.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
19
|
+
aiwaf-0.1.7.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|