aiwaf 0.1.8.5__tar.gz → 0.1.8.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic.
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/PKG-INFO +46 -2
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/README.md +45 -1
- aiwaf-0.1.8.7/aiwaf/blacklist_manager.py +24 -0
- aiwaf-0.1.8.7/aiwaf/decorators.py +28 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/commands/add_ipexemption.py +8 -7
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/commands/aiwaf_reset.py +16 -5
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/middleware.py +20 -46
- aiwaf-0.1.8.7/aiwaf/storage.py +351 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/trainer.py +30 -49
- aiwaf-0.1.8.7/aiwaf/utils.py +106 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf.egg-info/PKG-INFO +46 -2
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf.egg-info/SOURCES.txt +1 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/pyproject.toml +1 -1
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/setup.py +1 -1
- aiwaf-0.1.8.5/aiwaf/blacklist_manager.py +0 -14
- aiwaf-0.1.8.5/aiwaf/storage.py +0 -61
- aiwaf-0.1.8.5/aiwaf/utils.py +0 -50
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/LICENSE +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/__init__.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/apps.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/__init__.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/commands/__init__.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/commands/detect_and_train.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/models.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/resources/model.pkl +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/templatetags/__init__.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/templatetags/aiwaf_tags.py +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf.egg-info/dependency_links.txt +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf.egg-info/requires.txt +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf.egg-info/top_level.txt +0 -0
- {aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/setup.cfg +0 -0

{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiwaf
-Version: 0.1.8.5
+Version: 0.1.8.7
 Summary: AI-powered Web Application Firewall
 Home-page: https://github.com/aayushgauba/aiwaf
 Author: Aayush Gauba
@@ -99,7 +99,26 @@ AIWAF_EXEMPT_PATHS = [
 ]
 ```
 
-
+**Exempt Views (Decorator):**
+Use the `@aiwaf_exempt` decorator to exempt specific views from all AI-WAF protection:
+
+```python
+from aiwaf.decorators import aiwaf_exempt
+from django.http import JsonResponse
+
+@aiwaf_exempt
+def my_api_view(request):
+    """This view will be exempt from all AI-WAF protection"""
+    return JsonResponse({"status": "success"})
+
+# Works with class-based views too
+@aiwaf_exempt
+class MyAPIView(View):
+    def get(self, request):
+        return JsonResponse({"method": "GET"})
+```
+
+All exempt paths and views are:
 - Skipped from keyword learning
 - Immune to AI blocking
 - Ignored in log training
@@ -169,6 +188,31 @@ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
 
 ---
 
+### Storage Configuration
+
+**Choose storage backend:**
+
+```python
+# Use Django models (default) - requires database tables
+AIWAF_STORAGE_MODE = "models"
+
+# OR use CSV files - no database required
+AIWAF_STORAGE_MODE = "csv"
+AIWAF_CSV_DATA_DIR = "aiwaf_data"  # Directory for CSV files
+```
+
+**CSV Mode Features:**
+- No database migrations required
+- Files stored in `aiwaf_data/` directory:
+  - `blacklist.csv` - Blocked IP addresses
+  - `exemptions.csv` - Exempt IP addresses
+  - `keywords.csv` - Dynamic keywords
+  - `access_samples.csv` - Feature samples for ML training
+- Perfect for lightweight deployments or when you prefer file-based storage
+- Management commands work identically in both modes
+
+---
+
 ### Optional (defaults shown)
 
 ```python
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/README.md

@@ -78,7 +78,26 @@ AIWAF_EXEMPT_PATHS = [
 ]
 ```
 
-
+**Exempt Views (Decorator):**
+Use the `@aiwaf_exempt` decorator to exempt specific views from all AI-WAF protection:
+
+```python
+from aiwaf.decorators import aiwaf_exempt
+from django.http import JsonResponse
+
+@aiwaf_exempt
+def my_api_view(request):
+    """This view will be exempt from all AI-WAF protection"""
+    return JsonResponse({"status": "success"})
+
+# Works with class-based views too
+@aiwaf_exempt
+class MyAPIView(View):
+    def get(self, request):
+        return JsonResponse({"method": "GET"})
+```
+
+All exempt paths and views are:
 - Skipped from keyword learning
 - Immune to AI blocking
 - Ignored in log training
@@ -148,6 +167,31 @@ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
 
 ---
 
+### Storage Configuration
+
+**Choose storage backend:**
+
+```python
+# Use Django models (default) - requires database tables
+AIWAF_STORAGE_MODE = "models"
+
+# OR use CSV files - no database required
+AIWAF_STORAGE_MODE = "csv"
+AIWAF_CSV_DATA_DIR = "aiwaf_data"  # Directory for CSV files
+```
+
+**CSV Mode Features:**
+- No database migrations required
+- Files stored in `aiwaf_data/` directory:
+  - `blacklist.csv` - Blocked IP addresses
+  - `exemptions.csv` - Exempt IP addresses
+  - `keywords.csv` - Dynamic keywords
+  - `access_samples.csv` - Feature samples for ML training
+- Perfect for lightweight deployments or when you prefer file-based storage
+- Management commands work identically in both modes
+
+---
+
 ### Optional (defaults shown)
 
 ```python
aiwaf-0.1.8.7/aiwaf/blacklist_manager.py
ADDED

@@ -0,0 +1,24 @@
+# aiwaf/blacklist_manager.py
+
+from .storage import get_blacklist_store
+
+class BlacklistManager:
+    @staticmethod
+    def block(ip, reason):
+        store = get_blacklist_store()
+        store.add_ip(ip, reason)
+
+    @staticmethod
+    def is_blocked(ip):
+        store = get_blacklist_store()
+        return store.is_blocked(ip)
+
+    @staticmethod
+    def all_blocked():
+        store = get_blacklist_store()
+        return store.get_all()
+
+    @staticmethod
+    def unblock(ip):
+        store = get_blacklist_store()
+        store.remove_ip(ip)
aiwaf-0.1.8.7/aiwaf/decorators.py
ADDED

@@ -0,0 +1,28 @@
+from functools import wraps
+from django.utils.decorators import method_decorator
+
+def aiwaf_exempt(view_func):
+    """
+    Decorator to exempt a view from AI-WAF protection.
+    Can be used on function-based views or class-based views.
+
+    Usage:
+        @aiwaf_exempt
+        def my_view(request):
+            return HttpResponse("This view is exempt from AI-WAF")
+
+        # Or for class-based views:
+        @method_decorator(aiwaf_exempt, name='dispatch')
+        class MyView(View):
+            pass
+    """
+    @wraps(view_func)
+    def wrapped_view(*args, **kwargs):
+        return view_func(*args, **kwargs)
+
+    # Mark the view as AI-WAF exempt
+    wrapped_view.aiwaf_exempt = True
+    return wrapped_view
+
+# For class-based views
+aiwaf_exempt_view = method_decorator(aiwaf_exempt, name='dispatch')
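For orientation, a minimal sketch of how the new decorator is meant to be applied, based on the docstring above and on `is_view_exempt()` in the new `aiwaf/utils.py`; the view names are hypothetical and not part of the package:

```python
# Illustrative only: view names below are hypothetical.
from django.http import JsonResponse
from django.utils.decorators import method_decorator
from django.views import View

from aiwaf.decorators import aiwaf_exempt


@aiwaf_exempt
def health_check(request):
    # The wrapper sets `health_check.aiwaf_exempt = True`; the middleware's
    # is_exempt() helper reads that flag via request.resolver_match.func.
    return JsonResponse({"ok": True})


@method_decorator(aiwaf_exempt, name="dispatch")
class PaymentWebhook(View):
    # For class-based views the flag lands on dispatch(), which
    # is_view_exempt() also checks.
    def post(self, request):
        return JsonResponse({"received": True})
```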
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/commands/add_ipexemption.py

@@ -1,5 +1,5 @@
 from django.core.management.base import BaseCommand, CommandError
-from aiwaf.
+from aiwaf.storage import get_exemption_store
 
 class Command(BaseCommand):
     help = 'Add an IP address to the IPExemption list (prevents blacklisting)'
@@ -11,12 +11,13 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         ip = options['ip']
         reason = options['reason']
-
-
+
+        store = get_exemption_store()
+
+        if store.is_exempted(ip):
             self.stdout.write(self.style.WARNING(f'IP {ip} is already exempted.'))
         else:
+            store.add_ip(ip, reason)
             self.stdout.write(self.style.SUCCESS(f'IP {ip} added to exemption list.'))
-
-
-            obj.save()
-            self.stdout.write(self.style.SUCCESS(f'Reason set to: {reason}'))
+        if reason:
+            self.stdout.write(self.style.SUCCESS(f'Reason: {reason}'))
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/management/commands/aiwaf_reset.py

@@ -1,5 +1,5 @@
 from django.core.management.base import BaseCommand
-from aiwaf.
+from aiwaf.storage import get_blacklist_store, get_exemption_store
 
 class Command(BaseCommand):
     help = 'Reset AI-WAF by clearing all blacklist and exemption (whitelist) entries'
@@ -26,9 +26,12 @@ class Command(BaseCommand):
         exemptions_only = options['exemptions_only']
         confirm = options['confirm']
 
+        blacklist_store = get_blacklist_store()
+        exemption_store = get_exemption_store()
+
         # Count current entries
-        blacklist_count =
-        exemption_count =
+        blacklist_count = len(blacklist_store.get_all())
+        exemption_count = len(exemption_store.get_all())
 
         if blacklist_only and exemptions_only:
             self.stdout.write(self.style.ERROR('Cannot use both --blacklist-only and --exemptions-only flags'))
@@ -61,10 +64,18 @@ class Command(BaseCommand):
         deleted_counts = {'blacklist': 0, 'exemptions': 0}
 
         if clear_blacklist:
-
+            # Clear blacklist entries
+            blacklist_entries = blacklist_store.get_all()
+            for entry in blacklist_entries:
+                blacklist_store.remove_ip(entry['ip_address'])
+            deleted_counts['blacklist'] = len(blacklist_entries)
 
         if clear_exemptions:
-
+            # Clear exemption entries
+            exemption_entries = exemption_store.get_all()
+            for entry in exemption_entries:
+                exemption_store.remove_ip(entry['ip_address'])
+            deleted_counts['exemptions'] = len(exemption_entries)
 
         # Report results
         if clear_blacklist and clear_exemptions:
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/middleware.py

@@ -14,37 +14,11 @@ from django.core.cache import cache
 from django.db.models import F
 from django.apps import apps
 from django.urls import get_resolver
-from .trainer import STATIC_KW, STATUS_IDX,
+from .trainer import STATIC_KW, STATUS_IDX, path_exists_in_django
 from .blacklist_manager import BlacklistManager
-from .models import
-
-
-
-def is_exempt_path(path):
-    path = path.lower()
-
-    # Default login paths that should always be exempt
-    default_login_paths = [
-        "/admin/login/",
-        "/admin/",
-        "/login/",
-        "/accounts/login/",
-        "/auth/login/",
-        "/signin/",
-    ]
-
-    # Check default login paths
-    for login_path in default_login_paths:
-        if path.startswith(login_path):
-            return True
-
-    # Check user-configured exempt paths
-    exempt_paths = getattr(settings, "AIWAF_EXEMPT_PATHS", [])
-    for exempt in exempt_paths:
-        if path == exempt or path.startswith(exempt.rstrip("/") + "/"):
-            return True
-
-    return False
+from .models import IPExemption
+from .utils import is_exempt, get_ip, is_ip_exempted
+from .storage import get_keyword_store
 
 MODEL_PATH = getattr(
     settings,
@@ -93,7 +67,7 @@ class IPAndKeywordBlockMiddleware:
 
     def __call__(self, request):
         raw_path = request.path.lower()
-        if
+        if is_exempt(request):
             return self.get_response(request)
         ip = get_ip(request)
         path = raw_path.lstrip("/")
@@ -101,15 +75,14 @@ class IPAndKeywordBlockMiddleware:
             return self.get_response(request)
         if BlacklistManager.is_blocked(ip):
             return JsonResponse({"error": "blocked"}, status=403)
+
+        keyword_store = get_keyword_store()
         segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
+
         for seg in segments:
-
-
-            dynamic_top =
-                DynamicKeyword.objects
-                .order_by("-count")
-                .values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
-            )
+            keyword_store.add_keyword(seg)
+
+        dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
         all_kw = set(STATIC_KW) | set(dynamic_top)
         suspicious_kw = {
             kw for kw in all_kw
@@ -132,7 +105,7 @@ class RateLimitMiddleware:
         self.get_response = get_response
 
     def __call__(self, request):
-        if
+        if is_exempt(request):
             return self.get_response(request)
 
         ip = get_ip(request)
@@ -161,7 +134,7 @@ class AIAnomalyMiddleware(MiddlewareMixin):
             self.model = joblib.load(model_path)
 
     def process_request(self, request):
-        if
+        if is_exempt(request):
             return None
         request._start_time = time.time()
         ip = get_ip(request)
@@ -172,14 +145,14 @@ class AIAnomalyMiddleware(MiddlewareMixin):
             return None
 
     def process_response(self, request, response):
-        if
+        if is_exempt(request):
             return response
         ip = get_ip(request)
         now = time.time()
         key = f"aiwaf:{ip}"
         data = cache.get(key, [])
         path_len = len(request.path)
-        if not path_exists_in_django(request.path) and not
+        if not path_exists_in_django(request.path) and not is_exempt(request):
             kw_hits = sum(1 for kw in STATIC_KW if kw in request.path.lower())
         else:
             kw_hits = 0
@@ -199,10 +172,11 @@ class AIAnomalyMiddleware(MiddlewareMixin):
         data.append((now, request.path, response.status_code, resp_time))
         data = [d for d in data if now - d[0] < self.WINDOW]
         cache.set(key, data, timeout=self.WINDOW)
+
+        keyword_store = get_keyword_store()
         for seg in re.split(r"\W+", request.path.lower()):
             if len(seg) > 3:
-
-                DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
+                keyword_store.add_keyword(seg)
 
         return response
 
@@ -211,7 +185,7 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
     MIN_FORM_TIME = getattr(settings, "AIWAF_MIN_FORM_TIME", 1.0)  # seconds
 
     def process_request(self, request):
-        if
+        if is_exempt(request):
             return None
 
         ip = get_ip(request)
@@ -255,7 +229,7 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
 
 class UUIDTamperMiddleware(MiddlewareMixin):
     def process_view(self, request, view_func, view_args, view_kwargs):
-        if
+        if is_exempt(request):
             return None
         uid = view_kwargs.get("uuid")
         if not uid:
aiwaf-0.1.8.7/aiwaf/storage.py
ADDED

@@ -0,0 +1,351 @@
+import os, csv, gzip, glob
+import numpy as np
+import pandas as pd
+from django.conf import settings
+from django.utils import timezone
+from .models import FeatureSample, BlacklistEntry, IPExemption, DynamicKeyword
+
+# Configuration
+STORAGE_MODE = getattr(settings, "AIWAF_STORAGE_MODE", "models")  # "models" or "csv"
+CSV_DATA_DIR = getattr(settings, "AIWAF_CSV_DATA_DIR", "aiwaf_data")
+FEATURE_CSV = getattr(settings, "AIWAF_CSV_PATH", os.path.join(CSV_DATA_DIR, "access_samples.csv"))
+BLACKLIST_CSV = os.path.join(CSV_DATA_DIR, "blacklist.csv")
+EXEMPTION_CSV = os.path.join(CSV_DATA_DIR, "exemptions.csv")
+KEYWORDS_CSV = os.path.join(CSV_DATA_DIR, "keywords.csv")
+
+CSV_HEADER = [
+    "ip","path_len","kw_hits","resp_time",
+    "status_idx","burst_count","total_404","label"
+]
+
+def ensure_csv_directory():
+    """Ensure the CSV data directory exists"""
+    if STORAGE_MODE == "csv" and not os.path.exists(CSV_DATA_DIR):
+        os.makedirs(CSV_DATA_DIR)
+
+class CsvFeatureStore:
+    @staticmethod
+    def persist_rows(rows):
+        ensure_csv_directory()
+        new_file = not os.path.exists(FEATURE_CSV)
+        with open(FEATURE_CSV, "a", newline="", encoding="utf-8") as f:
+            w = csv.writer(f)
+            if new_file:
+                w.writerow(CSV_HEADER)
+            w.writerows(rows)
+
+    @staticmethod
+    def load_matrix():
+        if not os.path.exists(FEATURE_CSV):
+            return np.empty((0,6))
+        df = pd.read_csv(
+            FEATURE_CSV,
+            names=CSV_HEADER,
+            skiprows=1,
+            engine="python",
+            on_bad_lines="skip"
+        )
+        feature_cols = CSV_HEADER[1:7]
+        df[feature_cols] = df[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)
+        return df[feature_cols].to_numpy()
+
+class DbFeatureStore:
+    @staticmethod
+    def persist_rows(rows):
+        objs = []
+        for ip,pl,kw,rt,si,bc,t404,label in rows:
+            objs.append(FeatureSample(
+                ip=ip, path_len=pl, kw_hits=kw,
+                resp_time=rt, status_idx=si,
+                burst_count=bc, total_404=t404,
+                label=label
+            ))
+        FeatureSample.objects.bulk_create(objs, ignore_conflicts=True)
+
+    @staticmethod
+    def load_matrix():
+        qs = FeatureSample.objects.all().values_list(
+            "path_len","kw_hits","resp_time","status_idx","burst_count","total_404"
+        )
+        return np.array(list(qs), dtype=float)
+
+def get_store():
+    if getattr(settings, "AIWAF_FEATURE_STORE", "csv") == "db":
+        return DbFeatureStore
+    return CsvFeatureStore
+
+
+# ============= CSV Storage Classes =============
+
+class CsvBlacklistStore:
+    """CSV-based storage for IP blacklist entries"""
+
+    @staticmethod
+    def add_ip(ip_address, reason):
+        ensure_csv_directory()
+        # Check if IP already exists
+        if CsvBlacklistStore.is_blocked(ip_address):
+            return
+
+        # Add new entry
+        new_file = not os.path.exists(BLACKLIST_CSV)
+        with open(BLACKLIST_CSV, "a", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f)
+            if new_file:
+                writer.writerow(["ip_address", "reason", "created_at"])
+            writer.writerow([ip_address, reason, timezone.now().isoformat()])
+
+    @staticmethod
+    def is_blocked(ip_address):
+        if not os.path.exists(BLACKLIST_CSV):
+            return False
+
+        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                if row["ip_address"] == ip_address:
+                    return True
+        return False
+
+    @staticmethod
+    def get_all():
+        """Return list of dictionaries with blacklist entries"""
+        if not os.path.exists(BLACKLIST_CSV):
+            return []
+
+        entries = []
+        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                entries.append(row)
+        return entries
+
+    @staticmethod
+    def remove_ip(ip_address):
+        if not os.path.exists(BLACKLIST_CSV):
+            return
+
+        # Read all entries except the one to remove
+        entries = []
+        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            entries = [row for row in reader if row["ip_address"] != ip_address]
+
+        # Write back the filtered entries
+        with open(BLACKLIST_CSV, "w", newline="", encoding="utf-8") as f:
+            if entries:
+                writer = csv.DictWriter(f, fieldnames=["ip_address", "reason", "created_at"])
+                writer.writeheader()
+                writer.writerows(entries)
+
+
+class CsvExemptionStore:
+    """CSV-based storage for IP exemption entries"""
+
+    @staticmethod
+    def add_ip(ip_address, reason=""):
+        ensure_csv_directory()
+        # Check if IP already exists
+        if CsvExemptionStore.is_exempted(ip_address):
+            return
+
+        # Add new entry
+        new_file = not os.path.exists(EXEMPTION_CSV)
+        with open(EXEMPTION_CSV, "a", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f)
+            if new_file:
+                writer.writerow(["ip_address", "reason", "created_at"])
+            writer.writerow([ip_address, reason, timezone.now().isoformat()])
+
+    @staticmethod
+    def is_exempted(ip_address):
+        if not os.path.exists(EXEMPTION_CSV):
+            return False
+
+        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                if row["ip_address"] == ip_address:
+                    return True
+        return False
+
+    @staticmethod
+    def get_all():
+        """Return list of dictionaries with exemption entries"""
+        if not os.path.exists(EXEMPTION_CSV):
+            return []
+
+        entries = []
+        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                entries.append(row)
+        return entries
+
+    @staticmethod
+    def remove_ip(ip_address):
+        if not os.path.exists(EXEMPTION_CSV):
+            return
+
+        # Read all entries except the one to remove
+        entries = []
+        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            entries = [row for row in reader if row["ip_address"] != ip_address]
+
+        # Write back the filtered entries
+        with open(EXEMPTION_CSV, "w", newline="", encoding="utf-8") as f:
+            if entries:
+                writer = csv.DictWriter(f, fieldnames=["ip_address", "reason", "created_at"])
+                writer.writeheader()
+                writer.writerows(entries)
+
+
+class CsvKeywordStore:
+    """CSV-based storage for dynamic keywords"""
+
+    @staticmethod
+    def add_keyword(keyword, count=1):
+        ensure_csv_directory()
+
+        # Read existing keywords
+        keywords = CsvKeywordStore._load_keywords()
+
+        # Update or add keyword
+        keywords[keyword] = keywords.get(keyword, 0) + count
+
+        # Save back to file
+        CsvKeywordStore._save_keywords(keywords)
+
+    @staticmethod
+    def get_top_keywords(limit=10):
+        keywords = CsvKeywordStore._load_keywords()
+        # Sort by count in descending order and return top N
+        sorted_keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True)
+        return [kw for kw, count in sorted_keywords[:limit]]
+
+    @staticmethod
+    def remove_keyword(keyword):
+        keywords = CsvKeywordStore._load_keywords()
+        if keyword in keywords:
+            del keywords[keyword]
+            CsvKeywordStore._save_keywords(keywords)
+
+    @staticmethod
+    def clear_all():
+        if os.path.exists(KEYWORDS_CSV):
+            os.remove(KEYWORDS_CSV)
+
+    @staticmethod
+    def _load_keywords():
+        """Load keywords from CSV file as a dictionary"""
+        if not os.path.exists(KEYWORDS_CSV):
+            return {}
+
+        keywords = {}
+        with open(KEYWORDS_CSV, "r", newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                keywords[row["keyword"]] = int(row["count"])
+        return keywords
+
+    @staticmethod
+    def _save_keywords(keywords):
+        """Save keywords dictionary to CSV file"""
+        with open(KEYWORDS_CSV, "w", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f)
+            writer.writerow(["keyword", "count", "last_updated"])
+            for keyword, count in keywords.items():
+                writer.writerow([keyword, count, timezone.now().isoformat()])
+
+
+# ============= Storage Factory Functions =============
+
+def get_blacklist_store():
+    """Return appropriate blacklist storage class based on settings"""
+    if STORAGE_MODE == "csv":
+        return CsvBlacklistStore
+    else:
+        # Return a wrapper for Django models
+        return ModelBlacklistStore
+
+
+def get_exemption_store():
+    """Return appropriate exemption storage class based on settings"""
+    if STORAGE_MODE == "csv":
+        return CsvExemptionStore
+    else:
+        return ModelExemptionStore
+
+
+def get_keyword_store():
+    """Return appropriate keyword storage class based on settings"""
+    if STORAGE_MODE == "csv":
+        return CsvKeywordStore
+    else:
+        return ModelKeywordStore
+
+
+# ============= Django Model Wrappers =============
+
+class ModelBlacklistStore:
+    """Django model-based storage for blacklist entries"""
+
+    @staticmethod
+    def add_ip(ip_address, reason):
+        BlacklistEntry.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
+
+    @staticmethod
+    def is_blocked(ip_address):
+        return BlacklistEntry.objects.filter(ip_address=ip_address).exists()
+
+    @staticmethod
+    def get_all():
+        return list(BlacklistEntry.objects.values("ip_address", "reason", "created_at"))
+
+    @staticmethod
+    def remove_ip(ip_address):
+        BlacklistEntry.objects.filter(ip_address=ip_address).delete()
+
+
+class ModelExemptionStore:
+    """Django model-based storage for exemption entries"""
+
+    @staticmethod
+    def add_ip(ip_address, reason=""):
+        IPExemption.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
+
+    @staticmethod
+    def is_exempted(ip_address):
+        return IPExemption.objects.filter(ip_address=ip_address).exists()
+
+    @staticmethod
+    def get_all():
+        return list(IPExemption.objects.values("ip_address", "reason", "created_at"))
+
+    @staticmethod
+    def remove_ip(ip_address):
+        IPExemption.objects.filter(ip_address=ip_address).delete()
+
+
+class ModelKeywordStore:
+    """Django model-based storage for dynamic keywords"""
+
+    @staticmethod
+    def add_keyword(keyword, count=1):
+        obj, created = DynamicKeyword.objects.get_or_create(keyword=keyword, defaults={"count": count})
+        if not created:
+            obj.count += count
+            obj.save()
+
+    @staticmethod
+    def get_top_keywords(limit=10):
+        return list(DynamicKeyword.objects.order_by("-count").values_list("keyword", flat=True)[:limit])
+
+    @staticmethod
+    def remove_keyword(keyword):
+        DynamicKeyword.objects.filter(keyword=keyword).delete()
+
+    @staticmethod
+    def clear_all():
+        DynamicKeyword.objects.all().delete()
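A brief sketch of how the storage factories above behave once the CSV backend is selected; the setting names come from the README section earlier in this diff, and the IP addresses are placeholders:

```python
# settings.py -- values mirror the README section of this diff
AIWAF_STORAGE_MODE = "csv"         # pick the CSV backend instead of Django models
AIWAF_CSV_DATA_DIR = "aiwaf_data"  # CSV files are created under this directory

# Anywhere after Django settings are configured (IPs below are placeholders):
from aiwaf.storage import get_blacklist_store, get_exemption_store

blacklist = get_blacklist_store()                 # resolves to CsvBlacklistStore in CSV mode
blacklist.add_ip("203.0.113.7", "manual block")   # appends to aiwaf_data/blacklist.csv
assert blacklist.is_blocked("203.0.113.7")

exemptions = get_exemption_store()                # resolves to CsvExemptionStore in CSV mode
exemptions.add_ip("198.51.100.2", "office IP")    # writes aiwaf_data/exemptions.csv
```

Note that `STORAGE_MODE` is read once at module import time via `getattr(settings, ...)`, so the setting has to be in place before `aiwaf.storage` is first imported.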
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf/trainer.py

@@ -13,6 +13,8 @@ from sklearn.ensemble import IsolationForest
 from django.conf import settings
 from django.apps import apps
 from django.db.models import F
+from .utils import is_exempt_path
+from .storage import get_blacklist_store, get_exemption_store, get_keyword_store
 
 # ─────────── Configuration ───────────
 LOG_PATH = settings.AIWAF_ACCESS_LOG
@@ -27,36 +29,6 @@ _LOG_RX = re.compile(
 )
 
 
-BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
-DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
-IPExemption = apps.get_model("aiwaf", "IPExemption")
-
-
-def is_exempt_path(path: str) -> bool:
-    path = path.lower()
-
-    # Default login paths that should always be exempt
-    default_login_paths = [
-        "/admin/login/",
-        "/admin/",
-        "/login/",
-        "/accounts/login/",
-        "/auth/login/",
-        "/signin/",
-    ]
-
-    # Check default login paths
-    for login_path in default_login_paths:
-        if path.startswith(login_path):
-            return True
-
-    # Check user-configured exempt paths
-    for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
-        if path == exempt or path.startswith(exempt.rstrip("/") + "/"):
-            return True
-    return False
-
-
 def path_exists_in_django(path: str) -> bool:
     from django.urls import get_resolver
     from django.urls.resolvers import URLResolver
@@ -78,13 +50,17 @@ def path_exists_in_django(path: str) -> bool:
 
 
 def remove_exempt_keywords() -> None:
+    keyword_store = get_keyword_store()
     exempt_tokens = set()
+
     for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
        for seg in re.split(r"\W+", path.strip("/").lower()):
            if len(seg) > 3:
                exempt_tokens.add(seg)
-
-
+
+    # Remove exempt tokens from keyword storage
+    for token in exempt_tokens:
+        keyword_store.remove_keyword(token)
 
 
 def _read_all_logs() -> list[str]:
@@ -122,10 +98,15 @@ def _parse(line: str) -> dict | None:
 
 def train() -> None:
     remove_exempt_keywords()
-
-
-
-
+
+    # Remove any IPs in IPExemption from the blacklist using storage system
+    exemption_store = get_exemption_store()
+    blacklist_store = get_blacklist_store()
+
+    exempted_ips = [entry['ip_address'] for entry in exemption_store.get_all()]
+    for ip in exempted_ips:
+        blacklist_store.remove_ip(ip)
+
     raw_lines = _read_all_logs()
     if not raw_lines:
         print("No log lines found – check AIWAF_ACCESS_LOG setting.")
@@ -157,10 +138,8 @@ def train() -> None:
 
         # Don't block if majority of 404s are on login paths
         if count > login_404s:  # More non-login 404s than login 404s
-
-
-                defaults={"reason": f"Excessive 404s (≥6 non-login, {count}/{total_404s})"}
-            )
+            blacklist_store = get_blacklist_store()
+            blacklist_store.add_ip(ip, f"Excessive 404s (≥6 non-login, {count}/{total_404s})")
 
     feature_dicts = []
     for r in parsed:
@@ -211,10 +190,13 @@ def train() -> None:
     if anomalous_ips:
         print(f"⚠️ Detected {len(anomalous_ips)} potentially anomalous IPs during training")
 
+        exemption_store = get_exemption_store()
+        blacklist_store = get_blacklist_store()
         blocked_count = 0
+
         for ip in anomalous_ips:
             # Skip if IP is exempted
-            if
+            if exemption_store.is_exempted(ip):
                 continue
 
             # Get this IP's behavior from the data
@@ -237,10 +219,7 @@ def train() -> None:
                 continue
 
             # Block if it shows clear signs of malicious behavior
-
-                ip_address=ip,
-                defaults={"reason": f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})"}
-            )
+            blacklist_store.add_ip(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
             blocked_count += 1
             print(f"  - {ip}: Blocked for suspicious behavior (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
 
@@ -254,8 +233,10 @@ def train() -> None:
             if len(seg) > 3 and seg not in STATIC_KW:
                 tokens[seg] += 1
 
-
-
-
+    keyword_store = get_keyword_store()
+    top_tokens = tokens.most_common(10)
+
+    for kw, cnt in top_tokens:
+        keyword_store.add_keyword(kw, cnt)
 
-    print(f"DynamicKeyword
+    print(f"DynamicKeyword storage updated with top tokens: {[kw for kw, _ in top_tokens]}")
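The retraining entry point keeps its shape: `train()` now routes all reads and writes through the storage layer above. A sketch of invoking it, assuming the bundled `detect_and_train` management command (present unchanged in the file list) simply calls `train()`; the log path is an example:

```python
# settings.py (example path)
AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"

# Direct invocation from a shell or a scheduled job:
from aiwaf.trainer import train

# train() prunes exempt keywords/IPs via the storage layer, fits the
# IsolationForest, and blacklists offenders with blacklist_store.add_ip().
train()

# Presumably equivalent via the management command shipped in
# aiwaf/management/commands/detect_and_train.py:
#   python manage.py detect_and_train
```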
aiwaf-0.1.8.7/aiwaf/utils.py
ADDED

@@ -0,0 +1,106 @@
+import os
+import re
+import glob
+import gzip
+from datetime import datetime
+from django.conf import settings
+from .storage import get_exemption_store
+
+_LOG_RX = re.compile(
+    r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)"'
+)
+
+def get_ip(request):
+    xff = request.META.get("HTTP_X_FORWARDED_FOR", "")
+    if xff:
+        return xff.split(",")[0].strip()
+    return request.META.get("REMOTE_ADDR", "")
+
+def read_rotated_logs(base_path):
+    lines = []
+    if os.path.exists(base_path):
+        with open(base_path, "r", encoding="utf-8", errors="ignore") as f:
+            lines.extend(f.readlines())
+    for path in sorted(glob.glob(base_path + ".*")):
+        opener = gzip.open if path.endswith(".gz") else open
+        try:
+            with opener(path, "rt", encoding="utf-8", errors="ignore") as f:
+                lines.extend(f.readlines())
+        except OSError:
+            continue
+    return lines
+
+def parse_log_line(line):
+    m = _LOG_RX.search(line)
+    if not m:
+        return None
+    ip, ts_str, _, path, status, ref, ua = m.groups()
+    try:
+        ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
+    except ValueError:
+        return None
+    rt_m = re.search(r'response-time=(\d+\.\d+)', line)
+    rt = float(rt_m.group(1)) if rt_m else 0.0
+    return {
+        "ip": ip,
+        "timestamp": ts,
+        "path": path,
+        "status": status,
+        "referer": ref,
+        "user_agent": ua,
+        "response_time": rt
+    }
+
+def is_ip_exempted(ip):
+    """Check if IP is in exemption list"""
+    store = get_exemption_store()
+    return store.is_exempted(ip)
+
+def is_view_exempt(request):
+    """Check if the current view is marked as AI-WAF exempt"""
+    if hasattr(request, 'resolver_match') and request.resolver_match:
+        view_func = request.resolver_match.func
+
+        # Check if view function has aiwaf_exempt attribute
+        if hasattr(view_func, 'aiwaf_exempt'):
+            return True
+
+        # For class-based views, check the view class
+        if hasattr(view_func, 'view_class'):
+            view_class = view_func.view_class
+            if hasattr(view_class, 'aiwaf_exempt'):
+                return True
+
+            # Check dispatch method for method_decorator usage
+            dispatch_method = getattr(view_class, 'dispatch', None)
+            if dispatch_method and hasattr(dispatch_method, 'aiwaf_exempt'):
+                return True
+
+    return False
+
+def is_exempt_path(path):
+    """Check if path should be exempt from AI-WAF"""
+    path = path.lower()
+
+    # Default login paths (always exempt)
+    default_exempt = [
+        "/admin/login/", "/admin/", "/login/", "/accounts/login/",
+        "/auth/login/", "/signin/"
+    ]
+
+    # Check default exempt paths
+    for exempt_path in default_exempt:
+        if path.startswith(exempt_path):
+            return True
+
+    # Check configured exempt paths
+    exempt_paths = getattr(settings, "AIWAF_EXEMPT_PATHS", [])
+    for exempt_path in exempt_paths:
+        if path == exempt_path or path.startswith(exempt_path.rstrip("/") + "/"):
+            return True
+
+    return False
+
+def is_exempt(request):
+    """Check if request should be exempt (either by path or view decorator)"""
+    return is_exempt_path(request.path) or is_view_exempt(request)
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/aiwaf.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiwaf
-Version: 0.1.8.5
+Version: 0.1.8.7
 Summary: AI-powered Web Application Firewall
 Home-page: https://github.com/aayushgauba/aiwaf
 Author: Aayush Gauba
@@ -99,7 +99,26 @@ AIWAF_EXEMPT_PATHS = [
 ]
 ```
 
-
+**Exempt Views (Decorator):**
+Use the `@aiwaf_exempt` decorator to exempt specific views from all AI-WAF protection:
+
+```python
+from aiwaf.decorators import aiwaf_exempt
+from django.http import JsonResponse
+
+@aiwaf_exempt
+def my_api_view(request):
+    """This view will be exempt from all AI-WAF protection"""
+    return JsonResponse({"status": "success"})
+
+# Works with class-based views too
+@aiwaf_exempt
+class MyAPIView(View):
+    def get(self, request):
+        return JsonResponse({"method": "GET"})
+```
+
+All exempt paths and views are:
 - Skipped from keyword learning
 - Immune to AI blocking
 - Ignored in log training
@@ -169,6 +188,31 @@ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
 
 ---
 
+### Storage Configuration
+
+**Choose storage backend:**
+
+```python
+# Use Django models (default) - requires database tables
+AIWAF_STORAGE_MODE = "models"
+
+# OR use CSV files - no database required
+AIWAF_STORAGE_MODE = "csv"
+AIWAF_CSV_DATA_DIR = "aiwaf_data"  # Directory for CSV files
+```
+
+**CSV Mode Features:**
+- No database migrations required
+- Files stored in `aiwaf_data/` directory:
+  - `blacklist.csv` - Blocked IP addresses
+  - `exemptions.csv` - Exempt IP addresses
+  - `keywords.csv` - Dynamic keywords
+  - `access_samples.csv` - Feature samples for ML training
+- Perfect for lightweight deployments or when you prefer file-based storage
+- Management commands work identically in both modes
+
+---
+
 ### Optional (defaults shown)
 
 ```python
{aiwaf-0.1.8.5 → aiwaf-0.1.8.7}/setup.py

@@ -9,7 +9,7 @@ long_description = (HERE / "README.md").read_text(encoding="utf-8")
 
 setup(
     name="aiwaf",
-    version="0.1.8.5",
+    version="0.1.8.7",
     description="AI‑driven, self‑learning Web Application Firewall for Django",
     long_description=long_description,
     long_description_content_type="text/markdown",
aiwaf-0.1.8.5/aiwaf/blacklist_manager.py
DELETED

@@ -1,14 +0,0 @@
-from .models import BlacklistEntry
-
-class BlacklistManager:
-    @staticmethod
-    def block(ip, reason):
-        BlacklistEntry.objects.get_or_create(ip_address=ip, defaults={"reason": reason})
-
-    @staticmethod
-    def is_blocked(ip):
-        return BlacklistEntry.objects.filter(ip_address=ip).exists()
-
-    @staticmethod
-    def all_blocked():
-        return BlacklistEntry.objects.all()
aiwaf-0.1.8.5/aiwaf/storage.py
DELETED

@@ -1,61 +0,0 @@
-import os, csv, gzip, glob
-import numpy as np
-import pandas as pd
-from django.conf import settings
-from .models import FeatureSample
-
-DATA_FILE = getattr(settings, "AIWAF_CSV_PATH", "access_samples.csv")
-CSV_HEADER = [
-    "ip","path_len","kw_hits","resp_time",
-    "status_idx","burst_count","total_404","label"
-]
-
-class CsvFeatureStore:
-    @staticmethod
-    def persist_rows(rows):
-        new_file = not os.path.exists(DATA_FILE)
-        with open(DATA_FILE, "a", newline="", encoding="utf-8") as f:
-            w = csv.writer(f)
-            if new_file:
-                w.writerow(CSV_HEADER)
-            w.writerows(rows)
-
-    @staticmethod
-    def load_matrix():
-        if not os.path.exists(DATA_FILE):
-            return np.empty((0,6))
-        df = pd.read_csv(
-            DATA_FILE,
-            names=CSV_HEADER,
-            skiprows=1,
-            engine="python",
-            on_bad_lines="skip"
-        )
-        feature_cols = CSV_HEADER[1:7]
-        df[feature_cols] = df[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)
-        return df[feature_cols].to_numpy()
-
-class DbFeatureStore:
-    @staticmethod
-    def persist_rows(rows):
-        objs = []
-        for ip,pl,kw,rt,si,bc,t404,label in rows:
-            objs.append(FeatureSample(
-                ip=ip, path_len=pl, kw_hits=kw,
-                resp_time=rt, status_idx=si,
-                burst_count=bc, total_404=t404,
-                label=label
-            ))
-        FeatureSample.objects.bulk_create(objs, ignore_conflicts=True)
-
-    @staticmethod
-    def load_matrix():
-        qs = FeatureSample.objects.all().values_list(
-            "path_len","kw_hits","resp_time","status_idx","burst_count","total_404"
-        )
-        return np.array(list(qs), dtype=float)
-
-def get_store():
-    if getattr(settings, "AIWAF_FEATURE_STORE", "csv") == "db":
-        return DbFeatureStore
-    return CsvFeatureStore
aiwaf-0.1.8.5/aiwaf/utils.py
DELETED

@@ -1,50 +0,0 @@
-import os
-import re
-import glob
-import gzip
-from datetime import datetime
-
-_LOG_RX = re.compile(
-    r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)"'
-)
-
-def get_ip(request):
-    xff = request.META.get("HTTP_X_FORWARDED_FOR", "")
-    if xff:
-        return xff.split(",")[0].strip()
-    return request.META.get("REMOTE_ADDR", "")
-
-def read_rotated_logs(base_path):
-    lines = []
-    if os.path.exists(base_path):
-        with open(base_path, "r", encoding="utf-8", errors="ignore") as f:
-            lines.extend(f.readlines())
-    for path in sorted(glob.glob(base_path + ".*")):
-        opener = gzip.open if path.endswith(".gz") else open
-        try:
-            with opener(path, "rt", encoding="utf-8", errors="ignore") as f:
-                lines.extend(f.readlines())
-        except OSError:
-            continue
-    return lines
-
-def parse_log_line(line):
-    m = _LOG_RX.search(line)
-    if not m:
-        return None
-    ip, ts_str, _, path, status, ref, ua = m.groups()
-    try:
-        ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
-    except ValueError:
-        return None
-    rt_m = re.search(r'response-time=(\d+\.\d+)', line)
-    rt = float(rt_m.group(1)) if rt_m else 0.0
-    return {
-        "ip": ip,
-        "timestamp": ts,
-        "path": path,
-        "status": status,
-        "referer": ref,
-        "user_agent": ua,
-        "response_time": rt
-    }

The remaining files listed above with +0 -0 are unchanged between 0.1.8.5 and 0.1.8.7.