aiwaf 0.1.9.1.9__py3-none-any.whl → 0.1.9.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/__init__.py +1 -1
- aiwaf/management/commands/aiwaf_reset.py +107 -60
- aiwaf/middleware.py +311 -42
- aiwaf/storage.py +13 -0
- aiwaf/trainer.py +184 -9
- {aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/METADATA +126 -12
- {aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/RECORD +10 -10
- {aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/licenses/LICENSE +0 -0
- {aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/top_level.txt +0 -0
aiwaf/__init__.py
CHANGED
|
@@ -1,76 +1,115 @@
|
|
|
1
1
|
from django.core.management.base import BaseCommand
|
|
2
|
-
from aiwaf.storage import get_blacklist_store, get_exemption_store
|
|
2
|
+
from aiwaf.storage import get_blacklist_store, get_exemption_store, get_keyword_store
|
|
3
3
|
import sys
|
|
4
4
|
|
|
5
5
|
class Command(BaseCommand):
|
|
6
|
-
help = 'Reset AI-WAF by clearing
|
|
6
|
+
help = 'Reset AI-WAF by clearing blacklist, exemption, and/or keyword entries'
|
|
7
7
|
|
|
8
8
|
def add_arguments(self, parser):
|
|
9
9
|
parser.add_argument(
|
|
10
|
-
'--blacklist
|
|
10
|
+
'--blacklist',
|
|
11
11
|
action='store_true',
|
|
12
|
-
help='Clear
|
|
12
|
+
help='Clear blacklist entries (default: all)'
|
|
13
13
|
)
|
|
14
14
|
parser.add_argument(
|
|
15
|
-
'--exemptions
|
|
15
|
+
'--exemptions',
|
|
16
|
+
action='store_true',
|
|
17
|
+
help='Clear exemption entries (default: all)'
|
|
18
|
+
)
|
|
19
|
+
parser.add_argument(
|
|
20
|
+
'--keywords',
|
|
16
21
|
action='store_true',
|
|
17
|
-
help='Clear
|
|
22
|
+
help='Clear learned dynamic keywords (default: all)'
|
|
18
23
|
)
|
|
19
24
|
parser.add_argument(
|
|
20
25
|
'--confirm',
|
|
21
26
|
action='store_true',
|
|
22
27
|
help='Skip confirmation prompt'
|
|
23
28
|
)
|
|
29
|
+
|
|
30
|
+
# Legacy flags for backward compatibility
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
'--blacklist-only',
|
|
33
|
+
action='store_true',
|
|
34
|
+
help='(Legacy) Clear only blacklist entries'
|
|
35
|
+
)
|
|
36
|
+
parser.add_argument(
|
|
37
|
+
'--exemptions-only',
|
|
38
|
+
action='store_true',
|
|
39
|
+
help='(Legacy) Clear only exemption entries'
|
|
40
|
+
)
|
|
24
41
|
|
|
25
42
|
def handle(self, *args, **options):
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
43
|
+
# Parse arguments
|
|
44
|
+
blacklist_flag = options.get('blacklist', False)
|
|
45
|
+
exemptions_flag = options.get('exemptions', False)
|
|
46
|
+
keywords_flag = options.get('keywords', False)
|
|
47
|
+
confirm = options.get('confirm', False)
|
|
48
|
+
|
|
49
|
+
# Legacy support
|
|
50
|
+
blacklist_only = options.get('blacklist_only', False)
|
|
51
|
+
exemptions_only = options.get('exemptions_only', False)
|
|
52
|
+
|
|
53
|
+
# Handle legacy flags
|
|
54
|
+
if blacklist_only:
|
|
55
|
+
blacklist_flag = True
|
|
56
|
+
exemptions_flag = False
|
|
57
|
+
keywords_flag = False
|
|
58
|
+
elif exemptions_only:
|
|
59
|
+
blacklist_flag = False
|
|
60
|
+
exemptions_flag = True
|
|
61
|
+
keywords_flag = False
|
|
62
|
+
|
|
63
|
+
# If no specific flags, clear everything
|
|
64
|
+
if not (blacklist_flag or exemptions_flag or keywords_flag):
|
|
65
|
+
blacklist_flag = exemptions_flag = keywords_flag = True
|
|
29
66
|
|
|
30
67
|
try:
|
|
31
68
|
blacklist_store = get_blacklist_store()
|
|
32
69
|
exemption_store = get_exemption_store()
|
|
70
|
+
keyword_store = get_keyword_store()
|
|
33
71
|
except Exception as e:
|
|
34
72
|
self.stdout.write(self.style.ERROR(f'Error initializing stores: {e}'))
|
|
35
73
|
return
|
|
36
74
|
|
|
37
75
|
# Count current entries safely
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
blacklist_count = len(blacklist_entries)
|
|
41
|
-
except Exception as e:
|
|
42
|
-
self.stdout.write(self.style.WARNING(f'Warning: Could not count blacklist entries: {e}'))
|
|
43
|
-
blacklist_count = 0
|
|
44
|
-
blacklist_entries = []
|
|
76
|
+
counts = {'blacklist': 0, 'exemptions': 0, 'keywords': 0}
|
|
77
|
+
entries = {'blacklist': [], 'exemptions': [], 'keywords': []}
|
|
45
78
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
exemption_entries = []
|
|
79
|
+
if blacklist_flag:
|
|
80
|
+
try:
|
|
81
|
+
entries['blacklist'] = blacklist_store.get_all()
|
|
82
|
+
counts['blacklist'] = len(entries['blacklist'])
|
|
83
|
+
except Exception as e:
|
|
84
|
+
self.stdout.write(self.style.WARNING(f'Warning: Could not count blacklist entries: {e}'))
|
|
53
85
|
|
|
54
|
-
if
|
|
55
|
-
|
|
56
|
-
|
|
86
|
+
if exemptions_flag:
|
|
87
|
+
try:
|
|
88
|
+
entries['exemptions'] = exemption_store.get_all()
|
|
89
|
+
counts['exemptions'] = len(entries['exemptions'])
|
|
90
|
+
except Exception as e:
|
|
91
|
+
self.stdout.write(self.style.WARNING(f'Warning: Could not count exemption entries: {e}'))
|
|
57
92
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
93
|
+
if keywords_flag:
|
|
94
|
+
try:
|
|
95
|
+
entries['keywords'] = keyword_store.get_all_keywords()
|
|
96
|
+
counts['keywords'] = len(entries['keywords'])
|
|
97
|
+
except Exception as e:
|
|
98
|
+
self.stdout.write(self.style.WARNING(f'Warning: Could not count keyword entries: {e}'))
|
|
99
|
+
|
|
100
|
+
# Build action description
|
|
101
|
+
actions = []
|
|
102
|
+
if blacklist_flag:
|
|
103
|
+
actions.append(f"{counts['blacklist']} blacklist entries")
|
|
104
|
+
if exemptions_flag:
|
|
105
|
+
actions.append(f"{counts['exemptions']} exemption entries")
|
|
106
|
+
if keywords_flag:
|
|
107
|
+
actions.append(f"{counts['keywords']} learned keywords")
|
|
108
|
+
|
|
109
|
+
action = "Clear " + ", ".join(actions)
|
|
71
110
|
|
|
72
111
|
# Show what will be cleared
|
|
73
|
-
self.stdout.write(f"AI-WAF Reset: {action}")
|
|
112
|
+
self.stdout.write(f"🔧 AI-WAF Reset: {action}")
|
|
74
113
|
|
|
75
114
|
if not confirm:
|
|
76
115
|
try:
|
|
@@ -83,12 +122,12 @@ class Command(BaseCommand):
|
|
|
83
122
|
return
|
|
84
123
|
|
|
85
124
|
# Perform the reset
|
|
86
|
-
deleted_counts = {'blacklist': 0, 'exemptions': 0, 'errors': []}
|
|
125
|
+
deleted_counts = {'blacklist': 0, 'exemptions': 0, 'keywords': 0, 'errors': []}
|
|
87
126
|
|
|
88
|
-
if
|
|
127
|
+
if blacklist_flag:
|
|
89
128
|
# Clear blacklist entries
|
|
90
129
|
try:
|
|
91
|
-
for entry in
|
|
130
|
+
for entry in entries['blacklist']:
|
|
92
131
|
try:
|
|
93
132
|
blacklist_store.remove_ip(entry['ip_address'])
|
|
94
133
|
deleted_counts['blacklist'] += 1
|
|
@@ -97,10 +136,10 @@ class Command(BaseCommand):
|
|
|
97
136
|
except Exception as e:
|
|
98
137
|
deleted_counts['errors'].append(f"Error clearing blacklist: {e}")
|
|
99
138
|
|
|
100
|
-
if
|
|
139
|
+
if exemptions_flag:
|
|
101
140
|
# Clear exemption entries
|
|
102
141
|
try:
|
|
103
|
-
for entry in
|
|
142
|
+
for entry in entries['exemptions']:
|
|
104
143
|
try:
|
|
105
144
|
exemption_store.remove_ip(entry['ip_address'])
|
|
106
145
|
deleted_counts['exemptions'] += 1
|
|
@@ -109,26 +148,34 @@ class Command(BaseCommand):
|
|
|
109
148
|
except Exception as e:
|
|
110
149
|
deleted_counts['errors'].append(f"Error clearing exemptions: {e}")
|
|
111
150
|
|
|
151
|
+
if keywords_flag:
|
|
152
|
+
# Clear keyword entries
|
|
153
|
+
try:
|
|
154
|
+
for keyword in entries['keywords']:
|
|
155
|
+
try:
|
|
156
|
+
keyword_store.remove_keyword(keyword)
|
|
157
|
+
deleted_counts['keywords'] += 1
|
|
158
|
+
except Exception as e:
|
|
159
|
+
deleted_counts['errors'].append(f"Error removing keyword '{keyword}': {e}")
|
|
160
|
+
except Exception as e:
|
|
161
|
+
deleted_counts['errors'].append(f"Error clearing keywords: {e}")
|
|
162
|
+
|
|
112
163
|
# Report results
|
|
113
164
|
if deleted_counts['errors']:
|
|
114
165
|
for error in deleted_counts['errors']:
|
|
115
166
|
self.stdout.write(self.style.WARNING(f"⚠️ {error}"))
|
|
116
167
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
elif clear_exemptions:
|
|
129
|
-
self.stdout.write(
|
|
130
|
-
self.style.SUCCESS(f"✅ Exemptions cleared: Deleted {deleted_counts['exemptions']} entries")
|
|
131
|
-
)
|
|
168
|
+
# Build success message
|
|
169
|
+
success_parts = []
|
|
170
|
+
if blacklist_flag:
|
|
171
|
+
success_parts.append(f"{deleted_counts['blacklist']} blacklist entries")
|
|
172
|
+
if exemptions_flag:
|
|
173
|
+
success_parts.append(f"{deleted_counts['exemptions']} exemption entries")
|
|
174
|
+
if keywords_flag:
|
|
175
|
+
success_parts.append(f"{deleted_counts['keywords']} learned keywords")
|
|
176
|
+
|
|
177
|
+
success_message = "✅ Reset complete: Deleted " + ", ".join(success_parts)
|
|
178
|
+
self.stdout.write(self.style.SUCCESS(success_message))
|
|
132
179
|
|
|
133
180
|
if deleted_counts['errors']:
|
|
134
181
|
self.stdout.write(
|
aiwaf/middleware.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import time
|
|
4
4
|
import re
|
|
5
5
|
import os
|
|
6
|
+
import warnings
|
|
6
7
|
import numpy as np
|
|
7
8
|
import joblib
|
|
8
9
|
from django.db.models import UUIDField
|
|
@@ -82,6 +83,177 @@ class IPAndKeywordBlockMiddleware:
|
|
|
82
83
|
def __init__(self, get_response):
|
|
83
84
|
self.get_response = get_response
|
|
84
85
|
self.safe_prefixes = self._collect_safe_prefixes()
|
|
86
|
+
self.exempt_keywords = self._get_exempt_keywords()
|
|
87
|
+
self.legitimate_path_keywords = self._get_legitimate_path_keywords()
|
|
88
|
+
|
|
89
|
+
def _get_exempt_keywords(self):
|
|
90
|
+
"""Get keywords that should be exempt from blocking"""
|
|
91
|
+
exempt_tokens = set()
|
|
92
|
+
|
|
93
|
+
# Extract from exempt paths
|
|
94
|
+
for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
|
|
95
|
+
for seg in re.split(r"\W+", path.strip("/").lower()):
|
|
96
|
+
if len(seg) > 3:
|
|
97
|
+
exempt_tokens.add(seg)
|
|
98
|
+
|
|
99
|
+
# Add explicit exempt keywords from settings
|
|
100
|
+
exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
|
|
101
|
+
exempt_tokens.update(exempt_keywords)
|
|
102
|
+
|
|
103
|
+
return exempt_tokens
|
|
104
|
+
|
|
105
|
+
def _get_legitimate_path_keywords(self):
|
|
106
|
+
"""Get keywords that are legitimate in URL paths - uses same logic as trainer"""
|
|
107
|
+
# Import the enhanced function from trainer to ensure consistency
|
|
108
|
+
try:
|
|
109
|
+
from .trainer import get_legitimate_keywords
|
|
110
|
+
return get_legitimate_keywords()
|
|
111
|
+
except ImportError:
|
|
112
|
+
# Fallback to local implementation if trainer import fails
|
|
113
|
+
return self._get_legitimate_keywords_fallback()
|
|
114
|
+
|
|
115
|
+
def _get_legitimate_keywords_fallback(self):
|
|
116
|
+
"""Fallback implementation matching trainer.py logic"""
|
|
117
|
+
legitimate = set()
|
|
118
|
+
|
|
119
|
+
# Common legitimate path segments - matches trainer.py
|
|
120
|
+
default_legitimate = {
|
|
121
|
+
"profile", "user", "users", "account", "accounts", "settings", "dashboard",
|
|
122
|
+
"home", "about", "contact", "help", "search", "list", "lists",
|
|
123
|
+
"view", "views", "edit", "create", "update", "delete", "detail", "details",
|
|
124
|
+
"api", "auth", "login", "logout", "register", "signup", "signin",
|
|
125
|
+
"reset", "confirm", "activate", "verify", "page", "pages",
|
|
126
|
+
"category", "categories", "tag", "tags", "post", "posts",
|
|
127
|
+
"article", "articles", "blog", "blogs", "news", "item", "items",
|
|
128
|
+
"admin", "administration", "manage", "manager", "control", "panel",
|
|
129
|
+
"config", "configuration", "option", "options", "preference", "preferences"
|
|
130
|
+
}
|
|
131
|
+
legitimate.update(default_legitimate)
|
|
132
|
+
|
|
133
|
+
# Extract keywords from Django URL patterns and app names - matches trainer.py
|
|
134
|
+
legitimate.update(self._extract_django_route_keywords())
|
|
135
|
+
|
|
136
|
+
# Add from Django settings
|
|
137
|
+
allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
|
|
138
|
+
legitimate.update(allowed_path_keywords)
|
|
139
|
+
|
|
140
|
+
# Add exempt keywords
|
|
141
|
+
exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
|
|
142
|
+
legitimate.update(exempt_keywords)
|
|
143
|
+
|
|
144
|
+
return legitimate
|
|
145
|
+
|
|
146
|
+
def _extract_django_route_keywords(self):
|
|
147
|
+
"""Extract legitimate keywords from Django URL patterns, app names, and model names - matches trainer.py"""
|
|
148
|
+
keywords = set()
|
|
149
|
+
|
|
150
|
+
try:
|
|
151
|
+
from django.urls.resolvers import URLResolver, URLPattern
|
|
152
|
+
|
|
153
|
+
# Extract from app names and labels
|
|
154
|
+
for app_config in apps.get_app_configs():
|
|
155
|
+
# Add app name and label
|
|
156
|
+
if app_config.name:
|
|
157
|
+
for segment in re.split(r'[._-]', app_config.name.lower()):
|
|
158
|
+
if len(segment) > 2:
|
|
159
|
+
keywords.add(segment)
|
|
160
|
+
|
|
161
|
+
if app_config.label and app_config.label != app_config.name:
|
|
162
|
+
for segment in re.split(r'[._-]', app_config.label.lower()):
|
|
163
|
+
if len(segment) > 2:
|
|
164
|
+
keywords.add(segment)
|
|
165
|
+
|
|
166
|
+
# Extract from model names in the app
|
|
167
|
+
try:
|
|
168
|
+
for model in app_config.get_models():
|
|
169
|
+
model_name = model._meta.model_name.lower()
|
|
170
|
+
if len(model_name) > 2:
|
|
171
|
+
keywords.add(model_name)
|
|
172
|
+
# Add plural form
|
|
173
|
+
if not model_name.endswith('s'):
|
|
174
|
+
keywords.add(f"{model_name}s")
|
|
175
|
+
except Exception:
|
|
176
|
+
continue
|
|
177
|
+
|
|
178
|
+
# Extract from URL patterns
|
|
179
|
+
def extract_from_pattern(pattern, prefix=""):
|
|
180
|
+
try:
|
|
181
|
+
if isinstance(pattern, URLResolver):
|
|
182
|
+
# Handle include() patterns
|
|
183
|
+
namespace = getattr(pattern, 'namespace', None)
|
|
184
|
+
if namespace:
|
|
185
|
+
for segment in re.split(r'[._-]', namespace.lower()):
|
|
186
|
+
if len(segment) > 2:
|
|
187
|
+
keywords.add(segment)
|
|
188
|
+
|
|
189
|
+
# Extract from the pattern itself
|
|
190
|
+
pattern_str = str(pattern.pattern)
|
|
191
|
+
for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
|
|
192
|
+
keywords.add(segment.lower())
|
|
193
|
+
|
|
194
|
+
# Recurse into nested patterns
|
|
195
|
+
for nested_pattern in pattern.url_patterns:
|
|
196
|
+
extract_from_pattern(nested_pattern, prefix)
|
|
197
|
+
|
|
198
|
+
elif isinstance(pattern, URLPattern):
|
|
199
|
+
# Extract from URL pattern
|
|
200
|
+
pattern_str = str(pattern.pattern)
|
|
201
|
+
for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
|
|
202
|
+
keywords.add(segment.lower())
|
|
203
|
+
|
|
204
|
+
# Extract from view name if available
|
|
205
|
+
if hasattr(pattern.callback, '__name__'):
|
|
206
|
+
view_name = pattern.callback.__name__.lower()
|
|
207
|
+
for segment in re.split(r'[._-]', view_name):
|
|
208
|
+
if len(segment) > 2 and segment != 'view':
|
|
209
|
+
keywords.add(segment)
|
|
210
|
+
|
|
211
|
+
except Exception:
|
|
212
|
+
pass
|
|
213
|
+
|
|
214
|
+
# Process all URL patterns
|
|
215
|
+
root_resolver = get_resolver()
|
|
216
|
+
for pattern in root_resolver.url_patterns:
|
|
217
|
+
extract_from_pattern(pattern)
|
|
218
|
+
|
|
219
|
+
except Exception as e:
|
|
220
|
+
# Silently continue if extraction fails
|
|
221
|
+
pass
|
|
222
|
+
|
|
223
|
+
# Filter out very common/generic words that might be suspicious
|
|
224
|
+
filtered_keywords = set()
|
|
225
|
+
for keyword in keywords:
|
|
226
|
+
if (len(keyword) >= 3 and
|
|
227
|
+
keyword not in ['www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del']):
|
|
228
|
+
filtered_keywords.add(keyword)
|
|
229
|
+
|
|
230
|
+
return filtered_keywords
|
|
231
|
+
|
|
232
|
+
def _is_malicious_context(self, request, segment):
|
|
233
|
+
"""Determine if a keyword appears in a malicious context"""
|
|
234
|
+
path = request.path.lower()
|
|
235
|
+
|
|
236
|
+
# Check if this is a query parameter attack
|
|
237
|
+
query_string = request.META.get('QUERY_STRING', '').lower()
|
|
238
|
+
if segment in query_string and any(attack_pattern in query_string for attack_pattern in [
|
|
239
|
+
'union', 'select', 'drop', 'insert', 'script', 'alert', 'eval'
|
|
240
|
+
]):
|
|
241
|
+
return True
|
|
242
|
+
|
|
243
|
+
# Check if this looks like a file extension attack
|
|
244
|
+
if segment.startswith('.') and not path_exists_in_django(request.path):
|
|
245
|
+
return True
|
|
246
|
+
|
|
247
|
+
# Check if this looks like a directory traversal
|
|
248
|
+
if '../' in path or '..\\' in path:
|
|
249
|
+
return True
|
|
250
|
+
|
|
251
|
+
# Check if accessing non-existent paths with suspicious extensions
|
|
252
|
+
if (not path_exists_in_django(request.path) and
|
|
253
|
+
any(ext in segment for ext in ['.php', '.asp', '.jsp', '.cgi'])):
|
|
254
|
+
return True
|
|
255
|
+
|
|
256
|
+
return False
|
|
85
257
|
|
|
86
258
|
def _collect_safe_prefixes(self):
|
|
87
259
|
resolver = get_resolver()
|
|
@@ -102,35 +274,85 @@ class IPAndKeywordBlockMiddleware:
|
|
|
102
274
|
return prefixes
|
|
103
275
|
|
|
104
276
|
def __call__(self, request):
|
|
105
|
-
|
|
277
|
+
# First exemption check - early exit for exempt requests
|
|
106
278
|
if is_exempt(request):
|
|
107
279
|
return self.get_response(request)
|
|
280
|
+
|
|
281
|
+
raw_path = request.path.lower()
|
|
108
282
|
ip = get_ip(request)
|
|
109
283
|
path = raw_path.lstrip("/")
|
|
110
284
|
|
|
111
|
-
#
|
|
285
|
+
# Additional IP-level exemption check
|
|
286
|
+
from .storage import get_exemption_store
|
|
287
|
+
exemption_store = get_exemption_store()
|
|
288
|
+
if exemption_store.is_exempted(ip):
|
|
289
|
+
return self.get_response(request)
|
|
290
|
+
|
|
291
|
+
# BlacklistManager handles exemption checking internally
|
|
112
292
|
if BlacklistManager.is_blocked(ip):
|
|
113
293
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
114
294
|
|
|
295
|
+
# Check if path exists in Django - if yes, be more lenient
|
|
296
|
+
path_exists = path_exists_in_django(request.path)
|
|
297
|
+
|
|
115
298
|
keyword_store = get_keyword_store()
|
|
116
299
|
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
117
300
|
|
|
301
|
+
# Only learn keywords from non-existent paths or suspicious contexts
|
|
118
302
|
for seg in segments:
|
|
119
|
-
|
|
303
|
+
if not path_exists or self._is_malicious_context(request, seg):
|
|
304
|
+
keyword_store.add_keyword(seg)
|
|
120
305
|
|
|
121
306
|
dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
122
307
|
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
308
|
+
|
|
309
|
+
# Enhanced filtering logic
|
|
310
|
+
suspicious_kw = set()
|
|
311
|
+
for kw in all_kw:
|
|
312
|
+
# Skip if keyword is explicitly exempted
|
|
313
|
+
if kw in self.exempt_keywords:
|
|
314
|
+
continue
|
|
315
|
+
|
|
316
|
+
# Skip if this is a legitimate path keyword and path exists in Django
|
|
317
|
+
if (kw in self.legitimate_path_keywords and
|
|
318
|
+
path_exists and
|
|
319
|
+
not self._is_malicious_context(request, kw)):
|
|
320
|
+
continue
|
|
321
|
+
|
|
322
|
+
# Skip if path starts with safe prefix
|
|
323
|
+
if any(path.startswith(prefix) for prefix in self.safe_prefixes if prefix):
|
|
324
|
+
continue
|
|
325
|
+
|
|
326
|
+
suspicious_kw.add(kw)
|
|
327
|
+
|
|
328
|
+
# Check segments against suspicious keywords
|
|
127
329
|
for seg in segments:
|
|
330
|
+
is_suspicious = False
|
|
331
|
+
block_reason = ""
|
|
332
|
+
|
|
333
|
+
# Check if segment is in learned suspicious keywords
|
|
128
334
|
if seg in suspicious_kw:
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
335
|
+
is_suspicious = True
|
|
336
|
+
block_reason = f"Learned keyword: {seg}"
|
|
337
|
+
|
|
338
|
+
# Also check if segment appears to be inherently malicious
|
|
339
|
+
elif (not path_exists and
|
|
340
|
+
seg not in self.legitimate_path_keywords and
|
|
341
|
+
(self._is_malicious_context(request, seg) or
|
|
342
|
+
any(malicious_pattern in seg for malicious_pattern in
|
|
343
|
+
['hack', 'exploit', 'attack', 'malicious', 'evil', 'backdoor', 'inject', 'xss']))):
|
|
344
|
+
is_suspicious = True
|
|
345
|
+
block_reason = f"Inherently suspicious: {seg}"
|
|
346
|
+
|
|
347
|
+
if is_suspicious:
|
|
348
|
+
# Additional context check before blocking
|
|
349
|
+
if self._is_malicious_context(request, seg) or not path_exists:
|
|
350
|
+
# Double-check exemption before blocking
|
|
351
|
+
if not exemption_store.is_exempted(ip):
|
|
352
|
+
BlacklistManager.block(ip, f"Keyword block: {block_reason}")
|
|
353
|
+
# Check again after blocking attempt (exempted IPs won't be blocked)
|
|
354
|
+
if BlacklistManager.is_blocked(ip):
|
|
355
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
134
356
|
return self.get_response(request)
|
|
135
357
|
|
|
136
358
|
|
|
@@ -143,22 +365,32 @@ class RateLimitMiddleware:
|
|
|
143
365
|
self.FLOOD = getattr(settings, "AIWAF_RATE_FLOOD", 40) # hard limit
|
|
144
366
|
|
|
145
367
|
def __call__(self, request):
|
|
368
|
+
# First exemption check - early exit for exempt requests
|
|
146
369
|
if is_exempt(request):
|
|
147
370
|
return self.get_response(request)
|
|
148
371
|
|
|
149
372
|
ip = get_ip(request)
|
|
373
|
+
|
|
374
|
+
# Additional IP-level exemption check
|
|
375
|
+
from .storage import get_exemption_store
|
|
376
|
+
exemption_store = get_exemption_store()
|
|
377
|
+
if exemption_store.is_exempted(ip):
|
|
378
|
+
return self.get_response(request)
|
|
379
|
+
|
|
150
380
|
key = f"ratelimit:{ip}"
|
|
151
381
|
now = time.time()
|
|
152
382
|
timestamps = cache.get(key, [])
|
|
153
383
|
timestamps = [t for t in timestamps if now - t < self.WINDOW]
|
|
154
384
|
timestamps.append(now)
|
|
155
385
|
cache.set(key, timestamps, timeout=self.WINDOW)
|
|
386
|
+
|
|
156
387
|
if len(timestamps) > self.FLOOD:
|
|
157
|
-
#
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
388
|
+
# Double-check exemption before blocking
|
|
389
|
+
if not exemption_store.is_exempted(ip):
|
|
390
|
+
BlacklistManager.block(ip, "Flood pattern")
|
|
391
|
+
# Check if actually blocked (exempted IPs won't be blocked)
|
|
392
|
+
if BlacklistManager.is_blocked(ip):
|
|
393
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
162
394
|
if len(timestamps) > self.MAX:
|
|
163
395
|
return JsonResponse({"error": "too_many_requests"}, status=429)
|
|
164
396
|
return self.get_response(request)
|
|
@@ -174,19 +406,37 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
174
406
|
self.model = MODEL
|
|
175
407
|
|
|
176
408
|
def process_request(self, request):
|
|
409
|
+
# First exemption check - early exit for exempt requests
|
|
177
410
|
if is_exempt(request):
|
|
178
411
|
return None
|
|
412
|
+
|
|
179
413
|
request._start_time = time.time()
|
|
180
414
|
ip = get_ip(request)
|
|
181
|
-
|
|
415
|
+
|
|
416
|
+
# Additional IP-level exemption check
|
|
417
|
+
from .storage import get_exemption_store
|
|
418
|
+
exemption_store = get_exemption_store()
|
|
419
|
+
if exemption_store.is_exempted(ip):
|
|
420
|
+
return None
|
|
421
|
+
|
|
422
|
+
# BlacklistManager handles exemption checking internally
|
|
182
423
|
if BlacklistManager.is_blocked(ip):
|
|
183
424
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
184
425
|
return None
|
|
185
426
|
|
|
186
427
|
def process_response(self, request, response):
|
|
428
|
+
# First exemption check - early exit for exempt requests
|
|
187
429
|
if is_exempt(request):
|
|
188
430
|
return response
|
|
431
|
+
|
|
189
432
|
ip = get_ip(request)
|
|
433
|
+
|
|
434
|
+
# Additional IP-level exemption check
|
|
435
|
+
from .storage import get_exemption_store
|
|
436
|
+
exemption_store = get_exemption_store()
|
|
437
|
+
if exemption_store.is_exempted(ip):
|
|
438
|
+
return response
|
|
439
|
+
|
|
190
440
|
now = time.time()
|
|
191
441
|
key = f"aiwaf:{ip}"
|
|
192
442
|
data = cache.get(key, [])
|
|
@@ -251,17 +501,20 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
251
501
|
# Anomalous but looks legitimate - don't block
|
|
252
502
|
pass
|
|
253
503
|
else:
|
|
254
|
-
#
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
504
|
+
# Double-check exemption before blocking
|
|
505
|
+
if not exemption_store.is_exempted(ip):
|
|
506
|
+
BlacklistManager.block(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
|
|
507
|
+
# Check if actually blocked (exempted IPs won't be blocked)
|
|
508
|
+
if BlacklistManager.is_blocked(ip):
|
|
509
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
259
510
|
else:
|
|
260
511
|
# No recent data to analyze - be more conservative, only block on very suspicious current request
|
|
261
512
|
if kw_hits >= 2 or status_idx == STATUS_IDX.index("404"):
|
|
262
|
-
|
|
263
|
-
if
|
|
264
|
-
|
|
513
|
+
# Double-check exemption before blocking
|
|
514
|
+
if not exemption_store.is_exempted(ip):
|
|
515
|
+
BlacklistManager.block(ip, "AI anomaly + immediate suspicious behavior")
|
|
516
|
+
if BlacklistManager.is_blocked(ip):
|
|
517
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
265
518
|
|
|
266
519
|
data.append((now, request.path, response.status_code, resp_time))
|
|
267
520
|
data = [d for d in data if now - d[0] < self.WINDOW]
|
|
@@ -283,7 +536,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
283
536
|
return None
|
|
284
537
|
|
|
285
538
|
ip = get_ip(request)
|
|
286
|
-
|
|
539
|
+
|
|
540
|
+
# Additional IP-level exemption check
|
|
541
|
+
from .storage import get_exemption_store
|
|
542
|
+
exemption_store = get_exemption_store()
|
|
543
|
+
if exemption_store.is_exempted(ip):
|
|
544
|
+
return None
|
|
287
545
|
|
|
288
546
|
if request.method == "GET":
|
|
289
547
|
# Store timestamp for this IP's GET request
|
|
@@ -300,11 +558,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
300
558
|
if not any(request.path.lower().startswith(login_path) for login_path in [
|
|
301
559
|
"/admin/login/", "/login/", "/accounts/login/", "/auth/login/", "/signin/"
|
|
302
560
|
]):
|
|
303
|
-
#
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
561
|
+
# Double-check exemption before blocking
|
|
562
|
+
if not exemption_store.is_exempted(ip):
|
|
563
|
+
BlacklistManager.block(ip, "Direct POST without GET")
|
|
564
|
+
# Check if actually blocked (exempted IPs won't be blocked)
|
|
565
|
+
if BlacklistManager.is_blocked(ip):
|
|
566
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
308
567
|
else:
|
|
309
568
|
# Check timing - be more lenient for login paths
|
|
310
569
|
time_diff = time.time() - get_time
|
|
@@ -317,11 +576,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
317
576
|
min_time = 0.1 # Very short threshold for login forms
|
|
318
577
|
|
|
319
578
|
if time_diff < min_time:
|
|
320
|
-
#
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
579
|
+
# Double-check exemption before blocking
|
|
580
|
+
if not exemption_store.is_exempted(ip):
|
|
581
|
+
BlacklistManager.block(ip, f"Form submitted too quickly ({time_diff:.2f}s)")
|
|
582
|
+
# Check if actually blocked (exempted IPs won't be blocked)
|
|
583
|
+
if BlacklistManager.is_blocked(ip):
|
|
584
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
|
325
585
|
|
|
326
586
|
return None
|
|
327
587
|
|
|
@@ -330,11 +590,19 @@ class UUIDTamperMiddleware(MiddlewareMixin):
|
|
|
330
590
|
def process_view(self, request, view_func, view_args, view_kwargs):
|
|
331
591
|
if is_exempt(request):
|
|
332
592
|
return None
|
|
593
|
+
|
|
333
594
|
uid = view_kwargs.get("uuid")
|
|
334
595
|
if not uid:
|
|
335
596
|
return None
|
|
336
597
|
|
|
337
598
|
ip = get_ip(request)
|
|
599
|
+
|
|
600
|
+
# Additional IP-level exemption check
|
|
601
|
+
from .storage import get_exemption_store
|
|
602
|
+
exemption_store = get_exemption_store()
|
|
603
|
+
if exemption_store.is_exempted(ip):
|
|
604
|
+
return None
|
|
605
|
+
|
|
338
606
|
app_label = view_func.__module__.split(".")[0]
|
|
339
607
|
app_cfg = apps.get_app_config(app_label)
|
|
340
608
|
for Model in app_cfg.get_models():
|
|
@@ -345,8 +613,9 @@ class UUIDTamperMiddleware(MiddlewareMixin):
|
|
|
345
613
|
except (ValueError, TypeError):
|
|
346
614
|
continue
|
|
347
615
|
|
|
348
|
-
#
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
616
|
+
# Double-check exemption before blocking
|
|
617
|
+
if not exemption_store.is_exempted(ip):
|
|
618
|
+
BlacklistManager.block(ip, "UUID tampering")
|
|
619
|
+
# Check if actually blocked (exempted IPs won't be blocked)
|
|
620
|
+
if BlacklistManager.is_blocked(ip):
|
|
621
|
+
return JsonResponse({"error": "blocked"}, status=403)
|
aiwaf/storage.py
CHANGED
|
@@ -284,6 +284,19 @@ class ModelKeywordStore:
|
|
|
284
284
|
except Exception:
|
|
285
285
|
return []
|
|
286
286
|
|
|
287
|
+
@staticmethod
|
|
288
|
+
def get_all_keywords():
|
|
289
|
+
"""Get all keywords"""
|
|
290
|
+
_import_models()
|
|
291
|
+
if DynamicKeyword is None:
|
|
292
|
+
return []
|
|
293
|
+
try:
|
|
294
|
+
return list(
|
|
295
|
+
DynamicKeyword.objects.all().values_list('keyword', flat=True)
|
|
296
|
+
)
|
|
297
|
+
except Exception:
|
|
298
|
+
return []
|
|
299
|
+
|
|
287
300
|
@staticmethod
|
|
288
301
|
def reset_keywords():
|
|
289
302
|
"""Reset all keyword counts"""
|
aiwaf/trainer.py
CHANGED
|
@@ -51,17 +51,153 @@ def path_exists_in_django(path: str) -> bool:
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
def remove_exempt_keywords() -> None:
|
|
54
|
+
"""Remove exempt keywords from dynamic keyword storage"""
|
|
54
55
|
keyword_store = get_keyword_store()
|
|
55
56
|
exempt_tokens = set()
|
|
56
57
|
|
|
58
|
+
# Extract tokens from exempt paths
|
|
57
59
|
for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
|
|
58
60
|
for seg in re.split(r"\W+", path.strip("/").lower()):
|
|
59
61
|
if len(seg) > 3:
|
|
60
62
|
exempt_tokens.add(seg)
|
|
61
63
|
|
|
64
|
+
# Add explicit exempt keywords from settings
|
|
65
|
+
explicit_exempt = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
|
|
66
|
+
exempt_tokens.update(explicit_exempt)
|
|
67
|
+
|
|
68
|
+
# Add legitimate path keywords to prevent them from being learned as suspicious
|
|
69
|
+
allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
|
|
70
|
+
exempt_tokens.update(allowed_path_keywords)
|
|
71
|
+
|
|
62
72
|
# Remove exempt tokens from keyword storage
|
|
63
73
|
for token in exempt_tokens:
|
|
64
74
|
keyword_store.remove_keyword(token)
|
|
75
|
+
|
|
76
|
+
if exempt_tokens:
|
|
77
|
+
print(f"🧹 Removed {len(exempt_tokens)} exempt keywords from learning: {list(exempt_tokens)[:10]}")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_legitimate_keywords() -> set:
|
|
81
|
+
"""Get all legitimate keywords that shouldn't be learned as suspicious"""
|
|
82
|
+
legitimate = set()
|
|
83
|
+
|
|
84
|
+
# Common legitimate path segments
|
|
85
|
+
default_legitimate = {
|
|
86
|
+
"profile", "user", "users", "account", "accounts", "settings", "dashboard",
|
|
87
|
+
"home", "about", "contact", "help", "search", "list", "lists",
|
|
88
|
+
"view", "views", "edit", "create", "update", "delete", "detail", "details",
|
|
89
|
+
"api", "auth", "login", "logout", "register", "signup", "signin",
|
|
90
|
+
"reset", "confirm", "activate", "verify", "page", "pages",
|
|
91
|
+
"category", "categories", "tag", "tags", "post", "posts",
|
|
92
|
+
"article", "articles", "blog", "blogs", "news", "item", "items",
|
|
93
|
+
"admin", "administration", "manage", "manager", "control", "panel",
|
|
94
|
+
"config", "configuration", "option", "options", "preference", "preferences"
|
|
95
|
+
}
|
|
96
|
+
legitimate.update(default_legitimate)
|
|
97
|
+
|
|
98
|
+
# Extract keywords from Django URL patterns and app names
|
|
99
|
+
legitimate.update(_extract_django_route_keywords())
|
|
100
|
+
|
|
101
|
+
# Add from Django settings
|
|
102
|
+
allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
|
|
103
|
+
legitimate.update(allowed_path_keywords)
|
|
104
|
+
|
|
105
|
+
# Add exempt keywords
|
|
106
|
+
exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
|
|
107
|
+
legitimate.update(exempt_keywords)
|
|
108
|
+
|
|
109
|
+
return legitimate
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _extract_django_route_keywords() -> set:
|
|
113
|
+
"""Extract legitimate keywords from Django URL patterns, app names, and model names"""
|
|
114
|
+
keywords = set()
|
|
115
|
+
|
|
116
|
+
try:
|
|
117
|
+
from django.urls import get_resolver
|
|
118
|
+
from django.urls.resolvers import URLResolver, URLPattern
|
|
119
|
+
from django.apps import apps
|
|
120
|
+
|
|
121
|
+
# Extract from app names and labels
|
|
122
|
+
for app_config in apps.get_app_configs():
|
|
123
|
+
# Add app name and label
|
|
124
|
+
if app_config.name:
|
|
125
|
+
for segment in re.split(r'[._-]', app_config.name.lower()):
|
|
126
|
+
if len(segment) > 2:
|
|
127
|
+
keywords.add(segment)
|
|
128
|
+
|
|
129
|
+
if app_config.label and app_config.label != app_config.name:
|
|
130
|
+
for segment in re.split(r'[._-]', app_config.label.lower()):
|
|
131
|
+
if len(segment) > 2:
|
|
132
|
+
keywords.add(segment)
|
|
133
|
+
|
|
134
|
+
# Extract from model names in the app
|
|
135
|
+
try:
|
|
136
|
+
for model in app_config.get_models():
|
|
137
|
+
model_name = model._meta.model_name.lower()
|
|
138
|
+
if len(model_name) > 2:
|
|
139
|
+
keywords.add(model_name)
|
|
140
|
+
# Add plural form
|
|
141
|
+
if not model_name.endswith('s'):
|
|
142
|
+
keywords.add(f"{model_name}s")
|
|
143
|
+
except Exception:
|
|
144
|
+
continue
|
|
145
|
+
|
|
146
|
+
# Extract from URL patterns
|
|
147
|
+
def extract_from_pattern(pattern, prefix=""):
|
|
148
|
+
try:
|
|
149
|
+
if isinstance(pattern, URLResolver):
|
|
150
|
+
# Handle include() patterns
|
|
151
|
+
namespace = getattr(pattern, 'namespace', None)
|
|
152
|
+
if namespace:
|
|
153
|
+
for segment in re.split(r'[._-]', namespace.lower()):
|
|
154
|
+
if len(segment) > 2:
|
|
155
|
+
keywords.add(segment)
|
|
156
|
+
|
|
157
|
+
# Extract from the pattern itself
|
|
158
|
+
pattern_str = str(pattern.pattern)
|
|
159
|
+
for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
|
|
160
|
+
keywords.add(segment.lower())
|
|
161
|
+
|
|
162
|
+
# Recurse into nested patterns
|
|
163
|
+
for nested_pattern in pattern.url_patterns:
|
|
164
|
+
extract_from_pattern(nested_pattern, prefix)
|
|
165
|
+
|
|
166
|
+
elif isinstance(pattern, URLPattern):
|
|
167
|
+
# Extract from URL pattern
|
|
168
|
+
pattern_str = str(pattern.pattern)
|
|
169
|
+
for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
|
|
170
|
+
keywords.add(segment.lower())
|
|
171
|
+
|
|
172
|
+
# Extract from view name if available
|
|
173
|
+
if hasattr(pattern.callback, '__name__'):
|
|
174
|
+
view_name = pattern.callback.__name__.lower()
|
|
175
|
+
for segment in re.split(r'[._-]', view_name):
|
|
176
|
+
if len(segment) > 2 and segment != 'view':
|
|
177
|
+
keywords.add(segment)
|
|
178
|
+
|
|
179
|
+
except Exception:
|
|
180
|
+
pass
|
|
181
|
+
|
|
182
|
+
# Process all URL patterns
|
|
183
|
+
root_resolver = get_resolver()
|
|
184
|
+
for pattern in root_resolver.url_patterns:
|
|
185
|
+
extract_from_pattern(pattern)
|
|
186
|
+
|
|
187
|
+
except Exception as e:
|
|
188
|
+
print(f"Warning: Could not extract Django route keywords: {e}")
|
|
189
|
+
|
|
190
|
+
# Filter out very common/generic words that might be suspicious
|
|
191
|
+
filtered_keywords = set()
|
|
192
|
+
for keyword in keywords:
|
|
193
|
+
if (len(keyword) >= 3 and
|
|
194
|
+
keyword not in ['www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del']):
|
|
195
|
+
filtered_keywords.add(keyword)
|
|
196
|
+
|
|
197
|
+
if filtered_keywords:
|
|
198
|
+
print(f"🔗 Extracted {len(filtered_keywords)} legitimate keywords from Django routes and apps")
|
|
199
|
+
|
|
200
|
+
return filtered_keywords
|
|
65
201
|
|
|
66
202
|
|
|
67
203
|
def _read_all_logs() -> list[str]:
|
|
@@ -137,14 +273,20 @@ def _parse(line: str) -> dict | None:
|
|
|
137
273
|
|
|
138
274
|
|
|
139
275
|
def train() -> None:
|
|
276
|
+
"""Enhanced training with improved keyword filtering and exemption handling"""
|
|
277
|
+
print("🚀 Starting AIWAF enhanced training...")
|
|
278
|
+
|
|
279
|
+
# Remove exempt keywords first
|
|
140
280
|
remove_exempt_keywords()
|
|
141
281
|
|
|
142
282
|
# Remove any IPs in IPExemption from the blacklist using BlacklistManager
|
|
143
283
|
exemption_store = get_exemption_store()
|
|
144
284
|
|
|
145
285
|
exempted_ips = [entry['ip_address'] for entry in exemption_store.get_all()]
|
|
146
|
-
|
|
147
|
-
|
|
286
|
+
if exempted_ips:
|
|
287
|
+
print(f"🛡️ Found {len(exempted_ips)} exempted IPs - clearing from blacklist")
|
|
288
|
+
for ip in exempted_ips:
|
|
289
|
+
BlacklistManager.unblock(ip)
|
|
148
290
|
|
|
149
291
|
raw_lines = _read_all_logs()
|
|
150
292
|
if not raw_lines:
|
|
@@ -281,17 +423,50 @@ def train() -> None:
|
|
|
281
423
|
print(f" → Blocked {blocked_count}/{len(anomalous_ips)} anomalous IPs (others looked legitimate)")
|
|
282
424
|
|
|
283
425
|
tokens = Counter()
|
|
426
|
+
legitimate_keywords = get_legitimate_keywords()
|
|
427
|
+
|
|
428
|
+
print(f"🔍 Learning keywords from {len(parsed)} parsed requests...")
|
|
429
|
+
|
|
284
430
|
for r in parsed:
|
|
285
|
-
|
|
286
|
-
|
|
431
|
+
# Only learn from suspicious requests (errors on non-existent paths)
|
|
432
|
+
if (r["status"].startswith(("4", "5")) and
|
|
433
|
+
not path_exists_in_django(r["path"]) and
|
|
434
|
+
not is_exempt_path(r["path"])):
|
|
435
|
+
|
|
287
436
|
for seg in re.split(r"\W+", r["path"].lower()):
|
|
288
|
-
if len(seg) > 3 and
|
|
437
|
+
if (len(seg) > 3 and
|
|
438
|
+
seg not in STATIC_KW and
|
|
439
|
+
seg not in legitimate_keywords): # Don't learn legitimate keywords
|
|
289
440
|
tokens[seg] += 1
|
|
290
441
|
|
|
291
442
|
keyword_store = get_keyword_store()
|
|
292
|
-
top_tokens = tokens.most_common(10)
|
|
443
|
+
top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
293
444
|
|
|
445
|
+
# Additional filtering: only add keywords that appear suspicious enough
|
|
446
|
+
filtered_tokens = []
|
|
294
447
|
for kw, cnt in top_tokens:
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
448
|
+
# Don't add keywords that might be legitimate
|
|
449
|
+
if (cnt >= 2 and # Must appear at least twice
|
|
450
|
+
len(kw) >= 4 and # Must be at least 4 characters
|
|
451
|
+
kw not in legitimate_keywords): # Not in legitimate set
|
|
452
|
+
filtered_tokens.append((kw, cnt))
|
|
453
|
+
keyword_store.add_keyword(kw, cnt)
|
|
454
|
+
|
|
455
|
+
if filtered_tokens:
|
|
456
|
+
print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
|
|
457
|
+
else:
|
|
458
|
+
print("✅ No new suspicious keywords learned (good sign!)")
|
|
459
|
+
|
|
460
|
+
print(f"🎯 Dynamic keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
|
|
461
|
+
|
|
462
|
+
# Training summary
|
|
463
|
+
print("\n" + "="*60)
|
|
464
|
+
print("🎉 AIWAF ENHANCED TRAINING COMPLETE")
|
|
465
|
+
print("="*60)
|
|
466
|
+
print(f"📊 Training Data: {len(parsed)} log entries processed")
|
|
467
|
+
print(f"🤖 AI Model: Trained with {len(feature_cols)} features")
|
|
468
|
+
print(f"🚫 Blocked IPs: {blocked_count if 'blocked_count' in locals() else 0} suspicious IPs blocked")
|
|
469
|
+
print(f"🔑 Keywords: {len(filtered_tokens)} new suspicious keywords learned")
|
|
470
|
+
print(f"🛡️ Exemptions: {len(exempted_ips)} IPs protected from blocking")
|
|
471
|
+
print(f"✅ Enhanced protection now active with context-aware filtering!")
|
|
472
|
+
print("="*60)
|
|
{aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiwaf
|
|
3
|
-
Version: 0.1.9.1.9
|
|
3
|
+
Version: 0.1.9.2.1
|
|
4
4
|
Summary: AI-powered Web Application Firewall
|
|
5
5
|
Home-page: https://github.com/aayushgauba/aiwaf
|
|
6
6
|
Author: Aayush Gauba
|
|
@@ -25,7 +25,13 @@ Dynamic: requires-python
|
|
|
25
25
|
# AI‑WAF
|
|
26
26
|
|
|
27
27
|
> A self‑learning, Django‑friendly Web Application Firewall
|
|
28
|
-
> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection,
|
|
28
|
+
> with **enhanced context-aware protection**, rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, **smart keyword learning**, file‑extension probing detection, exempt path awareness, and daily retraining.
|
|
29
|
+
|
|
30
|
+
**🆕 Latest Enhancements:**
|
|
31
|
+
- ✅ **Smart Keyword Filtering** - Prevents blocking legitimate pages like `/profile/`
|
|
32
|
+
- ✅ **Granular Reset Commands** - Clear specific data types (`--blacklist`, `--keywords`, `--exemptions`)
|
|
33
|
+
- ✅ **Context-Aware Learning** - Only learns from suspicious requests, not legitimate site functionality
|
|
34
|
+
- ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
|
|
29
35
|
|
|
30
36
|
---
|
|
31
37
|
|
|
@@ -88,9 +94,19 @@ aiwaf/
|
|
|
88
94
|
- Burst count
|
|
89
95
|
- Total 404s
|
|
90
96
|
|
|
91
|
-
- **Dynamic Keyword
|
|
92
|
-
-
|
|
93
|
-
- **
|
|
97
|
+
- **Enhanced Dynamic Keyword Learning with Django Route Protection**
|
|
98
|
+
- **Smart Context-Aware Learning**: Only learns keywords from suspicious requests on non-existent paths
|
|
99
|
+
- **Automatic Django Route Extraction**: Automatically excludes keywords from:
|
|
100
|
+
- Valid Django URL patterns (`/profile/`, `/admin/`, `/api/`, etc.)
|
|
101
|
+
- Django app names and model names (users, posts, categories)
|
|
102
|
+
- View function names and URL namespaces
|
|
103
|
+
- **Unified Logic**: Both trainer and middleware use identical legitimate keyword detection
|
|
104
|
+
- **Configuration Options**:
|
|
105
|
+
- `AIWAF_ALLOWED_PATH_KEYWORDS` - Explicitly allow certain keywords in legitimate paths
|
|
106
|
+
- `AIWAF_EXEMPT_KEYWORDS` - Keywords that should never trigger blocking
|
|
107
|
+
- **Automatic Cleanup**: Keywords from `AIWAF_EXEMPT_PATHS` are automatically removed from the database
|
|
108
|
+
- **False Positive Prevention**: Stops learning legitimate site functionality as "malicious"
|
|
109
|
+
- **Inherent Malicious Detection**: Middleware also blocks obviously malicious keywords (`hack`, `exploit`, `attack`) even if not yet learned
|
|
94
110
|
|
|
95
111
|
- **File‑Extension Probing Detection**
|
|
96
112
|
Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
|
|
@@ -196,20 +212,44 @@ python manage.py add_ipexemption <ip-address> --reason "optional reason"
|
|
|
196
212
|
|
|
197
213
|
### Resetting AI-WAF
|
|
198
214
|
|
|
199
|
-
|
|
215
|
+
The `aiwaf_reset` command provides **granular control** for clearing different types of data:
|
|
200
216
|
|
|
201
217
|
```bash
|
|
202
|
-
# Clear everything (
|
|
218
|
+
# Clear everything (default - backward compatible)
|
|
203
219
|
python manage.py aiwaf_reset
|
|
204
220
|
|
|
205
|
-
# Clear everything without confirmation
|
|
221
|
+
# Clear everything without confirmation prompt
|
|
206
222
|
python manage.py aiwaf_reset --confirm
|
|
207
223
|
|
|
208
|
-
# Clear
|
|
209
|
-
python manage.py aiwaf_reset --blacklist
|
|
224
|
+
# 🆕 GRANULAR CONTROL - Clear specific data types
|
|
225
|
+
python manage.py aiwaf_reset --blacklist # Clear only blocked IPs
|
|
226
|
+
python manage.py aiwaf_reset --exemptions # Clear only exempted IPs
|
|
227
|
+
python manage.py aiwaf_reset --keywords # Clear only learned keywords
|
|
228
|
+
|
|
229
|
+
# 🔧 COMBINE OPTIONS - Mix and match as needed
|
|
230
|
+
python manage.py aiwaf_reset --blacklist --keywords # Keep exemptions
|
|
231
|
+
python manage.py aiwaf_reset --exemptions --keywords # Keep blacklist
|
|
232
|
+
python manage.py aiwaf_reset --blacklist --exemptions # Keep keywords
|
|
233
|
+
|
|
234
|
+
# 🚀 COMMON USE CASES
|
|
235
|
+
# Fix false positive keywords (like "profile" blocking legitimate pages)
|
|
236
|
+
python manage.py aiwaf_reset --keywords --confirm
|
|
237
|
+
python manage.py detect_and_train # Retrain with enhanced filtering
|
|
238
|
+
|
|
239
|
+
# Clear blocked IPs but preserve exemptions and learning
|
|
240
|
+
python manage.py aiwaf_reset --blacklist --confirm
|
|
241
|
+
|
|
242
|
+
# Legacy support (still works for backward compatibility)
|
|
243
|
+
python manage.py aiwaf_reset --blacklist-only # Legacy: blacklist only
|
|
244
|
+
python manage.py aiwaf_reset --exemptions-only # Legacy: exemptions only
|
|
245
|
+
```
|
|
210
246
|
|
|
211
|
-
|
|
212
|
-
|
|
247
|
+
**Enhanced Feedback:**
|
|
248
|
+
```bash
|
|
249
|
+
$ python manage.py aiwaf_reset --keywords
|
|
250
|
+
🔧 AI-WAF Reset: Clear 15 learned keywords
|
|
251
|
+
Are you sure you want to proceed? [y/N]: y
|
|
252
|
+
✅ Reset complete: Deleted 15 learned keywords
|
|
213
253
|
```
|
|
214
254
|
|
|
215
255
|
### Checking Dependencies
|
|
@@ -482,6 +522,21 @@ AIWAF_EXEMPT_PATHS = [ # optional but highly recommended
|
|
|
482
522
|
"/media/",
|
|
483
523
|
"/health/",
|
|
484
524
|
]
|
|
525
|
+
|
|
526
|
+
# 🆕 ENHANCED KEYWORD FILTERING OPTIONS
|
|
527
|
+
AIWAF_ALLOWED_PATH_KEYWORDS = [ # Keywords allowed in legitimate paths
|
|
528
|
+
"profile", "user", "account", "settings", "dashboard",
|
|
529
|
+
"admin", "api", "auth", "search", "contact", "about",
|
|
530
|
+
# Add your site-specific legitimate keywords
|
|
531
|
+
"buddycraft", "sc2", "starcraft", # Example: gaming site keywords
|
|
532
|
+
]
|
|
533
|
+
|
|
534
|
+
AIWAF_EXEMPT_KEYWORDS = [ # Keywords that never trigger blocking
|
|
535
|
+
"api", "webhook", "health", "static", "media",
|
|
536
|
+
"upload", "download", "backup", "profile"
|
|
537
|
+
]
|
|
538
|
+
|
|
539
|
+
AIWAF_DYNAMIC_TOP_N = 10 # Number of dynamic keywords to learn (default: 10)
|
|
485
540
|
```
|
|
486
541
|
|
|
487
542
|
> **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` — they evolve dynamically.
|
|
@@ -680,6 +735,65 @@ python manage.py detect_and_train
|
|
|
680
735
|
|
|
681
736
|
---
|
|
682
737
|
|
|
738
|
+
## 🔧 Troubleshooting
|
|
739
|
+
|
|
740
|
+
### Legitimate Pages Being Blocked
|
|
741
|
+
|
|
742
|
+
**Problem**: Users can't access legitimate pages like `/en/profile/` due to keyword blocking.
|
|
743
|
+
|
|
744
|
+
**Cause**: AIWAF learned legitimate keywords (like "profile") as suspicious from previous traffic.
|
|
745
|
+
|
|
746
|
+
**Solution**:
|
|
747
|
+
```bash
|
|
748
|
+
# 1. Clear problematic learned keywords
|
|
749
|
+
python manage.py aiwaf_reset --keywords --confirm
|
|
750
|
+
|
|
751
|
+
# 2. Add legitimate keywords to settings
|
|
752
|
+
# In settings.py:
|
|
753
|
+
AIWAF_ALLOWED_PATH_KEYWORDS = [
|
|
754
|
+
"profile", "user", "account", "dashboard",
|
|
755
|
+
# Add your site-specific keywords
|
|
756
|
+
]
|
|
757
|
+
|
|
758
|
+
# 3. Retrain with enhanced filtering (won't learn legitimate keywords)
|
|
759
|
+
python manage.py detect_and_train
|
|
760
|
+
|
|
761
|
+
# 4. Test - legitimate pages should now work!
|
|
762
|
+
```
|
|
763
|
+
|
|
764
|
+
### Preventing Future False Positives
|
|
765
|
+
|
|
766
|
+
Configure AIWAF to recognize your site's legitimate keywords:
|
|
767
|
+
|
|
768
|
+
```python
|
|
769
|
+
# settings.py
|
|
770
|
+
AIWAF_ALLOWED_PATH_KEYWORDS = [
|
|
771
|
+
# Common legitimate keywords
|
|
772
|
+
"profile", "user", "account", "settings", "dashboard",
|
|
773
|
+
"admin", "search", "contact", "about", "help",
|
|
774
|
+
|
|
775
|
+
# Your site-specific keywords
|
|
776
|
+
"buddycraft", "sc2", "starcraft", # Gaming site example
|
|
777
|
+
"shop", "cart", "checkout", # E-commerce example
|
|
778
|
+
"blog", "article", "news", # Content site example
|
|
779
|
+
]
|
|
780
|
+
```
|
|
781
|
+
|
|
782
|
+
### Reset Command Options
|
|
783
|
+
|
|
784
|
+
```bash
|
|
785
|
+
# Clear everything (safest for troubleshooting)
|
|
786
|
+
python manage.py aiwaf_reset --confirm
|
|
787
|
+
|
|
788
|
+
# Clear only problematic keywords
|
|
789
|
+
python manage.py aiwaf_reset --keywords --confirm
|
|
790
|
+
|
|
791
|
+
# Clear blocked IPs but keep exemptions
|
|
792
|
+
python manage.py aiwaf_reset --blacklist --confirm
|
|
793
|
+
```
|
|
794
|
+
|
|
795
|
+
---
|
|
796
|
+
|
|
683
797
|
## 🧠 How It Works
|
|
684
798
|
|
|
685
799
|
| Middleware | Purpose |
|
|
{aiwaf-0.1.9.1.9.dist-info → aiwaf-0.1.9.2.1.dist-info}/RECORD
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
aiwaf/__init__.py,sha256=
|
|
1
|
+
aiwaf/__init__.py,sha256=SLcMD_OTXr3DXtHpuCKxFvNl_pjrg-J5KLcJ-Swutuo,220
|
|
2
2
|
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
3
|
aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
|
|
4
4
|
aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
|
|
5
|
-
aiwaf/middleware.py,sha256=
|
|
5
|
+
aiwaf/middleware.py,sha256=8EC4AKfUjHhmVSKpquimkMUebBekr92pqyVF97wlbx0,27408
|
|
6
6
|
aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
|
|
7
7
|
aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
|
|
8
|
-
aiwaf/storage.py,sha256=
|
|
9
|
-
aiwaf/trainer.py,sha256=
|
|
8
|
+
aiwaf/storage.py,sha256=5ImrZMRn3u7HNsPH0fDjWhDrD2tgG2IHVnOXtLz0fk4,10253
|
|
9
|
+
aiwaf/trainer.py,sha256=47HP81kTaJCOfyONUm18r-FVc1YeRvcliO_akpX3BqI,18613
|
|
10
10
|
aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
|
|
11
11
|
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -14,7 +14,7 @@ aiwaf/management/commands/add_exemption.py,sha256=U_ByfJw1EstAZ8DaSoRb97IGwYzXs0
|
|
|
14
14
|
aiwaf/management/commands/add_ipexemption.py,sha256=sSf3d9hGK9RqqlBYkCrnrd8KZWGT-derSpoWnEY4H60,952
|
|
15
15
|
aiwaf/management/commands/aiwaf_diagnose.py,sha256=nXFRhq66N4QC3e4scYJ2sUngJce-0yDxtBO3R2BllRM,6134
|
|
16
16
|
aiwaf/management/commands/aiwaf_logging.py,sha256=FCIqULn2tii2vD9VxL7vk3PV4k4vr7kaA00KyaCExYY,7692
|
|
17
|
-
aiwaf/management/commands/aiwaf_reset.py,sha256=
|
|
17
|
+
aiwaf/management/commands/aiwaf_reset.py,sha256=pcF0zOYDSqjpCwDtk2HYJZLgr76td8OFRENtl20c1dQ,7472
|
|
18
18
|
aiwaf/management/commands/check_dependencies.py,sha256=GOZl00pDwW2cJjDvIaCeB3yWxmeYcJDRTIpmOTLvy2c,37204
|
|
19
19
|
aiwaf/management/commands/clear_blacklist.py,sha256=Tisedg0EVlc3E01mA3hBZQorwMzc5j1cns-oYshja0g,2770
|
|
20
20
|
aiwaf/management/commands/clear_cache.py,sha256=cdnuTgxkhKLqT_6k6yTcEBlREovNRQxAE51ceXlGYMA,647
|
|
@@ -28,8 +28,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
|
|
|
28
28
|
aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
|
|
29
29
|
aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
30
|
aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
|
|
31
|
-
aiwaf-0.1.9.1.
|
|
32
|
-
aiwaf-0.1.9.1.
|
|
33
|
-
aiwaf-0.1.9.1.
|
|
34
|
-
aiwaf-0.1.9.1.
|
|
35
|
-
aiwaf-0.1.9.1.
|
|
31
|
+
aiwaf-0.1.9.2.1.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
32
|
+
aiwaf-0.1.9.2.1.dist-info/METADATA,sha256=OgVYn0PPKBDcGCVlhYEFa7uc9XU4Rn-0ZS-W2CE9a1Q,26824
|
|
33
|
+
aiwaf-0.1.9.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
34
|
+
aiwaf-0.1.9.2.1.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
35
|
+
aiwaf-0.1.9.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|