aiwaf 0.1.9.1.9__py3-none-any.whl → 0.1.9.2.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  default_app_config = "aiwaf.apps.AiwafConfig"
2
2
 
3
- __version__ = "0.1.9.1.9"
3
+ __version__ = "0.1.9.2.1"
4
4
 
5
5
  # Note: Middleware classes are available from aiwaf.middleware
6
6
  # Import them only when needed to avoid circular imports during Django app loading
@@ -1,76 +1,115 @@
1
1
  from django.core.management.base import BaseCommand
2
- from aiwaf.storage import get_blacklist_store, get_exemption_store
2
+ from aiwaf.storage import get_blacklist_store, get_exemption_store, get_keyword_store
3
3
  import sys
4
4
 
5
5
  class Command(BaseCommand):
6
- help = 'Reset AI-WAF by clearing all blacklist and exemption (whitelist) entries'
6
+ help = 'Reset AI-WAF by clearing blacklist, exemption, and/or keyword entries'
7
7
 
8
8
  def add_arguments(self, parser):
9
9
  parser.add_argument(
10
- '--blacklist-only',
10
+ '--blacklist',
11
11
  action='store_true',
12
- help='Clear only blacklist entries, keep exemptions'
12
+ help='Clear blacklist entries (default: all)'
13
13
  )
14
14
  parser.add_argument(
15
- '--exemptions-only',
15
+ '--exemptions',
16
+ action='store_true',
17
+ help='Clear exemption entries (default: all)'
18
+ )
19
+ parser.add_argument(
20
+ '--keywords',
16
21
  action='store_true',
17
- help='Clear only exemption entries, keep blacklist'
22
+ help='Clear learned dynamic keywords (default: all)'
18
23
  )
19
24
  parser.add_argument(
20
25
  '--confirm',
21
26
  action='store_true',
22
27
  help='Skip confirmation prompt'
23
28
  )
29
+
30
+ # Legacy flags for backward compatibility
31
+ parser.add_argument(
32
+ '--blacklist-only',
33
+ action='store_true',
34
+ help='(Legacy) Clear only blacklist entries'
35
+ )
36
+ parser.add_argument(
37
+ '--exemptions-only',
38
+ action='store_true',
39
+ help='(Legacy) Clear only exemption entries'
40
+ )
24
41
 
25
42
  def handle(self, *args, **options):
26
- blacklist_only = options['blacklist_only']
27
- exemptions_only = options['exemptions_only']
28
- confirm = options['confirm']
43
+ # Parse arguments
44
+ blacklist_flag = options.get('blacklist', False)
45
+ exemptions_flag = options.get('exemptions', False)
46
+ keywords_flag = options.get('keywords', False)
47
+ confirm = options.get('confirm', False)
48
+
49
+ # Legacy support
50
+ blacklist_only = options.get('blacklist_only', False)
51
+ exemptions_only = options.get('exemptions_only', False)
52
+
53
+ # Handle legacy flags
54
+ if blacklist_only:
55
+ blacklist_flag = True
56
+ exemptions_flag = False
57
+ keywords_flag = False
58
+ elif exemptions_only:
59
+ blacklist_flag = False
60
+ exemptions_flag = True
61
+ keywords_flag = False
62
+
63
+ # If no specific flags, clear everything
64
+ if not (blacklist_flag or exemptions_flag or keywords_flag):
65
+ blacklist_flag = exemptions_flag = keywords_flag = True
29
66
 
30
67
  try:
31
68
  blacklist_store = get_blacklist_store()
32
69
  exemption_store = get_exemption_store()
70
+ keyword_store = get_keyword_store()
33
71
  except Exception as e:
34
72
  self.stdout.write(self.style.ERROR(f'Error initializing stores: {e}'))
35
73
  return
36
74
 
37
75
  # Count current entries safely
38
- try:
39
- blacklist_entries = blacklist_store.get_all()
40
- blacklist_count = len(blacklist_entries)
41
- except Exception as e:
42
- self.stdout.write(self.style.WARNING(f'Warning: Could not count blacklist entries: {e}'))
43
- blacklist_count = 0
44
- blacklist_entries = []
76
+ counts = {'blacklist': 0, 'exemptions': 0, 'keywords': 0}
77
+ entries = {'blacklist': [], 'exemptions': [], 'keywords': []}
45
78
 
46
- try:
47
- exemption_entries = exemption_store.get_all()
48
- exemption_count = len(exemption_entries)
49
- except Exception as e:
50
- self.stdout.write(self.style.WARNING(f'Warning: Could not count exemption entries: {e}'))
51
- exemption_count = 0
52
- exemption_entries = []
79
+ if blacklist_flag:
80
+ try:
81
+ entries['blacklist'] = blacklist_store.get_all()
82
+ counts['blacklist'] = len(entries['blacklist'])
83
+ except Exception as e:
84
+ self.stdout.write(self.style.WARNING(f'Warning: Could not count blacklist entries: {e}'))
53
85
 
54
- if blacklist_only and exemptions_only:
55
- self.stdout.write(self.style.ERROR('Cannot use both --blacklist-only and --exemptions-only flags'))
56
- return
86
+ if exemptions_flag:
87
+ try:
88
+ entries['exemptions'] = exemption_store.get_all()
89
+ counts['exemptions'] = len(entries['exemptions'])
90
+ except Exception as e:
91
+ self.stdout.write(self.style.WARNING(f'Warning: Could not count exemption entries: {e}'))
57
92
 
58
- # Determine what to clear
59
- if blacklist_only:
60
- action = f"Clear {blacklist_count} blacklist entries"
61
- clear_blacklist = True
62
- clear_exemptions = False
63
- elif exemptions_only:
64
- action = f"Clear {exemption_count} exemption entries"
65
- clear_blacklist = False
66
- clear_exemptions = True
67
- else:
68
- action = f"Clear {blacklist_count} blacklist entries and {exemption_count} exemption entries"
69
- clear_blacklist = True
70
- clear_exemptions = True
93
+ if keywords_flag:
94
+ try:
95
+ entries['keywords'] = keyword_store.get_all_keywords()
96
+ counts['keywords'] = len(entries['keywords'])
97
+ except Exception as e:
98
+ self.stdout.write(self.style.WARNING(f'Warning: Could not count keyword entries: {e}'))
99
+
100
+ # Build action description
101
+ actions = []
102
+ if blacklist_flag:
103
+ actions.append(f"{counts['blacklist']} blacklist entries")
104
+ if exemptions_flag:
105
+ actions.append(f"{counts['exemptions']} exemption entries")
106
+ if keywords_flag:
107
+ actions.append(f"{counts['keywords']} learned keywords")
108
+
109
+ action = "Clear " + ", ".join(actions)
71
110
 
72
111
  # Show what will be cleared
73
- self.stdout.write(f"AI-WAF Reset: {action}")
112
+ self.stdout.write(f"🔧 AI-WAF Reset: {action}")
74
113
 
75
114
  if not confirm:
76
115
  try:
@@ -83,12 +122,12 @@ class Command(BaseCommand):
83
122
  return
84
123
 
85
124
  # Perform the reset
86
- deleted_counts = {'blacklist': 0, 'exemptions': 0, 'errors': []}
125
+ deleted_counts = {'blacklist': 0, 'exemptions': 0, 'keywords': 0, 'errors': []}
87
126
 
88
- if clear_blacklist:
127
+ if blacklist_flag:
89
128
  # Clear blacklist entries
90
129
  try:
91
- for entry in blacklist_entries:
130
+ for entry in entries['blacklist']:
92
131
  try:
93
132
  blacklist_store.remove_ip(entry['ip_address'])
94
133
  deleted_counts['blacklist'] += 1
@@ -97,10 +136,10 @@ class Command(BaseCommand):
97
136
  except Exception as e:
98
137
  deleted_counts['errors'].append(f"Error clearing blacklist: {e}")
99
138
 
100
- if clear_exemptions:
139
+ if exemptions_flag:
101
140
  # Clear exemption entries
102
141
  try:
103
- for entry in exemption_entries:
142
+ for entry in entries['exemptions']:
104
143
  try:
105
144
  exemption_store.remove_ip(entry['ip_address'])
106
145
  deleted_counts['exemptions'] += 1
@@ -109,26 +148,34 @@ class Command(BaseCommand):
109
148
  except Exception as e:
110
149
  deleted_counts['errors'].append(f"Error clearing exemptions: {e}")
111
150
 
151
+ if keywords_flag:
152
+ # Clear keyword entries
153
+ try:
154
+ for keyword in entries['keywords']:
155
+ try:
156
+ keyword_store.remove_keyword(keyword)
157
+ deleted_counts['keywords'] += 1
158
+ except Exception as e:
159
+ deleted_counts['errors'].append(f"Error removing keyword '{keyword}': {e}")
160
+ except Exception as e:
161
+ deleted_counts['errors'].append(f"Error clearing keywords: {e}")
162
+
112
163
  # Report results
113
164
  if deleted_counts['errors']:
114
165
  for error in deleted_counts['errors']:
115
166
  self.stdout.write(self.style.WARNING(f"⚠️ {error}"))
116
167
 
117
- if clear_blacklist and clear_exemptions:
118
- self.stdout.write(
119
- self.style.SUCCESS(
120
- f"✅ Reset complete: Deleted {deleted_counts['blacklist']} blacklist entries "
121
- f"and {deleted_counts['exemptions']} exemption entries"
122
- )
123
- )
124
- elif clear_blacklist:
125
- self.stdout.write(
126
- self.style.SUCCESS(f"✅ Blacklist cleared: Deleted {deleted_counts['blacklist']} entries")
127
- )
128
- elif clear_exemptions:
129
- self.stdout.write(
130
- self.style.SUCCESS(f"✅ Exemptions cleared: Deleted {deleted_counts['exemptions']} entries")
131
- )
168
+ # Build success message
169
+ success_parts = []
170
+ if blacklist_flag:
171
+ success_parts.append(f"{deleted_counts['blacklist']} blacklist entries")
172
+ if exemptions_flag:
173
+ success_parts.append(f"{deleted_counts['exemptions']} exemption entries")
174
+ if keywords_flag:
175
+ success_parts.append(f"{deleted_counts['keywords']} learned keywords")
176
+
177
+ success_message = "✅ Reset complete: Deleted " + ", ".join(success_parts)
178
+ self.stdout.write(self.style.SUCCESS(success_message))
132
179
 
133
180
  if deleted_counts['errors']:
134
181
  self.stdout.write(
aiwaf/middleware.py CHANGED
@@ -3,6 +3,7 @@
3
3
  import time
4
4
  import re
5
5
  import os
6
+ import warnings
6
7
  import numpy as np
7
8
  import joblib
8
9
  from django.db.models import UUIDField
@@ -82,6 +83,177 @@ class IPAndKeywordBlockMiddleware:
82
83
  def __init__(self, get_response):
83
84
  self.get_response = get_response
84
85
  self.safe_prefixes = self._collect_safe_prefixes()
86
+ self.exempt_keywords = self._get_exempt_keywords()
87
+ self.legitimate_path_keywords = self._get_legitimate_path_keywords()
88
+
89
+ def _get_exempt_keywords(self):
90
+ """Get keywords that should be exempt from blocking"""
91
+ exempt_tokens = set()
92
+
93
+ # Extract from exempt paths
94
+ for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
95
+ for seg in re.split(r"\W+", path.strip("/").lower()):
96
+ if len(seg) > 3:
97
+ exempt_tokens.add(seg)
98
+
99
+ # Add explicit exempt keywords from settings
100
+ exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
101
+ exempt_tokens.update(exempt_keywords)
102
+
103
+ return exempt_tokens
104
+
105
+ def _get_legitimate_path_keywords(self):
106
+ """Get keywords that are legitimate in URL paths - uses same logic as trainer"""
107
+ # Import the enhanced function from trainer to ensure consistency
108
+ try:
109
+ from .trainer import get_legitimate_keywords
110
+ return get_legitimate_keywords()
111
+ except ImportError:
112
+ # Fallback to local implementation if trainer import fails
113
+ return self._get_legitimate_keywords_fallback()
114
+
115
+ def _get_legitimate_keywords_fallback(self):
116
+ """Fallback implementation matching trainer.py logic"""
117
+ legitimate = set()
118
+
119
+ # Common legitimate path segments - matches trainer.py
120
+ default_legitimate = {
121
+ "profile", "user", "users", "account", "accounts", "settings", "dashboard",
122
+ "home", "about", "contact", "help", "search", "list", "lists",
123
+ "view", "views", "edit", "create", "update", "delete", "detail", "details",
124
+ "api", "auth", "login", "logout", "register", "signup", "signin",
125
+ "reset", "confirm", "activate", "verify", "page", "pages",
126
+ "category", "categories", "tag", "tags", "post", "posts",
127
+ "article", "articles", "blog", "blogs", "news", "item", "items",
128
+ "admin", "administration", "manage", "manager", "control", "panel",
129
+ "config", "configuration", "option", "options", "preference", "preferences"
130
+ }
131
+ legitimate.update(default_legitimate)
132
+
133
+ # Extract keywords from Django URL patterns and app names - matches trainer.py
134
+ legitimate.update(self._extract_django_route_keywords())
135
+
136
+ # Add from Django settings
137
+ allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
138
+ legitimate.update(allowed_path_keywords)
139
+
140
+ # Add exempt keywords
141
+ exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
142
+ legitimate.update(exempt_keywords)
143
+
144
+ return legitimate
145
+
146
+ def _extract_django_route_keywords(self):
147
+ """Extract legitimate keywords from Django URL patterns, app names, and model names - matches trainer.py"""
148
+ keywords = set()
149
+
150
+ try:
151
+ from django.urls.resolvers import URLResolver, URLPattern
152
+
153
+ # Extract from app names and labels
154
+ for app_config in apps.get_app_configs():
155
+ # Add app name and label
156
+ if app_config.name:
157
+ for segment in re.split(r'[._-]', app_config.name.lower()):
158
+ if len(segment) > 2:
159
+ keywords.add(segment)
160
+
161
+ if app_config.label and app_config.label != app_config.name:
162
+ for segment in re.split(r'[._-]', app_config.label.lower()):
163
+ if len(segment) > 2:
164
+ keywords.add(segment)
165
+
166
+ # Extract from model names in the app
167
+ try:
168
+ for model in app_config.get_models():
169
+ model_name = model._meta.model_name.lower()
170
+ if len(model_name) > 2:
171
+ keywords.add(model_name)
172
+ # Add plural form
173
+ if not model_name.endswith('s'):
174
+ keywords.add(f"{model_name}s")
175
+ except Exception:
176
+ continue
177
+
178
+ # Extract from URL patterns
179
+ def extract_from_pattern(pattern, prefix=""):
180
+ try:
181
+ if isinstance(pattern, URLResolver):
182
+ # Handle include() patterns
183
+ namespace = getattr(pattern, 'namespace', None)
184
+ if namespace:
185
+ for segment in re.split(r'[._-]', namespace.lower()):
186
+ if len(segment) > 2:
187
+ keywords.add(segment)
188
+
189
+ # Extract from the pattern itself
190
+ pattern_str = str(pattern.pattern)
191
+ for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
192
+ keywords.add(segment.lower())
193
+
194
+ # Recurse into nested patterns
195
+ for nested_pattern in pattern.url_patterns:
196
+ extract_from_pattern(nested_pattern, prefix)
197
+
198
+ elif isinstance(pattern, URLPattern):
199
+ # Extract from URL pattern
200
+ pattern_str = str(pattern.pattern)
201
+ for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
202
+ keywords.add(segment.lower())
203
+
204
+ # Extract from view name if available
205
+ if hasattr(pattern.callback, '__name__'):
206
+ view_name = pattern.callback.__name__.lower()
207
+ for segment in re.split(r'[._-]', view_name):
208
+ if len(segment) > 2 and segment != 'view':
209
+ keywords.add(segment)
210
+
211
+ except Exception:
212
+ pass
213
+
214
+ # Process all URL patterns
215
+ root_resolver = get_resolver()
216
+ for pattern in root_resolver.url_patterns:
217
+ extract_from_pattern(pattern)
218
+
219
+ except Exception as e:
220
+ # Silently continue if extraction fails
221
+ pass
222
+
223
+ # Filter out very common/generic words that might be suspicious
224
+ filtered_keywords = set()
225
+ for keyword in keywords:
226
+ if (len(keyword) >= 3 and
227
+ keyword not in ['www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del']):
228
+ filtered_keywords.add(keyword)
229
+
230
+ return filtered_keywords
231
+
232
+ def _is_malicious_context(self, request, segment):
233
+ """Determine if a keyword appears in a malicious context"""
234
+ path = request.path.lower()
235
+
236
+ # Check if this is a query parameter attack
237
+ query_string = request.META.get('QUERY_STRING', '').lower()
238
+ if segment in query_string and any(attack_pattern in query_string for attack_pattern in [
239
+ 'union', 'select', 'drop', 'insert', 'script', 'alert', 'eval'
240
+ ]):
241
+ return True
242
+
243
+ # Check if this looks like a file extension attack
244
+ if segment.startswith('.') and not path_exists_in_django(request.path):
245
+ return True
246
+
247
+ # Check if this looks like a directory traversal
248
+ if '../' in path or '..\\' in path:
249
+ return True
250
+
251
+ # Check if accessing non-existent paths with suspicious extensions
252
+ if (not path_exists_in_django(request.path) and
253
+ any(ext in segment for ext in ['.php', '.asp', '.jsp', '.cgi'])):
254
+ return True
255
+
256
+ return False
85
257
 
86
258
  def _collect_safe_prefixes(self):
87
259
  resolver = get_resolver()
@@ -102,35 +274,85 @@ class IPAndKeywordBlockMiddleware:
102
274
  return prefixes
103
275
 
104
276
  def __call__(self, request):
105
- raw_path = request.path.lower()
277
+ # First exemption check - early exit for exempt requests
106
278
  if is_exempt(request):
107
279
  return self.get_response(request)
280
+
281
+ raw_path = request.path.lower()
108
282
  ip = get_ip(request)
109
283
  path = raw_path.lstrip("/")
110
284
 
111
- # BlacklistManager now handles exemption checking internally
285
+ # Additional IP-level exemption check
286
+ from .storage import get_exemption_store
287
+ exemption_store = get_exemption_store()
288
+ if exemption_store.is_exempted(ip):
289
+ return self.get_response(request)
290
+
291
+ # BlacklistManager handles exemption checking internally
112
292
  if BlacklistManager.is_blocked(ip):
113
293
  return JsonResponse({"error": "blocked"}, status=403)
114
294
 
295
+ # Check if path exists in Django - if yes, be more lenient
296
+ path_exists = path_exists_in_django(request.path)
297
+
115
298
  keyword_store = get_keyword_store()
116
299
  segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
117
300
 
301
+ # Only learn keywords from non-existent paths or suspicious contexts
118
302
  for seg in segments:
119
- keyword_store.add_keyword(seg)
303
+ if not path_exists or self._is_malicious_context(request, seg):
304
+ keyword_store.add_keyword(seg)
120
305
 
121
306
  dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
122
307
  all_kw = set(STATIC_KW) | set(dynamic_top)
123
- suspicious_kw = {
124
- kw for kw in all_kw
125
- if not any(path.startswith(prefix) for prefix in self.safe_prefixes if prefix)
126
- }
308
+
309
+ # Enhanced filtering logic
310
+ suspicious_kw = set()
311
+ for kw in all_kw:
312
+ # Skip if keyword is explicitly exempted
313
+ if kw in self.exempt_keywords:
314
+ continue
315
+
316
+ # Skip if this is a legitimate path keyword and path exists in Django
317
+ if (kw in self.legitimate_path_keywords and
318
+ path_exists and
319
+ not self._is_malicious_context(request, kw)):
320
+ continue
321
+
322
+ # Skip if path starts with safe prefix
323
+ if any(path.startswith(prefix) for prefix in self.safe_prefixes if prefix):
324
+ continue
325
+
326
+ suspicious_kw.add(kw)
327
+
328
+ # Check segments against suspicious keywords
127
329
  for seg in segments:
330
+ is_suspicious = False
331
+ block_reason = ""
332
+
333
+ # Check if segment is in learned suspicious keywords
128
334
  if seg in suspicious_kw:
129
- # BlacklistManager.block() now checks exemptions internally
130
- BlacklistManager.block(ip, f"Keyword block: {seg}")
131
- # Check again after blocking attempt (exempted IPs won't be blocked)
132
- if BlacklistManager.is_blocked(ip):
133
- return JsonResponse({"error": "blocked"}, status=403)
335
+ is_suspicious = True
336
+ block_reason = f"Learned keyword: {seg}"
337
+
338
+ # Also check if segment appears to be inherently malicious
339
+ elif (not path_exists and
340
+ seg not in self.legitimate_path_keywords and
341
+ (self._is_malicious_context(request, seg) or
342
+ any(malicious_pattern in seg for malicious_pattern in
343
+ ['hack', 'exploit', 'attack', 'malicious', 'evil', 'backdoor', 'inject', 'xss']))):
344
+ is_suspicious = True
345
+ block_reason = f"Inherently suspicious: {seg}"
346
+
347
+ if is_suspicious:
348
+ # Additional context check before blocking
349
+ if self._is_malicious_context(request, seg) or not path_exists:
350
+ # Double-check exemption before blocking
351
+ if not exemption_store.is_exempted(ip):
352
+ BlacklistManager.block(ip, f"Keyword block: {block_reason}")
353
+ # Check again after blocking attempt (exempted IPs won't be blocked)
354
+ if BlacklistManager.is_blocked(ip):
355
+ return JsonResponse({"error": "blocked"}, status=403)
134
356
  return self.get_response(request)
135
357
 
136
358
 
@@ -143,22 +365,32 @@ class RateLimitMiddleware:
143
365
  self.FLOOD = getattr(settings, "AIWAF_RATE_FLOOD", 40) # hard limit
144
366
 
145
367
  def __call__(self, request):
368
+ # First exemption check - early exit for exempt requests
146
369
  if is_exempt(request):
147
370
  return self.get_response(request)
148
371
 
149
372
  ip = get_ip(request)
373
+
374
+ # Additional IP-level exemption check
375
+ from .storage import get_exemption_store
376
+ exemption_store = get_exemption_store()
377
+ if exemption_store.is_exempted(ip):
378
+ return self.get_response(request)
379
+
150
380
  key = f"ratelimit:{ip}"
151
381
  now = time.time()
152
382
  timestamps = cache.get(key, [])
153
383
  timestamps = [t for t in timestamps if now - t < self.WINDOW]
154
384
  timestamps.append(now)
155
385
  cache.set(key, timestamps, timeout=self.WINDOW)
386
+
156
387
  if len(timestamps) > self.FLOOD:
157
- # BlacklistManager.block() now checks exemptions internally
158
- BlacklistManager.block(ip, "Flood pattern")
159
- # Check if actually blocked (exempted IPs won't be blocked)
160
- if BlacklistManager.is_blocked(ip):
161
- return JsonResponse({"error": "blocked"}, status=403)
388
+ # Double-check exemption before blocking
389
+ if not exemption_store.is_exempted(ip):
390
+ BlacklistManager.block(ip, "Flood pattern")
391
+ # Check if actually blocked (exempted IPs won't be blocked)
392
+ if BlacklistManager.is_blocked(ip):
393
+ return JsonResponse({"error": "blocked"}, status=403)
162
394
  if len(timestamps) > self.MAX:
163
395
  return JsonResponse({"error": "too_many_requests"}, status=429)
164
396
  return self.get_response(request)
@@ -174,19 +406,37 @@ class AIAnomalyMiddleware(MiddlewareMixin):
174
406
  self.model = MODEL
175
407
 
176
408
  def process_request(self, request):
409
+ # First exemption check - early exit for exempt requests
177
410
  if is_exempt(request):
178
411
  return None
412
+
179
413
  request._start_time = time.time()
180
414
  ip = get_ip(request)
181
- # BlacklistManager now handles exemption checking internally
415
+
416
+ # Additional IP-level exemption check
417
+ from .storage import get_exemption_store
418
+ exemption_store = get_exemption_store()
419
+ if exemption_store.is_exempted(ip):
420
+ return None
421
+
422
+ # BlacklistManager handles exemption checking internally
182
423
  if BlacklistManager.is_blocked(ip):
183
424
  return JsonResponse({"error": "blocked"}, status=403)
184
425
  return None
185
426
 
186
427
  def process_response(self, request, response):
428
+ # First exemption check - early exit for exempt requests
187
429
  if is_exempt(request):
188
430
  return response
431
+
189
432
  ip = get_ip(request)
433
+
434
+ # Additional IP-level exemption check
435
+ from .storage import get_exemption_store
436
+ exemption_store = get_exemption_store()
437
+ if exemption_store.is_exempted(ip):
438
+ return response
439
+
190
440
  now = time.time()
191
441
  key = f"aiwaf:{ip}"
192
442
  data = cache.get(key, [])
@@ -251,17 +501,20 @@ class AIAnomalyMiddleware(MiddlewareMixin):
251
501
  # Anomalous but looks legitimate - don't block
252
502
  pass
253
503
  else:
254
- # Block if it shows clear signs of malicious behavior
255
- BlacklistManager.block(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
256
- # Check if actually blocked (exempted IPs won't be blocked)
257
- if BlacklistManager.is_blocked(ip):
258
- return JsonResponse({"error": "blocked"}, status=403)
504
+ # Double-check exemption before blocking
505
+ if not exemption_store.is_exempted(ip):
506
+ BlacklistManager.block(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
507
+ # Check if actually blocked (exempted IPs won't be blocked)
508
+ if BlacklistManager.is_blocked(ip):
509
+ return JsonResponse({"error": "blocked"}, status=403)
259
510
  else:
260
511
  # No recent data to analyze - be more conservative, only block on very suspicious current request
261
512
  if kw_hits >= 2 or status_idx == STATUS_IDX.index("404"):
262
- BlacklistManager.block(ip, "AI anomaly + immediate suspicious behavior")
263
- if BlacklistManager.is_blocked(ip):
264
- return JsonResponse({"error": "blocked"}, status=403)
513
+ # Double-check exemption before blocking
514
+ if not exemption_store.is_exempted(ip):
515
+ BlacklistManager.block(ip, "AI anomaly + immediate suspicious behavior")
516
+ if BlacklistManager.is_blocked(ip):
517
+ return JsonResponse({"error": "blocked"}, status=403)
265
518
 
266
519
  data.append((now, request.path, response.status_code, resp_time))
267
520
  data = [d for d in data if now - d[0] < self.WINDOW]
@@ -283,7 +536,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
283
536
  return None
284
537
 
285
538
  ip = get_ip(request)
286
- # BlacklistManager now handles exemption checking internally
539
+
540
+ # Additional IP-level exemption check
541
+ from .storage import get_exemption_store
542
+ exemption_store = get_exemption_store()
543
+ if exemption_store.is_exempted(ip):
544
+ return None
287
545
 
288
546
  if request.method == "GET":
289
547
  # Store timestamp for this IP's GET request
@@ -300,11 +558,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
300
558
  if not any(request.path.lower().startswith(login_path) for login_path in [
301
559
  "/admin/login/", "/login/", "/accounts/login/", "/auth/login/", "/signin/"
302
560
  ]):
303
- # BlacklistManager.block() now checks exemptions internally
304
- BlacklistManager.block(ip, "Direct POST without GET")
305
- # Check if actually blocked (exempted IPs won't be blocked)
306
- if BlacklistManager.is_blocked(ip):
307
- return JsonResponse({"error": "blocked"}, status=403)
561
+ # Double-check exemption before blocking
562
+ if not exemption_store.is_exempted(ip):
563
+ BlacklistManager.block(ip, "Direct POST without GET")
564
+ # Check if actually blocked (exempted IPs won't be blocked)
565
+ if BlacklistManager.is_blocked(ip):
566
+ return JsonResponse({"error": "blocked"}, status=403)
308
567
  else:
309
568
  # Check timing - be more lenient for login paths
310
569
  time_diff = time.time() - get_time
@@ -317,11 +576,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
317
576
  min_time = 0.1 # Very short threshold for login forms
318
577
 
319
578
  if time_diff < min_time:
320
- # BlacklistManager.block() now checks exemptions internally
321
- BlacklistManager.block(ip, f"Form submitted too quickly ({time_diff:.2f}s)")
322
- # Check if actually blocked (exempted IPs won't be blocked)
323
- if BlacklistManager.is_blocked(ip):
324
- return JsonResponse({"error": "blocked"}, status=403)
579
+ # Double-check exemption before blocking
580
+ if not exemption_store.is_exempted(ip):
581
+ BlacklistManager.block(ip, f"Form submitted too quickly ({time_diff:.2f}s)")
582
+ # Check if actually blocked (exempted IPs won't be blocked)
583
+ if BlacklistManager.is_blocked(ip):
584
+ return JsonResponse({"error": "blocked"}, status=403)
325
585
 
326
586
  return None
327
587
 
@@ -330,11 +590,19 @@ class UUIDTamperMiddleware(MiddlewareMixin):
330
590
  def process_view(self, request, view_func, view_args, view_kwargs):
331
591
  if is_exempt(request):
332
592
  return None
593
+
333
594
  uid = view_kwargs.get("uuid")
334
595
  if not uid:
335
596
  return None
336
597
 
337
598
  ip = get_ip(request)
599
+
600
+ # Additional IP-level exemption check
601
+ from .storage import get_exemption_store
602
+ exemption_store = get_exemption_store()
603
+ if exemption_store.is_exempted(ip):
604
+ return None
605
+
338
606
  app_label = view_func.__module__.split(".")[0]
339
607
  app_cfg = apps.get_app_config(app_label)
340
608
  for Model in app_cfg.get_models():
@@ -345,8 +613,9 @@ class UUIDTamperMiddleware(MiddlewareMixin):
345
613
  except (ValueError, TypeError):
346
614
  continue
347
615
 
348
- # BlacklistManager.block() now checks exemptions internally
349
- BlacklistManager.block(ip, "UUID tampering")
350
- # Check if actually blocked (exempted IPs won't be blocked)
351
- if BlacklistManager.is_blocked(ip):
352
- return JsonResponse({"error": "blocked"}, status=403)
616
+ # Double-check exemption before blocking
617
+ if not exemption_store.is_exempted(ip):
618
+ BlacklistManager.block(ip, "UUID tampering")
619
+ # Check if actually blocked (exempted IPs won't be blocked)
620
+ if BlacklistManager.is_blocked(ip):
621
+ return JsonResponse({"error": "blocked"}, status=403)
aiwaf/storage.py CHANGED
@@ -284,6 +284,19 @@ class ModelKeywordStore:
284
284
  except Exception:
285
285
  return []
286
286
 
287
+ @staticmethod
288
+ def get_all_keywords():
289
+ """Get all keywords"""
290
+ _import_models()
291
+ if DynamicKeyword is None:
292
+ return []
293
+ try:
294
+ return list(
295
+ DynamicKeyword.objects.all().values_list('keyword', flat=True)
296
+ )
297
+ except Exception:
298
+ return []
299
+
287
300
  @staticmethod
288
301
  def reset_keywords():
289
302
  """Reset all keyword counts"""
aiwaf/trainer.py CHANGED
@@ -51,17 +51,153 @@ def path_exists_in_django(path: str) -> bool:
51
51
 
52
52
 
53
53
  def remove_exempt_keywords() -> None:
54
+ """Remove exempt keywords from dynamic keyword storage"""
54
55
  keyword_store = get_keyword_store()
55
56
  exempt_tokens = set()
56
57
 
58
+ # Extract tokens from exempt paths
57
59
  for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
58
60
  for seg in re.split(r"\W+", path.strip("/").lower()):
59
61
  if len(seg) > 3:
60
62
  exempt_tokens.add(seg)
61
63
 
64
+ # Add explicit exempt keywords from settings
65
+ explicit_exempt = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
66
+ exempt_tokens.update(explicit_exempt)
67
+
68
+ # Add legitimate path keywords to prevent them from being learned as suspicious
69
+ allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
70
+ exempt_tokens.update(allowed_path_keywords)
71
+
62
72
  # Remove exempt tokens from keyword storage
63
73
  for token in exempt_tokens:
64
74
  keyword_store.remove_keyword(token)
75
+
76
+ if exempt_tokens:
77
+ print(f"🧹 Removed {len(exempt_tokens)} exempt keywords from learning: {list(exempt_tokens)[:10]}")
78
+
79
+
80
+ def get_legitimate_keywords() -> set:
81
+ """Get all legitimate keywords that shouldn't be learned as suspicious"""
82
+ legitimate = set()
83
+
84
+ # Common legitimate path segments
85
+ default_legitimate = {
86
+ "profile", "user", "users", "account", "accounts", "settings", "dashboard",
87
+ "home", "about", "contact", "help", "search", "list", "lists",
88
+ "view", "views", "edit", "create", "update", "delete", "detail", "details",
89
+ "api", "auth", "login", "logout", "register", "signup", "signin",
90
+ "reset", "confirm", "activate", "verify", "page", "pages",
91
+ "category", "categories", "tag", "tags", "post", "posts",
92
+ "article", "articles", "blog", "blogs", "news", "item", "items",
93
+ "admin", "administration", "manage", "manager", "control", "panel",
94
+ "config", "configuration", "option", "options", "preference", "preferences"
95
+ }
96
+ legitimate.update(default_legitimate)
97
+
98
+ # Extract keywords from Django URL patterns and app names
99
+ legitimate.update(_extract_django_route_keywords())
100
+
101
+ # Add from Django settings
102
+ allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
103
+ legitimate.update(allowed_path_keywords)
104
+
105
+ # Add exempt keywords
106
+ exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
107
+ legitimate.update(exempt_keywords)
108
+
109
+ return legitimate
110
+
111
+
112
+ def _extract_django_route_keywords() -> set:
113
+ """Extract legitimate keywords from Django URL patterns, app names, and model names"""
114
+ keywords = set()
115
+
116
+ try:
117
+ from django.urls import get_resolver
118
+ from django.urls.resolvers import URLResolver, URLPattern
119
+ from django.apps import apps
120
+
121
+ # Extract from app names and labels
122
+ for app_config in apps.get_app_configs():
123
+ # Add app name and label
124
+ if app_config.name:
125
+ for segment in re.split(r'[._-]', app_config.name.lower()):
126
+ if len(segment) > 2:
127
+ keywords.add(segment)
128
+
129
+ if app_config.label and app_config.label != app_config.name:
130
+ for segment in re.split(r'[._-]', app_config.label.lower()):
131
+ if len(segment) > 2:
132
+ keywords.add(segment)
133
+
134
+ # Extract from model names in the app
135
+ try:
136
+ for model in app_config.get_models():
137
+ model_name = model._meta.model_name.lower()
138
+ if len(model_name) > 2:
139
+ keywords.add(model_name)
140
+ # Add plural form
141
+ if not model_name.endswith('s'):
142
+ keywords.add(f"{model_name}s")
143
+ except Exception:
144
+ continue
145
+
146
+ # Extract from URL patterns
147
+ def extract_from_pattern(pattern, prefix=""):
148
+ try:
149
+ if isinstance(pattern, URLResolver):
150
+ # Handle include() patterns
151
+ namespace = getattr(pattern, 'namespace', None)
152
+ if namespace:
153
+ for segment in re.split(r'[._-]', namespace.lower()):
154
+ if len(segment) > 2:
155
+ keywords.add(segment)
156
+
157
+ # Extract from the pattern itself
158
+ pattern_str = str(pattern.pattern)
159
+ for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
160
+ keywords.add(segment.lower())
161
+
162
+ # Recurse into nested patterns
163
+ for nested_pattern in pattern.url_patterns:
164
+ extract_from_pattern(nested_pattern, prefix)
165
+
166
+ elif isinstance(pattern, URLPattern):
167
+ # Extract from URL pattern
168
+ pattern_str = str(pattern.pattern)
169
+ for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
170
+ keywords.add(segment.lower())
171
+
172
+ # Extract from view name if available
173
+ if hasattr(pattern.callback, '__name__'):
174
+ view_name = pattern.callback.__name__.lower()
175
+ for segment in re.split(r'[._-]', view_name):
176
+ if len(segment) > 2 and segment != 'view':
177
+ keywords.add(segment)
178
+
179
+ except Exception:
180
+ pass
181
+
182
+ # Process all URL patterns
183
+ root_resolver = get_resolver()
184
+ for pattern in root_resolver.url_patterns:
185
+ extract_from_pattern(pattern)
186
+
187
+ except Exception as e:
188
+ print(f"Warning: Could not extract Django route keywords: {e}")
189
+
190
+ # Drop generic tokens (domain parts, URL converter names, short verbs) so they are not treated as legitimate keywords
191
+ filtered_keywords = set()
192
+ for keyword in keywords:
193
+ if (len(keyword) >= 3 and
194
+ keyword not in ['www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del']):
195
+ filtered_keywords.add(keyword)
196
+
197
+ if filtered_keywords:
198
+ print(f"🔗 Extracted {len(filtered_keywords)} legitimate keywords from Django routes and apps")
199
+
200
+ return filtered_keywords
65
201
 
66
202
 
67
203
  def _read_all_logs() -> list[str]:
@@ -137,14 +273,20 @@ def _parse(line: str) -> dict | None:
137
273
 
138
274
 
139
275
  def train() -> None:
276
+ """Enhanced training with improved keyword filtering and exemption handling"""
277
+ print("🚀 Starting AIWAF enhanced training...")
278
+
279
+ # Remove exempt keywords first
140
280
  remove_exempt_keywords()
141
281
 
142
282
  # Remove any IPs in IPExemption from the blacklist using BlacklistManager
143
283
  exemption_store = get_exemption_store()
144
284
 
145
285
  exempted_ips = [entry['ip_address'] for entry in exemption_store.get_all()]
146
- for ip in exempted_ips:
147
- BlacklistManager.unblock(ip)
286
+ if exempted_ips:
287
+ print(f"🛡️ Found {len(exempted_ips)} exempted IPs - clearing from blacklist")
288
+ for ip in exempted_ips:
289
+ BlacklistManager.unblock(ip)
148
290
 
149
291
  raw_lines = _read_all_logs()
150
292
  if not raw_lines:
@@ -281,17 +423,50 @@ def train() -> None:
281
423
  print(f" → Blocked {blocked_count}/{len(anomalous_ips)} anomalous IPs (others looked legitimate)")
282
424
 
283
425
  tokens = Counter()
426
+ legitimate_keywords = get_legitimate_keywords()
427
+
428
+ print(f"🔍 Learning keywords from {len(parsed)} parsed requests...")
429
+
284
430
  for r in parsed:
285
- if (r["status"].startswith(("4", "5"))
286
- and not path_exists_in_django(r["path"])):
431
+ # Only learn from suspicious requests (4xx/5xx responses on paths that neither exist in Django nor are exempt)
432
+ if (r["status"].startswith(("4", "5")) and
433
+ not path_exists_in_django(r["path"]) and
434
+ not is_exempt_path(r["path"])):
435
+
287
436
  for seg in re.split(r"\W+", r["path"].lower()):
288
- if len(seg) > 3 and seg not in STATIC_KW:
437
+ if (len(seg) > 3 and
438
+ seg not in STATIC_KW and
439
+ seg not in legitimate_keywords): # Don't learn legitimate keywords
289
440
  tokens[seg] += 1
290
441
 
291
442
  keyword_store = get_keyword_store()
292
- top_tokens = tokens.most_common(10)
443
+ top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
293
444
 
445
+ # Additional filtering: only add keywords that appear suspicious enough
446
+ filtered_tokens = []
294
447
  for kw, cnt in top_tokens:
295
- keyword_store.add_keyword(kw, cnt)
296
-
297
- print(f"DynamicKeyword storage updated with top tokens: {[kw for kw, _ in top_tokens]}")
448
+ # Don't add keywords that might be legitimate
449
+ if (cnt >= 2 and # Must appear at least twice
450
+ len(kw) >= 4 and # Must be at least 4 characters
451
+ kw not in legitimate_keywords): # Not in legitimate set
452
+ filtered_tokens.append((kw, cnt))
453
+ keyword_store.add_keyword(kw, cnt)
454
+
455
+ if filtered_tokens:
456
+ print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
457
+ else:
458
+ print("✅ No new suspicious keywords learned (good sign!)")
459
+
460
+ print(f"🎯 Dynamic keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
461
+
462
+ # Training summary
463
+ print("\n" + "="*60)
464
+ print("🎉 AIWAF ENHANCED TRAINING COMPLETE")
465
+ print("="*60)
466
+ print(f"📊 Training Data: {len(parsed)} log entries processed")
467
+ print(f"🤖 AI Model: Trained with {len(feature_cols)} features")
468
+ print(f"🚫 Blocked IPs: {blocked_count if 'blocked_count' in locals() else 0} suspicious IPs blocked")
469
+ print(f"🔑 Keywords: {len(filtered_tokens)} new suspicious keywords learned")
470
+ print(f"🛡️ Exemptions: {len(exempted_ips)} IPs protected from blocking")
471
+ print(f"✅ Enhanced protection now active with context-aware filtering!")
472
+ print("="*60)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.9.1.9
3
+ Version: 0.1.9.2.1
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -25,7 +25,13 @@ Dynamic: requires-python
25
25
  # AI‑WAF
26
26
 
27
27
  > A self‑learning, Django‑friendly Web Application Firewall
28
- > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, exempt path awareness, and daily retraining.
28
+ > with **enhanced context-aware protection**, rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, **smart keyword learning**, file‑extension probing detection, exempt path awareness, and daily retraining.
29
+
30
+ **🆕 Latest Enhancements:**
31
+ - ✅ **Smart Keyword Filtering** - Prevents blocking legitimate pages like `/profile/`
32
+ - ✅ **Granular Reset Commands** - Clear specific data types (`--blacklist`, `--keywords`, `--exemptions`)
33
+ - ✅ **Context-Aware Learning** - Only learns from suspicious requests, not legitimate site functionality
34
+ - ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
29
35
 
30
36
  ---
31
37
 
@@ -88,9 +94,19 @@ aiwaf/
88
94
  - Burst count
89
95
  - Total 404s
90
96
 
91
- - **Dynamic Keyword Extraction & Cleanup**
92
- - Every retrain adds top 10 keyword segments from 4xx/5xx paths
93
- - **If a path is added to `AIWAF_EXEMPT_PATHS`, its keywords are automatically removed from the database**
97
+ - **Enhanced Dynamic Keyword Learning with Django Route Protection**
98
+ - **Smart Context-Aware Learning**: Only learns keywords from suspicious requests on non-existent paths
99
+ - **Automatic Django Route Extraction**: Automatically excludes keywords from:
100
+ - Valid Django URL patterns (`/profile/`, `/admin/`, `/api/`, etc.)
101
+ - Django app names and model names (users, posts, categories)
102
+ - View function names and URL namespaces
103
+ - **Unified Logic**: Both trainer and middleware use identical legitimate keyword detection
104
+ - **Configuration Options**:
105
+ - `AIWAF_ALLOWED_PATH_KEYWORDS` - Explicitly allow certain keywords in legitimate paths
106
+ - `AIWAF_EXEMPT_KEYWORDS` - Keywords that should never trigger blocking
107
+ - **Automatic Cleanup**: Keywords from `AIWAF_EXEMPT_PATHS` are automatically removed from the database
108
+ - **False Positive Prevention**: Stops learning legitimate site functionality as "malicious"
109
+ - **Inherent Malicious Detection**: Middleware also blocks obviously malicious keywords (`hack`, `exploit`, `attack`) even if not yet learned
94
110
 
95
111
  - **File‑Extension Probing Detection**
96
112
  Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
@@ -196,20 +212,44 @@ python manage.py add_ipexemption <ip-address> --reason "optional reason"
196
212
 
197
213
  ### Resetting AI-WAF
198
214
 
199
- Clear all blacklist and exemption entries:
215
+ The `aiwaf_reset` command provides **granular control** for clearing different types of data:
200
216
 
201
217
  ```bash
202
- # Clear everything (with confirmation prompt)
218
+ # Clear everything (default, backward compatible; asks for confirmation)
203
219
  python manage.py aiwaf_reset
204
220
 
205
- # Clear everything without confirmation
221
+ # Clear everything without confirmation prompt
206
222
  python manage.py aiwaf_reset --confirm
207
223
 
208
- # Clear only blacklist entries
209
- python manage.py aiwaf_reset --blacklist-only
224
+ # 🆕 GRANULAR CONTROL - Clear specific data types
225
+ python manage.py aiwaf_reset --blacklist # Clear only blocked IPs
226
+ python manage.py aiwaf_reset --exemptions # Clear only exempted IPs
227
+ python manage.py aiwaf_reset --keywords # Clear only learned keywords
228
+
229
+ # 🔧 COMBINE OPTIONS - Mix and match as needed
230
+ python manage.py aiwaf_reset --blacklist --keywords # Keep exemptions
231
+ python manage.py aiwaf_reset --exemptions --keywords # Keep blacklist
232
+ python manage.py aiwaf_reset --blacklist --exemptions # Keep keywords
233
+
234
+ # 🚀 COMMON USE CASES
235
+ # Fix false positive keywords (like "profile" blocking legitimate pages)
236
+ python manage.py aiwaf_reset --keywords --confirm
237
+ python manage.py detect_and_train # Retrain with enhanced filtering
238
+
239
+ # Clear blocked IPs but preserve exemptions and learning
240
+ python manage.py aiwaf_reset --blacklist --confirm
241
+
242
+ # Legacy support (still works for backward compatibility)
243
+ python manage.py aiwaf_reset --blacklist-only # Legacy: blacklist only
244
+ python manage.py aiwaf_reset --exemptions-only # Legacy: exemptions only
245
+ ```
210
246
 
211
- # Clear only exemption entries
212
- python manage.py aiwaf_reset --exemptions-only
247
+ **Enhanced Feedback:**
248
+ ```bash
249
+ $ python manage.py aiwaf_reset --keywords
250
+ 🔧 AI-WAF Reset: Clear 15 learned keywords
251
+ Are you sure you want to proceed? [y/N]: y
252
+ ✅ Reset complete: Deleted 15 learned keywords
213
253
  ```
214
254
 
215
255
  ### Checking Dependencies
@@ -482,6 +522,21 @@ AIWAF_EXEMPT_PATHS = [ # optional but highly recommended
482
522
  "/media/",
483
523
  "/health/",
484
524
  ]
525
+
526
+ # 🆕 ENHANCED KEYWORD FILTERING OPTIONS
527
+ AIWAF_ALLOWED_PATH_KEYWORDS = [ # Keywords allowed in legitimate paths
528
+ "profile", "user", "account", "settings", "dashboard",
529
+ "admin", "api", "auth", "search", "contact", "about",
530
+ # Add your site-specific legitimate keywords
531
+ "buddycraft", "sc2", "starcraft", # Example: gaming site keywords
532
+ ]
533
+
534
+ AIWAF_EXEMPT_KEYWORDS = [ # Keywords that never trigger blocking
535
+ "api", "webhook", "health", "static", "media",
536
+ "upload", "download", "backup", "profile"
537
+ ]
538
+
539
+ AIWAF_DYNAMIC_TOP_N = 10 # Number of dynamic keywords to learn (default: 10)
485
540
  ```
486
541
 
487
542
  > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` — they evolve dynamically.
@@ -680,6 +735,65 @@ python manage.py detect_and_train
680
735
 
681
736
  ---
682
737
 
738
+ ## 🔧 Troubleshooting
739
+
740
+ ### Legitimate Pages Being Blocked
741
+
742
+ **Problem**: Users can't access legitimate pages like `/en/profile/` due to keyword blocking.
743
+
744
+ **Cause**: AIWAF learned legitimate keywords (like "profile") as suspicious from previous traffic.
745
+
746
+ **Solution**:
747
+ ```bash
748
+ # 1. Clear problematic learned keywords
749
+ python manage.py aiwaf_reset --keywords --confirm
750
+
751
+ # 2. Add legitimate keywords to settings
752
+ # In settings.py:
753
+ # AIWAF_ALLOWED_PATH_KEYWORDS = [
754
+ #     "profile", "user", "account", "dashboard",
755
+ #     # Add your site-specific keywords
756
+ # ]
757
+
758
+ # 3. Retrain with enhanced filtering (won't learn legitimate keywords)
759
+ python manage.py detect_and_train
760
+
761
+ # 4. Test - legitimate pages should now work!
762
+ ```
763
+
764
+ ### Preventing Future False Positives
765
+
766
+ Configure AIWAF to recognize your site's legitimate keywords:
767
+
768
+ ```python
769
+ # settings.py
770
+ AIWAF_ALLOWED_PATH_KEYWORDS = [
771
+ # Common legitimate keywords
772
+ "profile", "user", "account", "settings", "dashboard",
773
+ "admin", "search", "contact", "about", "help",
774
+
775
+ # Your site-specific keywords
776
+ "buddycraft", "sc2", "starcraft", # Gaming site example
777
+ "shop", "cart", "checkout", # E-commerce example
778
+ "blog", "article", "news", # Content site example
779
+ ]
780
+ ```
781
+
782
+ ### Reset Command Options
783
+
784
+ ```bash
785
+ # Clear everything (safest for troubleshooting)
786
+ python manage.py aiwaf_reset --confirm
787
+
788
+ # Clear only problematic keywords
789
+ python manage.py aiwaf_reset --keywords --confirm
790
+
791
+ # Clear blocked IPs but keep exemptions
792
+ python manage.py aiwaf_reset --blacklist --confirm
793
+ ```
794
+
795
+ ---
796
+
683
797
  ## 🧠 How It Works
684
798
 
685
799
  | Middleware | Purpose |
@@ -1,12 +1,12 @@
1
- aiwaf/__init__.py,sha256=BGGn_OwueGmxbbWRV-PwE7HGpzB5Ol61jhYI6z4tHug,220
1
+ aiwaf/__init__.py,sha256=SLcMD_OTXr3DXtHpuCKxFvNl_pjrg-J5KLcJ-Swutuo,220
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
4
4
  aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
5
- aiwaf/middleware.py,sha256=EMAQA_Gnz0jv4nevlognT921ZeBEro13J_DSv_mQ3Dw,15482
5
+ aiwaf/middleware.py,sha256=8EC4AKfUjHhmVSKpquimkMUebBekr92pqyVF97wlbx0,27408
6
6
  aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
7
7
  aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
8
- aiwaf/storage.py,sha256=vswojWT8KEH5h24TQ9wwYCsxRUOjaAKudtFJnFxNHKk,9914
9
- aiwaf/trainer.py,sha256=1RPjWVOdGQ3qSrjFopw8HKu7THVTMvF4nNYouij6i_A,10685
8
+ aiwaf/storage.py,sha256=5ImrZMRn3u7HNsPH0fDjWhDrD2tgG2IHVnOXtLz0fk4,10253
9
+ aiwaf/trainer.py,sha256=47HP81kTaJCOfyONUm18r-FVc1YeRvcliO_akpX3BqI,18613
10
10
  aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
11
11
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,7 +14,7 @@ aiwaf/management/commands/add_exemption.py,sha256=U_ByfJw1EstAZ8DaSoRb97IGwYzXs0
14
14
  aiwaf/management/commands/add_ipexemption.py,sha256=sSf3d9hGK9RqqlBYkCrnrd8KZWGT-derSpoWnEY4H60,952
15
15
  aiwaf/management/commands/aiwaf_diagnose.py,sha256=nXFRhq66N4QC3e4scYJ2sUngJce-0yDxtBO3R2BllRM,6134
16
16
  aiwaf/management/commands/aiwaf_logging.py,sha256=FCIqULn2tii2vD9VxL7vk3PV4k4vr7kaA00KyaCExYY,7692
17
- aiwaf/management/commands/aiwaf_reset.py,sha256=wG7EcdPqkxmjF2ivQOmZ7swuvHVJ_OVLgOEijGLvmFs,5586
17
+ aiwaf/management/commands/aiwaf_reset.py,sha256=pcF0zOYDSqjpCwDtk2HYJZLgr76td8OFRENtl20c1dQ,7472
18
18
  aiwaf/management/commands/check_dependencies.py,sha256=GOZl00pDwW2cJjDvIaCeB3yWxmeYcJDRTIpmOTLvy2c,37204
19
19
  aiwaf/management/commands/clear_blacklist.py,sha256=Tisedg0EVlc3E01mA3hBZQorwMzc5j1cns-oYshja0g,2770
20
20
  aiwaf/management/commands/clear_cache.py,sha256=cdnuTgxkhKLqT_6k6yTcEBlREovNRQxAE51ceXlGYMA,647
@@ -28,8 +28,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
28
28
  aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
29
29
  aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
30
  aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
31
- aiwaf-0.1.9.1.9.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
32
- aiwaf-0.1.9.1.9.dist-info/METADATA,sha256=YeyuawG8pPFTBrOOBp8MayiGxCdyywAFvKKMY8dIk-M,22145
33
- aiwaf-0.1.9.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
- aiwaf-0.1.9.1.9.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
35
- aiwaf-0.1.9.1.9.dist-info/RECORD,,
31
+ aiwaf-0.1.9.2.1.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
32
+ aiwaf-0.1.9.2.1.dist-info/METADATA,sha256=OgVYn0PPKBDcGCVlhYEFa7uc9XU4Rn-0ZS-W2CE9a1Q,26824
33
+ aiwaf-0.1.9.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ aiwaf-0.1.9.2.1.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
35
+ aiwaf-0.1.9.2.1.dist-info/RECORD,,