PyPI - aiwaf - Versions diffs - 0.1.9.1.8__py3-none-any.whl → 0.1.9.2.0__py3-none-any.whl - Mend

aiwaf-0.1.9.1.8-py3-none-any.whl → aiwaf-0.1.9.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiwaf might be problematic. Click here for more details.

Files changed (10)

aiwaf/__init__.py +1 -1
aiwaf/management/commands/aiwaf_reset.py +107 -60
aiwaf/middleware.py +210 -42
aiwaf/storage.py +23 -0
aiwaf/trainer.py +90 -9
{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/METADATA +121 -12
{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/RECORD +10 -10
{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/WHEEL +0 -0
{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/licenses/LICENSE +0 -0
{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/top_level.txt +0 -0

aiwaf/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 default_app_config = "aiwaf.apps.AiwafConfig"
-__version__ = "0.1.9.1.8"
+__version__ = "0.1.9.2.0"
 # Note: Middleware classes are available from aiwaf.middleware
 # Import them only when needed to avoid circular imports during Django app loading

aiwaf/management/commands/aiwaf_reset.py CHANGED Viewed

@@ -1,76 +1,115 @@
 from django.core.management.base import BaseCommand
-from aiwaf.storage import get_blacklist_store, get_exemption_store
+from aiwaf.storage import get_blacklist_store, get_exemption_store, get_keyword_store
 import sys
 class Command(BaseCommand):
-    help = 'Reset AI-WAF by clearing all blacklist and exemption (whitelist) entries'
+    help = 'Reset AI-WAF by clearing blacklist, exemption, and/or keyword entries'
     def add_arguments(self, parser):
         parser.add_argument(
-            '--blacklist-only',
+            '--blacklist',
             action='store_true',
-            help='Clear only blacklist entries, keep exemptions'
+            help='Clear blacklist entries (default: all)'
         )
         parser.add_argument(
-            '--exemptions-only',
+            '--exemptions',
+            action='store_true',
+            help='Clear exemption entries (default: all)'
+        )
+        parser.add_argument(
+            '--keywords',
             action='store_true',
-            help='Clear only exemption entries, keep blacklist'
+            help='Clear learned dynamic keywords (default: all)'
         )
         parser.add_argument(
             '--confirm',
             action='store_true',
             help='Skip confirmation prompt'
         )
+        # Legacy flags for backward compatibility
+        parser.add_argument(
+            '--blacklist-only',
+            action='store_true',
+            help='(Legacy) Clear only blacklist entries'
+        )
+        parser.add_argument(
+            '--exemptions-only',
+            action='store_true',
+            help='(Legacy) Clear only exemption entries'
+        )
     def handle(self, *args, **options):
-        blacklist_only = options['blacklist_only']
-        exemptions_only = options['exemptions_only']
-        confirm = options['confirm']
+        # Parse arguments
+        blacklist_flag = options.get('blacklist', False)
+        exemptions_flag = options.get('exemptions', False)
+        keywords_flag = options.get('keywords', False)
+        confirm = options.get('confirm', False)
+        # Legacy support
+        blacklist_only = options.get('blacklist_only', False)
+        exemptions_only = options.get('exemptions_only', False)
+        # Handle legacy flags
+        if blacklist_only:
+            blacklist_flag = True
+            exemptions_flag = False
+            keywords_flag = False
+        elif exemptions_only:
+            blacklist_flag = False
+            exemptions_flag = True
+            keywords_flag = False
+        # If no specific flags, clear everything
+        if not (blacklist_flag or exemptions_flag or keywords_flag):
+            blacklist_flag = exemptions_flag = keywords_flag = True
         try:
             blacklist_store = get_blacklist_store()
             exemption_store = get_exemption_store()
+            keyword_store = get_keyword_store()
         except Exception as e:
             self.stdout.write(self.style.ERROR(f'Error initializing stores: {e}'))
             return
         # Count current entries safely
-        try:
-            blacklist_entries = blacklist_store.get_all()
-            blacklist_count = len(blacklist_entries)
-        except Exception as e:
-            self.stdout.write(self.style.WARNING(f'Warning: Could not count blacklist entries: {e}'))
-            blacklist_count = 0
-            blacklist_entries = []
+        counts = {'blacklist': 0, 'exemptions': 0, 'keywords': 0}
+        entries = {'blacklist': [], 'exemptions': [], 'keywords': []}
-        try:
-            exemption_entries = exemption_store.get_all()
-            exemption_count = len(exemption_entries)
-        except Exception as e:
-            self.stdout.write(self.style.WARNING(f'Warning: Could not count exemption entries: {e}'))
-            exemption_count = 0
-            exemption_entries = []
+        if blacklist_flag:
+            try:
+                entries['blacklist'] = blacklist_store.get_all()
+                counts['blacklist'] = len(entries['blacklist'])
+            except Exception as e:
+                self.stdout.write(self.style.WARNING(f'Warning: Could not count blacklist entries: {e}'))
-        if blacklist_only and exemptions_only:
-            self.stdout.write(self.style.ERROR('Cannot use both --blacklist-only and --exemptions-only flags'))
-            return
+        if exemptions_flag:
+            try:
+                entries['exemptions'] = exemption_store.get_all()
+                counts['exemptions'] = len(entries['exemptions'])
+            except Exception as e:
+                self.stdout.write(self.style.WARNING(f'Warning: Could not count exemption entries: {e}'))
-        # Determine what to clear
-        if blacklist_only:
-            action = f"Clear {blacklist_count} blacklist entries"
-            clear_blacklist = True
-            clear_exemptions = False
-        elif exemptions_only:
-            action = f"Clear {exemption_count} exemption entries"
-            clear_blacklist = False
-            clear_exemptions = True
-        else:
-            action = f"Clear {blacklist_count} blacklist entries and {exemption_count} exemption entries"
-            clear_blacklist = True
-            clear_exemptions = True
+        if keywords_flag:
+            try:
+                entries['keywords'] = keyword_store.get_all_keywords()
+                counts['keywords'] = len(entries['keywords'])
+            except Exception as e:
+                self.stdout.write(self.style.WARNING(f'Warning: Could not count keyword entries: {e}'))
+        # Build action description
+        actions = []
+        if blacklist_flag:
+            actions.append(f"{counts['blacklist']} blacklist entries")
+        if exemptions_flag:
+            actions.append(f"{counts['exemptions']} exemption entries")
+        if keywords_flag:
+            actions.append(f"{counts['keywords']} learned keywords")
+        action = "Clear " + ", ".join(actions)
         # Show what will be cleared
-        self.stdout.write(f"AI-WAF Reset: {action}")
+        self.stdout.write(f"🔧 AI-WAF Reset: {action}")
         if not confirm:
             try:
@@ -83,12 +122,12 @@ class Command(BaseCommand):
                 return
         # Perform the reset
-        deleted_counts = {'blacklist': 0, 'exemptions': 0, 'errors': []}
+        deleted_counts = {'blacklist': 0, 'exemptions': 0, 'keywords': 0, 'errors': []}
-        if clear_blacklist:
+        if blacklist_flag:
             # Clear blacklist entries
             try:
-                for entry in blacklist_entries:
+                for entry in entries['blacklist']:
                     try:
                         blacklist_store.remove_ip(entry['ip_address'])
                         deleted_counts['blacklist'] += 1
@@ -97,10 +136,10 @@ class Command(BaseCommand):
             except Exception as e:
                 deleted_counts['errors'].append(f"Error clearing blacklist: {e}")
-        if clear_exemptions:
+        if exemptions_flag:
             # Clear exemption entries
             try:
-                for entry in exemption_entries:
+                for entry in entries['exemptions']:
                     try:
                         exemption_store.remove_ip(entry['ip_address'])
                         deleted_counts['exemptions'] += 1
@@ -109,26 +148,34 @@ class Command(BaseCommand):
             except Exception as e:
                 deleted_counts['errors'].append(f"Error clearing exemptions: {e}")
+        if keywords_flag:
+            # Clear keyword entries
+            try:
+                for keyword in entries['keywords']:
+                    try:
+                        keyword_store.remove_keyword(keyword)
+                        deleted_counts['keywords'] += 1
+                    except Exception as e:
+                        deleted_counts['errors'].append(f"Error removing keyword '{keyword}': {e}")
+            except Exception as e:
+                deleted_counts['errors'].append(f"Error clearing keywords: {e}")
         # Report results
         if deleted_counts['errors']:
             for error in deleted_counts['errors']:
                 self.stdout.write(self.style.WARNING(f"⚠️  {error}"))
-        if clear_blacklist and clear_exemptions:
-            self.stdout.write(
-                self.style.SUCCESS(
-                    f"✅ Reset complete: Deleted {deleted_counts['blacklist']} blacklist entries "
-                    f"and {deleted_counts['exemptions']} exemption entries"
-                )
-            )
-        elif clear_blacklist:
-            self.stdout.write(
-                self.style.SUCCESS(f"✅ Blacklist cleared: Deleted {deleted_counts['blacklist']} entries")
-            )
-        elif clear_exemptions:
-            self.stdout.write(
-                self.style.SUCCESS(f"✅ Exemptions cleared: Deleted {deleted_counts['exemptions']} entries")
-            )
+        # Build success message
+        success_parts = []
+        if blacklist_flag:
+            success_parts.append(f"{deleted_counts['blacklist']} blacklist entries")
+        if exemptions_flag:
+            success_parts.append(f"{deleted_counts['exemptions']} exemption entries")
+        if keywords_flag:
+            success_parts.append(f"{deleted_counts['keywords']} learned keywords")
+        success_message = "✅ Reset complete: Deleted " + ", ".join(success_parts)
+        self.stdout.write(self.style.SUCCESS(success_message))
         if deleted_counts['errors']:
             self.stdout.write(

aiwaf/middleware.py CHANGED Viewed

@@ -3,6 +3,7 @@
 import time
 import re
 import os
+import warnings
 import numpy as np
 import joblib
 from django.db.models import UUIDField
@@ -82,6 +83,93 @@ class IPAndKeywordBlockMiddleware:
     def __init__(self, get_response):
         self.get_response = get_response
         self.safe_prefixes = self._collect_safe_prefixes()
+        self.exempt_keywords = self._get_exempt_keywords()
+        self.legitimate_path_keywords = self._get_legitimate_path_keywords()
+    def _get_exempt_keywords(self):
+        """Get keywords that should be exempt from blocking"""
+        exempt_tokens = set()
+        # Extract from exempt paths
+        for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
+            for seg in re.split(r"\W+", path.strip("/").lower()):
+                if len(seg) > 3:
+                    exempt_tokens.add(seg)
+        # Add explicit exempt keywords from settings
+        exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
+        exempt_tokens.update(exempt_keywords)
+        return exempt_tokens
+    def _get_legitimate_path_keywords(self):
+        """Get keywords that are legitimate in URL paths"""
+        # Extract from Django URL patterns
+        legitimate_keywords = set()
+        # Add common legitimate path segments
+        default_legitimate = {
+            "profile", "user", "account", "settings", "dashboard",
+            "home", "about", "contact", "help", "search", "list",
+            "view", "edit", "create", "update", "delete", "detail",
+            "api", "auth", "login", "logout", "register", "signup",
+            "reset", "confirm", "activate", "verify", "page",
+            "category", "tag", "post", "article", "blog", "news"
+        }
+        legitimate_keywords.update(default_legitimate)
+        # Add from Django settings
+        allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
+        legitimate_keywords.update(allowed_path_keywords)
+        # Extract from actual Django URL patterns
+        resolver = get_resolver()
+        self._extract_path_keywords_from_urls(resolver.url_patterns, legitimate_keywords)
+        return legitimate_keywords
+    def _extract_path_keywords_from_urls(self, url_patterns, keywords, prefix=""):
+        """Extract legitimate keywords from Django URL patterns"""
+        for pattern in url_patterns:
+            if hasattr(pattern, 'url_patterns'):  # include()
+                new_prefix = prefix + str(pattern.pattern).strip('^$/')
+                self._extract_path_keywords_from_urls(pattern.url_patterns, keywords, new_prefix)
+            else:
+                # Extract static path segments from URL pattern
+                pattern_str = str(pattern.pattern).strip('^$/')
+                full_path = (prefix + '/' + pattern_str).strip('/')
+                # Extract meaningful segments (not regex patterns)
+                segments = re.findall(r'[a-zA-Z]{3,}', full_path)
+                for seg in segments:
+                    if seg.lower() not in {'http', 'https', 'www'}:
+                        keywords.add(seg.lower())
+    def _is_malicious_context(self, request, segment):
+        """Determine if a keyword appears in a malicious context"""
+        path = request.path.lower()
+        # Check if this is a query parameter attack
+        query_string = request.META.get('QUERY_STRING', '').lower()
+        if segment in query_string and any(attack_pattern in query_string for attack_pattern in [
+            'union', 'select', 'drop', 'insert', 'script', 'alert', 'eval'
+        ]):
+            return True
+        # Check if this looks like a file extension attack
+        if segment.startswith('.') and not path_exists_in_django(request.path):
+            return True
+        # Check if this looks like a directory traversal
+        if '../' in path or '..\\' in path:
+            return True
+        # Check if accessing non-existent paths with suspicious extensions
+        if (not path_exists_in_django(request.path) and
+            any(ext in segment for ext in ['.php', '.asp', '.jsp', '.cgi'])):
+            return True
+        return False
     def _collect_safe_prefixes(self):
         resolver = get_resolver()
@@ -102,35 +190,68 @@ class IPAndKeywordBlockMiddleware:
         return prefixes
     def __call__(self, request):
-        raw_path = request.path.lower()
+        # First exemption check - early exit for exempt requests
         if is_exempt(request):
             return self.get_response(request)
+        raw_path = request.path.lower()
         ip = get_ip(request)
         path = raw_path.lstrip("/")
-        # BlacklistManager now handles exemption checking internally
+        # Additional IP-level exemption check
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return self.get_response(request)
+        # BlacklistManager handles exemption checking internally
         if BlacklistManager.is_blocked(ip):
             return JsonResponse({"error": "blocked"}, status=403)
+        # Check if path exists in Django - if yes, be more lenient
+        path_exists = path_exists_in_django(request.path)
         keyword_store = get_keyword_store()
         segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
+        # Only learn keywords from non-existent paths or suspicious contexts
         for seg in segments:
-            keyword_store.add_keyword(seg)
+            if not path_exists or self._is_malicious_context(request, seg):
+                keyword_store.add_keyword(seg)
         dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
         all_kw = set(STATIC_KW) | set(dynamic_top)
-        suspicious_kw = {
-            kw for kw in all_kw
-            if not any(path.startswith(prefix) for prefix in self.safe_prefixes if prefix)
-        }
+        # Enhanced filtering logic
+        suspicious_kw = set()
+        for kw in all_kw:
+            # Skip if keyword is explicitly exempted
+            if kw in self.exempt_keywords:
+                continue
+            # Skip if this is a legitimate path keyword and path exists in Django
+            if (kw in self.legitimate_path_keywords and
+                path_exists and
+                not self._is_malicious_context(request, kw)):
+                continue
+            # Skip if path starts with safe prefix
+            if any(path.startswith(prefix) for prefix in self.safe_prefixes if prefix):
+                continue
+            suspicious_kw.add(kw)
+        # Check segments against suspicious keywords
         for seg in segments:
             if seg in suspicious_kw:
-                # BlacklistManager.block() now checks exemptions internally
-                BlacklistManager.block(ip, f"Keyword block: {seg}")
-                # Check again after blocking attempt (exempted IPs won't be blocked)
-                if BlacklistManager.is_blocked(ip):
-                    return JsonResponse({"error": "blocked"}, status=403)
+                # Additional context check before blocking
+                if self._is_malicious_context(request, seg) or not path_exists:
+                    # Double-check exemption before blocking
+                    if not exemption_store.is_exempted(ip):
+                        BlacklistManager.block(ip, f"Keyword block: {seg} (context: malicious)")
+                        # Check again after blocking attempt (exempted IPs won't be blocked)
+                        if BlacklistManager.is_blocked(ip):
+                            return JsonResponse({"error": "blocked"}, status=403)
         return self.get_response(request)
@@ -143,22 +264,32 @@ class RateLimitMiddleware:
         self.FLOOD = getattr(settings, "AIWAF_RATE_FLOOD", 40)    # hard limit
     def __call__(self, request):
+        # First exemption check - early exit for exempt requests
         if is_exempt(request):
             return self.get_response(request)
         ip = get_ip(request)
+        # Additional IP-level exemption check
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return self.get_response(request)
         key = f"ratelimit:{ip}"
         now = time.time()
         timestamps = cache.get(key, [])
         timestamps = [t for t in timestamps if now - t < self.WINDOW]
         timestamps.append(now)
         cache.set(key, timestamps, timeout=self.WINDOW)
         if len(timestamps) > self.FLOOD:
-            # BlacklistManager.block() now checks exemptions internally
-            BlacklistManager.block(ip, "Flood pattern")
-            # Check if actually blocked (exempted IPs won't be blocked)
-            if BlacklistManager.is_blocked(ip):
-                return JsonResponse({"error": "blocked"}, status=403)
+            # Double-check exemption before blocking
+            if not exemption_store.is_exempted(ip):
+                BlacklistManager.block(ip, "Flood pattern")
+                # Check if actually blocked (exempted IPs won't be blocked)
+                if BlacklistManager.is_blocked(ip):
+                    return JsonResponse({"error": "blocked"}, status=403)
         if len(timestamps) > self.MAX:
             return JsonResponse({"error": "too_many_requests"}, status=429)
         return self.get_response(request)
@@ -174,19 +305,37 @@ class AIAnomalyMiddleware(MiddlewareMixin):
         self.model = MODEL
     def process_request(self, request):
+        # First exemption check - early exit for exempt requests
         if is_exempt(request):
             return None
         request._start_time = time.time()
         ip = get_ip(request)
-        # BlacklistManager now handles exemption checking internally
+        # Additional IP-level exemption check
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return None
+        # BlacklistManager handles exemption checking internally
         if BlacklistManager.is_blocked(ip):
             return JsonResponse({"error": "blocked"}, status=403)
         return None
     def process_response(self, request, response):
+        # First exemption check - early exit for exempt requests
         if is_exempt(request):
             return response
         ip = get_ip(request)
+        # Additional IP-level exemption check
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return response
         now = time.time()
         key = f"aiwaf:{ip}"
         data = cache.get(key, [])
@@ -251,17 +400,20 @@ class AIAnomalyMiddleware(MiddlewareMixin):
                     # Anomalous but looks legitimate - don't block
                     pass
                 else:
-                    # Block if it shows clear signs of malicious behavior
-                    BlacklistManager.block(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
-                    # Check if actually blocked (exempted IPs won't be blocked)
-                    if BlacklistManager.is_blocked(ip):
-                        return JsonResponse({"error": "blocked"}, status=403)
+                    # Double-check exemption before blocking
+                    if not exemption_store.is_exempted(ip):
+                        BlacklistManager.block(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
+                        # Check if actually blocked (exempted IPs won't be blocked)
+                        if BlacklistManager.is_blocked(ip):
+                            return JsonResponse({"error": "blocked"}, status=403)
             else:
                 # No recent data to analyze - be more conservative, only block on very suspicious current request
                 if kw_hits >= 2 or status_idx == STATUS_IDX.index("404"):
-                    BlacklistManager.block(ip, "AI anomaly + immediate suspicious behavior")
-                    if BlacklistManager.is_blocked(ip):
-                        return JsonResponse({"error": "blocked"}, status=403)
+                    # Double-check exemption before blocking
+                    if not exemption_store.is_exempted(ip):
+                        BlacklistManager.block(ip, "AI anomaly + immediate suspicious behavior")
+                        if BlacklistManager.is_blocked(ip):
+                            return JsonResponse({"error": "blocked"}, status=403)
         data.append((now, request.path, response.status_code, resp_time))
         data = [d for d in data if now - d[0] < self.WINDOW]
@@ -283,7 +435,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
             return None
         ip = get_ip(request)
-        # BlacklistManager now handles exemption checking internally
+        # Additional IP-level exemption check
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return None
         if request.method == "GET":
             # Store timestamp for this IP's GET request
@@ -300,11 +457,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
                 if not any(request.path.lower().startswith(login_path) for login_path in [
                     "/admin/login/", "/login/", "/accounts/login/", "/auth/login/", "/signin/"
                 ]):
-                    # BlacklistManager.block() now checks exemptions internally
-                    BlacklistManager.block(ip, "Direct POST without GET")
-                    # Check if actually blocked (exempted IPs won't be blocked)
-                    if BlacklistManager.is_blocked(ip):
-                        return JsonResponse({"error": "blocked"}, status=403)
+                    # Double-check exemption before blocking
+                    if not exemption_store.is_exempted(ip):
+                        BlacklistManager.block(ip, "Direct POST without GET")
+                        # Check if actually blocked (exempted IPs won't be blocked)
+                        if BlacklistManager.is_blocked(ip):
+                            return JsonResponse({"error": "blocked"}, status=403)
             else:
                 # Check timing - be more lenient for login paths
                 time_diff = time.time() - get_time
@@ -317,11 +475,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
                     min_time = 0.1  # Very short threshold for login forms
                 if time_diff < min_time:
-                    # BlacklistManager.block() now checks exemptions internally
-                    BlacklistManager.block(ip, f"Form submitted too quickly ({time_diff:.2f}s)")
-                    # Check if actually blocked (exempted IPs won't be blocked)
-                    if BlacklistManager.is_blocked(ip):
-                        return JsonResponse({"error": "blocked"}, status=403)
+                    # Double-check exemption before blocking
+                    if not exemption_store.is_exempted(ip):
+                        BlacklistManager.block(ip, f"Form submitted too quickly ({time_diff:.2f}s)")
+                        # Check if actually blocked (exempted IPs won't be blocked)
+                        if BlacklistManager.is_blocked(ip):
+                            return JsonResponse({"error": "blocked"}, status=403)
         return None
@@ -330,11 +489,19 @@ class UUIDTamperMiddleware(MiddlewareMixin):
     def process_view(self, request, view_func, view_args, view_kwargs):
         if is_exempt(request):
             return None
         uid = view_kwargs.get("uuid")
         if not uid:
             return None
         ip = get_ip(request)
+        # Additional IP-level exemption check
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return None
         app_label = view_func.__module__.split(".")[0]
         app_cfg   = apps.get_app_config(app_label)
         for Model in app_cfg.get_models():
@@ -345,8 +512,9 @@ class UUIDTamperMiddleware(MiddlewareMixin):
                 except (ValueError, TypeError):
                     continue
-        # BlacklistManager.block() now checks exemptions internally
-        BlacklistManager.block(ip, "UUID tampering")
-        # Check if actually blocked (exempted IPs won't be blocked)
-        if BlacklistManager.is_blocked(ip):
-            return JsonResponse({"error": "blocked"}, status=403)
+        # Double-check exemption before blocking
+        if not exemption_store.is_exempted(ip):
+            BlacklistManager.block(ip, "UUID tampering")
+            # Check if actually blocked (exempted IPs won't be blocked)
+            if BlacklistManager.is_blocked(ip):
+                return JsonResponse({"error": "blocked"}, status=403)

aiwaf/storage.py CHANGED Viewed

@@ -195,6 +195,16 @@ class ModelExemptionStore:
         except Exception as e:
             print(f"Error removing exemption for IP {ip}: {e}")
+    @staticmethod
+    def remove_ip(ip):
+        """Remove IP from exemption list (alias for remove_exemption)"""
+        ModelExemptionStore.remove_exemption(ip)
+    @staticmethod
+    def add_ip(ip, reason="Manual exemption"):
+        """Add IP to exemption list (alias for add_exemption)"""
+        ModelExemptionStore.add_exemption(ip, reason)
     @staticmethod
     def get_all_exempted_ips():
         """Get all exempted IPs"""
@@ -274,6 +284,19 @@ class ModelKeywordStore:
         except Exception:
             return []
+    @staticmethod
+    def get_all_keywords():
+        """Get all keywords"""
+        _import_models()
+        if DynamicKeyword is None:
+            return []
+        try:
+            return list(
+                DynamicKeyword.objects.all().values_list('keyword', flat=True)
+            )
+        except Exception:
+            return []
     @staticmethod
     def reset_keywords():
         """Reset all keyword counts"""

aiwaf/trainer.py CHANGED Viewed

@@ -51,17 +51,59 @@ def path_exists_in_django(path: str) -> bool:
 def remove_exempt_keywords() -> None:
+    """Remove exempt keywords from dynamic keyword storage"""
     keyword_store = get_keyword_store()
     exempt_tokens = set()
+    # Extract tokens from exempt paths
     for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
         for seg in re.split(r"\W+", path.strip("/").lower()):
             if len(seg) > 3:
                 exempt_tokens.add(seg)
+    # Add explicit exempt keywords from settings
+    explicit_exempt = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
+    exempt_tokens.update(explicit_exempt)
+    # Add legitimate path keywords to prevent them from being learned as suspicious
+    allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
+    exempt_tokens.update(allowed_path_keywords)
     # Remove exempt tokens from keyword storage
     for token in exempt_tokens:
         keyword_store.remove_keyword(token)
+    if exempt_tokens:
+        print(f"🧹 Removed {len(exempt_tokens)} exempt keywords from learning: {list(exempt_tokens)[:10]}")
+def get_legitimate_keywords() -> set:
+    """Get all legitimate keywords that shouldn't be learned as suspicious"""
+    legitimate = set()
+    # Common legitimate path segments
+    default_legitimate = {
+        "profile", "user", "users", "account", "accounts", "settings", "dashboard",
+        "home", "about", "contact", "help", "search", "list", "lists",
+        "view", "views", "edit", "create", "update", "delete", "detail", "details",
+        "api", "auth", "login", "logout", "register", "signup", "signin",
+        "reset", "confirm", "activate", "verify", "page", "pages",
+        "category", "categories", "tag", "tags", "post", "posts",
+        "article", "articles", "blog", "blogs", "news", "item", "items",
+        "admin", "administration", "manage", "manager", "control", "panel",
+        "config", "configuration", "option", "options", "preference", "preferences"
+    }
+    legitimate.update(default_legitimate)
+    # Add from Django settings
+    allowed_path_keywords = getattr(settings, "AIWAF_ALLOWED_PATH_KEYWORDS", [])
+    legitimate.update(allowed_path_keywords)
+    # Add exempt keywords
+    exempt_keywords = getattr(settings, "AIWAF_EXEMPT_KEYWORDS", [])
+    legitimate.update(exempt_keywords)
+    return legitimate
 def _read_all_logs() -> list[str]:
@@ -137,14 +179,20 @@ def _parse(line: str) -> dict | None:
 def train() -> None:
+    """Enhanced training with improved keyword filtering and exemption handling"""
+    print("🚀 Starting AIWAF enhanced training...")
+    # Remove exempt keywords first
     remove_exempt_keywords()
     # Remove any IPs in IPExemption from the blacklist using BlacklistManager
     exemption_store = get_exemption_store()
     exempted_ips = [entry['ip_address'] for entry in exemption_store.get_all()]
-    for ip in exempted_ips:
-        BlacklistManager.unblock(ip)
+    if exempted_ips:
+        print(f"🛡️  Found {len(exempted_ips)} exempted IPs - clearing from blacklist")
+        for ip in exempted_ips:
+            BlacklistManager.unblock(ip)
     raw_lines = _read_all_logs()
     if not raw_lines:
@@ -281,17 +329,50 @@ def train() -> None:
         print(f"   → Blocked {blocked_count}/{len(anomalous_ips)} anomalous IPs (others looked legitimate)")
     tokens = Counter()
+    legitimate_keywords = get_legitimate_keywords()
+    print(f"🔍 Learning keywords from {len(parsed)} parsed requests...")
     for r in parsed:
-        if (r["status"].startswith(("4", "5"))
-            and not path_exists_in_django(r["path"])):
+        # Only learn from suspicious requests (errors on non-existent paths)
+        if (r["status"].startswith(("4", "5")) and
+            not path_exists_in_django(r["path"]) and
+            not is_exempt_path(r["path"])):
             for seg in re.split(r"\W+", r["path"].lower()):
-                if len(seg) > 3 and seg not in STATIC_KW:
+                if (len(seg) > 3 and
+                    seg not in STATIC_KW and
+                    seg not in legitimate_keywords):  # Don't learn legitimate keywords
                     tokens[seg] += 1
     keyword_store = get_keyword_store()
-    top_tokens = tokens.most_common(10)
+    top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
+    # Additional filtering: only add keywords that appear suspicious enough
+    filtered_tokens = []
     for kw, cnt in top_tokens:
-        keyword_store.add_keyword(kw, cnt)
-    print(f"DynamicKeyword storage updated with top tokens: {[kw for kw, _ in top_tokens]}")
+        # Don't add keywords that might be legitimate
+        if (cnt >= 2 and  # Must appear at least twice
+            len(kw) >= 4 and  # Must be at least 4 characters
+            kw not in legitimate_keywords):  # Not in legitimate set
+            filtered_tokens.append((kw, cnt))
+            keyword_store.add_keyword(kw, cnt)
+    if filtered_tokens:
+        print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
+    else:
+        print("✅ No new suspicious keywords learned (good sign!)")
+    print(f"🎯 Dynamic keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
+    # Training summary
+    print("\n" + "="*60)
+    print("🎉 AIWAF ENHANCED TRAINING COMPLETE")
+    print("="*60)
+    print(f"📊 Training Data: {len(parsed)} log entries processed")
+    print(f"🤖 AI Model: Trained with {len(feature_cols)} features")
+    print(f"🚫 Blocked IPs: {blocked_count if 'blocked_count' in locals() else 0} suspicious IPs blocked")
+    print(f"🔑 Keywords: {len(filtered_tokens)} new suspicious keywords learned")
+    print(f"🛡️  Exemptions: {len(exempted_ips)} IPs protected from blocking")
+    print(f"✅ Enhanced protection now active with context-aware filtering!")
+    print("="*60)

{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiwaf
-Version: 0.1.9.1.8
+Version: 0.1.9.2.0
 Summary: AI-powered Web Application Firewall
 Home-page: https://github.com/aayushgauba/aiwaf
 Author: Aayush Gauba
@@ -25,7 +25,13 @@ Dynamic: requires-python
 # AI‑WAF
 > A self‑learning, Django‑friendly Web Application Firewall
-> with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, exempt path awareness, and daily retraining.
+> with **enhanced context-aware protection**, rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, **smart keyword learning**, file‑extension probing detection, exempt path awareness, and daily retraining.
+**🆕 Latest Enhancements:**
+- ✅ **Smart Keyword Filtering** - Prevents blocking legitimate pages like `/profile/`
+- ✅ **Granular Reset Commands** - Clear specific data types (`--blacklist`, `--keywords`, `--exemptions`)
+- ✅ **Context-Aware Learning** - Only learns from suspicious requests, not legitimate site functionality
+- ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
 ---
@@ -88,9 +94,14 @@ aiwaf/
   - Burst count
   - Total 404s
-- **Dynamic Keyword Extraction & Cleanup**
-  - Every retrain adds top 10 keyword segments from 4xx/5xx paths
-  - **If a path is added to `AIWAF_EXEMPT_PATHS`, its keywords are automatically removed from the database**
+- **Enhanced Dynamic Keyword Learning**
+  - **Smart Context-Aware Learning**: Only learns keywords from suspicious requests on non-existent paths
+  - **Legitimate Path Protection**: Automatically excludes keywords from valid Django URLs (like `/profile/`, `/admin/`)
+  - **Configuration Options**:
+    - `AIWAF_ALLOWED_PATH_KEYWORDS` - Explicitly allow certain keywords in legitimate paths
+    - `AIWAF_EXEMPT_KEYWORDS` - Keywords that should never trigger blocking
+  - **Automatic Cleanup**: Keywords from `AIWAF_EXEMPT_PATHS` are automatically removed from the database
+  - **False Positive Prevention**: Stops learning legitimate site functionality as "malicious"
 - **File‑Extension Probing Detection**
   Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
@@ -196,20 +207,44 @@ python manage.py add_ipexemption <ip-address> --reason "optional reason"
 ### Resetting AI-WAF
-Clear all blacklist and exemption entries:
+The `aiwaf_reset` command provides **granular control** for clearing different types of data:
 ```bash
-# Clear everything (with confirmation prompt)
+# Clear everything (default - backward compatible)
 python manage.py aiwaf_reset
-# Clear everything without confirmation
+# Clear everything without confirmation prompt
 python manage.py aiwaf_reset --confirm
-# Clear only blacklist entries
-python manage.py aiwaf_reset --blacklist-only
+# 🆕 GRANULAR CONTROL - Clear specific data types
+python manage.py aiwaf_reset --blacklist      # Clear only blocked IPs
+python manage.py aiwaf_reset --exemptions     # Clear only exempted IPs
+python manage.py aiwaf_reset --keywords       # Clear only learned keywords
+# 🔧 COMBINE OPTIONS - Mix and match as needed
+python manage.py aiwaf_reset --blacklist --keywords      # Keep exemptions
+python manage.py aiwaf_reset --exemptions --keywords     # Keep blacklist
+python manage.py aiwaf_reset --blacklist --exemptions    # Keep keywords
+# 🚀 COMMON USE CASES
+# Fix false positive keywords (like "profile" blocking legitimate pages)
+python manage.py aiwaf_reset --keywords --confirm
+python manage.py detect_and_train  # Retrain with enhanced filtering
+# Clear blocked IPs but preserve exemptions and learning
+python manage.py aiwaf_reset --blacklist --confirm
+# Legacy support (still works for backward compatibility)
+python manage.py aiwaf_reset --blacklist-only    # Legacy: blacklist only
+python manage.py aiwaf_reset --exemptions-only   # Legacy: exemptions only
+```
-# Clear only exemption entries
-python manage.py aiwaf_reset --exemptions-only
+**Enhanced Feedback:**
+```bash
+$ python manage.py aiwaf_reset --keywords
+🔧 AI-WAF Reset: Clear 15 learned keywords
+Are you sure you want to proceed? [y/N]: y
+✅ Reset complete: Deleted 15 learned keywords
 ```
 ### Checking Dependencies
@@ -482,6 +517,21 @@ AIWAF_EXEMPT_PATHS = [          # optional but highly recommended
     "/media/",
     "/health/",
 ]
+# 🆕 ENHANCED KEYWORD FILTERING OPTIONS
+AIWAF_ALLOWED_PATH_KEYWORDS = [  # Keywords allowed in legitimate paths
+    "profile", "user", "account", "settings", "dashboard",
+    "admin", "api", "auth", "search", "contact", "about",
+    # Add your site-specific legitimate keywords
+    "buddycraft", "sc2", "starcraft",  # Example: gaming site keywords
+]
+AIWAF_EXEMPT_KEYWORDS = [        # Keywords that never trigger blocking
+    "api", "webhook", "health", "static", "media",
+    "upload", "download", "backup", "profile"
+]
+AIWAF_DYNAMIC_TOP_N = 10        # Number of dynamic keywords to learn (default: 10)
 ```
 > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` — they evolve dynamically.
@@ -680,6 +730,65 @@ python manage.py detect_and_train
 ---
+## 🔧 Troubleshooting
+### Legitimate Pages Being Blocked
+**Problem**: Users can't access legitimate pages like `/en/profile/` due to keyword blocking.
+**Cause**: AIWAF learned legitimate keywords (like "profile") as suspicious from previous traffic.
+**Solution**:
+```bash
+# 1. Clear problematic learned keywords
+python manage.py aiwaf_reset --keywords --confirm
+# 2. Add legitimate keywords to settings
+# In settings.py:
+AIWAF_ALLOWED_PATH_KEYWORDS = [
+    "profile", "user", "account", "dashboard",
+    # Add your site-specific keywords
+]
+# 3. Retrain with enhanced filtering (won't learn legitimate keywords)
+python manage.py detect_and_train
+# 4. Test - legitimate pages should now work!
+```
+### Preventing Future False Positives
+Configure AIWAF to recognize your site's legitimate keywords:
+```python
+# settings.py
+AIWAF_ALLOWED_PATH_KEYWORDS = [
+    # Common legitimate keywords
+    "profile", "user", "account", "settings", "dashboard",
+    "admin", "search", "contact", "about", "help",
+    # Your site-specific keywords
+    "buddycraft", "sc2", "starcraft",  # Gaming site example
+    "shop", "cart", "checkout",        # E-commerce example
+    "blog", "article", "news",         # Content site example
+]
+```
+### Reset Command Options
+```bash
+# Clear everything (safest for troubleshooting)
+python manage.py aiwaf_reset --confirm
+# Clear only problematic keywords
+python manage.py aiwaf_reset --keywords --confirm
+# Clear blocked IPs but keep exemptions
+python manage.py aiwaf_reset --blacklist --confirm
+```
+---
 ## 🧠 How It Works
 | Middleware                         | Purpose                                                         |

{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-aiwaf/__init__.py,sha256=6Skp_JeKxRpqn9klnS2qAtRKuXdwAMCdNd6OVq8AKyk,220
+aiwaf/__init__.py,sha256=Bn2DcnLiYvx-vkOUfIbQtnKUDidKK1rAUNuXuPa36MM,220
 aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
 aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
 aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
-aiwaf/middleware.py,sha256=EMAQA_Gnz0jv4nevlognT921ZeBEro13J_DSv_mQ3Dw,15482
+aiwaf/middleware.py,sha256=D1HavBGJbpPneOtkkCVFddlOQwCdoWcugmHOvn5THDU,22614
 aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
 aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
-aiwaf/storage.py,sha256=UlCk-j0xtiEPlC7RFHGz811Dh-GP-Ce5ZSuKQp_GY2k,9565
-aiwaf/trainer.py,sha256=1RPjWVOdGQ3qSrjFopw8HKu7THVTMvF4nNYouij6i_A,10685
+aiwaf/storage.py,sha256=5ImrZMRn3u7HNsPH0fDjWhDrD2tgG2IHVnOXtLz0fk4,10253
+aiwaf/trainer.py,sha256=UHkfrbJI47bGJPCz0Vws6r23WvGpemMHf5ScHWG_I1I,14568
 aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
 aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,7 +14,7 @@ aiwaf/management/commands/add_exemption.py,sha256=U_ByfJw1EstAZ8DaSoRb97IGwYzXs0
 aiwaf/management/commands/add_ipexemption.py,sha256=sSf3d9hGK9RqqlBYkCrnrd8KZWGT-derSpoWnEY4H60,952
 aiwaf/management/commands/aiwaf_diagnose.py,sha256=nXFRhq66N4QC3e4scYJ2sUngJce-0yDxtBO3R2BllRM,6134
 aiwaf/management/commands/aiwaf_logging.py,sha256=FCIqULn2tii2vD9VxL7vk3PV4k4vr7kaA00KyaCExYY,7692
-aiwaf/management/commands/aiwaf_reset.py,sha256=wG7EcdPqkxmjF2ivQOmZ7swuvHVJ_OVLgOEijGLvmFs,5586
+aiwaf/management/commands/aiwaf_reset.py,sha256=pcF0zOYDSqjpCwDtk2HYJZLgr76td8OFRENtl20c1dQ,7472
 aiwaf/management/commands/check_dependencies.py,sha256=GOZl00pDwW2cJjDvIaCeB3yWxmeYcJDRTIpmOTLvy2c,37204
 aiwaf/management/commands/clear_blacklist.py,sha256=Tisedg0EVlc3E01mA3hBZQorwMzc5j1cns-oYshja0g,2770
 aiwaf/management/commands/clear_cache.py,sha256=cdnuTgxkhKLqT_6k6yTcEBlREovNRQxAE51ceXlGYMA,647
@@ -28,8 +28,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
 aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
 aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
-aiwaf-0.1.9.1.8.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
-aiwaf-0.1.9.1.8.dist-info/METADATA,sha256=MaHb3s4la_-tR6UZ5ht5-jbdvu6PO8mZavNYktvGVMM,22145
-aiwaf-0.1.9.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aiwaf-0.1.9.1.8.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
-aiwaf-0.1.9.1.8.dist-info/RECORD,,
+aiwaf-0.1.9.2.0.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
+aiwaf-0.1.9.2.0.dist-info/METADATA,sha256=HM_8Dh89XWhMKtLDkqrvA7fxzR97F9Ph1sY2bYOk9Mc,26414
+aiwaf-0.1.9.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aiwaf-0.1.9.2.0.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
+aiwaf-0.1.9.2.0.dist-info/RECORD,,

{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{aiwaf-0.1.9.1.8.dist-info → aiwaf-0.1.9.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

aiwaf 0.1.9.1.8__py3-none-any.whl → 0.1.9.2.0__py3-none-any.whl

Potentially problematic release.

aiwaf 0.1.9.1.8__py3-none-any.whl → 0.1.9.2.0__py3-none-any.whl