aiwaf 0.1.8.6__py3-none-any.whl → 0.1.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

@@ -1,14 +1,24 @@
1
- from .models import BlacklistEntry
1
+ # aiwaf/blacklist_manager.py
2
+
3
+ from .storage import get_blacklist_store
2
4
 
3
5
  class BlacklistManager:
4
6
  @staticmethod
5
7
  def block(ip, reason):
6
- BlacklistEntry.objects.get_or_create(ip_address=ip, defaults={"reason": reason})
8
+ store = get_blacklist_store()
9
+ store.add_ip(ip, reason)
7
10
 
8
11
  @staticmethod
9
12
  def is_blocked(ip):
10
- return BlacklistEntry.objects.filter(ip_address=ip).exists()
13
+ store = get_blacklist_store()
14
+ return store.is_blocked(ip)
11
15
 
12
16
  @staticmethod
13
17
  def all_blocked():
14
- return BlacklistEntry.objects.all()
18
+ store = get_blacklist_store()
19
+ return store.get_all()
20
+
21
+ @staticmethod
22
+ def unblock(ip):
23
+ store = get_blacklist_store()
24
+ store.remove_ip(ip)
@@ -1,5 +1,5 @@
1
1
  from django.core.management.base import BaseCommand, CommandError
2
- from aiwaf.models import IPExemption
2
+ from aiwaf.storage import get_exemption_store
3
3
 
4
4
  class Command(BaseCommand):
5
5
  help = 'Add an IP address to the IPExemption list (prevents blacklisting)'
@@ -11,12 +11,13 @@ class Command(BaseCommand):
11
11
  def handle(self, *args, **options):
12
12
  ip = options['ip']
13
13
  reason = options['reason']
14
- obj, created = IPExemption.objects.get_or_create(ip_address=ip, defaults={'reason': reason})
15
- if not created:
14
+
15
+ store = get_exemption_store()
16
+
17
+ if store.is_exempted(ip):
16
18
  self.stdout.write(self.style.WARNING(f'IP {ip} is already exempted.'))
17
19
  else:
20
+ store.add_ip(ip, reason)
18
21
  self.stdout.write(self.style.SUCCESS(f'IP {ip} added to exemption list.'))
19
- if reason:
20
- obj.reason = reason
21
- obj.save()
22
- self.stdout.write(self.style.SUCCESS(f'Reason set to: {reason}'))
22
+ if reason:
23
+ self.stdout.write(self.style.SUCCESS(f'Reason: {reason}'))
@@ -0,0 +1,166 @@
1
+ from django.core.management.base import BaseCommand
2
+ from django.conf import settings
3
+ import os
4
+
5
class Command(BaseCommand):
    """Report on, explain, or clear the AI-WAF middleware request logs.

    Flags are checked in this order: ``--enable``, ``--disable``, ``--clear``.
    With none of them (or with ``--status``) the current status is printed.
    ``--enable`` and ``--disable`` only print the settings a user should
    change; this command never edits settings itself.
    """

    help = 'Manage AI-WAF middleware logging settings and view log status'

    def add_arguments(self, parser):
        """Register the four boolean command-line flags."""
        parser.add_argument(
            '--enable',
            action='store_true',
            help='Enable middleware logging (shows settings to add)'
        )
        parser.add_argument(
            '--disable',
            action='store_true',
            help='Disable middleware logging (shows settings to remove)'
        )
        parser.add_argument(
            '--status',
            action='store_true',
            help='Show current middleware logging status'
        )
        parser.add_argument(
            '--clear',
            action='store_true',
            help='Clear/delete middleware log files'
        )

    def handle(self, *args, **options):
        """Dispatch to the first requested action; status is the default."""
        if options['enable']:
            self._show_enable_instructions()
        elif options['disable']:
            self._show_disable_instructions()
        elif options['clear']:
            self._clear_logs()
        else:
            # Reached for --status and for a bare invocation alike.
            self._show_status()

    def _show_status(self):
        """Show current middleware logging configuration"""
        self.stdout.write(self.style.HTTP_INFO("🔍 AI-WAF Middleware Logging Status"))
        self.stdout.write("")

        # Check settings
        logging_enabled = getattr(settings, 'AIWAF_MIDDLEWARE_LOGGING', False)
        log_file = getattr(settings, 'AIWAF_MIDDLEWARE_LOG', 'aiwaf_requests.log')
        csv_format = getattr(settings, 'AIWAF_MIDDLEWARE_CSV', True)
        # Same ".log" -> ".csv" substitution the logging middleware uses, so
        # both sides agree on the CSV path.
        csv_file = log_file.replace('.log', '.csv') if csv_format else None

        # Status
        status_color = self.style.SUCCESS if logging_enabled else self.style.WARNING
        self.stdout.write(f"Status: {status_color('ENABLED' if logging_enabled else 'DISABLED')}")
        self.stdout.write(f"Log File: {log_file}")
        if csv_format:
            self.stdout.write(f"CSV File: {csv_file}")
        self.stdout.write(f"Format: {'CSV' if csv_format else 'Text'}")
        self.stdout.write("")

        # File existence and sizes
        # NOTE(review): the file checks are assumed to sit under this
        # condition; the original indentation was not recoverable - confirm.
        if logging_enabled:
            self.stdout.write("📁 Log Files:")

            if csv_format and csv_file:
                if os.path.exists(csv_file):
                    size = os.path.getsize(csv_file)
                    lines = self._count_csv_lines(csv_file)
                    self.stdout.write(f" ✅ {csv_file} ({size:,} bytes, {lines:,} entries)")
                else:
                    self.stdout.write(f" ❌ {csv_file} (not found)")

            if os.path.exists(log_file):
                size = os.path.getsize(log_file)
                self.stdout.write(f" ✅ {log_file} ({size:,} bytes)")
            else:
                self.stdout.write(f" ❌ {log_file} (not found)")

        # Middleware check
        middleware_list = getattr(settings, 'MIDDLEWARE', [])
        middleware_installed = 'aiwaf.middleware_logger.AIWAFLoggerMiddleware' in middleware_list

        self.stdout.write("")
        middleware_color = self.style.SUCCESS if middleware_installed else self.style.ERROR
        self.stdout.write(f"Middleware: {middleware_color('INSTALLED' if middleware_installed else 'NOT INSTALLED')}")

        if logging_enabled and not middleware_installed:
            self.stdout.write(self.style.WARNING("⚠️ Logging is enabled but middleware is not installed!"))

    def _show_enable_instructions(self):
        """Show instructions for enabling middleware logging"""
        self.stdout.write(self.style.SUCCESS("🚀 Enable AI-WAF Middleware Logging"))
        self.stdout.write("")
        self.stdout.write("Add these settings to your Django settings.py:")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("# Enable AI-WAF middleware logging"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_LOGGING = True"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_LOG = 'aiwaf_requests.log' # Optional"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_CSV = True # Optional (default: True)"))
        self.stdout.write("")
        self.stdout.write("Add middleware to MIDDLEWARE list (preferably near the end):")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("MIDDLEWARE = ["))
        self.stdout.write(self.style.HTTP_INFO(" # ... your existing middleware ..."))
        self.stdout.write(self.style.HTTP_INFO(" 'aiwaf.middleware_logger.AIWAFLoggerMiddleware',"))
        self.stdout.write(self.style.HTTP_INFO("]"))
        self.stdout.write("")
        self.stdout.write("Benefits:")
        self.stdout.write(" ✅ Fallback when main access logs unavailable")
        self.stdout.write(" ✅ CSV format for easy analysis")
        self.stdout.write(" ✅ Automatic integration with AI-WAF trainer")
        self.stdout.write(" ✅ Captures response times for better detection")

    def _show_disable_instructions(self):
        """Show instructions for disabling middleware logging"""
        self.stdout.write(self.style.WARNING("⏹️ Disable AI-WAF Middleware Logging"))
        self.stdout.write("")
        self.stdout.write("To disable, update your Django settings.py:")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("# Disable AI-WAF middleware logging"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_LOGGING = False"))
        self.stdout.write("")
        self.stdout.write("And remove from MIDDLEWARE list:")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("MIDDLEWARE = ["))
        self.stdout.write(self.style.HTTP_INFO(" # ... your existing middleware ..."))
        self.stdout.write(self.style.HTTP_INFO(" # 'aiwaf.middleware_logger.AIWAFLoggerMiddleware', # Remove this line"))
        self.stdout.write(self.style.HTTP_INFO("]"))

    def _clear_logs(self):
        """Clear/delete middleware log files"""
        log_file = getattr(settings, 'AIWAF_MIDDLEWARE_LOG', 'aiwaf_requests.log')
        csv_format = getattr(settings, 'AIWAF_MIDDLEWARE_CSV', True)
        csv_file = log_file.replace('.log', '.csv') if csv_format else None

        files_deleted = 0

        # Delete CSV file
        if csv_file and os.path.exists(csv_file):
            try:
                os.remove(csv_file)
                self.stdout.write(self.style.SUCCESS(f"✅ Deleted {csv_file}"))
                files_deleted += 1
            except Exception as e:
                # Report and continue so the text log is still attempted.
                self.stdout.write(self.style.ERROR(f"❌ Failed to delete {csv_file}: {e}"))

        # Delete text log file
        if os.path.exists(log_file):
            try:
                os.remove(log_file)
                self.stdout.write(self.style.SUCCESS(f"✅ Deleted {log_file}"))
                files_deleted += 1
            except Exception as e:
                self.stdout.write(self.style.ERROR(f"❌ Failed to delete {log_file}: {e}"))

        if files_deleted == 0:
            self.stdout.write(self.style.WARNING("ℹ️ No log files found to delete"))
        else:
            self.stdout.write(self.style.SUCCESS(f"🗑️ Deleted {files_deleted} log file(s)"))

    def _count_csv_lines(self, csv_file):
        """Count lines in CSV file (excluding header).

        Counts physical lines, so a quoted field containing a newline is
        counted more than once. Returns 0 when the file cannot be read or
        decoded, and never returns a negative number for an empty file.
        """
        try:
            with open(csv_file, 'r', encoding='utf-8') as f:
                total_lines = sum(1 for _ in f)
        except (OSError, UnicodeError):
            # Unreadable or undecodable file: report no entries rather than
            # aborting the status display.
            return 0
        # Subtract the header, but an empty file has no header to subtract.
        return max(total_lines - 1, 0)
@@ -1,5 +1,5 @@
1
1
  from django.core.management.base import BaseCommand
2
- from aiwaf.models import BlacklistEntry, IPExemption
2
+ from aiwaf.storage import get_blacklist_store, get_exemption_store
3
3
 
4
4
  class Command(BaseCommand):
5
5
  help = 'Reset AI-WAF by clearing all blacklist and exemption (whitelist) entries'
@@ -26,9 +26,12 @@ class Command(BaseCommand):
26
26
  exemptions_only = options['exemptions_only']
27
27
  confirm = options['confirm']
28
28
 
29
+ blacklist_store = get_blacklist_store()
30
+ exemption_store = get_exemption_store()
31
+
29
32
  # Count current entries
30
- blacklist_count = BlacklistEntry.objects.count()
31
- exemption_count = IPExemption.objects.count()
33
+ blacklist_count = len(blacklist_store.get_all())
34
+ exemption_count = len(exemption_store.get_all())
32
35
 
33
36
  if blacklist_only and exemptions_only:
34
37
  self.stdout.write(self.style.ERROR('Cannot use both --blacklist-only and --exemptions-only flags'))
@@ -61,10 +64,18 @@ class Command(BaseCommand):
61
64
  deleted_counts = {'blacklist': 0, 'exemptions': 0}
62
65
 
63
66
  if clear_blacklist:
64
- deleted_counts['blacklist'], _ = BlacklistEntry.objects.all().delete()
67
+ # Clear blacklist entries
68
+ blacklist_entries = blacklist_store.get_all()
69
+ for entry in blacklist_entries:
70
+ blacklist_store.remove_ip(entry['ip_address'])
71
+ deleted_counts['blacklist'] = len(blacklist_entries)
65
72
 
66
73
  if clear_exemptions:
67
- deleted_counts['exemptions'], _ = IPExemption.objects.all().delete()
74
+ # Clear exemption entries
75
+ exemption_entries = exemption_store.get_all()
76
+ for entry in exemption_entries:
77
+ exemption_store.remove_ip(entry['ip_address'])
78
+ deleted_counts['exemptions'] = len(exemption_entries)
68
79
 
69
80
  # Report results
70
81
  if clear_blacklist and clear_exemptions:
aiwaf/middleware.py CHANGED
@@ -16,8 +16,9 @@ from django.apps import apps
16
16
  from django.urls import get_resolver
17
17
  from .trainer import STATIC_KW, STATUS_IDX, path_exists_in_django
18
18
  from .blacklist_manager import BlacklistManager
19
- from .models import DynamicKeyword, IPExemption
19
+ from .models import IPExemption
20
20
  from .utils import is_exempt, get_ip, is_ip_exempted
21
+ from .storage import get_keyword_store
21
22
 
22
23
  MODEL_PATH = getattr(
23
24
  settings,
@@ -74,15 +75,14 @@ class IPAndKeywordBlockMiddleware:
74
75
  return self.get_response(request)
75
76
  if BlacklistManager.is_blocked(ip):
76
77
  return JsonResponse({"error": "blocked"}, status=403)
78
+
79
+ keyword_store = get_keyword_store()
77
80
  segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
81
+
78
82
  for seg in segments:
79
- obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
80
- DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
81
- dynamic_top = list(
82
- DynamicKeyword.objects
83
- .order_by("-count")
84
- .values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
85
- )
83
+ keyword_store.add_keyword(seg)
84
+
85
+ dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
86
86
  all_kw = set(STATIC_KW) | set(dynamic_top)
87
87
  suspicious_kw = {
88
88
  kw for kw in all_kw
@@ -172,10 +172,11 @@ class AIAnomalyMiddleware(MiddlewareMixin):
172
172
  data.append((now, request.path, response.status_code, resp_time))
173
173
  data = [d for d in data if now - d[0] < self.WINDOW]
174
174
  cache.set(key, data, timeout=self.WINDOW)
175
+
176
+ keyword_store = get_keyword_store()
175
177
  for seg in re.split(r"\W+", request.path.lower()):
176
178
  if len(seg) > 3:
177
- obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
178
- DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
179
+ keyword_store.add_keyword(seg)
179
180
 
180
181
  return response
181
182
 
@@ -0,0 +1,160 @@
1
+ # aiwaf/middleware_logger.py
2
+
3
+ import os
4
+ import csv
5
+ import time
6
+ from datetime import datetime
7
+ from django.conf import settings
8
+ from django.utils.deprecation import MiddlewareMixin
9
+ from .utils import get_ip
10
+
11
class AIWAFLoggerMiddleware(MiddlewareMixin):
    """
    Middleware that logs requests to a CSV file for AI-WAF training.
    Acts as a fallback when main access logs are unavailable.

    Settings are read once, at construction time:
    ``AIWAF_MIDDLEWARE_LOGGING`` (default ``False``),
    ``AIWAF_MIDDLEWARE_LOG`` (default ``"aiwaf_requests.log"``) and
    ``AIWAF_MIDDLEWARE_CSV`` (default ``True``).
    """

    # Column order shared by the header row and every data row.
    CSV_COLUMNS = (
        'timestamp', 'ip_address', 'method', 'path', 'status_code',
        'response_time', 'user_agent', 'referer', 'content_length'
    )

    def __init__(self, get_response):
        super().__init__(get_response)
        self.log_file = getattr(settings, "AIWAF_MIDDLEWARE_LOG", "aiwaf_requests.log")
        self.csv_format = getattr(settings, "AIWAF_MIDDLEWARE_CSV", True)
        self.log_enabled = getattr(settings, "AIWAF_MIDDLEWARE_LOGGING", False)

        # CSV file path. Always defined so the attribute exists even when
        # logging is disabled; the file itself is only touched when enabled.
        self.csv_file = self.log_file.replace('.log', '.csv')
        if self.csv_format and self.log_enabled:
            self._ensure_csv_header()

    def _ensure_csv_header(self):
        """Ensure CSV file has proper header row.

        Creates the parent directory if needed. A missing file and an
        existing but empty file both get a fresh header row; a non-empty
        file is left untouched.
        """
        if os.path.exists(self.csv_file) and os.path.getsize(self.csv_file) > 0:
            return

        directory = os.path.dirname(self.csv_file)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(self.csv_file, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow(self.CSV_COLUMNS)

    def process_request(self, request):
        """Store request start time"""
        request._aiwaf_start_time = time.time()
        return None

    def process_response(self, request, response):
        """Log the completed request and return ``response`` unchanged."""
        if not self.log_enabled:
            return response

        # Calculate response time. If process_request never ran for this
        # request, fall back to "now", which yields a duration of about zero.
        start_time = getattr(request, '_aiwaf_start_time', time.time())
        response_time = time.time() - start_time

        # Extract request data
        log_data = {
            # The format hard-codes a "+0000" offset, so the clock reading
            # must be UTC rather than server-local time.
            'timestamp': time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()),
            'ip_address': get_ip(request),
            'method': request.method,
            'path': request.path,
            'status_code': response.status_code,
            'response_time': f"{response_time:.3f}",
            'user_agent': request.META.get('HTTP_USER_AGENT', '-'),
            'referer': request.META.get('HTTP_REFERER', '-'),
            'content_length': response.get('Content-Length', '-')
        }

        if self.csv_format:
            self._log_to_csv(log_data)
        else:
            self._log_to_text(log_data)

        return response

    def _log_to_csv(self, data):
        """Write log entry to CSV file"""
        try:
            with open(self.csv_file, 'a', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                # The file may have been deleted or truncated since startup.
                # Re-emit the header so readers keyed on column names still
                # work.
                if os.fstat(f.fileno()).st_size == 0:
                    writer.writerow(self.CSV_COLUMNS)
                writer.writerow([data[column] for column in self.CSV_COLUMNS])
        except Exception:
            # Fail silently to avoid breaking the application
            pass

    def _log_to_text(self, data):
        """Write log entry in common log format"""
        try:
            # Common Log Format with response time
            log_line = f'{data["ip_address"]} - - [{data["timestamp"]}] "{data["method"]} {data["path"]} HTTP/1.1" {data["status_code"]} {data["content_length"]} "{data["referer"]}" "{data["user_agent"]}" response-time={data["response_time"]}\n'

            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(log_line)
        except Exception:
            # Fail silently to avoid breaking the application
            pass
98
+
99
+
100
class AIWAFCSVLogParser:
    """
    Parser for AI-WAF CSV logs that converts them to the format expected by trainer.py
    """

    @staticmethod
    def parse_csv_log(csv_file_path):
        """
        Parse CSV log file and return records in the format expected by trainer.py

        Returns a list of dictionaries with keys: ip, timestamp, path, status,
        referer, user_agent, response_time. ``timestamp`` is a naive
        ``datetime`` and ``response_time`` a ``float``; the other values are
        the raw CSV strings.

        Malformed rows (bad timestamp, missing column, short row) are skipped
        individually. A missing or unreadable file yields whatever was parsed
        before the failure, which is an empty list if nothing was.
        """
        records = []

        try:
            with open(csv_file_path, 'r', newline='', encoding='utf-8') as f:
                for row in csv.DictReader(f):
                    try:
                        record = {
                            'ip': row['ip_address'],
                            # Convert timestamp to datetime object
                            'timestamp': datetime.strptime(row['timestamp'], '%d/%b/%Y:%H:%M:%S +0000'),
                            'path': row['path'],
                            'status': row['status_code'],
                            'referer': row['referer'],
                            'user_agent': row['user_agent'],
                            'response_time': float(row['response_time'])
                        }
                    except (ValueError, KeyError, TypeError):
                        # Skip malformed rows. TypeError covers short rows,
                        # where DictReader fills missing columns with None.
                        continue
                    records.append(record)
        except (OSError, UnicodeError, csv.Error):
            # File missing or unreadable: return what was collected so far.
            pass

        return records

    @staticmethod
    def get_log_lines_for_trainer(csv_file_path):
        """
        Convert CSV log to format compatible with trainer.py's _read_all_logs()
        Returns list of log line strings
        """
        log_lines = []

        for record in AIWAFCSVLogParser.parse_csv_log(csv_file_path):
            # Convert back to common log format that trainer.py expects
            timestamp_str = record['timestamp'].strftime('%d/%b/%Y:%H:%M:%S +0000')
            content_length = '-'  # We don't track this in our format

            # NOTE(review): the HTTP method is always written as GET even
            # though the CSV records it; kept as-is because the trainer's
            # line pattern is not visible here - confirm before changing.
            log_line = f'{record["ip"]} - - [{timestamp_str}] "GET {record["path"]} HTTP/1.1" {record["status"]} {content_length} "{record["referer"]}" "{record["user_agent"]}" response-time={record["response_time"]:.3f}'
            log_lines.append(log_line)

        return log_lines
aiwaf/storage.py CHANGED
@@ -2,19 +2,33 @@ import os, csv, gzip, glob
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  from django.conf import settings
5
- from .models import FeatureSample
5
+ from django.utils import timezone
6
+ from .models import FeatureSample, BlacklistEntry, IPExemption, DynamicKeyword
7
+
8
+ # Configuration
9
+ STORAGE_MODE = getattr(settings, "AIWAF_STORAGE_MODE", "models") # "models" or "csv"
10
+ CSV_DATA_DIR = getattr(settings, "AIWAF_CSV_DATA_DIR", "aiwaf_data")
11
+ FEATURE_CSV = getattr(settings, "AIWAF_CSV_PATH", os.path.join(CSV_DATA_DIR, "access_samples.csv"))
12
+ BLACKLIST_CSV = os.path.join(CSV_DATA_DIR, "blacklist.csv")
13
+ EXEMPTION_CSV = os.path.join(CSV_DATA_DIR, "exemptions.csv")
14
+ KEYWORDS_CSV = os.path.join(CSV_DATA_DIR, "keywords.csv")
6
15
 
7
- DATA_FILE = getattr(settings, "AIWAF_CSV_PATH", "access_samples.csv")
8
16
  CSV_HEADER = [
9
17
  "ip","path_len","kw_hits","resp_time",
10
18
  "status_idx","burst_count","total_404","label"
11
19
  ]
12
20
 
21
def ensure_csv_directory():
    """Ensure the CSV data directory exists.

    Creates ``CSV_DATA_DIR`` (and any missing parents) and is a no-op when it
    already exists. The directory is created regardless of ``STORAGE_MODE``:
    the CSV feature store writes its default file under this directory even
    when the blacklist/exemption/keyword stores use Django models.
    """
    # exist_ok avoids the check-then-create race between concurrent workers.
    os.makedirs(CSV_DATA_DIR, exist_ok=True)
25
+
13
26
  class CsvFeatureStore:
14
27
  @staticmethod
15
28
  def persist_rows(rows):
16
- new_file = not os.path.exists(DATA_FILE)
17
- with open(DATA_FILE, "a", newline="", encoding="utf-8") as f:
29
+ ensure_csv_directory()
30
+ new_file = not os.path.exists(FEATURE_CSV)
31
+ with open(FEATURE_CSV, "a", newline="", encoding="utf-8") as f:
18
32
  w = csv.writer(f)
19
33
  if new_file:
20
34
  w.writerow(CSV_HEADER)
@@ -22,10 +36,10 @@ class CsvFeatureStore:
22
36
 
23
37
  @staticmethod
24
38
  def load_matrix():
25
- if not os.path.exists(DATA_FILE):
39
+ if not os.path.exists(FEATURE_CSV):
26
40
  return np.empty((0,6))
27
41
  df = pd.read_csv(
28
- DATA_FILE,
42
+ FEATURE_CSV,
29
43
  names=CSV_HEADER,
30
44
  skiprows=1,
31
45
  engine="python",
@@ -59,3 +73,279 @@ def get_store():
59
73
  if getattr(settings, "AIWAF_FEATURE_STORE", "csv") == "db":
60
74
  return DbFeatureStore
61
75
  return CsvFeatureStore
76
+
77
+
78
+ # ============= CSV Storage Classes =============
79
+
80
class CsvBlacklistStore:
    """CSV-based storage for IP blacklist entries.

    Rows live in ``BLACKLIST_CSV`` with the columns listed in ``FIELDNAMES``.
    Every write keeps the header row in place, because all readers go through
    ``csv.DictReader`` and depend on it.
    """

    FIELDNAMES = ["ip_address", "reason", "created_at"]

    @staticmethod
    def add_ip(ip_address, reason):
        """Append ``ip_address`` with ``reason`` unless it is already listed."""
        ensure_csv_directory()
        # Check if IP already exists
        if CsvBlacklistStore.is_blocked(ip_address):
            return

        # Add new entry
        with open(BLACKLIST_CSV, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # A new file and an existing-but-empty file both need the header;
            # checking only for existence would skip it for the latter.
            if os.fstat(f.fileno()).st_size == 0:
                writer.writerow(CsvBlacklistStore.FIELDNAMES)
            writer.writerow([ip_address, reason, timezone.now().isoformat()])

    @staticmethod
    def is_blocked(ip_address):
        """Return True when a row for ``ip_address`` exists."""
        if not os.path.exists(BLACKLIST_CSV):
            return False

        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
            return any(row["ip_address"] == ip_address for row in csv.DictReader(f))

    @staticmethod
    def get_all():
        """Return list of dictionaries with blacklist entries"""
        if not os.path.exists(BLACKLIST_CSV):
            return []

        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
            return list(csv.DictReader(f))

    @staticmethod
    def remove_ip(ip_address):
        """Rewrite the file without the rows matching ``ip_address``."""
        if not os.path.exists(BLACKLIST_CSV):
            return

        # Read all entries except the one to remove
        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
            remaining = [row for row in csv.DictReader(f) if row["ip_address"] != ip_address]

        # Write back the filtered entries. The header is written even when
        # nothing remains, so a later append does not produce a header-less
        # file. Extra columns from over-long rows are dropped, not fatal.
        with open(BLACKLIST_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(
                f, fieldnames=CsvBlacklistStore.FIELDNAMES, extrasaction="ignore"
            )
            writer.writeheader()
            writer.writerows(remaining)
140
+
141
+
142
class CsvExemptionStore:
    """CSV-based storage for IP exemption entries.

    Rows live in ``EXEMPTION_CSV`` with the columns listed in ``FIELDNAMES``.
    Every write keeps the header row in place, because all readers go through
    ``csv.DictReader`` and depend on it.
    """

    FIELDNAMES = ["ip_address", "reason", "created_at"]

    @staticmethod
    def add_ip(ip_address, reason=""):
        """Append ``ip_address`` with ``reason`` unless it is already exempted."""
        ensure_csv_directory()
        # Check if IP already exists
        if CsvExemptionStore.is_exempted(ip_address):
            return

        # Add new entry
        with open(EXEMPTION_CSV, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # A new file and an existing-but-empty file both need the header;
            # checking only for existence would skip it for the latter.
            if os.fstat(f.fileno()).st_size == 0:
                writer.writerow(CsvExemptionStore.FIELDNAMES)
            writer.writerow([ip_address, reason, timezone.now().isoformat()])

    @staticmethod
    def is_exempted(ip_address):
        """Return True when a row for ``ip_address`` exists."""
        if not os.path.exists(EXEMPTION_CSV):
            return False

        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
            return any(row["ip_address"] == ip_address for row in csv.DictReader(f))

    @staticmethod
    def get_all():
        """Return list of dictionaries with exemption entries"""
        if not os.path.exists(EXEMPTION_CSV):
            return []

        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
            return list(csv.DictReader(f))

    @staticmethod
    def remove_ip(ip_address):
        """Rewrite the file without the rows matching ``ip_address``."""
        if not os.path.exists(EXEMPTION_CSV):
            return

        # Read all entries except the one to remove
        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
            remaining = [row for row in csv.DictReader(f) if row["ip_address"] != ip_address]

        # Write back the filtered entries. The header is written even when
        # nothing remains, so a later append does not produce a header-less
        # file. Extra columns from over-long rows are dropped, not fatal.
        with open(EXEMPTION_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(
                f, fieldnames=CsvExemptionStore.FIELDNAMES, extrasaction="ignore"
            )
            writer.writeheader()
            writer.writerows(remaining)
202
+
203
+
204
class CsvKeywordStore:
    """CSV-based storage for dynamic keywords.

    ``KEYWORDS_CSV`` holds one row per keyword with its hit count. Every
    mutation loads the whole file into a dict and rewrites it.
    """

    @staticmethod
    def add_keyword(keyword, count=1):
        """Increase ``keyword``'s counter by ``count``, creating it at 0 first."""
        ensure_csv_directory()

        # Read existing keywords
        keywords = CsvKeywordStore._load_keywords()

        # Update or add keyword
        keywords[keyword] = keywords.get(keyword, 0) + count

        # Save back to file
        CsvKeywordStore._save_keywords(keywords)

    @staticmethod
    def get_top_keywords(limit=10):
        """Return up to ``limit`` keywords, highest count first."""
        keywords = CsvKeywordStore._load_keywords()
        # Sort by count in descending order and return top N
        ranked = sorted(keywords.items(), key=lambda item: item[1], reverse=True)
        return [kw for kw, _ in ranked[:limit]]

    @staticmethod
    def remove_keyword(keyword):
        """Delete ``keyword`` if present; the file is untouched otherwise."""
        keywords = CsvKeywordStore._load_keywords()
        if keyword in keywords:
            del keywords[keyword]
            CsvKeywordStore._save_keywords(keywords)

    @staticmethod
    def clear_all():
        """Delete the keywords file, if it exists."""
        if os.path.exists(KEYWORDS_CSV):
            os.remove(KEYWORDS_CSV)

    @staticmethod
    def _load_keywords():
        """Load keywords from CSV file as a dictionary.

        Rows with a missing keyword, or a missing or non-integer count, are
        skipped so one damaged row cannot make every caller fail.
        """
        if not os.path.exists(KEYWORDS_CSV):
            return {}

        keywords = {}
        with open(KEYWORDS_CSV, "r", newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                try:
                    name = row["keyword"]
                    hits = int(row["count"])
                except (KeyError, TypeError, ValueError):
                    continue
                if name is None:
                    continue
                keywords[name] = hits
        return keywords

    @staticmethod
    def _save_keywords(keywords):
        """Save keywords dictionary to CSV file"""
        # One timestamp per save: every row is rewritten anyway, so they all
        # share the moment of this write.
        stamp = timezone.now().isoformat()
        with open(KEYWORDS_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["keyword", "count", "last_updated"])
            writer.writerows([keyword, count, stamp] for keyword, count in keywords.items())
260
+
261
+
262
+ # ============= Storage Factory Functions =============
263
+
264
def get_blacklist_store():
    """Select the blacklist backend class.

    Returns ``CsvBlacklistStore`` when ``STORAGE_MODE`` is ``"csv"``; any
    other value selects the Django-model wrapper ``ModelBlacklistStore``.
    """
    use_csv = STORAGE_MODE == "csv"
    return CsvBlacklistStore if use_csv else ModelBlacklistStore
271
+
272
+
273
def get_exemption_store():
    """Select the exemption backend class.

    Returns ``CsvExemptionStore`` when ``STORAGE_MODE`` is ``"csv"``; any
    other value selects the Django-model wrapper ``ModelExemptionStore``.
    """
    use_csv = STORAGE_MODE == "csv"
    return CsvExemptionStore if use_csv else ModelExemptionStore
279
+
280
+
281
def get_keyword_store():
    """Select the keyword backend class.

    Returns ``CsvKeywordStore`` when ``STORAGE_MODE`` is ``"csv"``; any other
    value selects the Django-model wrapper ``ModelKeywordStore``.
    """
    use_csv = STORAGE_MODE == "csv"
    return CsvKeywordStore if use_csv else ModelKeywordStore
287
+
288
+
289
+ # ============= Django Model Wrappers =============
290
+
291
class ModelBlacklistStore:
    """Blacklist backend that goes through the ``BlacklistEntry`` Django model."""

    @staticmethod
    def add_ip(ip_address, reason):
        """Create a row for ``ip_address`` unless one already exists."""
        initial_values = {"reason": reason}
        BlacklistEntry.objects.get_or_create(ip_address=ip_address, defaults=initial_values)

    @staticmethod
    def is_blocked(ip_address):
        """Return True when a row for ``ip_address`` exists."""
        matches = BlacklistEntry.objects.filter(ip_address=ip_address)
        return matches.exists()

    @staticmethod
    def get_all():
        """Return every entry as a dict of ip_address, reason and created_at."""
        rows = BlacklistEntry.objects.values("ip_address", "reason", "created_at")
        return list(rows)

    @staticmethod
    def remove_ip(ip_address):
        """Delete all rows for ``ip_address``."""
        matches = BlacklistEntry.objects.filter(ip_address=ip_address)
        matches.delete()
309
+
310
+
311
class ModelExemptionStore:
    """Exemption backend that goes through the ``IPExemption`` Django model."""

    @staticmethod
    def add_ip(ip_address, reason=""):
        """Create a row for ``ip_address`` unless one already exists."""
        initial_values = {"reason": reason}
        IPExemption.objects.get_or_create(ip_address=ip_address, defaults=initial_values)

    @staticmethod
    def is_exempted(ip_address):
        """Return True when a row for ``ip_address`` exists."""
        matches = IPExemption.objects.filter(ip_address=ip_address)
        return matches.exists()

    @staticmethod
    def get_all():
        """Return every entry as a dict of ip_address, reason and created_at."""
        rows = IPExemption.objects.values("ip_address", "reason", "created_at")
        return list(rows)

    @staticmethod
    def remove_ip(ip_address):
        """Delete all rows for ``ip_address``."""
        matches = IPExemption.objects.filter(ip_address=ip_address)
        matches.delete()
329
+
330
+
331
class ModelKeywordStore:
    """Django model-based storage for dynamic keywords"""

    @staticmethod
    def add_keyword(keyword, count=1):
        """Increase ``keyword``'s counter by ``count``, creating the row if needed.

        An existing row is incremented with a single database-side UPDATE, so
        concurrent requests counting the same keyword do not overwrite each
        other's increments.
        """
        # Imported locally so this block does not depend on a file-level import.
        from django.db.models import F

        obj, created = DynamicKeyword.objects.get_or_create(keyword=keyword, defaults={"count": count})
        if not created:
            # NOTE(review): update() bypasses Model.save(), so any auto_now
            # field or save signal on DynamicKeyword will not fire - confirm
            # that is acceptable for this model.
            DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + count)

    @staticmethod
    def get_top_keywords(limit=10):
        """Return up to ``limit`` keyword strings, highest count first."""
        return list(DynamicKeyword.objects.order_by("-count").values_list("keyword", flat=True)[:limit])

    @staticmethod
    def remove_keyword(keyword):
        """Delete the row(s) for ``keyword``, if any."""
        DynamicKeyword.objects.filter(keyword=keyword).delete()

    @staticmethod
    def clear_all():
        """Delete every stored keyword."""
        DynamicKeyword.objects.all().delete()
aiwaf/trainer.py CHANGED
@@ -14,6 +14,7 @@ from django.conf import settings
14
14
  from django.apps import apps
15
15
  from django.db.models import F
16
16
  from .utils import is_exempt_path
17
+ from .storage import get_blacklist_store, get_exemption_store, get_keyword_store
17
18
 
18
19
  # ─────────── Configuration ───────────
19
20
  LOG_PATH = settings.AIWAF_ACCESS_LOG
@@ -28,11 +29,6 @@ _LOG_RX = re.compile(
28
29
  )
29
30
 
30
31
 
31
- BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
32
- DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
33
- IPExemption = apps.get_model("aiwaf", "IPExemption")
34
-
35
-
36
32
  def path_exists_in_django(path: str) -> bool:
37
33
  from django.urls import get_resolver
38
34
  from django.urls.resolvers import URLResolver
@@ -54,27 +50,46 @@ def path_exists_in_django(path: str) -> bool:
54
50
 
55
51
 
56
52
  def remove_exempt_keywords() -> None:
53
+ keyword_store = get_keyword_store()
57
54
  exempt_tokens = set()
55
+
58
56
  for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
59
57
  for seg in re.split(r"\W+", path.strip("/").lower()):
60
58
  if len(seg) > 3:
61
59
  exempt_tokens.add(seg)
62
- if exempt_tokens:
63
- DynamicKeyword.objects.filter(keyword__in=exempt_tokens).delete()
60
+
61
+ # Remove exempt tokens from keyword storage
62
+ for token in exempt_tokens:
63
+ keyword_store.remove_keyword(token)
64
64
 
65
65
 
66
66
  def _read_all_logs() -> list[str]:
67
67
  lines = []
68
+
69
+ # First try to read from main access log
68
70
  if LOG_PATH and os.path.exists(LOG_PATH):
69
71
  with open(LOG_PATH, "r", errors="ignore") as f:
70
72
  lines.extend(f.readlines())
71
- for p in sorted(glob.glob(f"{LOG_PATH}.*")):
72
- opener = gzip.open if p.endswith(".gz") else open
73
- try:
74
- with opener(p, "rt", errors="ignore") as f:
75
- lines.extend(f.readlines())
76
- except OSError:
77
- continue
73
+ for p in sorted(glob.glob(f"{LOG_PATH}.*")):
74
+ opener = gzip.open if p.endswith(".gz") else open
75
+ try:
76
+ with opener(p, "rt", errors="ignore") as f:
77
+ lines.extend(f.readlines())
78
+ except OSError:
79
+ continue
80
+
81
+ # If no lines found from main log, try AI-WAF middleware CSV log
82
+ if not lines:
83
+ middleware_csv = getattr(settings, "AIWAF_MIDDLEWARE_LOG", "aiwaf_requests.log").replace('.log', '.csv')
84
+ if os.path.exists(middleware_csv):
85
+ try:
86
+ from .middleware_logger import AIWAFCSVLogParser
87
+ csv_lines = AIWAFCSVLogParser.get_log_lines_for_trainer(middleware_csv)
88
+ lines.extend(csv_lines)
89
+ print(f"📋 Using AI-WAF middleware CSV log: {middleware_csv} ({len(csv_lines)} entries)")
90
+ except Exception as e:
91
+ print(f"⚠️ Failed to read middleware CSV log: {e}")
92
+
78
93
  return lines
79
94
 
80
95
 
@@ -98,10 +113,15 @@ def _parse(line: str) -> dict | None:
98
113
 
99
114
  def train() -> None:
100
115
  remove_exempt_keywords()
101
- # Remove any IPs in IPExemption from the blacklist
102
- exempt_ips = set(IPExemption.objects.values_list("ip_address", flat=True))
103
- if exempt_ips:
104
- BlacklistEntry.objects.filter(ip_address__in=exempt_ips).delete()
116
+
117
+ # Remove any IPs in IPExemption from the blacklist using storage system
118
+ exemption_store = get_exemption_store()
119
+ blacklist_store = get_blacklist_store()
120
+
121
+ exempted_ips = [entry['ip_address'] for entry in exemption_store.get_all()]
122
+ for ip in exempted_ips:
123
+ blacklist_store.remove_ip(ip)
124
+
105
125
  raw_lines = _read_all_logs()
106
126
  if not raw_lines:
107
127
  print("No log lines found – check AIWAF_ACCESS_LOG setting.")
@@ -133,10 +153,8 @@ def train() -> None:
133
153
 
134
154
  # Don't block if majority of 404s are on login paths
135
155
  if count > login_404s: # More non-login 404s than login 404s
136
- BlacklistEntry.objects.get_or_create(
137
- ip_address=ip,
138
- defaults={"reason": f"Excessive 404s (≥6 non-login, {count}/{total_404s})"}
139
- )
156
+ blacklist_store = get_blacklist_store()
157
+ blacklist_store.add_ip(ip, f"Excessive 404s (≥6 non-login, {count}/{total_404s})")
140
158
 
141
159
  feature_dicts = []
142
160
  for r in parsed:
@@ -187,10 +205,13 @@ def train() -> None:
187
205
  if anomalous_ips:
188
206
  print(f"⚠️ Detected {len(anomalous_ips)} potentially anomalous IPs during training")
189
207
 
208
+ exemption_store = get_exemption_store()
209
+ blacklist_store = get_blacklist_store()
190
210
  blocked_count = 0
211
+
191
212
  for ip in anomalous_ips:
192
213
  # Skip if IP is exempted
193
- if IPExemption.objects.filter(ip_address=ip).exists():
214
+ if exemption_store.is_exempted(ip):
194
215
  continue
195
216
 
196
217
  # Get this IP's behavior from the data
@@ -213,10 +234,7 @@ def train() -> None:
213
234
  continue
214
235
 
215
236
  # Block if it shows clear signs of malicious behavior
216
- BlacklistEntry.objects.get_or_create(
217
- ip_address=ip,
218
- defaults={"reason": f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})"}
219
- )
237
+ blacklist_store.add_ip(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
220
238
  blocked_count += 1
221
239
  print(f" - {ip}: Blocked for suspicious behavior (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
222
240
 
@@ -230,8 +248,10 @@ def train() -> None:
230
248
  if len(seg) > 3 and seg not in STATIC_KW:
231
249
  tokens[seg] += 1
232
250
 
233
- for kw, cnt in tokens.most_common(10):
234
- obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
235
- DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
251
+ keyword_store = get_keyword_store()
252
+ top_tokens = tokens.most_common(10)
253
+
254
+ for kw, cnt in top_tokens:
255
+ keyword_store.add_keyword(kw, cnt)
236
256
 
237
- print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in tokens.most_common(10)]}")
257
+ print(f"DynamicKeyword storage updated with top tokens: {[kw for kw, _ in top_tokens]}")
aiwaf/utils.py CHANGED
@@ -4,7 +4,7 @@ import glob
4
4
  import gzip
5
5
  from datetime import datetime
6
6
  from django.conf import settings
7
- from .models import IPExemption
7
+ from .storage import get_exemption_store
8
8
 
9
9
  _LOG_RX = re.compile(
10
10
  r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)"'
@@ -53,7 +53,8 @@ def parse_log_line(line):
53
53
 
54
54
  def is_ip_exempted(ip):
55
55
  """Check if IP is in exemption list"""
56
- return IPExemption.objects.filter(ip_address=ip).exists()
56
+ store = get_exemption_store()
57
+ return store.is_exempted(ip)
57
58
 
58
59
  def is_view_exempt(request):
59
60
  """Check if the current view is marked as AI-WAF exempt"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.8.6
3
+ Version: 0.1.8.8
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -83,7 +83,28 @@ aiwaf/
83
83
  - Submit forms faster than `AIWAF_MIN_FORM_TIME` seconds (default: 1 second)
84
84
 
85
85
  - **UUID Tampering Protection**
86
- Blocks guessed or invalid UUIDs that dont resolve to real models.
86
+ Blocks guessed or invalid UUIDs that don't resolve to real models.
87
+
88
+ - **Built-in Request Logger**
89
+ Optional middleware logger that captures requests to CSV:
90
+ - **Automatic fallback** when main access logs unavailable
91
+ - **CSV format** for easy analysis and training
92
+ - **Captures response times** for better anomaly detection
93
+ - **Zero configuration** - works out of the box
94
+
95
+
96
+ **Exempt Path & IP Awareness**
97
+
98
+ **Exempt Paths:**
99
+ AI‑WAF automatically exempts common login paths (`/admin/`, `/login/`, `/accounts/login/`, etc.) from all blocking mechanisms. You can add additional exempt paths in your Django `settings.py`:
100
+
101
+ ```python
102
+ AIWAF_EXEMPT_PATHS = [
103
+ "/api/webhooks/",
104
+ "/health/",
105
+ "/special-endpoint/",
106
+ ]
107
+ ```
87
108
 
88
109
 
89
110
  **Exempt Path & IP Awareness**
@@ -188,6 +209,67 @@ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
188
209
 
189
210
  ---
190
211
 
212
+ ### Storage Configuration
213
+
214
+ **Choose storage backend:**
215
+
216
+ ```python
217
+ # Use Django models (default) - requires database tables
218
+ AIWAF_STORAGE_MODE = "models"
219
+
220
+ # OR use CSV files - no database required
221
+ AIWAF_STORAGE_MODE = "csv"
222
+ AIWAF_CSV_DATA_DIR = "aiwaf_data" # Directory for CSV files
223
+ ```
224
+
225
+ **CSV Mode Features:**
226
+ - No database migrations required
227
+ - Files stored in `aiwaf_data/` directory:
228
+ - `blacklist.csv` - Blocked IP addresses
229
+ - `exemptions.csv` - Exempt IP addresses
230
+ - `keywords.csv` - Dynamic keywords
231
+ - `access_samples.csv` - Feature samples for ML training
232
+ - Perfect for lightweight deployments or when you prefer file-based storage
233
+ - Management commands work identically in both modes
234
+
235
+ ---
236
+
237
+ ### Built-in Request Logger (Optional)
238
+
239
+ Enable AI-WAF's built-in request logger as a fallback when main access logs aren't available:
240
+
241
+ ```python
242
+ # Enable middleware logging
243
+ AIWAF_MIDDLEWARE_LOGGING = True # Enable/disable logging
244
+ AIWAF_MIDDLEWARE_LOG = "aiwaf_requests.log" # Log file path
245
+ AIWAF_MIDDLEWARE_CSV = True # Use CSV format (recommended)
246
+ ```
247
+
248
+ **Then add middleware to MIDDLEWARE list:**
249
+
250
+ ```python
251
+ MIDDLEWARE = [
252
+ # ... your existing middleware ...
253
+ 'aiwaf.middleware_logger.AIWAFLoggerMiddleware', # Add near the end
254
+ ]
255
+ ```
256
+
257
+ **Manage middleware logging:**
258
+
259
+ ```bash
260
+ python manage.py aiwaf_logging --status # Check logging status
261
+ python manage.py aiwaf_logging --enable # Show setup instructions
262
+ python manage.py aiwaf_logging --clear # Clear log files
263
+ ```
264
+
265
+ **Benefits:**
266
+ - **Automatic fallback** when `AIWAF_ACCESS_LOG` unavailable
267
+ - **CSV format** with precise timestamps and response times
268
+ - **Zero configuration** - trainer automatically detects and uses CSV logs
269
+ - **Lightweight** - fails silently to avoid breaking your application
270
+
271
+ ---
272
+
191
273
  ### Optional (defaults shown)
192
274
 
193
275
  ```python
@@ -219,14 +301,40 @@ Add in **this** order to your `MIDDLEWARE` list:
219
301
  ```python
220
302
  MIDDLEWARE = [
221
303
  "aiwaf.middleware.IPAndKeywordBlockMiddleware",
222
- "aiwaf.middleware.RateLimitMiddleware",
304
+ "aiwaf.middleware.RateLimitMiddleware",
223
305
  "aiwaf.middleware.AIAnomalyMiddleware",
224
306
  "aiwaf.middleware.HoneypotTimingMiddleware",
225
307
  "aiwaf.middleware.UUIDTamperMiddleware",
226
308
  # ... other middleware ...
309
+ "aiwaf.middleware_logger.AIWAFLoggerMiddleware", # Optional: Add if using built-in logger
227
310
  ]
228
311
  ```
229
312
 
313
+ > **⚠️ Order matters!** AI-WAF protection middleware should come early. The logger middleware should come near the end to capture final response data.
314
+
315
+ ---
316
+
317
+ ## Running Detection & Training
318
+
319
+ ```bash
320
+ python manage.py detect_and_train
321
+ ```
322
+
323
+ ### What happens:
324
+ 1. Read access logs (incl. rotated or gzipped) **OR** AI-WAF middleware CSV logs
325
+ 2. Auto‑block IPs with ≥ 6 non‑login 404s (IPs whose 404s are mostly on login paths are not blocked)
326
+ 3. Extract features & train IsolationForest
327
+ 4. Save `model.pkl`
328
+ 5. Extract top 10 dynamic keywords from 4xx/5xx
329
+ 6. Remove any keywords associated with newly exempt paths
330
+
331
+ **Note:** If main access log (`AIWAF_ACCESS_LOG`) is unavailable, trainer automatically falls back to AI-WAF middleware CSV logs.
332
+
333
+ ---
334
+
335
+ ## 🧠 How It Works
336
+
337
+
230
338
  ---
231
339
 
232
340
  ## Running Detection & Training
@@ -0,0 +1,24 @@
1
+ aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
2
+ aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
+ aiwaf/blacklist_manager.py,sha256=92ltIrFfv8WOC4CXwvNVZYfivkRZHGNg3E2QAbHQipQ,550
4
+ aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
5
+ aiwaf/middleware.py,sha256=1JPrc0npI_a5bnB-thN0ME1ehfTbWBl1j9wTndZwRdQ,9505
6
+ aiwaf/middleware_logger.py,sha256=uTYTvIc4Mv1pjY50aXaqQ5cWAO9qqquijAyVMs1KWlM,6517
7
+ aiwaf/models.py,sha256=XaG1pd_oZu3y-fw66u4wblGlWcUY9gvsTNKGD0kQk7Y,1672
8
+ aiwaf/storage.py,sha256=Z0KWArfLmOHnvUcL5aVx8W_aHMr-qoEW8FVGrM23BvA,11639
9
+ aiwaf/trainer.py,sha256=bgVoBewnNVMJdgxcNchfhsPOnFXxStoBOqNhFYnpsqs,9244
10
+ aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
11
+ aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ aiwaf/management/commands/add_ipexemption.py,sha256=srgdVPDJtF7G9GGIqaZ7L3qTuNheoS_uwlhlRO4W2bc,945
14
+ aiwaf/management/commands/aiwaf_logging.py,sha256=FCIqULn2tii2vD9VxL7vk3PV4k4vr7kaA00KyaCExYY,7692
15
+ aiwaf/management/commands/aiwaf_reset.py,sha256=0FIBqpZS8xgFFvAKJ-0zAC_-QNQwRkOHpXb8N-OdFr8,3740
16
+ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMmFEkwwrcDsiM166K0,269
17
+ aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
18
+ aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
20
+ aiwaf-0.1.8.8.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
21
+ aiwaf-0.1.8.8.dist-info/METADATA,sha256=851Url25O97G0KGi3gHFF-zYSaI91BHiK7CbKJrLbk0,11261
22
+ aiwaf-0.1.8.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ aiwaf-0.1.8.8.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
24
+ aiwaf-0.1.8.8.dist-info/RECORD,,
@@ -1,22 +0,0 @@
1
- aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
2
- aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
- aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
4
- aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
5
- aiwaf/middleware.py,sha256=eMad-wvQWALkH2nIhjssU9Y-AqFleP3Gm0lRu3qE0Bw,9679
6
- aiwaf/models.py,sha256=XaG1pd_oZu3y-fw66u4wblGlWcUY9gvsTNKGD0kQk7Y,1672
7
- aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
8
- aiwaf/trainer.py,sha256=R00q_QQ1o2UmdIWMWNh847BGBrnI6j-hfjNalojfnhU,8494
9
- aiwaf/utils.py,sha256=s-rtUrWQFVv-nuGxe2hz5-LLvB6TbZXKj6do46DwrkA,3376
10
- aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- aiwaf/management/commands/add_ipexemption.py,sha256=LWN21_ydqSjU3_hUnkou4Ciyrk_479zLvcKdWm8hkC0,988
13
- aiwaf/management/commands/aiwaf_reset.py,sha256=dUTYX6Z6_X3Ft3lqF_McXE7OdKADlQFGFWvjdvFVZFI,3245
14
- aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMmFEkwwrcDsiM166K0,269
15
- aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
16
- aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
18
- aiwaf-0.1.8.6.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
19
- aiwaf-0.1.8.6.dist-info/METADATA,sha256=s6gux1GQJsbvphRXyVkVPm63_bVWIXcJcagWtBSlgpE,7955
20
- aiwaf-0.1.8.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- aiwaf-0.1.8.6.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
22
- aiwaf-0.1.8.6.dist-info/RECORD,,