aiwaf 0.1.8.6__py3-none-any.whl → 0.1.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/blacklist_manager.py +14 -4
- aiwaf/management/commands/add_ipexemption.py +8 -7
- aiwaf/management/commands/aiwaf_logging.py +166 -0
- aiwaf/management/commands/aiwaf_reset.py +16 -5
- aiwaf/middleware.py +11 -10
- aiwaf/middleware_logger.py +160 -0
- aiwaf/storage.py +296 -6
- aiwaf/trainer.py +51 -31
- aiwaf/utils.py +3 -2
- {aiwaf-0.1.8.6.dist-info → aiwaf-0.1.8.8.dist-info}/METADATA +111 -3
- aiwaf-0.1.8.8.dist-info/RECORD +24 -0
- aiwaf-0.1.8.6.dist-info/RECORD +0 -22
- {aiwaf-0.1.8.6.dist-info → aiwaf-0.1.8.8.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.8.6.dist-info → aiwaf-0.1.8.8.dist-info}/licenses/LICENSE +0 -0
- {aiwaf-0.1.8.6.dist-info → aiwaf-0.1.8.8.dist-info}/top_level.txt +0 -0
aiwaf/blacklist_manager.py
CHANGED
|
@@ -1,14 +1,24 @@
|
|
|
1
|
-
|
|
1
|
+
# aiwaf/blacklist_manager.py
|
|
2
|
+
|
|
3
|
+
from .storage import get_blacklist_store
|
|
2
4
|
|
|
3
5
|
class BlacklistManager:
    """Facade over whichever blacklist store `get_blacklist_store()` returns.

    Every method looks the store up again on each call and forwards to it.
    """

    @staticmethod
    def block(ip, reason):
        """Record `ip` as blocked, with `reason`, in the active store."""
        get_blacklist_store().add_ip(ip, reason)

    @staticmethod
    def is_blocked(ip):
        """Return the active store's answer to whether `ip` is blocked."""
        return get_blacklist_store().is_blocked(ip)

    @staticmethod
    def all_blocked():
        """Return everything the active store reports from `get_all()`."""
        return get_blacklist_store().get_all()

    @staticmethod
    def unblock(ip):
        """Remove `ip` from the active store."""
        get_blacklist_store().remove_ip(ip)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from django.core.management.base import BaseCommand, CommandError
|
|
2
|
-
from aiwaf.
|
|
2
|
+
from aiwaf.storage import get_exemption_store
|
|
3
3
|
|
|
4
4
|
class Command(BaseCommand):
|
|
5
5
|
help = 'Add an IP address to the IPExemption list (prevents blacklisting)'
|
|
@@ -11,12 +11,13 @@ class Command(BaseCommand):
|
|
|
11
11
|
def handle(self, *args, **options):
|
|
12
12
|
ip = options['ip']
|
|
13
13
|
reason = options['reason']
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
|
|
15
|
+
store = get_exemption_store()
|
|
16
|
+
|
|
17
|
+
if store.is_exempted(ip):
|
|
16
18
|
self.stdout.write(self.style.WARNING(f'IP {ip} is already exempted.'))
|
|
17
19
|
else:
|
|
20
|
+
store.add_ip(ip, reason)
|
|
18
21
|
self.stdout.write(self.style.SUCCESS(f'IP {ip} added to exemption list.'))
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
obj.save()
|
|
22
|
-
self.stdout.write(self.style.SUCCESS(f'Reason set to: {reason}'))
|
|
22
|
+
if reason:
|
|
23
|
+
self.stdout.write(self.style.SUCCESS(f'Reason: {reason}'))
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from django.core.management.base import BaseCommand
|
|
2
|
+
from django.conf import settings
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
class Command(BaseCommand):
    """Report on, explain, and clear the AI-WAF middleware request logs.

    The flags are checked in the order --enable, --disable, --clear; when
    none of them is given (including when only --status is passed) the
    current status is printed. --enable and --disable only print the
    settings to change; they do not modify any file.
    """

    help = 'Manage AI-WAF middleware logging settings and view log status'

    def add_arguments(self, parser):
        """Register the four mutually independent boolean flags."""
        parser.add_argument(
            '--enable',
            action='store_true',
            help='Enable middleware logging (shows settings to add)'
        )
        parser.add_argument(
            '--disable',
            action='store_true',
            help='Disable middleware logging (shows settings to remove)'
        )
        parser.add_argument(
            '--status',
            action='store_true',
            help='Show current middleware logging status'
        )
        parser.add_argument(
            '--clear',
            action='store_true',
            help='Clear/delete middleware log files'
        )

    def handle(self, *args, **options):
        """Dispatch to the first requested action; default to the status view."""
        if options['enable']:
            self._show_enable_instructions()
        elif options['disable']:
            self._show_disable_instructions()
        elif options['clear']:
            self._clear_logs()
        else:
            self._show_status()

    def _show_status(self):
        """Show current middleware logging configuration"""
        self.stdout.write(self.style.HTTP_INFO("🔍 AI-WAF Middleware Logging Status"))
        self.stdout.write("")

        # Check settings
        logging_enabled = getattr(settings, 'AIWAF_MIDDLEWARE_LOGGING', False)
        log_file = getattr(settings, 'AIWAF_MIDDLEWARE_LOG', 'aiwaf_requests.log')
        csv_format = getattr(settings, 'AIWAF_MIDDLEWARE_CSV', True)
        # The CSV path is derived from the text-log path by swapping the suffix.
        csv_file = log_file.replace('.log', '.csv') if csv_format else None

        # Status
        status_color = self.style.SUCCESS if logging_enabled else self.style.WARNING
        self.stdout.write(f"Status: {status_color('ENABLED' if logging_enabled else 'DISABLED')}")
        self.stdout.write(f"Log File: {log_file}")
        if csv_format:
            self.stdout.write(f"CSV File: {csv_file}")
        self.stdout.write(f"Format: {'CSV' if csv_format else 'Text'}")
        self.stdout.write("")

        # File existence and sizes
        if logging_enabled:
            self.stdout.write("📁 Log Files:")

            if csv_format and csv_file:
                if os.path.exists(csv_file):
                    size = os.path.getsize(csv_file)
                    lines = self._count_csv_lines(csv_file)
                    self.stdout.write(f" ✅ {csv_file} ({size:,} bytes, {lines:,} entries)")
                else:
                    self.stdout.write(f" ❌ {csv_file} (not found)")

            if os.path.exists(log_file):
                size = os.path.getsize(log_file)
                self.stdout.write(f" ✅ {log_file} ({size:,} bytes)")
            else:
                self.stdout.write(f" ❌ {log_file} (not found)")

        # Middleware check
        middleware_list = getattr(settings, 'MIDDLEWARE', [])
        middleware_installed = 'aiwaf.middleware_logger.AIWAFLoggerMiddleware' in middleware_list

        self.stdout.write("")
        middleware_color = self.style.SUCCESS if middleware_installed else self.style.ERROR
        self.stdout.write(f"Middleware: {middleware_color('INSTALLED' if middleware_installed else 'NOT INSTALLED')}")

        if logging_enabled and not middleware_installed:
            self.stdout.write(self.style.WARNING("⚠️ Logging is enabled but middleware is not installed!"))

    def _show_enable_instructions(self):
        """Show instructions for enabling middleware logging"""
        self.stdout.write(self.style.SUCCESS("🚀 Enable AI-WAF Middleware Logging"))
        self.stdout.write("")
        self.stdout.write("Add these settings to your Django settings.py:")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("# Enable AI-WAF middleware logging"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_LOGGING = True"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_LOG = 'aiwaf_requests.log' # Optional"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_CSV = True # Optional (default: True)"))
        self.stdout.write("")
        self.stdout.write("Add middleware to MIDDLEWARE list (preferably near the end):")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("MIDDLEWARE = ["))
        self.stdout.write(self.style.HTTP_INFO(" # ... your existing middleware ..."))
        self.stdout.write(self.style.HTTP_INFO(" 'aiwaf.middleware_logger.AIWAFLoggerMiddleware',"))
        self.stdout.write(self.style.HTTP_INFO("]"))
        self.stdout.write("")
        self.stdout.write("Benefits:")
        self.stdout.write(" ✅ Fallback when main access logs unavailable")
        self.stdout.write(" ✅ CSV format for easy analysis")
        self.stdout.write(" ✅ Automatic integration with AI-WAF trainer")
        self.stdout.write(" ✅ Captures response times for better detection")

    def _show_disable_instructions(self):
        """Show instructions for disabling middleware logging"""
        self.stdout.write(self.style.WARNING("⏹️ Disable AI-WAF Middleware Logging"))
        self.stdout.write("")
        self.stdout.write("To disable, update your Django settings.py:")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("# Disable AI-WAF middleware logging"))
        self.stdout.write(self.style.HTTP_INFO("AIWAF_MIDDLEWARE_LOGGING = False"))
        self.stdout.write("")
        self.stdout.write("And remove from MIDDLEWARE list:")
        self.stdout.write("")
        self.stdout.write(self.style.HTTP_INFO("MIDDLEWARE = ["))
        self.stdout.write(self.style.HTTP_INFO(" # ... your existing middleware ..."))
        self.stdout.write(self.style.HTTP_INFO(" # 'aiwaf.middleware_logger.AIWAFLoggerMiddleware', # Remove this line"))
        self.stdout.write(self.style.HTTP_INFO("]"))

    def _clear_logs(self):
        """Clear/delete middleware log files"""
        log_file = getattr(settings, 'AIWAF_MIDDLEWARE_LOG', 'aiwaf_requests.log')
        csv_format = getattr(settings, 'AIWAF_MIDDLEWARE_CSV', True)
        csv_file = log_file.replace('.log', '.csv') if csv_format else None

        files_deleted = 0

        # CSV file first, then the text log. csv_file is None when CSV output
        # is disabled, and a path that no longer exists is skipped.
        for path in (csv_file, log_file):
            if not path or not os.path.exists(path):
                continue
            try:
                os.remove(path)
            except OSError as e:
                # Report the failure and carry on with the other file.
                self.stdout.write(self.style.ERROR(f"❌ Failed to delete {path}: {e}"))
            else:
                self.stdout.write(self.style.SUCCESS(f"✅ Deleted {path}"))
                files_deleted += 1

        if files_deleted == 0:
            self.stdout.write(self.style.WARNING("ℹ️ No log files found to delete"))
        else:
            self.stdout.write(self.style.SUCCESS(f"🗑️ Deleted {files_deleted} log file(s)"))

    def _count_csv_lines(self, csv_file):
        """Count lines in CSV file (excluding header).

        Returns 0 when the file cannot be read or decoded, and also when it
        is empty (so an empty file is never reported as -1 entries).
        """
        try:
            with open(csv_file, 'r', encoding='utf-8') as f:
                total = sum(1 for _ in f)
        except (OSError, UnicodeDecodeError):
            return 0
        # Subtract the header row, but never go below zero.
        return max(total - 1, 0)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from django.core.management.base import BaseCommand
|
|
2
|
-
from aiwaf.
|
|
2
|
+
from aiwaf.storage import get_blacklist_store, get_exemption_store
|
|
3
3
|
|
|
4
4
|
class Command(BaseCommand):
|
|
5
5
|
help = 'Reset AI-WAF by clearing all blacklist and exemption (whitelist) entries'
|
|
@@ -26,9 +26,12 @@ class Command(BaseCommand):
|
|
|
26
26
|
exemptions_only = options['exemptions_only']
|
|
27
27
|
confirm = options['confirm']
|
|
28
28
|
|
|
29
|
+
blacklist_store = get_blacklist_store()
|
|
30
|
+
exemption_store = get_exemption_store()
|
|
31
|
+
|
|
29
32
|
# Count current entries
|
|
30
|
-
blacklist_count =
|
|
31
|
-
exemption_count =
|
|
33
|
+
blacklist_count = len(blacklist_store.get_all())
|
|
34
|
+
exemption_count = len(exemption_store.get_all())
|
|
32
35
|
|
|
33
36
|
if blacklist_only and exemptions_only:
|
|
34
37
|
self.stdout.write(self.style.ERROR('Cannot use both --blacklist-only and --exemptions-only flags'))
|
|
@@ -61,10 +64,18 @@ class Command(BaseCommand):
|
|
|
61
64
|
deleted_counts = {'blacklist': 0, 'exemptions': 0}
|
|
62
65
|
|
|
63
66
|
if clear_blacklist:
|
|
64
|
-
|
|
67
|
+
# Clear blacklist entries
|
|
68
|
+
blacklist_entries = blacklist_store.get_all()
|
|
69
|
+
for entry in blacklist_entries:
|
|
70
|
+
blacklist_store.remove_ip(entry['ip_address'])
|
|
71
|
+
deleted_counts['blacklist'] = len(blacklist_entries)
|
|
65
72
|
|
|
66
73
|
if clear_exemptions:
|
|
67
|
-
|
|
74
|
+
# Clear exemption entries
|
|
75
|
+
exemption_entries = exemption_store.get_all()
|
|
76
|
+
for entry in exemption_entries:
|
|
77
|
+
exemption_store.remove_ip(entry['ip_address'])
|
|
78
|
+
deleted_counts['exemptions'] = len(exemption_entries)
|
|
68
79
|
|
|
69
80
|
# Report results
|
|
70
81
|
if clear_blacklist and clear_exemptions:
|
aiwaf/middleware.py
CHANGED
|
@@ -16,8 +16,9 @@ from django.apps import apps
|
|
|
16
16
|
from django.urls import get_resolver
|
|
17
17
|
from .trainer import STATIC_KW, STATUS_IDX, path_exists_in_django
|
|
18
18
|
from .blacklist_manager import BlacklistManager
|
|
19
|
-
from .models import
|
|
19
|
+
from .models import IPExemption
|
|
20
20
|
from .utils import is_exempt, get_ip, is_ip_exempted
|
|
21
|
+
from .storage import get_keyword_store
|
|
21
22
|
|
|
22
23
|
MODEL_PATH = getattr(
|
|
23
24
|
settings,
|
|
@@ -74,15 +75,14 @@ class IPAndKeywordBlockMiddleware:
|
|
|
74
75
|
return self.get_response(request)
|
|
75
76
|
if BlacklistManager.is_blocked(ip):
|
|
76
77
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
78
|
+
|
|
79
|
+
keyword_store = get_keyword_store()
|
|
77
80
|
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
81
|
+
|
|
78
82
|
for seg in segments:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
dynamic_top =
|
|
82
|
-
DynamicKeyword.objects
|
|
83
|
-
.order_by("-count")
|
|
84
|
-
.values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
|
|
85
|
-
)
|
|
83
|
+
keyword_store.add_keyword(seg)
|
|
84
|
+
|
|
85
|
+
dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
86
86
|
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
87
87
|
suspicious_kw = {
|
|
88
88
|
kw for kw in all_kw
|
|
@@ -172,10 +172,11 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
172
172
|
data.append((now, request.path, response.status_code, resp_time))
|
|
173
173
|
data = [d for d in data if now - d[0] < self.WINDOW]
|
|
174
174
|
cache.set(key, data, timeout=self.WINDOW)
|
|
175
|
+
|
|
176
|
+
keyword_store = get_keyword_store()
|
|
175
177
|
for seg in re.split(r"\W+", request.path.lower()):
|
|
176
178
|
if len(seg) > 3:
|
|
177
|
-
|
|
178
|
-
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
|
|
179
|
+
keyword_store.add_keyword(seg)
|
|
179
180
|
|
|
180
181
|
return response
|
|
181
182
|
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# aiwaf/middleware_logger.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import csv
|
|
5
|
+
import time
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from django.conf import settings
|
|
8
|
+
from django.utils.deprecation import MiddlewareMixin
|
|
9
|
+
from .utils import get_ip
|
|
10
|
+
|
|
11
|
+
class AIWAFLoggerMiddleware(MiddlewareMixin):
    """
    Middleware that logs requests to a CSV file for AI-WAF training.
    Acts as a fallback when main access logs are unavailable.

    Configuration is read once from Django settings at construction:
    AIWAF_MIDDLEWARE_LOGGING (default False), AIWAF_MIDDLEWARE_LOG
    (default "aiwaf_requests.log") and AIWAF_MIDDLEWARE_CSV (default True).
    """

    # Column order shared by the header row and every data row.
    _CSV_COLUMNS = [
        'timestamp', 'ip_address', 'method', 'path', 'status_code',
        'response_time', 'user_agent', 'referer', 'content_length'
    ]

    def __init__(self, get_response):
        super().__init__(get_response)
        self.log_file = getattr(settings, "AIWAF_MIDDLEWARE_LOG", "aiwaf_requests.log")
        self.csv_format = getattr(settings, "AIWAF_MIDDLEWARE_CSV", True)
        self.log_enabled = getattr(settings, "AIWAF_MIDDLEWARE_LOGGING", False)

        # Always define the attribute so it can be inspected safely even when
        # CSV logging is off.
        self.csv_file = None

        # CSV file path (if using CSV format)
        if self.csv_format and self.log_enabled:
            self.csv_file = self.log_file.replace('.log', '.csv')
            self._ensure_csv_header()

    def _ensure_csv_header(self):
        """Create the CSV file with its header row if the file does not exist."""
        if os.path.exists(self.csv_file):
            return
        directory = os.path.dirname(self.csv_file)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(self.csv_file, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow(self._CSV_COLUMNS)

    def process_request(self, request):
        """Store request start time"""
        request._aiwaf_start_time = time.time()
        return None

    def process_response(self, request, response):
        """Log the completed request and return `response` unchanged."""
        if not self.log_enabled:
            return response

        # Calculate response time; if process_request never ran for this
        # request the fallback makes the elapsed time effectively zero.
        start_time = getattr(request, '_aiwaf_start_time', time.time())
        response_time = time.time() - start_time

        # Extract request data
        log_data = {
            # The format hard-codes a "+0000" offset, so the clock reading
            # must be UTC rather than server-local time.
            'timestamp': time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()),
            'ip_address': get_ip(request),
            'method': request.method,
            'path': request.path,
            'status_code': response.status_code,
            'response_time': f"{response_time:.3f}",
            'user_agent': request.META.get('HTTP_USER_AGENT', '-'),
            'referer': request.META.get('HTTP_REFERER', '-'),
            'content_length': response.get('Content-Length', '-')
        }

        if self.csv_format:
            self._log_to_csv(log_data)
        else:
            self._log_to_text(log_data)

        return response

    def _log_to_csv(self, data):
        """Append one row to the CSV file, writing the header first if needed."""
        try:
            # The file may have been deleted or truncated since start-up;
            # without a header row a csv.DictReader would treat the first
            # data row as the column names.
            needs_header = (
                not os.path.exists(self.csv_file)
                or os.path.getsize(self.csv_file) == 0
            )
            with open(self.csv_file, 'a', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                if needs_header:
                    writer.writerow(self._CSV_COLUMNS)
                writer.writerow([data[column] for column in self._CSV_COLUMNS])
        except Exception:
            # Deliberately best-effort: a logging failure must never break
            # the request that is being served.
            pass

    def _log_to_text(self, data):
        """Write log entry in common log format"""
        try:
            # Common Log Format with response time
            log_line = f'{data["ip_address"]} - - [{data["timestamp"]}] "{data["method"]} {data["path"]} HTTP/1.1" {data["status_code"]} {data["content_length"]} "{data["referer"]}" "{data["user_agent"]}" response-time={data["response_time"]}\n'

            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(log_line)
        except Exception:
            # Deliberately best-effort: a logging failure must never break
            # the request that is being served.
            pass
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class AIWAFCSVLogParser:
    """
    Parser for AI-WAF CSV logs that converts them to the format expected by trainer.py
    """

    @staticmethod
    def parse_csv_log(csv_file_path):
        """
        Parse CSV log file and return records in the format expected by trainer.py

        Returns list of dictionaries with keys: ip, timestamp, path, status,
        referer, user_agent, response_time. `timestamp` is a naive datetime
        and `response_time` a float; the other values are strings as read.

        Malformed rows (bad timestamp, non-numeric response time, missing
        columns, or a truncated row) are skipped individually. If the file is
        missing the result is empty; if reading fails part-way, the rows
        parsed so far are returned.
        """
        records = []

        if not os.path.exists(csv_file_path):
            return records

        try:
            with open(csv_file_path, 'r', newline='', encoding='utf-8') as f:
                for row in csv.DictReader(f):
                    try:
                        record = {
                            'ip': row['ip_address'],
                            # Convert timestamp to datetime object
                            'timestamp': datetime.strptime(
                                row['timestamp'], '%d/%b/%Y:%H:%M:%S +0000'
                            ),
                            'path': row['path'],
                            'status': row['status_code'],
                            'referer': row['referer'],
                            'user_agent': row['user_agent'],
                            'response_time': float(row['response_time'])
                        }
                    except (ValueError, KeyError, TypeError):
                        # TypeError covers truncated rows: DictReader fills
                        # the missing columns with None, and float(None) /
                        # strptime(None, ...) raise TypeError. Skipping here
                        # keeps one bad row from discarding all later rows.
                        continue
                    records.append(record)
        except (OSError, UnicodeError, csv.Error):
            # The file could not be opened, decoded, or tokenised; keep
            # whatever was parsed before the failure.
            pass

        return records

    @staticmethod
    def get_log_lines_for_trainer(csv_file_path):
        """
        Convert CSV log to format compatible with trainer.py's _read_all_logs()
        Returns list of log line strings
        """
        log_lines = []

        for record in AIWAFCSVLogParser.parse_csv_log(csv_file_path):
            # Convert back to common log format that trainer.py expects
            timestamp_str = record['timestamp'].strftime('%d/%b/%Y:%H:%M:%S +0000')
            content_length = '-'  # not carried through the parsed record

            # NOTE(review): the method is emitted as "GET" for every record;
            # the trainer's line pattern is not visible here, so this is kept
            # as-is - confirm before emitting the logged method instead.
            log_line = f'{record["ip"]} - - [{timestamp_str}] "GET {record["path"]} HTTP/1.1" {record["status"]} {content_length} "{record["referer"]}" "{record["user_agent"]}" response-time={record["response_time"]:.3f}'
            log_lines.append(log_line)

        return log_lines
|
aiwaf/storage.py
CHANGED
|
@@ -2,19 +2,33 @@ import os, csv, gzip, glob
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from django.conf import settings
|
|
5
|
-
from .
|
|
5
|
+
from django.utils import timezone
|
|
6
|
+
from .models import FeatureSample, BlacklistEntry, IPExemption, DynamicKeyword
|
|
7
|
+
|
|
8
|
+
# Configuration
|
|
9
|
+
STORAGE_MODE = getattr(settings, "AIWAF_STORAGE_MODE", "models") # "models" or "csv"
|
|
10
|
+
CSV_DATA_DIR = getattr(settings, "AIWAF_CSV_DATA_DIR", "aiwaf_data")
|
|
11
|
+
FEATURE_CSV = getattr(settings, "AIWAF_CSV_PATH", os.path.join(CSV_DATA_DIR, "access_samples.csv"))
|
|
12
|
+
BLACKLIST_CSV = os.path.join(CSV_DATA_DIR, "blacklist.csv")
|
|
13
|
+
EXEMPTION_CSV = os.path.join(CSV_DATA_DIR, "exemptions.csv")
|
|
14
|
+
KEYWORDS_CSV = os.path.join(CSV_DATA_DIR, "keywords.csv")
|
|
6
15
|
|
|
7
|
-
DATA_FILE = getattr(settings, "AIWAF_CSV_PATH", "access_samples.csv")
|
|
8
16
|
CSV_HEADER = [
|
|
9
17
|
"ip","path_len","kw_hits","resp_time",
|
|
10
18
|
"status_idx","burst_count","total_404","label"
|
|
11
19
|
]
|
|
12
20
|
|
|
21
|
+
def ensure_csv_directory():
    """Ensure the CSV data directory exists.

    This is called by the CSV-backed stores before they write. It is not
    gated on STORAGE_MODE because the CSV feature store writes to
    FEATURE_CSV, which defaults to a path inside CSV_DATA_DIR, regardless
    of that mode.
    """
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(CSV_DATA_DIR, exist_ok=True)
|
|
25
|
+
|
|
13
26
|
class CsvFeatureStore:
|
|
14
27
|
@staticmethod
|
|
15
28
|
def persist_rows(rows):
|
|
16
|
-
|
|
17
|
-
|
|
29
|
+
ensure_csv_directory()
|
|
30
|
+
new_file = not os.path.exists(FEATURE_CSV)
|
|
31
|
+
with open(FEATURE_CSV, "a", newline="", encoding="utf-8") as f:
|
|
18
32
|
w = csv.writer(f)
|
|
19
33
|
if new_file:
|
|
20
34
|
w.writerow(CSV_HEADER)
|
|
@@ -22,10 +36,10 @@ class CsvFeatureStore:
|
|
|
22
36
|
|
|
23
37
|
@staticmethod
|
|
24
38
|
def load_matrix():
|
|
25
|
-
if not os.path.exists(
|
|
39
|
+
if not os.path.exists(FEATURE_CSV):
|
|
26
40
|
return np.empty((0,6))
|
|
27
41
|
df = pd.read_csv(
|
|
28
|
-
|
|
42
|
+
FEATURE_CSV,
|
|
29
43
|
names=CSV_HEADER,
|
|
30
44
|
skiprows=1,
|
|
31
45
|
engine="python",
|
|
@@ -59,3 +73,279 @@ def get_store():
|
|
|
59
73
|
if getattr(settings, "AIWAF_FEATURE_STORE", "csv") == "db":
|
|
60
74
|
return DbFeatureStore
|
|
61
75
|
return CsvFeatureStore
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ============= CSV Storage Classes =============
|
|
79
|
+
|
|
80
|
+
class CsvBlacklistStore:
    """CSV-based storage for IP blacklist entries.

    Rows live in BLACKLIST_CSV with the columns ip_address, reason and
    created_at. Every write keeps the header row in place so the file can
    always be read back with csv.DictReader.
    """

    # Column order of the backing file.
    _FIELDS = ["ip_address", "reason", "created_at"]

    @staticmethod
    def add_ip(ip_address, reason):
        """Append `ip_address` with `reason`; do nothing if it is already listed."""
        ensure_csv_directory()
        # Check if IP already exists
        if CsvBlacklistStore.is_blocked(ip_address):
            return

        # A missing *or empty* file needs the header before the first row;
        # otherwise DictReader would take that row as the column names.
        needs_header = (
            not os.path.exists(BLACKLIST_CSV)
            or os.path.getsize(BLACKLIST_CSV) == 0
        )
        with open(BLACKLIST_CSV, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(CsvBlacklistStore._FIELDS)
            writer.writerow([ip_address, reason, timezone.now().isoformat()])

    @staticmethod
    def is_blocked(ip_address):
        """Return True when a row with this exact `ip_address` string exists."""
        if not os.path.exists(BLACKLIST_CSV):
            return False

        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
            return any(row["ip_address"] == ip_address for row in csv.DictReader(f))

    @staticmethod
    def get_all():
        """Return list of dictionaries with blacklist entries"""
        if not os.path.exists(BLACKLIST_CSV):
            return []

        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
            return list(csv.DictReader(f))

    @staticmethod
    def remove_ip(ip_address):
        """Rewrite the file without any row whose ip_address matches."""
        if not os.path.exists(BLACKLIST_CSV):
            return

        # Read all entries except the one to remove
        with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
            kept = [row for row in csv.DictReader(f) if row["ip_address"] != ip_address]

        # Write back the filtered entries. The header is written even when
        # nothing is left, so a later append never lands in a header-less file.
        with open(BLACKLIST_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=CsvBlacklistStore._FIELDS)
            writer.writeheader()
            writer.writerows(kept)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class CsvExemptionStore:
    """CSV-based storage for IP exemption entries.

    Rows live in EXEMPTION_CSV with the columns ip_address, reason and
    created_at. Every write keeps the header row in place so the file can
    always be read back with csv.DictReader.
    """

    # Column order of the backing file.
    _FIELDS = ["ip_address", "reason", "created_at"]

    @staticmethod
    def add_ip(ip_address, reason=""):
        """Append `ip_address` with `reason`; do nothing if it is already listed."""
        ensure_csv_directory()
        # Check if IP already exists
        if CsvExemptionStore.is_exempted(ip_address):
            return

        # A missing *or empty* file needs the header before the first row;
        # otherwise DictReader would take that row as the column names.
        needs_header = (
            not os.path.exists(EXEMPTION_CSV)
            or os.path.getsize(EXEMPTION_CSV) == 0
        )
        with open(EXEMPTION_CSV, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(CsvExemptionStore._FIELDS)
            writer.writerow([ip_address, reason, timezone.now().isoformat()])

    @staticmethod
    def is_exempted(ip_address):
        """Return True when a row with this exact `ip_address` string exists."""
        if not os.path.exists(EXEMPTION_CSV):
            return False

        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
            return any(row["ip_address"] == ip_address for row in csv.DictReader(f))

    @staticmethod
    def get_all():
        """Return list of dictionaries with exemption entries"""
        if not os.path.exists(EXEMPTION_CSV):
            return []

        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
            return list(csv.DictReader(f))

    @staticmethod
    def remove_ip(ip_address):
        """Rewrite the file without any row whose ip_address matches."""
        if not os.path.exists(EXEMPTION_CSV):
            return

        # Read all entries except the one to remove
        with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
            kept = [row for row in csv.DictReader(f) if row["ip_address"] != ip_address]

        # Write back the filtered entries. The header is written even when
        # nothing is left, so a later append never lands in a header-less file.
        with open(EXEMPTION_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=CsvExemptionStore._FIELDS)
            writer.writeheader()
            writer.writerows(kept)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class CsvKeywordStore:
    """CSV-based storage for dynamic keywords.

    KEYWORDS_CSV holds one row per keyword with the columns keyword, count
    and last_updated. Every mutation loads the whole file, changes the
    in-memory mapping and rewrites the file.
    """

    @staticmethod
    def add_keyword(keyword, count=1):
        """Add `count` to the stored tally for `keyword`, creating it at 0 first."""
        ensure_csv_directory()

        # Read existing keywords, bump the tally, save back to file.
        keywords = CsvKeywordStore._load_keywords()
        keywords[keyword] = keywords.get(keyword, 0) + count
        CsvKeywordStore._save_keywords(keywords)

    @staticmethod
    def get_top_keywords(limit=10):
        """Return up to `limit` keywords ordered by descending count."""
        keywords = CsvKeywordStore._load_keywords()
        # sorted() is stable, so keywords with equal counts keep file order.
        ranked = sorted(keywords, key=keywords.get, reverse=True)
        return ranked[:limit]

    @staticmethod
    def remove_keyword(keyword):
        """Delete `keyword` if present; the file is only rewritten in that case."""
        keywords = CsvKeywordStore._load_keywords()
        if keyword in keywords:
            del keywords[keyword]
            CsvKeywordStore._save_keywords(keywords)

    @staticmethod
    def clear_all():
        """Delete the keyword file if it exists."""
        if os.path.exists(KEYWORDS_CSV):
            os.remove(KEYWORDS_CSV)

    @staticmethod
    def _load_keywords():
        """Load keywords from CSV file as a dictionary"""
        if not os.path.exists(KEYWORDS_CSV):
            return {}

        with open(KEYWORDS_CSV, "r", newline="", encoding="utf-8") as f:
            return {row["keyword"]: int(row["count"]) for row in csv.DictReader(f)}

    @staticmethod
    def _save_keywords(keywords):
        """Save keywords dictionary to CSV file"""
        # One timestamp per save: every row is rewritten by this call, so
        # they all share the same last_updated value.
        saved_at = timezone.now().isoformat()
        with open(KEYWORDS_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["keyword", "count", "last_updated"])
            writer.writerows([kw, n, saved_at] for kw, n in keywords.items())
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# ============= Storage Factory Functions =============
|
|
263
|
+
|
|
264
|
+
def get_blacklist_store():
    """Pick the blacklist store class matching STORAGE_MODE.

    "csv" selects the CSV-backed store; any other value selects the wrapper
    around the Django model.
    """
    use_csv = STORAGE_MODE == "csv"
    return CsvBlacklistStore if use_csv else ModelBlacklistStore
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def get_exemption_store():
    """Pick the exemption store class matching STORAGE_MODE.

    "csv" selects the CSV-backed store; any other value selects the wrapper
    around the Django model.
    """
    use_csv = STORAGE_MODE == "csv"
    return CsvExemptionStore if use_csv else ModelExemptionStore
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def get_keyword_store():
    """Pick the keyword store class matching STORAGE_MODE.

    "csv" selects the CSV-backed store; any other value selects the wrapper
    around the Django model.
    """
    use_csv = STORAGE_MODE == "csv"
    return CsvKeywordStore if use_csv else ModelKeywordStore
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# ============= Django Model Wrappers =============
|
|
290
|
+
|
|
291
|
+
class ModelBlacklistStore:
|
|
292
|
+
"""Django model-based storage for blacklist entries"""
|
|
293
|
+
|
|
294
|
+
@staticmethod
|
|
295
|
+
def add_ip(ip_address, reason):
|
|
296
|
+
BlacklistEntry.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
|
|
297
|
+
|
|
298
|
+
@staticmethod
|
|
299
|
+
def is_blocked(ip_address):
|
|
300
|
+
return BlacklistEntry.objects.filter(ip_address=ip_address).exists()
|
|
301
|
+
|
|
302
|
+
@staticmethod
|
|
303
|
+
def get_all():
|
|
304
|
+
return list(BlacklistEntry.objects.values("ip_address", "reason", "created_at"))
|
|
305
|
+
|
|
306
|
+
@staticmethod
|
|
307
|
+
def remove_ip(ip_address):
|
|
308
|
+
BlacklistEntry.objects.filter(ip_address=ip_address).delete()
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class ModelExemptionStore:
|
|
312
|
+
"""Django model-based storage for exemption entries"""
|
|
313
|
+
|
|
314
|
+
@staticmethod
|
|
315
|
+
def add_ip(ip_address, reason=""):
|
|
316
|
+
IPExemption.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
|
|
317
|
+
|
|
318
|
+
@staticmethod
|
|
319
|
+
def is_exempted(ip_address):
|
|
320
|
+
return IPExemption.objects.filter(ip_address=ip_address).exists()
|
|
321
|
+
|
|
322
|
+
@staticmethod
|
|
323
|
+
def get_all():
|
|
324
|
+
return list(IPExemption.objects.values("ip_address", "reason", "created_at"))
|
|
325
|
+
|
|
326
|
+
@staticmethod
|
|
327
|
+
def remove_ip(ip_address):
|
|
328
|
+
IPExemption.objects.filter(ip_address=ip_address).delete()
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class ModelKeywordStore:
|
|
332
|
+
"""Django model-based storage for dynamic keywords"""
|
|
333
|
+
|
|
334
|
+
@staticmethod
|
|
335
|
+
def add_keyword(keyword, count=1):
|
|
336
|
+
obj, created = DynamicKeyword.objects.get_or_create(keyword=keyword, defaults={"count": count})
|
|
337
|
+
if not created:
|
|
338
|
+
obj.count += count
|
|
339
|
+
obj.save()
|
|
340
|
+
|
|
341
|
+
@staticmethod
|
|
342
|
+
def get_top_keywords(limit=10):
|
|
343
|
+
return list(DynamicKeyword.objects.order_by("-count").values_list("keyword", flat=True)[:limit])
|
|
344
|
+
|
|
345
|
+
@staticmethod
|
|
346
|
+
def remove_keyword(keyword):
|
|
347
|
+
DynamicKeyword.objects.filter(keyword=keyword).delete()
|
|
348
|
+
|
|
349
|
+
@staticmethod
|
|
350
|
+
def clear_all():
|
|
351
|
+
DynamicKeyword.objects.all().delete()
|
aiwaf/trainer.py
CHANGED
|
@@ -14,6 +14,7 @@ from django.conf import settings
|
|
|
14
14
|
from django.apps import apps
|
|
15
15
|
from django.db.models import F
|
|
16
16
|
from .utils import is_exempt_path
|
|
17
|
+
from .storage import get_blacklist_store, get_exemption_store, get_keyword_store
|
|
17
18
|
|
|
18
19
|
# ─────────── Configuration ───────────
|
|
19
20
|
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
@@ -28,11 +29,6 @@ _LOG_RX = re.compile(
|
|
|
28
29
|
)
|
|
29
30
|
|
|
30
31
|
|
|
31
|
-
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
32
|
-
DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
|
|
33
|
-
IPExemption = apps.get_model("aiwaf", "IPExemption")
|
|
34
|
-
|
|
35
|
-
|
|
36
32
|
def path_exists_in_django(path: str) -> bool:
|
|
37
33
|
from django.urls import get_resolver
|
|
38
34
|
from django.urls.resolvers import URLResolver
|
|
@@ -54,27 +50,46 @@ def path_exists_in_django(path: str) -> bool:
|
|
|
54
50
|
|
|
55
51
|
|
|
56
52
|
def remove_exempt_keywords() -> None:
|
|
53
|
+
keyword_store = get_keyword_store()
|
|
57
54
|
exempt_tokens = set()
|
|
55
|
+
|
|
58
56
|
for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
|
|
59
57
|
for seg in re.split(r"\W+", path.strip("/").lower()):
|
|
60
58
|
if len(seg) > 3:
|
|
61
59
|
exempt_tokens.add(seg)
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
|
|
61
|
+
# Remove exempt tokens from keyword storage
|
|
62
|
+
for token in exempt_tokens:
|
|
63
|
+
keyword_store.remove_keyword(token)
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
def _read_all_logs() -> list[str]:
|
|
67
67
|
lines = []
|
|
68
|
+
|
|
69
|
+
# First try to read from main access log
|
|
68
70
|
if LOG_PATH and os.path.exists(LOG_PATH):
|
|
69
71
|
with open(LOG_PATH, "r", errors="ignore") as f:
|
|
70
72
|
lines.extend(f.readlines())
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
for p in sorted(glob.glob(f"{LOG_PATH}.*")):
|
|
74
|
+
opener = gzip.open if p.endswith(".gz") else open
|
|
75
|
+
try:
|
|
76
|
+
with opener(p, "rt", errors="ignore") as f:
|
|
77
|
+
lines.extend(f.readlines())
|
|
78
|
+
except OSError:
|
|
79
|
+
continue
|
|
80
|
+
|
|
81
|
+
# If no lines found from main log, try AI-WAF middleware CSV log
|
|
82
|
+
if not lines:
|
|
83
|
+
middleware_csv = getattr(settings, "AIWAF_MIDDLEWARE_LOG", "aiwaf_requests.log").replace('.log', '.csv')
|
|
84
|
+
if os.path.exists(middleware_csv):
|
|
85
|
+
try:
|
|
86
|
+
from .middleware_logger import AIWAFCSVLogParser
|
|
87
|
+
csv_lines = AIWAFCSVLogParser.get_log_lines_for_trainer(middleware_csv)
|
|
88
|
+
lines.extend(csv_lines)
|
|
89
|
+
print(f"📋 Using AI-WAF middleware CSV log: {middleware_csv} ({len(csv_lines)} entries)")
|
|
90
|
+
except Exception as e:
|
|
91
|
+
print(f"⚠️ Failed to read middleware CSV log: {e}")
|
|
92
|
+
|
|
78
93
|
return lines
|
|
79
94
|
|
|
80
95
|
|
|
@@ -98,10 +113,15 @@ def _parse(line: str) -> dict | None:
|
|
|
98
113
|
|
|
99
114
|
def train() -> None:
|
|
100
115
|
remove_exempt_keywords()
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
116
|
+
|
|
117
|
+
# Remove any IPs in IPExemption from the blacklist using storage system
|
|
118
|
+
exemption_store = get_exemption_store()
|
|
119
|
+
blacklist_store = get_blacklist_store()
|
|
120
|
+
|
|
121
|
+
exempted_ips = [entry['ip_address'] for entry in exemption_store.get_all()]
|
|
122
|
+
for ip in exempted_ips:
|
|
123
|
+
blacklist_store.remove_ip(ip)
|
|
124
|
+
|
|
105
125
|
raw_lines = _read_all_logs()
|
|
106
126
|
if not raw_lines:
|
|
107
127
|
print("No log lines found – check AIWAF_ACCESS_LOG setting.")
|
|
@@ -133,10 +153,8 @@ def train() -> None:
|
|
|
133
153
|
|
|
134
154
|
# Don't block if majority of 404s are on login paths
|
|
135
155
|
if count > login_404s: # More non-login 404s than login 404s
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
defaults={"reason": f"Excessive 404s (≥6 non-login, {count}/{total_404s})"}
|
|
139
|
-
)
|
|
156
|
+
blacklist_store = get_blacklist_store()
|
|
157
|
+
blacklist_store.add_ip(ip, f"Excessive 404s (≥6 non-login, {count}/{total_404s})")
|
|
140
158
|
|
|
141
159
|
feature_dicts = []
|
|
142
160
|
for r in parsed:
|
|
@@ -187,10 +205,13 @@ def train() -> None:
|
|
|
187
205
|
if anomalous_ips:
|
|
188
206
|
print(f"⚠️ Detected {len(anomalous_ips)} potentially anomalous IPs during training")
|
|
189
207
|
|
|
208
|
+
exemption_store = get_exemption_store()
|
|
209
|
+
blacklist_store = get_blacklist_store()
|
|
190
210
|
blocked_count = 0
|
|
211
|
+
|
|
191
212
|
for ip in anomalous_ips:
|
|
192
213
|
# Skip if IP is exempted
|
|
193
|
-
if
|
|
214
|
+
if exemption_store.is_exempted(ip):
|
|
194
215
|
continue
|
|
195
216
|
|
|
196
217
|
# Get this IP's behavior from the data
|
|
@@ -213,10 +234,7 @@ def train() -> None:
|
|
|
213
234
|
continue
|
|
214
235
|
|
|
215
236
|
# Block if it shows clear signs of malicious behavior
|
|
216
|
-
|
|
217
|
-
ip_address=ip,
|
|
218
|
-
defaults={"reason": f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})"}
|
|
219
|
-
)
|
|
237
|
+
blacklist_store.add_ip(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
|
|
220
238
|
blocked_count += 1
|
|
221
239
|
print(f" - {ip}: Blocked for suspicious behavior (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
|
|
222
240
|
|
|
@@ -230,8 +248,10 @@ def train() -> None:
|
|
|
230
248
|
if len(seg) > 3 and seg not in STATIC_KW:
|
|
231
249
|
tokens[seg] += 1
|
|
232
250
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
251
|
+
keyword_store = get_keyword_store()
|
|
252
|
+
top_tokens = tokens.most_common(10)
|
|
253
|
+
|
|
254
|
+
for kw, cnt in top_tokens:
|
|
255
|
+
keyword_store.add_keyword(kw, cnt)
|
|
236
256
|
|
|
237
|
-
print(f"DynamicKeyword
|
|
257
|
+
print(f"DynamicKeyword storage updated with top tokens: {[kw for kw, _ in top_tokens]}")
|
aiwaf/utils.py
CHANGED
|
@@ -4,7 +4,7 @@ import glob
|
|
|
4
4
|
import gzip
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from django.conf import settings
|
|
7
|
-
from .
|
|
7
|
+
from .storage import get_exemption_store
|
|
8
8
|
|
|
9
9
|
_LOG_RX = re.compile(
|
|
10
10
|
r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)"'
|
|
@@ -53,7 +53,8 @@ def parse_log_line(line):
|
|
|
53
53
|
|
|
54
54
|
def is_ip_exempted(ip):
|
|
55
55
|
"""Check if IP is in exemption list"""
|
|
56
|
-
|
|
56
|
+
store = get_exemption_store()
|
|
57
|
+
return store.is_exempted(ip)
|
|
57
58
|
|
|
58
59
|
def is_view_exempt(request):
|
|
59
60
|
"""Check if the current view is marked as AI-WAF exempt"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiwaf
|
|
3
|
-
Version: 0.1.8.
|
|
3
|
+
Version: 0.1.8.8
|
|
4
4
|
Summary: AI-powered Web Application Firewall
|
|
5
5
|
Home-page: https://github.com/aayushgauba/aiwaf
|
|
6
6
|
Author: Aayush Gauba
|
|
@@ -83,7 +83,28 @@ aiwaf/
|
|
|
83
83
|
- Submit forms faster than `AIWAF_MIN_FORM_TIME` seconds (default: 1 second)
|
|
84
84
|
|
|
85
85
|
- **UUID Tampering Protection**
|
|
86
|
-
Blocks guessed or invalid UUIDs that don
|
|
86
|
+
Blocks guessed or invalid UUIDs that don't resolve to real models.
|
|
87
|
+
|
|
88
|
+
- **Built-in Request Logger**
|
|
89
|
+
Optional middleware logger that captures requests to CSV:
|
|
90
|
+
- **Automatic fallback** when main access logs unavailable
|
|
91
|
+
- **CSV format** for easy analysis and training
|
|
92
|
+
- **Captures response times** for better anomaly detection
|
|
93
|
+
- **Zero configuration** - works out of the box
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
**Exempt Path & IP Awareness**
|
|
97
|
+
|
|
98
|
+
**Exempt Paths:**
|
|
99
|
+
AI‑WAF automatically exempts common login paths (`/admin/`, `/login/`, `/accounts/login/`, etc.) from all blocking mechanisms. You can add additional exempt paths in your Django `settings.py`:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
AIWAF_EXEMPT_PATHS = [
|
|
103
|
+
"/api/webhooks/",
|
|
104
|
+
"/health/",
|
|
105
|
+
"/special-endpoint/",
|
|
106
|
+
]
|
|
107
|
+
```
|
|
87
108
|
|
|
88
109
|
|
|
89
110
|
**Exempt Path & IP Awareness**
|
|
@@ -188,6 +209,67 @@ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
|
|
|
188
209
|
|
|
189
210
|
---
|
|
190
211
|
|
|
212
|
+
### Storage Configuration
|
|
213
|
+
|
|
214
|
+
**Choose storage backend:**
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
# Use Django models (default) - requires database tables
|
|
218
|
+
AIWAF_STORAGE_MODE = "models"
|
|
219
|
+
|
|
220
|
+
# OR use CSV files - no database required
|
|
221
|
+
AIWAF_STORAGE_MODE = "csv"
|
|
222
|
+
AIWAF_CSV_DATA_DIR = "aiwaf_data" # Directory for CSV files
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
**CSV Mode Features:**
|
|
226
|
+
- No database migrations required
|
|
227
|
+
- Files stored in `aiwaf_data/` directory:
|
|
228
|
+
- `blacklist.csv` - Blocked IP addresses
|
|
229
|
+
- `exemptions.csv` - Exempt IP addresses
|
|
230
|
+
- `keywords.csv` - Dynamic keywords
|
|
231
|
+
- `access_samples.csv` - Feature samples for ML training
|
|
232
|
+
- Perfect for lightweight deployments or when you prefer file-based storage
|
|
233
|
+
- Management commands work identically in both modes
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
### Built-in Request Logger (Optional)
|
|
238
|
+
|
|
239
|
+
Enable AI-WAF's built-in request logger as a fallback when main access logs aren't available:
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
# Enable middleware logging
|
|
243
|
+
AIWAF_MIDDLEWARE_LOGGING = True # Enable/disable logging
|
|
244
|
+
AIWAF_MIDDLEWARE_LOG = "aiwaf_requests.log" # Log file path
|
|
245
|
+
AIWAF_MIDDLEWARE_CSV = True # Use CSV format (recommended)
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
**Then add middleware to MIDDLEWARE list:**
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
MIDDLEWARE = [
|
|
252
|
+
# ... your existing middleware ...
|
|
253
|
+
'aiwaf.middleware_logger.AIWAFLoggerMiddleware', # Add near the end
|
|
254
|
+
]
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
**Manage middleware logging:**
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
python manage.py aiwaf_logging --status # Check logging status
|
|
261
|
+
python manage.py aiwaf_logging --enable # Show setup instructions
|
|
262
|
+
python manage.py aiwaf_logging --clear # Clear log files
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
**Benefits:**
|
|
266
|
+
- **Automatic fallback** when `AIWAF_ACCESS_LOG` unavailable
|
|
267
|
+
- **CSV format** with precise timestamps and response times
|
|
268
|
+
- **Zero configuration** - trainer automatically detects and uses CSV logs
|
|
269
|
+
- **Lightweight** - fails silently to avoid breaking your application
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
191
273
|
### Optional (defaults shown)
|
|
192
274
|
|
|
193
275
|
```python
|
|
@@ -219,14 +301,40 @@ Add in **this** order to your `MIDDLEWARE` list:
|
|
|
219
301
|
```python
|
|
220
302
|
MIDDLEWARE = [
|
|
221
303
|
"aiwaf.middleware.IPAndKeywordBlockMiddleware",
|
|
222
|
-
"aiwaf.middleware.RateLimitMiddleware",
|
|
304
|
+
"aiwaf.middleware.RateLimitMiddleware",
|
|
223
305
|
"aiwaf.middleware.AIAnomalyMiddleware",
|
|
224
306
|
"aiwaf.middleware.HoneypotTimingMiddleware",
|
|
225
307
|
"aiwaf.middleware.UUIDTamperMiddleware",
|
|
226
308
|
# ... other middleware ...
|
|
309
|
+
"aiwaf.middleware_logger.AIWAFLoggerMiddleware", # Optional: Add if using built-in logger
|
|
227
310
|
]
|
|
228
311
|
```
|
|
229
312
|
|
|
313
|
+
> **⚠️ Order matters!** AI-WAF protection middleware should come early. The logger middleware should come near the end to capture final response data.
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## Running Detection & Training
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
python manage.py detect_and_train
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### What happens:
|
|
324
|
+
1. Read access logs (incl. rotated or gzipped) **OR** AI-WAF middleware CSV logs
|
|
325
|
+
2. Auto‑block IPs with ≥ 6 total 404s
|
|
326
|
+
3. Extract features & train IsolationForest
|
|
327
|
+
4. Save `model.pkl`
|
|
328
|
+
5. Extract top 10 dynamic keywords from 4xx/5xx
|
|
329
|
+
6. Remove any keywords associated with newly exempt paths
|
|
330
|
+
|
|
331
|
+
**Note:** If main access log (`AIWAF_ACCESS_LOG`) is unavailable, trainer automatically falls back to AI-WAF middleware CSV logs.
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
335
|
+
## 🧠 How It Works
|
|
336
|
+
```
|
|
337
|
+
|
|
230
338
|
---
|
|
231
339
|
|
|
232
340
|
## Running Detection & Training
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
|
|
2
|
+
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
|
+
aiwaf/blacklist_manager.py,sha256=92ltIrFfv8WOC4CXwvNVZYfivkRZHGNg3E2QAbHQipQ,550
|
|
4
|
+
aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
|
|
5
|
+
aiwaf/middleware.py,sha256=1JPrc0npI_a5bnB-thN0ME1ehfTbWBl1j9wTndZwRdQ,9505
|
|
6
|
+
aiwaf/middleware_logger.py,sha256=uTYTvIc4Mv1pjY50aXaqQ5cWAO9qqquijAyVMs1KWlM,6517
|
|
7
|
+
aiwaf/models.py,sha256=XaG1pd_oZu3y-fw66u4wblGlWcUY9gvsTNKGD0kQk7Y,1672
|
|
8
|
+
aiwaf/storage.py,sha256=Z0KWArfLmOHnvUcL5aVx8W_aHMr-qoEW8FVGrM23BvA,11639
|
|
9
|
+
aiwaf/trainer.py,sha256=bgVoBewnNVMJdgxcNchfhsPOnFXxStoBOqNhFYnpsqs,9244
|
|
10
|
+
aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
|
|
11
|
+
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
aiwaf/management/commands/add_ipexemption.py,sha256=srgdVPDJtF7G9GGIqaZ7L3qTuNheoS_uwlhlRO4W2bc,945
|
|
14
|
+
aiwaf/management/commands/aiwaf_logging.py,sha256=FCIqULn2tii2vD9VxL7vk3PV4k4vr7kaA00KyaCExYY,7692
|
|
15
|
+
aiwaf/management/commands/aiwaf_reset.py,sha256=0FIBqpZS8xgFFvAKJ-0zAC_-QNQwRkOHpXb8N-OdFr8,3740
|
|
16
|
+
aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMmFEkwwrcDsiM166K0,269
|
|
17
|
+
aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
|
|
18
|
+
aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
|
|
20
|
+
aiwaf-0.1.8.8.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
21
|
+
aiwaf-0.1.8.8.dist-info/METADATA,sha256=851Url25O97G0KGi3gHFF-zYSaI91BHiK7CbKJrLbk0,11261
|
|
22
|
+
aiwaf-0.1.8.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
23
|
+
aiwaf-0.1.8.8.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
24
|
+
aiwaf-0.1.8.8.dist-info/RECORD,,
|
aiwaf-0.1.8.6.dist-info/RECORD
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
|
|
2
|
-
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
|
-
aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
|
|
4
|
-
aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
|
|
5
|
-
aiwaf/middleware.py,sha256=eMad-wvQWALkH2nIhjssU9Y-AqFleP3Gm0lRu3qE0Bw,9679
|
|
6
|
-
aiwaf/models.py,sha256=XaG1pd_oZu3y-fw66u4wblGlWcUY9gvsTNKGD0kQk7Y,1672
|
|
7
|
-
aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
|
|
8
|
-
aiwaf/trainer.py,sha256=R00q_QQ1o2UmdIWMWNh847BGBrnI6j-hfjNalojfnhU,8494
|
|
9
|
-
aiwaf/utils.py,sha256=s-rtUrWQFVv-nuGxe2hz5-LLvB6TbZXKj6do46DwrkA,3376
|
|
10
|
-
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
aiwaf/management/commands/add_ipexemption.py,sha256=LWN21_ydqSjU3_hUnkou4Ciyrk_479zLvcKdWm8hkC0,988
|
|
13
|
-
aiwaf/management/commands/aiwaf_reset.py,sha256=dUTYX6Z6_X3Ft3lqF_McXE7OdKADlQFGFWvjdvFVZFI,3245
|
|
14
|
-
aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMmFEkwwrcDsiM166K0,269
|
|
15
|
-
aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
|
|
16
|
-
aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
|
|
18
|
-
aiwaf-0.1.8.6.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
19
|
-
aiwaf-0.1.8.6.dist-info/METADATA,sha256=s6gux1GQJsbvphRXyVkVPm63_bVWIXcJcagWtBSlgpE,7955
|
|
20
|
-
aiwaf-0.1.8.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
21
|
-
aiwaf-0.1.8.6.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
22
|
-
aiwaf-0.1.8.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|