aiwaf 0.1.9.2.1__py3-none-any.whl → 0.1.9.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/__init__.py +1 -1
- aiwaf/management/commands/aiwaf_list.py +81 -0
- aiwaf/middleware.py +64 -6
- aiwaf/trainer.py +169 -34
- {aiwaf-0.1.9.2.1.dist-info → aiwaf-0.1.9.2.3.dist-info}/METADATA +1 -1
- {aiwaf-0.1.9.2.1.dist-info → aiwaf-0.1.9.2.3.dist-info}/RECORD +9 -8
- {aiwaf-0.1.9.2.1.dist-info → aiwaf-0.1.9.2.3.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.9.2.1.dist-info → aiwaf-0.1.9.2.3.dist-info}/licenses/LICENSE +0 -0
- {aiwaf-0.1.9.2.1.dist-info → aiwaf-0.1.9.2.3.dist-info}/top_level.txt +0 -0
aiwaf/__init__.py
CHANGED
(the +1 -1 diff body for this file was not captured)
aiwaf/management/commands/aiwaf_list.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from django.core.management.base import BaseCommand
|
|
2
|
+
from django.utils import timezone
|
|
3
|
+
from aiwaf.storage import get_blacklist_store, get_exemption_store, get_keyword_store
|
|
4
|
+
from datetime import timedelta
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
def _sort(items, order):
|
|
8
|
+
reverse = (order == "newest")
|
|
9
|
+
return sorted(items, key=lambda x: x.get("created_at") or timezone.make_aware(timezone.datetime.min),
|
|
10
|
+
reverse=reverse)
|
|
11
|
+
|
|
12
|
+
def _filter_since(items, seconds):
|
|
13
|
+
if not seconds: return items
|
|
14
|
+
cutoff = timezone.now() - timedelta(seconds=seconds)
|
|
15
|
+
return [it for it in items if it.get("created_at") and it["created_at"] >= cutoff]
|
|
16
|
+
|
|
17
|
+
def _print_table(rows, headers):
|
|
18
|
+
widths = [len(h) for h in headers]
|
|
19
|
+
for r in rows:
|
|
20
|
+
for i, cell in enumerate(r):
|
|
21
|
+
widths[i] = max(widths[i], len(str(cell)))
|
|
22
|
+
print(" | ".join(h.ljust(widths[i]) for i, h in enumerate(headers)))
|
|
23
|
+
print("-+-".join("-" * w for w in widths))
|
|
24
|
+
for r in rows:
|
|
25
|
+
print(" | ".join(str(cell).ljust(widths[i]) for i, cell in enumerate(r)))
|
|
26
|
+
|
|
27
|
+
class Command(BaseCommand):
    """Management command listing AIWAF data.

    Depending on the selected flag it shows blocked IPs (the default),
    exempted IPs, dynamic keywords, or all three, either as plain-text tables
    or as a JSON document. All output goes through ``self.stdout`` so that
    ``call_command(..., stdout=...)`` can capture it in either format.
    """

    help = "Lister les données AIWAF (IPs bloquées, exemptions, mots-clés dynamiques)."

    def add_arguments(self, parser):
        """Register the section selector flags and the display options."""
        # The four section flags are mutually exclusive.
        grp = parser.add_mutually_exclusive_group()
        grp.add_argument("--ips", action="store_true", help="Lister les IPs bloquées (défaut).")
        grp.add_argument("--exemptions", action="store_true", help="Lister les IPs exemptées.")
        grp.add_argument("--keywords", action="store_true", help="Lister les mots-clés dynamiques.")
        grp.add_argument("--all", action="store_true", help="Tout lister.")
        parser.add_argument("--format", choices=["table", "json"], default="table")
        parser.add_argument("--limit", type=int, default=100)
        parser.add_argument("--order", choices=["newest", "oldest"], default="newest")
        parser.add_argument("--since", type=int, help="Fenêtre en secondes (ex: 86400 = 24h).")

    @staticmethod
    def _collect(store, since, order, limit):
        """Fetch every record of ``store``, then filter, sort and truncate it."""
        data = store.get_all()
        data = _filter_since(data, since)
        return _sort(data, order)[:limit]

    def _write_table(self, heading, rows, headers):
        """Write ``heading`` and the table for ``rows`` to ``self.stdout``."""
        import contextlib
        import io

        self.stdout.write(heading)
        # _print_table prints to sys.stdout; capture its text so the table
        # follows the same output channel as the JSON format.
        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            _print_table(rows, headers)
        self.stdout.write(buf.getvalue(), ending="")

    def handle(self, *args, **o):
        """Gather the requested sections and emit them in the chosen format."""
        if not any([o["exemptions"], o["keywords"], o["all"]]):  # default = ips
            o["ips"] = True
        payload = {}

        if o["all"] or o["ips"]:
            payload["ips"] = self._collect(
                get_blacklist_store(), o.get("since"), o["order"], o["limit"])

        if o["all"] or o["exemptions"]:
            payload["exemptions"] = self._collect(
                get_exemption_store(), o.get("since"), o["order"], o["limit"])

        if o["all"] or o["keywords"]:
            kws = get_keyword_store().get_top_keywords(o["limit"])
            payload["keywords"] = [{"keyword": k} for k in kws]

        if o["format"] == "json":
            def _default(v):
                # Best-effort serialisation of values json cannot encode:
                # ISO text when the value offers isoformat(), str() otherwise.
                try:
                    return v.isoformat()
                except Exception:
                    return str(v)
            self.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2, default=_default))
            return

        columns = ["ip_address", "reason", "created_at"]
        if "ips" in payload:
            rows = [[r.get("ip_address", ""), r.get("reason", ""), r.get("created_at", "")]
                    for r in payload["ips"]]
            self._write_table("\n== IPs bloquées ==", rows, columns)
        if "exemptions" in payload:
            rows = [[r.get("ip_address", ""), r.get("reason", ""), r.get("created_at", "")]
                    for r in payload["exemptions"]]
            self._write_table("\n== Exemptions ==", rows, columns)
        if "keywords" in payload:
            rows = [[r["keyword"]] for r in payload["keywords"]]
            self._write_table("\n== Mots-clés dynamiques ==", rows, ["keyword"])
|
aiwaf/middleware.py
CHANGED
|
@@ -298,11 +298,15 @@ class IPAndKeywordBlockMiddleware:
|
|
|
298
298
|
keyword_store = get_keyword_store()
|
|
299
299
|
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
300
300
|
|
|
301
|
-
#
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
301
|
+
# Smart learning: only learn from suspicious contexts, never from valid paths
|
|
302
|
+
if not path_exists: # Only learn from non-existent paths
|
|
303
|
+
for seg in segments:
|
|
304
|
+
# Only learn if it's not a legitimate keyword AND in a suspicious context
|
|
305
|
+
if (seg not in self.legitimate_path_keywords and
|
|
306
|
+
seg not in self.exempt_keywords and
|
|
307
|
+
self._is_malicious_context(request, seg)):
|
|
308
|
+
keyword_store.add_keyword(seg)
|
|
309
|
+
|
|
306
310
|
dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
307
311
|
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
308
312
|
|
|
@@ -345,7 +349,29 @@ class IPAndKeywordBlockMiddleware:
|
|
|
345
349
|
block_reason = f"Inherently suspicious: {seg}"
|
|
346
350
|
|
|
347
351
|
if is_suspicious:
|
|
348
|
-
# Additional context check before blocking
|
|
352
|
+
# Additional context check before blocking - be more conservative with valid paths
|
|
353
|
+
if path_exists:
|
|
354
|
+
# For valid paths, only block if there are VERY strong malicious indicators
|
|
355
|
+
very_strong_indicators = [
|
|
356
|
+
# Multiple attack patterns in same request
|
|
357
|
+
sum([
|
|
358
|
+
'../' in request.path, '..\\' in request.path,
|
|
359
|
+
any(param in request.GET for param in ['cmd', 'exec', 'system']),
|
|
360
|
+
request.path.count('%') > 5, # Heavy URL encoding
|
|
361
|
+
len([s for s in segments if s in self.malicious_keywords]) > 2
|
|
362
|
+
]) >= 2,
|
|
363
|
+
|
|
364
|
+
# Obvious attack attempts on valid paths
|
|
365
|
+
any(attack in request.path.lower() for attack in [
|
|
366
|
+
'union+select', 'drop+table', '<script', 'javascript:',
|
|
367
|
+
'onload=', 'onerror=', '${', '{{', 'eval('
|
|
368
|
+
])
|
|
369
|
+
]
|
|
370
|
+
|
|
371
|
+
if not any(very_strong_indicators):
|
|
372
|
+
continue # Skip blocking for valid paths without very strong indicators
|
|
373
|
+
|
|
374
|
+
# For non-existent paths or paths with very strong indicators, proceed with blocking
|
|
349
375
|
if self._is_malicious_context(request, seg) or not path_exists:
|
|
350
376
|
# Double-check exemption before blocking
|
|
351
377
|
if not exemption_store.is_exempted(ip):
|
|
@@ -405,6 +431,38 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
405
431
|
# Use the safely loaded global MODEL instead of loading again
|
|
406
432
|
self.model = MODEL
|
|
407
433
|
|
|
434
|
+
def _is_malicious_context(self, request, keyword):
    """
    Decide whether the request shows strong signs of malicious intent.

    ``keyword`` is accepted for call-site compatibility but is not
    consulted: the verdict depends only on the request path and its query
    parameters.

    Returns False for any path that ``path_exists_in_django`` resolves;
    otherwise True as soon as one of the indicators below matches.
    """
    path = request.path

    # Don't learn from valid Django paths
    if path_exists_in_django(path):
        return False

    lowered = path.lower()

    # More than one known-malicious segment in the same path
    hits = sum(1 for seg in re.split(r"\W+", path) if seg in self.malicious_keywords)
    if hits > 1:
        return True

    # Common attack patterns
    if any(pattern in lowered for pattern in (
        '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
        'backup', 'database', 'mysql', 'passwd', 'shadow',
    )):
        return True

    # Suspicious query parameters
    if any(param in request.GET for param in ('cmd', 'exec', 'system', 'shell')):
        return True

    # Multiple directory traversal attempts
    if path.count('../') > 2 or path.count('..\\') > 2:
        return True

    # Encoded attack patterns. Percent-escapes may use upper- or lower-case
    # hex digits, so match against the lower-cased path ('%2E%2E' counts too).
    return any(encoded in lowered for encoded in ('%2e%2e', '%252e', '%c0%ae'))
|
|
465
|
+
|
|
408
466
|
def process_request(self, request):
|
|
409
467
|
# First exemption check - early exit for exempt requests
|
|
410
468
|
if is_exempt(request):
|
aiwaf/trainer.py
CHANGED
|
@@ -34,19 +34,34 @@ def path_exists_in_django(path: str) -> bool:
|
|
|
34
34
|
from django.urls import get_resolver
|
|
35
35
|
from django.urls.resolvers import URLResolver
|
|
36
36
|
|
|
37
|
-
candidate = path.split("?")[0].
|
|
37
|
+
candidate = path.split("?")[0].strip("/") # Remove query params and normalize slashes
|
|
38
|
+
|
|
39
|
+
# Try exact resolution first - this is the most reliable method
|
|
38
40
|
try:
|
|
39
41
|
get_resolver().resolve(f"/{candidate}")
|
|
40
42
|
return True
|
|
41
43
|
except:
|
|
42
44
|
pass
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
45
|
+
|
|
46
|
+
# Also try with trailing slash if it doesn't have one
|
|
47
|
+
if not candidate.endswith("/"):
|
|
48
|
+
try:
|
|
49
|
+
get_resolver().resolve(f"/{candidate}/")
|
|
50
|
+
return True
|
|
51
|
+
except:
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
# Try without trailing slash if it has one
|
|
55
|
+
if candidate.endswith("/"):
|
|
56
|
+
try:
|
|
57
|
+
get_resolver().resolve(f"/{candidate.rstrip('/')}")
|
|
58
|
+
return True
|
|
59
|
+
except:
|
|
60
|
+
pass
|
|
61
|
+
|
|
62
|
+
# If direct resolution fails, be conservative
|
|
63
|
+
# Only do basic prefix matching for known include patterns
|
|
64
|
+
# but don't assume sub-paths exist just because the prefix exists
|
|
50
65
|
return False
|
|
51
66
|
|
|
52
67
|
|
|
@@ -81,7 +96,7 @@ def get_legitimate_keywords() -> set:
|
|
|
81
96
|
"""Get all legitimate keywords that shouldn't be learned as suspicious"""
|
|
82
97
|
legitimate = set()
|
|
83
98
|
|
|
84
|
-
# Common legitimate path segments
|
|
99
|
+
# Common legitimate path segments - expanded set
|
|
85
100
|
default_legitimate = {
|
|
86
101
|
"profile", "user", "users", "account", "accounts", "settings", "dashboard",
|
|
87
102
|
"home", "about", "contact", "help", "search", "list", "lists",
|
|
@@ -91,7 +106,32 @@ def get_legitimate_keywords() -> set:
|
|
|
91
106
|
"category", "categories", "tag", "tags", "post", "posts",
|
|
92
107
|
"article", "articles", "blog", "blogs", "news", "item", "items",
|
|
93
108
|
"admin", "administration", "manage", "manager", "control", "panel",
|
|
94
|
-
"config", "configuration", "option", "options", "preference", "preferences"
|
|
109
|
+
"config", "configuration", "option", "options", "preference", "preferences",
|
|
110
|
+
|
|
111
|
+
# Django built-in app keywords
|
|
112
|
+
"contenttypes", "contenttype", "sessions", "session", "messages", "message",
|
|
113
|
+
"staticfiles", "static", "sites", "site", "flatpages", "flatpage",
|
|
114
|
+
"redirects", "redirect", "permissions", "permission", "groups", "group",
|
|
115
|
+
|
|
116
|
+
# Common third-party package keywords
|
|
117
|
+
"token", "tokens", "oauth", "social", "rest", "framework", "cors",
|
|
118
|
+
"debug", "toolbar", "extensions", "allauth", "crispy", "forms",
|
|
119
|
+
"channels", "celery", "redis", "cache", "email", "mail",
|
|
120
|
+
|
|
121
|
+
# Common API/web development terms
|
|
122
|
+
"endpoint", "endpoints", "resource", "resources", "data", "export",
|
|
123
|
+
"import", "upload", "download", "file", "files", "media", "images",
|
|
124
|
+
"documents", "reports", "analytics", "stats", "statistics",
|
|
125
|
+
|
|
126
|
+
# Common business/application terms
|
|
127
|
+
"customer", "customers", "client", "clients", "company", "companies",
|
|
128
|
+
"department", "departments", "employee", "employees", "team", "teams",
|
|
129
|
+
"project", "projects", "task", "tasks", "event", "events",
|
|
130
|
+
"notification", "notifications", "alert", "alerts",
|
|
131
|
+
|
|
132
|
+
# Language/localization
|
|
133
|
+
"language", "languages", "locale", "locales", "translation", "translations",
|
|
134
|
+
"en", "fr", "de", "es", "it", "pt", "ru", "ja", "zh", "ko"
|
|
95
135
|
}
|
|
96
136
|
legitimate.update(default_legitimate)
|
|
97
137
|
|
|
@@ -120,30 +160,41 @@ def _extract_django_route_keywords() -> set:
|
|
|
120
160
|
|
|
121
161
|
# Extract from app names and labels
|
|
122
162
|
for app_config in apps.get_app_configs():
|
|
123
|
-
# Add app name and label
|
|
163
|
+
# Add app name and label - improved parsing
|
|
124
164
|
if app_config.name:
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
165
|
+
app_parts = app_config.name.lower().replace('-', '_').split('.')
|
|
166
|
+
for part in app_parts:
|
|
167
|
+
for segment in re.split(r'[._-]', part):
|
|
168
|
+
if len(segment) > 2:
|
|
169
|
+
keywords.add(segment)
|
|
128
170
|
|
|
129
171
|
if app_config.label and app_config.label != app_config.name:
|
|
130
172
|
for segment in re.split(r'[._-]', app_config.label.lower()):
|
|
131
173
|
if len(segment) > 2:
|
|
132
174
|
keywords.add(segment)
|
|
133
175
|
|
|
134
|
-
# Extract from model names in the app
|
|
176
|
+
# Extract from model names in the app - improved handling
|
|
135
177
|
try:
|
|
136
178
|
for model in app_config.get_models():
|
|
137
179
|
model_name = model._meta.model_name.lower()
|
|
138
180
|
if len(model_name) > 2:
|
|
139
181
|
keywords.add(model_name)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
182
|
+
# Add plural form
|
|
183
|
+
if not model_name.endswith('s'):
|
|
184
|
+
keywords.add(f"{model_name}s")
|
|
185
|
+
|
|
186
|
+
# Also add verbose names if different
|
|
187
|
+
verbose_name = str(model._meta.verbose_name).lower()
|
|
188
|
+
verbose_name_plural = str(model._meta.verbose_name_plural).lower()
|
|
189
|
+
|
|
190
|
+
for name in [verbose_name, verbose_name_plural]:
|
|
191
|
+
for segment in re.split(r'[^a-zA-Z]+', name):
|
|
192
|
+
if len(segment) > 2 and segment != model_name:
|
|
193
|
+
keywords.add(segment)
|
|
143
194
|
except Exception:
|
|
144
195
|
continue
|
|
145
196
|
|
|
146
|
-
# Extract from URL patterns
|
|
197
|
+
# Extract from URL patterns - improved extraction
|
|
147
198
|
def extract_from_pattern(pattern, prefix=""):
|
|
148
199
|
try:
|
|
149
200
|
if isinstance(pattern, URLResolver):
|
|
@@ -154,26 +205,41 @@ def _extract_django_route_keywords() -> set:
|
|
|
154
205
|
if len(segment) > 2:
|
|
155
206
|
keywords.add(segment)
|
|
156
207
|
|
|
157
|
-
# Extract from the pattern itself
|
|
208
|
+
# Extract from the pattern itself - more comprehensive
|
|
158
209
|
pattern_str = str(pattern.pattern)
|
|
159
|
-
|
|
160
|
-
|
|
210
|
+
# Get literal path segments (not regex parts)
|
|
211
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
212
|
+
for part in literal_parts:
|
|
213
|
+
if len(part) > 2:
|
|
214
|
+
keywords.add(part.lower())
|
|
161
215
|
|
|
162
216
|
# Recurse into nested patterns
|
|
163
|
-
|
|
164
|
-
|
|
217
|
+
try:
|
|
218
|
+
for nested_pattern in pattern.url_patterns:
|
|
219
|
+
extract_from_pattern(nested_pattern, prefix)
|
|
220
|
+
except:
|
|
221
|
+
pass
|
|
165
222
|
|
|
166
223
|
elif isinstance(pattern, URLPattern):
|
|
167
|
-
# Extract from URL pattern
|
|
224
|
+
# Extract from URL pattern - more comprehensive
|
|
168
225
|
pattern_str = str(pattern.pattern)
|
|
169
|
-
|
|
170
|
-
|
|
226
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
227
|
+
for part in literal_parts:
|
|
228
|
+
if len(part) > 2:
|
|
229
|
+
keywords.add(part.lower())
|
|
171
230
|
|
|
172
231
|
# Extract from view name if available
|
|
173
232
|
if hasattr(pattern.callback, '__name__'):
|
|
174
233
|
view_name = pattern.callback.__name__.lower()
|
|
175
234
|
for segment in re.split(r'[._-]', view_name):
|
|
176
|
-
if len(segment) > 2 and segment
|
|
235
|
+
if len(segment) > 2 and segment not in ['view', 'class', 'function']:
|
|
236
|
+
keywords.add(segment)
|
|
237
|
+
|
|
238
|
+
# Extract from view class name if it's a class-based view
|
|
239
|
+
if hasattr(pattern.callback, 'view_class'):
|
|
240
|
+
class_name = pattern.callback.view_class.__name__.lower()
|
|
241
|
+
for segment in re.split(r'[._-]', class_name):
|
|
242
|
+
if len(segment) > 2 and segment not in ['view', 'class']:
|
|
177
243
|
keywords.add(segment)
|
|
178
244
|
|
|
179
245
|
except Exception:
|
|
@@ -188,10 +254,20 @@ def _extract_django_route_keywords() -> set:
|
|
|
188
254
|
print(f"Warning: Could not extract Django route keywords: {e}")
|
|
189
255
|
|
|
190
256
|
# Filter out very common/generic words that might be suspicious
|
|
257
|
+
# Expanded filter list
|
|
191
258
|
filtered_keywords = set()
|
|
259
|
+
exclude_words = {
|
|
260
|
+
'www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del',
|
|
261
|
+
'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her',
|
|
262
|
+
'was', 'one', 'our', 'out', 'day', 'had', 'has', 'his', 'how', 'man', 'new',
|
|
263
|
+
'now', 'old', 'see', 'two', 'who', 'boy', 'did', 'its', 'let', 'put', 'say',
|
|
264
|
+
'she', 'too', 'use', 'var', 'way', 'may', 'end', 'why', 'any', 'app', 'run'
|
|
265
|
+
}
|
|
266
|
+
|
|
192
267
|
for keyword in keywords:
|
|
193
268
|
if (len(keyword) >= 3 and
|
|
194
|
-
keyword not in
|
|
269
|
+
keyword not in exclude_words and
|
|
270
|
+
not keyword.isdigit()):
|
|
195
271
|
filtered_keywords.add(keyword)
|
|
196
272
|
|
|
197
273
|
if filtered_keywords:
|
|
@@ -272,6 +348,50 @@ def _parse(line: str) -> dict | None:
|
|
|
272
348
|
}
|
|
273
349
|
|
|
274
350
|
|
|
351
|
+
def _is_malicious_context_trainer(path: str, keyword: str, status: str = "404") -> bool:
    """
    Decide whether a logged request path shows strong signs of an attack.

    Args:
        path: request path taken from a log record.
        keyword: candidate keyword; accepted for call-site compatibility but
            not consulted by the checks below.
        status: HTTP status of the logged request, as a string.

    Returns:
        False for any path that ``path_exists_in_django`` resolves; otherwise
        True as soon as one of the indicators below matches.
    """
    # Don't learn from valid Django paths
    if path_exists_in_django(path):
        return False

    lowered = path.lower()

    # Multiple suspicious segments in the path. Segments are lower-cased
    # first, matching how train() compares segments against STATIC_KW.
    static_hits = sum(1 for seg in re.split(r"\W+", lowered) if seg in STATIC_KW)
    if static_hits > 1:
        return True

    # Common attack patterns
    if any(pattern in lowered for pattern in (
        '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
        'backup', 'database', 'mysql', 'passwd', 'shadow', 'xmlrpc',
        'shell', 'cmd', 'exec', 'eval', 'system',
    )):
        return True

    # Path indicates obvious attack attempt
    if any(attack in lowered for attack in (
        'union+select', 'drop+table', '<script', 'javascript:',
        '${', '{{', 'onload=', 'onerror=', 'file://', 'http://',
    )):
        return True

    # Multiple directory traversal attempts
    if path.count('../') > 1 or path.count('..\\') > 1:
        return True

    # Encoded attack patterns. Percent-escapes may use upper- or lower-case
    # hex digits, so match against the lower-cased path.
    if any(encoded in lowered for encoded in (
        '%2e%2e', '%252e', '%c0%ae', '%3c%73%63%72%69%70%74',
    )):
        return True

    # 404 status with suspicious characteristics
    return status == "404" and (
        len(path) > 50  # Very long paths are often attacks
        or path.count('/') > 10  # Too many directory levels
        or any(c in path for c in ('<', '>', '{', '}', '$', '`'))  # Special characters
    )
|
|
393
|
+
|
|
394
|
+
|
|
275
395
|
def train() -> None:
|
|
276
396
|
"""Enhanced training with improved keyword filtering and exemption handling"""
|
|
277
397
|
print("🚀 Starting AIWAF enhanced training...")
|
|
@@ -436,28 +556,43 @@ def train() -> None:
|
|
|
436
556
|
for seg in re.split(r"\W+", r["path"].lower()):
|
|
437
557
|
if (len(seg) > 3 and
|
|
438
558
|
seg not in STATIC_KW and
|
|
439
|
-
seg not in legitimate_keywords
|
|
559
|
+
seg not in legitimate_keywords and # Don't learn legitimate keywords
|
|
560
|
+
_is_malicious_context_trainer(r["path"], seg, r["status"])): # Smart context check
|
|
440
561
|
tokens[seg] += 1
|
|
441
562
|
|
|
442
563
|
keyword_store = get_keyword_store()
|
|
443
564
|
top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
444
565
|
|
|
445
|
-
# Additional filtering: only add keywords that appear suspicious enough
|
|
566
|
+
# Additional filtering: only add keywords that appear suspicious enough AND in malicious context
|
|
446
567
|
filtered_tokens = []
|
|
568
|
+
learned_from_paths = [] # Track which paths we learned from
|
|
569
|
+
|
|
447
570
|
for kw, cnt in top_tokens:
|
|
448
|
-
#
|
|
571
|
+
# Find example paths where this keyword appeared
|
|
572
|
+
example_paths = [r["path"] for r in parsed
|
|
573
|
+
if kw in r["path"].lower() and
|
|
574
|
+
r["status"].startswith(("4", "5")) and
|
|
575
|
+
not path_exists_in_django(r["path"])]
|
|
576
|
+
|
|
577
|
+
# Only add if keyword appears in malicious contexts
|
|
449
578
|
if (cnt >= 2 and # Must appear at least twice
|
|
450
579
|
len(kw) >= 4 and # Must be at least 4 characters
|
|
451
|
-
kw not in legitimate_keywords
|
|
580
|
+
kw not in legitimate_keywords and # Not in legitimate set
|
|
581
|
+
example_paths and # Has example paths
|
|
582
|
+
any(_is_malicious_context_trainer(path, kw) for path in example_paths[:3])): # Check first 3 paths
|
|
583
|
+
|
|
452
584
|
filtered_tokens.append((kw, cnt))
|
|
453
585
|
keyword_store.add_keyword(kw, cnt)
|
|
586
|
+
learned_from_paths.extend(example_paths[:2]) # Track first 2 example paths
|
|
454
587
|
|
|
455
588
|
if filtered_tokens:
|
|
456
589
|
print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
|
|
590
|
+
print(f"🎯 Example malicious paths learned from: {learned_from_paths[:5]}") # Show first 5
|
|
457
591
|
else:
|
|
458
592
|
print("✅ No new suspicious keywords learned (good sign!)")
|
|
459
593
|
|
|
460
|
-
print(f"🎯
|
|
594
|
+
print(f"🎯 Smart keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
|
|
595
|
+
print(f"🔒 Used malicious context analysis to filter out false positives.")
|
|
461
596
|
|
|
462
597
|
# Training summary
|
|
463
598
|
print("\n" + "="*60)
|
|
{aiwaf-0.1.9.2.1.dist-info → aiwaf-0.1.9.2.3.dist-info}/RECORD
CHANGED
@@ -1,18 +1,19 @@
|
|
|
1
|
-
aiwaf/__init__.py,sha256=
|
|
1
|
+
aiwaf/__init__.py,sha256=oJ3sGVirmahdoT5DpCTp_liJkyeRv9FllrYYKRkThnU,220
|
|
2
2
|
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
3
|
aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
|
|
4
4
|
aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
|
|
5
|
-
aiwaf/middleware.py,sha256
|
|
5
|
+
aiwaf/middleware.py,sha256=-w_uOaZgakFoJkvmJUB7atqcYQr3nSd9HbSKlP8_178,30370
|
|
6
6
|
aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
|
|
7
7
|
aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
|
|
8
8
|
aiwaf/storage.py,sha256=5ImrZMRn3u7HNsPH0fDjWhDrD2tgG2IHVnOXtLz0fk4,10253
|
|
9
|
-
aiwaf/trainer.py,sha256=
|
|
9
|
+
aiwaf/trainer.py,sha256=E9jNPq1EHJkKpX1loZrUd2BDBAvH79w_Ltbdb1fsc0Q,25259
|
|
10
10
|
aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
|
|
11
11
|
aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
aiwaf/management/commands/add_exemption.py,sha256=U_ByfJw1EstAZ8DaSoRb97IGwYzXs0DBJkVAqeN4Wak,1128
|
|
14
14
|
aiwaf/management/commands/add_ipexemption.py,sha256=sSf3d9hGK9RqqlBYkCrnrd8KZWGT-derSpoWnEY4H60,952
|
|
15
15
|
aiwaf/management/commands/aiwaf_diagnose.py,sha256=nXFRhq66N4QC3e4scYJ2sUngJce-0yDxtBO3R2BllRM,6134
|
|
16
|
+
aiwaf/management/commands/aiwaf_list.py,sha256=tZK3FugApmPxxvmoB4-nLY9fpZJgiRtD137Bre5hEp8,3839
|
|
16
17
|
aiwaf/management/commands/aiwaf_logging.py,sha256=FCIqULn2tii2vD9VxL7vk3PV4k4vr7kaA00KyaCExYY,7692
|
|
17
18
|
aiwaf/management/commands/aiwaf_reset.py,sha256=pcF0zOYDSqjpCwDtk2HYJZLgr76td8OFRENtl20c1dQ,7472
|
|
18
19
|
aiwaf/management/commands/check_dependencies.py,sha256=GOZl00pDwW2cJjDvIaCeB3yWxmeYcJDRTIpmOTLvy2c,37204
|
|
@@ -28,8 +29,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
|
|
|
28
29
|
aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
|
|
29
30
|
aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
31
|
aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
|
|
31
|
-
aiwaf-0.1.9.2.
|
|
32
|
-
aiwaf-0.1.9.2.
|
|
33
|
-
aiwaf-0.1.9.2.
|
|
34
|
-
aiwaf-0.1.9.2.
|
|
35
|
-
aiwaf-0.1.9.2.
|
|
32
|
+
aiwaf-0.1.9.2.3.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
33
|
+
aiwaf-0.1.9.2.3.dist-info/METADATA,sha256=nLqJ4rOXO6IFxBr_0EBdnlYRk824Uii9KYLnsObfJx0,26824
|
|
34
|
+
aiwaf-0.1.9.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
35
|
+
aiwaf-0.1.9.2.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
36
|
+
aiwaf-0.1.9.2.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|