aiwaf 0.1.9.2.2__tar.gz → 0.1.9.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/PKG-INFO +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/__init__.py +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/middleware.py +77 -10
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/trainer.py +164 -27
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf.egg-info/PKG-INFO +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/pyproject.toml +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/setup.py +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/LICENSE +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/README.md +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/apps.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/blacklist_manager.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/decorators.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/__init__.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/__init__.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/add_exemption.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/add_ipexemption.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/aiwaf_diagnose.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/aiwaf_list.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/aiwaf_logging.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/aiwaf_reset.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/check_dependencies.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/clear_blacklist.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/clear_cache.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/debug_csv.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/detect_and_train.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/diagnose_blocking.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/regenerate_model.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/setup_models.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/test_exemption.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/management/commands/test_exemption_fix.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/middleware_logger.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/models.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/resources/model.pkl +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/storage.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/templatetags/__init__.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/templatetags/aiwaf_tags.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf/utils.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf.egg-info/SOURCES.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf.egg-info/dependency_links.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf.egg-info/requires.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/aiwaf.egg-info/top_level.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.4}/setup.cfg +0 -0
|
@@ -179,17 +179,26 @@ class IPAndKeywordBlockMiddleware:
|
|
|
179
179
|
def extract_from_pattern(pattern, prefix=""):
|
|
180
180
|
try:
|
|
181
181
|
if isinstance(pattern, URLResolver):
|
|
182
|
-
# Handle include() patterns
|
|
182
|
+
# Handle include() patterns - be permissive for URL prefixes that route to apps
|
|
183
183
|
namespace = getattr(pattern, 'namespace', None)
|
|
184
184
|
if namespace:
|
|
185
185
|
for segment in re.split(r'[._-]', namespace.lower()):
|
|
186
186
|
if len(segment) > 2:
|
|
187
187
|
keywords.add(segment)
|
|
188
188
|
|
|
189
|
-
# Extract from the pattern itself
|
|
189
|
+
# Extract from the pattern itself - improved logic for include() patterns
|
|
190
190
|
pattern_str = str(pattern.pattern)
|
|
191
|
-
|
|
192
|
-
|
|
191
|
+
# Get literal path segments (not regex parts)
|
|
192
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
193
|
+
|
|
194
|
+
# For include() patterns, be more permissive since they're routing to existing apps
|
|
195
|
+
# The key insight: if someone includes an app's URLs, the prefix is legitimate by design
|
|
196
|
+
for part in literal_parts:
|
|
197
|
+
if len(part) > 2:
|
|
198
|
+
part_lower = part.lower()
|
|
199
|
+
# For URLResolver (include patterns), be more permissive
|
|
200
|
+
# These are URL prefixes that route to actual app functionality
|
|
201
|
+
keywords.add(part_lower)
|
|
193
202
|
|
|
194
203
|
# Recurse into nested patterns
|
|
195
204
|
for nested_pattern in pattern.url_patterns:
|
|
@@ -298,11 +307,15 @@ class IPAndKeywordBlockMiddleware:
|
|
|
298
307
|
keyword_store = get_keyword_store()
|
|
299
308
|
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
300
309
|
|
|
301
|
-
#
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
310
|
+
# Smart learning: only learn from suspicious contexts, never from valid paths
|
|
311
|
+
if not path_exists: # Only learn from non-existent paths
|
|
312
|
+
for seg in segments:
|
|
313
|
+
# Only learn if it's not a legitimate keyword AND in a suspicious context
|
|
314
|
+
if (seg not in self.legitimate_path_keywords and
|
|
315
|
+
seg not in self.exempt_keywords and
|
|
316
|
+
self._is_malicious_context(request, seg)):
|
|
317
|
+
keyword_store.add_keyword(seg)
|
|
318
|
+
|
|
306
319
|
dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
307
320
|
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
308
321
|
|
|
@@ -345,7 +358,29 @@ class IPAndKeywordBlockMiddleware:
|
|
|
345
358
|
block_reason = f"Inherently suspicious: {seg}"
|
|
346
359
|
|
|
347
360
|
if is_suspicious:
|
|
348
|
-
# Additional context check before blocking
|
|
361
|
+
# Additional context check before blocking - be more conservative with valid paths
|
|
362
|
+
if path_exists:
|
|
363
|
+
# For valid paths, only block if there are VERY strong malicious indicators
|
|
364
|
+
very_strong_indicators = [
|
|
365
|
+
# Multiple attack patterns in same request
|
|
366
|
+
sum([
|
|
367
|
+
'../' in request.path, '..\\' in request.path,
|
|
368
|
+
any(param in request.GET for param in ['cmd', 'exec', 'system']),
|
|
369
|
+
request.path.count('%') > 5, # Heavy URL encoding
|
|
370
|
+
len([s for s in segments if s in self.malicious_keywords]) > 2
|
|
371
|
+
]) >= 2,
|
|
372
|
+
|
|
373
|
+
# Obvious attack attempts on valid paths
|
|
374
|
+
any(attack in request.path.lower() for attack in [
|
|
375
|
+
'union+select', 'drop+table', '<script', 'javascript:',
|
|
376
|
+
'onload=', 'onerror=', '${', '{{', 'eval('
|
|
377
|
+
])
|
|
378
|
+
]
|
|
379
|
+
|
|
380
|
+
if not any(very_strong_indicators):
|
|
381
|
+
continue # Skip blocking for valid paths without very strong indicators
|
|
382
|
+
|
|
383
|
+
# For non-existent paths or paths with very strong indicators, proceed with blocking
|
|
349
384
|
if self._is_malicious_context(request, seg) or not path_exists:
|
|
350
385
|
# Double-check exemption before blocking
|
|
351
386
|
if not exemption_store.is_exempted(ip):
|
|
@@ -405,6 +440,38 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
405
440
|
# Use the safely loaded global MODEL instead of loading again
|
|
406
441
|
self.model = MODEL
|
|
407
442
|
|
|
443
|
+
def _is_malicious_context(self, request, keyword):
    """
    Report whether the request path shows strong signs of malicious intent.

    Keywords should only be learned when this returns True.

    Args:
        request: Request object; only ``request.path`` and ``request.GET``
            are read here.
        keyword: Candidate keyword. The current checks do not consult it;
            it is kept so existing callers can keep passing it.

    Returns:
        bool: False when ``path_exists_in_django`` accepts the path,
        otherwise True if at least one of the indicators below fires.
    """
    path = request.path

    # Don't learn from valid Django paths
    if path_exists_in_django(path):
        return False

    # Lower-case once: used for the substring checks and for the encoded
    # signatures (percent-encoding hex digits are case-insensitive, so
    # '%2E%2E' must match the same way '%2e%2e' does).
    path_lower = path.lower()

    # Strong malicious indicators
    malicious_indicators = [
        # More than one path segment is a known malicious keyword
        # NOTE(review): self.malicious_keywords is set outside this block;
        # segments are compared as-is (no case folding) -- confirm intent.
        len([seg for seg in re.split(r"\W+", path) if seg in self.malicious_keywords]) > 1,

        # Common attack patterns (substring match on the lower-cased path)
        any(pattern in path_lower for pattern in (
            '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
            'backup', 'database', 'mysql', 'passwd', 'shadow',
        )),

        # Suspicious query parameters
        any(param in request.GET for param in ('cmd', 'exec', 'system', 'shell')),

        # Multiple directory traversal attempts
        # (a single '../' or '..\\' already trips the substring check above)
        path.count('../') > 2 or path.count('..\\') > 2,

        # Encoded attack patterns, matched case-insensitively
        any(encoded in path_lower for encoded in ('%2e%2e', '%252e', '%c0%ae')),
    ]

    return any(malicious_indicators)
|
|
474
|
+
|
|
408
475
|
def process_request(self, request):
|
|
409
476
|
# First exemption check - early exit for exempt requests
|
|
410
477
|
if is_exempt(request):
|
|
@@ -96,7 +96,7 @@ def get_legitimate_keywords() -> set:
|
|
|
96
96
|
"""Get all legitimate keywords that shouldn't be learned as suspicious"""
|
|
97
97
|
legitimate = set()
|
|
98
98
|
|
|
99
|
-
# Common legitimate path segments
|
|
99
|
+
# Common legitimate path segments - expanded set
|
|
100
100
|
default_legitimate = {
|
|
101
101
|
"profile", "user", "users", "account", "accounts", "settings", "dashboard",
|
|
102
102
|
"home", "about", "contact", "help", "search", "list", "lists",
|
|
@@ -106,7 +106,32 @@ def get_legitimate_keywords() -> set:
|
|
|
106
106
|
"category", "categories", "tag", "tags", "post", "posts",
|
|
107
107
|
"article", "articles", "blog", "blogs", "news", "item", "items",
|
|
108
108
|
"admin", "administration", "manage", "manager", "control", "panel",
|
|
109
|
-
"config", "configuration", "option", "options", "preference", "preferences"
|
|
109
|
+
"config", "configuration", "option", "options", "preference", "preferences",
|
|
110
|
+
|
|
111
|
+
# Django built-in app keywords
|
|
112
|
+
"contenttypes", "contenttype", "sessions", "session", "messages", "message",
|
|
113
|
+
"staticfiles", "static", "sites", "site", "flatpages", "flatpage",
|
|
114
|
+
"redirects", "redirect", "permissions", "permission", "groups", "group",
|
|
115
|
+
|
|
116
|
+
# Common third-party package keywords
|
|
117
|
+
"token", "tokens", "oauth", "social", "rest", "framework", "cors",
|
|
118
|
+
"debug", "toolbar", "extensions", "allauth", "crispy", "forms",
|
|
119
|
+
"channels", "celery", "redis", "cache", "email", "mail",
|
|
120
|
+
|
|
121
|
+
# Common API/web development terms
|
|
122
|
+
"endpoint", "endpoints", "resource", "resources", "data", "export",
|
|
123
|
+
"import", "upload", "download", "file", "files", "media", "images",
|
|
124
|
+
"documents", "reports", "analytics", "stats", "statistics",
|
|
125
|
+
|
|
126
|
+
# Common business/application terms
|
|
127
|
+
"customer", "customers", "client", "clients", "company", "companies",
|
|
128
|
+
"department", "departments", "employee", "employees", "team", "teams",
|
|
129
|
+
"project", "projects", "task", "tasks", "event", "events",
|
|
130
|
+
"notification", "notifications", "alert", "alerts",
|
|
131
|
+
|
|
132
|
+
# Language/localization
|
|
133
|
+
"language", "languages", "locale", "locales", "translation", "translations",
|
|
134
|
+
"en", "fr", "de", "es", "it", "pt", "ru", "ja", "zh", "ko"
|
|
110
135
|
}
|
|
111
136
|
legitimate.update(default_legitimate)
|
|
112
137
|
|
|
@@ -135,60 +160,103 @@ def _extract_django_route_keywords() -> set:
|
|
|
135
160
|
|
|
136
161
|
# Extract from app names and labels
|
|
137
162
|
for app_config in apps.get_app_configs():
|
|
138
|
-
# Add app name and label
|
|
163
|
+
# Add app name and label - improved parsing
|
|
139
164
|
if app_config.name:
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
165
|
+
app_parts = app_config.name.lower().replace('-', '_').split('.')
|
|
166
|
+
for part in app_parts:
|
|
167
|
+
for segment in re.split(r'[._-]', part):
|
|
168
|
+
if len(segment) > 2:
|
|
169
|
+
keywords.add(segment)
|
|
143
170
|
|
|
144
171
|
if app_config.label and app_config.label != app_config.name:
|
|
145
172
|
for segment in re.split(r'[._-]', app_config.label.lower()):
|
|
146
173
|
if len(segment) > 2:
|
|
147
174
|
keywords.add(segment)
|
|
148
175
|
|
|
149
|
-
# Extract from model names in the app
|
|
176
|
+
# Extract from model names in the app - improved handling
|
|
150
177
|
try:
|
|
151
178
|
for model in app_config.get_models():
|
|
152
179
|
model_name = model._meta.model_name.lower()
|
|
153
180
|
if len(model_name) > 2:
|
|
154
181
|
keywords.add(model_name)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
182
|
+
# Add plural form
|
|
183
|
+
if not model_name.endswith('s'):
|
|
184
|
+
keywords.add(f"{model_name}s")
|
|
185
|
+
|
|
186
|
+
# Also add verbose names if different
|
|
187
|
+
verbose_name = str(model._meta.verbose_name).lower()
|
|
188
|
+
verbose_name_plural = str(model._meta.verbose_name_plural).lower()
|
|
189
|
+
|
|
190
|
+
for name in [verbose_name, verbose_name_plural]:
|
|
191
|
+
for segment in re.split(r'[^a-zA-Z]+', name):
|
|
192
|
+
if len(segment) > 2 and segment != model_name:
|
|
193
|
+
keywords.add(segment)
|
|
158
194
|
except Exception:
|
|
159
195
|
continue
|
|
160
196
|
|
|
161
|
-
# Extract from URL patterns
|
|
197
|
+
# Extract from URL patterns - improved extraction
|
|
162
198
|
def extract_from_pattern(pattern, prefix=""):
|
|
163
199
|
try:
|
|
164
200
|
if isinstance(pattern, URLResolver):
|
|
165
|
-
# Handle include() patterns
|
|
201
|
+
# Handle include() patterns - check if they include legitimate apps
|
|
166
202
|
namespace = getattr(pattern, 'namespace', None)
|
|
167
203
|
if namespace:
|
|
168
204
|
for segment in re.split(r'[._-]', namespace.lower()):
|
|
169
205
|
if len(segment) > 2:
|
|
170
206
|
keywords.add(segment)
|
|
171
207
|
|
|
172
|
-
# Extract from the pattern itself
|
|
208
|
+
# Extract from the pattern itself - improved logic for include() patterns
|
|
173
209
|
pattern_str = str(pattern.pattern)
|
|
174
|
-
|
|
175
|
-
|
|
210
|
+
# Get literal path segments (not regex parts)
|
|
211
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
212
|
+
|
|
213
|
+
# Get list of actual Django app names to validate against
|
|
214
|
+
app_names = set()
|
|
215
|
+
for app_config in apps.get_app_configs():
|
|
216
|
+
app_parts = app_config.name.lower().replace('-', '_').split('.')
|
|
217
|
+
for part in app_parts:
|
|
218
|
+
for segment in re.split(r'[._-]', part):
|
|
219
|
+
if len(segment) > 2:
|
|
220
|
+
app_names.add(segment)
|
|
221
|
+
if app_config.label:
|
|
222
|
+
app_names.add(app_config.label.lower())
|
|
223
|
+
|
|
224
|
+
# For include() patterns, be more permissive since they're routing to existing apps
|
|
225
|
+
# The key insight: if someone includes an app's URLs, the prefix is legitimate by design
|
|
226
|
+
for part in literal_parts:
|
|
227
|
+
if len(part) > 2:
|
|
228
|
+
part_lower = part.lower()
|
|
229
|
+
# For URLResolver (include patterns), be more permissive
|
|
230
|
+
# These are URL prefixes that route to actual app functionality
|
|
231
|
+
keywords.add(part_lower)
|
|
176
232
|
|
|
177
233
|
# Recurse into nested patterns
|
|
178
|
-
|
|
179
|
-
|
|
234
|
+
try:
|
|
235
|
+
for nested_pattern in pattern.url_patterns:
|
|
236
|
+
extract_from_pattern(nested_pattern, prefix)
|
|
237
|
+
except:
|
|
238
|
+
pass
|
|
180
239
|
|
|
181
240
|
elif isinstance(pattern, URLPattern):
|
|
182
|
-
# Extract from URL pattern
|
|
241
|
+
# Extract from URL pattern - more comprehensive
|
|
183
242
|
pattern_str = str(pattern.pattern)
|
|
184
|
-
|
|
185
|
-
|
|
243
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
244
|
+
for part in literal_parts:
|
|
245
|
+
if len(part) > 2:
|
|
246
|
+
keywords.add(part.lower())
|
|
186
247
|
|
|
187
248
|
# Extract from view name if available
|
|
188
249
|
if hasattr(pattern.callback, '__name__'):
|
|
189
250
|
view_name = pattern.callback.__name__.lower()
|
|
190
251
|
for segment in re.split(r'[._-]', view_name):
|
|
191
|
-
if len(segment) > 2 and segment
|
|
252
|
+
if len(segment) > 2 and segment not in ['view', 'class', 'function']:
|
|
253
|
+
keywords.add(segment)
|
|
254
|
+
|
|
255
|
+
# Extract from view class name if it's a class-based view
|
|
256
|
+
if hasattr(pattern.callback, 'view_class'):
|
|
257
|
+
class_name = pattern.callback.view_class.__name__.lower()
|
|
258
|
+
for segment in re.split(r'[._-]', class_name):
|
|
259
|
+
if len(segment) > 2 and segment not in ['view', 'class']:
|
|
192
260
|
keywords.add(segment)
|
|
193
261
|
|
|
194
262
|
except Exception:
|
|
@@ -203,10 +271,20 @@ def _extract_django_route_keywords() -> set:
|
|
|
203
271
|
print(f"Warning: Could not extract Django route keywords: {e}")
|
|
204
272
|
|
|
205
273
|
# Filter out very common/generic words that might be suspicious
|
|
274
|
+
# Expanded filter list
|
|
206
275
|
filtered_keywords = set()
|
|
276
|
+
exclude_words = {
|
|
277
|
+
'www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del',
|
|
278
|
+
'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her',
|
|
279
|
+
'was', 'one', 'our', 'out', 'day', 'had', 'has', 'his', 'how', 'man', 'new',
|
|
280
|
+
'now', 'old', 'see', 'two', 'who', 'boy', 'did', 'its', 'let', 'put', 'say',
|
|
281
|
+
'she', 'too', 'use', 'var', 'way', 'may', 'end', 'why', 'any', 'app', 'run'
|
|
282
|
+
}
|
|
283
|
+
|
|
207
284
|
for keyword in keywords:
|
|
208
285
|
if (len(keyword) >= 3 and
|
|
209
|
-
keyword not in
|
|
286
|
+
keyword not in exclude_words and
|
|
287
|
+
not keyword.isdigit()):
|
|
210
288
|
filtered_keywords.add(keyword)
|
|
211
289
|
|
|
212
290
|
if filtered_keywords:
|
|
@@ -287,6 +365,50 @@ def _parse(line: str) -> dict | None:
|
|
|
287
365
|
}
|
|
288
366
|
|
|
289
367
|
|
|
368
|
+
def _is_malicious_context_trainer(path: str, keyword: str, status: str = "404") -> bool:
    """
    Determine if a keyword from log analysis appears in a malicious context.

    This is the trainer version of the middleware's _is_malicious_context method.

    Args:
        path: Request path taken from a parsed log record.
        keyword: Candidate keyword. The current checks do not consult it;
            it is kept so existing callers can keep passing it.
        status: HTTP status of the logged request, as a string.

    Returns:
        False when ``path_exists_in_django`` accepts the path, otherwise
        True if at least one of the indicators below fires.
    """
    # Don't learn from valid Django paths
    if path_exists_in_django(path):
        return False

    # Lower-case once: used for the substring checks, the segment check and
    # the encoded signatures (percent-encoding hex digits are
    # case-insensitive, so '%2E%2E' must match the same way '%2e%2e' does).
    path_lower = path.lower()

    # Strong malicious indicators for log analysis
    malicious_indicators = [
        # More than one path segment is a static keyword. Segments come from
        # the lower-cased path, matching how train() compares segments
        # against STATIC_KW.
        len([seg for seg in re.split(r"\W+", path_lower) if seg in STATIC_KW]) > 1,

        # Common attack patterns (substring match on the lower-cased path)
        any(pattern in path_lower for pattern in (
            '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
            'backup', 'database', 'mysql', 'passwd', 'shadow', 'xmlrpc',
            'shell', 'cmd', 'exec', 'eval', 'system',
        )),

        # Path indicates obvious attack attempt
        any(attack in path_lower for attack in (
            'union+select', 'drop+table', '<script', 'javascript:',
            '${', '{{', 'onload=', 'onerror=', 'file://', 'http://',
        )),

        # Multiple directory traversal attempts
        # (a single '../' or '..\\' already trips the substring check above)
        path.count('../') > 1 or path.count('..\\') > 1,

        # Encoded attack patterns, matched case-insensitively
        any(encoded in path_lower for encoded in (
            '%2e%2e', '%252e', '%c0%ae', '%3c%73%63%72%69%70%74',
        )),

        # 404 status with suspicious characteristics
        status == "404" and (
            len(path) > 50 or  # Very long paths are often attacks
            path.count('/') > 10 or  # Too many directory levels
            any(c in path for c in ('<', '>', '{', '}', '$', '`'))  # Special characters
        ),
    ]

    return any(malicious_indicators)
|
|
410
|
+
|
|
411
|
+
|
|
290
412
|
def train() -> None:
|
|
291
413
|
"""Enhanced training with improved keyword filtering and exemption handling"""
|
|
292
414
|
print("🚀 Starting AIWAF enhanced training...")
|
|
@@ -451,28 +573,43 @@ def train() -> None:
|
|
|
451
573
|
for seg in re.split(r"\W+", r["path"].lower()):
|
|
452
574
|
if (len(seg) > 3 and
|
|
453
575
|
seg not in STATIC_KW and
|
|
454
|
-
seg not in legitimate_keywords
|
|
576
|
+
seg not in legitimate_keywords and # Don't learn legitimate keywords
|
|
577
|
+
_is_malicious_context_trainer(r["path"], seg, r["status"])): # Smart context check
|
|
455
578
|
tokens[seg] += 1
|
|
456
579
|
|
|
457
580
|
keyword_store = get_keyword_store()
|
|
458
581
|
top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
459
582
|
|
|
460
|
-
# Additional filtering: only add keywords that appear suspicious enough
|
|
583
|
+
# Additional filtering: only add keywords that appear suspicious enough AND in malicious context
|
|
461
584
|
filtered_tokens = []
|
|
585
|
+
learned_from_paths = [] # Track which paths we learned from
|
|
586
|
+
|
|
462
587
|
for kw, cnt in top_tokens:
|
|
463
|
-
#
|
|
588
|
+
# Find example paths where this keyword appeared
|
|
589
|
+
example_paths = [r["path"] for r in parsed
|
|
590
|
+
if kw in r["path"].lower() and
|
|
591
|
+
r["status"].startswith(("4", "5")) and
|
|
592
|
+
not path_exists_in_django(r["path"])]
|
|
593
|
+
|
|
594
|
+
# Only add if keyword appears in malicious contexts
|
|
464
595
|
if (cnt >= 2 and # Must appear at least twice
|
|
465
596
|
len(kw) >= 4 and # Must be at least 4 characters
|
|
466
|
-
kw not in legitimate_keywords
|
|
597
|
+
kw not in legitimate_keywords and # Not in legitimate set
|
|
598
|
+
example_paths and # Has example paths
|
|
599
|
+
any(_is_malicious_context_trainer(path, kw) for path in example_paths[:3])): # Check first 3 paths
|
|
600
|
+
|
|
467
601
|
filtered_tokens.append((kw, cnt))
|
|
468
602
|
keyword_store.add_keyword(kw, cnt)
|
|
603
|
+
learned_from_paths.extend(example_paths[:2]) # Track first 2 example paths
|
|
469
604
|
|
|
470
605
|
if filtered_tokens:
|
|
471
606
|
print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
|
|
607
|
+
print(f"🎯 Example malicious paths learned from: {learned_from_paths[:5]}") # Show first 5
|
|
472
608
|
else:
|
|
473
609
|
print("✅ No new suspicious keywords learned (good sign!)")
|
|
474
610
|
|
|
475
|
-
print(f"🎯
|
|
611
|
+
print(f"🎯 Smart keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
|
|
612
|
+
print(f"🔒 Used malicious context analysis to filter out false positives.")
|
|
476
613
|
|
|
477
614
|
# Training summary
|
|
478
615
|
print("\n" + "="*60)
|
|
@@ -9,7 +9,7 @@ long_description = (HERE / "README.md").read_text(encoding="utf-8")
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="aiwaf",
|
|
12
|
-
version="0.1.9.2.
|
|
12
|
+
version="0.1.9.2.4",
|
|
13
13
|
description="AI‑driven, self‑learning Web Application Firewall for Django",
|
|
14
14
|
long_description=long_description,
|
|
15
15
|
long_description_content_type="text/markdown",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|