aiwaf 0.1.9.2.2__tar.gz → 0.1.9.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/PKG-INFO +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/__init__.py +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/middleware.py +64 -6
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/trainer.py +146 -26
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf.egg-info/PKG-INFO +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/pyproject.toml +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/setup.py +1 -1
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/LICENSE +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/README.md +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/apps.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/blacklist_manager.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/decorators.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/__init__.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/__init__.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/add_exemption.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/add_ipexemption.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/aiwaf_diagnose.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/aiwaf_list.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/aiwaf_logging.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/aiwaf_reset.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/check_dependencies.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/clear_blacklist.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/clear_cache.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/debug_csv.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/detect_and_train.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/diagnose_blocking.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/regenerate_model.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/setup_models.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/test_exemption.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/management/commands/test_exemption_fix.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/middleware_logger.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/models.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/resources/model.pkl +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/storage.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/templatetags/__init__.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/templatetags/aiwaf_tags.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf/utils.py +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf.egg-info/SOURCES.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf.egg-info/dependency_links.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf.egg-info/requires.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/aiwaf.egg-info/top_level.txt +0 -0
- {aiwaf-0.1.9.2.2 → aiwaf-0.1.9.2.3}/setup.cfg +0 -0
|
@@ -298,11 +298,15 @@ class IPAndKeywordBlockMiddleware:
|
|
|
298
298
|
keyword_store = get_keyword_store()
|
|
299
299
|
segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
|
|
300
300
|
|
|
301
|
-
#
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
301
|
+
# Smart learning: only learn from suspicious contexts, never from valid paths
|
|
302
|
+
if not path_exists: # Only learn from non-existent paths
|
|
303
|
+
for seg in segments:
|
|
304
|
+
# Only learn if it's not a legitimate keyword AND in a suspicious context
|
|
305
|
+
if (seg not in self.legitimate_path_keywords and
|
|
306
|
+
seg not in self.exempt_keywords and
|
|
307
|
+
self._is_malicious_context(request, seg)):
|
|
308
|
+
keyword_store.add_keyword(seg)
|
|
309
|
+
|
|
306
310
|
dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
307
311
|
all_kw = set(STATIC_KW) | set(dynamic_top)
|
|
308
312
|
|
|
@@ -345,7 +349,29 @@ class IPAndKeywordBlockMiddleware:
|
|
|
345
349
|
block_reason = f"Inherently suspicious: {seg}"
|
|
346
350
|
|
|
347
351
|
if is_suspicious:
|
|
348
|
-
# Additional context check before blocking
|
|
352
|
+
# Additional context check before blocking - be more conservative with valid paths
|
|
353
|
+
if path_exists:
|
|
354
|
+
# For valid paths, only block if there are VERY strong malicious indicators
|
|
355
|
+
very_strong_indicators = [
|
|
356
|
+
# Multiple attack patterns in same request
|
|
357
|
+
sum([
|
|
358
|
+
'../' in request.path, '..\\' in request.path,
|
|
359
|
+
any(param in request.GET for param in ['cmd', 'exec', 'system']),
|
|
360
|
+
request.path.count('%') > 5, # Heavy URL encoding
|
|
361
|
+
len([s for s in segments if s in self.malicious_keywords]) > 2
|
|
362
|
+
]) >= 2,
|
|
363
|
+
|
|
364
|
+
# Obvious attack attempts on valid paths
|
|
365
|
+
any(attack in request.path.lower() for attack in [
|
|
366
|
+
'union+select', 'drop+table', '<script', 'javascript:',
|
|
367
|
+
'onload=', 'onerror=', '${', '{{', 'eval('
|
|
368
|
+
])
|
|
369
|
+
]
|
|
370
|
+
|
|
371
|
+
if not any(very_strong_indicators):
|
|
372
|
+
continue # Skip blocking for valid paths without very strong indicators
|
|
373
|
+
|
|
374
|
+
# For non-existent paths or paths with very strong indicators, proceed with blocking
|
|
349
375
|
if self._is_malicious_context(request, seg) or not path_exists:
|
|
350
376
|
# Double-check exemption before blocking
|
|
351
377
|
if not exemption_store.is_exempted(ip):
|
|
@@ -405,6 +431,38 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
405
431
|
# Use the safely loaded global MODEL instead of loading again
|
|
406
432
|
self.model = MODEL
|
|
407
433
|
|
|
434
|
+
def _is_malicious_context(self, request, keyword):
    """
    Determine if a keyword appears in a malicious context.

    Only learn keywords when we have strong indicators of malicious intent.

    Args:
        request: Request object; this method reads ``request.path`` and
            ``request.GET``.
        keyword: The path segment under consideration. Accepted for interface
            compatibility but not consulted; the decision is made from the
            request as a whole.

    Returns:
        bool: False when ``path_exists_in_django`` reports the path as valid;
        otherwise True as soon as one of the indicators below matches.
    """
    path = request.path

    # Don't learn from valid Django paths
    if path_exists_in_django(path):
        return False

    path_lower = path.lower()

    # Multiple suspicious segments in the same path
    suspicious_segments = [
        seg for seg in re.split(r"\W+", path) if seg in self.malicious_keywords
    ]
    if len(suspicious_segments) > 1:
        return True

    # Common attack patterns. A single '../' or '..\\' already matches here,
    # so a separate "multiple directory traversal" count could never change
    # the outcome and is intentionally not repeated.
    attack_patterns = (
        '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
        'backup', 'database', 'mysql', 'passwd', 'shadow',
    )
    if any(pattern in path_lower for pattern in attack_patterns):
        return True

    # Suspicious query parameters
    if any(param in request.GET for param in ('cmd', 'exec', 'system', 'shell')):
        return True

    # Encoded attack patterns. Percent-encoding hex digits are
    # case-insensitive ('%2E%2E' == '%2e%2e'), so compare against the
    # lowercased path instead of the raw one.
    encoded_patterns = ('%2e%2e', '%252e', '%c0%ae')
    return any(encoded in path_lower for encoded in encoded_patterns)
|
|
465
|
+
|
|
408
466
|
def process_request(self, request):
|
|
409
467
|
# First exemption check - early exit for exempt requests
|
|
410
468
|
if is_exempt(request):
|
|
@@ -96,7 +96,7 @@ def get_legitimate_keywords() -> set:
|
|
|
96
96
|
"""Get all legitimate keywords that shouldn't be learned as suspicious"""
|
|
97
97
|
legitimate = set()
|
|
98
98
|
|
|
99
|
-
# Common legitimate path segments
|
|
99
|
+
# Common legitimate path segments - expanded set
|
|
100
100
|
default_legitimate = {
|
|
101
101
|
"profile", "user", "users", "account", "accounts", "settings", "dashboard",
|
|
102
102
|
"home", "about", "contact", "help", "search", "list", "lists",
|
|
@@ -106,7 +106,32 @@ def get_legitimate_keywords() -> set:
|
|
|
106
106
|
"category", "categories", "tag", "tags", "post", "posts",
|
|
107
107
|
"article", "articles", "blog", "blogs", "news", "item", "items",
|
|
108
108
|
"admin", "administration", "manage", "manager", "control", "panel",
|
|
109
|
-
"config", "configuration", "option", "options", "preference", "preferences"
|
|
109
|
+
"config", "configuration", "option", "options", "preference", "preferences",
|
|
110
|
+
|
|
111
|
+
# Django built-in app keywords
|
|
112
|
+
"contenttypes", "contenttype", "sessions", "session", "messages", "message",
|
|
113
|
+
"staticfiles", "static", "sites", "site", "flatpages", "flatpage",
|
|
114
|
+
"redirects", "redirect", "permissions", "permission", "groups", "group",
|
|
115
|
+
|
|
116
|
+
# Common third-party package keywords
|
|
117
|
+
"token", "tokens", "oauth", "social", "rest", "framework", "cors",
|
|
118
|
+
"debug", "toolbar", "extensions", "allauth", "crispy", "forms",
|
|
119
|
+
"channels", "celery", "redis", "cache", "email", "mail",
|
|
120
|
+
|
|
121
|
+
# Common API/web development terms
|
|
122
|
+
"endpoint", "endpoints", "resource", "resources", "data", "export",
|
|
123
|
+
"import", "upload", "download", "file", "files", "media", "images",
|
|
124
|
+
"documents", "reports", "analytics", "stats", "statistics",
|
|
125
|
+
|
|
126
|
+
# Common business/application terms
|
|
127
|
+
"customer", "customers", "client", "clients", "company", "companies",
|
|
128
|
+
"department", "departments", "employee", "employees", "team", "teams",
|
|
129
|
+
"project", "projects", "task", "tasks", "event", "events",
|
|
130
|
+
"notification", "notifications", "alert", "alerts",
|
|
131
|
+
|
|
132
|
+
# Language/localization
|
|
133
|
+
"language", "languages", "locale", "locales", "translation", "translations",
|
|
134
|
+
"en", "fr", "de", "es", "it", "pt", "ru", "ja", "zh", "ko"
|
|
110
135
|
}
|
|
111
136
|
legitimate.update(default_legitimate)
|
|
112
137
|
|
|
@@ -135,30 +160,41 @@ def _extract_django_route_keywords() -> set:
|
|
|
135
160
|
|
|
136
161
|
# Extract from app names and labels
|
|
137
162
|
for app_config in apps.get_app_configs():
|
|
138
|
-
# Add app name and label
|
|
163
|
+
# Add app name and label - improved parsing
|
|
139
164
|
if app_config.name:
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
165
|
+
app_parts = app_config.name.lower().replace('-', '_').split('.')
|
|
166
|
+
for part in app_parts:
|
|
167
|
+
for segment in re.split(r'[._-]', part):
|
|
168
|
+
if len(segment) > 2:
|
|
169
|
+
keywords.add(segment)
|
|
143
170
|
|
|
144
171
|
if app_config.label and app_config.label != app_config.name:
|
|
145
172
|
for segment in re.split(r'[._-]', app_config.label.lower()):
|
|
146
173
|
if len(segment) > 2:
|
|
147
174
|
keywords.add(segment)
|
|
148
175
|
|
|
149
|
-
# Extract from model names in the app
|
|
176
|
+
# Extract from model names in the app - improved handling
|
|
150
177
|
try:
|
|
151
178
|
for model in app_config.get_models():
|
|
152
179
|
model_name = model._meta.model_name.lower()
|
|
153
180
|
if len(model_name) > 2:
|
|
154
181
|
keywords.add(model_name)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
182
|
+
# Add plural form
|
|
183
|
+
if not model_name.endswith('s'):
|
|
184
|
+
keywords.add(f"{model_name}s")
|
|
185
|
+
|
|
186
|
+
# Also add verbose names if different
|
|
187
|
+
verbose_name = str(model._meta.verbose_name).lower()
|
|
188
|
+
verbose_name_plural = str(model._meta.verbose_name_plural).lower()
|
|
189
|
+
|
|
190
|
+
for name in [verbose_name, verbose_name_plural]:
|
|
191
|
+
for segment in re.split(r'[^a-zA-Z]+', name):
|
|
192
|
+
if len(segment) > 2 and segment != model_name:
|
|
193
|
+
keywords.add(segment)
|
|
158
194
|
except Exception:
|
|
159
195
|
continue
|
|
160
196
|
|
|
161
|
-
# Extract from URL patterns
|
|
197
|
+
# Extract from URL patterns - improved extraction
|
|
162
198
|
def extract_from_pattern(pattern, prefix=""):
|
|
163
199
|
try:
|
|
164
200
|
if isinstance(pattern, URLResolver):
|
|
@@ -169,26 +205,41 @@ def _extract_django_route_keywords() -> set:
|
|
|
169
205
|
if len(segment) > 2:
|
|
170
206
|
keywords.add(segment)
|
|
171
207
|
|
|
172
|
-
# Extract from the pattern itself
|
|
208
|
+
# Extract from the pattern itself - more comprehensive
|
|
173
209
|
pattern_str = str(pattern.pattern)
|
|
174
|
-
|
|
175
|
-
|
|
210
|
+
# Get literal path segments (not regex parts)
|
|
211
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
212
|
+
for part in literal_parts:
|
|
213
|
+
if len(part) > 2:
|
|
214
|
+
keywords.add(part.lower())
|
|
176
215
|
|
|
177
216
|
# Recurse into nested patterns
|
|
178
|
-
|
|
179
|
-
|
|
217
|
+
try:
|
|
218
|
+
for nested_pattern in pattern.url_patterns:
|
|
219
|
+
extract_from_pattern(nested_pattern, prefix)
|
|
220
|
+
except:
|
|
221
|
+
pass
|
|
180
222
|
|
|
181
223
|
elif isinstance(pattern, URLPattern):
|
|
182
|
-
# Extract from URL pattern
|
|
224
|
+
# Extract from URL pattern - more comprehensive
|
|
183
225
|
pattern_str = str(pattern.pattern)
|
|
184
|
-
|
|
185
|
-
|
|
226
|
+
literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
|
|
227
|
+
for part in literal_parts:
|
|
228
|
+
if len(part) > 2:
|
|
229
|
+
keywords.add(part.lower())
|
|
186
230
|
|
|
187
231
|
# Extract from view name if available
|
|
188
232
|
if hasattr(pattern.callback, '__name__'):
|
|
189
233
|
view_name = pattern.callback.__name__.lower()
|
|
190
234
|
for segment in re.split(r'[._-]', view_name):
|
|
191
|
-
if len(segment) > 2 and segment
|
|
235
|
+
if len(segment) > 2 and segment not in ['view', 'class', 'function']:
|
|
236
|
+
keywords.add(segment)
|
|
237
|
+
|
|
238
|
+
# Extract from view class name if it's a class-based view
|
|
239
|
+
if hasattr(pattern.callback, 'view_class'):
|
|
240
|
+
class_name = pattern.callback.view_class.__name__.lower()
|
|
241
|
+
for segment in re.split(r'[._-]', class_name):
|
|
242
|
+
if len(segment) > 2 and segment not in ['view', 'class']:
|
|
192
243
|
keywords.add(segment)
|
|
193
244
|
|
|
194
245
|
except Exception:
|
|
@@ -203,10 +254,20 @@ def _extract_django_route_keywords() -> set:
|
|
|
203
254
|
print(f"Warning: Could not extract Django route keywords: {e}")
|
|
204
255
|
|
|
205
256
|
# Filter out very common/generic words that might be suspicious
|
|
257
|
+
# Expanded filter list
|
|
206
258
|
filtered_keywords = set()
|
|
259
|
+
exclude_words = {
|
|
260
|
+
'www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del',
|
|
261
|
+
'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her',
|
|
262
|
+
'was', 'one', 'our', 'out', 'day', 'had', 'has', 'his', 'how', 'man', 'new',
|
|
263
|
+
'now', 'old', 'see', 'two', 'who', 'boy', 'did', 'its', 'let', 'put', 'say',
|
|
264
|
+
'she', 'too', 'use', 'var', 'way', 'may', 'end', 'why', 'any', 'app', 'run'
|
|
265
|
+
}
|
|
266
|
+
|
|
207
267
|
for keyword in keywords:
|
|
208
268
|
if (len(keyword) >= 3 and
|
|
209
|
-
keyword not in
|
|
269
|
+
keyword not in exclude_words and
|
|
270
|
+
not keyword.isdigit()):
|
|
210
271
|
filtered_keywords.add(keyword)
|
|
211
272
|
|
|
212
273
|
if filtered_keywords:
|
|
@@ -287,6 +348,50 @@ def _parse(line: str) -> dict | None:
|
|
|
287
348
|
}
|
|
288
349
|
|
|
289
350
|
|
|
351
|
+
def _is_malicious_context_trainer(path: str, keyword: str, status: str = "404") -> bool:
    """
    Determine if a keyword from log analysis appears in a malicious context.

    This is the trainer version of the middleware's _is_malicious_context method.

    Args:
        path: Request path taken from a parsed log record.
        keyword: The path segment under consideration. Accepted for interface
            compatibility but not consulted; the decision is made from the
            path and status alone.
        status: HTTP status of the logged request, as a string.

    Returns:
        False when ``path_exists_in_django`` reports the path as valid;
        otherwise True as soon as one of the indicators below matches.
    """
    # Don't learn from valid Django paths
    if path_exists_in_django(path):
        return False

    path_lower = path.lower()

    # Multiple suspicious segments in path
    static_hits = [seg for seg in re.split(r"\W+", path) if seg in STATIC_KW]
    if len(static_hits) > 1:
        return True

    # Common attack patterns. A single '../' or '..\\' already matches here,
    # so a separate "multiple directory traversal" count could never change
    # the outcome and is intentionally not repeated.
    attack_patterns = (
        '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
        'backup', 'database', 'mysql', 'passwd', 'shadow', 'xmlrpc',
        'shell', 'cmd', 'exec', 'eval', 'system',
    )
    if any(pattern in path_lower for pattern in attack_patterns):
        return True

    # Path indicates obvious attack attempt
    obvious_attacks = (
        'union+select', 'drop+table', '<script', 'javascript:',
        '${', '{{', 'onload=', 'onerror=', 'file://', 'http://',
    )
    if any(attack in path_lower for attack in obvious_attacks):
        return True

    # Encoded attack patterns. Percent-encoding hex digits are
    # case-insensitive ('%2E%2E' == '%2e%2e'), so compare against the
    # lowercased path instead of the raw one.
    encoded_patterns = ('%2e%2e', '%252e', '%c0%ae', '%3c%73%63%72%69%70%74')
    if any(encoded in path_lower for encoded in encoded_patterns):
        return True

    # 404 status with suspicious characteristics
    if status != "404":
        return False
    return (
        len(path) > 50  # Very long paths are often attacks
        or path.count('/') > 10  # Too many directory levels
        or any(c in path for c in ('<', '>', '{', '}', '$', '`'))  # Special characters
    )
|
|
393
|
+
|
|
394
|
+
|
|
290
395
|
def train() -> None:
|
|
291
396
|
"""Enhanced training with improved keyword filtering and exemption handling"""
|
|
292
397
|
print("🚀 Starting AIWAF enhanced training...")
|
|
@@ -451,28 +556,43 @@ def train() -> None:
|
|
|
451
556
|
for seg in re.split(r"\W+", r["path"].lower()):
|
|
452
557
|
if (len(seg) > 3 and
|
|
453
558
|
seg not in STATIC_KW and
|
|
454
|
-
seg not in legitimate_keywords
|
|
559
|
+
seg not in legitimate_keywords and # Don't learn legitimate keywords
|
|
560
|
+
_is_malicious_context_trainer(r["path"], seg, r["status"])): # Smart context check
|
|
455
561
|
tokens[seg] += 1
|
|
456
562
|
|
|
457
563
|
keyword_store = get_keyword_store()
|
|
458
564
|
top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
|
|
459
565
|
|
|
460
|
-
# Additional filtering: only add keywords that appear suspicious enough
|
|
566
|
+
# Additional filtering: only add keywords that appear suspicious enough AND in malicious context
|
|
461
567
|
filtered_tokens = []
|
|
568
|
+
learned_from_paths = [] # Track which paths we learned from
|
|
569
|
+
|
|
462
570
|
for kw, cnt in top_tokens:
|
|
463
|
-
#
|
|
571
|
+
# Find example paths where this keyword appeared
|
|
572
|
+
example_paths = [r["path"] for r in parsed
|
|
573
|
+
if kw in r["path"].lower() and
|
|
574
|
+
r["status"].startswith(("4", "5")) and
|
|
575
|
+
not path_exists_in_django(r["path"])]
|
|
576
|
+
|
|
577
|
+
# Only add if keyword appears in malicious contexts
|
|
464
578
|
if (cnt >= 2 and # Must appear at least twice
|
|
465
579
|
len(kw) >= 4 and # Must be at least 4 characters
|
|
466
|
-
kw not in legitimate_keywords
|
|
580
|
+
kw not in legitimate_keywords and # Not in legitimate set
|
|
581
|
+
example_paths and # Has example paths
|
|
582
|
+
any(_is_malicious_context_trainer(path, kw) for path in example_paths[:3])): # Check first 3 paths
|
|
583
|
+
|
|
467
584
|
filtered_tokens.append((kw, cnt))
|
|
468
585
|
keyword_store.add_keyword(kw, cnt)
|
|
586
|
+
learned_from_paths.extend(example_paths[:2]) # Track first 2 example paths
|
|
469
587
|
|
|
470
588
|
if filtered_tokens:
|
|
471
589
|
print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
|
|
590
|
+
print(f"🎯 Example malicious paths learned from: {learned_from_paths[:5]}") # Show first 5
|
|
472
591
|
else:
|
|
473
592
|
print("✅ No new suspicious keywords learned (good sign!)")
|
|
474
593
|
|
|
475
|
-
print(f"🎯
|
|
594
|
+
print(f"🎯 Smart keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
|
|
595
|
+
print(f"🔒 Used malicious context analysis to filter out false positives.")
|
|
476
596
|
|
|
477
597
|
# Training summary
|
|
478
598
|
print("\n" + "="*60)
|
|
@@ -9,7 +9,7 @@ long_description = (HERE / "README.md").read_text(encoding="utf-8")
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="aiwaf",
|
|
12
|
-
version="0.1.9.2.2",
|
|
12
|
+
version="0.1.9.2.3",
|
|
13
13
|
description="AI‑driven, self‑learning Web Application Firewall for Django",
|
|
14
14
|
long_description=long_description,
|
|
15
15
|
long_description_content_type="text/markdown",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|