aiwaf 0.1.9.2.2__py3-none-any.whl → 0.1.9.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  default_app_config = "aiwaf.apps.AiwafConfig"
2
2
 
3
- __version__ = "0.1.9.2.2"
3
+ __version__ = "0.1.9.2.3"
4
4
 
5
5
  # Note: Middleware classes are available from aiwaf.middleware
6
6
  # Import them only when needed to avoid circular imports during Django app loading
aiwaf/middleware.py CHANGED
@@ -298,11 +298,15 @@ class IPAndKeywordBlockMiddleware:
298
298
  keyword_store = get_keyword_store()
299
299
  segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
300
300
 
301
- # Only learn keywords from non-existent paths or suspicious contexts
302
- for seg in segments:
303
- if not path_exists or self._is_malicious_context(request, seg):
304
- keyword_store.add_keyword(seg)
305
-
301
+ # Smart learning: only learn from suspicious contexts, never from valid paths
302
+ if not path_exists: # Only learn from non-existent paths
303
+ for seg in segments:
304
+ # Only learn if it's not a legitimate keyword AND in a suspicious context
305
+ if (seg not in self.legitimate_path_keywords and
306
+ seg not in self.exempt_keywords and
307
+ self._is_malicious_context(request, seg)):
308
+ keyword_store.add_keyword(seg)
309
+
306
310
  dynamic_top = keyword_store.get_top_keywords(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
307
311
  all_kw = set(STATIC_KW) | set(dynamic_top)
308
312
 
@@ -345,7 +349,29 @@ class IPAndKeywordBlockMiddleware:
345
349
  block_reason = f"Inherently suspicious: {seg}"
346
350
 
347
351
  if is_suspicious:
348
- # Additional context check before blocking
352
+ # Additional context check before blocking - be more conservative with valid paths
353
+ if path_exists:
354
+ # For valid paths, only block if there are VERY strong malicious indicators
355
+ very_strong_indicators = [
356
+ # Multiple attack patterns in same request
357
+ sum([
358
+ '../' in request.path, '..\\' in request.path,
359
+ any(param in request.GET for param in ['cmd', 'exec', 'system']),
360
+ request.path.count('%') > 5, # Heavy URL encoding
361
+ len([s for s in segments if s in self.malicious_keywords]) > 2
362
+ ]) >= 2,
363
+
364
+ # Obvious attack attempts on valid paths
365
+ any(attack in request.path.lower() for attack in [
366
+ 'union+select', 'drop+table', '<script', 'javascript:',
367
+ 'onload=', 'onerror=', '${', '{{', 'eval('
368
+ ])
369
+ ]
370
+
371
+ if not any(very_strong_indicators):
372
+ continue # Skip blocking for valid paths without very strong indicators
373
+
374
+ # For non-existent paths or paths with very strong indicators, proceed with blocking
349
375
  if self._is_malicious_context(request, seg) or not path_exists:
350
376
  # Double-check exemption before blocking
351
377
  if not exemption_store.is_exempted(ip):
@@ -405,6 +431,38 @@ class AIAnomalyMiddleware(MiddlewareMixin):
405
431
  # Use the safely loaded global MODEL instead of loading again
406
432
  self.model = MODEL
407
433
 
434
+ def _is_malicious_context(self, request, keyword):
435
+ """
436
+ Determine if a keyword appears in a malicious context.
437
+ Only learn keywords when we have strong indicators of malicious intent.
438
+ """
439
+ # Don't learn from valid Django paths
440
+ if path_exists_in_django(request.path):
441
+ return False
442
+
443
+ # Strong malicious indicators
444
+ malicious_indicators = [
445
+ # Multiple consecutive suspicious segments
446
+ len([seg for seg in re.split(r"\W+", request.path) if seg in self.malicious_keywords]) > 1,
447
+
448
+ # Common attack patterns
449
+ any(pattern in request.path.lower() for pattern in [
450
+ '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
451
+ 'backup', 'database', 'mysql', 'passwd', 'shadow'
452
+ ]),
453
+
454
+ # Suspicious query parameters
455
+ any(param in request.GET for param in ['cmd', 'exec', 'system', 'shell']),
456
+
457
+ # Multiple directory traversal attempts
458
+ request.path.count('../') > 2 or request.path.count('..\\') > 2,
459
+
460
+ # Encoded attack patterns
461
+ any(encoded in request.path for encoded in ['%2e%2e', '%252e', '%c0%ae']),
462
+ ]
463
+
464
+ return any(malicious_indicators)
465
+
408
466
  def process_request(self, request):
409
467
  # First exemption check - early exit for exempt requests
410
468
  if is_exempt(request):
aiwaf/trainer.py CHANGED
@@ -96,7 +96,7 @@ def get_legitimate_keywords() -> set:
96
96
  """Get all legitimate keywords that shouldn't be learned as suspicious"""
97
97
  legitimate = set()
98
98
 
99
- # Common legitimate path segments
99
+ # Common legitimate path segments - expanded set
100
100
  default_legitimate = {
101
101
  "profile", "user", "users", "account", "accounts", "settings", "dashboard",
102
102
  "home", "about", "contact", "help", "search", "list", "lists",
@@ -106,7 +106,32 @@ def get_legitimate_keywords() -> set:
106
106
  "category", "categories", "tag", "tags", "post", "posts",
107
107
  "article", "articles", "blog", "blogs", "news", "item", "items",
108
108
  "admin", "administration", "manage", "manager", "control", "panel",
109
- "config", "configuration", "option", "options", "preference", "preferences"
109
+ "config", "configuration", "option", "options", "preference", "preferences",
110
+
111
+ # Django built-in app keywords
112
+ "contenttypes", "contenttype", "sessions", "session", "messages", "message",
113
+ "staticfiles", "static", "sites", "site", "flatpages", "flatpage",
114
+ "redirects", "redirect", "permissions", "permission", "groups", "group",
115
+
116
+ # Common third-party package keywords
117
+ "token", "tokens", "oauth", "social", "rest", "framework", "cors",
118
+ "debug", "toolbar", "extensions", "allauth", "crispy", "forms",
119
+ "channels", "celery", "redis", "cache", "email", "mail",
120
+
121
+ # Common API/web development terms
122
+ "endpoint", "endpoints", "resource", "resources", "data", "export",
123
+ "import", "upload", "download", "file", "files", "media", "images",
124
+ "documents", "reports", "analytics", "stats", "statistics",
125
+
126
+ # Common business/application terms
127
+ "customer", "customers", "client", "clients", "company", "companies",
128
+ "department", "departments", "employee", "employees", "team", "teams",
129
+ "project", "projects", "task", "tasks", "event", "events",
130
+ "notification", "notifications", "alert", "alerts",
131
+
132
+ # Language/localization
133
+ "language", "languages", "locale", "locales", "translation", "translations",
134
+ "en", "fr", "de", "es", "it", "pt", "ru", "ja", "zh", "ko"
110
135
  }
111
136
  legitimate.update(default_legitimate)
112
137
 
@@ -135,30 +160,41 @@ def _extract_django_route_keywords() -> set:
135
160
 
136
161
  # Extract from app names and labels
137
162
  for app_config in apps.get_app_configs():
138
- # Add app name and label
163
+ # Add app name and label - improved parsing
139
164
  if app_config.name:
140
- for segment in re.split(r'[._-]', app_config.name.lower()):
141
- if len(segment) > 2:
142
- keywords.add(segment)
165
+ app_parts = app_config.name.lower().replace('-', '_').split('.')
166
+ for part in app_parts:
167
+ for segment in re.split(r'[._-]', part):
168
+ if len(segment) > 2:
169
+ keywords.add(segment)
143
170
 
144
171
  if app_config.label and app_config.label != app_config.name:
145
172
  for segment in re.split(r'[._-]', app_config.label.lower()):
146
173
  if len(segment) > 2:
147
174
  keywords.add(segment)
148
175
 
149
- # Extract from model names in the app
176
+ # Extract from model names in the app - improved handling
150
177
  try:
151
178
  for model in app_config.get_models():
152
179
  model_name = model._meta.model_name.lower()
153
180
  if len(model_name) > 2:
154
181
  keywords.add(model_name)
155
- # Add plural form
156
- if not model_name.endswith('s'):
157
- keywords.add(f"{model_name}s")
182
+ # Add plural form
183
+ if not model_name.endswith('s'):
184
+ keywords.add(f"{model_name}s")
185
+
186
+ # Also add verbose names if different
187
+ verbose_name = str(model._meta.verbose_name).lower()
188
+ verbose_name_plural = str(model._meta.verbose_name_plural).lower()
189
+
190
+ for name in [verbose_name, verbose_name_plural]:
191
+ for segment in re.split(r'[^a-zA-Z]+', name):
192
+ if len(segment) > 2 and segment != model_name:
193
+ keywords.add(segment)
158
194
  except Exception:
159
195
  continue
160
196
 
161
- # Extract from URL patterns
197
+ # Extract from URL patterns - improved extraction
162
198
  def extract_from_pattern(pattern, prefix=""):
163
199
  try:
164
200
  if isinstance(pattern, URLResolver):
@@ -169,26 +205,41 @@ def _extract_django_route_keywords() -> set:
169
205
  if len(segment) > 2:
170
206
  keywords.add(segment)
171
207
 
172
- # Extract from the pattern itself
208
+ # Extract from the pattern itself - more comprehensive
173
209
  pattern_str = str(pattern.pattern)
174
- for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
175
- keywords.add(segment.lower())
210
+ # Get literal path segments (not regex parts)
211
+ literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
212
+ for part in literal_parts:
213
+ if len(part) > 2:
214
+ keywords.add(part.lower())
176
215
 
177
216
  # Recurse into nested patterns
178
- for nested_pattern in pattern.url_patterns:
179
- extract_from_pattern(nested_pattern, prefix)
217
+ try:
218
+ for nested_pattern in pattern.url_patterns:
219
+ extract_from_pattern(nested_pattern, prefix)
220
+ except:
221
+ pass
180
222
 
181
223
  elif isinstance(pattern, URLPattern):
182
- # Extract from URL pattern
224
+ # Extract from URL pattern - more comprehensive
183
225
  pattern_str = str(pattern.pattern)
184
- for segment in re.findall(r'([a-zA-Z]\w{2,})', pattern_str):
185
- keywords.add(segment.lower())
226
+ literal_parts = re.findall(r'([a-zA-Z][a-zA-Z0-9_-]*)', pattern_str)
227
+ for part in literal_parts:
228
+ if len(part) > 2:
229
+ keywords.add(part.lower())
186
230
 
187
231
  # Extract from view name if available
188
232
  if hasattr(pattern.callback, '__name__'):
189
233
  view_name = pattern.callback.__name__.lower()
190
234
  for segment in re.split(r'[._-]', view_name):
191
- if len(segment) > 2 and segment != 'view':
235
+ if len(segment) > 2 and segment not in ['view', 'class', 'function']:
236
+ keywords.add(segment)
237
+
238
+ # Extract from view class name if it's a class-based view
239
+ if hasattr(pattern.callback, 'view_class'):
240
+ class_name = pattern.callback.view_class.__name__.lower()
241
+ for segment in re.split(r'[._-]', class_name):
242
+ if len(segment) > 2 and segment not in ['view', 'class']:
192
243
  keywords.add(segment)
193
244
 
194
245
  except Exception:
@@ -203,10 +254,20 @@ def _extract_django_route_keywords() -> set:
203
254
  print(f"Warning: Could not extract Django route keywords: {e}")
204
255
 
205
256
  # Filter out very common/generic words that might be suspicious
257
+ # Expanded filter list
206
258
  filtered_keywords = set()
259
+ exclude_words = {
260
+ 'www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del',
261
+ 'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her',
262
+ 'was', 'one', 'our', 'out', 'day', 'had', 'has', 'his', 'how', 'man', 'new',
263
+ 'now', 'old', 'see', 'two', 'who', 'boy', 'did', 'its', 'let', 'put', 'say',
264
+ 'she', 'too', 'use', 'var', 'way', 'may', 'end', 'why', 'any', 'app', 'run'
265
+ }
266
+
207
267
  for keyword in keywords:
208
268
  if (len(keyword) >= 3 and
209
- keyword not in ['www', 'com', 'org', 'net', 'int', 'str', 'obj', 'get', 'set', 'put', 'del']):
269
+ keyword not in exclude_words and
270
+ not keyword.isdigit()):
210
271
  filtered_keywords.add(keyword)
211
272
 
212
273
  if filtered_keywords:
@@ -287,6 +348,50 @@ def _parse(line: str) -> dict | None:
287
348
  }
288
349
 
289
350
 
351
+ def _is_malicious_context_trainer(path: str, keyword: str, status: str = "404") -> bool:
352
+ """
353
+ Determine if a keyword from log analysis appears in a malicious context.
354
+ This is the trainer version of the middleware's _is_malicious_context method.
355
+ """
356
+ # Don't learn from valid Django paths
357
+ if path_exists_in_django(path):
358
+ return False
359
+
360
+ # Strong malicious indicators for log analysis
361
+ malicious_indicators = [
362
+ # Multiple suspicious segments in path
363
+ len([seg for seg in re.split(r"\W+", path) if seg in STATIC_KW]) > 1,
364
+
365
+ # Common attack patterns
366
+ any(pattern in path.lower() for pattern in [
367
+ '../', '..\\', '.env', 'wp-admin', 'phpmyadmin', 'config',
368
+ 'backup', 'database', 'mysql', 'passwd', 'shadow', 'xmlrpc',
369
+ 'shell', 'cmd', 'exec', 'eval', 'system'
370
+ ]),
371
+
372
+ # Path indicates obvious attack attempt
373
+ any(attack in path.lower() for attack in [
374
+ 'union+select', 'drop+table', '<script', 'javascript:',
375
+ '${', '{{', 'onload=', 'onerror=', 'file://', 'http://'
376
+ ]),
377
+
378
+ # Multiple directory traversal attempts
379
+ path.count('../') > 1 or path.count('..\\') > 1,
380
+
381
+ # Encoded attack patterns
382
+ any(encoded in path for encoded in ['%2e%2e', '%252e', '%c0%ae', '%3c%73%63%72%69%70%74']),
383
+
384
+ # 404 status with suspicious characteristics
385
+ status == "404" and (
386
+ len(path) > 50 or # Very long paths are often attacks
387
+ path.count('/') > 10 or # Too many directory levels
388
+ any(c in path for c in ['<', '>', '{', '}', '$', '`']) # Special characters
389
+ ),
390
+ ]
391
+
392
+ return any(malicious_indicators)
393
+
394
+
290
395
  def train() -> None:
291
396
  """Enhanced training with improved keyword filtering and exemption handling"""
292
397
  print("🚀 Starting AIWAF enhanced training...")
@@ -451,28 +556,43 @@ def train() -> None:
451
556
  for seg in re.split(r"\W+", r["path"].lower()):
452
557
  if (len(seg) > 3 and
453
558
  seg not in STATIC_KW and
454
- seg not in legitimate_keywords): # Don't learn legitimate keywords
559
+ seg not in legitimate_keywords and # Don't learn legitimate keywords
560
+ _is_malicious_context_trainer(r["path"], seg, r["status"])): # Smart context check
455
561
  tokens[seg] += 1
456
562
 
457
563
  keyword_store = get_keyword_store()
458
564
  top_tokens = tokens.most_common(getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10))
459
565
 
460
- # Additional filtering: only add keywords that appear suspicious enough
566
+ # Additional filtering: only add keywords that appear suspicious enough AND in malicious context
461
567
  filtered_tokens = []
568
+ learned_from_paths = [] # Track which paths we learned from
569
+
462
570
  for kw, cnt in top_tokens:
463
- # Don't add keywords that might be legitimate
571
+ # Find example paths where this keyword appeared
572
+ example_paths = [r["path"] for r in parsed
573
+ if kw in r["path"].lower() and
574
+ r["status"].startswith(("4", "5")) and
575
+ not path_exists_in_django(r["path"])]
576
+
577
+ # Only add if keyword appears in malicious contexts
464
578
  if (cnt >= 2 and # Must appear at least twice
465
579
  len(kw) >= 4 and # Must be at least 4 characters
466
- kw not in legitimate_keywords): # Not in legitimate set
580
+ kw not in legitimate_keywords and # Not in legitimate set
581
+ example_paths and # Has example paths
582
+ any(_is_malicious_context_trainer(path, kw) for path in example_paths[:3])): # Check first 3 paths
583
+
467
584
  filtered_tokens.append((kw, cnt))
468
585
  keyword_store.add_keyword(kw, cnt)
586
+ learned_from_paths.extend(example_paths[:2]) # Track first 2 example paths
469
587
 
470
588
  if filtered_tokens:
471
589
  print(f"📝 Added {len(filtered_tokens)} suspicious keywords: {[kw for kw, _ in filtered_tokens]}")
590
+ print(f"🎯 Example malicious paths learned from: {learned_from_paths[:5]}") # Show first 5
472
591
  else:
473
592
  print("✅ No new suspicious keywords learned (good sign!)")
474
593
 
475
- print(f"🎯 Dynamic keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
594
+ print(f"🎯 Smart keyword learning complete. Excluded {len(legitimate_keywords)} legitimate keywords.")
595
+ print(f"🔒 Used malicious context analysis to filter out false positives.")
476
596
 
477
597
  # Training summary
478
598
  print("\n" + "="*60)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.9.2.2
3
+ Version: 0.1.9.2.3
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -1,12 +1,12 @@
1
- aiwaf/__init__.py,sha256=EwzM3mRDf7i5IVX0-pTMpVFRf51lpIIbh6ZzkzOw10M,220
1
+ aiwaf/__init__.py,sha256=oJ3sGVirmahdoT5DpCTp_liJkyeRv9FllrYYKRkThnU,220
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
4
4
  aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
5
- aiwaf/middleware.py,sha256=8EC4AKfUjHhmVSKpquimkMUebBekr92pqyVF97wlbx0,27408
5
+ aiwaf/middleware.py,sha256=-w_uOaZgakFoJkvmJUB7atqcYQr3nSd9HbSKlP8_178,30370
6
6
  aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
7
7
  aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
8
8
  aiwaf/storage.py,sha256=5ImrZMRn3u7HNsPH0fDjWhDrD2tgG2IHVnOXtLz0fk4,10253
9
- aiwaf/trainer.py,sha256=U-X79nFhSTEbVexFHo3IXFf1HgvXrFnQ__WqTar0o4M,19118
9
+ aiwaf/trainer.py,sha256=E9jNPq1EHJkKpX1loZrUd2BDBAvH79w_Ltbdb1fsc0Q,25259
10
10
  aiwaf/utils.py,sha256=BJk5vJCYdGPl_4QQiknjhCbkzv5HZCXgFcBJDMJpHok,3390
11
11
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -29,8 +29,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
29
29
  aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
30
30
  aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
32
- aiwaf-0.1.9.2.2.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
33
- aiwaf-0.1.9.2.2.dist-info/METADATA,sha256=NFu9QZWsGPcmAJaHeJroSXZL_PstDr33NbSffV94bLQ,26824
34
- aiwaf-0.1.9.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- aiwaf-0.1.9.2.2.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
36
- aiwaf-0.1.9.2.2.dist-info/RECORD,,
32
+ aiwaf-0.1.9.2.3.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
33
+ aiwaf-0.1.9.2.3.dist-info/METADATA,sha256=nLqJ4rOXO6IFxBr_0EBdnlYRk824Uii9KYLnsObfJx0,26824
34
+ aiwaf-0.1.9.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ aiwaf-0.1.9.2.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
36
+ aiwaf-0.1.9.2.3.dist-info/RECORD,,