aiwaf 0.1.7.6__py3-none-any.whl → 0.1.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/resources/model.pkl CHANGED
Binary file
aiwaf/trainer.py CHANGED
@@ -2,23 +2,23 @@ import os
2
2
  import glob
3
3
  import gzip
4
4
  import re
5
+ import joblib
6
+
5
7
  from datetime import datetime
6
8
  from collections import defaultdict, Counter
7
9
 
8
10
  import pandas as pd
9
11
  from sklearn.ensemble import IsolationForest
10
- import joblib
11
12
 
12
13
  from django.conf import settings
13
14
  from django.apps import apps
14
15
  from django.db.models import F
15
- from django.urls import get_resolver
16
16
 
17
-
18
- LOG_PATH = settings.AIWAF_ACCESS_LOG
17
+ # ─────────── Configuration ───────────
18
+ LOG_PATH = settings.AIWAF_ACCESS_LOG
19
19
  MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
20
20
 
21
- STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
21
+ STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
22
22
  STATUS_IDX = ["200", "403", "404", "500"]
23
23
 
24
24
  _LOG_RX = re.compile(
@@ -29,109 +29,105 @@ _LOG_RX = re.compile(
29
29
  BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
30
30
  DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
31
31
 
32
- def is_exempt_path(path):
32
+
33
+ def is_exempt_path(path: str) -> bool:
33
34
  path = path.lower()
34
- exempt_paths = getattr(settings, "AIWAF_EXEMPT_PATHS", [])
35
- for exempt in exempt_paths:
35
+ for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
36
36
  if path == exempt or path.startswith(exempt.rstrip("/") + "/"):
37
37
  return True
38
38
  return False
39
39
 
40
- def path_exists_in_django(path):
40
+
41
+ def path_exists_in_django(path: str) -> bool:
41
42
  from django.urls import get_resolver
42
- from django.urls.resolvers import URLPattern, URLResolver
43
+ from django.urls.resolvers import URLResolver
43
44
 
44
- path = path.split("?")[0].lstrip("/")
45
+ candidate = path.split("?")[0].lstrip("/")
45
46
  try:
46
- get_resolver().resolve(f"/{path}")
47
+ get_resolver().resolve(f"/{candidate}")
47
48
  return True
48
49
  except:
49
50
  pass
50
- parts = path.split("/")
51
- root_resolver = get_resolver()
52
- for pattern in root_resolver.url_patterns:
53
- if isinstance(pattern, URLResolver):
54
- prefix = pattern.pattern.describe().strip("^/")
55
- if prefix and path.startswith(prefix):
51
+
52
+ root = get_resolver()
53
+ for p in root.url_patterns:
54
+ if isinstance(p, URLResolver):
55
+ prefix = p.pattern.describe().strip("^/")
56
+ if prefix and candidate.startswith(prefix):
56
57
  return True
57
58
  return False
58
59
 
59
- def remove_exempt_keywords():
60
- exempt_paths = getattr(settings, "AIWAF_EXEMPT_PATHS", [])
61
- exempt_tokens = set()
62
-
63
- for path in exempt_paths:
64
- path = path.strip("/").lower()
65
- segments = re.split(r"\W+", path)
66
- exempt_tokens.update(seg for seg in segments if len(seg) > 3)
67
60
 
61
+ def remove_exempt_keywords() -> None:
62
+ exempt_tokens = set()
63
+ for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
64
+ for seg in re.split(r"\W+", path.strip("/").lower()):
65
+ if len(seg) > 3:
66
+ exempt_tokens.add(seg)
68
67
  if exempt_tokens:
69
- deleted_count, _ = DynamicKeyword.objects.filter(keyword__in=exempt_tokens).delete()
70
- print(f"Removed {deleted_count} dynamic keywords that are now exempt: {list(exempt_tokens)}")
68
+ DynamicKeyword.objects.filter(keyword__in=exempt_tokens).delete()
69
+
71
70
 
72
- def _read_all_logs():
71
+ def _read_all_logs() -> list[str]:
73
72
  lines = []
74
73
  if LOG_PATH and os.path.exists(LOG_PATH):
75
74
  with open(LOG_PATH, "r", errors="ignore") as f:
76
75
  lines.extend(f.readlines())
77
- for path in sorted(glob.glob(f"{LOG_PATH}.*")):
78
- opener = gzip.open if path.endswith(".gz") else open
76
+ for p in sorted(glob.glob(f"{LOG_PATH}.*")):
77
+ opener = gzip.open if p.endswith(".gz") else open
79
78
  try:
80
- with opener(path, "rt", errors="ignore") as f:
79
+ with opener(p, "rt", errors="ignore") as f:
81
80
  lines.extend(f.readlines())
82
81
  except OSError:
83
82
  continue
84
83
  return lines
85
84
 
86
85
 
87
- def _parse(line):
86
+ def _parse(line: str) -> dict | None:
88
87
  m = _LOG_RX.search(line)
89
88
  if not m:
90
89
  return None
91
- ip, ts_str, path, status, ref, ua, rt = m.groups()
90
+ ip, ts_str, path, status, *_ , rt = m.groups()
92
91
  try:
93
92
  ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
94
93
  except ValueError:
95
94
  return None
96
95
  return {
97
- "ip": ip,
98
- "timestamp": ts,
99
- "path": path,
100
- "status": status,
101
- "ua": ua,
96
+ "ip": ip,
97
+ "timestamp": ts,
98
+ "path": path,
99
+ "status": status,
102
100
  "response_time": float(rt),
103
101
  }
104
102
 
105
103
 
106
-
107
- def train():
104
+ def train() -> None:
108
105
  remove_exempt_keywords()
109
106
  raw_lines = _read_all_logs()
110
107
  if not raw_lines:
111
108
  print("No log lines found – check AIWAF_ACCESS_LOG setting.")
112
109
  return
110
+
113
111
  parsed = []
114
- ip_404 = defaultdict(int)
112
+ ip_404 = defaultdict(int)
115
113
  ip_times = defaultdict(list)
116
- for ln in raw_lines:
117
- rec = _parse(ln)
114
+
115
+ for line in raw_lines:
116
+ rec = _parse(line)
118
117
  if not rec:
119
118
  continue
120
119
  parsed.append(rec)
121
120
  ip_times[rec["ip"]].append(rec["timestamp"])
122
121
  if rec["status"] == "404":
123
122
  ip_404[rec["ip"]] += 1
124
- blocked_404 = []
123
+
124
+ # 3. Optional immediate 404‐flood blocking
125
125
  for ip, count in ip_404.items():
126
126
  if count >= 6:
127
- obj, created = BlacklistEntry.objects.get_or_create(
127
+ BlacklistEntry.objects.get_or_create(
128
128
  ip_address=ip,
129
129
  defaults={"reason": "Excessive 404s (≥6)"}
130
130
  )
131
- if created:
132
- blocked_404.append(ip)
133
- if blocked_404:
134
- print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
135
131
 
136
132
  feature_dicts = []
137
133
  for r in parsed:
@@ -140,60 +136,55 @@ def train():
140
136
  1 for t in ip_times[ip]
141
137
  if (r["timestamp"] - t).total_seconds() <= 10
142
138
  )
143
- total404 = ip_404[ip]
144
- is_known_path = path_exists_in_django(r["path"])
145
- kw_hits = 0
146
- if not is_known_path and not is_exempt_path(r["path"]):
139
+ total404 = ip_404[ip]
140
+ known_path = path_exists_in_django(r["path"])
141
+ kw_hits = 0
142
+ if not known_path and not is_exempt_path(r["path"]):
147
143
  kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
144
+
148
145
  status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
146
+
149
147
  feature_dicts.append({
150
- "ip": ip,
151
- "path_len": len(r["path"]),
152
- "kw_hits": kw_hits,
153
- "resp_time": r["response_time"],
154
- "status_idx": status_idx,
155
- "burst_count": burst,
156
- "total_404": total404,
148
+ "ip": ip,
149
+ "path_len": len(r["path"]),
150
+ "kw_hits": kw_hits,
151
+ "resp_time": r["response_time"],
152
+ "status_idx": status_idx,
153
+ "burst_count": burst,
154
+ "total_404": total404,
157
155
  })
158
156
 
159
157
  if not feature_dicts:
160
158
  print("⚠️ Nothing to train on – no valid log entries.")
161
159
  return
160
+
162
161
  df = pd.DataFrame(feature_dicts)
163
162
  feature_cols = [c for c in df.columns if c != "ip"]
164
163
  X = df[feature_cols].astype(float).values
165
164
  model = IsolationForest(contamination=0.01, random_state=42)
166
165
  model.fit(X)
166
+
167
167
  os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
168
168
  joblib.dump(model, MODEL_PATH)
169
169
  print(f"Model trained on {len(X)} samples → {MODEL_PATH}")
170
170
  preds = model.predict(X)
171
- anomalous_ips = set(df.loc[preds == -1, 'ip'])
172
- blocked_anom = []
171
+ anomalous_ips = set(df.loc[preds == -1, "ip"])
173
172
  for ip in anomalous_ips:
174
- obj, created = BlacklistEntry.objects.get_or_create(
173
+ BlacklistEntry.objects.get_or_create(
175
174
  ip_address=ip,
176
175
  defaults={"reason": "Anomalous behavior"}
177
176
  )
178
- if created:
179
- blocked_anom.append(ip)
180
- if blocked_anom:
181
- print(f"🚫 Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
177
+
182
178
  tokens = Counter()
183
179
  for r in parsed:
184
- if r["status"].startswith(("4", "5")) and not path_exists_in_django(r["path"]):
180
+ if (r["status"].startswith(("4", "5"))
181
+ and not path_exists_in_django(r["path"])):
185
182
  for seg in re.split(r"\W+", r["path"].lower()):
186
183
  if len(seg) > 3 and seg not in STATIC_KW:
187
184
  tokens[seg] += 1
188
185
 
189
- top_tokens = tokens.most_common(10)
190
- for kw, cnt in top_tokens:
186
+ for kw, cnt in tokens.most_common(10):
191
187
  obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
192
188
  DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
193
189
 
194
- print(f" DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
195
-
196
-
197
-
198
- if __name__ == "__main__":
199
- train()
190
+ print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in tokens.most_common(10)]}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.7.6
3
+ Version: 0.1.7.7
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -4,16 +4,16 @@ aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,39
4
4
  aiwaf/middleware.py,sha256=kH77E1xWVIjQF6frUGM6kdoz-gZXGAh43Fj-2hPEbSM,7990
5
5
  aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
6
6
  aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
7
- aiwaf/trainer.py,sha256=ir5kFTeLQuhMd2h094ct03Wr-rNZsX-mZHwjLx29F54,6422
7
+ aiwaf/trainer.py,sha256=iYWciq-MPZgseIfMDbup6GGD83_nMKdXJwcvDn1yQAk,5936
8
8
  aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
9
9
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMmFEkwwrcDsiM166K0,269
12
- aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
12
+ aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
13
13
  aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  aiwaf/templatetags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
15
- aiwaf-0.1.7.6.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
16
- aiwaf-0.1.7.6.dist-info/METADATA,sha256=wzS_EmYIHPo4JULdOAoVZvWn7Yo2I9qrRkcWkHw-k34,6116
17
- aiwaf-0.1.7.6.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
18
- aiwaf-0.1.7.6.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
19
- aiwaf-0.1.7.6.dist-info/RECORD,,
15
+ aiwaf-0.1.7.7.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
16
+ aiwaf-0.1.7.7.dist-info/METADATA,sha256=myjlXQiSGSBIsgp-aV1Cp7V-LepjbxPzlFvXmJP8-NQ,6116
17
+ aiwaf-0.1.7.7.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
18
+ aiwaf-0.1.7.7.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
19
+ aiwaf-0.1.7.7.dist-info/RECORD,,