aiwaf 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/middleware.py CHANGED
@@ -13,11 +13,11 @@ from django.conf import settings
13
13
  from django.core.cache import cache
14
14
  from django.db.models import F
15
15
  from django.apps import apps
16
+ from django.urls import get_resolver
16
17
 
17
18
  from .blacklist_manager import BlacklistManager
18
19
  from .models import DynamicKeyword
19
20
 
20
- # ─── Model loading with fallback ────────────────────────────────────────────
21
21
  MODEL_PATH = getattr(
22
22
  settings,
23
23
  "AIWAF_MODEL_PATH",
@@ -25,7 +25,6 @@ MODEL_PATH = getattr(
25
25
  )
26
26
  MODEL = joblib.load(MODEL_PATH)
27
27
 
28
- # ─── Static keywords default ────────────────────────────────────────────────
29
28
  STATIC_KW = getattr(
30
29
  settings,
31
30
  "AIWAF_MALICIOUS_KEYWORDS",
@@ -41,15 +40,46 @@ def get_ip(request):
41
40
  return xff.split(",")[0].strip()
42
41
  return request.META.get("REMOTE_ADDR", "")
43
42
 
44
-
45
- class IPBlockMiddleware:
43
+ class IPAndKeywordBlockMiddleware:
46
44
  def __init__(self, get_response):
47
45
  self.get_response = get_response
46
+ self.url_patterns = self._collect_view_paths()
47
+
48
+ def _collect_view_paths(self):
49
+ resolver = get_resolver()
50
+ patterns = set()
51
+
52
+ def extract(patterns_list, prefix=""):
53
+ for p in patterns_list:
54
+ if hasattr(p, "url_patterns"):
55
+ extract(p.url_patterns, prefix + str(p.pattern))
56
+ else:
57
+ pat = (prefix + str(p.pattern)).strip("^$")
58
+ patterns.add(pat)
59
+ extract(resolver.url_patterns)
60
+ return patterns
48
61
 
49
62
  def __call__(self, request):
50
63
  ip = get_ip(request)
64
+ path = request.path.lower()
51
65
  if BlacklistManager.is_blocked(ip):
52
66
  return JsonResponse({"error": "blocked"}, status=403)
67
+ segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
68
+ for seg in segments:
69
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
70
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
71
+ dynamic_top = list(
72
+ DynamicKeyword.objects
73
+ .order_by("-count")
74
+ .values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
75
+ )
76
+ all_kw = set(STATIC_KW) | set(dynamic_top)
77
+ safe_kw = {kw for kw in all_kw if any(kw in pat for pat in self.url_patterns)}
78
+ suspicious_kw = all_kw - safe_kw
79
+ for seg in segments:
80
+ if seg in suspicious_kw:
81
+ BlacklistManager.block(ip, f"Keyword block: {seg}")
82
+ return JsonResponse({"error": "blocked"}, status=403)
53
83
  return self.get_response(request)
54
84
 
55
85
 
aiwaf/trainer.py CHANGED
@@ -2,63 +2,47 @@ import os
2
2
  import glob
3
3
  import gzip
4
4
  import re
5
- import json
6
- import joblib
7
-
8
5
  from datetime import datetime
9
6
  from collections import defaultdict, Counter
10
7
 
11
8
  import pandas as pd
12
9
  from sklearn.ensemble import IsolationForest
10
+ import joblib
13
11
 
14
12
  from django.conf import settings
15
13
  from django.apps import apps
14
+ from django.db.models import F
16
15
 
17
- # ─── CONFIG ────────────────────────────────────────────────────────────────
18
-
19
- # Where to read your access logs (and rotated/.gz siblings)
20
16
  LOG_PATH = settings.AIWAF_ACCESS_LOG
17
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
21
18
 
22
- # Where we save our trained model
23
- MODEL_PATH = os.path.join(
24
- os.path.dirname(__file__),
25
- "resources",
26
- "model.pkl"
27
- )
28
-
29
- # Static “malicious” path keywords & file extensions
30
- MALICIOUS_KEYWORDS = [
31
- ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
32
- "conflg", "shell", "filemanager"
33
- ]
34
- STATUS_CODES = ["200", "403", "404", "500"]
19
+ STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
20
+ STATUS_IDX = ["200", "403", "404", "500"]
35
21
 
36
- # Regex for combined log with response-time=…
37
22
  _LOG_RX = re.compile(
38
23
  r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
39
24
  r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
40
25
  )
41
26
 
42
- # Your Django model for storing blocked IPs
43
27
  BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
28
+ DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
44
29
 
45
30
 
46
- # ─── READ & PARSE LOG LINES ─────────────────────────────────────────────────
47
-
48
31
  def _read_all_logs():
49
32
  lines = []
50
33
  if LOG_PATH and os.path.exists(LOG_PATH):
51
34
  with open(LOG_PATH, "r", errors="ignore") as f:
52
- lines += f.readlines()
53
- for path in sorted(glob.glob(LOG_PATH + ".*")):
35
+ lines.extend(f.readlines())
36
+ for path in sorted(glob.glob(f"{LOG_PATH}.*")):
54
37
  opener = gzip.open if path.endswith(".gz") else open
55
38
  try:
56
39
  with opener(path, "rt", errors="ignore") as f:
57
- lines += f.readlines()
40
+ lines.extend(f.readlines())
58
41
  except OSError:
59
42
  continue
60
43
  return lines
61
44
 
45
+
62
46
  def _parse(line):
63
47
  m = _LOG_RX.search(line)
64
48
  if not m:
@@ -78,20 +62,15 @@ def _parse(line):
78
62
  }
79
63
 
80
64
 
81
- # ─── TRAIN ENTRYPOINT ───────────────────────────────────────────────────────
82
-
83
65
  def train():
84
- raw = _read_all_logs()
85
- if not raw:
86
- print(" No log lines found – check settings.AIWAF_ACCESS_LOG")
66
+ raw_lines = _read_all_logs()
67
+ if not raw_lines:
68
+ print(" No log lines found – check AIWAF_ACCESS_LOG setting.")
87
69
  return
88
-
89
- parsed = []
90
- ip_404 = defaultdict(int)
70
+ parsed = []
71
+ ip_404 = defaultdict(int)
91
72
  ip_times = defaultdict(list)
92
-
93
- # parse + accumulate timestamps & 404 counts
94
- for ln in raw:
73
+ for ln in raw_lines:
95
74
  rec = _parse(ln)
96
75
  if not rec:
97
76
  continue
@@ -99,77 +78,74 @@ def train():
99
78
  ip_times[rec["ip"]].append(rec["timestamp"])
100
79
  if rec["status"] == "404":
101
80
  ip_404[rec["ip"]] += 1
102
-
103
- # auto-block IPs with >=6 total 404s
104
- newly_blocked = []
105
- for ip, cnt in ip_404.items():
106
- if cnt >= 6:
81
+ blocked_404 = []
82
+ for ip, count in ip_404.items():
83
+ if count >= 6:
107
84
  obj, created = BlacklistEntry.objects.get_or_create(
108
85
  ip_address=ip,
109
86
  defaults={"reason": "Excessive 404s (≥6)"}
110
87
  )
111
88
  if created:
112
- newly_blocked.append(ip)
113
- if newly_blocked:
114
- print(f"🔒 Blocked {len(newly_blocked)} IPs for 404 flood: {newly_blocked}")
115
-
116
- # build feature vectors
117
- rows = []
89
+ blocked_404.append(ip)
90
+ if blocked_404:
91
+ print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
92
+ feature_dicts = []
118
93
  for r in parsed:
119
- ip = r["ip"]
120
- burst = sum(
94
+ ip = r["ip"]
95
+ burst = sum(
121
96
  1 for t in ip_times[ip]
122
97
  if (r["timestamp"] - t).total_seconds() <= 10
123
98
  )
124
- total404 = ip_404[ip]
125
- kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
126
- status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
127
-
128
- rows.append([
129
- len(r["path"]),
130
- kw_hits,
131
- r["response_time"],
132
- status_idx,
133
- burst,
134
- total404
135
- ])
136
-
137
- if not rows:
138
- print("⚠️ No entries to train on.")
99
+ total404 = ip_404[ip]
100
+ kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
101
+ status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
102
+ feature_dicts.append({
103
+ "ip": ip,
104
+ "path_len": len(r["path"]),
105
+ "kw_hits": kw_hits,
106
+ "resp_time": r["response_time"],
107
+ "status_idx": status_idx,
108
+ "burst_count": burst,
109
+ "total_404": total404,
110
+ })
111
+
112
+ if not feature_dicts:
113
+ print("⚠️ Nothing to train on – no valid log entries.")
139
114
  return
140
115
 
141
- df = pd.DataFrame(
142
- rows,
143
- columns=[
144
- "path_len", "kw_hits", "resp_time",
145
- "status_idx", "burst_count", "total_404"
146
- ]
147
- ).fillna(0).astype(float)
148
-
149
- # train & save
150
- clf = IsolationForest(contamination=0.01, random_state=42)
151
- clf.fit(df.values)
116
+ df = pd.DataFrame(feature_dicts)
117
+ feature_cols = [c for c in df.columns if c != "ip"]
118
+ X = df[feature_cols].astype(float).values
119
+ model = IsolationForest(contamination=0.01, random_state=42)
120
+ model.fit(X)
152
121
  os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
153
- joblib.dump(clf, MODEL_PATH)
154
- print(f"✅ Model trained on {len(df)} samples → {MODEL_PATH}")
122
+ joblib.dump(model, MODEL_PATH)
123
+ print(f"✅ Model trained on {len(X)} samples → {MODEL_PATH}")
124
+ preds = model.predict(X) # -1 for outliers
125
+ anomalous_ips = set(df.loc[preds == -1, 'ip'])
126
+ blocked_anom = []
127
+ for ip in anomalous_ips:
128
+ obj, created = BlacklistEntry.objects.get_or_create(
129
+ ip_address=ip,
130
+ defaults={"reason": "Anomalous behavior"}
131
+ )
132
+ if created:
133
+ blocked_anom.append(ip)
134
+ if blocked_anom:
135
+ print(f" Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
155
136
 
156
- # extract top‑10 dynamic keywords from 4xx/5xx paths
157
137
  tokens = Counter()
158
138
  for r in parsed:
159
139
  if r["status"].startswith(("4", "5")):
160
- segs = re.split(r"\W+", r["path"].lower())
161
- for seg in segs:
162
- if len(seg) > 3 and seg not in MALICIOUS_KEYWORDS:
140
+ for seg in re.split(r"\W+", r["path"].lower()):
141
+ if len(seg) > 3 and seg not in STATIC_KW:
163
142
  tokens[seg] += 1
143
+ top_tokens = tokens.most_common(10)
144
+ for kw, cnt in top_tokens:
145
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
146
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
147
+ print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
164
148
 
165
- new_kw = [kw for kw, _ in tokens.most_common(10)]
166
- DK_FILE = os.path.join(os.path.dirname(__file__), "resources", "dynamic_keywords.json")
167
- try:
168
- existing = set(json.load(open(DK_FILE)))
169
- except FileNotFoundError:
170
- existing = set()
171
- updated = sorted(existing | set(new_kw))
172
- with open(DK_FILE, "w") as f:
173
- json.dump(updated, f, indent=2)
174
-
175
- print(f"📝 Updated dynamic keywords: {new_kw}")
149
+
150
+ if __name__ == "__main__":
151
+ train()
@@ -1,15 +1,21 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: AI-powered Web Application Firewall
5
+ Home-page: https://github.com/aayushgauba/aiwaf
5
6
  Author: Aayush Gauba
6
7
  Author-email: Aayush Gauba <gauba.aayush@gmail.com>
7
8
  License: MIT
8
9
  Requires-Python: >=3.8
9
10
  Description-Content-Type: text/markdown
11
+ License-File: LICENSE
10
12
  Dynamic: author
13
+ Dynamic: home-page
14
+ Dynamic: license-file
15
+ Dynamic: requires-python
11
16
 
12
- # AI‑WAF
17
+
18
+ # AI‑WAF
13
19
 
14
20
  > A self‑learning, Django‑friendly Web Application Firewall
15
21
  > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -91,6 +97,14 @@ pip install -e .
91
97
  ```python
92
98
  INSTALLED_APPS += ["aiwaf"]
93
99
 
100
+ ### Database Setup
101
+
102
+ After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
103
+
104
+ ```bash
105
+ python manage.py makemigrations aiwaf
106
+ python manage.py migrate
107
+
94
108
  # Required
95
109
  AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
96
110
 
@@ -159,13 +173,13 @@ python manage.py detect_and_train
159
173
 
160
174
  ## How It Works
161
175
 
162
- | Middleware | Purpose |
163
- |--------------------------|------------------------------------------------------------------|
164
- | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
165
- | RateLimitMiddleware | Enforces burst & flood thresholds |
166
- | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
167
- | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
168
- | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
176
+ | Middleware | Purpose |
177
+ |------------------------------------|-----------------------------------------------------------------|
178
+ | IPAndKeywordBlockMiddleware | Blocks blacklisted IPs and requests with malicious URL keywords |
179
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
180
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
181
+ | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
182
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
169
183
 
170
184
  ---
171
185
 
@@ -1,10 +1,10 @@
1
1
  aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
4
- aiwaf/middleware.py,sha256=UIJ-1kA-NjKwpt3JS3vvsuhjaBXGliGt_4VKuL_OGq8,5254
4
+ aiwaf/middleware.py,sha256=04AbNgkwLMaYSiuEtw59A-O02tt4cqaKmP7XDNlkIG0,6359
5
5
  aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
6
6
  aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
7
- aiwaf/trainer.py,sha256=8hU9k3bF_9QIkGix3TqFl7YuNeQV9dPriY2WhLo6s40,5411
7
+ aiwaf/trainer.py,sha256=TKWJZzWTg892vdoSGWdCA0i-dKof2b29buWqJUrkr6k,4820
8
8
  aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
9
9
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,8 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
12
12
  aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
13
13
  aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
15
- aiwaf-0.1.3.dist-info/METADATA,sha256=zgcejLdSfeE_bcqAvuebUJHN2ynKxtE24wVWdRdA_EA,4977
16
- aiwaf-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
17
- aiwaf-0.1.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
18
- aiwaf-0.1.3.dist-info/RECORD,,
15
+ aiwaf-0.1.5.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
16
+ aiwaf-0.1.5.dist-info/METADATA,sha256=g1hwdQBSJX1JBBnBim_TFtzjVMI5Ixl0WVrPPlnQCPg,5405
17
+ aiwaf-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
18
+ aiwaf-0.1.5.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
19
+ aiwaf-0.1.5.dist-info/RECORD,,
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Aayush Gauba
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
File without changes