aiwaf 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/middleware.py CHANGED
@@ -1,54 +1,104 @@
1
+ # aiwaf/middleware.py
2
+
1
3
  import time
4
+ import re
5
+ import os
2
6
  import numpy as np
3
7
  import joblib
8
+
4
9
  from collections import defaultdict
5
10
  from django.utils.deprecation import MiddlewareMixin
6
11
  from django.http import JsonResponse
7
12
  from django.conf import settings
8
13
  from django.core.cache import cache
9
- from django.urls import resolve
14
+ from django.db.models import F
10
15
  from django.apps import apps
11
- from .blacklist_manager import BlacklistManager
16
+ from django.urls import get_resolver
12
17
 
13
- try:
14
- MODEL_PATH = settings.AIWAF_MODEL_PATH
15
- except AttributeError:
16
- import importlib.resources
17
- MODEL_PATH = importlib.resources.files("aiwaf").joinpath("resources/model.pkl")
18
+ from .blacklist_manager import BlacklistManager
19
+ from .models import DynamicKeyword
18
20
 
21
+ MODEL_PATH = getattr(
22
+ settings,
23
+ "AIWAF_MODEL_PATH",
24
+ os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
25
+ )
19
26
  MODEL = joblib.load(MODEL_PATH)
20
27
 
28
+ STATIC_KW = getattr(
29
+ settings,
30
+ "AIWAF_MALICIOUS_KEYWORDS",
31
+ [
32
+ ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
33
+ "conflg", "shell", "filemanager"
34
+ ]
35
+ )
36
+
21
37
  def get_ip(request):
22
38
  xff = request.META.get("HTTP_X_FORWARDED_FOR")
23
39
  if xff:
24
40
  return xff.split(",")[0].strip()
25
41
  return request.META.get("REMOTE_ADDR", "")
26
42
 
27
-
28
- class IPBlockMiddleware:
43
+ class IPAndKeywordBlockMiddleware:
29
44
  def __init__(self, get_response):
30
45
  self.get_response = get_response
46
+ self.url_patterns = self._collect_view_paths()
47
+
48
+ def _collect_view_paths(self):
49
+ resolver = get_resolver()
50
+ patterns = set()
51
+
52
+ def extract(patterns_list, prefix=""):
53
+ for p in patterns_list:
54
+ if hasattr(p, "url_patterns"):
55
+ extract(p.url_patterns, prefix + str(p.pattern))
56
+ else:
57
+ pat = (prefix + str(p.pattern)).strip("^$")
58
+ patterns.add(pat)
59
+ extract(resolver.url_patterns)
60
+ return patterns
31
61
 
32
62
  def __call__(self, request):
33
63
  ip = get_ip(request)
64
+ path = request.path.lower()
34
65
  if BlacklistManager.is_blocked(ip):
35
66
  return JsonResponse({"error": "blocked"}, status=403)
67
+ segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
68
+ for seg in segments:
69
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
70
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
71
+ dynamic_top = list(
72
+ DynamicKeyword.objects
73
+ .order_by("-count")
74
+ .values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
75
+ )
76
+ all_kw = set(STATIC_KW) | set(dynamic_top)
77
+ safe_kw = {kw for kw in all_kw if any(kw in pat for pat in self.url_patterns)}
78
+ suspicious_kw = all_kw - safe_kw
79
+ for seg in segments:
80
+ if seg in suspicious_kw:
81
+ BlacklistManager.block(ip, f"Keyword block: {seg}")
82
+ return JsonResponse({"error": "blocked"}, status=403)
36
83
  return self.get_response(request)
37
84
 
38
85
 
39
86
  class RateLimitMiddleware:
40
- WINDOW = getattr(settings, "AIWAF_RATE_WINDOW", 10)
41
- MAX = getattr(settings, "AIWAF_RATE_MAX", 20)
42
- FLOOD = getattr(settings, "AIWAF_RATE_FLOOD", 10)
87
+ WINDOW = 10
88
+ MAX = 20
89
+ FLOOD = 10
90
+
43
91
  def __init__(self, get_response):
44
92
  self.get_response = get_response
45
93
  self.logs = defaultdict(list)
94
+
46
95
  def __call__(self, request):
47
- ip = get_ip(request)
96
+ ip = get_ip(request)
48
97
  now = time.time()
49
98
  recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
50
99
  recs.append(now)
51
100
  self.logs[ip] = recs
101
+
52
102
  if len(recs) > self.MAX:
53
103
  return JsonResponse({"error": "too_many_requests"}, status=429)
54
104
  if len(recs) > self.FLOOD:
@@ -59,40 +109,57 @@ class RateLimitMiddleware:
59
109
 
60
110
 
61
111
  class AIAnomalyMiddleware(MiddlewareMixin):
62
- WINDOW_SECONDS = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
112
+ WINDOW = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
113
+ TOP_N = getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)
114
+
63
115
  def process_request(self, request):
64
116
  ip = get_ip(request)
65
117
  if BlacklistManager.is_blocked(ip):
66
118
  return JsonResponse({"error": "blocked"}, status=403)
119
+
67
120
  now = time.time()
68
121
  key = f"aiwaf:{ip}"
69
122
  data = cache.get(key, [])
123
+ # TODO: you may want to capture real status & response_time in process_response
70
124
  data.append((now, request.path, 0, 0.0))
71
- data = [d for d in data if now - d[0] < self.WINDOW_SECONDS]
72
- cache.set(key, data, timeout=self.WINDOW_SECONDS)
125
+ data = [d for d in data if now - d[0] < self.WINDOW]
126
+ cache.set(key, data, timeout=self.WINDOW)
127
+
128
+ # update dynamic‐keyword counts
129
+ for seg in re.split(r"\W+", request.path.lower()):
130
+ if len(seg) > 3:
131
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
132
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
133
+
73
134
  if len(data) < 5:
74
135
  return None
75
- total = len(data)
76
- ratio_404 = sum(1 for (_, _, st, _) in data if st == 404) / total
77
- hits = sum(
78
- any(k in path.lower() for k in settings.AIWAF_MALICIOUS_KEYWORDS)
79
- for (_, path, _, _) in data
136
+
137
+ # pull top‐N dynamic tokens
138
+ top_dynamic = list(
139
+ DynamicKeyword.objects
140
+ .order_by("-count")
141
+ .values_list("keyword", flat=True)[: self.TOP_N]
80
142
  )
81
- avg_rt = np.mean([rt for (_, _, _, rt) in data]) if data else 0.0
82
- intervals = [
83
- data[i][0] - data[i-1][0] for i in range(1, total)
84
- ]
85
- avg_iv = np.mean(intervals) if intervals else 0.0
86
- X = np.array([[total, ratio_404, hits, avg_rt, avg_iv]], dtype=float)
143
+ ALL_KW = set(STATIC_KW) | set(top_dynamic)
144
+
145
+ total = len(data)
146
+ ratio404 = sum(1 for (_, _, st, _) in data if st == 404) / total
147
+ hits = sum(any(kw in path.lower() for kw in ALL_KW) for (_, path, _, _) in data)
148
+ avg_rt = np.mean([rt for (_, _, _, rt) in data]) if data else 0.0
149
+ ivs = [data[i][0] - data[i - 1][0] for i in range(1, total)]
150
+ avg_iv = np.mean(ivs) if ivs else 0.0
151
+
152
+ X = np.array([[total, ratio404, hits, avg_rt, avg_iv]], dtype=float)
87
153
  if MODEL.predict(X)[0] == -1:
88
154
  BlacklistManager.block(ip, "AI anomaly")
89
155
  return JsonResponse({"error": "blocked"}, status=403)
156
+
90
157
  return None
91
158
 
92
159
 
93
160
  class HoneypotMiddleware(MiddlewareMixin):
94
161
  def process_view(self, request, view_func, view_args, view_kwargs):
95
- trap = request.POST.get(settings.AIWAF_HONEYPOT_FIELD, "")
162
+ trap = request.POST.get(getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field"), "")
96
163
  if trap:
97
164
  ip = get_ip(request)
98
165
  BlacklistManager.block(ip, "HONEYPOT triggered")
@@ -105,11 +172,13 @@ class UUIDTamperMiddleware(MiddlewareMixin):
105
172
  uid = view_kwargs.get("uuid")
106
173
  if not uid:
107
174
  return None
175
+
108
176
  ip = get_ip(request)
109
- app_label = view_kwargs.get("app_label") or view_func.__module__.split('.')[0]
110
- app_config = apps.get_app_config(app_label)
111
- for Model in app_config.get_models():
177
+ app_label = view_func.__module__.split(".")[0]
178
+ app_cfg = apps.get_app_config(app_label)
179
+ for Model in app_cfg.get_models():
112
180
  if Model.objects.filter(pk=uid).exists():
113
181
  return None
182
+
114
183
  BlacklistManager.block(ip, "UUID tampering")
115
- return JsonResponse({"error": "blocked"}, status=403)
184
+ return JsonResponse({"error": "blocked"}, status=403)
aiwaf/models.py CHANGED
@@ -26,3 +26,11 @@ class BlacklistEntry(models.Model):
26
26
 
27
27
  def __str__(self):
28
28
  return f"{self.ip_address} ({self.reason})"
29
+
30
+ class DynamicKeyword(models.Model):
31
+ keyword = models.CharField(max_length=100, unique=True)
32
+ count = models.PositiveIntegerField(default=0)
33
+ last_updated = models.DateTimeField(auto_now=True)
34
+
35
+ class Meta:
36
+ ordering = ['-count']
aiwaf/trainer.py CHANGED
@@ -1,44 +1,48 @@
1
- # aiwaf/trainer.py
2
-
3
1
  import os
4
2
  import glob
5
3
  import gzip
6
4
  import re
7
- import joblib
8
5
  from datetime import datetime
9
- from collections import defaultdict
10
- from .models import BlacklistEntry
6
+ from collections import defaultdict, Counter
7
+
11
8
  import pandas as pd
12
9
  from sklearn.ensemble import IsolationForest
10
+ import joblib
11
+
13
12
  from django.conf import settings
14
13
  from django.apps import apps
14
+ from django.db.models import F
15
+
16
+ LOG_PATH = settings.AIWAF_ACCESS_LOG
17
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
18
+
19
+ STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
20
+ STATUS_IDX = ["200", "403", "404", "500"]
15
21
 
16
- LOG_PATH = settings.AIWAF_ACCESS_LOG
17
- MODEL_PATH = os.path.join(
18
- os.path.dirname(__file__),
19
- "resources",
20
- "model.pkl"
21
- )
22
- MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
23
- STATUS_CODES = ["200", "403", "404", "500"]
24
22
  _LOG_RX = re.compile(
25
- r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
23
+ r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
24
+ r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
26
25
  )
27
- BlacklistedIP = BlacklistEntry.objects.all()
26
+
27
+ BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
28
+ DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
29
+
30
+
28
31
  def _read_all_logs():
29
32
  lines = []
30
33
  if LOG_PATH and os.path.exists(LOG_PATH):
31
34
  with open(LOG_PATH, "r", errors="ignore") as f:
32
- lines += f.readlines()
33
- for path in sorted(glob.glob(LOG_PATH + ".*")):
35
+ lines.extend(f.readlines())
36
+ for path in sorted(glob.glob(f"{LOG_PATH}.*")):
34
37
  opener = gzip.open if path.endswith(".gz") else open
35
38
  try:
36
39
  with opener(path, "rt", errors="ignore") as f:
37
- lines += f.readlines()
40
+ lines.extend(f.readlines())
38
41
  except OSError:
39
42
  continue
40
43
  return lines
41
44
 
45
+
42
46
  def _parse(line):
43
47
  m = _LOG_RX.search(line)
44
48
  if not m:
@@ -59,14 +63,14 @@ def _parse(line):
59
63
 
60
64
 
61
65
  def train():
62
- raw = _read_all_logs()
63
- if not raw:
64
- print("No log lines found – check AIWAF_ACCESS_LOG")
66
+ raw_lines = _read_all_logs()
67
+ if not raw_lines:
68
+ print(" No log lines found – check AIWAF_ACCESS_LOG setting.")
65
69
  return
66
70
  parsed = []
67
- ip_404 = defaultdict(int)
71
+ ip_404 = defaultdict(int)
68
72
  ip_times = defaultdict(list)
69
- for ln in raw:
73
+ for ln in raw_lines:
70
74
  rec = _parse(ln)
71
75
  if not rec:
72
76
  continue
@@ -74,7 +78,7 @@ def train():
74
78
  ip_times[rec["ip"]].append(rec["timestamp"])
75
79
  if rec["status"] == "404":
76
80
  ip_404[rec["ip"]] += 1
77
- blocked = []
81
+ blocked_404 = []
78
82
  for ip, count in ip_404.items():
79
83
  if count >= 6:
80
84
  obj, created = BlacklistEntry.objects.get_or_create(
@@ -82,42 +86,66 @@ def train():
82
86
  defaults={"reason": "Excessive 404s (≥6)"}
83
87
  )
84
88
  if created:
85
- blocked.append(ip)
86
- if blocked:
87
- print(f"Auto‑blocked {len(blocked)} IPs for ≥6 404s: {', '.join(blocked)}")
88
- rows = []
89
+ blocked_404.append(ip)
90
+ if blocked_404:
91
+ print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
92
+ feature_dicts = []
89
93
  for r in parsed:
90
- ip = r["ip"]
91
- burst = sum(
94
+ ip = r["ip"]
95
+ burst = sum(
92
96
  1 for t in ip_times[ip]
93
97
  if (r["timestamp"] - t).total_seconds() <= 10
94
98
  )
95
- total404 = ip_404[ip]
96
- kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
97
- status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
98
- rows.append([
99
- len(r["path"]),
100
- kw_hits,
101
- r["response_time"],
102
- status_idx,
103
- burst,
104
- total404
105
- ])
106
-
107
- if not rows:
108
- print("No entries to train on!")
99
+ total404 = ip_404[ip]
100
+ kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
101
+ status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
102
+ feature_dicts.append({
103
+ "ip": ip,
104
+ "path_len": len(r["path"]),
105
+ "kw_hits": kw_hits,
106
+ "resp_time": r["response_time"],
107
+ "status_idx": status_idx,
108
+ "burst_count": burst,
109
+ "total_404": total404,
110
+ })
111
+
112
+ if not feature_dicts:
113
+ print("⚠️ Nothing to train on – no valid log entries.")
109
114
  return
110
115
 
111
- df = pd.DataFrame(
112
- rows,
113
- columns=[
114
- "path_len", "kw_hits", "resp_time",
115
- "status_idx", "burst_count", "total_404"
116
- ]
117
- ).fillna(0).astype(float)
118
- clf = IsolationForest(contamination=0.01, random_state=42)
119
- clf.fit(df.values)
116
+ df = pd.DataFrame(feature_dicts)
117
+ feature_cols = [c for c in df.columns if c != "ip"]
118
+ X = df[feature_cols].astype(float).values
119
+ model = IsolationForest(contamination=0.01, random_state=42)
120
+ model.fit(X)
120
121
  os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
121
- joblib.dump(clf, MODEL_PATH)
122
- print(f"Model trained on {len(df)} samples and saved to {MODEL_PATH}")
122
+ joblib.dump(model, MODEL_PATH)
123
+ print(f"Model trained on {len(X)} samples {MODEL_PATH}")
124
+ preds = model.predict(X) # -1 for outliers
125
+ anomalous_ips = set(df.loc[preds == -1, 'ip'])
126
+ blocked_anom = []
127
+ for ip in anomalous_ips:
128
+ obj, created = BlacklistEntry.objects.get_or_create(
129
+ ip_address=ip,
130
+ defaults={"reason": "Anomalous behavior"}
131
+ )
132
+ if created:
133
+ blocked_anom.append(ip)
134
+ if blocked_anom:
135
+ print(f" Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
136
+
137
+ tokens = Counter()
138
+ for r in parsed:
139
+ if r["status"].startswith(("4", "5")):
140
+ for seg in re.split(r"\W+", r["path"].lower()):
141
+ if len(seg) > 3 and seg not in STATIC_KW:
142
+ tokens[seg] += 1
143
+ top_tokens = tokens.most_common(10)
144
+ for kw, cnt in top_tokens:
145
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
146
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
147
+ print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
148
+
123
149
 
150
+ if __name__ == "__main__":
151
+ train()
@@ -0,0 +1,195 @@
1
+ Metadata-Version: 2.4
2
+ Name: aiwaf
3
+ Version: 0.1.5
4
+ Summary: AI-powered Web Application Firewall
5
+ Home-page: https://github.com/aayushgauba/aiwaf
6
+ Author: Aayush Gauba
7
+ Author-email: Aayush Gauba <gauba.aayush@gmail.com>
8
+ License: MIT
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Dynamic: author
13
+ Dynamic: home-page
14
+ Dynamic: license-file
15
+ Dynamic: requires-python
16
+
17
+
18
+ # AI‑WAF
19
+
20
+ > A self‑learning, Django‑friendly Web Application Firewall
21
+ > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
22
+
23
+ ---
24
+
25
+ ## Package Structure
26
+
27
+ ```
28
+ aiwaf/
29
+ ├── __init__.py
30
+ ├── blacklist_manager.py
31
+ ├── middleware.py
32
+ ├── trainer.py # exposes train()
33
+ ├── utils.py
34
+ ├── template_tags/
35
+ │ └── aiwaf_tags.py
36
+ ├── resources/
37
+ │ ├── model.pkl # pre‑trained base model
38
+ │ └── dynamic_keywords.json # evolves daily
39
+ ├── management/
40
+ │ └── commands/
41
+ │ └── detect_and_train.py # `python manage.py detect_and_train`
42
+ └── LICENSE
43
+ ```
44
+
45
+ ---
46
+
47
+ ## Features
48
+
49
+ - **IP Blocklist**
50
+ Instantly blocks suspicious IPs (supports CSV fallback or Django model).
51
+
52
+ - **Rate Limiting**
53
+ Sliding‑window blocks flooders (> `AIWAF_RATE_MAX` per `AIWAF_RATE_WINDOW`), then blacklists them.
54
+
55
+ - **AI Anomaly Detection**
56
+ IsolationForest on features:
57
+ - Path length
58
+ - Keyword hits (static + dynamic)
59
+ - Response time
60
+ - Status‑code index
61
+ - Burst count
62
+ - Total 404s
63
+
64
+ - **Dynamic Keyword Extraction**
65
+ Every retrain: top 10 most frequent “words” from 4xx/5xx paths are appended to your malicious keyword set.
66
+
67
+ - **File‑Extension Probing Detection**
68
+ Tracks repeated 404s on common web‑extensions (e.g. `.php`, `.asp`) and auto‑blocks after a burst.
69
+
70
+ - **Honeypot Field**
71
+ Hidden form field (via template tag) that bots fill → instant block.
72
+
73
+ - **UUID Tampering Protection**
74
+ Any `<uuid:…>` URL that doesn’t map to **any** model in its Django app gets blocked.
75
+
76
+ - **Daily Retraining**
77
+ Reads rotated/gzipped logs, auto‑blocks 404 floods (≥6), retrains the model, and updates `model.pkl` plus the `DynamicKeyword` database table.
78
+
79
+ ---
80
+
81
+ ## Installation
82
+
83
+ ```bash
84
+ # From PyPI
85
+ pip install aiwaf
86
+
87
+ # Or for local development
88
+ git clone https://github.com/aayushgauba/aiwaf.git
89
+ cd aiwaf
90
+ pip install -e .
91
+ ```
92
+
93
+ ---
94
+
95
+ ## ⚙️ Configuration (`settings.py`)
96
+
97
+ ```python
98
+ INSTALLED_APPS += ["aiwaf"]
99
+
100
+ ### Database Setup
101
+
102
+ After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
103
+
104
+ ```bash
105
+ python manage.py makemigrations aiwaf
106
+ python manage.py migrate
107
+
108
+ # Required
109
+ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
110
+
111
+ # Optional (defaults shown)
112
+ AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
113
+ AIWAF_HONEYPOT_FIELD = "hp_field"
114
+ AIWAF_RATE_WINDOW = 10 # seconds
115
+ AIWAF_RATE_MAX = 20 # max reqs/window
116
+ AIWAF_RATE_FLOOD = 10 # flood threshold
117
+ AIWAF_WINDOW_SECONDS = 60 # anomaly window
118
+ AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"] # 404‑burst tracked extensions
119
+ ```
120
+
121
+ > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` in your settings — they’re built in and evolve dynamically.
122
+
123
+ ---
124
+
125
+ ## Middleware Setup
126
+
127
+ Add in **this** order to your `MIDDLEWARE` list:
128
+
129
+ ```python
130
+ MIDDLEWARE = [
131
+ "aiwaf.middleware.IPAndKeywordBlockMiddleware",
132
+ "aiwaf.middleware.RateLimitMiddleware",
133
+ "aiwaf.middleware.AIAnomalyMiddleware",
134
+ "aiwaf.middleware.HoneypotMiddleware",
135
+ "aiwaf.middleware.UUIDTamperMiddleware",
136
+ # ... other middleware ...
137
+ ]
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Honeypot Field (in your template)
143
+
144
+ ```django
145
+ {% load aiwaf_tags %}
146
+
147
+ <form method="post">
148
+ {% csrf_token %}
149
+ {% honeypot_field %}
150
+ <!-- your real fields -->
151
+ </form>
152
+ ```
153
+
154
+ > Renders a hidden `<input name="hp_field" style="display:none">`.
155
+ > Any non‑empty submission → IP blacklisted.
156
+
157
+ ---
158
+
159
+ ## Running Detection & Training
160
+
161
+ ```bash
162
+ python manage.py detect_and_train
163
+ ```
164
+
165
+ **What happens:**
166
+ 1. Read access logs
167
+ 2. Auto‑block IPs with ≥ 6 total 404s
168
+ 3. Extract features & train IsolationForest
169
+ 4. Save `model.pkl`
170
+ 5. Extract top 10 dynamic keywords from 4xx/5xx
171
+
172
+ ---
173
+
174
+ ## How It Works
175
+
176
+ | Middleware | Purpose |
177
+ |------------------------------------|-----------------------------------------------------------------|
178
+ | IPAndKeywordBlockMiddleware        | Blocks blacklisted IPs and paths containing suspicious keywords |
179
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
180
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
181
+ | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
182
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
183
+
184
+ ---
185
+
186
+ ## License
187
+
188
+ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
189
+
190
+ ---
191
+
192
+ ## Credits
193
+
194
+ **AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
195
+ > “Let your firewall learn and evolve — keep your site a fortress.”
@@ -1,10 +1,10 @@
1
1
  aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
4
- aiwaf/middleware.py,sha256=WHoXMtKZ_WpueSPylH4XXpKzM9-cqPDunDM0fuklzg0,4220
5
- aiwaf/models.py,sha256=GTojMOZ3M5pDDNmpU4o353M3w59jEclzHqJgofHzfBA,1021
4
+ aiwaf/middleware.py,sha256=04AbNgkwLMaYSiuEtw59A-O02tt4cqaKmP7XDNlkIG0,6359
5
+ aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
6
6
  aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
7
- aiwaf/trainer.py,sha256=8eLrq3bOmRle4KDRWnzxnc-Gv6oo5IErqoDGO_v_qG4,3629
7
+ aiwaf/trainer.py,sha256=TKWJZzWTg892vdoSGWdCA0i-dKof2b29buWqJUrkr6k,4820
8
8
  aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
9
9
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,8 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
12
12
  aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
13
13
  aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
15
- aiwaf-0.1.2.dist-info/METADATA,sha256=htVp1TT0tRdAFDIxC1mjv6opQMQIHy8MhHPv0NDPIp4,4396
16
- aiwaf-0.1.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
17
- aiwaf-0.1.2.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
18
- aiwaf-0.1.2.dist-info/RECORD,,
15
+ aiwaf-0.1.5.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
16
+ aiwaf-0.1.5.dist-info/METADATA,sha256=g1hwdQBSJX1JBBnBim_TFtzjVMI5Ixl0WVrPPlnQCPg,5405
17
+ aiwaf-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
18
+ aiwaf-0.1.5.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
19
+ aiwaf-0.1.5.dist-info/RECORD,,
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Aayush Gauba
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: aiwaf
3
- Version: 0.1.2
4
- Summary: AI-powered Web Application Firewall
5
- Author: Aayush Gauba
6
- Author-email: Aayush Gauba <gauba.aayush@gmail.com>
7
- License: MIT
8
- Requires-Python: >=3.8
9
- Description-Content-Type: text/markdown
10
- Dynamic: author
11
-
12
- # AI‑WAF
13
-
14
- > A self-learning, Django-friendly Web Application Firewall
15
- > with rate-limiting, anomaly detection, honeypots, UUID-tamper protection, and daily retraining.
16
-
17
- ---
18
-
19
- ## Package Structure
20
-
21
- ```
22
- aiwaf/
23
- ├── __init__.py
24
- ├── blacklist_manager.py
25
- ├── middleware.py
26
- ├── trainer.py # exposes detect_and_train()
27
- ├── utils.py
28
- ├── template_tags/
29
- │ └── aiwaf_tags.py
30
- ├── resources/
31
- │ └── model.pkl # pre-trained base model
32
- ├── management/
33
- │ └── commands/
34
- │ └── detect_and_train.py # python manage.py detect_and_train
35
- └── LICENSE
36
- ```
37
-
38
- ---
39
-
40
- ## Features
41
-
42
- - **IP Blocklist**
43
- Automatically blocks suspicious IPs; optionally backed by CSV or Django model.
44
-
45
- - **Rate Limiting**
46
- Sliding window logic blocks IPs exceeding a threshold of requests per second.
47
-
48
- - **AI Anomaly Detection**
49
- IsolationForest trained on real logs with features like:
50
- - Path length
51
- - Keyword hits
52
- - Response time
53
- - Status code index
54
- - Burst count
55
- - Total 404s
56
-
57
- - **Honeypot Field**
58
- Hidden form field that bots are likely to fill — if triggered, the IP is blocked.
59
-
60
- - **UUID Tampering Protection**
61
- Detects if someone is probing by injecting random/nonexistent UUIDs into URLs.
62
-
63
- - **Daily Retraining**
64
- A single command retrains your model every day based on your logs.
65
-
66
- ---
67
-
68
- ## Installation
69
-
70
- Install locally or from PyPI:
71
-
72
- ```bash
73
- pip install aiwaf
74
- ```
75
-
76
- Or for local dev:
77
-
78
- ```bash
79
- git clone https://github.com/aayushgauba/aiwaf.git
80
- cd aiwaf
81
- pip install -e .
82
- ```
83
-
84
- ---
85
-
86
- ## ⚙️ Configuration (`settings.py`)
87
-
88
- ```python
89
- INSTALLED_APPS += [
90
- "aiwaf",
91
- ]
92
-
93
- # Required
94
- AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
95
-
96
- # Optional (defaults included)
97
- AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
98
- AIWAF_MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
99
- AIWAF_STATUS_CODES = ["200", "403", "404", "500"]
100
- AIWAF_HONEYPOT_FIELD = "hp_field"
101
- ```
102
-
103
- ---
104
-
105
- ## Middleware Setup
106
-
107
- Add to `MIDDLEWARE` in order:
108
-
109
- ```python
110
- MIDDLEWARE = [
111
- "aiwaf.middleware.IPBlockMiddleware",
112
- "aiwaf.middleware.RateLimitMiddleware",
113
- "aiwaf.middleware.AIAnomalyMiddleware",
114
- "aiwaf.middleware.HoneypotMiddleware",
115
- "aiwaf.middleware.UUIDTamperMiddleware",
116
- ...
117
- ]
118
- ```
119
-
120
- ---
121
-
122
- ## Honeypot Field (in template)
123
-
124
- ```html
125
- {% load aiwaf_tags %}
126
-
127
- <form method="post">
128
- {% csrf_token %}
129
- {% honeypot_field %}
130
- <!-- other fields -->
131
- </form>
132
- ```
133
-
134
- The hidden field will be `<input type="hidden" name="hp_field">`.
135
- If it’s ever filled → IP gets blocked.
136
-
137
- ---
138
-
139
- ## Run Detection + Training
140
-
141
- ```bash
142
- python manage.py detect_and_train
143
- ```
144
-
145
- What it does:
146
-
147
- - Reads logs (supports `.gz` and rotated logs).
148
- - Detects excessive 404s (≥6) → instant block.
149
- - Builds feature vectors from logs.
150
- - Trains IsolationForest and saves `model.pkl`.
151
-
152
- Schedule it to run daily via `cron`, `Celery beat`, or systemd timer.
153
-
154
- ---
155
-
156
- ## How It Works (Simplified)
157
-
158
- | Middleware | Functionality |
159
- |------------------------|--------------------------------------------------------------|
160
- | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
161
- | RateLimitMiddleware | Blocks flooders (>20/10s) and blacklists them (>10/10s) |
162
- | AIAnomalyMiddleware | Uses ML to detect suspicious behavior in request patterns |
163
- | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
164
- | UUIDTamperMiddleware | Detects guessing/probing by checking invalid UUID access |
165
-
166
- ---
167
-
168
- ## Development Roadmap
169
-
170
- - [ ] Add CSV blocklist fallback
171
- - [ ] Admin dashboard integration
172
- - [ ] Auto-pruning of old block entries
173
- - [ ] Real-time log streaming compatibility
174
- - [ ] Docker/Helm deployment guide
175
-
176
- ---
177
-
178
- ## License
179
-
180
- This project is licensed under the **MIT License** — see `LICENSE` for details.
181
-
182
- ---
183
-
184
- ## Credits
185
-
186
- **AIWAF** by [Aayush Gauba](https://github.com/aayushgauba)
187
- > "Let your firewall learn and evolve with your logs. Make your site a fortress."
File without changes