aiwaf 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/middleware.py CHANGED
@@ -1,23 +1,40 @@
1
+ # aiwaf/middleware.py
2
+
1
3
  import time
4
+ import re
5
+ import os
2
6
  import numpy as np
3
7
  import joblib
8
+
4
9
  from collections import defaultdict
5
10
  from django.utils.deprecation import MiddlewareMixin
6
11
  from django.http import JsonResponse
7
12
  from django.conf import settings
8
13
  from django.core.cache import cache
9
- from django.urls import resolve
14
+ from django.db.models import F
10
15
  from django.apps import apps
11
- from .blacklist_manager import BlacklistManager
12
16
 
13
- try:
14
- MODEL_PATH = settings.AIWAF_MODEL_PATH
15
- except AttributeError:
16
- import importlib.resources
17
- MODEL_PATH = importlib.resources.files("aiwaf").joinpath("resources/model.pkl")
17
+ from .blacklist_manager import BlacklistManager
18
+ from .models import DynamicKeyword
18
19
 
20
+ # ─── Model loading with fallback ────────────────────────────────────────────
21
+ MODEL_PATH = getattr(
22
+ settings,
23
+ "AIWAF_MODEL_PATH",
24
+ os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
25
+ )
19
26
  MODEL = joblib.load(MODEL_PATH)
20
27
 
28
+ # ─── Static keywords default ────────────────────────────────────────────────
29
+ STATIC_KW = getattr(
30
+ settings,
31
+ "AIWAF_MALICIOUS_KEYWORDS",
32
+ [
33
+ ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
34
+ "conflg", "shell", "filemanager"
35
+ ]
36
+ )
37
+
21
38
  def get_ip(request):
22
39
  xff = request.META.get("HTTP_X_FORWARDED_FOR")
23
40
  if xff:
@@ -37,18 +54,21 @@ class IPBlockMiddleware:
37
54
 
38
55
 
39
56
  class RateLimitMiddleware:
40
- WINDOW = getattr(settings, "AIWAF_RATE_WINDOW", 10)
41
- MAX = getattr(settings, "AIWAF_RATE_MAX", 20)
42
- FLOOD = getattr(settings, "AIWAF_RATE_FLOOD", 10)
57
+ WINDOW = 10
58
+ MAX = 20
59
+ FLOOD = 10
60
+
43
61
  def __init__(self, get_response):
44
62
  self.get_response = get_response
45
63
  self.logs = defaultdict(list)
64
+
46
65
  def __call__(self, request):
47
- ip = get_ip(request)
66
+ ip = get_ip(request)
48
67
  now = time.time()
49
68
  recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
50
69
  recs.append(now)
51
70
  self.logs[ip] = recs
71
+
52
72
  if len(recs) > self.MAX:
53
73
  return JsonResponse({"error": "too_many_requests"}, status=429)
54
74
  if len(recs) > self.FLOOD:
@@ -59,40 +79,57 @@ class RateLimitMiddleware:
59
79
 
60
80
 
61
81
  class AIAnomalyMiddleware(MiddlewareMixin):
62
- WINDOW_SECONDS = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
82
+ WINDOW = getattr(settings, "AIWAF_WINDOW_SECONDS", 60)
83
+ TOP_N = getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)
84
+
63
85
  def process_request(self, request):
64
86
  ip = get_ip(request)
65
87
  if BlacklistManager.is_blocked(ip):
66
88
  return JsonResponse({"error": "blocked"}, status=403)
89
+
67
90
  now = time.time()
68
91
  key = f"aiwaf:{ip}"
69
92
  data = cache.get(key, [])
93
+ # TODO: you may want to capture real status & response_time in process_response
70
94
  data.append((now, request.path, 0, 0.0))
71
- data = [d for d in data if now - d[0] < self.WINDOW_SECONDS]
72
- cache.set(key, data, timeout=self.WINDOW_SECONDS)
95
+ data = [d for d in data if now - d[0] < self.WINDOW]
96
+ cache.set(key, data, timeout=self.WINDOW)
97
+
98
+ # update dynamic‐keyword counts
99
+ for seg in re.split(r"\W+", request.path.lower()):
100
+ if len(seg) > 3:
101
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
102
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
103
+
73
104
  if len(data) < 5:
74
105
  return None
75
- total = len(data)
76
- ratio_404 = sum(1 for (_, _, st, _) in data if st == 404) / total
77
- hits = sum(
78
- any(k in path.lower() for k in settings.AIWAF_MALICIOUS_KEYWORDS)
79
- for (_, path, _, _) in data
106
+
107
+ # pull top‐N dynamic tokens
108
+ top_dynamic = list(
109
+ DynamicKeyword.objects
110
+ .order_by("-count")
111
+ .values_list("keyword", flat=True)[: self.TOP_N]
80
112
  )
81
- avg_rt = np.mean([rt for (_, _, _, rt) in data]) if data else 0.0
82
- intervals = [
83
- data[i][0] - data[i-1][0] for i in range(1, total)
84
- ]
85
- avg_iv = np.mean(intervals) if intervals else 0.0
86
- X = np.array([[total, ratio_404, hits, avg_rt, avg_iv]], dtype=float)
113
+ ALL_KW = set(STATIC_KW) | set(top_dynamic)
114
+
115
+ total = len(data)
116
+ ratio404 = sum(1 for (_, _, st, _) in data if st == 404) / total
117
+ hits = sum(any(kw in path.lower() for kw in ALL_KW) for (_, path, _, _) in data)
118
+ avg_rt = np.mean([rt for (_, _, _, rt) in data]) if data else 0.0
119
+ ivs = [data[i][0] - data[i - 1][0] for i in range(1, total)]
120
+ avg_iv = np.mean(ivs) if ivs else 0.0
121
+
122
+ X = np.array([[total, ratio404, hits, avg_rt, avg_iv]], dtype=float)
87
123
  if MODEL.predict(X)[0] == -1:
88
124
  BlacklistManager.block(ip, "AI anomaly")
89
125
  return JsonResponse({"error": "blocked"}, status=403)
126
+
90
127
  return None
91
128
 
92
129
 
93
130
  class HoneypotMiddleware(MiddlewareMixin):
94
131
  def process_view(self, request, view_func, view_args, view_kwargs):
95
- trap = request.POST.get(settings.AIWAF_HONEYPOT_FIELD, "")
132
+ trap = request.POST.get(getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field"), "")
96
133
  if trap:
97
134
  ip = get_ip(request)
98
135
  BlacklistManager.block(ip, "HONEYPOT triggered")
@@ -105,11 +142,13 @@ class UUIDTamperMiddleware(MiddlewareMixin):
105
142
  uid = view_kwargs.get("uuid")
106
143
  if not uid:
107
144
  return None
145
+
108
146
  ip = get_ip(request)
109
- app_label = view_kwargs.get("app_label") or view_func.__module__.split('.')[0]
110
- app_config = apps.get_app_config(app_label)
111
- for Model in app_config.get_models():
147
+ app_label = view_func.__module__.split(".")[0]
148
+ app_cfg = apps.get_app_config(app_label)
149
+ for Model in app_cfg.get_models():
112
150
  if Model.objects.filter(pk=uid).exists():
113
151
  return None
152
+
114
153
  BlacklistManager.block(ip, "UUID tampering")
115
- return JsonResponse({"error": "blocked"}, status=403)
154
+ return JsonResponse({"error": "blocked"}, status=403)
aiwaf/models.py CHANGED
@@ -26,3 +26,11 @@ class BlacklistEntry(models.Model):
26
26
 
27
27
  def __str__(self):
28
28
  return f"{self.ip_address} ({self.reason})"
29
+
30
+ class DynamicKeyword(models.Model):
31
+ keyword = models.CharField(max_length=100, unique=True)
32
+ count = models.PositiveIntegerField(default=0)
33
+ last_updated = models.DateTimeField(auto_now=True)
34
+
35
+ class Meta:
36
+ ordering = ['-count']
aiwaf/trainer.py CHANGED
@@ -1,30 +1,50 @@
1
- # aiwaf/trainer.py
2
-
3
1
  import os
4
2
  import glob
5
3
  import gzip
6
4
  import re
5
+ import json
7
6
  import joblib
7
+
8
8
  from datetime import datetime
9
- from collections import defaultdict
10
- from .models import BlacklistEntry
9
+ from collections import defaultdict, Counter
10
+
11
11
  import pandas as pd
12
12
  from sklearn.ensemble import IsolationForest
13
+
13
14
  from django.conf import settings
14
15
  from django.apps import apps
15
16
 
16
- LOG_PATH = settings.AIWAF_ACCESS_LOG
17
+ # ─── CONFIG ────────────────────────────────────────────────────────────────
18
+
19
+ # Where to read your access logs (and rotated/.gz siblings)
20
+ LOG_PATH = settings.AIWAF_ACCESS_LOG
21
+
22
+ # Where we save our trained model
17
23
  MODEL_PATH = os.path.join(
18
24
  os.path.dirname(__file__),
19
25
  "resources",
20
26
  "model.pkl"
21
27
  )
22
- MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
23
- STATUS_CODES = ["200", "403", "404", "500"]
28
+
29
+ # Static “malicious” path keywords & file extensions
30
+ MALICIOUS_KEYWORDS = [
31
+ ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
32
+ "conflg", "shell", "filemanager"
33
+ ]
34
+ STATUS_CODES = ["200", "403", "404", "500"]
35
+
36
+ # Regex for combined log with response-time=…
24
37
  _LOG_RX = re.compile(
25
- r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" (\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
38
+ r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
39
+ r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
26
40
  )
27
- BlacklistedIP = BlacklistEntry.objects.all()
41
+
42
+ # Your Django model for storing blocked IPs
43
+ BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
44
+
45
+
46
+ # ─── READ & PARSE LOG LINES ─────────────────────────────────────────────────
47
+
28
48
  def _read_all_logs():
29
49
  lines = []
30
50
  if LOG_PATH and os.path.exists(LOG_PATH):
@@ -58,14 +78,19 @@ def _parse(line):
58
78
  }
59
79
 
60
80
 
81
+ # ─── TRAIN ENTRYPOINT ───────────────────────────────────────────────────────
82
+
61
83
  def train():
62
84
  raw = _read_all_logs()
63
85
  if not raw:
64
- print("No log lines found – check AIWAF_ACCESS_LOG")
86
+ print("No log lines found – check settings.AIWAF_ACCESS_LOG")
65
87
  return
66
- parsed = []
88
+
89
+ parsed = []
67
90
  ip_404 = defaultdict(int)
68
91
  ip_times = defaultdict(list)
92
+
93
+ # parse + accumulate timestamps & 404 counts
69
94
  for ln in raw:
70
95
  rec = _parse(ln)
71
96
  if not rec:
@@ -74,27 +99,32 @@ def train():
74
99
  ip_times[rec["ip"]].append(rec["timestamp"])
75
100
  if rec["status"] == "404":
76
101
  ip_404[rec["ip"]] += 1
77
- blocked = []
78
- for ip, count in ip_404.items():
79
- if count >= 6:
102
+
103
+ # auto-block IPs with >=6 total 404s
104
+ newly_blocked = []
105
+ for ip, cnt in ip_404.items():
106
+ if cnt >= 6:
80
107
  obj, created = BlacklistEntry.objects.get_or_create(
81
108
  ip_address=ip,
82
109
  defaults={"reason": "Excessive 404s (≥6)"}
83
110
  )
84
111
  if created:
85
- blocked.append(ip)
86
- if blocked:
87
- print(f"Auto‑blocked {len(blocked)} IPs for ≥6 404s: {', '.join(blocked)}")
112
+ newly_blocked.append(ip)
113
+ if newly_blocked:
114
+ print(f"🔒 Blocked {len(newly_blocked)} IPs for 404 flood: {newly_blocked}")
115
+
116
+ # build feature vectors
88
117
  rows = []
89
118
  for r in parsed:
90
- ip = r["ip"]
91
- burst = sum(
119
+ ip = r["ip"]
120
+ burst = sum(
92
121
  1 for t in ip_times[ip]
93
122
  if (r["timestamp"] - t).total_seconds() <= 10
94
123
  )
95
- total404 = ip_404[ip]
96
- kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
124
+ total404 = ip_404[ip]
125
+ kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
97
126
  status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
127
+
98
128
  rows.append([
99
129
  len(r["path"]),
100
130
  kw_hits,
@@ -105,7 +135,7 @@ def train():
105
135
  ])
106
136
 
107
137
  if not rows:
108
- print("No entries to train on!")
138
+ print("⚠️ No entries to train on.")
109
139
  return
110
140
 
111
141
  df = pd.DataFrame(
@@ -115,9 +145,31 @@ def train():
115
145
  "status_idx", "burst_count", "total_404"
116
146
  ]
117
147
  ).fillna(0).astype(float)
148
+
149
+ # train & save
118
150
  clf = IsolationForest(contamination=0.01, random_state=42)
119
151
  clf.fit(df.values)
120
152
  os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
121
153
  joblib.dump(clf, MODEL_PATH)
122
- print(f"Model trained on {len(df)} samples and saved to {MODEL_PATH}")
154
+ print(f"Model trained on {len(df)} samples {MODEL_PATH}")
155
+
156
+ # extract top‑10 dynamic keywords from 4xx/5xx paths
157
+ tokens = Counter()
158
+ for r in parsed:
159
+ if r["status"].startswith(("4", "5")):
160
+ segs = re.split(r"\W+", r["path"].lower())
161
+ for seg in segs:
162
+ if len(seg) > 3 and seg not in MALICIOUS_KEYWORDS:
163
+ tokens[seg] += 1
164
+
165
+ new_kw = [kw for kw, _ in tokens.most_common(10)]
166
+ DK_FILE = os.path.join(os.path.dirname(__file__), "resources", "dynamic_keywords.json")
167
+ try:
168
+ existing = set(json.load(open(DK_FILE)))
169
+ except FileNotFoundError:
170
+ existing = set()
171
+ updated = sorted(existing | set(new_kw))
172
+ with open(DK_FILE, "w") as f:
173
+ json.dump(updated, f, indent=2)
123
174
 
175
+ print(f"📝 Updated dynamic keywords: {new_kw}")
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: aiwaf
3
+ Version: 0.1.3
4
+ Summary: AI-powered Web Application Firewall
5
+ Author: Aayush Gauba
6
+ Author-email: Aayush Gauba <gauba.aayush@gmail.com>
7
+ License: MIT
8
+ Requires-Python: >=3.8
9
+ Description-Content-Type: text/markdown
10
+ Dynamic: author
11
+
12
+ # AI‑WAF
13
+
14
+ > A self‑learning, Django‑friendly Web Application Firewall
15
+ > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
16
+
17
+ ---
18
+
19
+ ## Package Structure
20
+
21
+ ```
22
+ aiwaf/
23
+ ├── __init__.py
24
+ ├── blacklist_manager.py
25
+ ├── middleware.py
26
+ ├── trainer.py # exposes train()
27
+ ├── utils.py
28
+ ├── template_tags/
29
+ │ └── aiwaf_tags.py
30
+ ├── resources/
31
+ │ ├── model.pkl # pre‑trained base model
32
+ │ └── dynamic_keywords.json # evolves daily
33
+ ├── management/
34
+ │ └── commands/
35
+ │ └── detect_and_train.py # `python manage.py detect_and_train`
36
+ └── LICENSE
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Features
42
+
43
+ - **IP Blocklist**
44
+ Instantly blocks suspicious IPs (supports CSV fallback or Django model).
45
+
46
+ - **Rate Limiting**
47
+ A sliding‑window limiter rejects clients that send more than `AIWAF_RATE_MAX` requests per `AIWAF_RATE_WINDOW` seconds, then blacklists them.
48
+
49
+ - **AI Anomaly Detection**
50
+ IsolationForest on features:
51
+ - Path length
52
+ - Keyword hits (static + dynamic)
53
+ - Response time
54
+ - Status‑code index
55
+ - Burst count
56
+ - Total 404s
57
+
58
+ - **Dynamic Keyword Extraction**
59
+ On every retrain, the 10 most frequent “words” found in the paths of 4xx/5xx requests are added to your malicious keyword set.
60
+
61
+ - **File‑Extension Probing Detection**
62
+ Tracks repeated 404s on common web‑extensions (e.g. `.php`, `.asp`) and auto‑blocks after a burst.
63
+
64
+ - **Honeypot Field**
65
+ Hidden form field (via template tag) that bots fill → instant block.
66
+
67
+ - **UUID Tampering Protection**
68
+ Any request whose `<uuid:…>` URL value doesn’t match a record in **any** model of its Django app gets the client IP blocked.
69
+
70
+ - **Daily Retraining**
71
+ Reads rotated/gzipped logs, auto‑blocks 404 floods (≥6), retrains the model, updates `model.pkl` + `dynamic_keywords.json`.
72
+
73
+ ---
74
+
75
+ ## Installation
76
+
77
+ ```bash
78
+ # From PyPI
79
+ pip install aiwaf
80
+
81
+ # Or for local development
82
+ git clone https://github.com/aayushgauba/aiwaf.git
83
+ cd aiwaf
84
+ pip install -e .
85
+ ```
86
+
87
+ ---
88
+
89
+ ## ⚙️ Configuration (`settings.py`)
90
+
91
+ ```python
92
+ INSTALLED_APPS += ["aiwaf"]
93
+
94
+ # Required
95
+ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
96
+
97
+ # Optional (defaults shown)
98
+ AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
99
+ AIWAF_HONEYPOT_FIELD = "hp_field"
100
+ AIWAF_RATE_WINDOW = 10 # seconds
101
+ AIWAF_RATE_MAX = 20 # max reqs/window
102
+ AIWAF_RATE_FLOOD = 10 # flood threshold
103
+ AIWAF_WINDOW_SECONDS = 60 # anomaly window
104
+ AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"] # 404‑burst tracked extensions
105
+ ```
106
+
107
+ > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` in your settings — both are built in, and the keyword set evolves dynamically.
108
+
109
+ ---
110
+
111
+ ## Middleware Setup
112
+
113
+ Add in **this** order to your `MIDDLEWARE` list:
114
+
115
+ ```python
116
+ MIDDLEWARE = [
117
+ "aiwaf.middleware.IPBlockMiddleware",
118
+ "aiwaf.middleware.RateLimitMiddleware",
119
+ "aiwaf.middleware.AIAnomalyMiddleware",
120
+ "aiwaf.middleware.HoneypotMiddleware",
121
+ "aiwaf.middleware.UUIDTamperMiddleware",
122
+ # ... other middleware ...
123
+ ]
124
+ ```
125
+
126
+ ---
127
+
128
+ ## Honeypot Field (in your template)
129
+
130
+ ```django
131
+ {% load aiwaf_tags %}
132
+
133
+ <form method="post">
134
+ {% csrf_token %}
135
+ {% honeypot_field %}
136
+ <!-- your real fields -->
137
+ </form>
138
+ ```
139
+
140
+ > Renders a hidden `<input name="hp_field" style="display:none">`.
141
+ > Any non‑empty submission → IP blacklisted.
142
+
143
+ ---
144
+
145
+ ## Running Detection & Training
146
+
147
+ ```bash
148
+ python manage.py detect_and_train
149
+ ```
150
+
151
+ **What happens:**
152
+ 1. Read access logs
153
+ 2. Auto‑block IPs with ≥ 6 total 404s
154
+ 3. Extract features & train IsolationForest
155
+ 4. Save `model.pkl`
156
+ 5. Extract the top 10 dynamic keywords from the paths of 4xx/5xx requests and merge them into `dynamic_keywords.json`
157
+
158
+ ---
159
+
160
+ ## How It Works
161
+
162
+ | Middleware | Purpose |
163
+ |--------------------------|------------------------------------------------------------------|
164
+ | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
165
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
166
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
167
+ | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
168
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
169
+
170
+ ---
171
+
172
+ ## License
173
+
174
+ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
175
+
176
+ ---
177
+
178
+ ## Credits
179
+
180
+ **AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
181
+ > “Let your firewall learn and evolve — keep your site a fortress.”
@@ -1,10 +1,10 @@
1
1
  aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
4
- aiwaf/middleware.py,sha256=WHoXMtKZ_WpueSPylH4XXpKzM9-cqPDunDM0fuklzg0,4220
5
- aiwaf/models.py,sha256=GTojMOZ3M5pDDNmpU4o353M3w59jEclzHqJgofHzfBA,1021
4
+ aiwaf/middleware.py,sha256=UIJ-1kA-NjKwpt3JS3vvsuhjaBXGliGt_4VKuL_OGq8,5254
5
+ aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
6
6
  aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
7
- aiwaf/trainer.py,sha256=8eLrq3bOmRle4KDRWnzxnc-Gv6oo5IErqoDGO_v_qG4,3629
7
+ aiwaf/trainer.py,sha256=8hU9k3bF_9QIkGix3TqFl7YuNeQV9dPriY2WhLo6s40,5411
8
8
  aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
9
9
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,7 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
12
12
  aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
13
13
  aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
15
- aiwaf-0.1.2.dist-info/METADATA,sha256=htVp1TT0tRdAFDIxC1mjv6opQMQIHy8MhHPv0NDPIp4,4396
16
- aiwaf-0.1.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
17
- aiwaf-0.1.2.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
18
- aiwaf-0.1.2.dist-info/RECORD,,
15
+ aiwaf-0.1.3.dist-info/METADATA,sha256=zgcejLdSfeE_bcqAvuebUJHN2ynKxtE24wVWdRdA_EA,4977
16
+ aiwaf-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
17
+ aiwaf-0.1.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
18
+ aiwaf-0.1.3.dist-info/RECORD,,
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: aiwaf
3
- Version: 0.1.2
4
- Summary: AI-powered Web Application Firewall
5
- Author: Aayush Gauba
6
- Author-email: Aayush Gauba <gauba.aayush@gmail.com>
7
- License: MIT
8
- Requires-Python: >=3.8
9
- Description-Content-Type: text/markdown
10
- Dynamic: author
11
-
12
- # AI‑WAF
13
-
14
- > A self-learning, Django-friendly Web Application Firewall
15
- > with rate-limiting, anomaly detection, honeypots, UUID-tamper protection, and daily retraining.
16
-
17
- ---
18
-
19
- ## Package Structure
20
-
21
- ```
22
- aiwaf/
23
- ├── __init__.py
24
- ├── blacklist_manager.py
25
- ├── middleware.py
26
- ├── trainer.py # exposes detect_and_train()
27
- ├── utils.py
28
- ├── template_tags/
29
- │ └── aiwaf_tags.py
30
- ├── resources/
31
- │ └── model.pkl # pre-trained base model
32
- ├── management/
33
- │ └── commands/
34
- │ └── detect_and_train.py # python manage.py detect_and_train
35
- └── LICENSE
36
- ```
37
-
38
- ---
39
-
40
- ## Features
41
-
42
- - **IP Blocklist**
43
- Automatically blocks suspicious IPs; optionally backed by CSV or Django model.
44
-
45
- - **Rate Limiting**
46
- Sliding window logic blocks IPs exceeding a threshold of requests per second.
47
-
48
- - **AI Anomaly Detection**
49
- IsolationForest trained on real logs with features like:
50
- - Path length
51
- - Keyword hits
52
- - Response time
53
- - Status code index
54
- - Burst count
55
- - Total 404s
56
-
57
- - **Honeypot Field**
58
- Hidden form field that bots are likely to fill — if triggered, the IP is blocked.
59
-
60
- - **UUID Tampering Protection**
61
- Detects if someone is probing by injecting random/nonexistent UUIDs into URLs.
62
-
63
- - **Daily Retraining**
64
- A single command retrains your model every day based on your logs.
65
-
66
- ---
67
-
68
- ## Installation
69
-
70
- Install locally or from PyPI:
71
-
72
- ```bash
73
- pip install aiwaf
74
- ```
75
-
76
- Or for local dev:
77
-
78
- ```bash
79
- git clone https://github.com/aayushgauba/aiwaf.git
80
- cd aiwaf
81
- pip install -e .
82
- ```
83
-
84
- ---
85
-
86
- ## ⚙️ Configuration (`settings.py`)
87
-
88
- ```python
89
- INSTALLED_APPS += [
90
- "aiwaf",
91
- ]
92
-
93
- # Required
94
- AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
95
-
96
- # Optional (defaults included)
97
- AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
98
- AIWAF_MALICIOUS_KEYWORDS = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
99
- AIWAF_STATUS_CODES = ["200", "403", "404", "500"]
100
- AIWAF_HONEYPOT_FIELD = "hp_field"
101
- ```
102
-
103
- ---
104
-
105
- ## Middleware Setup
106
-
107
- Add to `MIDDLEWARE` in order:
108
-
109
- ```python
110
- MIDDLEWARE = [
111
- "aiwaf.middleware.IPBlockMiddleware",
112
- "aiwaf.middleware.RateLimitMiddleware",
113
- "aiwaf.middleware.AIAnomalyMiddleware",
114
- "aiwaf.middleware.HoneypotMiddleware",
115
- "aiwaf.middleware.UUIDTamperMiddleware",
116
- ...
117
- ]
118
- ```
119
-
120
- ---
121
-
122
- ## Honeypot Field (in template)
123
-
124
- ```html
125
- {% load aiwaf_tags %}
126
-
127
- <form method="post">
128
- {% csrf_token %}
129
- {% honeypot_field %}
130
- <!-- other fields -->
131
- </form>
132
- ```
133
-
134
- The hidden field will be `<input type="hidden" name="hp_field">`.
135
- If it’s ever filled → IP gets blocked.
136
-
137
- ---
138
-
139
- ## Run Detection + Training
140
-
141
- ```bash
142
- python manage.py detect_and_train
143
- ```
144
-
145
- What it does:
146
-
147
- - Reads logs (supports `.gz` and rotated logs).
148
- - Detects excessive 404s (≥6) → instant block.
149
- - Builds feature vectors from logs.
150
- - Trains IsolationForest and saves `model.pkl`.
151
-
152
- Schedule it to run daily via `cron`, `Celery beat`, or systemd timer.
153
-
154
- ---
155
-
156
- ## How It Works (Simplified)
157
-
158
- | Middleware | Functionality |
159
- |------------------------|--------------------------------------------------------------|
160
- | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
161
- | RateLimitMiddleware | Blocks flooders (>20/10s) and blacklists them (>10/10s) |
162
- | AIAnomalyMiddleware | Uses ML to detect suspicious behavior in request patterns |
163
- | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
164
- | UUIDTamperMiddleware | Detects guessing/probing by checking invalid UUID access |
165
-
166
- ---
167
-
168
- ## Development Roadmap
169
-
170
- - [ ] Add CSV blocklist fallback
171
- - [ ] Admin dashboard integration
172
- - [ ] Auto-pruning of old block entries
173
- - [ ] Real-time log streaming compatibility
174
- - [ ] Docker/Helm deployment guide
175
-
176
- ---
177
-
178
- ## License
179
-
180
- This project is licensed under the **MIT License** — see `LICENSE` for details.
181
-
182
- ---
183
-
184
- ## Credits
185
-
186
- **AIWAF** by [Aayush Gauba](https://github.com/aayushgauba)
187
- > "Let your firewall learn and evolve with your logs. Make your site a fortress."
File without changes