aiwaf 0.1.7__py3-none-any.whl → 0.1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/middleware.py CHANGED
@@ -18,6 +18,14 @@ from django.urls import get_resolver
18
18
  from .blacklist_manager import BlacklistManager
19
19
  from .models import DynamicKeyword
20
20
 
21
def is_exempt_path(path):
    """Return True when *path* is covered by an ``AIWAF_EXEMPT_PATHS`` entry.

    A path is exempt when it equals an entry exactly, or when it lies below
    an entry treated as a directory prefix (``/static/`` covers
    ``/static/app.css``).  Matching is case-insensitive on both sides.

    Args:
        path: The request path to test, e.g. ``request.path``.

    Returns:
        bool: True if the path matches any configured entry, else False.
        With the setting absent or empty, nothing is exempt.
    """
    path = path.lower()
    for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
        # The path was lower-cased above, so the entry must be lower-cased
        # too; otherwise a mixed-case entry such as "/Admin/" never matches.
        exempt = exempt.lower()
        if path == exempt:
            return True
        prefix = exempt.rstrip("/")
        # An empty or "/" entry leaves no prefix.  Using it anyway would
        # reduce the test to startswith("/") and exempt every request, so
        # such entries only ever match exactly (handled above).
        if prefix and path.startswith(prefix + "/"):
            return True
    return False
28
+
21
29
  MODEL_PATH = getattr(
22
30
  settings,
23
31
  "AIWAF_MODEL_PATH",
@@ -64,8 +72,11 @@ class IPAndKeywordBlockMiddleware:
64
72
  return prefixes
65
73
 
66
74
  def __call__(self, request):
75
+ raw_path = request.path.lower()
76
+ if is_exempt_path(raw_path):
77
+ return self.get_response(request)
67
78
  ip = get_ip(request)
68
- path = request.path.lower().lstrip("/")
79
+ path = raw_path.lstrip("/")
69
80
  if BlacklistManager.is_blocked(ip):
70
81
  return JsonResponse({"error": "blocked"}, status=403)
71
82
  segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
@@ -99,6 +110,8 @@ class RateLimitMiddleware:
99
110
  self.logs = defaultdict(list)
100
111
 
101
112
  def __call__(self, request):
113
+ if is_exempt_path(request.path):
114
+ return self.get_response(request)
102
115
  ip = get_ip(request)
103
116
  now = time.time()
104
117
  recs = [t for t in self.logs[ip] if now - t < self.WINDOW]
@@ -119,6 +132,8 @@ class AIAnomalyMiddleware(MiddlewareMixin):
119
132
  TOP_N = getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)
120
133
 
121
134
  def process_request(self, request):
135
+ if is_exempt_path(request.path):
136
+ return None
122
137
  ip = get_ip(request)
123
138
  if BlacklistManager.is_blocked(ip):
124
139
  return JsonResponse({"error": "blocked"}, status=403)
@@ -160,6 +175,8 @@ class AIAnomalyMiddleware(MiddlewareMixin):
160
175
 
161
176
  class HoneypotMiddleware(MiddlewareMixin):
162
177
  def process_view(self, request, view_func, view_args, view_kwargs):
178
+ if is_exempt_path(request.path):
179
+ return None
163
180
  trap = request.POST.get(getattr(settings, "AIWAF_HONEYPOT_FIELD", "hp_field"), "")
164
181
  if trap:
165
182
  ip = get_ip(request)
@@ -170,6 +187,8 @@ class HoneypotMiddleware(MiddlewareMixin):
170
187
 
171
188
  class UUIDTamperMiddleware(MiddlewareMixin):
172
189
  def process_view(self, request, view_func, view_args, view_kwargs):
190
+ if is_exempt_path(request.path):
191
+ return None
173
192
  uid = view_kwargs.get("uuid")
174
193
  if not uid:
175
194
  return None
aiwaf/trainer.py CHANGED
@@ -14,7 +14,6 @@ from django.apps import apps
14
14
  from django.db.models import F
15
15
  from django.urls import get_resolver
16
16
 
17
- # ─── CONFIG ────────────────────────────────────────────────────────────────
18
17
 
19
18
  LOG_PATH = settings.AIWAF_ACCESS_LOG
20
19
  MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
@@ -30,7 +29,13 @@ _LOG_RX = re.compile(
30
29
  BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
31
30
  DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
32
31
 
33
-
32
def is_exempt_path(path):
    """Return True when *path* is covered by an ``AIWAF_EXEMPT_PATHS`` entry.

    A path is exempt when it equals an entry exactly, or when it lies below
    an entry treated as a directory prefix (``/static/`` covers
    ``/static/app.css``).  Matching is case-insensitive on both sides.

    Args:
        path: The logged request path to test.

    Returns:
        bool: True if the path matches any configured entry, else False.
        With the setting absent or empty, nothing is exempt.
    """
    path = path.lower()
    for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
        # The path was lower-cased above, so the entry must be lower-cased
        # too; otherwise a mixed-case entry such as "/Admin/" never matches.
        exempt = exempt.lower()
        if path == exempt:
            return True
        prefix = exempt.rstrip("/")
        # An empty or "/" entry leaves no prefix.  Using it anyway would
        # reduce the test to startswith("/") and exempt every path, so such
        # entries only ever match exactly (handled above).
        if prefix and path.startswith(prefix + "/"):
            return True
    return False
34
39
 
35
40
  def path_exists_in_django(path):
36
41
  from django.urls import get_resolver
@@ -51,6 +56,18 @@ def path_exists_in_django(path):
51
56
  return True
52
57
  return False
53
58
 
59
def remove_exempt_keywords():
    """Delete stored dynamic keywords that come from exempt paths.

    Each ``AIWAF_EXEMPT_PATHS`` entry is stripped of surrounding slashes,
    lower-cased and split on non-word characters; pieces longer than three
    characters are collected as tokens.  ``DynamicKeyword`` rows whose
    ``keyword`` is one of those tokens are deleted and the count is printed.
    Nothing is deleted or printed when no tokens are produced.
    """
    configured = getattr(settings, "AIWAF_EXEMPT_PATHS", [])
    exempt_tokens = {
        token
        for entry in configured
        for token in re.split(r"\W+", entry.strip("/").lower())
        if len(token) > 3
    }

    # Guard clause: without tokens there is nothing to look up or report.
    if not exempt_tokens:
        return

    # The first item of delete()'s result is used as the removed-row count.
    deleted_count, _ = DynamicKeyword.objects.filter(keyword__in=exempt_tokens).delete()
    print(f"Removed {deleted_count} dynamic keywords that are now exempt: {list(exempt_tokens)}")
54
71
 
55
72
  def _read_all_logs():
56
73
  lines = []
@@ -88,6 +105,7 @@ def _parse(line):
88
105
 
89
106
 
90
107
  def train():
108
+ remove_exempt_keywords()
91
109
  raw_lines = _read_all_logs()
92
110
  if not raw_lines:
93
111
  print("No log lines found – check AIWAF_ACCESS_LOG setting.")
@@ -125,7 +143,7 @@ def train():
125
143
  total404 = ip_404[ip]
126
144
  is_known_path = path_exists_in_django(r["path"])
127
145
  kw_hits = 0
128
- if not is_known_path:
146
+ if not is_known_path and not is_exempt_path(r["path"]):
129
147
  kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
130
148
  status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
131
149
  feature_dicts.append({
@@ -178,4 +196,4 @@ def train():
178
196
 
179
197
 
180
198
  if __name__ == "__main__":
181
- train()
199
+ train()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.7
3
+ Version: 0.1.7.1
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -15,14 +15,14 @@ Dynamic: license-file
15
15
  Dynamic: requires-python
16
16
 
17
17
 
18
- # AI‑WAF
18
+ # AI‑WAF
19
19
 
20
20
  > A self‑learning, Django‑friendly Web Application Firewall
21
- > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
21
+ > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, exempt path awareness, and daily retraining.
22
22
 
23
23
  ---
24
24
 
25
- ## Package Structure
25
+ ## 📁 Package Structure
26
26
 
27
27
  ```
28
28
  aiwaf/
@@ -44,7 +44,7 @@ aiwaf/
44
44
 
45
45
  ---
46
46
 
47
- ## Features
47
+ ## 🚀 Features
48
48
 
49
49
  - **IP Blocklist**
50
50
  Instantly blocks suspicious IPs (supports CSV fallback or Django model).
@@ -53,7 +53,7 @@ aiwaf/
53
53
  Sliding‑window blocks flooders (> `AIWAF_RATE_MAX` per `AIWAF_RATE_WINDOW`), then blacklists them.
54
54
 
55
55
  - **AI Anomaly Detection**
56
- IsolationForest on features:
56
+ IsolationForest trained on:
57
57
  - Path length
58
58
  - Keyword hits (static + dynamic)
59
59
  - Response time
@@ -61,34 +61,28 @@ aiwaf/
61
61
  - Burst count
62
62
  - Total 404s
63
63
 
64
- - **Dynamic Keyword Extraction**
65
- Every retrain: top 10 most frequent “words” from 4xx/5xx paths are appended to your malicious keyword set.
64
+ - **Dynamic Keyword Extraction & Cleanup**
65
+ - Every retrain adds top 10 keyword segments from 4xx/5xx paths
66
+ - **If a path is added to `AIWAF_EXEMPT_PATHS`, its keywords are automatically removed from the database**
66
67
 
67
68
  - **File‑Extension Probing Detection**
68
- Tracks repeated 404s on common web‑extensions (e.g. `.php`, `.asp`) and auto‑blocks after a burst.
69
+ Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
69
70
 
70
71
  - **Honeypot Field**
71
- Hidden form field (via template tag) that bots fill instant block.
72
+ Hidden field for bot detection — the IP is blacklisted on fill.
72
73
 
73
74
  - **UUID Tampering Protection**
74
- Any `<uuid:…>` URL that doesn’t map to **any** model in its Django app gets blocked.
75
+ Blocks guessed or invalid UUIDs that don’t resolve to real models.
75
76
 
76
- - **Daily Retraining**
77
- Reads rotated/gzipped logs, auto‑blocks 404 floods (≥6), retrains the model, updates `model.pkl` + `dynamic_keywords.json`.
78
-
79
- ---
80
-
81
- ## Installation
82
-
83
- ```bash
84
- # From PyPI
85
- pip install aiwaf
77
+ - **Exempt Path Awareness**
78
+ Fully respects `AIWAF_EXEMPT_PATHS` across all modules — exempt paths are:
79
+ - Skipped from keyword learning
80
+ - Immune to AI blocking
81
+ - Ignored in log training
82
+ - Cleaned from `DynamicKeyword` model automatically
86
83
 
87
- # Or for local development
88
- git clone https://github.com/aayushgauba/aiwaf.git
89
- cd aiwaf
90
- pip install -e .
91
- ```
84
+ - **Daily Retraining**
85
+ Reads rotated logs, auto‑blocks 404 floods, retrains the IsolationForest, updates `model.pkl`, and evolves the keyword DB.
92
86
 
93
87
  ---
94
88
 
@@ -96,33 +90,51 @@ pip install -e .
96
90
 
97
91
  ```python
98
92
  INSTALLED_APPS += ["aiwaf"]
93
+ ```
99
94
 
100
95
  ### Database Setup
101
96
 
102
- After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
97
+ After adding `aiwaf` to your `INSTALLED_APPS`, run the following to create the necessary tables:
103
98
 
104
99
  ```bash
105
100
  python manage.py makemigrations aiwaf
106
101
  python manage.py migrate
102
+ ```
107
103
 
108
- # Required
104
+ ---
105
+
106
+ ### Required
107
+
108
+ ```python
109
109
  AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
110
+ ```
111
+
112
+ ---
113
+
114
+ ### Optional (defaults shown)
110
115
 
111
- # Optional (defaults shown)
116
+ ```python
112
117
  AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
113
118
  AIWAF_HONEYPOT_FIELD = "hp_field"
114
119
  AIWAF_RATE_WINDOW = 10 # seconds
115
- AIWAF_RATE_MAX = 20 # max reqs/window
120
+ AIWAF_RATE_MAX = 20 # max requests per window
116
121
  AIWAF_RATE_FLOOD = 10 # flood threshold
117
- AIWAF_WINDOW_SECONDS = 60 # anomaly window
118
- AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"] # 404‑burst tracked extensions
122
+ AIWAF_WINDOW_SECONDS = 60 # anomaly detection window
123
+ AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"]
124
+ AIWAF_EXEMPT_PATHS = [ # optional but highly recommended
125
+ "/favicon.ico",
126
+ "/robots.txt",
127
+ "/static/",
128
+ "/media/",
129
+ "/health/",
130
+ ]
119
131
  ```
120
132
 
121
- > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` in your settings — they’re built in and evolve dynamically.
133
+ > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` — they evolve dynamically.
122
134
 
123
135
  ---
124
136
 
125
- ## Middleware Setup
137
+ ## 🧱 Middleware Setup
126
138
 
127
139
  Add in **this** order to your `MIDDLEWARE` list:
128
140
 
@@ -139,7 +151,7 @@ MIDDLEWARE = [
139
151
 
140
152
  ---
141
153
 
142
- ## Honeypot Field (in your template)
154
+ ## 🕵️ Honeypot Field (in your template)
143
155
 
144
156
  ```django
145
157
  {% load aiwaf_tags %}
@@ -156,22 +168,23 @@ MIDDLEWARE = [
156
168
 
157
169
  ---
158
170
 
159
- ## Running Detection & Training
171
+ ## 🔁 Running Detection & Training
160
172
 
161
173
  ```bash
162
174
  python manage.py detect_and_train
163
175
  ```
164
176
 
165
- **What happens:**
166
- 1. Read access logs
177
+ ### What happens:
178
+ 1. Read access logs (incl. rotated or gzipped)
167
179
  2. Auto‑block IPs with ≥ 6 total 404s
168
180
  3. Extract features & train IsolationForest
169
181
  4. Save `model.pkl`
170
182
  5. Extract top 10 dynamic keywords from 4xx/5xx
183
+ 6. Remove any keywords associated with newly exempt paths
171
184
 
172
185
  ---
173
186
 
174
- ## How It Works
187
+ ## 🧠 How It Works
175
188
 
176
189
  | Middleware | Purpose |
177
190
  |------------------------------------|-----------------------------------------------------------------|
@@ -180,15 +193,16 @@ python manage.py detect_and_train
180
193
  | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
181
194
  | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
182
195
  | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
196
+
183
197
  ---
184
198
 
185
- ## License
199
+ ## 📄 License
186
200
 
187
201
  This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
188
202
 
189
203
  ---
190
204
 
191
- ## Credits
205
+ ## 👤 Credits
192
206
 
193
207
  **AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
194
208
  > “Let your firewall learn and evolve — keep your site a fortress.”
@@ -1,10 +1,10 @@
1
1
  aiwaf/__init__.py,sha256=nQFpJ1YpX48snzLjEQCf8zD2YNh8v0b_kPTrXx8uBYc,46
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=sM6uTH7zD6MOPGb0kzqV2aFut2vxKgft_UVeRJr7klw,392
4
- aiwaf/middleware.py,sha256=2sNCqDULvuASo6dlbvrGpLzwhgHtHXwgVR8u3IhvrDI,6698
4
+ aiwaf/middleware.py,sha256=LTLHmQYIQ36WwfR9FEPLrmTbYgqxIh4X5Aen4VJ-vN0,7350
5
5
  aiwaf/models.py,sha256=8au1umopgCo0lthztTTRrYRJQUM7uX8eAeXgs3z45K4,1282
6
6
  aiwaf/storage.py,sha256=bxCILzzvA1-q6nwclRE8WrfoRhe25H4VrsQDf0hl_lY,1903
7
- aiwaf/trainer.py,sha256=IwL-BHbjGunOLX2HuGE12-W_PB0aDwbiZ62izPpfOEo,5796
7
+ aiwaf/trainer.py,sha256=ir5kFTeLQuhMd2h094ct03Wr-rNZsX-mZHwjLx29F54,6422
8
8
  aiwaf/utils.py,sha256=RkEUWhhHy6tOk7V0UYv3cN4xhOR_7aBy9bjhwuV2cdA,1436
9
9
  aiwaf/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  aiwaf/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,8 +12,8 @@ aiwaf/management/commands/detect_and_train.py,sha256=-o-LZ7QZ5GeJPCekryox1DGXKMm
12
12
  aiwaf/resources/model.pkl,sha256=rCCXH38SJrnaOba2WZrU1LQVzWT34x6bTVkq20XJU-Q,1091129
13
13
  aiwaf/template_tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  aiwaf/template_tags/aiwaf_tags.py,sha256=1KGqeioYmgKACDUiPkykSqI7DLQ6-Ypy1k00weWj9iY,399
15
- aiwaf-0.1.7.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
16
- aiwaf-0.1.7.dist-info/METADATA,sha256=uyKj5eHph-ufrCwZWOtGWxMZD1OtQOXu_6JXz0SRB2Q,5414
17
- aiwaf-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
18
- aiwaf-0.1.7.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
19
- aiwaf-0.1.7.dist-info/RECORD,,
15
+ aiwaf-0.1.7.1.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
16
+ aiwaf-0.1.7.1.dist-info/METADATA,sha256=aO_1D_qSP_s4vKUj60a8VmsFcCLCyhBZii1tpbo3HqE,5790
17
+ aiwaf-0.1.7.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
18
+ aiwaf-0.1.7.1.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
19
+ aiwaf-0.1.7.1.dist-info/RECORD,,
File without changes