aiwaf 0.1.3__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf-0.1.6/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Aayush Gauba
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,15 +1,21 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.3
3
+ Version: 0.1.6
4
4
  Summary: AI-powered Web Application Firewall
5
+ Home-page: https://github.com/aayushgauba/aiwaf
5
6
  Author: Aayush Gauba
6
7
  Author-email: Aayush Gauba <gauba.aayush@gmail.com>
7
8
  License: MIT
8
9
  Requires-Python: >=3.8
9
10
  Description-Content-Type: text/markdown
11
+ License-File: LICENSE
10
12
  Dynamic: author
13
+ Dynamic: home-page
14
+ Dynamic: license-file
15
+ Dynamic: requires-python
11
16
 
12
- # AI‑WAF
17
+
18
+ # AI‑WAF
13
19
 
14
20
  > A self‑learning, Django‑friendly Web Application Firewall
15
21
  > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -91,6 +97,14 @@ pip install -e .
91
97
  ```python
92
98
  INSTALLED_APPS += ["aiwaf"]
93
99
 
100
+ ### Database Setup
101
+
102
+ After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
103
+
104
+ ```bash
105
+ python manage.py makemigrations aiwaf
106
+ python manage.py migrate
107
+
94
108
  # Required
95
109
  AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
96
110
 
@@ -114,7 +128,7 @@ Add in **this** order to your `MIDDLEWARE` list:
114
128
 
115
129
  ```python
116
130
  MIDDLEWARE = [
117
- "aiwaf.middleware.IPBlockMiddleware",
131
+ "aiwaf.middleware.IPAndKeywordBlockMiddleware",
118
132
  "aiwaf.middleware.RateLimitMiddleware",
119
133
  "aiwaf.middleware.AIAnomalyMiddleware",
120
134
  "aiwaf.middleware.HoneypotMiddleware",
@@ -159,14 +173,13 @@ python manage.py detect_and_train
159
173
 
160
174
  ## How It Works
161
175
 
162
- | Middleware | Purpose |
163
- |--------------------------|------------------------------------------------------------------|
164
- | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
165
- | RateLimitMiddleware | Enforces burst & flood thresholds |
166
- | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
167
- | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
168
- | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
169
-
176
+ | Middleware | Purpose |
177
+ |------------------------------------|-----------------------------------------------------------------|
178
+ | IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
179
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
180
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
181
+ | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
182
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
170
183
  ---
171
184
 
172
185
  ## License
@@ -1,15 +1,5 @@
1
- Metadata-Version: 2.4
2
- Name: aiwaf
3
- Version: 0.1.3
4
- Summary: AI-powered Web Application Firewall
5
- Author: Aayush Gauba
6
- Author-email: Aayush Gauba <gauba.aayush@gmail.com>
7
- License: MIT
8
- Requires-Python: >=3.8
9
- Description-Content-Type: text/markdown
10
- Dynamic: author
11
-
12
- # AI‑WAF
1
+
2
+ # AI‑WAF
13
3
 
14
4
  > A self‑learning, Django‑friendly Web Application Firewall
15
5
  > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -91,6 +81,14 @@ pip install -e .
91
81
  ```python
92
82
  INSTALLED_APPS += ["aiwaf"]
93
83
 
84
+ ### Database Setup
85
+
86
+ After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
87
+
88
+ ```bash
89
+ python manage.py makemigrations aiwaf
90
+ python manage.py migrate
91
+
94
92
  # Required
95
93
  AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
96
94
 
@@ -114,7 +112,7 @@ Add in **this** order to your `MIDDLEWARE` list:
114
112
 
115
113
  ```python
116
114
  MIDDLEWARE = [
117
- "aiwaf.middleware.IPBlockMiddleware",
115
+ "aiwaf.middleware.IPAndKeywordBlockMiddleware",
118
116
  "aiwaf.middleware.RateLimitMiddleware",
119
117
  "aiwaf.middleware.AIAnomalyMiddleware",
120
118
  "aiwaf.middleware.HoneypotMiddleware",
@@ -159,14 +157,13 @@ python manage.py detect_and_train
159
157
 
160
158
  ## How It Works
161
159
 
162
- | Middleware | Purpose |
163
- |--------------------------|------------------------------------------------------------------|
164
- | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
165
- | RateLimitMiddleware | Enforces burst & flood thresholds |
166
- | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
167
- | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
168
- | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
169
-
160
+ | Middleware | Purpose |
161
+ |------------------------------------|-----------------------------------------------------------------|
162
+ | IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
163
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
164
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
165
+ | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
166
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
170
167
  ---
171
168
 
172
169
  ## License
@@ -13,11 +13,11 @@ from django.conf import settings
13
13
  from django.core.cache import cache
14
14
  from django.db.models import F
15
15
  from django.apps import apps
16
+ from django.urls import get_resolver
16
17
 
17
18
  from .blacklist_manager import BlacklistManager
18
19
  from .models import DynamicKeyword
19
20
 
20
- # ─── Model loading with fallback ────────────────────────────────────────────
21
21
  MODEL_PATH = getattr(
22
22
  settings,
23
23
  "AIWAF_MODEL_PATH",
@@ -25,7 +25,6 @@ MODEL_PATH = getattr(
25
25
  )
26
26
  MODEL = joblib.load(MODEL_PATH)
27
27
 
28
- # ─── Static keywords default ────────────────────────────────────────────────
29
28
  STATIC_KW = getattr(
30
29
  settings,
31
30
  "AIWAF_MALICIOUS_KEYWORDS",
@@ -41,15 +40,46 @@ def get_ip(request):
41
40
  return xff.split(",")[0].strip()
42
41
  return request.META.get("REMOTE_ADDR", "")
43
42
 
44
-
45
- class IPBlockMiddleware:
43
+ class IPAndKeywordBlockMiddleware:
46
44
  def __init__(self, get_response):
47
45
  self.get_response = get_response
46
+ self.url_patterns = self._collect_view_paths()
47
+
48
+ def _collect_view_paths(self):
49
+ resolver = get_resolver()
50
+ patterns = set()
51
+
52
+ def extract(patterns_list, prefix=""):
53
+ for p in patterns_list:
54
+ if hasattr(p, "url_patterns"):
55
+ extract(p.url_patterns, prefix + str(p.pattern))
56
+ else:
57
+ pat = (prefix + str(p.pattern)).strip("^$")
58
+ patterns.add(pat)
59
+ extract(resolver.url_patterns)
60
+ return patterns
48
61
 
49
62
  def __call__(self, request):
50
63
  ip = get_ip(request)
64
+ path = request.path.lower()
51
65
  if BlacklistManager.is_blocked(ip):
52
66
  return JsonResponse({"error": "blocked"}, status=403)
67
+ segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
68
+ for seg in segments:
69
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
70
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
71
+ dynamic_top = list(
72
+ DynamicKeyword.objects
73
+ .order_by("-count")
74
+ .values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
75
+ )
76
+ all_kw = set(STATIC_KW) | set(dynamic_top)
77
+ safe_kw = {kw for kw in all_kw if any(kw in pat for pat in self.url_patterns)}
78
+ suspicious_kw = all_kw - safe_kw
79
+ for seg in segments:
80
+ if seg in suspicious_kw:
81
+ BlacklistManager.block(ip, f"Keyword block: {seg}")
82
+ return JsonResponse({"error": "blocked"}, status=403)
53
83
  return self.get_response(request)
54
84
 
55
85
 
@@ -0,0 +1,151 @@
1
+ import os
2
+ import glob
3
+ import gzip
4
+ import re
5
+ from datetime import datetime
6
+ from collections import defaultdict, Counter
7
+
8
+ import pandas as pd
9
+ from sklearn.ensemble import IsolationForest
10
+ import joblib
11
+
12
+ from django.conf import settings
13
+ from django.apps import apps
14
+ from django.db.models import F
15
+
16
+ LOG_PATH = settings.AIWAF_ACCESS_LOG
17
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
18
+
19
+ STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
20
+ STATUS_IDX = ["200", "403", "404", "500"]
21
+
22
+ _LOG_RX = re.compile(
23
+ r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
24
+ r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
25
+ )
26
+
27
+ BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
28
+ DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
29
+
30
+
31
+ def _read_all_logs():
32
+ lines = []
33
+ if LOG_PATH and os.path.exists(LOG_PATH):
34
+ with open(LOG_PATH, "r", errors="ignore") as f:
35
+ lines.extend(f.readlines())
36
+ for path in sorted(glob.glob(f"{LOG_PATH}.*")):
37
+ opener = gzip.open if path.endswith(".gz") else open
38
+ try:
39
+ with opener(path, "rt", errors="ignore") as f:
40
+ lines.extend(f.readlines())
41
+ except OSError:
42
+ continue
43
+ return lines
44
+
45
+
46
+ def _parse(line):
47
+ m = _LOG_RX.search(line)
48
+ if not m:
49
+ return None
50
+ ip, ts_str, path, status, ref, ua, rt = m.groups()
51
+ try:
52
+ ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
53
+ except ValueError:
54
+ return None
55
+ return {
56
+ "ip": ip,
57
+ "timestamp": ts,
58
+ "path": path,
59
+ "status": status,
60
+ "ua": ua,
61
+ "response_time": float(rt),
62
+ }
63
+
64
+
65
+ def train():
66
+ raw_lines = _read_all_logs()
67
+ if not raw_lines:
68
+ print(" No log lines found – check AIWAF_ACCESS_LOG setting.")
69
+ return
70
+ parsed = []
71
+ ip_404 = defaultdict(int)
72
+ ip_times = defaultdict(list)
73
+ for ln in raw_lines:
74
+ rec = _parse(ln)
75
+ if not rec:
76
+ continue
77
+ parsed.append(rec)
78
+ ip_times[rec["ip"]].append(rec["timestamp"])
79
+ if rec["status"] == "404":
80
+ ip_404[rec["ip"]] += 1
81
+ blocked_404 = []
82
+ for ip, count in ip_404.items():
83
+ if count >= 6:
84
+ obj, created = BlacklistEntry.objects.get_or_create(
85
+ ip_address=ip,
86
+ defaults={"reason": "Excessive 404s (≥6)"}
87
+ )
88
+ if created:
89
+ blocked_404.append(ip)
90
+ if blocked_404:
91
+ print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
92
+ feature_dicts = []
93
+ for r in parsed:
94
+ ip = r["ip"]
95
+ burst = sum(
96
+ 1 for t in ip_times[ip]
97
+ if (r["timestamp"] - t).total_seconds() <= 10
98
+ )
99
+ total404 = ip_404[ip]
100
+ kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
101
+ status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
102
+ feature_dicts.append({
103
+ "ip": ip,
104
+ "path_len": len(r["path"]),
105
+ "kw_hits": kw_hits,
106
+ "resp_time": r["response_time"],
107
+ "status_idx": status_idx,
108
+ "burst_count": burst,
109
+ "total_404": total404,
110
+ })
111
+
112
+ if not feature_dicts:
113
+ print("⚠️ Nothing to train on – no valid log entries.")
114
+ return
115
+
116
+ df = pd.DataFrame(feature_dicts)
117
+ feature_cols = [c for c in df.columns if c != "ip"]
118
+ X = df[feature_cols].astype(float).values
119
+ model = IsolationForest(contamination=0.01, random_state=42)
120
+ model.fit(X)
121
+ os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
122
+ joblib.dump(model, MODEL_PATH)
123
+ print(f"✅ Model trained on {len(X)} samples → {MODEL_PATH}")
124
+ preds = model.predict(X) # -1 for outliers
125
+ anomalous_ips = set(df.loc[preds == -1, 'ip'])
126
+ blocked_anom = []
127
+ for ip in anomalous_ips:
128
+ obj, created = BlacklistEntry.objects.get_or_create(
129
+ ip_address=ip,
130
+ defaults={"reason": "Anomalous behavior"}
131
+ )
132
+ if created:
133
+ blocked_anom.append(ip)
134
+ if blocked_anom:
135
+ print(f" Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
136
+
137
+ tokens = Counter()
138
+ for r in parsed:
139
+ if r["status"].startswith(("4", "5")):
140
+ for seg in re.split(r"\W+", r["path"].lower()):
141
+ if len(seg) > 3 and seg not in STATIC_KW:
142
+ tokens[seg] += 1
143
+ top_tokens = tokens.most_common(10)
144
+ for kw, cnt in top_tokens:
145
+ obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
146
+ DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
147
+ print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
148
+
149
+
150
+ if __name__ == "__main__":
151
+ train()
@@ -1,4 +1,21 @@
1
- # AI‑WAF
1
+ Metadata-Version: 2.4
2
+ Name: aiwaf
3
+ Version: 0.1.6
4
+ Summary: AI-powered Web Application Firewall
5
+ Home-page: https://github.com/aayushgauba/aiwaf
6
+ Author: Aayush Gauba
7
+ Author-email: Aayush Gauba <gauba.aayush@gmail.com>
8
+ License: MIT
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Dynamic: author
13
+ Dynamic: home-page
14
+ Dynamic: license-file
15
+ Dynamic: requires-python
16
+
17
+
18
+ # AI‑WAF
2
19
 
3
20
  > A self‑learning, Django‑friendly Web Application Firewall
4
21
  > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -80,6 +97,14 @@ pip install -e .
80
97
  ```python
81
98
  INSTALLED_APPS += ["aiwaf"]
82
99
 
100
+ ### Database Setup
101
+
102
+ After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
103
+
104
+ ```bash
105
+ python manage.py makemigrations aiwaf
106
+ python manage.py migrate
107
+
83
108
  # Required
84
109
  AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
85
110
 
@@ -103,7 +128,7 @@ Add in **this** order to your `MIDDLEWARE` list:
103
128
 
104
129
  ```python
105
130
  MIDDLEWARE = [
106
- "aiwaf.middleware.IPBlockMiddleware",
131
+ "aiwaf.middleware.IPAndKeywordBlockMiddleware",
107
132
  "aiwaf.middleware.RateLimitMiddleware",
108
133
  "aiwaf.middleware.AIAnomalyMiddleware",
109
134
  "aiwaf.middleware.HoneypotMiddleware",
@@ -148,14 +173,13 @@ python manage.py detect_and_train
148
173
 
149
174
  ## How It Works
150
175
 
151
- | Middleware | Purpose |
152
- |--------------------------|------------------------------------------------------------------|
153
- | IPBlockMiddleware | Blocks requests from known blacklisted IPs |
154
- | RateLimitMiddleware | Enforces burst & flood thresholds |
155
- | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
156
- | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
157
- | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
158
-
176
+ | Middleware | Purpose |
177
+ |------------------------------------|-----------------------------------------------------------------|
178
+ | IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and Keywords |
179
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
180
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
181
+ | HoneypotMiddleware | Detects bots filling hidden inputs in forms |
182
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
159
183
  ---
160
184
 
161
185
  ## License
@@ -167,4 +191,4 @@ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) f
167
191
  ## Credits
168
192
 
169
193
  **AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
170
- > “Let your firewall learn and evolve — keep your site a fortress.”
194
+ > “Let your firewall learn and evolve — keep your site a fortress.”
@@ -1,3 +1,4 @@
1
+ LICENSE
1
2
  README.md
2
3
  pyproject.toml
3
4
  setup.py
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "aiwaf"
3
- version = "0.1.3"
3
+ version = "0.1.6"
4
4
  description = "AI-powered Web Application Firewall"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
7
7
  license = {text = "MIT"}
8
8
  authors = [{ name = "Aayush Gauba", email = "gauba.aayush@gmail.com" }]
9
- dependencies = [ ]
9
+ dependencies = [ ]
aiwaf-0.1.6/setup.py ADDED
@@ -0,0 +1,43 @@
1
+ # setup.py
2
+ from setuptools import setup, find_packages
3
+ import pathlib
4
+
5
+ HERE = pathlib.Path(__file__).parent
6
+
7
+ # read the long description from your README
8
+ long_description = (HERE / "README.md").read_text(encoding="utf-8")
9
+
10
+ setup(
11
+ name="aiwaf",
12
+ version="0.1.6",
13
+ description="AI‑driven, self‑learning Web Application Firewall for Django",
14
+ long_description=long_description,
15
+ long_description_content_type="text/markdown",
16
+ author="Aayush Gauba",
17
+ url="https://github.com/aayushgauba/aiwaf",
18
+ license="MIT",
19
+ packages=find_packages(exclude=["tests*", "docs*"]),
20
+ python_requires=">=3.8",
21
+ install_requires=[
22
+ "Django>=3.2",
23
+ "numpy>=1.21",
24
+ "pandas>=1.3",
25
+ "scikit-learn>=1.0",
26
+ "joblib>=1.1",
27
+ ],
28
+ include_package_data=True,
29
+ package_data={
30
+ # include your pretrained model and any JSON resources
31
+ "aiwaf": ["resources/*.pkl", "resources/*.json"]
32
+ },
33
+ entry_points={
34
+ "console_scripts": [
35
+ "aiwaf-detect=aiwaf.trainer:train",
36
+ ]
37
+ },
38
+ classifiers=[
39
+ "Framework :: Django",
40
+ "Programming Language :: Python :: 3",
41
+ "License :: MIT License",
42
+ ],
43
+ )
@@ -1,175 +0,0 @@
1
- import os
2
- import glob
3
- import gzip
4
- import re
5
- import json
6
- import joblib
7
-
8
- from datetime import datetime
9
- from collections import defaultdict, Counter
10
-
11
- import pandas as pd
12
- from sklearn.ensemble import IsolationForest
13
-
14
- from django.conf import settings
15
- from django.apps import apps
16
-
17
- # ─── CONFIG ────────────────────────────────────────────────────────────────
18
-
19
- # Where to read your access logs (and rotated/.gz siblings)
20
- LOG_PATH = settings.AIWAF_ACCESS_LOG
21
-
22
- # Where we save our trained model
23
- MODEL_PATH = os.path.join(
24
- os.path.dirname(__file__),
25
- "resources",
26
- "model.pkl"
27
- )
28
-
29
- # Static “malicious” path keywords & file extensions
30
- MALICIOUS_KEYWORDS = [
31
- ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
32
- "conflg", "shell", "filemanager"
33
- ]
34
- STATUS_CODES = ["200", "403", "404", "500"]
35
-
36
- # Regex for combined log with response-time=…
37
- _LOG_RX = re.compile(
38
- r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
39
- r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
40
- )
41
-
42
- # Your Django model for storing blocked IPs
43
- BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
44
-
45
-
46
- # ─── READ & PARSE LOG LINES ─────────────────────────────────────────────────
47
-
48
- def _read_all_logs():
49
- lines = []
50
- if LOG_PATH and os.path.exists(LOG_PATH):
51
- with open(LOG_PATH, "r", errors="ignore") as f:
52
- lines += f.readlines()
53
- for path in sorted(glob.glob(LOG_PATH + ".*")):
54
- opener = gzip.open if path.endswith(".gz") else open
55
- try:
56
- with opener(path, "rt", errors="ignore") as f:
57
- lines += f.readlines()
58
- except OSError:
59
- continue
60
- return lines
61
-
62
- def _parse(line):
63
- m = _LOG_RX.search(line)
64
- if not m:
65
- return None
66
- ip, ts_str, path, status, ref, ua, rt = m.groups()
67
- try:
68
- ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
69
- except ValueError:
70
- return None
71
- return {
72
- "ip": ip,
73
- "timestamp": ts,
74
- "path": path,
75
- "status": status,
76
- "ua": ua,
77
- "response_time": float(rt),
78
- }
79
-
80
-
81
- # ─── TRAIN ENTRYPOINT ───────────────────────────────────────────────────────
82
-
83
- def train():
84
- raw = _read_all_logs()
85
- if not raw:
86
- print("❌ No log lines found – check settings.AIWAF_ACCESS_LOG")
87
- return
88
-
89
- parsed = []
90
- ip_404 = defaultdict(int)
91
- ip_times = defaultdict(list)
92
-
93
- # parse + accumulate timestamps & 404 counts
94
- for ln in raw:
95
- rec = _parse(ln)
96
- if not rec:
97
- continue
98
- parsed.append(rec)
99
- ip_times[rec["ip"]].append(rec["timestamp"])
100
- if rec["status"] == "404":
101
- ip_404[rec["ip"]] += 1
102
-
103
- # auto-block IPs with >=6 total 404s
104
- newly_blocked = []
105
- for ip, cnt in ip_404.items():
106
- if cnt >= 6:
107
- obj, created = BlacklistEntry.objects.get_or_create(
108
- ip_address=ip,
109
- defaults={"reason": "Excessive 404s (≥6)"}
110
- )
111
- if created:
112
- newly_blocked.append(ip)
113
- if newly_blocked:
114
- print(f"🔒 Blocked {len(newly_blocked)} IPs for 404 flood: {newly_blocked}")
115
-
116
- # build feature vectors
117
- rows = []
118
- for r in parsed:
119
- ip = r["ip"]
120
- burst = sum(
121
- 1 for t in ip_times[ip]
122
- if (r["timestamp"] - t).total_seconds() <= 10
123
- )
124
- total404 = ip_404[ip]
125
- kw_hits = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
126
- status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
127
-
128
- rows.append([
129
- len(r["path"]),
130
- kw_hits,
131
- r["response_time"],
132
- status_idx,
133
- burst,
134
- total404
135
- ])
136
-
137
- if not rows:
138
- print("⚠️ No entries to train on.")
139
- return
140
-
141
- df = pd.DataFrame(
142
- rows,
143
- columns=[
144
- "path_len", "kw_hits", "resp_time",
145
- "status_idx", "burst_count", "total_404"
146
- ]
147
- ).fillna(0).astype(float)
148
-
149
- # train & save
150
- clf = IsolationForest(contamination=0.01, random_state=42)
151
- clf.fit(df.values)
152
- os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
153
- joblib.dump(clf, MODEL_PATH)
154
- print(f"✅ Model trained on {len(df)} samples → {MODEL_PATH}")
155
-
156
- # extract top‑10 dynamic keywords from 4xx/5xx paths
157
- tokens = Counter()
158
- for r in parsed:
159
- if r["status"].startswith(("4", "5")):
160
- segs = re.split(r"\W+", r["path"].lower())
161
- for seg in segs:
162
- if len(seg) > 3 and seg not in MALICIOUS_KEYWORDS:
163
- tokens[seg] += 1
164
-
165
- new_kw = [kw for kw, _ in tokens.most_common(10)]
166
- DK_FILE = os.path.join(os.path.dirname(__file__), "resources", "dynamic_keywords.json")
167
- try:
168
- existing = set(json.load(open(DK_FILE)))
169
- except FileNotFoundError:
170
- existing = set()
171
- updated = sorted(existing | set(new_kw))
172
- with open(DK_FILE, "w") as f:
173
- json.dump(updated, f, indent=2)
174
-
175
- print(f"📝 Updated dynamic keywords: {new_kw}")
aiwaf-0.1.3/setup.py DELETED
@@ -1,31 +0,0 @@
1
- from setuptools import setup, find_packages
2
- from pathlib import Path
3
-
4
- this_directory = Path(__file__).parent
5
- long_description = (this_directory / "README.md").read_text(encoding="utf-8")
6
-
7
- setup(
8
- name="aiwaf",
9
- version="0.1.3",
10
- description="AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)",
11
- long_description=long_description,
12
- long_description_content_type="text/markdown", # <- required for markdown support
13
- author="Aayush Gauba",
14
- packages=find_packages(),
15
- package_data={
16
- "aiwaf": ["resources/*.pkl"],
17
- },
18
- include_package_data=True,
19
- install_requires=[
20
- "django>=3.0",
21
- "scikit-learn",
22
- "numpy",
23
- "pandas",
24
- "joblib",
25
- ],
26
- entry_points={
27
- "console_scripts": [
28
- "aiwaf-detect=aiwaf.trainer:detect_and_train",
29
- ]
30
- },
31
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes