aiwaf 0.1.7.5__tar.gz → 0.1.7.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/PKG-INFO +1 -1
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/middleware.py +1 -1
- aiwaf-0.1.7.7/aiwaf/resources/model.pkl +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/trainer.py +68 -77
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf.egg-info/PKG-INFO +1 -1
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/pyproject.toml +1 -1
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/setup.py +1 -1
- aiwaf-0.1.7.5/aiwaf/resources/model.pkl +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/LICENSE +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/README.md +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/__init__.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/apps.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/blacklist_manager.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/management/__init__.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/management/commands/__init__.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/management/commands/detect_and_train.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/models.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/storage.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/templatetags/__init__.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/templatetags/aiwaf_tags.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf/utils.py +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf.egg-info/SOURCES.txt +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf.egg-info/dependency_links.txt +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/aiwaf.egg-info/top_level.txt +0 -0
- {aiwaf-0.1.7.5 → aiwaf-0.1.7.7}/setup.cfg +0 -0
|
@@ -14,7 +14,7 @@ from django.core.cache import cache
|
|
|
14
14
|
from django.db.models import F
|
|
15
15
|
from django.apps import apps
|
|
16
16
|
from django.urls import get_resolver
|
|
17
|
-
from .trainer import STATIC_KW, STATUS_IDX, is_exempt_path, path_exists_in_django
|
|
17
|
+
from .trainer import STATIC_KW, STATUS_IDX, is_exempt_path, path_exists_in_django
|
|
18
18
|
from .blacklist_manager import BlacklistManager
|
|
19
19
|
from .models import DynamicKeyword
|
|
20
20
|
|
|
Binary file
|
|
@@ -2,23 +2,23 @@ import os
|
|
|
2
2
|
import glob
|
|
3
3
|
import gzip
|
|
4
4
|
import re
|
|
5
|
+
import joblib
|
|
6
|
+
|
|
5
7
|
from datetime import datetime
|
|
6
8
|
from collections import defaultdict, Counter
|
|
7
9
|
|
|
8
10
|
import pandas as pd
|
|
9
11
|
from sklearn.ensemble import IsolationForest
|
|
10
|
-
import joblib
|
|
11
12
|
|
|
12
13
|
from django.conf import settings
|
|
13
14
|
from django.apps import apps
|
|
14
15
|
from django.db.models import F
|
|
15
|
-
from django.urls import get_resolver
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
LOG_PATH
|
|
17
|
+
# ─────────── Configuration ───────────
|
|
18
|
+
LOG_PATH = settings.AIWAF_ACCESS_LOG
|
|
19
19
|
MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
|
|
20
20
|
|
|
21
|
-
STATIC_KW
|
|
21
|
+
STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
|
|
22
22
|
STATUS_IDX = ["200", "403", "404", "500"]
|
|
23
23
|
|
|
24
24
|
_LOG_RX = re.compile(
|
|
@@ -29,109 +29,105 @@ _LOG_RX = re.compile(
|
|
|
29
29
|
BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
|
|
30
30
|
DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
|
|
33
|
+
def is_exempt_path(path: str) -> bool:
|
|
33
34
|
path = path.lower()
|
|
34
|
-
|
|
35
|
-
for exempt in exempt_paths:
|
|
35
|
+
for exempt in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
|
|
36
36
|
if path == exempt or path.startswith(exempt.rstrip("/") + "/"):
|
|
37
37
|
return True
|
|
38
38
|
return False
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
|
|
41
|
+
def path_exists_in_django(path: str) -> bool:
|
|
41
42
|
from django.urls import get_resolver
|
|
42
|
-
from django.urls.resolvers import
|
|
43
|
+
from django.urls.resolvers import URLResolver
|
|
43
44
|
|
|
44
|
-
|
|
45
|
+
candidate = path.split("?")[0].lstrip("/")
|
|
45
46
|
try:
|
|
46
|
-
get_resolver().resolve(f"/{
|
|
47
|
+
get_resolver().resolve(f"/{candidate}")
|
|
47
48
|
return True
|
|
48
49
|
except:
|
|
49
50
|
pass
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
for
|
|
53
|
-
if isinstance(
|
|
54
|
-
prefix =
|
|
55
|
-
if prefix and
|
|
51
|
+
|
|
52
|
+
root = get_resolver()
|
|
53
|
+
for p in root.url_patterns:
|
|
54
|
+
if isinstance(p, URLResolver):
|
|
55
|
+
prefix = p.pattern.describe().strip("^/")
|
|
56
|
+
if prefix and candidate.startswith(prefix):
|
|
56
57
|
return True
|
|
57
58
|
return False
|
|
58
59
|
|
|
59
|
-
def remove_exempt_keywords():
|
|
60
|
-
exempt_paths = getattr(settings, "AIWAF_EXEMPT_PATHS", [])
|
|
61
|
-
exempt_tokens = set()
|
|
62
|
-
|
|
63
|
-
for path in exempt_paths:
|
|
64
|
-
path = path.strip("/").lower()
|
|
65
|
-
segments = re.split(r"\W+", path)
|
|
66
|
-
exempt_tokens.update(seg for seg in segments if len(seg) > 3)
|
|
67
60
|
|
|
61
|
+
def remove_exempt_keywords() -> None:
|
|
62
|
+
exempt_tokens = set()
|
|
63
|
+
for path in getattr(settings, "AIWAF_EXEMPT_PATHS", []):
|
|
64
|
+
for seg in re.split(r"\W+", path.strip("/").lower()):
|
|
65
|
+
if len(seg) > 3:
|
|
66
|
+
exempt_tokens.add(seg)
|
|
68
67
|
if exempt_tokens:
|
|
69
|
-
|
|
70
|
-
|
|
68
|
+
DynamicKeyword.objects.filter(keyword__in=exempt_tokens).delete()
|
|
69
|
+
|
|
71
70
|
|
|
72
|
-
def _read_all_logs():
|
|
71
|
+
def _read_all_logs() -> list[str]:
|
|
73
72
|
lines = []
|
|
74
73
|
if LOG_PATH and os.path.exists(LOG_PATH):
|
|
75
74
|
with open(LOG_PATH, "r", errors="ignore") as f:
|
|
76
75
|
lines.extend(f.readlines())
|
|
77
|
-
for
|
|
78
|
-
opener = gzip.open if
|
|
76
|
+
for p in sorted(glob.glob(f"{LOG_PATH}.*")):
|
|
77
|
+
opener = gzip.open if p.endswith(".gz") else open
|
|
79
78
|
try:
|
|
80
|
-
with opener(
|
|
79
|
+
with opener(p, "rt", errors="ignore") as f:
|
|
81
80
|
lines.extend(f.readlines())
|
|
82
81
|
except OSError:
|
|
83
82
|
continue
|
|
84
83
|
return lines
|
|
85
84
|
|
|
86
85
|
|
|
87
|
-
def _parse(line):
|
|
86
|
+
def _parse(line: str) -> dict | None:
|
|
88
87
|
m = _LOG_RX.search(line)
|
|
89
88
|
if not m:
|
|
90
89
|
return None
|
|
91
|
-
ip, ts_str, path, status,
|
|
90
|
+
ip, ts_str, path, status, *_ , rt = m.groups()
|
|
92
91
|
try:
|
|
93
92
|
ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
|
|
94
93
|
except ValueError:
|
|
95
94
|
return None
|
|
96
95
|
return {
|
|
97
|
-
"ip":
|
|
98
|
-
"timestamp":
|
|
99
|
-
"path":
|
|
100
|
-
"status":
|
|
101
|
-
"ua": ua,
|
|
96
|
+
"ip": ip,
|
|
97
|
+
"timestamp": ts,
|
|
98
|
+
"path": path,
|
|
99
|
+
"status": status,
|
|
102
100
|
"response_time": float(rt),
|
|
103
101
|
}
|
|
104
102
|
|
|
105
103
|
|
|
106
|
-
|
|
107
|
-
def train():
|
|
104
|
+
def train() -> None:
|
|
108
105
|
remove_exempt_keywords()
|
|
109
106
|
raw_lines = _read_all_logs()
|
|
110
107
|
if not raw_lines:
|
|
111
108
|
print("No log lines found – check AIWAF_ACCESS_LOG setting.")
|
|
112
109
|
return
|
|
110
|
+
|
|
113
111
|
parsed = []
|
|
114
|
-
ip_404
|
|
112
|
+
ip_404 = defaultdict(int)
|
|
115
113
|
ip_times = defaultdict(list)
|
|
116
|
-
|
|
117
|
-
|
|
114
|
+
|
|
115
|
+
for line in raw_lines:
|
|
116
|
+
rec = _parse(line)
|
|
118
117
|
if not rec:
|
|
119
118
|
continue
|
|
120
119
|
parsed.append(rec)
|
|
121
120
|
ip_times[rec["ip"]].append(rec["timestamp"])
|
|
122
121
|
if rec["status"] == "404":
|
|
123
122
|
ip_404[rec["ip"]] += 1
|
|
124
|
-
|
|
123
|
+
|
|
124
|
+
# 3. Optional immediate 404‐flood blocking
|
|
125
125
|
for ip, count in ip_404.items():
|
|
126
126
|
if count >= 6:
|
|
127
|
-
|
|
127
|
+
BlacklistEntry.objects.get_or_create(
|
|
128
128
|
ip_address=ip,
|
|
129
129
|
defaults={"reason": "Excessive 404s (≥6)"}
|
|
130
130
|
)
|
|
131
|
-
if created:
|
|
132
|
-
blocked_404.append(ip)
|
|
133
|
-
if blocked_404:
|
|
134
|
-
print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
|
|
135
131
|
|
|
136
132
|
feature_dicts = []
|
|
137
133
|
for r in parsed:
|
|
@@ -140,60 +136,55 @@ def train():
|
|
|
140
136
|
1 for t in ip_times[ip]
|
|
141
137
|
if (r["timestamp"] - t).total_seconds() <= 10
|
|
142
138
|
)
|
|
143
|
-
total404
|
|
144
|
-
|
|
145
|
-
kw_hits
|
|
146
|
-
if not
|
|
139
|
+
total404 = ip_404[ip]
|
|
140
|
+
known_path = path_exists_in_django(r["path"])
|
|
141
|
+
kw_hits = 0
|
|
142
|
+
if not known_path and not is_exempt_path(r["path"]):
|
|
147
143
|
kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
|
|
144
|
+
|
|
148
145
|
status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
|
|
146
|
+
|
|
149
147
|
feature_dicts.append({
|
|
150
|
-
"ip":
|
|
151
|
-
"path_len":
|
|
152
|
-
"kw_hits":
|
|
153
|
-
"resp_time":
|
|
154
|
-
"status_idx":
|
|
155
|
-
"burst_count":
|
|
156
|
-
"total_404":
|
|
148
|
+
"ip": ip,
|
|
149
|
+
"path_len": len(r["path"]),
|
|
150
|
+
"kw_hits": kw_hits,
|
|
151
|
+
"resp_time": r["response_time"],
|
|
152
|
+
"status_idx": status_idx,
|
|
153
|
+
"burst_count": burst,
|
|
154
|
+
"total_404": total404,
|
|
157
155
|
})
|
|
158
156
|
|
|
159
157
|
if not feature_dicts:
|
|
160
158
|
print("⚠️ Nothing to train on – no valid log entries.")
|
|
161
159
|
return
|
|
160
|
+
|
|
162
161
|
df = pd.DataFrame(feature_dicts)
|
|
163
162
|
feature_cols = [c for c in df.columns if c != "ip"]
|
|
164
163
|
X = df[feature_cols].astype(float).values
|
|
165
164
|
model = IsolationForest(contamination=0.01, random_state=42)
|
|
166
165
|
model.fit(X)
|
|
166
|
+
|
|
167
167
|
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
|
|
168
168
|
joblib.dump(model, MODEL_PATH)
|
|
169
169
|
print(f"Model trained on {len(X)} samples → {MODEL_PATH}")
|
|
170
170
|
preds = model.predict(X)
|
|
171
|
-
anomalous_ips = set(df.loc[preds == -1,
|
|
172
|
-
blocked_anom = []
|
|
171
|
+
anomalous_ips = set(df.loc[preds == -1, "ip"])
|
|
173
172
|
for ip in anomalous_ips:
|
|
174
|
-
|
|
173
|
+
BlacklistEntry.objects.get_or_create(
|
|
175
174
|
ip_address=ip,
|
|
176
175
|
defaults={"reason": "Anomalous behavior"}
|
|
177
176
|
)
|
|
178
|
-
|
|
179
|
-
blocked_anom.append(ip)
|
|
180
|
-
if blocked_anom:
|
|
181
|
-
print(f"🚫 Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
|
|
177
|
+
|
|
182
178
|
tokens = Counter()
|
|
183
179
|
for r in parsed:
|
|
184
|
-
if r["status"].startswith(("4", "5"))
|
|
180
|
+
if (r["status"].startswith(("4", "5"))
|
|
181
|
+
and not path_exists_in_django(r["path"])):
|
|
185
182
|
for seg in re.split(r"\W+", r["path"].lower()):
|
|
186
183
|
if len(seg) > 3 and seg not in STATIC_KW:
|
|
187
184
|
tokens[seg] += 1
|
|
188
185
|
|
|
189
|
-
|
|
190
|
-
for kw, cnt in top_tokens:
|
|
186
|
+
for kw, cnt in tokens.most_common(10):
|
|
191
187
|
obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
|
|
192
188
|
DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
|
|
193
189
|
|
|
194
|
-
print(f"
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
if __name__ == "__main__":
|
|
199
|
-
train()
|
|
190
|
+
print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in tokens.most_common(10)]}")
|
|
@@ -9,7 +9,7 @@ long_description = (HERE / "README.md").read_text(encoding="utf-8")
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="aiwaf",
|
|
12
|
-
version="0.1.7.
|
|
12
|
+
version="0.1.7.7",
|
|
13
13
|
description="AI‑driven, self‑learning Web Application Firewall for Django",
|
|
14
14
|
long_description=long_description,
|
|
15
15
|
long_description_content_type="text/markdown",
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|