PyPI - aiwaf - Versions diffs - 0.1.3__tar.gz → 0.1.6__tar.gz - Mend

aiwaf-0.1.3.tar.gz → aiwaf-0.1.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiwaf might be problematic. Click here for more details.

Files changed (26)

aiwaf-0.1.6/LICENSE +21 -0
{aiwaf-0.1.3/aiwaf.egg-info → aiwaf-0.1.6}/PKG-INFO +24 -11
aiwaf-0.1.3/PKG-INFO → aiwaf-0.1.6/README.md +18 -21
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/middleware.py +34 -4
aiwaf-0.1.6/aiwaf/trainer.py +151 -0
aiwaf-0.1.3/README.md → aiwaf-0.1.6/aiwaf.egg-info/PKG-INFO +35 -11
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/SOURCES.txt +1 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/pyproject.toml +2 -2
aiwaf-0.1.6/setup.py +43 -0
aiwaf-0.1.3/aiwaf/trainer.py +0 -175
aiwaf-0.1.3/setup.py +0 -31
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/__init__.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/apps.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/blacklist_manager.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/management/__init__.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/management/commands/__init__.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/management/commands/detect_and_train.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/models.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/resources/model.pkl +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/storage.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/template_tags/__init__.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/template_tags/aiwaf_tags.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/utils.py +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/dependency_links.txt +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/top_level.txt +0 -0
{aiwaf-0.1.3 → aiwaf-0.1.6}/setup.cfg +0 -0

aiwaf-0.1.6/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Aayush Gauba
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

{aiwaf-0.1.3/aiwaf.egg-info → aiwaf-0.1.6}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,21 @@
 Metadata-Version: 2.4
 Name: aiwaf
-Version: 0.1.3
+Version: 0.1.6
 Summary: AI-powered Web Application Firewall
+Home-page: https://github.com/aayushgauba/aiwaf
 Author: Aayush Gauba
 Author-email: Aayush Gauba <gauba.aayush@gmail.com>
 License: MIT
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
+License-File: LICENSE
 Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
-# AI‑WAF
+# AI‑WAF
 > A self‑learning, Django‑friendly Web Application Firewall
 > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -91,6 +97,14 @@ pip install -e .
 ```python
 INSTALLED_APPS += ["aiwaf"]
+### Database Setup
+After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
+```bash
+python manage.py makemigrations aiwaf
+python manage.py migrate
 # Required
 AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
@@ -114,7 +128,7 @@ Add in **this** order to your `MIDDLEWARE` list:
 ```python
 MIDDLEWARE = [
-    "aiwaf.middleware.IPBlockMiddleware",
+    "aiwaf.middleware.IPAndKeywordBlockMiddleware",
     "aiwaf.middleware.RateLimitMiddleware",
     "aiwaf.middleware.AIAnomalyMiddleware",
     "aiwaf.middleware.HoneypotMiddleware",
@@ -159,14 +173,13 @@ python manage.py detect_and_train
 ## How It Works
-| Middleware               | Purpose                                                         |
-|--------------------------|------------------------------------------------------------------|
-| IPBlockMiddleware        | Blocks requests from known blacklisted IPs                      |
-| RateLimitMiddleware      | Enforces burst & flood thresholds                               |
-| AIAnomalyMiddleware      | ML‑driven behavior analysis + block on anomaly                  |
-| HoneypotMiddleware       | Detects bots filling hidden inputs in forms                     |
-| UUIDTamperMiddleware     | Blocks guessed/nonexistent UUIDs across all models in an app    |
+| Middleware                         | Purpose                                                         |
+|------------------------------------|-----------------------------------------------------------------|
+| IPAndKeywordBlockMiddleware        | Blocks requests from known blacklisted IPs and Keywords         |
+| RateLimitMiddleware                | Enforces burst & flood thresholds                               |
+| AIAnomalyMiddleware                | ML‑driven behavior analysis + block on anomaly                  |
+| HoneypotMiddleware                 | Detects bots filling hidden inputs in forms                     |
+| UUIDTamperMiddleware               | Blocks guessed/nonexistent UUIDs across all models in an app    |
 ---
 ## License

aiwaf-0.1.3/PKG-INFO → aiwaf-0.1.6/README.md RENAMED Viewed

@@ -1,15 +1,5 @@
-Metadata-Version: 2.4
-Name: aiwaf
-Version: 0.1.3
-Summary: AI-powered Web Application Firewall
-Author: Aayush Gauba
-Author-email: Aayush Gauba <gauba.aayush@gmail.com>
-License: MIT
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-Dynamic: author
-# AI‑WAF
+# AI‑WAF
 > A self‑learning, Django‑friendly Web Application Firewall
 > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -91,6 +81,14 @@ pip install -e .
 ```python
 INSTALLED_APPS += ["aiwaf"]
+### Database Setup
+After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
+```bash
+python manage.py makemigrations aiwaf
+python manage.py migrate
 # Required
 AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
@@ -114,7 +112,7 @@ Add in **this** order to your `MIDDLEWARE` list:
 ```python
 MIDDLEWARE = [
-    "aiwaf.middleware.IPBlockMiddleware",
+    "aiwaf.middleware.IPAndKeywordBlockMiddleware",
     "aiwaf.middleware.RateLimitMiddleware",
     "aiwaf.middleware.AIAnomalyMiddleware",
     "aiwaf.middleware.HoneypotMiddleware",
@@ -159,14 +157,13 @@ python manage.py detect_and_train
 ## How It Works
-| Middleware               | Purpose                                                         |
-|--------------------------|------------------------------------------------------------------|
-| IPBlockMiddleware        | Blocks requests from known blacklisted IPs                      |
-| RateLimitMiddleware      | Enforces burst & flood thresholds                               |
-| AIAnomalyMiddleware      | ML‑driven behavior analysis + block on anomaly                  |
-| HoneypotMiddleware       | Detects bots filling hidden inputs in forms                     |
-| UUIDTamperMiddleware     | Blocks guessed/nonexistent UUIDs across all models in an app    |
+| Middleware                         | Purpose                                                         |
+|------------------------------------|-----------------------------------------------------------------|
+| IPAndKeywordBlockMiddleware        | Blocks requests from known blacklisted IPs and Keywords         |
+| RateLimitMiddleware                | Enforces burst & flood thresholds                               |
+| AIAnomalyMiddleware                | ML‑driven behavior analysis + block on anomaly                  |
+| HoneypotMiddleware                 | Detects bots filling hidden inputs in forms                     |
+| UUIDTamperMiddleware               | Blocks guessed/nonexistent UUIDs across all models in an app    |
 ---
 ## License

{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf/middleware.py RENAMED Viewed

@@ -13,11 +13,11 @@ from django.conf import settings
 from django.core.cache import cache
 from django.db.models import F
 from django.apps import apps
+from django.urls import get_resolver
 from .blacklist_manager import BlacklistManager
 from .models import DynamicKeyword
-# ─── Model loading with fallback ────────────────────────────────────────────
 MODEL_PATH = getattr(
     settings,
     "AIWAF_MODEL_PATH",
@@ -25,7 +25,6 @@ MODEL_PATH = getattr(
 )
 MODEL = joblib.load(MODEL_PATH)
-# ─── Static keywords default ────────────────────────────────────────────────
 STATIC_KW = getattr(
     settings,
     "AIWAF_MALICIOUS_KEYWORDS",
@@ -41,15 +40,46 @@ def get_ip(request):
         return xff.split(",")[0].strip()
     return request.META.get("REMOTE_ADDR", "")
-class IPBlockMiddleware:
+class IPAndKeywordBlockMiddleware:
     def __init__(self, get_response):
         self.get_response = get_response
+        self.url_patterns = self._collect_view_paths()
+    def _collect_view_paths(self):
+        resolver = get_resolver()
+        patterns = set()
+        def extract(patterns_list, prefix=""):
+            for p in patterns_list:
+                if hasattr(p, "url_patterns"):
+                    extract(p.url_patterns, prefix + str(p.pattern))
+                else:
+                    pat = (prefix + str(p.pattern)).strip("^$")
+                    patterns.add(pat)
+        extract(resolver.url_patterns)
+        return patterns
     def __call__(self, request):
         ip = get_ip(request)
+        path = request.path.lower()
         if BlacklistManager.is_blocked(ip):
             return JsonResponse({"error": "blocked"}, status=403)
+        segments = [seg for seg in re.split(r"\W+", path) if len(seg) > 3]
+        for seg in segments:
+            obj, _ = DynamicKeyword.objects.get_or_create(keyword=seg)
+            DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + 1)
+        dynamic_top = list(
+            DynamicKeyword.objects
+            .order_by("-count")
+            .values_list("keyword", flat=True)[: getattr(settings, "AIWAF_DYNAMIC_TOP_N", 10)]
+        )
+        all_kw = set(STATIC_KW) | set(dynamic_top)
+        safe_kw = {kw for kw in all_kw if any(kw in pat for pat in self.url_patterns)}
+        suspicious_kw = all_kw - safe_kw
+        for seg in segments:
+            if seg in suspicious_kw:
+                BlacklistManager.block(ip, f"Keyword block: {seg}")
+                return JsonResponse({"error": "blocked"}, status=403)
         return self.get_response(request)

aiwaf-0.1.6/aiwaf/trainer.py ADDED Viewed

@@ -0,0 +1,151 @@
+import os
+import glob
+import gzip
+import re
+from datetime import datetime
+from collections import defaultdict, Counter
+import pandas as pd
+from sklearn.ensemble import IsolationForest
+import joblib
+from django.conf import settings
+from django.apps import apps
+from django.db.models import F
+LOG_PATH = settings.AIWAF_ACCESS_LOG
+MODEL_PATH = os.path.join(os.path.dirname(__file__), "resources", "model.pkl")
+STATIC_KW = [".php", "xmlrpc", "wp-", ".env", ".git", ".bak", "conflg", "shell", "filemanager"]
+STATUS_IDX = ["200", "403", "404", "500"]
+_LOG_RX = re.compile(
+    r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
+    r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
+)
+BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
+DynamicKeyword = apps.get_model("aiwaf", "DynamicKeyword")
+def _read_all_logs():
+    lines = []
+    if LOG_PATH and os.path.exists(LOG_PATH):
+        with open(LOG_PATH, "r", errors="ignore") as f:
+            lines.extend(f.readlines())
+    for path in sorted(glob.glob(f"{LOG_PATH}.*")):
+        opener = gzip.open if path.endswith(".gz") else open
+        try:
+            with opener(path, "rt", errors="ignore") as f:
+                lines.extend(f.readlines())
+        except OSError:
+            continue
+    return lines
+def _parse(line):
+    m = _LOG_RX.search(line)
+    if not m:
+        return None
+    ip, ts_str, path, status, ref, ua, rt = m.groups()
+    try:
+        ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
+    except ValueError:
+        return None
+    return {
+        "ip": ip,
+        "timestamp": ts,
+        "path": path,
+        "status": status,
+        "ua": ua,
+        "response_time": float(rt),
+    }
+def train():
+    raw_lines = _read_all_logs()
+    if not raw_lines:
+        print(" No log lines found – check AIWAF_ACCESS_LOG setting.")
+        return
+    parsed = []
+    ip_404 = defaultdict(int)
+    ip_times = defaultdict(list)
+    for ln in raw_lines:
+        rec = _parse(ln)
+        if not rec:
+            continue
+        parsed.append(rec)
+        ip_times[rec["ip"]].append(rec["timestamp"])
+        if rec["status"] == "404":
+            ip_404[rec["ip"]] += 1
+    blocked_404 = []
+    for ip, count in ip_404.items():
+        if count >= 6:
+            obj, created = BlacklistEntry.objects.get_or_create(
+                ip_address=ip,
+                defaults={"reason": "Excessive 404s (≥6)"}
+            )
+            if created:
+                blocked_404.append(ip)
+    if blocked_404:
+        print(f"Blocked {len(blocked_404)} IPs for 404 flood: {blocked_404}")
+    feature_dicts = []
+    for r in parsed:
+        ip = r["ip"]
+        burst = sum(
+            1 for t in ip_times[ip]
+            if (r["timestamp"] - t).total_seconds() <= 10
+        )
+        total404 = ip_404[ip]
+        kw_hits = sum(k in r["path"].lower() for k in STATIC_KW)
+        status_idx = STATUS_IDX.index(r["status"]) if r["status"] in STATUS_IDX else -1
+        feature_dicts.append({
+            "ip": ip,
+            "path_len": len(r["path"]),
+            "kw_hits": kw_hits,
+            "resp_time": r["response_time"],
+            "status_idx": status_idx,
+            "burst_count": burst,
+            "total_404": total404,
+        })
+    if not feature_dicts:
+        print("⚠️ Nothing to train on – no valid log entries.")
+        return
+    df = pd.DataFrame(feature_dicts)
+    feature_cols = [c for c in df.columns if c != "ip"]
+    X = df[feature_cols].astype(float).values
+    model = IsolationForest(contamination=0.01, random_state=42)
+    model.fit(X)
+    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
+    joblib.dump(model, MODEL_PATH)
+    print(f"✅ Model trained on {len(X)} samples → {MODEL_PATH}")
+    preds = model.predict(X)  # -1 for outliers
+    anomalous_ips = set(df.loc[preds == -1, 'ip'])
+    blocked_anom = []
+    for ip in anomalous_ips:
+        obj, created = BlacklistEntry.objects.get_or_create(
+            ip_address=ip,
+            defaults={"reason": "Anomalous behavior"}
+        )
+        if created:
+            blocked_anom.append(ip)
+    if blocked_anom:
+        print(f" Blocked {len(blocked_anom)} anomalous IPs: {blocked_anom}")
+    tokens = Counter()
+    for r in parsed:
+        if r["status"].startswith(("4", "5")):
+            for seg in re.split(r"\W+", r["path"].lower()):
+                if len(seg) > 3 and seg not in STATIC_KW:
+                    tokens[seg] += 1
+    top_tokens = tokens.most_common(10)
+    for kw, cnt in top_tokens:
+        obj, _ = DynamicKeyword.objects.get_or_create(keyword=kw)
+        DynamicKeyword.objects.filter(pk=obj.pk).update(count=F("count") + cnt)
+    print(f"DynamicKeyword DB updated with top tokens: {[kw for kw, _ in top_tokens]}")
+if __name__ == "__main__":
+    train()

aiwaf-0.1.3/README.md → aiwaf-0.1.6/aiwaf.egg-info/PKG-INFO RENAMED Viewed

@@ -1,4 +1,21 @@
-# AI‑WAF
+Metadata-Version: 2.4
+Name: aiwaf
+Version: 0.1.6
+Summary: AI-powered Web Application Firewall
+Home-page: https://github.com/aayushgauba/aiwaf
+Author: Aayush Gauba
+Author-email: Aayush Gauba <gauba.aayush@gmail.com>
+License: MIT
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
+# AI‑WAF
 > A self‑learning, Django‑friendly Web Application Firewall
 > with rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, dynamic keyword extraction, file‑extension probing detection, and daily retraining.
@@ -80,6 +97,14 @@ pip install -e .
 ```python
 INSTALLED_APPS += ["aiwaf"]
+### Database Setup
+After adding `aiwaf` to your `INSTALLED_APPS`, create the necessary tables for the IP‐blacklist and dynamic‐keyword models:
+```bash
+python manage.py makemigrations aiwaf
+python manage.py migrate
 # Required
 AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
@@ -103,7 +128,7 @@ Add in **this** order to your `MIDDLEWARE` list:
 ```python
 MIDDLEWARE = [
-    "aiwaf.middleware.IPBlockMiddleware",
+    "aiwaf.middleware.IPAndKeywordBlockMiddleware",
     "aiwaf.middleware.RateLimitMiddleware",
     "aiwaf.middleware.AIAnomalyMiddleware",
     "aiwaf.middleware.HoneypotMiddleware",
@@ -148,14 +173,13 @@ python manage.py detect_and_train
 ## How It Works
-| Middleware               | Purpose                                                         |
-|--------------------------|------------------------------------------------------------------|
-| IPBlockMiddleware        | Blocks requests from known blacklisted IPs                      |
-| RateLimitMiddleware      | Enforces burst & flood thresholds                               |
-| AIAnomalyMiddleware      | ML‑driven behavior analysis + block on anomaly                  |
-| HoneypotMiddleware       | Detects bots filling hidden inputs in forms                     |
-| UUIDTamperMiddleware     | Blocks guessed/nonexistent UUIDs across all models in an app    |
+| Middleware                         | Purpose                                                         |
+|------------------------------------|-----------------------------------------------------------------|
+| IPAndKeywordBlockMiddleware        | Blocks requests from known blacklisted IPs and Keywords         |
+| RateLimitMiddleware                | Enforces burst & flood thresholds                               |
+| AIAnomalyMiddleware                | ML‑driven behavior analysis + block on anomaly                  |
+| HoneypotMiddleware                 | Detects bots filling hidden inputs in forms                     |
+| UUIDTamperMiddleware               | Blocks guessed/nonexistent UUIDs across all models in an app    |
 ---
 ## License
@@ -167,4 +191,4 @@ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) f
 ## Credits
 **AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
-> “Let your firewall learn and evolve — keep your site a fortress.”
+> “Let your firewall learn and evolve — keep your site a fortress.”

{aiwaf-0.1.3 → aiwaf-0.1.6}/aiwaf.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,3 +1,4 @@
+LICENSE
 README.md
 pyproject.toml
 setup.py

{aiwaf-0.1.3 → aiwaf-0.1.6}/pyproject.toml RENAMED Viewed

@@ -1,9 +1,9 @@
 [project]
 name = "aiwaf"
-version = "0.1.3"
+version = "0.1.6"
 description = "AI-powered Web Application Firewall"
 readme = "README.md"
 requires-python = ">=3.8"
 license = {text = "MIT"}
 authors = [{ name = "Aayush Gauba", email = "gauba.aayush@gmail.com" }]
-dependencies = [ ]
+dependencies = [ ]

aiwaf-0.1.6/setup.py ADDED Viewed

@@ -0,0 +1,43 @@
+# setup.py
+from setuptools import setup, find_packages
+import pathlib
+HERE = pathlib.Path(__file__).parent
+# read the long description from your README
+long_description = (HERE / "README.md").read_text(encoding="utf-8")
+setup(
+    name="aiwaf",
+    version="0.1.6",
+    description="AI‑driven, self‑learning Web Application Firewall for Django",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="Aayush Gauba",
+    url="https://github.com/aayushgauba/aiwaf",
+    license="MIT",
+    packages=find_packages(exclude=["tests*", "docs*"]),
+    python_requires=">=3.8",
+    install_requires=[
+        "Django>=3.2",
+        "numpy>=1.21",
+        "pandas>=1.3",
+        "scikit-learn>=1.0",
+        "joblib>=1.1",
+    ],
+    include_package_data=True,
+    package_data={
+        # include your pretrained model and any JSON resources
+        "aiwaf": ["resources/*.pkl", "resources/*.json"]
+    },
+    entry_points={
+        "console_scripts": [
+            "aiwaf-detect=aiwaf.trainer:train",
+        ]
+    },
+    classifiers=[
+        "Framework :: Django",
+        "Programming Language :: Python :: 3",
+        "License :: MIT License",
+    ],
+)

aiwaf-0.1.3/aiwaf/trainer.py DELETED Viewed

@@ -1,175 +0,0 @@
-import os
-import glob
-import gzip
-import re
-import json
-import joblib
-from datetime import datetime
-from collections import defaultdict, Counter
-import pandas as pd
-from sklearn.ensemble import IsolationForest
-from django.conf import settings
-from django.apps import apps
-# ─── CONFIG ────────────────────────────────────────────────────────────────
-# Where to read your access logs (and rotated/.gz siblings)
-LOG_PATH = settings.AIWAF_ACCESS_LOG
-# Where we save our trained model
-MODEL_PATH = os.path.join(
-    os.path.dirname(__file__),
-    "resources",
-    "model.pkl"
-)
-# Static “malicious” path keywords & file extensions
-MALICIOUS_KEYWORDS = [
-    ".php", "xmlrpc", "wp-", ".env", ".git", ".bak",
-    "conflg", "shell", "filemanager"
-]
-STATUS_CODES = ["200", "403", "404", "500"]
-# Regex for combined log with response-time=…
-_LOG_RX = re.compile(
-    r'(\d+\.\d+\.\d+\.\d+).*\[(.*?)\].*"(?:GET|POST) (.*?) HTTP/.*?" '
-    r'(\d{3}).*?"(.*?)" "(.*?)".*?response-time=(\d+\.\d+)'
-)
-# Your Django model for storing blocked IPs
-BlacklistEntry = apps.get_model("aiwaf", "BlacklistEntry")
-# ─── READ & PARSE LOG LINES ─────────────────────────────────────────────────
-def _read_all_logs():
-    lines = []
-    if LOG_PATH and os.path.exists(LOG_PATH):
-        with open(LOG_PATH, "r", errors="ignore") as f:
-            lines += f.readlines()
-    for path in sorted(glob.glob(LOG_PATH + ".*")):
-        opener = gzip.open if path.endswith(".gz") else open
-        try:
-            with opener(path, "rt", errors="ignore") as f:
-                lines += f.readlines()
-        except OSError:
-            continue
-    return lines
-def _parse(line):
-    m = _LOG_RX.search(line)
-    if not m:
-        return None
-    ip, ts_str, path, status, ref, ua, rt = m.groups()
-    try:
-        ts = datetime.strptime(ts_str.split()[0], "%d/%b/%Y:%H:%M:%S")
-    except ValueError:
-        return None
-    return {
-        "ip": ip,
-        "timestamp": ts,
-        "path": path,
-        "status": status,
-        "ua": ua,
-        "response_time": float(rt),
-    }
-# ─── TRAIN ENTRYPOINT ───────────────────────────────────────────────────────
-def train():
-    raw = _read_all_logs()
-    if not raw:
-        print("❌ No log lines found – check settings.AIWAF_ACCESS_LOG")
-        return
-    parsed  = []
-    ip_404   = defaultdict(int)
-    ip_times = defaultdict(list)
-    # parse + accumulate timestamps & 404 counts
-    for ln in raw:
-        rec = _parse(ln)
-        if not rec:
-            continue
-        parsed.append(rec)
-        ip_times[rec["ip"]].append(rec["timestamp"])
-        if rec["status"] == "404":
-            ip_404[rec["ip"]] += 1
-    # auto-block IPs with >=6 total 404s
-    newly_blocked = []
-    for ip, cnt in ip_404.items():
-        if cnt >= 6:
-            obj, created = BlacklistEntry.objects.get_or_create(
-                ip_address=ip,
-                defaults={"reason": "Excessive 404s (≥6)"}
-            )
-            if created:
-                newly_blocked.append(ip)
-    if newly_blocked:
-        print(f"🔒 Blocked {len(newly_blocked)} IPs for 404 flood: {newly_blocked}")
-    # build feature vectors
-    rows = []
-    for r in parsed:
-        ip         = r["ip"]
-        burst      = sum(
-            1 for t in ip_times[ip]
-            if (r["timestamp"] - t).total_seconds() <= 10
-        )
-        total404   = ip_404[ip]
-        kw_hits    = sum(k in r["path"].lower() for k in MALICIOUS_KEYWORDS)
-        status_idx = STATUS_CODES.index(r["status"]) if r["status"] in STATUS_CODES else -1
-        rows.append([
-            len(r["path"]),
-            kw_hits,
-            r["response_time"],
-            status_idx,
-            burst,
-            total404
-        ])
-    if not rows:
-        print("⚠️ No entries to train on.")
-        return
-    df = pd.DataFrame(
-        rows,
-        columns=[
-            "path_len", "kw_hits", "resp_time",
-            "status_idx", "burst_count", "total_404"
-        ]
-    ).fillna(0).astype(float)
-    # train & save
-    clf = IsolationForest(contamination=0.01, random_state=42)
-    clf.fit(df.values)
-    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
-    joblib.dump(clf, MODEL_PATH)
-    print(f"✅ Model trained on {len(df)} samples → {MODEL_PATH}")
-    # extract top‑10 dynamic keywords from 4xx/5xx paths
-    tokens = Counter()
-    for r in parsed:
-        if r["status"].startswith(("4", "5")):
-            segs = re.split(r"\W+", r["path"].lower())
-            for seg in segs:
-                if len(seg) > 3 and seg not in MALICIOUS_KEYWORDS:
-                    tokens[seg] += 1
-    new_kw = [kw for kw, _ in tokens.most_common(10)]
-    DK_FILE = os.path.join(os.path.dirname(__file__), "resources", "dynamic_keywords.json")
-    try:
-        existing = set(json.load(open(DK_FILE)))
-    except FileNotFoundError:
-        existing = set()
-    updated = sorted(existing | set(new_kw))
-    with open(DK_FILE, "w") as f:
-        json.dump(updated, f, indent=2)
-    print(f"📝 Updated dynamic keywords: {new_kw}")

aiwaf-0.1.3/setup.py DELETED Viewed

@@ -1,31 +0,0 @@
-from setuptools import setup, find_packages
-from pathlib import Path
-this_directory = Path(__file__).parent
-long_description = (this_directory / "README.md").read_text(encoding="utf-8")
-setup(
-    name="aiwaf",
-    version="0.1.3",
-    description="AI‑driven pluggable Web Application Firewall for Django (CSV or DB storage)",
-    long_description=long_description,
-    long_description_content_type="text/markdown",  # <- required for markdown support
-    author="Aayush Gauba",
-    packages=find_packages(),
-    package_data={
-        "aiwaf": ["resources/*.pkl"],
-    },
-    include_package_data=True,
-    install_requires=[
-        "django>=3.0",
-        "scikit-learn",
-        "numpy",
-        "pandas",
-        "joblib",
-    ],
-    entry_points={
-        "console_scripts": [
-            "aiwaf-detect=aiwaf.trainer:detect_and_train",
-        ]
-    },
-)