aiwaf 0.1.9.0__tar.gz → 0.1.9.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. aiwaf-0.1.9.4.6/PKG-INFO +855 -0
  2. aiwaf-0.1.9.4.6/README.md +823 -0
  3. aiwaf-0.1.9.4.6/aiwaf/__init__.py +6 -0
  4. aiwaf-0.1.9.4.6/aiwaf/apps.py +12 -0
  5. aiwaf-0.1.9.4.6/aiwaf/blacklist_manager.py +41 -0
  6. aiwaf-0.1.9.4.6/aiwaf/geoip.py +189 -0
  7. aiwaf-0.1.9.4.6/aiwaf/geolock/ipinfo_lite.mmdb +0 -0
  8. aiwaf-0.1.9.4.6/aiwaf/management/commands/add_exemption.py +30 -0
  9. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/management/commands/add_ipexemption.py +1 -1
  10. aiwaf-0.1.9.4.6/aiwaf/management/commands/add_pathexemption.py +36 -0
  11. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/management/commands/aiwaf_diagnose.py +29 -1
  12. aiwaf-0.1.9.4.6/aiwaf/management/commands/aiwaf_list.py +104 -0
  13. aiwaf-0.1.9.4.6/aiwaf/management/commands/aiwaf_pathshell.py +163 -0
  14. aiwaf-0.1.9.4.6/aiwaf/management/commands/aiwaf_reset.py +183 -0
  15. aiwaf-0.1.9.4.6/aiwaf/management/commands/clear_blacklist.py +66 -0
  16. aiwaf-0.1.9.4.6/aiwaf/management/commands/clear_cache.py +18 -0
  17. aiwaf-0.1.9.4.6/aiwaf/management/commands/debug_csv.py +155 -0
  18. aiwaf-0.1.9.4.6/aiwaf/management/commands/detect_and_train.py +22 -0
  19. aiwaf-0.1.9.4.6/aiwaf/management/commands/diagnose_blocking.py +96 -0
  20. aiwaf-0.1.9.4.6/aiwaf/management/commands/geo_block_country.py +40 -0
  21. aiwaf-0.1.9.4.6/aiwaf/management/commands/geo_traffic_summary.py +65 -0
  22. aiwaf-0.1.9.4.6/aiwaf/management/commands/regenerate_model.py +96 -0
  23. aiwaf-0.1.9.4.6/aiwaf/management/commands/setup_models.py +35 -0
  24. aiwaf-0.1.9.4.6/aiwaf/management/commands/test_exemption.py +120 -0
  25. aiwaf-0.1.9.4.6/aiwaf/management/commands/test_exemption_fix.py +54 -0
  26. aiwaf-0.1.9.4.6/aiwaf/middleware.py +1334 -0
  27. aiwaf-0.1.9.4.6/aiwaf/middleware_logger.py +129 -0
  28. aiwaf-0.1.9.4.6/aiwaf/model_store.py +114 -0
  29. aiwaf-0.1.9.4.6/aiwaf/models.py +116 -0
  30. aiwaf-0.1.9.4.6/aiwaf/settings_compat.py +103 -0
  31. aiwaf-0.1.9.4.6/aiwaf/storage.py +501 -0
  32. aiwaf-0.1.9.4.6/aiwaf/trainer.py +757 -0
  33. aiwaf-0.1.9.4.6/aiwaf/utils.py +227 -0
  34. aiwaf-0.1.9.4.6/aiwaf.egg-info/PKG-INFO +855 -0
  35. aiwaf-0.1.9.4.6/aiwaf.egg-info/SOURCES.txt +93 -0
  36. aiwaf-0.1.9.4.6/aiwaf.egg-info/requires.txt +19 -0
  37. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/pyproject.toml +17 -2
  38. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/setup.py +18 -3
  39. aiwaf-0.1.9.4.6/tests/test_404_only_learning_django.py +60 -0
  40. aiwaf-0.1.9.4.6/tests/test_aiwaf_reset_django.py +76 -0
  41. aiwaf-0.1.9.4.6/tests/test_basic_import_django.py +126 -0
  42. aiwaf-0.1.9.4.6/tests/test_conservative_path_validation_django.py +47 -0
  43. aiwaf-0.1.9.4.6/tests/test_csv_simple_django.py +48 -0
  44. aiwaf-0.1.9.4.6/tests/test_edge_case_fix_demo_django.py +61 -0
  45. aiwaf-0.1.9.4.6/tests/test_exemption_simple_django.py +49 -0
  46. aiwaf-0.1.9.4.6/tests/test_geo_blocking.py +72 -0
  47. aiwaf-0.1.9.4.6/tests/test_geo_traffic_summary.py +30 -0
  48. aiwaf-0.1.9.4.6/tests/test_header_validation_django.py +68 -0
  49. aiwaf-0.1.9.4.6/tests/test_honeypot_enhancements_django.py +75 -0
  50. aiwaf-0.1.9.4.6/tests/test_import_fix_django.py +48 -0
  51. aiwaf-0.1.9.4.6/tests/test_improved_path_validation_django.py +47 -0
  52. aiwaf-0.1.9.4.6/tests/test_include_path_edge_case_django.py +61 -0
  53. aiwaf-0.1.9.4.6/tests/test_keyword_persistence_django.py +48 -0
  54. aiwaf-0.1.9.4.6/tests/test_keyword_protection_django.py +65 -0
  55. aiwaf-0.1.9.4.6/tests/test_keyword_storage_debug_django.py +61 -0
  56. aiwaf-0.1.9.4.6/tests/test_live_web_app_django.py +62 -0
  57. aiwaf-0.1.9.4.6/tests/test_malicious_keywords_fix_django.py +47 -0
  58. aiwaf-0.1.9.4.6/tests/test_method_validation_django.py +170 -0
  59. aiwaf-0.1.9.4.6/tests/test_method_validation_simple_django.py +63 -0
  60. aiwaf-0.1.9.4.6/tests/test_middleware_enhanced_validation_django.py +47 -0
  61. aiwaf-0.1.9.4.6/tests/test_middleware_learning_fix_django.py +60 -0
  62. aiwaf-0.1.9.4.6/tests/test_middleware_logger_django.py +48 -0
  63. aiwaf-0.1.9.4.6/tests/test_middleware_protection_django.py +92 -0
  64. aiwaf-0.1.9.4.6/tests/test_model_storage.py +43 -0
  65. aiwaf-0.1.9.4.6/tests/test_path_exemptions_django.py +53 -0
  66. aiwaf-0.1.9.4.6/tests/test_path_rules_django.py +64 -0
  67. aiwaf-0.1.9.4.6/tests/test_path_validation_flaw_django.py +47 -0
  68. aiwaf-0.1.9.4.6/tests/test_rate_limiting_django.py +48 -0
  69. aiwaf-0.1.9.4.6/tests/test_rate_limiting_pure_logic_django.py +48 -0
  70. aiwaf-0.1.9.4.6/tests/test_real_world_headers_django.py +51 -0
  71. aiwaf-0.1.9.4.6/tests/test_route_keyword_extraction_django.py +60 -0
  72. aiwaf-0.1.9.4.6/tests/test_route_protection_simple_django.py +60 -0
  73. aiwaf-0.1.9.4.6/tests/test_settings.py +165 -0
  74. aiwaf-0.1.9.4.6/tests/test_settings_compat.py +104 -0
  75. aiwaf-0.1.9.4.6/tests/test_simplified_honeypot_django.py +52 -0
  76. aiwaf-0.1.9.4.6/tests/test_status_summary.py +86 -0
  77. aiwaf-0.1.9.4.6/tests/test_storage_fix_django.py +61 -0
  78. aiwaf-0.1.9.4.6/tests/test_storage_simple_django.py +48 -0
  79. aiwaf-0.1.9.4.6/tests/test_trainer_enhancements_django.py +78 -0
  80. aiwaf-0.1.9.4.6/tests/test_trainer_functions_django.py +94 -0
  81. aiwaf-0.1.9.4.6/tests/test_trainer_geo_summary.py +59 -0
  82. aiwaf-0.1.9.4.6/tests/test_unified_keyword_logic_django.py +86 -0
  83. aiwaf-0.1.9.4.6/tests/test_urls.py +28 -0
  84. aiwaf-0.1.9.4.6/tests/test_view_method_detection_django.py +73 -0
  85. aiwaf-0.1.9.0/PKG-INFO +0 -440
  86. aiwaf-0.1.9.0/README.md +0 -419
  87. aiwaf-0.1.9.0/aiwaf/__init__.py +0 -19
  88. aiwaf-0.1.9.0/aiwaf/apps.py +0 -5
  89. aiwaf-0.1.9.0/aiwaf/blacklist_manager.py +0 -24
  90. aiwaf-0.1.9.0/aiwaf/management/commands/aiwaf_reset.py +0 -95
  91. aiwaf-0.1.9.0/aiwaf/management/commands/detect_and_train.py +0 -10
  92. aiwaf-0.1.9.0/aiwaf/middleware.py +0 -251
  93. aiwaf-0.1.9.0/aiwaf/middleware_logger.py +0 -160
  94. aiwaf-0.1.9.0/aiwaf/models.py +0 -46
  95. aiwaf-0.1.9.0/aiwaf/storage.py +0 -392
  96. aiwaf-0.1.9.0/aiwaf/trainer.py +0 -257
  97. aiwaf-0.1.9.0/aiwaf/utils.py +0 -106
  98. aiwaf-0.1.9.0/aiwaf.egg-info/PKG-INFO +0 -440
  99. aiwaf-0.1.9.0/aiwaf.egg-info/SOURCES.txt +0 -29
  100. aiwaf-0.1.9.0/aiwaf.egg-info/requires.txt +0 -5
  101. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/LICENSE +0 -0
  102. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/decorators.py +0 -0
  103. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/management/__init__.py +0 -0
  104. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/management/commands/__init__.py +0 -0
  105. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/management/commands/aiwaf_logging.py +0 -0
  106. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/resources/model.pkl +0 -0
  107. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/templatetags/__init__.py +0 -0
  108. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf/templatetags/aiwaf_tags.py +0 -0
  109. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf.egg-info/dependency_links.txt +0 -0
  110. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/aiwaf.egg-info/top_level.txt +0 -0
  111. {aiwaf-0.1.9.0 → aiwaf-0.1.9.4.6}/setup.cfg +0 -0
@@ -0,0 +1,855 @@
1
+ Metadata-Version: 2.4
2
+ Name: aiwaf
3
+ Version: 0.1.9.4.6
4
+ Summary: AI-powered Web Application Firewall
5
+ Home-page: https://github.com/aayushgauba/aiwaf
6
+ Author: Aayush Gauba
7
+ Author-email: Aayush Gauba <gauba.aayush@gmail.com>
8
+ License: MIT
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: Django>=3.2
13
+ Requires-Dist: numpy>=1.21
14
+ Requires-Dist: pandas>=1.3
15
+ Requires-Dist: scikit-learn<2.0,>=1.0
16
+ Requires-Dist: joblib>=1.1
17
+ Requires-Dist: geoip2>=4.0
18
+ Requires-Dist: packaging>=21.0
19
+ Requires-Dist: requests>=2.25.0
20
+ Provides-Extra: learning
21
+ Requires-Dist: numpy>=1.21; extra == "learning"
22
+ Requires-Dist: pandas>=1.3; extra == "learning"
23
+ Requires-Dist: scikit-learn<2.0,>=1.0; extra == "learning"
24
+ Requires-Dist: joblib>=1.1; extra == "learning"
25
+ Provides-Extra: geoblock
26
+ Requires-Dist: geoip2>=4.0; extra == "geoblock"
27
+ Provides-Extra: light
28
+ Dynamic: author
29
+ Dynamic: home-page
30
+ Dynamic: license-file
31
+ Dynamic: requires-python
32
+
33
+
34
+ # AI‑WAF
35
+
36
+ > A self‑learning, Django‑friendly Web Application Firewall
37
+ > with **enhanced context-aware protection**, rate‑limiting, anomaly detection, honeypots, UUID‑tamper protection, **smart keyword learning**, file‑extension probing detection, exempt path awareness, and daily retraining.
38
+
39
+ **🆕 Latest Enhancements:**
40
+ - ✅ **Smart Keyword Filtering** - Prevents blocking legitimate pages like `/profile/`
41
+ - ✅ **Granular Reset Commands** - Clear specific data types (`--blacklist`, `--keywords`, `--exemptions`)
42
+ - ✅ **Context-Aware Learning** - Only learns from suspicious requests, not legitimate site functionality
43
+ - ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
44
+ - ✅ **Comprehensive HTTP Method Validation** - Blocks GET requests to POST-only views, POST requests to GET-only views, and unsupported REST methods
45
+ - ✅ **Enhanced Honeypot Protection** - POST validation & 4-minute page timeout with smart reload detection
46
+ - ✅ **HTTP Header Validation** - Comprehensive bot detection via header analysis and quality scoring
47
+
48
+ ---
49
+
50
+ ## 🚀 Quick Installation
51
+
52
+ ```bash
53
+ pip install aiwaf
54
+ ```
55
+
56
+ **⚠️ Important:** Add `'aiwaf'` to your Django `INSTALLED_APPS` to avoid setup errors.
57
+
58
+ **📋 Complete Setup Guide:** See [INSTALLATION.md](INSTALLATION.md) for detailed installation instructions and troubleshooting.
59
+
60
+ ---
61
+
62
+ ## System Requirements
63
+
64
+ No GPU is needed: AI-WAF runs entirely on CPU. Small sites need only Python 3.8+, Django 3.2+, a single vCPU, and ~512 MB RAM. For moderate production traffic, scale to 2–4 vCPUs and 2–4 GB RAM, offload the daily detect-and-train job to a worker, and rotate logs to keep memory use bounded.
65
+
66
+ ## 📁 Package Structure
67
+
68
+ ```
69
+ aiwaf/
70
+ ├── __init__.py
71
+ ├── blacklist_manager.py
72
+ ├── middleware.py
73
+ ├── trainer.py # exposes train()
74
+ ├── utils.py
75
+ ├── templatetags/
76
+ │ └── aiwaf_tags.py
77
+ ├── resources/
78
+ │ ├── model.pkl # pre‑trained base model
79
+ │ └── dynamic_keywords.json # evolves daily
80
+ ├── management/
81
+ │ └── commands/
82
+ │ ├── detect_and_train.py # `python manage.py detect_and_train`
83
+ │ ├── add_ipexemption.py # `python manage.py add_ipexemption`
84
+ │ ├── aiwaf_reset.py # `python manage.py aiwaf_reset`
85
+ │ └── aiwaf_logging.py # `python manage.py aiwaf_logging`
86
+ └── LICENSE
87
+ ```
88
+
89
+ ---
90
+
91
+ ## 🚀 Features
92
+
93
+ - **IP Blocklist**
94
+ Instantly blocks suspicious IPs using Django models with real-time performance.
95
+
96
+ - **Rate Limiting**
97
+ Sliding‑window blocks flooders (> `AIWAF_RATE_MAX` per `AIWAF_RATE_WINDOW`), then blacklists them.
98
+
99
+ - **AI Anomaly Detection**
100
+ IsolationForest trained on:
101
+ - Path length
102
+ - Keyword hits (static + dynamic)
103
+ - Response time
104
+ - Status‑code index
105
+ - Burst count
106
+ - Total 404s
107
+
108
+ - **GeoIP Support**
109
+ AIWAF supports optional geo-blocking and country-level traffic statistics using a local GeoIP database.
110
+
111
+ - **Enhanced Dynamic Keyword Learning with Django Route Protection**
112
+ - **Smart Context-Aware Learning**: Only learns keywords from suspicious requests on non-existent paths
113
+ - **Automatic Django Route Extraction**: Automatically excludes keywords from:
114
+ - Valid Django URL patterns (`/profile/`, `/admin/`, `/api/`, etc.)
115
+ - Django app names and model names (users, posts, categories)
116
+ - View function names and URL namespaces
117
+ - **Unified Logic**: Both trainer and middleware use identical legitimate keyword detection
118
+ - **Configuration Options**:
119
+ - `AIWAF_ALLOWED_PATH_KEYWORDS` - Explicitly allow certain keywords in legitimate paths
120
+ - `AIWAF_EXEMPT_KEYWORDS` - Keywords that should never trigger blocking
121
+ - **Automatic Cleanup**: Keywords from `AIWAF_EXEMPT_PATHS` are automatically removed from the database
122
+ - **False Positive Prevention**: Stops learning legitimate site functionality as "malicious"
123
+ - **Inherent Malicious Detection**: Middleware also blocks obviously malicious keywords (`hack`, `exploit`, `attack`) even if not yet learned
124
+
125
+ - **File‑Extension Probing Detection**
126
+ Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
127
+
128
+ - **🆕 HTTP Header Validation**
129
+ Advanced header analysis to detect bots and malicious requests:
130
+ - **Missing Required Headers** - Blocks requests without User-Agent or Accept headers
131
+ - **Suspicious User-Agents** - Detects curl, wget, python-requests, automated tools
132
+ - **Header Quality Scoring** - Calculates realism score based on browser-standard headers
133
+ - **Legitimate Bot Whitelist** - Allows Googlebot, Bingbot, and other search engines
134
+ - **Header Combination Analysis** - Detects impossible combinations (HTTP/2 + old browsers)
135
+ - **Static File Exemption** - Skips validation for CSS, JS, images
136
+
137
+ ## 🛡️ Header Validation Middleware Features
138
+
139
+ The **HeaderValidationMiddleware** provides advanced bot detection through HTTP header analysis:
140
+
141
+ ### **What it detects:**
142
+ - **Missing Headers**: Requests without standard browser headers
143
+ - **Suspicious User-Agents**: WordPress scanners, exploit tools, basic scrapers
144
+ - **Bot-like Patterns**: Low header diversity, missing Accept headers
145
+ - **Quality Scoring**: 0-11 point system based on header completeness
146
+
147
+ ### **What it allows:**
148
+ - **Legitimate Browsers**: Chrome, Firefox, Safari, Edge with full headers
149
+ - **Search Engine Bots**: Google, Bing, DuckDuckGo, Yandex crawlers
150
+ - **API Clients**: Properly identified with good headers
151
+ - **Static Files**: CSS, JS, images (automatically exempted)
152
+
153
+ ### **Real-world effectiveness:**
154
+ ```
155
+ ✅ Blocks: WordPress scanners, exploit bots, basic scrapers
156
+ ✅ Allows: Real browsers, legitimate bots, API clients
157
+ ✅ Quality Score: 10/11 = Legitimate, 2/11 = Suspicious bot
158
+ ```
159
+
160
+ ### **Testing header validation:**
161
+ ```bash
162
+ # Test with curl (will be blocked - low quality headers)
163
+ curl http://yoursite.com/
164
+
165
+ # Test with browser (will be allowed - high quality headers)
166
+ # Visit site normally in Chrome/Firefox
167
+
168
+ # Check logs for header validation blocks
169
+ python manage.py aiwaf_logging --recent
170
+ ```
171
+
172
+ - **Enhanced Timing-Based Honeypot**
173
+ Advanced GET→POST timing analysis with comprehensive HTTP method validation:
174
+ - Blocks clients that submit forms faster than `AIWAF_MIN_FORM_TIME` seconds after the initial GET (default: 1 second)
175
+ - **🆕 Smart HTTP Method Validation** - Comprehensive protection against method misuse:
176
+ - Blocks GET requests to POST-only views (form endpoints, API creates)
177
+ - Blocks POST requests to GET-only views (list pages, read-only content)
178
+ - Blocks unsupported REST methods (PUT/DELETE to non-REST views)
179
+ - Uses Django view analysis: class-based views, method handlers, URL patterns
180
+ - **🆕 Page expiration** after `AIWAF_MAX_PAGE_TIME` (default: 4 minutes) with smart reload detection
181
+
182
+ - **UUID Tampering Protection**
183
+ Blocks guessed or invalid UUIDs that don't resolve to real models.
184
+
185
+ - **Built-in Request Logger**
186
+ Optional middleware logger that captures requests to Django models:
187
+ - **Automatic fallback** when main access logs unavailable
188
+ - **Real-time storage** in database for instant access
189
+ - **Captures response times** for better anomaly detection
190
+ - **Zero configuration** - works out of the box
191
+
192
+ - **Blocked Request Debug Logging**
193
+ Optional debug logs that explain why a request was blocked:
194
+ - **Reason included** (keyword, flood pattern, AI anomaly, header validation, etc.)
195
+ - **Request context** (IP, method, path, user agent)
196
+ - **Disabled by default** - enable via Django `LOGGING`
197
+
198
+ Example `settings.py`:
199
+ ```python
200
+ LOGGING = {
201
+ "version": 1,
202
+ "disable_existing_loggers": False,
203
+ "handlers": {
204
+ "console": {"class": "logging.StreamHandler"},
205
+ },
206
+ "loggers": {
207
+ "aiwaf.middleware": {"handlers": ["console"], "level": "DEBUG"},
208
+ },
209
+ }
210
+ ```
211
+
212
+ - **Smart Training System**
213
+ AI trainer automatically uses the best available data source:
214
+ - **Primary**: Configured access log files (`AIWAF_ACCESS_LOG`)
215
+ - **Fallback**: Database RequestLog model when files unavailable
216
+ - **Seamless switching** between data sources
217
+ - **Enhanced compatibility** with exemption system
218
+ - **Minimum log thresholds**: AI training requires at least `AIWAF_MIN_AI_LOGS` log lines (default 10,000); with fewer, training falls back to keyword-only learning, which still requires at least `AIWAF_MIN_TRAIN_LOGS` log lines (default 50)
219
+
220
+ **Exempt Path & IP Awareness**
221
+
222
+ **Exempt Paths:**
223
+ AI‑WAF automatically exempts common login paths (`/admin/`, `/login/`, `/accounts/login/`, etc.) from all blocking mechanisms. You can add additional exempt paths in your Django `settings.py`:
224
+
225
+ ```python
226
+ AIWAF_EXEMPT_PATHS = [
227
+ "/api/webhooks/",
228
+ "/health/",
229
+ "/special-endpoint/",
230
+ ]
231
+ ```
232
+
233
+ You can also store exempt paths in the database (no deploy needed):
234
+
235
+ ```bash
236
+ python manage.py aiwaf_pathshell
237
+ ```
238
+
239
+ Or add directly:
240
+
241
+ ```bash
242
+ python manage.py add_pathexemption /myapp/api/ --reason "API traffic"
243
+ ```
244
+
245
+ **AIWAF Path Shell Commands:**
246
+ ```
247
+ ls # list routes at current level
248
+ cd <index|name> # enter a route segment
249
+ up / cd .. # go up one level
250
+ pwd # show current path prefix
251
+ exempt <index|name|.> # add exemption for selection or current path
252
+ exit # quit
253
+ ```
254
+
255
+
256
+ **Exempt Path & IP Awareness**
257
+
258
+ **Exempt Paths:**
259
+ AI‑WAF automatically exempts common login paths (`/admin/`, `/login/`, `/accounts/login/`, etc.) from all blocking mechanisms. You can add additional exempt paths in your Django `settings.py`:
260
+
261
+ ```python
262
+ AIWAF_EXEMPT_PATHS = [
263
+ "/api/webhooks/",
264
+ "/health/",
265
+ "/special-endpoint/",
266
+ ]
267
+ ```
268
+
269
+ You can also store exempt paths in the database (no deploy needed):
270
+
271
+ ```bash
272
+ python manage.py aiwaf_pathshell
273
+ ```
274
+
275
+ Or add directly:
276
+
277
+ ```bash
278
+ python manage.py add_pathexemption /myapp/api/ --reason "API traffic"
279
+ ```
280
+
281
+ **AIWAF Path Shell Commands:**
282
+ ```
283
+ ls # list routes at current level
284
+ cd <index|name> # enter a route segment
285
+ up / cd .. # go up one level
286
+ pwd # show current path prefix
287
+ exempt <index|name|.> # add exemption for selection or current path
288
+ exit # quit
289
+ ```
290
+
291
+ **Exempt Views (Decorator):**
292
+ Use the `@aiwaf_exempt` decorator to exempt specific views from all AI-WAF protection:
293
+
294
+ ```python
295
+ from aiwaf.decorators import aiwaf_exempt
296
+ from django.http import JsonResponse
297
+
298
+ @aiwaf_exempt
299
+ def my_api_view(request):
300
+ """This view will be exempt from all AI-WAF protection"""
301
+ return JsonResponse({"status": "success"})
302
+
303
+ # Works with class-based views too
304
+ @aiwaf_exempt
305
+ class MyAPIView(View):
306
+ def get(self, request):
307
+ return JsonResponse({"method": "GET"})
308
+ ```
309
+
310
+ All exempt paths and views are:
311
+ - Skipped from keyword learning
312
+ - Immune to AI blocking
313
+ - Ignored in log training
314
+ - Cleaned from `DynamicKeyword` model automatically
315
+
316
+ **Exempt IPs:**
317
+ You can exempt specific IP addresses from all blocking and blacklisting logic. Exempted IPs will:
318
+ - Never be added to the blacklist (even if they trigger rules)
319
+ - Be automatically removed from the blacklist during retraining
320
+ - Bypass all block/deny logic in middleware
321
+
322
+ ### Managing Exempt IPs
323
+
324
+ Add an IP to the exemption list using the management command:
325
+
326
+ ```bash
327
+ python manage.py add_ipexemption <ip-address> --reason "optional reason"
328
+ ```
329
+
330
+ ### Resetting AI-WAF
331
+
332
+ The `aiwaf_reset` command provides **granular control** for clearing different types of data:
333
+
334
+ ```bash
335
+ # Clear everything (default - backward compatible)
336
+ python manage.py aiwaf_reset
337
+
338
+ # Clear everything without confirmation prompt
339
+ python manage.py aiwaf_reset --confirm
340
+
341
+ # 🆕 GRANULAR CONTROL - Clear specific data types
342
+ python manage.py aiwaf_reset --blacklist # Clear only blocked IPs
343
+ python manage.py aiwaf_reset --exemptions # Clear only exempted IPs
344
+ python manage.py aiwaf_reset --keywords # Clear only learned keywords
345
+
346
+ # 🔧 COMBINE OPTIONS - Mix and match as needed
347
+ python manage.py aiwaf_reset --blacklist --keywords # Keep exemptions
348
+ python manage.py aiwaf_reset --exemptions --keywords # Keep blacklist
349
+ python manage.py aiwaf_reset --blacklist --exemptions # Keep keywords
350
+
351
+ # 🚀 COMMON USE CASES
352
+ # Fix false positive keywords (like "profile" blocking legitimate pages)
353
+ python manage.py aiwaf_reset --keywords --confirm
354
+ python manage.py detect_and_train # Retrain with enhanced filtering
355
+
356
+ # Clear blocked IPs but preserve exemptions and learning
357
+ python manage.py aiwaf_reset --blacklist --confirm
358
+
359
+ # Legacy support (still works for backward compatibility)
360
+ python manage.py aiwaf_reset --blacklist-only # Legacy: blacklist only
361
+ python manage.py aiwaf_reset --exemptions-only # Legacy: exemptions only
362
+ ```
363
+
364
+ **Enhanced Feedback:**
365
+ ```bash
366
+ $ python manage.py aiwaf_reset --keywords
367
+ 🔧 AI-WAF Reset: Clear 15 learned keywords
368
+ Are you sure you want to proceed? [y/N]: y
369
+ ✅ Reset complete: Deleted 15 learned keywords
370
+ ```
371
+
372
+ Adding an IP with `add_ipexemption` ensures it is never blocked by AI‑WAF. You can also manage exemptions via the Django admin interface.
373
+
374
+ - **Daily Retraining**
375
+ Reads rotated logs, auto‑blocks 404 floods, retrains the IsolationForest, updates `model.pkl`, and evolves the keyword DB.
376
+ If GeoIP is enabled, it also prints a country summary for anomalous IPs.
377
+
378
+ ---
379
+
380
+ ## ⚙️ Configuration (`settings.py`)
381
+
382
+ ```python
383
+ INSTALLED_APPS += ["aiwaf"]
384
+ ```
385
+
386
+ ### Database Setup
387
+
388
+ After adding `aiwaf` to your `INSTALLED_APPS`, run the following to create the necessary tables:
389
+
390
+ ```bash
391
+ python manage.py makemigrations aiwaf
392
+ python manage.py migrate
393
+ ```
394
+
395
+ ---
396
+
397
+ ### Required
398
+
399
+ ```python
400
+ AIWAF_ACCESS_LOG = "/var/log/nginx/access.log"
401
+ ```
402
+
403
+ ---
404
+
405
+ ### Database Models
406
+
407
+ AI-WAF uses Django models for real-time, high-performance storage:
408
+
409
+ ```python
410
+ # All data is stored in Django models - no configuration needed
411
+ # Tables created automatically with migrations:
412
+ # - aiwaf_blacklistentry # Blocked IP addresses
413
+ # - aiwaf_ipexemption # Exempt IP addresses
414
+ # - aiwaf_exemptpath # Exempt path prefixes
415
+ # - aiwaf_dynamickeyword # Dynamic keywords with counts
416
+ # - aiwaf_featuresample # Feature samples for ML training
417
+ # - aiwaf_requestlog # Request logs (if middleware logging enabled)
418
+ ```
419
+
420
+ **Benefits of Django Models:**
421
+ - ⚡ **Real-time performance** - No file I/O bottlenecks
422
+ - 🔄 **Instant updates** - Changes visible immediately across all processes
423
+ - 🚀 **Better concurrency** - No file locking issues
424
+ - 📊 **Rich querying** - Use Django ORM for complex operations
425
+ - 🔍 **Admin integration** - View/manage data through Django admin
426
+
427
+ **Database Setup:**
428
+ ```bash
429
+ # Create and apply migrations
430
+ python manage.py makemigrations aiwaf
431
+ python manage.py migrate aiwaf
432
+ ```
433
+
434
+ ---
435
+
436
+ ### Built-in Request Logger (Optional)
437
+
438
+ Enable AI-WAF's built-in request logger as a fallback when main access logs aren't available:
439
+
440
+ ```python
441
+ # Enable middleware logging
442
+ AIWAF_MIDDLEWARE_LOGGING = True # Enable/disable logging
443
+ ```
444
+
445
+ **Then add middleware to MIDDLEWARE list:**
446
+
447
+ ```python
448
+ MIDDLEWARE = [
449
+ # ... your existing middleware ...
450
+ 'aiwaf.middleware_logger.AIWAFLoggerMiddleware', # Add near the end
451
+ ]
452
+ ```
453
+
454
+ **Manage middleware logging:**
455
+
456
+ ```bash
457
+ python manage.py aiwaf_logging --status # Check logging status
458
+ python manage.py aiwaf_logging --enable # Show setup instructions
459
+ python manage.py aiwaf_logging --clear # Clear log files
460
+ ```
461
+
462
+ **Benefits:**
463
+ - **Automatic fallback** when `AIWAF_ACCESS_LOG` unavailable
464
+ - **Database storage** with precise timestamps and response times
465
+ - **Zero configuration** - trainer automatically detects and uses model logs
466
+ - **Lightweight** - fails silently to avoid breaking your application
467
+
468
+ ---
469
+
470
+ ### Optional (defaults shown)
471
+
472
+ ```python
473
+ AIWAF_MODEL_PATH = BASE_DIR / "aiwaf" / "resources" / "model.pkl"
474
+ AIWAF_MODEL_STORAGE = "file" # file | db | cache
475
+ AIWAF_MODEL_CACHE_KEY = "aiwaf:model"
476
+ AIWAF_MODEL_CACHE_TIMEOUT = None # seconds; None for no expiry
477
+ AIWAF_MODEL_STORAGE_FALLBACK = True # fallback to file when db/cache unavailable
478
+ AIWAF_MIN_FORM_TIME = 1.0 # minimum seconds between GET and POST
479
+ AIWAF_MAX_PAGE_TIME = 240 # maximum page age before requiring reload (4 minutes)
480
+ AIWAF_AI_CONTAMINATION = 0.05 # AI anomaly detection sensitivity (5%)
481
+ AIWAF_MIN_AI_LOGS = 10000 # minimum log lines for AI training
482
+ AIWAF_MIN_TRAIN_LOGS = 50 # minimum log lines for keyword training
483
+ AIWAF_FORCE_AI_TRAINING = False # override AIWAF_MIN_AI_LOGS gate
484
+ AIWAF_RATE_WINDOW = 10 # seconds
485
+ AIWAF_RATE_MAX = 20 # max requests per window
486
+ AIWAF_RATE_FLOOD = 10 # flood threshold
487
+ AIWAF_WINDOW_SECONDS = 60 # anomaly detection window
488
+ AIWAF_FILE_EXTENSIONS = [".php", ".asp", ".jsp"]
489
+
490
+ # Geo-blocking (optional; needs geoip2, which ships with the default install and with aiwaf[geoblock])
491
+ AIWAF_GEO_BLOCK_ENABLED = False
492
+ AIWAF_GEOIP_DB_PATH = "aiwaf/geolock/ipinfo_lite.mmdb"
493
+ AIWAF_GEO_BLOCK_COUNTRIES = ["CN", "RU"]
494
+ AIWAF_GEO_ALLOW_COUNTRIES = [] # If set, only these countries are allowed
495
+ AIWAF_GEO_CACHE_SECONDS = 3600
496
+ AIWAF_GEO_CACHE_PREFIX = "aiwaf:geo:"
497
+ AIWAF_EXEMPT_PATHS = [ # optional but highly recommended
498
+ "/favicon.ico",
499
+ "/robots.txt",
500
+ "/static/",
501
+ "/media/",
502
+ "/health/",
503
+ ]
504
+
505
+ # 🆕 ENHANCED KEYWORD FILTERING OPTIONS
506
+ AIWAF_ALLOWED_PATH_KEYWORDS = [ # Keywords allowed in legitimate paths
507
+ "profile", "user", "account", "settings", "dashboard",
508
+ "admin", "api", "auth", "search", "contact", "about",
509
+ # Add your site-specific legitimate keywords
510
+ "buddycraft", "sc2", "starcraft", # Example: gaming site keywords
511
+ ]
512
+
513
+ AIWAF_EXEMPT_KEYWORDS = [ # Keywords that never trigger blocking
514
+ "api", "webhook", "health", "static", "media",
515
+ "upload", "download", "backup", "profile"
516
+ ]
517
+
518
+ AIWAF_DYNAMIC_TOP_N = 10 # Number of dynamic keywords to learn (default: 10)
519
+ ```
520
+
521
+ > **Note:** You no longer need to define `AIWAF_MALICIOUS_KEYWORDS` or `AIWAF_STATUS_CODES` — they evolve dynamically.
522
+
523
+ **Model storage options:**
524
+ - `file` (default) writes to `AIWAF_MODEL_PATH`
525
+ - `db` stores the model in the `AIModelArtifact` table (run migrations)
526
+ - `cache` stores the model in your Django cache backend
527
+
528
+ ### Installation Modes
529
+
530
+ Full install (default) includes AI training and GeoIP support:
531
+
532
+ ```bash
533
+ pip install aiwaf
534
+ ```
535
+
536
+ Light install (manual deps only):
537
+
538
+ ```bash
539
+ pip install aiwaf --no-deps
540
+ pip install "Django>=3.2" "requests>=2.25.0"
541
+ ```
542
+
543
+ Geo-blocking uses the bundled `.mmdb` file by default. Set `AIWAF_GEOIP_DB_PATH` to override.
544
+
545
+ **GeoBlock Middleware:**
546
+ Enable the middleware and the feature flag:
547
+
548
+ ```python
549
+ AIWAF_GEO_BLOCK_ENABLED = True
550
+ ```
551
+
552
+ ```python
553
+ MIDDLEWARE = [
554
+ "aiwaf.middleware.GeoBlockMiddleware",
555
+ # ... other AI-WAF middleware ...
556
+ ]
557
+ ```
558
+
559
+ ### Acknowledgements
560
+
561
+ Geo-blocking functionality in AIWAF relies on the IPinfo MMDB for IP-to-country mapping.
562
+ Thanks to IPinfo for providing a reliable GeoIP database.
563
+
564
+ **Dynamic country blocking (database-backed):**
565
+
566
+ ```bash
567
+ python manage.py geo_block_country list
568
+ python manage.py geo_block_country add US
569
+ python manage.py geo_block_country remove US
570
+ ```
571
+
572
+ ### Path-Specific Rules
573
+
574
+ Use path rules to selectively disable middleware or override settings without
575
+ full exemptions:
576
+
577
+ ```python
578
+ AIWAF_SETTINGS = {
579
+ "PATH_RULES": [
580
+ {
581
+ "PREFIX": "/myapp/api/",
582
+ "DISABLE": ["HeaderValidationMiddleware"],
583
+ "RATE_LIMIT": {"WINDOW": 60, "MAX": 2000},
584
+ },
585
+ {
586
+ "PREFIX": "/myapp/",
587
+ "RATE_LIMIT": {"WINDOW": 60, "MAX": 200},
588
+ },
589
+ ]
590
+ }
591
+ ```
592
+
593
+ Each middleware checks `request.path`, computes the effective policy, then
594
+ applies or skips accordingly.
595
+
596
+ Define `PATH_RULES` in your Django settings file (e.g. `settings.py`) under
597
+ `AIWAF_SETTINGS`.
598
+
599
+ ### Legacy `AIWAF_SETTINGS` Compatibility
600
+
601
+ If you already use the nested `AIWAF_SETTINGS` dict, AI-WAF will map common keys into the flat `AIWAF_*` settings at startup (without overriding explicit `AIWAF_*` values). Supported mappings include `RATE_LIMITING`, `EXEMPTIONS.PATHS`, `IP_BLOCKING.ENABLED`, `KEYWORD_DETECTION` (custom patterns + sensitivity), and `LOGGING.ENABLED`.
602
+
603
+ ---
604
+
605
+ ## 🧱 Middleware Setup
606
+
607
+ Add in **this** order to your `MIDDLEWARE` list:
608
+
609
+ ```python
610
+ MIDDLEWARE = [
611
+ "aiwaf.middleware.GeoBlockMiddleware",
612
+ "aiwaf.middleware.IPAndKeywordBlockMiddleware",
613
+ "aiwaf.middleware.RateLimitMiddleware",
614
+ "aiwaf.middleware.AIAnomalyMiddleware",
615
+ "aiwaf.middleware.HoneypotTimingMiddleware",
616
+ "aiwaf.middleware.UUIDTamperMiddleware",
617
+ # ... other middleware ...
618
+ "aiwaf.middleware_logger.AIWAFLoggerMiddleware", # Optional: Add if using built-in logger
619
+ ]
620
+ ```
621
+
622
+ > **⚠️ Order matters!** AI-WAF protection middleware should come early. The logger middleware should come near the end to capture final response data.
623
+
624
+ ### **Troubleshooting Middleware Errors**
625
+
626
+ **Error: `Module "aiwaf.middleware" does not define a "UUIDTamperMiddleware" attribute/class`**
627
+
628
+ **Solutions:**
629
+ 1. **Update AI-WAF to latest version:**
630
+ ```bash
631
+ pip install --upgrade aiwaf
632
+ ```
633
+
634
+ 2. **Run diagnostic commands:**
635
+ ```bash
636
+ # Quick debug script (from AI-WAF directory)
637
+ python debug_aiwaf.py
638
+
639
+ # Django management command
640
+ python manage.py aiwaf_diagnose
641
+ ```
642
+
643
+ 3. **Check available middleware classes:**
644
+ ```python
645
+ # In Django shell: python manage.py shell
646
+ import aiwaf.middleware
647
+ print(dir(aiwaf.middleware))
648
+ ```
649
+
650
+ 4. **Verify AI-WAF is in INSTALLED_APPS:**
651
+ ```python
652
+ # In settings.py
653
+ INSTALLED_APPS = [
654
+ # ... other apps ...
655
+ 'aiwaf', # Must be included
656
+ ]
657
+ ```
658
+
659
+ 5. **Use minimal middleware setup if needed:**
660
+ ```python
661
+ MIDDLEWARE = [
662
+ # ... your existing middleware ...
663
+ "aiwaf.middleware.IPAndKeywordBlockMiddleware", # Core protection
664
+ "aiwaf.middleware.RateLimitMiddleware", # Rate limiting
665
+ "aiwaf.middleware.AIAnomalyMiddleware", # AI detection
666
+ ]
667
+ ```
668
+
669
+ **Common Issues:**
670
+ - **AppRegistryNotReady Error**: Fixed in v0.1.9.0.1 - update with `pip install --upgrade aiwaf`
671
+ - **Scikit-learn Version Warnings**: Fixed in v0.1.9.0.3 - regenerate model with `python manage.py regenerate_model`
672
+ - Missing Django: `pip install Django`
673
+ - Old AI-WAF version: `pip install --upgrade aiwaf`
674
+ - Missing migrations: `python manage.py migrate`
675
+ - Import errors: Check `INSTALLED_APPS` includes `'aiwaf'`
676
+
677
+
678
+ ---
679
+
680
+ ## Running Detection & Training
681
+
682
+ ```bash
683
+ python manage.py detect_and_train
684
+ ```
685
+
686
+ ### What happens:
687
+ 1. Read access logs (incl. rotated or gzipped) **OR** AI-WAF middleware model logs
688
+ 2. Auto‑block IPs with ≥ 6 total 404s
689
+ 3. Extract features & train IsolationForest
690
+ 4. Save `model.pkl` with current scikit-learn version
691
+
692
+ ### Model Regeneration
693
+
694
+ If you see scikit-learn version warnings, regenerate the model:
695
+
696
+ ```bash
697
+ # Quick model regeneration (recommended)
698
+ python manage.py regenerate_model
699
+
700
+ # Full retraining with fresh data
701
+ python manage.py detect_and_train
702
+ ```
703
+
704
+ **Benefits:**
705
+ - ✅ Eliminates version compatibility warnings
706
+ - ✅ Uses current scikit-learn optimizations
707
+ - ✅ Maintains same protection level
708
+
709
+ A full `detect_and_train` run also saves `model.pkl`, extracts the top 10 dynamic keywords from 4xx/5xx responses,
710
+ and removes any keywords associated with newly exempt paths.
711
+
712
+ **Note:** If main access log (`AIWAF_ACCESS_LOG`) is unavailable, trainer automatically falls back to AI-WAF middleware model logs.
713
+
714
+ ---
715
+
716
+ ## 🧠 How It Works
717
+
718
+
719
+ ---
720
+
721
+ ## Running Detection & Training
722
+
723
+ ```bash
724
+ python manage.py detect_and_train
725
+ ```
726
+
727
+ ### What happens:
728
+ 1. Read access logs (incl. rotated or gzipped)
729
+ 2. Auto‑block IPs with ≥ 6 total 404s
730
+ 3. Extract features & train IsolationForest
731
+ 4. Save `model.pkl`
732
+ 5. Extract top 10 dynamic keywords from 4xx/5xx
733
+ 6. Remove any keywords associated with newly exempt paths
734
+
735
+ ---
736
+
737
+ ## 🔧 Troubleshooting
738
+
739
+ ### Legitimate Pages Being Blocked
740
+
741
+ **Problem**: Users can't access legitimate pages like `/en/profile/` due to keyword blocking.
742
+
743
+ **Cause**: AIWAF learned legitimate keywords (like "profile") as suspicious from previous traffic.
744
+
745
+ **Solution**:
746
+ ```bash
747
+ # 1. Clear problematic learned keywords
748
+ python manage.py aiwaf_reset --keywords --confirm
749
+
750
+ # 2. Add legitimate keywords to settings
751
+ # In settings.py:
752
+ AIWAF_ALLOWED_PATH_KEYWORDS = [
753
+ "profile", "user", "account", "dashboard",
754
+ # Add your site-specific keywords
755
+ ]
756
+
757
+ # 3. Retrain with enhanced filtering (won't learn legitimate keywords)
758
+ python manage.py detect_and_train
759
+
760
+ # 4. Test - legitimate pages should now work!
761
+ ```
762
+
763
+ ### Preventing Future False Positives
764
+
765
+ Configure AIWAF to recognize your site's legitimate keywords:
766
+
767
+ ```python
768
+ # settings.py
769
+ AIWAF_ALLOWED_PATH_KEYWORDS = [
770
+ # Common legitimate keywords
771
+ "profile", "user", "account", "settings", "dashboard",
772
+ "admin", "search", "contact", "about", "help",
773
+
774
+ # Your site-specific keywords
775
+ "buddycraft", "sc2", "starcraft", # Gaming site example
776
+ "shop", "cart", "checkout", # E-commerce example
777
+ "blog", "article", "news", # Content site example
778
+ ]
779
+ ```
780
+
781
+ ### Reset Command Options
782
+
783
+ ```bash
784
+ # Clear everything (safest for troubleshooting)
785
+ python manage.py aiwaf_reset --confirm
786
+
787
+ # Clear only problematic keywords
788
+ python manage.py aiwaf_reset --keywords --confirm
789
+
790
+ # Clear blocked IPs but keep exemptions
791
+ python manage.py aiwaf_reset --blacklist --confirm
792
+ ```
793
+
794
+ ---
795
+
796
+ ## 🧠 How It Works
797
+
798
+ | Middleware | Purpose |
799
+ |------------------------------------|-----------------------------------------------------------------|
800
+ | GeoBlockMiddleware | Blocks traffic by country based on GeoIP database |
801
+ | IPAndKeywordBlockMiddleware | Blocks requests from known blacklisted IPs and keywords |
802
+ | RateLimitMiddleware | Enforces burst & flood thresholds |
803
+ | AIAnomalyMiddleware | ML‑driven behavior analysis + block on anomaly |
804
+ | HoneypotTimingMiddleware | Enhanced bot detection: GET→POST timing, POST validation, page timeouts |
805
+ | UUIDTamperMiddleware | Blocks guessed/nonexistent UUIDs across all models in an app |
806
+ | HeaderValidationMiddleware | Blocks suspicious header patterns and low‑quality user agents |
807
+ | AIWAFLoggerMiddleware | Optional request logger for model training and analysis |
808
+
809
+ ### 🍯 Enhanced Honeypot Protection
810
+
811
+ The **HoneypotTimingMiddleware** now includes advanced bot detection capabilities:
812
+
813
+ #### 🚫 Smart POST Request Validation
814
+ - **Analyzes Django views** to determine actual allowed HTTP methods
815
+ - **Intelligent detection** of GET-only vs POST-capable views
816
+ - **Example**: `POST` to view with `http_method_names = ['get']` → `403 Blocked`
817
+
818
+ #### ⏰ Page Timeout with Smart Reload
819
+ - **4-minute page expiration** prevents stale session attacks
820
+ - **HTTP 409 response** with reload instructions instead of immediate blocking
821
+ - **CSRF protection** by forcing fresh page loads for old sessions
822
+
823
+ ```python
824
+ # Configuration
825
+ AIWAF_MIN_FORM_TIME = 1.0 # Minimum form submission time
826
+ AIWAF_MAX_PAGE_TIME = 240 # Page timeout (4 minutes)
827
+ ```
828
+
829
+ **Timeline Example**:
830
+ ```
831
+ 12:00:00 - GET /contact/ ✅ Page loaded
832
+ 12:02:00 - POST /contact/ ✅ Valid submission (2 minutes)
833
+ 12:04:30 - POST /contact/ ❌ 409 Conflict (page expired, reload required)
834
+ ```
835
+
836
+ ---
837
+
838
+ ## Sponsors
839
+
840
+ This project is proudly supported by:
841
+
842
+ <a href="https://www.digitalocean.com/">
843
+ <img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg" width="201px">
844
+ </a>
845
+
846
+ [DigitalOcean](https://www.digitalocean.com/) provides the cloud infrastructure that powers AIWAF development.
847
+
848
+ ---
849
+
850
+ ## License
851
+
852
+ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
853
+
854
+ ---
855
+