aiwaf-js 0.0.4 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +6 -0
- package/.github/workflows/node.js.yml +1 -1
- package/.github/workflows/npm-publish.yml +2 -1
- package/INSTALLATION.md +144 -0
- package/README.md +287 -102
- package/aiwaf.sqlite +0 -0
- package/bin/aiwaf.js +172 -0
- package/examples/sandbox/README.md +53 -0
- package/examples/sandbox/aiwaf-proxy/Dockerfile +21 -0
- package/examples/sandbox/aiwaf-proxy/package.json +15 -0
- package/examples/sandbox/aiwaf-proxy/server.js +44 -0
- package/examples/sandbox/attack-suite.js +293 -0
- package/examples/sandbox/compare-results.js +86 -0
- package/examples/sandbox/docker-compose.yml +27 -0
- package/examples/sandbox/run-and-compare.js +91 -0
- package/geolock/ipinfo_lite.mmdb +0 -0
- package/lib/anomalyDetector.js +207 -15
- package/lib/blacklistManager.js +129 -5
- package/lib/csvStore.js +84 -0
- package/lib/dynamicKeyword.js +20 -4
- package/lib/dynamicKeywordStore.js +169 -0
- package/lib/exemptionStore.js +186 -0
- package/lib/exemptions.js +56 -0
- package/lib/featureUtils.js +136 -22
- package/lib/geoBlocker.js +157 -0
- package/lib/geoStore.js +111 -0
- package/lib/headerValidation.js +250 -0
- package/lib/honeypotDetector.js +90 -4
- package/lib/middlewareLogger.js +123 -0
- package/lib/modelStore.js +129 -0
- package/lib/rateLimiter.js +65 -10
- package/lib/redisClient.js +8 -3
- package/lib/requestLogStore.js +167 -0
- package/lib/settingsCompat.js +107 -0
- package/lib/uuidDetector.js +34 -9
- package/lib/wafMiddleware.js +134 -42
- package/package.json +24 -4
- package/resources/model.json +9536 -1
- package/test/anomaly-detector.test.js +36 -0
- package/test/cli.test.js +125 -0
- package/test/csv-fallback.test.js +165 -0
- package/test/dynamic-keyword-integration.test.js +24 -0
- package/test/dynamic-keyword-store.test.js +78 -0
- package/test/exemptions-db.test.js +38 -0
- package/test/geo-mmdb.test.js +77 -0
- package/test/header-validation.test.js +66 -0
- package/test/honeypot-detector.test.js +42 -0
- package/test/isolation-forest.test.js +38 -0
- package/test/middleware-behavior.test.js +75 -0
- package/test/model-store-db.test.js +22 -0
- package/test/model-store.test.js +31 -0
- package/test/redis-client.test.js +35 -0
- package/test/settingsCompat.test.js +95 -0
- package/test/train.test.js +137 -0
- package/test/uuid-detector.test.js +20 -0
- package/test/waf.test.js +183 -11
- package/test-anomaly.js +77 -0
- package/test-complete-waf.js +147 -0
- package/test-simple.js +79 -0
- package/train.js +523 -87
package/.dockerignore
ADDED
|
@@ -8,6 +8,7 @@ on:
|
|
|
8
8
|
jobs:
|
|
9
9
|
build-and-publish:
|
|
10
10
|
runs-on: ubuntu-latest
|
|
11
|
+
environment: npm-publish
|
|
11
12
|
|
|
12
13
|
steps:
|
|
13
14
|
- uses: actions/checkout@v4
|
|
@@ -19,7 +20,7 @@ jobs:
|
|
|
19
20
|
node-version: 20
|
|
20
21
|
registry-url: https://registry.npmjs.org/
|
|
21
22
|
|
|
22
|
-
- run: npm
|
|
23
|
+
- run: npm install
|
|
23
24
|
- run: npm test
|
|
24
25
|
- run: npm publish --access public
|
|
25
26
|
env:
|
package/INSTALLATION.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# AIWAF-JS Installation Guide
|
|
2
|
+
|
|
3
|
+
This guide covers local setup, Redis setup, training setup, and common failure modes.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
- Node.js 18+ recommended
|
|
8
|
+
- npm 9+ recommended
|
|
9
|
+
- Build tooling required by `sqlite3` (platform dependent)
|
|
10
|
+
|
|
11
|
+
## 1. Install Package
|
|
12
|
+
|
|
13
|
+
For application use:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm install aiwaf-js
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For local development in this repository:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 2. Basic Integration (Express)
|
|
26
|
+
|
|
27
|
+
```js
|
|
28
|
+
const express = require('express');
|
|
29
|
+
const aiwaf = require('aiwaf-js');
|
|
30
|
+
|
|
31
|
+
const app = express();
|
|
32
|
+
app.use(express.json());
|
|
33
|
+
|
|
34
|
+
app.use(aiwaf({
|
|
35
|
+
staticKeywords: ['.php', '.env', '.git'],
|
|
36
|
+
dynamicTopN: 10,
|
|
37
|
+
WINDOW_SEC: 10,
|
|
38
|
+
MAX_REQ: 20,
|
|
39
|
+
FLOOD_REQ: 40,
|
|
40
|
+
HONEYPOT_FIELD: 'hp_field',
|
|
41
|
+
uuidRoutePrefix: '/user'
|
|
42
|
+
}));
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## 3. Optional Redis Setup
|
|
46
|
+
|
|
47
|
+
Set `REDIS_URL` (or `AIWAF_REDIS_URL`) before app startup:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
export REDIS_URL=redis://localhost:6379
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
PowerShell:
|
|
54
|
+
|
|
55
|
+
```powershell
|
|
56
|
+
$env:REDIS_URL = 'redis://localhost:6379'
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
If Redis is not configured or not reachable, AIWAF-JS falls back to in-memory behavior.
|
|
60
|
+
|
|
61
|
+
## 3.1 Optional GeoIP MMDB Setup
|
|
62
|
+
|
|
63
|
+
Install MMDB reader:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
npm install maxmind
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Place your database at `geolock/ipinfo_lite.mmdb` or set:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
export AIWAF_GEO_MMDB_PATH=/absolute/path/to/ipinfo_lite.mmdb
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 4. Train a Model from Logs
|
|
76
|
+
|
|
77
|
+
By default, the trainer reads `/var/log/nginx/access.log`. Override as needed:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
NODE_LOG_PATH=/path/to/access.log npm run train
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Include rotated logs:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
NODE_LOG_GLOB='/path/to/access.log.*' npm run train
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Output model artifact:
|
|
90
|
+
|
|
91
|
+
- `resources/model.json`
|
|
92
|
+
|
|
93
|
+
## 5. Verify Installation
|
|
94
|
+
|
|
95
|
+
Run tests:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
npm test
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Check CLI wiring:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
npm run aiwaf -- help
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Run a minimal app, then request a known benign route (`/`) and a suspicious route (for example, a path containing `.php`) to validate block behavior.
|
|
108
|
+
|
|
109
|
+
## 6. Troubleshooting
|
|
110
|
+
|
|
111
|
+
### `Failed to load pretrained model`
|
|
112
|
+
|
|
113
|
+
- Run `npm run train` to generate `resources/model.json`.
|
|
114
|
+
- Ensure the process has read access to the `resources/` directory.
|
|
115
|
+
|
|
116
|
+
### Redis warnings or connection failures
|
|
117
|
+
|
|
118
|
+
- Verify `REDIS_URL` value and Redis server health.
|
|
119
|
+
- Runtime is designed to continue with fallback behavior.
|
|
120
|
+
|
|
121
|
+
### SQLite errors (`blocked_ips` table not found)
|
|
122
|
+
|
|
123
|
+
- Ensure the process can create and write to `./aiwaf.sqlite`.
|
|
124
|
+
- `blacklistManager` auto-initializes the table, but write permissions are required.
|
|
125
|
+
- If DB logging is unavailable, enable CSV middleware logs:
|
|
126
|
+
- `AIWAF_MIDDLEWARE_LOG_CSV=true`
|
|
127
|
+
- `AIWAF_MIDDLEWARE_LOG_CSV_PATH=logs/aiwaf-requests.csv`
|
|
128
|
+
- Core tables also fall back automatically to CSV files in `logs/storage/` when DB operations fail.
|
|
129
|
+
|
|
130
|
+
### `sqlite3` install/build issues
|
|
131
|
+
|
|
132
|
+
- Install platform-native build dependencies and reinstall packages.
|
|
133
|
+
- On CI/container images, ensure compiler toolchain is present.
|
|
134
|
+
|
|
135
|
+
### Training finds no logs
|
|
136
|
+
|
|
137
|
+
- Confirm `NODE_LOG_PATH` exists and is readable.
|
|
138
|
+
- If using rotation, set `NODE_LOG_GLOB` to a valid glob.
|
|
139
|
+
|
|
140
|
+
## 7. Production Notes
|
|
141
|
+
|
|
142
|
+
- Prefer Redis or a custom shared cache backend for multi-instance deployments.
|
|
143
|
+
- Place middleware after body parsing middleware if honeypot detection is required.
|
|
144
|
+
- Review rate limits and thresholds against real traffic profiles before broad rollout.
|
package/README.md
CHANGED
|
@@ -1,139 +1,324 @@
|
|
|
1
|
-
# aiwaf
|
|
1
|
+
# aiwaf-js
|
|
2
|
+
|
|
3
|
+
AIWAF-JS is a Node.js/Express Web Application Firewall that combines deterministic protections with anomaly detection and continuous learning. It ships as middleware, a CLI for ops workflows, and an offline trainer for IsolationForest models.
|
|
4
|
+
|
|
5
|
+
## What It Does
|
|
6
|
+
|
|
7
|
+
- Blocks known bad traffic with static keyword rules and IP blacklisting
|
|
8
|
+
- Enforces rate limits with flood detection
|
|
9
|
+
- Detects bot-like form abuse using honeypot field checks and timing gates
|
|
10
|
+
- Enforces optional method policies (responding with 405) and rejects suspicious method usage
|
|
11
|
+
- Blocks suspicious UUID probing on route prefixes (with optional existence resolver)
|
|
12
|
+
- Learns high-frequency malicious segments as dynamic suspicious keywords
|
|
13
|
+
- Runs IsolationForest anomaly checks with recent-behavior analysis
|
|
14
|
+
- Supports Redis/custom cache backends with memory fallback
|
|
15
|
+
- Optional GeoIP blocking (MMDB) with allow/block lists and dynamic blocklist
|
|
16
|
+
- CSV fallback storage when DB is unavailable
|
|
17
|
+
- Operational CLI for blacklist, exemptions, geo, request logs, training and diagnostics
|
|
18
|
+
|
|
19
|
+
## Repository Layout
|
|
20
|
+
|
|
21
|
+
- `index.js`: package entrypoint
|
|
22
|
+
- `lib/wafMiddleware.js`: main middleware orchestration
|
|
23
|
+
- `lib/rateLimiter.js`: rate-window and flood logic
|
|
24
|
+
- `lib/blacklistManager.js`: blocked IP persistence and operations
|
|
25
|
+
- `lib/keywordDetector.js`: static keyword checks
|
|
26
|
+
- `lib/dynamicKeyword.js`: in-memory dynamic keyword learning/checking
|
|
27
|
+
- `lib/uuidDetector.js`: UUID tamper detection
|
|
28
|
+
- `lib/honeypotDetector.js`: honeypot trap detection
|
|
29
|
+
- `lib/anomalyDetector.js`: pretrained model loading and anomaly scoring
|
|
30
|
+
- `lib/featureUtils.js`: request feature extraction and short-lived caching
|
|
31
|
+
- `lib/isolationForest.js`: IsolationForest implementation
|
|
32
|
+
- `lib/redisClient.js`: optional Redis client lifecycle
|
|
33
|
+
- `lib/headerValidation.js`: header caps, suspicious UA, and header quality scoring
|
|
34
|
+
- `lib/geoBlocker.js`: GeoIP allow/block checks + MMDB lookup + cache
|
|
35
|
+
- `lib/middlewareLogger.js`: JSONL/CSV/DB request logging
|
|
36
|
+
- `lib/*Store.js`: DB/CSV storage adapters (blacklist, exemptions, geo, logs, keywords, models)
|
|
37
|
+
- `train.js`: offline model training from access logs
|
|
38
|
+
- `resources/model.json`: pretrained anomaly model artifact
|
|
39
|
+
- `utils/db.js`: SQLite connection (memory DB in test)
|
|
40
|
+
- `test/`: Jest test suite
|
|
41
|
+
|
|
42
|
+
## Request Processing Flow
|
|
43
|
+
|
|
44
|
+
1. Initialize module options for rate limiter, keyword detectors, honeypot, UUID checks, and anomaly detector.
|
|
45
|
+
2. Resolve client IP (`x-forwarded-for` first, then `req.ip`) and normalized path.
|
|
46
|
+
3. Enforce optional method policy (405) if enabled.
|
|
47
|
+
4. Block immediately if IP is already in blacklist.
|
|
48
|
+
5. Header validation (required headers, suspicious UA, header caps, quality score).
|
|
49
|
+
6. Geo checks (allow/block lists + DB-backed blocklist).
|
|
50
|
+
7. Honeypot field + timing checks.
|
|
51
|
+
8. Rate-limit + flood handling.
|
|
52
|
+
9. Static keyword blocking.
|
|
53
|
+
10. Dynamic keyword blocking.
|
|
54
|
+
11. UUID tamper checks (optional existence resolver).
|
|
55
|
+
12. Anomaly detection for unknown routes with recent-behavior analysis.
|
|
56
|
+
13. Request logging (JSONL/CSV/DB) and optional dynamic keyword learning on 404s.
|
|
57
|
+
14. Allow request through `next()` when no rule triggers.
|
|
2
58
|
|
|
3
|
-
|
|
4
|
-
> Self‑learning, plug‑and‑play WAF with rate‑limiting, static & dynamic keyword blocking, honeypot traps, UUID‑tamper protection, and IsolationForest anomaly detection—fully configurable and trainable on your own access logs. Now Redis‑powered and ready for distributed, multiprocess use.
|
|
5
|
-
|
|
6
|
-
[](https://www.npmjs.com/package/aiwaf-js)
|
|
7
|
-
[](https://github.com/your-user/aiwaf-js/actions)
|
|
8
|
-
[](LICENSE)
|
|
59
|
+
## Installation
|
|
9
60
|
|
|
10
|
-
|
|
61
|
+
```bash
|
|
62
|
+
npm install aiwaf-js
|
|
63
|
+
```
|
|
11
64
|
|
|
12
|
-
##
|
|
65
|
+
## Quick Start
|
|
13
66
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
- ✅ Honeypot Field Detection
|
|
18
|
-
- ✅ UUID‑Tamper Protection
|
|
19
|
-
- ✅ Anomaly Detection (Isolation Forest)
|
|
20
|
-
- ✅ Redis Support for multiprocess environments
|
|
21
|
-
- ✅ Offline Training from access logs
|
|
22
|
-
- ✅ **Custom Cache Logic Support**
|
|
67
|
+
```js
|
|
68
|
+
const express = require('express');
|
|
69
|
+
const aiwaf = require('aiwaf-js');
|
|
23
70
|
|
|
24
|
-
|
|
71
|
+
const app = express();
|
|
72
|
+
app.use(express.json());
|
|
25
73
|
|
|
26
|
-
|
|
74
|
+
app.use(aiwaf({
|
|
75
|
+
staticKeywords: ['.php', '.env', '.git'],
|
|
76
|
+
dynamicTopN: 10,
|
|
77
|
+
WINDOW_SEC: 10,
|
|
78
|
+
MAX_REQ: 20,
|
|
79
|
+
FLOOD_REQ: 40,
|
|
80
|
+
HONEYPOT_FIELD: 'hp_field',
|
|
81
|
+
uuidRoutePrefix: '/user',
|
|
82
|
+
AIWAF_HEADER_VALIDATION: true,
|
|
83
|
+
AIWAF_METHOD_POLICY_ENABLED: true,
|
|
84
|
+
AIWAF_ALLOWED_METHODS: ['GET', 'POST', 'HEAD', 'OPTIONS']
|
|
85
|
+
}));
|
|
27
86
|
|
|
28
|
-
|
|
29
|
-
|
|
87
|
+
app.get('/', (req, res) => res.send('Protected'));
|
|
88
|
+
app.listen(3000);
|
|
30
89
|
```
|
|
31
90
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
## Train the Model (Optional but recommended)
|
|
91
|
+
## Configuration
|
|
35
92
|
|
|
36
|
-
|
|
93
|
+
### Core Controls
|
|
94
|
+
|
|
95
|
+
| Option | Default | Description |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| `staticKeywords` | `[]` | Substrings that trigger immediate block + blacklist |
|
|
98
|
+
| `dynamicTopN` or `DYNAMIC_TOP_N` | `10` | Frequency threshold for dynamic segment blocking |
|
|
99
|
+
| `WINDOW_SEC` | `60` | Time window for rate limiting |
|
|
100
|
+
| `MAX_REQ` | `100` | Max allowed requests in window before rate block |
|
|
101
|
+
| `FLOOD_REQ` | `200` | Hard threshold that blacklists IP |
|
|
102
|
+
| `HONEYPOT_FIELD` | `undefined` | Body field name used as bot trap |
|
|
103
|
+
| `uuidRoutePrefix` | `"/user"` | Path prefix monitored for UUID tamper attempts |
|
|
104
|
+
| `uuidResolver` | `undefined` | Optional async resolver `(uuid, req) => boolean` for existence checks |
|
|
105
|
+
| `cache` | fallback memory cache | Custom cache backend used by limiter/features |
|
|
106
|
+
| `nTrees` | `100` | IsolationForest trees when model is initialized in-process |
|
|
107
|
+
| `sampleSize` | `256` | IsolationForest sample size |
|
|
108
|
+
|
|
109
|
+
### Header Validation
|
|
110
|
+
|
|
111
|
+
| Option | Default | Description |
|
|
112
|
+
|---|---|---|
|
|
113
|
+
| `AIWAF_HEADER_VALIDATION` | `false` | Enable header validation pipeline |
|
|
114
|
+
| `AIWAF_REQUIRED_HEADERS` | `[]` | Required headers array, or `{ DEFAULT, GET, POST }` mapping |
|
|
115
|
+
| `AIWAF_HEADER_QUALITY_MIN_SCORE` | `3` | Minimum header quality score |
|
|
116
|
+
| `AIWAF_MAX_HEADER_BYTES` | `32768` | Max header bytes before blocking |
|
|
117
|
+
| `AIWAF_MAX_HEADER_COUNT` | `100` | Max header count before blocking |
|
|
118
|
+
| `AIWAF_MAX_USER_AGENT_LENGTH` | `500` | Max User-Agent length |
|
|
119
|
+
| `AIWAF_MAX_ACCEPT_LENGTH` | `4096` | Max Accept header length |
|
|
120
|
+
| `AIWAF_BLOCKED_USER_AGENTS` | list | Substring deny list |
|
|
121
|
+
| `AIWAF_SUSPICIOUS_USER_AGENTS` | regex list | Regex list for suspicious UA detection |
|
|
122
|
+
| `AIWAF_LEGITIMATE_BOTS` | regex list | Regex allow list for legitimate crawlers |
|
|
123
|
+
|
|
124
|
+
### Method Policy
|
|
125
|
+
|
|
126
|
+
| Option | Default | Description |
|
|
127
|
+
|---|---|---|
|
|
128
|
+
| `AIWAF_METHOD_POLICY_ENABLED` | `false` | Enforce method allowlist (returns 405) |
|
|
129
|
+
| `AIWAF_ALLOWED_METHODS` | `['GET','POST','HEAD','OPTIONS']` | Allowed methods when policy enabled |
|
|
130
|
+
| `AIWAF_POST_ONLY_SUFFIXES` | `['/create/','/submit/','/upload/','/delete/','/process/']` | GET to these triggers 405 when policy enabled |
|
|
131
|
+
| `AIWAF_LOGIN_PATH_PREFIXES` | common login paths | Path prefixes treated as login routes; a shorter minimum form time applies to them |
|
|
132
|
+
|
|
133
|
+
### Keyword Learning
|
|
134
|
+
|
|
135
|
+
| Option | Default | Description |
|
|
136
|
+
|---|---|---|
|
|
137
|
+
| `AIWAF_ENABLE_KEYWORD_LEARNING` | `true` | Enable dynamic keyword learning |
|
|
138
|
+
| `AIWAF_DYNAMIC_TOP_N` | `10` | Dynamic keyword learning threshold |
|
|
139
|
+
| `AIWAF_EXEMPT_KEYWORDS` | `[]` | Skip these keywords |
|
|
140
|
+
| `AIWAF_ALLOWED_PATH_KEYWORDS` | `[]` | Allowlist of path fragments |
|
|
141
|
+
|
|
142
|
+
### Model / Training
|
|
143
|
+
|
|
144
|
+
| Option | Default | Description |
|
|
145
|
+
|---|---|---|
|
|
146
|
+
| `AIWAF_MIN_TRAIN_LOGS` | `50` | Minimum logs to run training |
|
|
147
|
+
| `AIWAF_MIN_AI_LOGS` | `10000` | Minimum logs to train AI model |
|
|
148
|
+
| `AIWAF_FORCE_AI_TRAINING` | `false` | Force AI training below minimum logs |
|
|
149
|
+
| `AIWAF_MODEL_STORAGE` | `file` | `file`, `db`, or `cache` |
|
|
150
|
+
| `AIWAF_MODEL_PATH` | `resources/model.json` | Model file path (file backend) |
|
|
151
|
+
| `AIWAF_MODEL_STORAGE_FALLBACK` | `file` | Fallback model backend |
|
|
152
|
+
| `AIWAF_MODEL_CACHE_KEY` | `aiwaf:model` | Cache key when using cache backend |
|
|
153
|
+
| `AIWAF_MODEL_CACHE_TTL` | `0` | Cache TTL in seconds |
|
|
154
|
+
|
|
155
|
+
### Geo Blocking
|
|
156
|
+
|
|
157
|
+
| Option | Default | Description |
|
|
158
|
+
|---|---|---|
|
|
159
|
+
| `AIWAF_GEO_BLOCK_ENABLED` | `false` | Enable geo blocking |
|
|
160
|
+
| `AIWAF_GEO_BLOCK_COUNTRIES` | `[]` | Block list (country codes) |
|
|
161
|
+
| `AIWAF_GEO_ALLOW_COUNTRIES` | `[]` | Allow list (country codes) |
|
|
162
|
+
| `AIWAF_GEO_MMDB_PATH` | `geolock/ipinfo_lite.mmdb` | MMDB path |
|
|
163
|
+
| `AIWAF_GEO_CACHE_SECONDS` | `3600` | Geo cache TTL |
|
|
164
|
+
| `AIWAF_GEO_CACHE_PREFIX` | `aiwaf:geo:` | Geo cache key prefix |
|
|
165
|
+
|
|
166
|
+
### Logging / Storage
|
|
167
|
+
|
|
168
|
+
| Option | Default | Description |
|
|
169
|
+
|---|---|---|
|
|
170
|
+
| `AIWAF_MIDDLEWARE_LOGGING` | `false` | Enable JSONL logging |
|
|
171
|
+
| `AIWAF_MIDDLEWARE_LOG_PATH` | `logs/aiwaf-requests.jsonl` | JSONL log path |
|
|
172
|
+
| `AIWAF_MIDDLEWARE_LOG_DB` | `false` | Store logs in DB |
|
|
173
|
+
| `AIWAF_MIDDLEWARE_LOG_CSV` | `false` | Store logs in CSV |
|
|
174
|
+
| `AIWAF_MIDDLEWARE_LOG_CSV_PATH` | `logs/aiwaf-requests.csv` | CSV log path |
|
|
175
|
+
| `AIWAF_BLOCKED_IPS_CSV_PATH` | `logs/storage/blocked_ips.csv` | CSV fallback for blocked IPs |
|
|
176
|
+
| `AIWAF_IP_EXEMPTIONS_CSV_PATH` | `logs/storage/ip_exemptions.csv` | CSV fallback for IP exemptions |
|
|
177
|
+
| `AIWAF_PATH_EXEMPTIONS_CSV_PATH` | `logs/storage/path_exemptions.csv` | CSV fallback for path exemptions |
|
|
178
|
+
| `AIWAF_GEO_BLOCKED_COUNTRIES_CSV_PATH` | `logs/storage/geo_blocked_countries.csv` | CSV fallback for geo blocklist |
|
|
179
|
+
| `AIWAF_REQUEST_LOGS_CSV_PATH` | `logs/storage/request_logs.csv` | CSV fallback for request logs |
|
|
180
|
+
| `AIWAF_DYNAMIC_KEYWORDS_CSV_PATH` | `logs/storage/dynamic_keywords.csv` | CSV fallback for dynamic keywords |
|
|
181
|
+
|
|
182
|
+
### Redis / Cache
|
|
183
|
+
## Redis and Cache Behavior
|
|
184
|
+
|
|
185
|
+
- Set `REDIS_URL` (or `AIWAF_REDIS_URL`) to enable Redis connectivity (`lib/redisClient.js`).
|
|
186
|
+
- If Redis is unavailable, runtime falls back to in-memory behavior.
|
|
187
|
+
- You can inject a custom cache object.
|
|
188
|
+
|
|
189
|
+
Rate limiter custom cache must implement:
|
|
190
|
+
|
|
191
|
+
- `lPush(key, value)`
|
|
192
|
+
- `expire(key, ttl)`
|
|
193
|
+
- `lLen(key)`
|
|
194
|
+
- `lRange(key, start, end)`
|
|
195
|
+
|
|
196
|
+
Feature cache custom backend supports:
|
|
197
|
+
|
|
198
|
+
- `get(key)`
|
|
199
|
+
- `set(key, value, ttl)`
|
|
200
|
+
|
|
201
|
+
## Geo Blocking (MMDB)
|
|
202
|
+
|
|
203
|
+
- Put your DB at `geolock/ipinfo_lite.mmdb` (default) or set `AIWAF_GEO_MMDB_PATH`.
|
|
204
|
+
- Enable with `AIWAF_GEO_BLOCK_ENABLED: true`.
|
|
205
|
+
- Configure `AIWAF_GEO_BLOCK_COUNTRIES` and/or `AIWAF_GEO_ALLOW_COUNTRIES`.
|
|
206
|
+
- Install MMDB reader dependency in your app:
|
|
207
|
+
- `npm install maxmind`
|
|
208
|
+
- If the MMDB is unavailable, the middleware falls back to the `x-country-code` header.
|
|
209
|
+
|
|
210
|
+
## Offline Training
|
|
211
|
+
|
|
212
|
+
Train a model using access logs:
|
|
37
213
|
|
|
38
214
|
```bash
|
|
39
|
-
|
|
215
|
+
NODE_LOG_PATH=/path/to/access.log npm run train
|
|
40
216
|
```
|
|
41
217
|
|
|
42
|
-
|
|
218
|
+
Optional rotated/gz support:
|
|
43
219
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
## Quick Start
|
|
47
|
-
|
|
48
|
-
```js
|
|
49
|
-
const express = require('express')
|
|
50
|
-
const aiwaf = require('aiwaf-js')
|
|
51
|
-
|
|
52
|
-
const app = express()
|
|
53
|
-
app.use(express.json())
|
|
54
|
-
app.use(aiwaf())
|
|
55
|
-
app.get('/', (req, res) => res.send('Protected'))
|
|
56
|
-
app.listen(3000)
|
|
220
|
+
```bash
|
|
221
|
+
NODE_LOG_GLOB='/path/to/access.log.*' npm run train
|
|
57
222
|
```
|
|
58
223
|
|
|
59
|
-
|
|
224
|
+
Training pipeline in `train.js`:
|
|
60
225
|
|
|
61
|
-
|
|
226
|
+
- Reads raw and rotated (including `.gz`) access logs
|
|
227
|
+
- Parses request fields (IP, URI, status, response time, timestamp)
|
|
228
|
+
- Builds feature vectors: `[pathLen, kwHits, statusIdx, responseTime, burst, total404]`
|
|
229
|
+
- Enforces `AIWAF_MIN_TRAIN_LOGS` and `AIWAF_MIN_AI_LOGS`
|
|
230
|
+
- Trains IsolationForest when log volume is sufficient
|
|
231
|
+
- Learns dynamic keywords from suspicious 4xx/5xx traffic
|
|
232
|
+
- Removes exempt keywords and unblocks exempt IPs
|
|
233
|
+
- Writes model artifact to `resources/model.json` with metadata
|
|
234
|
+
- Model storage backends:
|
|
235
|
+
- `AIWAF_MODEL_STORAGE`: `file` (default), `db`, `cache`
|
|
236
|
+
- `AIWAF_MODEL_PATH` (file backend)
|
|
237
|
+
- `AIWAF_MODEL_STORAGE_FALLBACK` (fallback backend)
|
|
238
|
+
- `AIWAF_MODEL_CACHE_KEY`, `AIWAF_MODEL_CACHE_TTL` (cache backend)
|
|
62
239
|
|
|
63
|
-
|
|
240
|
+
## Testing
|
|
64
241
|
|
|
65
242
|
```bash
|
|
66
|
-
|
|
67
|
-
export REDIS_URL=redis://localhost:6379
|
|
68
|
-
|
|
69
|
-
# On Windows PowerShell
|
|
70
|
-
$env:REDIS_URL = "redis://localhost:6379"
|
|
243
|
+
npm test
|
|
71
244
|
```
|
|
72
245
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
246
|
+
Current tests cover:
|
|
247
|
+
|
|
248
|
+
- Header validation (caps, suspicious UA, quality scoring)
|
|
249
|
+
- Method policy enforcement
|
|
250
|
+
- Geo blocking and MMDB lookup
|
|
251
|
+
- Honeypot timing policies
|
|
252
|
+
- UUID tamper detection (with resolver)
|
|
253
|
+
- Anomaly detection and recent-behavior analysis
|
|
254
|
+
- Dynamic keyword learning and trainer behaviors
|
|
255
|
+
- CSV/DB fallback storage
|
|
256
|
+
- CLI and settings compatibility
|
|
257
|
+
|
|
258
|
+
## Data and Persistence
|
|
259
|
+
|
|
260
|
+
- Runtime blacklist storage uses SQLite through `utils/db.js`.
|
|
261
|
+
- Production DB file defaults to `./aiwaf.sqlite`.
|
|
262
|
+
- Test environment uses in-memory SQLite (`NODE_ENV=test`).
|
|
263
|
+
- Primary blocked IP table: `blocked_ips`.
|
|
264
|
+
- Middleware logging supports JSONL, optional SQLite, and CSV fallback.
|
|
265
|
+
- CSV settings:
|
|
266
|
+
- `AIWAF_MIDDLEWARE_LOG_CSV`
|
|
267
|
+
- `AIWAF_MIDDLEWARE_LOG_CSV_PATH`
|
|
268
|
+
- Table storage CSV fallbacks are enabled automatically when DB operations fail:
|
|
269
|
+
- `blocked_ips` -> `logs/storage/blocked_ips.csv` (`AIWAF_BLOCKED_IPS_CSV_PATH`)
|
|
270
|
+
- `ip_exemptions` -> `logs/storage/ip_exemptions.csv` (`AIWAF_IP_EXEMPTIONS_CSV_PATH`)
|
|
271
|
+
- `path_exemptions` -> `logs/storage/path_exemptions.csv` (`AIWAF_PATH_EXEMPTIONS_CSV_PATH`)
|
|
272
|
+
- `geo_blocked_countries` -> `logs/storage/geo_blocked_countries.csv` (`AIWAF_GEO_BLOCKED_COUNTRIES_CSV_PATH`)
|
|
273
|
+
- `request_logs` -> `logs/storage/request_logs.csv` (`AIWAF_REQUEST_LOGS_CSV_PATH`)
|
|
274
|
+
- `dynamic_keywords` -> `logs/storage/dynamic_keywords.csv` (`AIWAF_DYNAMIC_KEYWORDS_CSV_PATH`)
|
|
275
|
+
|
|
276
|
+
## Operational Notes
|
|
277
|
+
|
|
278
|
+
- Middleware order matters; place AIWAF after body parsers if honeypot checks depend on parsed JSON/form body.
|
|
279
|
+
- If no trained model exists or loading fails, the anomaly detector fails open (requests are not blocked by the anomaly check).
|
|
280
|
+
- Dynamic keyword learning persists to DB/CSV via `dynamicKeywordStore`.
|
|
281
|
+
- Multi-instance deployments should use Redis/custom shared cache for limiter consistency.
|
|
282
|
+
|
|
283
|
+
## Development
|
|
78
284
|
|
|
79
|
-
|
|
285
|
+
```bash
|
|
286
|
+
npm install
|
|
287
|
+
npm test
|
|
288
|
+
npm run train
|
|
289
|
+
npm run aiwaf -- help
|
|
290
|
+
```
|
|
80
291
|
|
|
81
|
-
|
|
82
|
-
const myCustomCache = {
|
|
83
|
-
get: async (key) => { /* return cached value */ },
|
|
84
|
-
set: async (key, value, options) => { /* store with optional TTL */ },
|
|
85
|
-
del: async (key) => { /* delete entry */ }
|
|
86
|
-
}
|
|
292
|
+
## Operations CLI
|
|
87
293
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
294
|
+
```bash
|
|
295
|
+
npm run aiwaf -- list blacklist
|
|
296
|
+
npm run aiwaf -- list exemptions
|
|
297
|
+
npm run aiwaf -- add ip-exemption 203.0.113.10 "trusted monitor"
|
|
298
|
+
npm run aiwaf -- add path-exemption /health "health probes"
|
|
299
|
+
npm run aiwaf -- geo block CN "manual block"
|
|
300
|
+
npm run aiwaf -- geo summary
|
|
301
|
+
npm run aiwaf -- diagnose 203.0.113.10
|
|
96
302
|
```
|
|
97
303
|
|
|
98
|
-
|
|
304
|
+
## Sandbox (OWASP Juice Shop)
|
|
99
305
|
|
|
100
|
-
|
|
306
|
+
The repository includes a runnable sandbox that proxies OWASP Juice Shop behind AIWAF. It also includes an attack suite that generates comparable results for direct vs protected traffic.
|
|
101
307
|
|
|
102
|
-
|
|
308
|
+
Run sandbox:
|
|
103
309
|
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
staticKeywords: ['.php', '.env', '.git'],
|
|
107
|
-
dynamicTopN: 10,
|
|
108
|
-
WINDOW_SEC: 10,
|
|
109
|
-
MAX_REQ: 20,
|
|
110
|
-
FLOOD_REQ: 10,
|
|
111
|
-
HONEYPOT_FIELD: 'hp_field',
|
|
112
|
-
cache: myCustomCache, // optional custom cache injection
|
|
113
|
-
}));
|
|
310
|
+
```bash
|
|
311
|
+
docker compose -f examples/sandbox/docker-compose.yml up --build
|
|
114
312
|
```
|
|
115
313
|
|
|
116
|
-
|
|
117
|
-
|--------------------|---------------------|-----------------------------|----------------------------------------------------------|
|
|
118
|
-
| `staticKeywords` | — | [".php",".xmlrpc","wp-"] | Substrings to block immediately. |
|
|
119
|
-
| `dynamicTopN` | `DYNAMIC_TOP_N` | 10 | Number of dynamic keywords to match. |
|
|
120
|
-
| `windowSec` | `WINDOW_SEC` | 10 | Time window in seconds for rate limiting. |
|
|
121
|
-
| `maxReq` | `MAX_REQ` | 20 | Max allowed requests per window. |
|
|
122
|
-
| `floodReq` | `FLOOD_REQ` | 10 | Hard limit triggering IP block. |
|
|
123
|
-
| `honeypotField` | `HONEYPOT_FIELD` | "hp_field" | Hidden bot trap field. |
|
|
124
|
-
| `anomalyThreshold` | `ANOMALY_THRESHOLD` | 0.5 | Threshold for IsolationForest-based anomaly detection. |
|
|
125
|
-
| `logPath` | `NODE_LOG_PATH` | "/var/log/nginx/access.log" | Path to access log file. |
|
|
126
|
-
| `logGlob` | `NODE_LOG_GLOB` | "${logPath}.*" | Glob pattern to include rotated/gzipped logs. |
|
|
127
|
-
| `cache` | — | undefined | Custom cache implementation (overrides Redis/memory) |
|
|
128
|
-
|
|
129
|
-
---
|
|
130
|
-
|
|
131
|
-
## Optimization Note
|
|
314
|
+
Run the attack suite and compare:
|
|
132
315
|
|
|
133
|
-
|
|
316
|
+
```bash
|
|
317
|
+
node examples/sandbox/run-and-compare.js http://localhost:3001 http://localhost:3000
|
|
318
|
+
```
|
|
134
319
|
|
|
135
|
-
|
|
320
|
+
The comparison output includes per-attack block rates and total blocked requests.
|
|
136
321
|
|
|
137
|
-
##
|
|
322
|
+
## License
|
|
138
323
|
|
|
139
|
-
MIT
|
|
324
|
+
MIT
|
package/aiwaf.sqlite
ADDED
|
Binary file
|