aiwaf-js 0.0.3 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.dockerignore +6 -0
  2. package/.github/workflows/node.js.yml +1 -1
  3. package/.github/workflows/npm-publish.yml +2 -1
  4. package/INSTALLATION.md +144 -0
  5. package/README.md +289 -76
  6. package/aiwaf.sqlite +0 -0
  7. package/bin/aiwaf.js +172 -0
  8. package/examples/sandbox/README.md +53 -0
  9. package/examples/sandbox/aiwaf-proxy/Dockerfile +21 -0
  10. package/examples/sandbox/aiwaf-proxy/package.json +15 -0
  11. package/examples/sandbox/aiwaf-proxy/server.js +44 -0
  12. package/examples/sandbox/attack-suite.js +293 -0
  13. package/examples/sandbox/compare-results.js +86 -0
  14. package/examples/sandbox/docker-compose.yml +27 -0
  15. package/examples/sandbox/run-and-compare.js +91 -0
  16. package/geolock/ipinfo_lite.mmdb +0 -0
  17. package/lib/anomalyDetector.js +207 -15
  18. package/lib/blacklistManager.js +129 -5
  19. package/lib/csvStore.js +84 -0
  20. package/lib/dynamicKeyword.js +20 -4
  21. package/lib/dynamicKeywordStore.js +169 -0
  22. package/lib/exemptionStore.js +186 -0
  23. package/lib/exemptions.js +56 -0
  24. package/lib/featureUtils.js +150 -18
  25. package/lib/geoBlocker.js +157 -0
  26. package/lib/geoStore.js +111 -0
  27. package/lib/headerValidation.js +250 -0
  28. package/lib/honeypotDetector.js +90 -4
  29. package/lib/middlewareLogger.js +123 -0
  30. package/lib/modelStore.js +129 -0
  31. package/lib/rateLimiter.js +99 -37
  32. package/lib/redisClient.js +8 -3
  33. package/lib/requestLogStore.js +167 -0
  34. package/lib/settingsCompat.js +107 -0
  35. package/lib/uuidDetector.js +34 -9
  36. package/lib/wafMiddleware.js +133 -40
  37. package/package.json +24 -4
  38. package/resources/model.json +9536 -1
  39. package/test/anomaly-detector.test.js +36 -0
  40. package/test/cli.test.js +125 -0
  41. package/test/csv-fallback.test.js +165 -0
  42. package/test/dynamic-keyword-integration.test.js +24 -0
  43. package/test/dynamic-keyword-store.test.js +78 -0
  44. package/test/exemptions-db.test.js +38 -0
  45. package/test/geo-mmdb.test.js +77 -0
  46. package/test/header-validation.test.js +66 -0
  47. package/test/honeypot-detector.test.js +42 -0
  48. package/test/isolation-forest.test.js +38 -0
  49. package/test/middleware-behavior.test.js +75 -0
  50. package/test/model-store-db.test.js +22 -0
  51. package/test/model-store.test.js +31 -0
  52. package/test/redis-client.test.js +35 -0
  53. package/test/settingsCompat.test.js +95 -0
  54. package/test/train.test.js +137 -0
  55. package/test/uuid-detector.test.js +20 -0
  56. package/test/waf.test.js +208 -21
  57. package/test-anomaly.js +77 -0
  58. package/test-complete-waf.js +147 -0
  59. package/test-simple.js +79 -0
  60. package/train.js +523 -87
package/.dockerignore ADDED
@@ -0,0 +1,6 @@
1
+ node_modules
2
+ **/node_modules
3
+ dist
4
+ build
5
+ npm-debug.log
6
+ *.log
package/.github/workflows/node.js.yml CHANGED
@@ -26,6 +26,6 @@
26
26
  with:
27
27
  node-version: ${{ matrix.node-version }}
28
28
  cache: 'npm'
29
- - run: npm ci
29
+ - run: npm install
30
30
  - run: npm run build --if-present
31
31
  - run: npm test
package/.github/workflows/npm-publish.yml CHANGED
@@ -8,6 +8,7 @@ on:
8
8
  jobs:
9
9
  build-and-publish:
10
10
  runs-on: ubuntu-latest
11
+ environment: npm-publish
11
12
 
12
13
  steps:
13
14
  - uses: actions/checkout@v4
@@ -19,7 +20,7 @@ jobs:
19
20
  node-version: 20
20
21
  registry-url: https://registry.npmjs.org/
21
22
 
22
- - run: npm ci
23
+ - run: npm install
23
24
  - run: npm test
24
25
  - run: npm publish --access public
25
26
  env:
package/INSTALLATION.md ADDED
@@ -0,0 +1,144 @@
1
+ # AIWAF-JS Installation Guide
2
+
3
+ This guide covers local setup, Redis setup, training setup, and common failure modes.
4
+
5
+ ## Prerequisites
6
+
7
+ - Node.js 18+ recommended
8
+ - npm 9+ recommended
9
+ - Build tooling required by `sqlite3` (platform dependent)
10
+
11
+ ## 1. Install Package
12
+
13
+ For application use:
14
+
15
+ ```bash
16
+ npm install aiwaf-js
17
+ ```
18
+
19
+ For local development in this repository:
20
+
21
+ ```bash
22
+ npm install
23
+ ```
24
+
25
+ ## 2. Basic Integration (Express)
26
+
27
+ ```js
28
+ const express = require('express');
29
+ const aiwaf = require('aiwaf-js');
30
+
31
+ const app = express();
32
+ app.use(express.json());
33
+
34
+ app.use(aiwaf({
35
+ staticKeywords: ['.php', '.env', '.git'],
36
+ dynamicTopN: 10,
37
+ WINDOW_SEC: 10,
38
+ MAX_REQ: 20,
39
+ FLOOD_REQ: 40,
40
+ HONEYPOT_FIELD: 'hp_field',
41
+ uuidRoutePrefix: '/user'
42
+ }));
43
+ ```
44
+
45
+ ## 3. Optional Redis Setup
46
+
47
+ Set `REDIS_URL` (or `AIWAF_REDIS_URL`) before app startup:
48
+
49
+ ```bash
50
+ export REDIS_URL=redis://localhost:6379
51
+ ```
52
+
53
+ PowerShell:
54
+
55
+ ```powershell
56
+ $env:REDIS_URL = 'redis://localhost:6379'
57
+ ```
58
+
59
+ If Redis is not configured or not reachable, AIWAF-JS falls back to in-memory behavior.
60
+
61
+ ## 3.1 Optional GeoIP MMDB Setup
62
+
63
+ Install MMDB reader:
64
+
65
+ ```bash
66
+ npm install maxmind
67
+ ```
68
+
69
+ Place your database at `geolock/ipinfo_lite.mmdb` or set:
70
+
71
+ ```bash
72
+ export AIWAF_GEO_MMDB_PATH=/absolute/path/to/ipinfo_lite.mmdb
73
+ ```
74
+
75
+ ## 4. Train a Model from Logs
76
+
77
+ By default, the trainer reads `/var/log/nginx/access.log`. Override as needed:
78
+
79
+ ```bash
80
+ NODE_LOG_PATH=/path/to/access.log npm run train
81
+ ```
82
+
83
+ Include rotated logs:
84
+
85
+ ```bash
86
+ NODE_LOG_GLOB='/path/to/access.log.*' npm run train
87
+ ```
88
+
89
+ Output model artifact:
90
+
91
+ - `resources/model.json`
92
+
93
+ ## 5. Verify Installation
94
+
95
+ Run tests:
96
+
97
+ ```bash
98
+ npm test
99
+ ```
100
+
101
+ Check CLI wiring:
102
+
103
+ ```bash
104
+ npm run aiwaf -- help
105
+ ```
106
+
107
+ Run a minimal app and hit a known benign route (`/`) and a suspicious route (for example path with `.php`) to validate block behavior.
108
+
109
+ ## 6. Troubleshooting
110
+
111
+ ### `Failed to load pretrained model`
112
+
113
+ - Run `npm run train` to generate `resources/model.json`.
114
+ - Ensure process has read access to the `resources/` directory.
115
+
116
+ ### Redis warnings or connection failures
117
+
118
+ - Verify `REDIS_URL` value and Redis server health.
119
+ - Runtime is designed to continue with fallback behavior.
120
+
121
+ ### SQLite errors (`blocked_ips` table not found)
122
+
123
+ - Ensure process can create/write `./aiwaf.sqlite`.
124
+ - `blacklistManager` auto-initializes the table, but write permissions are required.
125
+ - If DB logging is unavailable, enable CSV middleware logs:
126
+ - `AIWAF_MIDDLEWARE_LOG_CSV=true`
127
+ - `AIWAF_MIDDLEWARE_LOG_CSV_PATH=logs/aiwaf-requests.csv`
128
+ - Core tables also fall back automatically to CSV files in `logs/storage/` when DB operations fail.
129
+
130
+ ### `sqlite3` install/build issues
131
+
132
+ - Install platform-native build dependencies and reinstall packages.
133
+ - On CI/container images, ensure compiler toolchain is present.
134
+
135
+ ### Training finds no logs
136
+
137
+ - Confirm `NODE_LOG_PATH` exists and is readable.
138
+ - If using rotation, set `NODE_LOG_GLOB` to a valid glob.
139
+
140
+ ## 7. Production Notes
141
+
142
+ - Prefer Redis or a custom shared cache backend for multi-instance deployments.
143
+ - Place middleware after body parsing middleware if honeypot detection is required.
144
+ - Review rate limits and thresholds against real traffic profiles before broad rollout.
package/README.md CHANGED
@@ -1,111 +1,324 @@
1
- # aiwafjs
1
+ # aiwaf-js
2
+
3
+ AIWAF-JS is a Node.js/Express Web Application Firewall that combines deterministic protections with anomaly detection and continuous learning. It ships as middleware, a CLI for ops workflows, and an offline trainer for IsolationForest models.
4
+
5
+ ## What It Does
6
+
7
+ - Blocks known bad traffic with static keyword rules and IP blacklisting
8
+ - Enforces rate limits with flood detection
9
+ - Detects bot-like form abuse using honeypot field checks and timing gates
10
+ - Enforces optional method policies (405), rejecting disallowed methods and suspicious usage such as GET requests to POST-only paths
11
+ - Blocks suspicious UUID probing on route prefixes (with optional existence resolver)
12
+ - Learns high-frequency malicious segments as dynamic suspicious keywords
13
+ - Runs IsolationForest anomaly checks with recent-behavior analysis
14
+ - Supports Redis/custom cache backends with memory fallback
15
+ - Optional GeoIP blocking (MMDB) with allow/block lists and dynamic blocklist
16
+ - CSV fallback storage when DB is unavailable
17
+ - Operational CLI for blacklist, exemptions, geo, request logs, training and diagnostics
18
+
19
+ ## Repository Layout
20
+
21
+ - `index.js`: package entrypoint
22
+ - `lib/wafMiddleware.js`: main middleware orchestration
23
+ - `lib/rateLimiter.js`: rate-window and flood logic
24
+ - `lib/blacklistManager.js`: blocked IP persistence and operations
25
+ - `lib/keywordDetector.js`: static keyword checks
26
+ - `lib/dynamicKeyword.js`: in-memory dynamic keyword learning/checking
27
+ - `lib/uuidDetector.js`: UUID tamper detection
28
+ - `lib/honeypotDetector.js`: honeypot trap detection
29
+ - `lib/anomalyDetector.js`: pretrained model loading and anomaly scoring
30
+ - `lib/featureUtils.js`: request feature extraction and short-lived caching
31
+ - `lib/isolationForest.js`: IsolationForest implementation
32
+ - `lib/redisClient.js`: optional Redis client lifecycle
33
+ - `lib/headerValidation.js`: header caps, suspicious UA, and header quality scoring
34
+ - `lib/geoBlocker.js`: GeoIP allow/block checks + MMDB lookup + cache
35
+ - `lib/middlewareLogger.js`: JSONL/CSV/DB request logging
36
+ - `lib/*Store.js`: DB/CSV storage adapters (blacklist, exemptions, geo, logs, keywords, models)
37
+ - `train.js`: offline model training from access logs
38
+ - `resources/model.json`: pretrained anomaly model artifact
39
+ - `utils/db.js`: SQLite connection (memory DB in test)
40
+ - `test/`: Jest test suite
41
+
42
+ ## Request Processing Flow
43
+
44
+ 1. Initialize module options for rate limiter, keyword detectors, honeypot, UUID checks, and anomaly detector.
45
+ 2. Resolve client IP (`x-forwarded-for` first, then `req.ip`) and normalized path.
46
+ 3. Enforce optional method policy (405) if enabled.
47
+ 4. Block immediately if IP is already in blacklist.
48
+ 5. Header validation (required headers, suspicious UA, header caps, quality score).
49
+ 6. Geo checks (allow/block lists + DB-backed blocklist).
50
+ 7. Honeypot field + timing checks.
51
+ 8. Rate-limit + flood handling.
52
+ 9. Static keyword blocking.
53
+ 10. Dynamic keyword blocking.
54
+ 11. UUID tamper checks (optional existence resolver).
55
+ 12. Anomaly detection for unknown routes with recent-behavior analysis.
56
+ 13. Request logging (JSONL/CSV/DB) and optional dynamic keyword learning on 404s.
57
+ 14. Allow request through `next()` when no rule triggers.
2
58
 
3
- > **Adaptive Web Application Firewall** middleware for Node.js & Express
4
- > Self‑learning, plug‑and‑play WAF with rate‑limiting, static & dynamic keyword blocking, honeypot traps, UUID‑tamper protection, and IsolationForest anomaly detection—fully configurable and trainable on your own access logs. Now Redis‑powered and ready for distributed, multiprocess use.
5
-
6
- [![npm version](https://img.shields.io/npm/v/aiwaf-js.svg)](https://www.npmjs.com/package/aiwaf-js)
7
- [![Build Status](https://img.shields.io/github/actions/workflow/status/your-user/aiwaf-js/ci.yml)](https://github.com/your-user/aiwaf-js/actions)
8
- [![License](https://img.shields.io/npm/l/aiwaf-js.svg)](LICENSE)
59
+ ## Installation
9
60
 
10
- ---
61
+ ```bash
62
+ npm install aiwaf-js
63
+ ```
11
64
 
12
- ## Features
65
+ ## Quick Start
13
66
 
14
- - ✅ Rate Limiting (Redis-based or fallback to memory)
15
- - Static Keyword Blocking
16
- - Dynamic Keyword Learning (auto-adaptive)
17
- - ✅ Honeypot Field Detection
18
- - ✅ UUID‑Tamper Protection
19
- - ✅ Anomaly Detection (Isolation Forest)
20
- - ✅ Redis Support for multiprocess environments
21
- - ✅ Offline Training from access logs
67
+ ```js
68
+ const express = require('express');
69
+ const aiwaf = require('aiwaf-js');
22
70
 
23
- ---
71
+ const app = express();
72
+ app.use(express.json());
24
73
 
25
- ## Installation
74
+ app.use(aiwaf({
75
+ staticKeywords: ['.php', '.env', '.git'],
76
+ dynamicTopN: 10,
77
+ WINDOW_SEC: 10,
78
+ MAX_REQ: 20,
79
+ FLOOD_REQ: 40,
80
+ HONEYPOT_FIELD: 'hp_field',
81
+ uuidRoutePrefix: '/user',
82
+ AIWAF_HEADER_VALIDATION: true,
83
+ AIWAF_METHOD_POLICY_ENABLED: true,
84
+ AIWAF_ALLOWED_METHODS: ['GET', 'POST', 'HEAD', 'OPTIONS']
85
+ }));
26
86
 
27
- ```bash
28
- npm install aiwaf-js --save
87
+ app.get('/', (req, res) => res.send('Protected'));
88
+ app.listen(3000);
29
89
  ```
30
90
 
31
- ---
91
+ ## Configuration
32
92
 
33
- ## Train the Model (Optional but recommended)
93
+ ### Core Controls
94
+
95
+ | Option | Default | Description |
96
+ |---|---|---|
97
+ | `staticKeywords` | `[]` | Substrings that trigger immediate block + blacklist |
98
+ | `dynamicTopN` or `DYNAMIC_TOP_N` | `10` | Frequency threshold for dynamic segment blocking |
99
+ | `WINDOW_SEC` | `60` | Time window for rate limiting |
100
+ | `MAX_REQ` | `100` | Max allowed requests in window before rate block |
101
+ | `FLOOD_REQ` | `200` | Hard threshold that blacklists IP |
102
+ | `HONEYPOT_FIELD` | `undefined` | Body field name used as bot trap |
103
+ | `uuidRoutePrefix` | `"/user"` | Path prefix monitored for UUID tamper attempts |
104
+ | `uuidResolver` | `undefined` | Optional async resolver `(uuid, req) => boolean` for existence checks |
105
+ | `cache` | fallback memory cache | Custom cache backend used by limiter/features |
106
+ | `nTrees` | `100` | IsolationForest trees when model is initialized in-process |
107
+ | `sampleSize` | `256` | IsolationForest sample size |
108
+
109
+ ### Header Validation
110
+
111
+ | Option | Default | Description |
112
+ |---|---|---|
113
+ | `AIWAF_HEADER_VALIDATION` | `false` | Enable header validation pipeline |
114
+ | `AIWAF_REQUIRED_HEADERS` | `[]` | Required headers array, or `{ DEFAULT, GET, POST }` mapping |
115
+ | `AIWAF_HEADER_QUALITY_MIN_SCORE` | `3` | Minimum header quality score |
116
+ | `AIWAF_MAX_HEADER_BYTES` | `32768` | Max header bytes before blocking |
117
+ | `AIWAF_MAX_HEADER_COUNT` | `100` | Max header count before blocking |
118
+ | `AIWAF_MAX_USER_AGENT_LENGTH` | `500` | Max User-Agent length |
119
+ | `AIWAF_MAX_ACCEPT_LENGTH` | `4096` | Max Accept header length |
120
+ | `AIWAF_BLOCKED_USER_AGENTS` | list | Substring deny list |
121
+ | `AIWAF_SUSPICIOUS_USER_AGENTS` | regex list | Regex list for suspicious UA detection |
122
+ | `AIWAF_LEGITIMATE_BOTS` | regex list | Regex allow list for legitimate crawlers |
123
+
124
+ ### Method Policy
125
+
126
+ | Option | Default | Description |
127
+ |---|---|---|
128
+ | `AIWAF_METHOD_POLICY_ENABLED` | `false` | Enforce method allowlist (returns 405) |
129
+ | `AIWAF_ALLOWED_METHODS` | `['GET','POST','HEAD','OPTIONS']` | Allowed methods when policy enabled |
130
+ | `AIWAF_POST_ONLY_SUFFIXES` | `['/create/','/submit/','/upload/','/delete/','/process/']` | A GET request to a path with one of these suffixes returns 405 when the policy is enabled |
131
+ | `AIWAF_LOGIN_PATH_PREFIXES` | common login paths | Path prefixes treated as login routes; a shorter minimum form time applies to them |
132
+
133
+ ### Keyword Learning
134
+
135
+ | Option | Default | Description |
136
+ |---|---|---|
137
+ | `AIWAF_ENABLE_KEYWORD_LEARNING` | `true` | Enable dynamic keyword learning |
138
+ | `AIWAF_DYNAMIC_TOP_N` | `10` | Dynamic keyword learning threshold |
139
+ | `AIWAF_EXEMPT_KEYWORDS` | `[]` | Skip these keywords |
140
+ | `AIWAF_ALLOWED_PATH_KEYWORDS` | `[]` | Allowlist of path fragments |
141
+
142
+ ### Model / Training
143
+
144
+ | Option | Default | Description |
145
+ |---|---|---|
146
+ | `AIWAF_MIN_TRAIN_LOGS` | `50` | Minimum logs to run training |
147
+ | `AIWAF_MIN_AI_LOGS` | `10000` | Minimum logs to train AI model |
148
+ | `AIWAF_FORCE_AI_TRAINING` | `false` | Force AI training below minimum logs |
149
+ | `AIWAF_MODEL_STORAGE` | `file` | `file`, `db`, or `cache` |
150
+ | `AIWAF_MODEL_PATH` | `resources/model.json` | Model file path (file backend) |
151
+ | `AIWAF_MODEL_STORAGE_FALLBACK` | `file` | Fallback model backend |
152
+ | `AIWAF_MODEL_CACHE_KEY` | `aiwaf:model` | Cache key when using cache backend |
153
+ | `AIWAF_MODEL_CACHE_TTL` | `0` | Cache TTL in seconds |
154
+
155
+ ### Geo Blocking
156
+
157
+ | Option | Default | Description |
158
+ |---|---|---|
159
+ | `AIWAF_GEO_BLOCK_ENABLED` | `false` | Enable geo blocking |
160
+ | `AIWAF_GEO_BLOCK_COUNTRIES` | `[]` | Block list (country codes) |
161
+ | `AIWAF_GEO_ALLOW_COUNTRIES` | `[]` | Allow list (country codes) |
162
+ | `AIWAF_GEO_MMDB_PATH` | `geolock/ipinfo_lite.mmdb` | MMDB path |
163
+ | `AIWAF_GEO_CACHE_SECONDS` | `3600` | Geo cache TTL |
164
+ | `AIWAF_GEO_CACHE_PREFIX` | `aiwaf:geo:` | Geo cache key prefix |
165
+
166
+ ### Logging / Storage
167
+
168
+ | Option | Default | Description |
169
+ |---|---|---|
170
+ | `AIWAF_MIDDLEWARE_LOGGING` | `false` | Enable JSONL logging |
171
+ | `AIWAF_MIDDLEWARE_LOG_PATH` | `logs/aiwaf-requests.jsonl` | JSONL log path |
172
+ | `AIWAF_MIDDLEWARE_LOG_DB` | `false` | Store logs in DB |
173
+ | `AIWAF_MIDDLEWARE_LOG_CSV` | `false` | Store logs in CSV |
174
+ | `AIWAF_MIDDLEWARE_LOG_CSV_PATH` | `logs/aiwaf-requests.csv` | CSV log path |
175
+ | `AIWAF_BLOCKED_IPS_CSV_PATH` | `logs/storage/blocked_ips.csv` | CSV fallback for blocked IPs |
176
+ | `AIWAF_IP_EXEMPTIONS_CSV_PATH` | `logs/storage/ip_exemptions.csv` | CSV fallback for IP exemptions |
177
+ | `AIWAF_PATH_EXEMPTIONS_CSV_PATH` | `logs/storage/path_exemptions.csv` | CSV fallback for path exemptions |
178
+ | `AIWAF_GEO_BLOCKED_COUNTRIES_CSV_PATH` | `logs/storage/geo_blocked_countries.csv` | CSV fallback for geo blocklist |
179
+ | `AIWAF_REQUEST_LOGS_CSV_PATH` | `logs/storage/request_logs.csv` | CSV fallback for request logs |
180
+ | `AIWAF_DYNAMIC_KEYWORDS_CSV_PATH` | `logs/storage/dynamic_keywords.csv` | CSV fallback for dynamic keywords |
181
+
182
+
183
+ ## Redis and Cache Behavior
184
+
185
+ - Set `REDIS_URL` (or `AIWAF_REDIS_URL`) to enable Redis connectivity (`lib/redisClient.js`).
186
+ - If Redis is unavailable, runtime falls back to in-memory behavior.
187
+ - You can inject a custom cache object.
188
+
189
+ Rate limiter custom cache must implement:
190
+
191
+ - `lPush(key, value)`
192
+ - `expire(key, ttl)`
193
+ - `lLen(key)`
194
+ - `lRange(key, start, end)`
195
+
196
+ Feature cache custom backend supports:
197
+
198
+ - `get(key)`
199
+ - `set(key, value, ttl)`
200
+
201
+ ## Geo Blocking (MMDB)
202
+
203
+ - Put your DB at `geolock/ipinfo_lite.mmdb` (default) or set `AIWAF_GEO_MMDB_PATH`.
204
+ - Enable with `AIWAF_GEO_BLOCK_ENABLED: true`.
205
+ - Configure `AIWAF_GEO_BLOCK_COUNTRIES` and/or `AIWAF_GEO_ALLOW_COUNTRIES`.
206
+ - Install MMDB reader dependency in your app:
207
+ - `npm install maxmind`
208
+ - If MMDB is unavailable, the middleware falls back to the `x-country-code` header.
209
+
210
+ ## Offline Training
211
+
212
+ Train a model using access logs:
34
213
 
35
- You can train the anomaly detector and keyword learner using real access logs.
214
+ ```bash
215
+ AIWAF_ACCESS_LOG=/path/to/access.log npm run train
216
+ ```
217
+
218
+ Optional rotated/gz support:
36
219
 
37
220
  ```bash
38
- NODE_LOG_PATH=/path/to/access.log npm run train
221
+ NODE_LOG_GLOB='/path/to/access.log.*' npm run train
39
222
  ```
40
223
 
41
- If `NODE_LOG_PATH` is not provided, it defaults to `/var/log/nginx/access.log`.
224
+ Training pipeline in `train.js`:
42
225
 
43
- ---
226
+ - Reads raw and rotated (including `.gz`) access logs
227
+ - Parses request fields (IP, URI, status, response time, timestamp)
228
+ - Builds feature vectors: `[pathLen, kwHits, statusIdx, responseTime, burst, total404]`
229
+ - Enforces `AIWAF_MIN_TRAIN_LOGS` and `AIWAF_MIN_AI_LOGS`
230
+ - Trains IsolationForest when log volume is sufficient
231
+ - Learns dynamic keywords from suspicious 4xx/5xx traffic
232
+ - Removes exempt keywords and unblocks exempt IPs
233
+ - Writes model artifact to `resources/model.json` with metadata
234
+ - Model storage backends:
235
+ - `AIWAF_MODEL_STORAGE`: `file` (default), `db`, `cache`
236
+ - `AIWAF_MODEL_PATH` (file backend)
237
+ - `AIWAF_MODEL_STORAGE_FALLBACK` (fallback backend)
238
+ - `AIWAF_MODEL_CACHE_KEY`, `AIWAF_MODEL_CACHE_TTL` (cache backend)
44
239
 
45
- ## Quick Start
240
+ ## Testing
46
241
 
47
- ```js
48
- const express = require('express')
49
- const aiwaf = require('aiwaf-js')
50
-
51
- const app = express()
52
- app.use(express.json())
53
- app.use(aiwaf())
54
- app.get('/', (req, res) => res.send('Protected'))
55
- app.listen(3000)
242
+ ```bash
243
+ npm test
56
244
  ```
57
245
 
58
- ---
246
+ Current tests cover:
247
+
248
+ - Header validation (caps, suspicious UA, quality scoring)
249
+ - Method policy enforcement
250
+ - Geo blocking and MMDB lookup
251
+ - Honeypot timing policies
252
+ - UUID tamper detection (with resolver)
253
+ - Anomaly detection and recent-behavior analysis
254
+ - Dynamic keyword learning and trainer behaviors
255
+ - CSV/DB fallback storage
256
+ - CLI and settings compatibility
257
+
258
+ ## Data and Persistence
259
+
260
+ - Runtime blacklist storage uses SQLite through `utils/db.js`.
261
+ - Production DB file defaults to `./aiwaf.sqlite`.
262
+ - Test environment uses in-memory SQLite (`NODE_ENV=test`).
263
+ - Primary blocked IP table: `blocked_ips`.
264
+ - Middleware logging supports JSONL, optional SQLite, and CSV fallback.
265
+ - CSV settings:
266
+ - `AIWAF_MIDDLEWARE_LOG_CSV`
267
+ - `AIWAF_MIDDLEWARE_LOG_CSV_PATH`
268
+ - Table storage CSV fallbacks are enabled automatically when DB operations fail:
269
+ - `blocked_ips` -> `logs/storage/blocked_ips.csv` (`AIWAF_BLOCKED_IPS_CSV_PATH`)
270
+ - `ip_exemptions` -> `logs/storage/ip_exemptions.csv` (`AIWAF_IP_EXEMPTIONS_CSV_PATH`)
271
+ - `path_exemptions` -> `logs/storage/path_exemptions.csv` (`AIWAF_PATH_EXEMPTIONS_CSV_PATH`)
272
+ - `geo_blocked_countries` -> `logs/storage/geo_blocked_countries.csv` (`AIWAF_GEO_BLOCKED_COUNTRIES_CSV_PATH`)
273
+ - `request_logs` -> `logs/storage/request_logs.csv` (`AIWAF_REQUEST_LOGS_CSV_PATH`)
274
+ - `dynamic_keywords` -> `logs/storage/dynamic_keywords.csv` (`AIWAF_DYNAMIC_KEYWORDS_CSV_PATH`)
275
+
276
+ ## Operational Notes
277
+
278
+ - Middleware order matters; place AIWAF after body parsers if honeypot checks depend on parsed JSON/form body.
279
+ - If no trained model exists or loading fails, anomaly detector fails open.
280
+ - Dynamic keyword learning persists to DB/CSV via `dynamicKeywordStore`.
281
+ - Multi-instance deployments should use Redis/custom shared cache for limiter consistency.
282
+
283
+ ## Development
59
284
 
60
- ## Redis Support (Recommended for Production)
285
+ ```bash
286
+ npm install
287
+ npm test
288
+ npm run train
289
+ npm run aiwaf -- help
290
+ ```
61
291
 
62
- AIWAF‑JS supports Redis for distributed rate limiting and keyword caching.
292
+ ## Operations CLI
63
293
 
64
294
  ```bash
65
- # On Unix/Linux/macOS
66
- export REDIS_URL=redis://localhost:6379
67
-
68
- # On Windows PowerShell
69
- $env:REDIS_URL = "redis://localhost:6379"
295
+ npm run aiwaf -- list blacklist
296
+ npm run aiwaf -- list exemptions
297
+ npm run aiwaf -- add ip-exemption 203.0.113.10 "trusted monitor"
298
+ npm run aiwaf -- add path-exemption /health "health probes"
299
+ npm run aiwaf -- geo block CN "manual block"
300
+ npm run aiwaf -- geo summary
301
+ npm run aiwaf -- diagnose 203.0.113.10
70
302
  ```
71
303
 
72
- If Redis is unavailable, it gracefully falls back to in-memory mode.
304
+ ## Sandbox (OWASP Juice Shop)
73
305
 
74
- ---
306
+ The repository includes a runnable sandbox that proxies OWASP Juice Shop behind AIWAF. It also includes an attack suite that generates comparable results for direct vs protected traffic.
75
307
 
76
- ## Configuration
308
+ Run sandbox:
77
309
 
78
- ```js
79
- app.use(aiwaf({
80
- staticKeywords: ['.php', '.env', '.git'],
81
- dynamicTopN: 10,
82
- WINDOW_SEC: 10,
83
- MAX_REQ: 20,
84
- FLOOD_REQ: 10,
85
- HONEYPOT_FIELD: 'hp_field',
86
- }));
310
+ ```bash
311
+ docker compose -f examples/sandbox/docker-compose.yml up --build
87
312
  ```
88
313
 
89
- | Option | Env Var | Default | Description |
90
- |--------------------|---------------------|-----------------------------|----------------------------------------------------------|
91
- | `staticKeywords` | — | [".php",".xmlrpc","wp-"] | Substrings to block immediately. |
92
- | `dynamicTopN` | `DYNAMIC_TOP_N` | 10 | Number of dynamic keywords to match. |
93
- | `windowSec` | `WINDOW_SEC` | 10 | Time window in seconds for rate limiting. |
94
- | `maxReq` | `MAX_REQ` | 20 | Max allowed requests per window. |
95
- | `floodReq` | `FLOOD_REQ` | 10 | Hard limit triggering IP block. |
96
- | `honeypotField` | `HONEYPOT_FIELD` | "hp_field" | Hidden bot trap field. |
97
- | `anomalyThreshold` | `ANOMALY_THRESHOLD` | 0.5 | Threshold for IsolationForest-based anomaly detection. |
98
- | `logPath` | `NODE_LOG_PATH` | "/var/log/nginx/access.log" | Path to access log file. |
99
- | `logGlob` | `NODE_LOG_GLOB` | "${logPath}.*" | Glob pattern to include rotated/gzipped logs. |
314
+ Run the attack suite and compare:
100
315
 
101
- ---
102
-
103
- ## Optimization Note
104
-
105
- **Tip:** In high-volume environments, caching the feature vector extractor (especially if Redis is unavailable) can reduce redundant computation and significantly boost performance.
316
+ ```bash
317
+ node examples/sandbox/run-and-compare.js http://localhost:3001 http://localhost:3000
318
+ ```
106
319
 
107
- ---
320
+ The comparison output includes per‑attack block rates and total blocked requests.
108
321
 
109
- ## 📄 License
322
+ ## License
110
323
 
111
- MIT License © 2025 [Aayush Gauba](https://github.com/aayushg)
324
+ MIT
package/aiwaf.sqlite ADDED
Binary file