aiwaf 0.1.9.0.5-py3-none-any.whl → 0.1.9.0.7-py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of aiwaf might be problematic.

aiwaf/storage.py CHANGED
@@ -1,4 +1,3 @@
- import os, csv, gzip, glob
  import numpy as np
  import pandas as pd
  from django.conf import settings
@@ -22,423 +21,209 @@ def _import_models():
          # Keep models as None if can't import
          pass

- # Configuration
- STORAGE_MODE = getattr(settings, "AIWAF_STORAGE_MODE", "models") # "models" or "csv"
- CSV_DATA_DIR = getattr(settings, "AIWAF_CSV_DATA_DIR", "aiwaf_data")
- FEATURE_CSV = getattr(settings, "AIWAF_CSV_PATH", os.path.join(CSV_DATA_DIR, "access_samples.csv"))
- BLACKLIST_CSV = os.path.join(CSV_DATA_DIR, "blacklist.csv")
- EXEMPTION_CSV = os.path.join(CSV_DATA_DIR, "exemptions.csv")
- KEYWORDS_CSV = os.path.join(CSV_DATA_DIR, "keywords.csv")
-
- CSV_HEADER = [
-     "ip","path_len","kw_hits","resp_time",
-     "status_idx","burst_count","total_404","label"
- ]
-
- def ensure_csv_directory():
-     """Ensure the CSV data directory exists"""
-     if STORAGE_MODE == "csv" and not os.path.exists(CSV_DATA_DIR):
-         os.makedirs(CSV_DATA_DIR)
-
- class CsvFeatureStore:
-     @staticmethod
-     def persist_rows(rows):
-         ensure_csv_directory()
-         new_file = not os.path.exists(FEATURE_CSV)
-         with open(FEATURE_CSV, "a", newline="", encoding="utf-8") as f:
-             w = csv.writer(f)
-             if new_file:
-                 w.writerow(CSV_HEADER)
-             w.writerows(rows)
-
-     @staticmethod
-     def load_matrix():
-         if not os.path.exists(FEATURE_CSV):
-             return np.empty((0,6))
-         df = pd.read_csv(
-             FEATURE_CSV,
-             names=CSV_HEADER,
-             skiprows=1,
-             engine="python",
-             on_bad_lines="skip"
-         )
-         feature_cols = CSV_HEADER[1:7]
-         df[feature_cols] = df[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)
-         return df[feature_cols].to_numpy()
-
- class DbFeatureStore:
+ class ModelFeatureStore:
      @staticmethod
      def persist_rows(rows):
+         """Persist feature data to Django models"""
          _import_models()
-         if FeatureSample is not None:
-             objs = []
-             for ip,pl,kw,rt,si,bc,t404,label in rows:
-                 objs.append(FeatureSample(
-                     ip=ip, path_len=pl, kw_hits=kw,
-                     resp_time=rt, status_idx=si,
-                     burst_count=bc, total_404=t404,
-                     label=label
-                 ))
-             FeatureSample.objects.bulk_create(objs, ignore_conflicts=True)
-
-     @staticmethod
-     def load_matrix():
+         if FeatureSample is None:
+             print("Warning: Django models not available, skipping feature storage")
+             return
+
+         for row in rows:
+             try:
+                 FeatureSample.objects.create(
+                     ip=row[0],
+                     path_len=int(row[1]),
+                     kw_hits=int(row[2]),
+                     resp_time=float(row[3]),
+                     status_idx=int(row[4]),
+                     burst_count=int(row[5]),
+                     total_404=int(row[6]),
+                     label=int(row[7]),
+                     created_at=timezone.now()
+                 )
+             except Exception as e:
+                 print(f"Error saving feature sample: {e}")
+
+     @staticmethod
+     def get_all_data():
+         """Get all feature data as DataFrame"""
          _import_models()
-         if FeatureSample is not None:
-             qs = FeatureSample.objects.all().values_list(
-                 "path_len","kw_hits","resp_time","status_idx","burst_count","total_404"
+         if FeatureSample is None:
+             return pd.DataFrame()
+
+         try:
+             queryset = FeatureSample.objects.all().values(
+                 'ip', 'path_len', 'kw_hits', 'resp_time',
+                 'status_idx', 'burst_count', 'total_404', 'label'
              )
-             return np.array(list(qs), dtype=float)
-         return np.empty((0,6))
-
- def get_store():
-     if getattr(settings, "AIWAF_FEATURE_STORE", "csv") == "db":
-         return DbFeatureStore
-     return CsvFeatureStore
-
-
- # ============= CSV Storage Classes =============
+             df = pd.DataFrame(list(queryset))
+             if df.empty:
+                 return df
+
+             # Ensure proper column order and types
+             feature_cols = ['path_len', 'kw_hits', 'resp_time', 'status_idx', 'burst_count', 'total_404']
+             for col in feature_cols:
+                 if col in df.columns:
+                     df[col] = pd.to_numeric(df[col], errors='coerce')
+
+             return df
+         except Exception as e:
+             print(f"Error loading feature data: {e}")
+             return pd.DataFrame()

- class CsvBlacklistStore:
-     """CSV-based storage for IP blacklist entries"""
-
-     @staticmethod
-     def add_ip(ip_address, reason):
-         ensure_csv_directory()
-         # Check if IP already exists
-         if CsvBlacklistStore.is_blocked(ip_address):
-             return
-
-         # Add new entry
-         new_file = not os.path.exists(BLACKLIST_CSV)
-         with open(BLACKLIST_CSV, "a", newline="", encoding="utf-8") as f:
-             writer = csv.writer(f)
-             if new_file:
-                 writer.writerow(["ip_address", "reason", "created_at"])
-             writer.writerow([ip_address, reason, timezone.now().isoformat()])
-
+ class ModelBlacklistStore:
      @staticmethod
-     def is_blocked(ip_address):
-         if not os.path.exists(BLACKLIST_CSV):
+     def is_blocked(ip):
+         """Check if IP is in blacklist"""
+         _import_models()
+         if BlacklistEntry is None:
+             return False
+         try:
+             return BlacklistEntry.objects.filter(ip_address=ip).exists()
+         except Exception:
              return False
-
-         with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
-             reader = csv.DictReader(f)
-             for row in reader:
-                 if row["ip_address"] == ip_address:
-                     return True
-         return False
-
-     @staticmethod
-     def get_all():
-         """Return list of dictionaries with blacklist entries"""
-         if not os.path.exists(BLACKLIST_CSV):
-             return []
-
-         entries = []
-         with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
-             reader = csv.DictReader(f)
-             for row in reader:
-                 entries.append(row)
-         return entries
-
-     @staticmethod
-     def remove_ip(ip_address):
-         if not os.path.exists(BLACKLIST_CSV):
-             return
-
-         # Read all entries except the one to remove
-         entries = []
-         with open(BLACKLIST_CSV, "r", newline="", encoding="utf-8") as f:
-             reader = csv.DictReader(f)
-             entries = [row for row in reader if row["ip_address"] != ip_address]
-
-         # Write back the filtered entries
-         with open(BLACKLIST_CSV, "w", newline="", encoding="utf-8") as f:
-             if entries:
-                 writer = csv.DictWriter(f, fieldnames=["ip_address", "reason", "created_at"])
-                 writer.writeheader()
-                 writer.writerows(entries)
-

- class CsvExemptionStore:
-     """CSV-based storage for IP exemption entries"""
-
      @staticmethod
-     def add_ip(ip_address, reason=""):
-         ensure_csv_directory()
-
-         # Check if IP already exists to avoid duplicates
-         if CsvExemptionStore.is_exempted(ip_address):
+     def block_ip(ip, reason="Automated block"):
+         """Add IP to blacklist"""
+         _import_models()
+         if BlacklistEntry is None:
+             print(f"Warning: Cannot block IP {ip}, models not available")
              return
-
-         # Add new entry
-         new_file = not os.path.exists(EXEMPTION_CSV)
-         try:
-             with open(EXEMPTION_CSV, "a", newline="", encoding="utf-8") as f:
-                 writer = csv.writer(f)
-                 if new_file:
-                     writer.writerow(["ip_address", "reason", "created_at"])
-                 writer.writerow([ip_address, reason, timezone.now().isoformat()])
-         except Exception as e:
-             print(f"Error writing to exemption CSV: {e}")
-             print(f"File path: {EXEMPTION_CSV}")
-             print(f"Directory exists: {os.path.exists(CSV_DATA_DIR)}")
-             raise
-
-     @staticmethod
-     def is_exempted(ip_address):
-         if not os.path.exists(EXEMPTION_CSV):
-             # Debug: Let user know file doesn't exist
-             if getattr(settings, 'DEBUG', False):
-                 print(f"DEBUG: Exemption CSV not found: {EXEMPTION_CSV}")
-             return False
-
         try:
-             with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
-                 reader = csv.DictReader(f)
-                 for row_num, row in enumerate(reader):
-                     stored_ip = row.get("ip_address", "").strip()
-                     if getattr(settings, 'DEBUG', False) and row_num < 5: # Show first 5 for debug
-                         print(f"DEBUG: Row {row_num}: comparing '{stored_ip}' with '{ip_address}'")
-                     if stored_ip == ip_address:
-                         if getattr(settings, 'DEBUG', False):
-                             print(f"DEBUG: Found exemption match for {ip_address}")
-                         return True
+             BlacklistEntry.objects.get_or_create(
+                 ip_address=ip,
+                 defaults={'reason': reason, 'created_at': timezone.now()}
+             )
         except Exception as e:
-             print(f"Error reading exemption CSV: {e}")
-             return False
-
-         if getattr(settings, 'DEBUG', False):
-             print(f"DEBUG: No exemption found for {ip_address}")
-         return False
-
-     @staticmethod
-     def get_all():
-         """Return list of dictionaries with exemption entries"""
-         if not os.path.exists(EXEMPTION_CSV):
-             return []
-
-         entries = []
-         with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
-             reader = csv.DictReader(f)
-             for row in reader:
-                 entries.append(row)
-         return entries
-
-     @staticmethod
-     def remove_ip(ip_address):
-         if not os.path.exists(EXEMPTION_CSV):
-             return
-
-         # Read all entries except the one to remove
-         entries = []
-         with open(EXEMPTION_CSV, "r", newline="", encoding="utf-8") as f:
-             reader = csv.DictReader(f)
-             entries = [row for row in reader if row["ip_address"] != ip_address]
-
-         # Write back the filtered entries
-         with open(EXEMPTION_CSV, "w", newline="", encoding="utf-8") as f:
-             if entries:
-                 writer = csv.DictWriter(f, fieldnames=["ip_address", "reason", "created_at"])
-                 writer.writeheader()
-                 writer.writerows(entries)
-
+             print(f"Error blocking IP {ip}: {e}")

- class CsvKeywordStore:
-     """CSV-based storage for dynamic keywords"""
-
-     @staticmethod
-     def add_keyword(keyword, count=1):
-         ensure_csv_directory()
-
-         # Read existing keywords
-         keywords = CsvKeywordStore._load_keywords()
-
-         # Update or add keyword
-         keywords[keyword] = keywords.get(keyword, 0) + count
-
-         # Save back to file
-         CsvKeywordStore._save_keywords(keywords)
-
-     @staticmethod
-     def get_top_keywords(limit=10):
-         keywords = CsvKeywordStore._load_keywords()
-         # Sort by count in descending order and return top N
-         sorted_keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True)
-         return [kw for kw, count in sorted_keywords[:limit]]
-
-     @staticmethod
-     def remove_keyword(keyword):
-         keywords = CsvKeywordStore._load_keywords()
-         if keyword in keywords:
-             del keywords[keyword]
-             CsvKeywordStore._save_keywords(keywords)
-
-     @staticmethod
-     def clear_all():
-         if os.path.exists(KEYWORDS_CSV):
-             os.remove(KEYWORDS_CSV)
-
      @staticmethod
-     def _load_keywords():
-         """Load keywords from CSV file as a dictionary"""
-         if not os.path.exists(KEYWORDS_CSV):
-             return {}
-
-         keywords = {}
-         with open(KEYWORDS_CSV, "r", newline="", encoding="utf-8") as f:
-             reader = csv.DictReader(f)
-             for row in reader:
-                 keywords[row["keyword"]] = int(row["count"])
-         return keywords
-
-     @staticmethod
-     def _save_keywords(keywords):
-         """Save keywords dictionary to CSV file"""
-         with open(KEYWORDS_CSV, "w", newline="", encoding="utf-8") as f:
-             writer = csv.writer(f)
-             writer.writerow(["keyword", "count", "last_updated"])
-             for keyword, count in keywords.items():
-                 writer.writerow([keyword, count, timezone.now().isoformat()])
-
-
- # ============= Storage Factory Functions =============
-
- def get_blacklist_store():
-     """Return appropriate blacklist storage class based on settings"""
-     if STORAGE_MODE == "csv":
-         return CsvBlacklistStore
-     else:
-         # Return a wrapper for Django models (only if models are available)
-         if BlacklistEntry is not None:
-             return ModelBlacklistStore
-         else:
-             # Fallback to CSV if models aren't available
-             return CsvBlacklistStore
-
-
- def get_exemption_store():
-     """Return appropriate exemption storage class based on settings"""
-     if getattr(settings, 'DEBUG', False):
-         print(f"DEBUG: Storage mode = {STORAGE_MODE}, CSV mode = {STORAGE_MODE == 'csv'}")
-
-     if STORAGE_MODE == "csv":
-         if getattr(settings, 'DEBUG', False):
-             print("DEBUG: Using CsvExemptionStore")
-         return CsvExemptionStore
-     else:
+     def unblock_ip(ip):
+         """Remove IP from blacklist"""
         _import_models()
-         if IPExemption is not None:
-             if getattr(settings, 'DEBUG', False):
-                 print("DEBUG: Using ModelExemptionStore")
-             return ModelExemptionStore
-         else:
-             if getattr(settings, 'DEBUG', False):
-                 print("DEBUG: Falling back to CsvExemptionStore (models not available)")
-             return CsvExemptionStore
-
-
- def get_keyword_store():
-     """Return appropriate keyword storage class based on settings"""
-     if STORAGE_MODE == "csv":
-         return CsvKeywordStore
-     else:
-         if DynamicKeyword is not None:
-             return ModelKeywordStore
-         else:
-             return CsvKeywordStore
-
-
- # ============= Django Model Wrappers =============
+         if BlacklistEntry is None:
+             return
+         try:
+             BlacklistEntry.objects.filter(ip_address=ip).delete()
+         except Exception as e:
+             print(f"Error unblocking IP {ip}: {e}")

- class ModelBlacklistStore:
-     """Django model-based storage for blacklist entries"""
-
-     @staticmethod
-     def add_ip(ip_address, reason):
-         _import_models()
-         if BlacklistEntry is not None:
-             BlacklistEntry.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
-
-     @staticmethod
-     def is_blocked(ip_address):
-         _import_models()
-         if BlacklistEntry is not None:
-             return BlacklistEntry.objects.filter(ip_address=ip_address).exists()
-         return False
-
-     @staticmethod
-     def get_all():
-         _import_models()
-         if BlacklistEntry is not None:
-             return list(BlacklistEntry.objects.values("ip_address", "reason", "created_at"))
-         return []
-
      @staticmethod
-     def remove_ip(ip_address):
+     def get_all_blocked_ips():
+         """Get all blocked IPs"""
         _import_models()
-         if BlacklistEntry is not None:
-             BlacklistEntry.objects.filter(ip_address=ip_address).delete()
-
+         if BlacklistEntry is None:
+             return []
+         try:
+             return list(BlacklistEntry.objects.values_list('ip_address', flat=True))
+         except Exception:
+             return []

  class ModelExemptionStore:
-     """Django model-based storage for exemption entries"""
-
      @staticmethod
-     def add_ip(ip_address, reason=""):
+     def is_exempted(ip):
+         """Check if IP is exempted"""
         _import_models()
-         if IPExemption is not None:
-             IPExemption.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
-
+         if IPExemption is None:
+             return False
+         try:
+             return IPExemption.objects.filter(ip_address=ip).exists()
+         except Exception:
+             return False
+
      @staticmethod
-     def is_exempted(ip_address):
+     def add_exemption(ip, reason="Manual exemption"):
+         """Add IP to exemption list"""
         _import_models()
-         if IPExemption is not None:
-             return IPExemption.objects.filter(ip_address=ip_address).exists()
-         return False
-
+         if IPExemption is None:
+             print(f"Warning: Cannot exempt IP {ip}, models not available")
+             return
+         try:
+             IPExemption.objects.get_or_create(
+                 ip_address=ip,
+                 defaults={'reason': reason, 'created_at': timezone.now()}
+             )
+         except Exception as e:
+             print(f"Error exempting IP {ip}: {e}")
+
      @staticmethod
-     def get_all():
+     def remove_exemption(ip):
+         """Remove IP from exemption list"""
         _import_models()
-         if IPExemption is not None:
-             return list(IPExemption.objects.values("ip_address", "reason", "created_at"))
-         return []
-
+         if IPExemption is None:
+             return
+         try:
+             IPExemption.objects.filter(ip_address=ip).delete()
+         except Exception as e:
+             print(f"Error removing exemption for IP {ip}: {e}")
+
      @staticmethod
-     def remove_ip(ip_address):
+     def get_all_exempted_ips():
+         """Get all exempted IPs"""
         _import_models()
-         if IPExemption is not None:
-             IPExemption.objects.filter(ip_address=ip_address).delete()
-
+         if IPExemption is None:
+             return []
+         try:
+             return list(IPExemption.objects.values_list('ip_address', flat=True))
+         except Exception:
+             return []

  class ModelKeywordStore:
-     """Django model-based storage for dynamic keywords"""
-
      @staticmethod
-     def add_keyword(keyword, count=1):
+     def add_keyword(keyword):
+         """Add a keyword to the dynamic keyword list"""
         _import_models()
-         if DynamicKeyword is not None:
-             obj, created = DynamicKeyword.objects.get_or_create(keyword=keyword, defaults={"count": count})
+         if DynamicKeyword is None:
+             return
+         try:
+             obj, created = DynamicKeyword.objects.get_or_create(keyword=keyword)
              if not created:
-                 obj.count += count
+                 obj.count += 1
              obj.save()
-
-     @staticmethod
-     def get_top_keywords(limit=10):
-         _import_models()
-         if DynamicKeyword is not None:
-             return list(DynamicKeyword.objects.order_by("-count").values_list("keyword", flat=True)[:limit])
-         return []
-
+         except Exception as e:
+             print(f"Error adding keyword {keyword}: {e}")
+
      @staticmethod
-     def remove_keyword(keyword):
+     def get_top_keywords(n=10):
+         """Get top N keywords by count"""
         _import_models()
-         if DynamicKeyword is not None:
-             DynamicKeyword.objects.filter(keyword=keyword).delete()
-
+         if DynamicKeyword is None:
+             return []
+         try:
+             return list(
+                 DynamicKeyword.objects.order_by('-count')[:n]
+                 .values_list('keyword', flat=True)
+             )
+         except Exception:
+             return []
+
      @staticmethod
-     def clear_all():
+     def reset_keywords():
+         """Reset all keyword counts"""
         _import_models()
-         if DynamicKeyword is not None:
+         if DynamicKeyword is None:
+             return
+         try:
              DynamicKeyword.objects.all().delete()
+         except Exception as e:
+             print(f"Error resetting keywords: {e}")
+
+ # Factory functions that only return Django model stores
+ def get_feature_store():
+     """Get the feature store (Django models only)"""
+     return ModelFeatureStore()
+
+ def get_blacklist_store():
+     """Get the blacklist store (Django models only)"""
+     return ModelBlacklistStore()
+
+ def get_exemption_store():
+     """Get the exemption store (Django models only)"""
+     return ModelExemptionStore()
+
+ def get_keyword_store():
+     """Get the keyword store (Django models only)"""
+     return ModelKeywordStore()
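
With the CSV backends gone, the factory functions above are the only storage entry points left in aiwaf/storage.py, and they always return the Django-model-backed stores. A minimal usage sketch of the new API follows; the store methods are the ones added in this version, but the calling code, the example IP address, and the example keyword are illustrative only, and the sketch assumes a configured Django project with the aiwaf models migrated:

    from aiwaf.storage import get_blacklist_store, get_exemption_store, get_keyword_store

    blacklist = get_blacklist_store()
    exemptions = get_exemption_store()
    keywords = get_keyword_store()

    ip = "203.0.113.7"  # example address, not taken from the package
    if not exemptions.is_exempted(ip) and not blacklist.is_blocked(ip):
        blacklist.block_ip(ip, reason="manual review")  # get_or_create on BlacklistEntry

    keywords.add_keyword("wp-login")        # example keyword; bumps DynamicKeyword.count if it already exists
    print(keywords.get_top_keywords(n=5))   # keywords ordered by descending count
    print(blacklist.get_all_blocked_ips())  # flat list of blocked ip_address values

Note that every method re-checks model availability via _import_models() and, when the models cannot be imported, degrades to printing a warning, returning an empty result, or doing nothing.
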
aiwaf/trainer.py CHANGED
@@ -78,18 +78,6 @@ def _read_all_logs() -> list[str]:
          except OSError:
              continue

-     # If no lines found from main log, try AI-WAF middleware CSV log
-     if not lines:
-         middleware_csv = getattr(settings, "AIWAF_MIDDLEWARE_LOG", "aiwaf_requests.log").replace('.log', '.csv')
-         if os.path.exists(middleware_csv):
-             try:
-                 from .middleware_logger import AIWAFCSVLogParser
-                 csv_lines = AIWAFCSVLogParser.get_log_lines_for_trainer(middleware_csv)
-                 lines.extend(csv_lines)
-                 print(f"📋 Using AI-WAF middleware CSV log: {middleware_csv} ({len(csv_lines)} entries)")
-             except Exception as e:
-                 print(f"⚠️ Failed to read middleware CSV log: {e}")
-
      return lines

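
The block removed above was the trainer's only fallback to the AI-WAF middleware CSV log, so _read_all_logs() now returns only the lines collected from the regular access logs. For a deployment that still depends on that fallback, a rough sketch of the old behaviour, reconstructed from the deleted lines for use in the caller's own code (and assuming the aiwaf.middleware_logger module and its AIWAFCSVLogParser are still present in the installed version), would be:

    import os
    from django.conf import settings
    from aiwaf.middleware_logger import AIWAFCSVLogParser

    # Same derivation the removed code used: swap the .log suffix for .csv.
    middleware_csv = getattr(settings, "AIWAF_MIDDLEWARE_LOG", "aiwaf_requests.log").replace(".log", ".csv")

    extra_lines = []
    if os.path.exists(middleware_csv):
        extra_lines = AIWAFCSVLogParser.get_log_lines_for_trainer(middleware_csv)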