bbot 2.4.2__py3-none-any.whl → 2.4.2.6590rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of bbot might be problematic.
- bbot/__init__.py +1 -1
- bbot/core/event/base.py +64 -4
- bbot/core/helpers/diff.py +10 -7
- bbot/core/helpers/helper.py +5 -1
- bbot/core/helpers/misc.py +48 -11
- bbot/core/helpers/regex.py +4 -0
- bbot/core/helpers/regexes.py +45 -8
- bbot/core/helpers/url.py +21 -5
- bbot/core/helpers/web/client.py +25 -5
- bbot/core/helpers/web/engine.py +9 -1
- bbot/core/helpers/web/envelopes.py +352 -0
- bbot/core/helpers/web/web.py +10 -2
- bbot/core/helpers/yara_helper.py +50 -0
- bbot/core/modules.py +23 -7
- bbot/defaults.yml +26 -1
- bbot/modules/base.py +4 -2
- bbot/modules/{deadly/dastardly.py → dastardly.py} +1 -1
- bbot/modules/{deadly/ffuf.py → ffuf.py} +1 -1
- bbot/modules/ffuf_shortnames.py +1 -1
- bbot/modules/httpx.py +14 -0
- bbot/modules/hunt.py +24 -6
- bbot/modules/internal/aggregate.py +1 -0
- bbot/modules/internal/excavate.py +356 -197
- bbot/modules/lightfuzz/lightfuzz.py +203 -0
- bbot/modules/lightfuzz/submodules/__init__.py +0 -0
- bbot/modules/lightfuzz/submodules/base.py +312 -0
- bbot/modules/lightfuzz/submodules/cmdi.py +106 -0
- bbot/modules/lightfuzz/submodules/crypto.py +474 -0
- bbot/modules/lightfuzz/submodules/nosqli.py +183 -0
- bbot/modules/lightfuzz/submodules/path.py +154 -0
- bbot/modules/lightfuzz/submodules/serial.py +179 -0
- bbot/modules/lightfuzz/submodules/sqli.py +187 -0
- bbot/modules/lightfuzz/submodules/ssti.py +39 -0
- bbot/modules/lightfuzz/submodules/xss.py +191 -0
- bbot/modules/{deadly/nuclei.py → nuclei.py} +1 -1
- bbot/modules/paramminer_headers.py +2 -0
- bbot/modules/reflected_parameters.py +80 -0
- bbot/modules/{deadly/vhost.py → vhost.py} +2 -2
- bbot/presets/web/lightfuzz-heavy.yml +16 -0
- bbot/presets/web/lightfuzz-light.yml +20 -0
- bbot/presets/web/lightfuzz-medium.yml +14 -0
- bbot/presets/web/lightfuzz-superheavy.yml +13 -0
- bbot/presets/web/lightfuzz-xss.yml +21 -0
- bbot/presets/web/paramminer.yml +8 -5
- bbot/scanner/preset/args.py +26 -0
- bbot/scanner/scanner.py +6 -0
- bbot/test/test_step_1/test__module__tests.py +1 -1
- bbot/test/test_step_1/test_helpers.py +7 -0
- bbot/test/test_step_1/test_presets.py +2 -2
- bbot/test/test_step_1/test_web.py +20 -0
- bbot/test/test_step_1/test_web_envelopes.py +343 -0
- bbot/test/test_step_2/module_tests/test_module_excavate.py +404 -29
- bbot/test/test_step_2/module_tests/test_module_httpx.py +29 -0
- bbot/test/test_step_2/module_tests/test_module_hunt.py +18 -1
- bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +1947 -0
- bbot/test/test_step_2/module_tests/test_module_paramminer_getparams.py +4 -1
- bbot/test/test_step_2/module_tests/test_module_paramminer_headers.py +46 -2
- bbot/test/test_step_2/module_tests/test_module_reflected_parameters.py +226 -0
- bbot/wordlists/paramminer_parameters.txt +0 -8
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/METADATA +2 -1
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/RECORD +64 -42
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/LICENSE +0 -0
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/WHEEL +0 -0
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/entry_points.txt +0 -0
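
This release also ships five lightfuzz presets (listed above). A hedged sketch of enabling one via the Python API, assuming each preset is addressable by its YAML filename stem (target and event handling here are illustrative):

import asyncio
from bbot.scanner import Scanner

async def main():
    # preset name assumed to match the new bbot/presets/web/lightfuzz-light.yml
    scan = Scanner("https://example.com", presets=["lightfuzz-light"])
    async for event in scan.async_start():
        print(event.type, event.data)

asyncio.run(main())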
@@ -63,6 +63,17 @@ def _exclude_key(original_dict, key_to_exclude):
 
 
 def extract_params_url(parsed_url):
+    """
+    Yields query parameters from a parsed URL.
+
+    Args:
+        parsed_url (ParseResult): The URL to extract parameters from.
+
+    Yields:
+        tuple: Contains the hardcoded HTTP method ('GET'), parsed URL, parameter name,
+        original value, source (hardcoded to 'direct_url'), and additional parameters
+        (all parameters excluding the current one).
+    """
    params = parse_qs(parsed_url.query)
     flat_params = {k: v[0] for k, v in params.items()}
 
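
For context, the documented yield shape can be sketched end to end (only the params/flat_params lines appear in the hunk above; the loop and the _exclude_key behavior are inferred from the new docstring):

from urllib.parse import parse_qs, urlparse

def extract_params_url(parsed_url):
    params = parse_qs(parsed_url.query)
    flat_params = {k: v[0] for k, v in params.items()}
    for name, value in flat_params.items():
        # additional params = everything except the current one (what _exclude_key returns)
        other = {k: v for k, v in flat_params.items() if k != name}
        yield ("GET", parsed_url, name, value, "direct_url", other)

parsed = urlparse("https://example.com/search?q=bbot&page=2")
for method, _, name, value, source, extra in extract_params_url(parsed):
    print(method, name, value, source, extra)
# GET q bbot direct_url {'page': '2'}
# GET page 2 direct_url {'q': 'bbot'}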
@@ -303,44 +314,38 @@ class excavate(BaseInternalModule, BaseInterceptModule):
     }
 
     options = {
-        "retain_querystring": False,
         "yara_max_match_data": 2000,
         "custom_yara_rules": "",
+        "speculate_params": False,
     }
     options_desc = {
-        "retain_querystring": "Keep the querystring intact on emitted WEB_PARAMETERS",
         "yara_max_match_data": "Sets the maximum amount of text that can extracted from a YARA regex",
         "custom_yara_rules": "Include custom Yara rules",
+        "speculate_params": "Enable speculative parameter extraction from JSON and XML content",
     }
     scope_distance_modifier = None
     accept_dupes = False
 
     _module_threads = 8
 
-    parameter_blacklist = {
-        p.lower()
-        for p in [
-            "__VIEWSTATE",
-            "__EVENTARGUMENT",
-            "__EVENTVALIDATION",
-            "__EVENTTARGET",
-            "__EVENTARGUMENT",
-            "__VIEWSTATEGENERATOR",
-            "__SCROLLPOSITIONY",
-            "__SCROLLPOSITIONX",
-            "ASP.NET_SessionId",
-            "JSESSIONID",
-            "PHPSESSID",
-        ]
-    }
-
     yara_rule_name_regex = re.compile(r"rule\s(\w+)\s{")
     yara_rule_regex = re.compile(r"(?s)((?:rule\s+\w+\s*{[^{}]*(?:{[^{}]*}[^{}]*)*[^{}]*(?:/\S*?}[^/]*?/)*)*})")
 
     def in_bl(self, value):
-
+        # Check if the value is in the blacklist or starts with a blacklisted prefix.
+        lower_value = value.lower()
+
+        if lower_value in self.parameter_blacklist:
+            return True
+
+        for bl_param_prefix in self.parameter_blacklist_prefixes:
+            if lower_value.startswith(bl_param_prefix.lower()):
+                return True
+
+        return False
 
     def url_unparse(self, param_type, parsed_url):
+        # Reconstructs a URL, optionally omitting the query string based on remove_querystring configuration value.
         if param_type == "GETPARAM":
             querystring = ""
         else:
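
The blacklist itself moves out of the module (the removed parameter_blacklist literal above) and into scan config, and matching gains a prefix mode. A standalone sketch of the new check, with illustrative config values borrowed from the old hardcoded list:

parameter_blacklist = {p.lower() for p in ["JSESSIONID", "PHPSESSID", "ASP.NET_SessionId"]}
parameter_blacklist_prefixes = {"__VIEWSTATE", "__EVENT"}

def in_bl(value):
    lower_value = value.lower()
    if lower_value in parameter_blacklist:
        return True
    return any(lower_value.startswith(p.lower()) for p in parameter_blacklist_prefixes)

assert in_bl("PHPSESSID")
assert in_bl("__VIEWSTATEGENERATOR")  # caught by prefix, no exact entry needed
assert not in_bl("page")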
@@ -352,7 +357,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                 parsed_url.netloc,
                 parsed_url.path,
                 "",
-
+                "" if self.remove_querystring else querystring,
                 "",
             )
         )
@@ -364,7 +369,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
         class ParameterExtractorRule:
             name = ""
 
-            def extract(self):
+            async def extract(self):
                 pass
 
             def __init__(self, excavate, result):
@@ -377,29 +382,32 @@ class excavate(BaseInternalModule, BaseInterceptModule):
            extraction_regex = re.compile(r"\$.get\([\'\"](.+)[\'\"].+(\{.+\})\)")
             output_type = "GETPARAM"
 
-            def convert_to_dict(self, extracted_str):
-
-                extracted_str = re.sub(r"(\w+):", r'"\1":', extracted_str)
-                try:
-                    return json.loads(extracted_str)
-                except json.JSONDecodeError as e:
-                    self.excavate.debug(f"Failed to decode JSON: {e}")
-                    return None
-
-            def extract(self):
-                extracted_results = self.extraction_regex.findall(str(self.result))
+            async def extract(self):
+                extracted_results = await self.excavate.helpers.re.findall(self.extraction_regex, str(self.result))
                 if extracted_results:
                     for action, extracted_parameters in extracted_results:
-                        extracted_parameters_dict = self.convert_to_dict(extracted_parameters)
+                        extracted_parameters_dict = await self.convert_to_dict(extracted_parameters)
                         for parameter_name, original_value in extracted_parameters_dict.items():
                             yield (
                                 self.output_type,
                                 parameter_name,
-                                original_value,
+                                original_value.strip(),
                                 action,
                                 _exclude_key(extracted_parameters_dict, parameter_name),
                             )
 
+            async def convert_to_dict(self, extracted_str):
+                extracted_str = extracted_str.replace("'", '"')
+                extracted_str = await self.excavate.helpers.re.sub(
+                    re.compile(r"(\w+):"), r'"\1":', extracted_str
+                )  # Quote keys
+
+                try:
+                    return json.loads(extracted_str)
+                except json.JSONDecodeError as e:
+                    self.excavate.debug(f"Failed to decode JSON: {e}")
+                    return None
+
         class PostJquery(GetJquery):
             name = "POST jquery"
             discovery_regex = r"/\$.post\([^\)].+\)/ nocase"
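
The relocated convert_to_dict normalizes JavaScript object literals into JSON before parsing; the transformation can be reproduced synchronously (the async helpers.re calls above wrap the same re operations):

import json
import re

extracted_str = "{'page': 1, limit: 25}"
extracted_str = extracted_str.replace("'", '"')             # single -> double quotes
extracted_str = re.sub(r"(\w+):", r'"\1":', extracted_str)  # quote bare keys
print(json.loads(extracted_str))  # {'page': 1, 'limit': 25}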
@@ -408,56 +416,136 @@ class excavate(BaseInternalModule, BaseInterceptModule):
 
         class HtmlTags(ParameterExtractorRule):
             name = "HTML Tags"
-            discovery_regex = r'/<[^>]+(href|src)=["\'][^"\']*["\'][^>]*>/ nocase'
+            discovery_regex = r'/<[^>]+(href|src|action)=["\']?[^"\'>\s]*["\']?[^>]*>/ nocase'
             extraction_regex = bbot_regexes.tag_attribute_regex
             output_type = "GETPARAM"
 
-            def extract(self):
-                urls = self.extraction_regex.findall(str(self.result))
+            async def extract(self):
+                urls = await self.excavate.helpers.re.findall(self.extraction_regex, str(self.result))
                 for url in urls:
                     parsed_url = urlparse(url)
-                    query_strings = parse_qs(parsed_url.query)
-                    query_strings_dict = {
-                        k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in query_strings.items()
-                    }
+                    query_strings = parse_qs(html.unescape(parsed_url.query))
+                    query_strings_dict = {k: v[0] if isinstance(v, list) else v for k, v in query_strings.items()}
                     for parameter_name, original_value in query_strings_dict.items():
                         yield (
                             self.output_type,
                             parameter_name,
-                            original_value,
+                            original_value.strip(),
                             url,
                             _exclude_key(query_strings_dict, parameter_name),
                         )
 
+        class AjaxJquery(ParameterExtractorRule):
+            name = "JQuery Extractor"
+            discovery_regex = r"/\$\.ajax\(\{[^\<$\$]*\}\)/s nocase"
+            extraction_regex = None
+            output_type = "BODYJSON"
+            ajax_content_regexes = {
+                "url": re.compile(r"url\s*:\s*['\"](.*?)['\"]"),
+                "type": re.compile(r"type\s*:\s*['\"](.*?)['\"]"),
+                "content_type": re.compile(r"contentType\s*:\s*['\"](.*?)['\"]"),
+                "data": re.compile(r"data:.*(\{[^}]*\})"),
+            }
+
+            async def extract(self):
+                # Iterate through each regex in ajax_content_regexes
+                extracted_values = {}
+                for key, pattern in self.ajax_content_regexes.items():
+                    match = await self.excavate.helpers.re.search(pattern, self.result)
+                    if match:
+                        # Store the matched value in the dictionary
+                        extracted_values[key] = match.group(1)
+
+                # Check to see if the format is defined as JSON
+                if (
+                    "content_type" in extracted_values.keys()
+                    and extracted_values["content_type"] == "application/json"
+                ):
+                    form_parameters = {}
+
+                    # If we can't figure out the parameter names, there is no point in continuing
+                    if "data" in extracted_values.keys():
+                        form_url = extracted_values.get("url", None)
+
+                        try:
+                            s = extracted_values["data"]
+                            s = await self.excavate.helpers.re.sub(re.compile(r"(\w+)\s*:"), r'"\1":', s)  # Quote keys
+                            s = await self.excavate.helpers.re.sub(
+                                re.compile(r":\s*(\w+)"), r': "\1"', s
+                            )  # Quote values if they are unquoted
+                            data = json.loads(s)
+                        except (ValueError, SyntaxError):
+                            data = None
+
+                        if data:
+                            for p in data.keys():
+                                form_parameters[p] = None
+
+                        for parameter_name in form_parameters:
+                            yield (
+                                "BODYJSON",
+                                parameter_name,
+                                None,
+                                form_url,
+                                _exclude_key(form_parameters, parameter_name),
+                            )
+
         class GetForm(ParameterExtractorRule):
             name = "GET Form"
             discovery_regex = r'/<form[^>]*\bmethod=["\']?get["\']?[^>]*>.*<\/form>/s nocase'
-            form_content_regexes = [
-                bbot_regexes.input_tag_regex,
-                bbot_regexes.select_tag_regex,
-                bbot_regexes.textarea_tag_regex,
-            ]
+            form_content_regexes = {
+                "input_tag_regex": bbot_regexes.input_tag_regex,
+                "input_tag_regex2": bbot_regexes.input_tag_regex2,
+                "select_tag_regex": bbot_regexes.select_tag_regex,
+                "textarea_tag_regex": bbot_regexes.textarea_tag_regex,
+                "textarea_tag_regex2": bbot_regexes.textarea_tag_regex2,
+                "textarea_tag_novalue_regex": bbot_regexes.textarea_tag_novalue_regex,
+                "button_tag_regex": bbot_regexes.button_tag_regex,
+                "button_tag_regex2": bbot_regexes.button_tag_regex2,
+                "_input_tag_novalue_regex": bbot_regexes.input_tag_novalue_regex,
+            }
             extraction_regex = bbot_regexes.get_form_regex
             output_type = "GETPARAM"
 
-            def extract(self):
-                forms = self.extraction_regex.findall(str(self.result))
+            async def extract(self):
+                forms = await self.excavate.helpers.re.findall(self.extraction_regex, str(self.result))
                 for form_action, form_content in forms:
-
-
-                    input_tags = form_content_regex.findall(form_content)
+                    if not form_action or form_action == "#":
+                        form_action = None
 
-
-
+                    elif form_action.startswith("./"):
+                        form_action = form_action.lstrip(".")
 
-
-
-
-
-
-
-
-
+                    form_parameters = {}
+                    for form_content_regex_name, form_content_regex in self.form_content_regexes.items():
+                        input_tags = await self.excavate.helpers.re.findall(form_content_regex, form_content)
+                        if input_tags:
+                            # Normalize each input_tag to be a tuple of two elements
+                            input_tags = [(tag if isinstance(tag, tuple) else (tag, None)) for tag in input_tags]
+
+                            if form_content_regex_name in [
+                                "input_tag_regex2",
+                                "button_tag_regex2",
+                                "textarea_tag_regex2",
+                            ]:
+                                # Swap elements if needed
+                                input_tags = [(b, a) for a, b in input_tags]
+                            for parameter_name, original_value in input_tags:
+                                form_parameters.setdefault(
+                                    parameter_name, original_value.strip() if original_value else None
+                                )
+
+                    for parameter_name, original_value in form_parameters.items():
+                        yield (
+                            self.output_type,
+                            parameter_name,
+                            original_value,
+                            form_action,
+                            _exclude_key(form_parameters, parameter_name),
+                        )
+
+        class GetForm2(GetForm):
+            extraction_regex = bbot_regexes.get_form_regex2
 
         class PostForm(GetForm):
             name = "POST Form"
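
The new AjaxJquery extractor keys on $.ajax() calls whose contentType is application/json. Its regexes can be exercised directly (the sample snippet is illustrative; the module runs the same patterns through its async regex helpers):

import json
import re

snippet = """$.ajax({
    url: '/api/update',
    contentType: 'application/json',
    data: JSON.stringify({username: admin, role: user})
});"""

url = re.search(r"url\s*:\s*['\"](.*?)['\"]", snippet).group(1)
data = re.search(r"data:.*(\{[^}]*\})", snippet).group(1)
data = re.sub(r"(\w+)\s*:", r'"\1":', data)   # quote keys
data = re.sub(r":\s*(\w+)", r': "\1"', data)  # quote unquoted values
print(url, json.loads(data))  # /api/update {'username': 'admin', 'role': 'user'}

Only the parameter names matter downstream: each key is emitted as a BODYJSON WEB_PARAMETER with a None value.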
@@ -465,6 +553,21 @@ class excavate(BaseInternalModule, BaseInterceptModule):
             extraction_regex = bbot_regexes.post_form_regex
             output_type = "POSTPARAM"
 
+        class PostForm2(PostForm):
+            extraction_regex = bbot_regexes.post_form_regex2
+
+        class PostForm_NoAction(PostForm):
+            name = "POST Form (no action)"
+            extraction_regex = bbot_regexes.post_form_regex_noaction
+
+        # underscore ensure generic forms runs last, so it doesn't cause dedupe to stop full form detection
+        class _GenericForm(GetForm):
+            name = "Generic Form"
+            discovery_regex = r"/<form[^>]*>.*<\/form>/s nocase"
+
+            extraction_regex = bbot_regexes.generic_form_regex
+            output_type = "GETPARAM"
+
         def __init__(self, excavate):
             super().__init__(excavate)
             self.parameterExtractorCallbackDict = {}
@@ -476,7 +579,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                 regexes_component_list.append(f"${r.__name__} = {r.discovery_regex}")
             regexes_component = " ".join(regexes_component_list)
             self.yara_rules["parameter_extraction"] = (
-                rf'rule parameter_extraction {{meta: description = "contains
+                rf'rule parameter_extraction {{meta: description = "contains Parameter" strings: {regexes_component} condition: any of them}}'
             )
 
         async def process(self, yara_results, event, yara_rule_settings, discovery_context):
@@ -487,51 +590,64 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                parameterExtractorSubModule = self.parameterExtractorCallbackDict[identifier](
                     self.excavate, result
                 )
-                extracted_params = parameterExtractorSubModule.extract()
-                if extracted_params:
-                    for (
-                        parameter_type,
-                        parameter_name,
-                        original_value,
-                        endpoint,
-                        additional_params,
-                    ) in extracted_params:
-                        self.excavate.debug(
-                            f"Found Parameter [{parameter_name}] in [{parameterExtractorSubModule.name}] ParameterExtractor Submodule"
-                        )
-                        endpoint = event.data["url"] if not endpoint else endpoint
-                        url = (
-                            endpoint
-                            if endpoint.startswith(("http://", "https://"))
-                            else f"{event.parsed_url.scheme}://{event.parsed_url.netloc}{endpoint}"
-                        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                # Use async for to iterate over the async generator
+                async for (
+                    parameter_type,
+                    parameter_name,
+                    original_value,
+                    endpoint,
+                    additional_params,
+                ) in parameterExtractorSubModule.extract():
+                    self.excavate.debug(
+                        f"Found Parameter [{parameter_name}] in [{parameterExtractorSubModule.name}] ParameterExtractor Submodule"
+                    )
+
+                    # account for the case where the action is html encoded
+                    if endpoint and (
+                        endpoint.startswith("https://")
+                        or endpoint.startswith("http://")
+                    ):
+                        endpoint = html.unescape(endpoint)
+
+                    # If we have a full URL, leave it as-is
+                    if endpoint and endpoint.startswith(("http://", "https://")):
+                        url = endpoint
+
+                    # The endpoint is usually a form action - we should use it if we have it. If not, default to URL.
+                    else:
+                        # Use the original URL as the base and resolve the endpoint correctly in case of relative paths
+                        base_url = f"{event.parsed_url.scheme}://{event.parsed_url.netloc}{event.parsed_url.path}"
+                        if not self.excavate.remove_querystring and len(event.parsed_url.query) > 0:
+                            base_url += f"?{event.parsed_url.query}"
+                        url = urljoin(base_url, endpoint)
+
+                    if self.excavate.helpers.validate_parameter(parameter_name, parameter_type):
+                        if self.excavate.in_bl(parameter_name) is False:
+                            parsed_url = urlparse(url)
+                            if not parsed_url.hostname:
+                                self.excavate.warning(
+                                    f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname"
                                 )
-
-
+                                continue
+                            description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)"
+                            data = {
+                                "host": parsed_url.hostname,
+                                "type": parameter_type,
+                                "name": parameter_name,
+                                "original_value": original_value,
+                                "url": self.excavate.url_unparse(parameter_type, parsed_url),
+                                "additional_params": additional_params,
+                                "assigned_cookies": self.excavate.assigned_cookies,
+                                "description": description,
+                            }
+                            await self.report(
+                                data, event, yara_rule_settings, discovery_context, event_type="WEB_PARAMETER"
+                            )
                         else:
-                            self.excavate.debug(f"blocked parameter [{parameter_name}] due to BL match")
+                            self.excavate.debug(f"blocked parameter [{parameter_name}] due to BL match")
+                    else:
+                        self.excavate.debug(f"blocked parameter [{parameter_name}] due to validation failure")
 
         class CSPExtractor(ExcavateRule):
             description = "Extracts domains from CSP headers."
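
The endpoint resolution switch to urljoin is worth noting: relative form actions now resolve against the full originating URL, where the old string concatenation dropped the path (and mangled actions without a leading slash):

from urllib.parse import urljoin

base_url = "https://example.com/app/login.php"
print(urljoin(base_url, "submit.php"))   # https://example.com/app/submit.php
print(urljoin(base_url, "/api/submit"))  # https://example.com/api/submit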
@@ -609,12 +725,13 @@ class excavate(BaseInternalModule, BaseInterceptModule):
         class SerializationExtractor(ExcavateRule):
             description = "Identifies serialized objects from various platforms."
             regexes = {
-                "Java": re.compile(r"[^a-zA-Z0-9\/+]rO0[a-zA-Z0-9+\/]+={0,2}"),
-                "Ruby": re.compile(r"[^a-zA-Z0-9\/+]BAh[a-zA-Z0-9+\/]+={0,2}"),
-                "DOTNET": re.compile(r"[^a-zA-Z0-9\/+]AAEAAAD\/\/[a-zA-Z0-9\/+]+={0,2}"),
-                "PHP_Array": re.compile(r"[^a-zA-Z0-9\/+]YTo[xyz0123456][a-zA-Z0-9+\/]+={0,2}"),
-                "PHP_String": re.compile(r"[^a-zA-Z0-9\/+]czo[xyz0123456][a-zA-Z0-9+\/]+={0,2}"),
-                "PHP_Object": re.compile(r"[^a-zA-Z0-9\/+]Tzo[xyz0123456][a-zA-Z0-9+\/]+={0,2}"),
+                "Java": re.compile(r"[^a-zA-Z0-9\/+][\"']?rO0[a-zA-Z0-9+\/]+={0,2}"),
+                "Ruby": re.compile(r"[^a-zA-Z0-9\/+][\"']?BAh[a-zA-Z0-9+\/]+={0,2}"),
+                "DOTNET": re.compile(r"[^a-zA-Z0-9\/+][\"']?AAEAAAD\/\/[a-zA-Z0-9\/+]+={0,2}"),
+                "PHP_Array": re.compile(r"[^a-zA-Z0-9\/+][\"']?YTo[xyz0123456][a-zA-Z0-9+\/]+={0,2}"),
+                "PHP_String": re.compile(r"[^a-zA-Z0-9\/+][\"']?czo[xyz0123456][a-zA-Z0-9+\/]+={0,2}"),
+                "PHP_Object": re.compile(r"[^a-zA-Z0-9\/+][\"']?Tzo[xyz0123456][a-zA-Z0-9+\/]+={0,2}"),
+                "Possible_Compressed": re.compile(r"[^a-zA-Z0-9\/+][\"']?H4sIAAAA[a-zA-Z0-9+\/]+={0,2}"),
             }
             yara_rules = {}
 
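
These markers are base64 artifacts of each format's magic bytes, which is also why the newly added optional quote character ([\"']?) matters for blobs embedded in JSON or JavaScript strings. A quick check of two of them:

import base64

# Java serialized streams begin AC ED 00 05, which encodes to "rO0AB..."
print(base64.b64encode(bytes([0xAC, 0xED, 0x00, 0x05])))  # b'rO0ABQ=='

# gzip data begins 1F 8B 08, matching the new "Possible_Compressed" rule ("H4sI...")
print(base64.b64encode(bytes([0x1F, 0x8B, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00])))  # b'H4sIAAAAAAA='

The Ruby ("BAh"), .NET ("AAEAAAD//") and PHP ("YTo"/"czo"/"Tzo") prefixes are analogous markers for their respective serializers.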
@@ -622,7 +739,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                 super().__init__(excavate)
                 regexes_component_list = []
                 for regex_name, regex in self.regexes.items():
-                    regexes_component_list.append(rf"${regex_name} = /\b{regex.pattern}/
+                    regexes_component_list.append(rf"${regex_name} = /\b{regex.pattern}/")
                 regexes_component = " ".join(regexes_component_list)
                 self.yara_rules["serialization_detection"] = (
                     f'rule serialization_detection {{meta: description = "contains a possible serialized object" strings: {regexes_component} condition: any of them}}'
@@ -715,7 +832,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                 tags = "spider-danger"
                 description = "contains tag with src or href attribute"
             strings:
-                $url_attr = /<[^>]+(href|src)=["\'][^"\']*["\'][^>]*>/
+                $url_attr = /<[^>]+(href|src|action)=["\']?[^"\']*["\']?[^>]*>/
             condition:
                 $url_attr
         }
@@ -762,7 +879,6 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                 if final_url:
                     if self.excavate.scan.in_scope(final_url):
                         urls_found += 1
-
                         await self.report(
                             final_url,
                             event,
@@ -828,6 +944,36 @@ class excavate(BaseInternalModule, BaseInterceptModule):
         for r in await self.helpers.re.findall(self.yara_rule_regex, rules_content):
             yield r
 
+    async def emit_web_parameter(
+        self, host, param_type, name, original_value, url, description, additional_params, event, context
+    ):
+        data = {
+            "host": host,
+            "type": param_type,
+            "name": name,
+            "original_value": original_value,
+            "url": url,
+            "description": description,
+            "additional_params": additional_params,
+        }
+        await self.emit_event(data, "WEB_PARAMETER", event, context=context)
+
+    async def emit_custom_parameters(self, event, config_key, param_type, description_suffix):
+        # Emits WEB_PARAMETER events for custom headers and cookies from the configuration.
+        custom_params = self.scan.web_config.get(config_key, {})
+        for param_name, param_value in custom_params.items():
+            await self.emit_web_parameter(
+                host=event.parsed_url.hostname,
+                param_type=param_type,
+                name=param_name,
+                original_value=param_value,
+                url=self.url_unparse(param_type, event.parsed_url),
+                description=f"HTTP Extracted Parameter [{param_name}] ({description_suffix})",
+                additional_params=_exclude_key(custom_params, param_name),
+                event=event,
+                context=f"Excavate saw a custom {param_type.lower()} set [{param_name}], and emitted a WEB_PARAMETER for it",
+            )
+
     async def setup(self):
         self.yara_rules_dict = {}
         self.yara_preprocess_dict = {}
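
emit_custom_parameters reads the scan's web config, so custom headers and cookies feed straight into parameter fuzzing. A hedged sketch of the relevant config (the nesting under "web" is an assumption based on the web_config lookup; the key names come from the code above):

config = {
    "web": {
        "http_headers": {"X-Api-Version": "1"},      # surfaced as a HEADER WEB_PARAMETER
        "http_cookies": {"session_hint": "abc123"},  # surfaced as a COOKIE WEB_PARAMETER
    }
}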
@@ -839,10 +985,8 @@ class excavate(BaseInternalModule, BaseInterceptModule):
         ]
 
         self.parameter_extraction = bool(modules_WEB_PARAMETER)
-
-        self.retain_querystring = False
-        if self.config.get("retain_querystring", False) is True:
-            self.retain_querystring = True
+        self.speculate_params = bool(self.config.get("speculate_params", False))
+        self.remove_querystring = self.scan.config.get("url_querystring_remove", True)
 
         for module in self.scan.modules.values():
             if not str(module).startswith("_"):
@@ -863,6 +1007,9 @@ class excavate(BaseInternalModule, BaseInterceptModule):
             for rule_name, rule_content in excavateRule.yara_rules.items():
                 self.add_yara_rule(rule_name, rule_content, excavateRule)
 
+        self.parameter_blacklist = set(p.lower() for p in self.scan.config.get("parameter_blacklist", []))
+        self.parameter_blacklist_prefixes = set(self.scan.config.get("parameter_blacklist_prefixes", []))
+
         self.custom_yara_rules = str(self.config.get("custom_yara_rules", ""))
         if self.custom_yara_rules:
             custom_rules_count = 0
@@ -919,10 +1066,9 @@ class excavate(BaseInternalModule, BaseInterceptModule):
     async def search(self, data, event, content_type, discovery_context="HTTP response"):
         if not data:
             return None
-
         decoded_data = await self.helpers.re.recursive_decode(data)
 
-        if self.parameter_extraction:
+        if self.parameter_extraction and self.speculate_params:
             content_type_lower = content_type.lower() if content_type else ""
             extraction_map = {
                 "json": self.helpers.extract_params_json,
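
With speculate_params enabled, top-level keys in JSON (and XML) response bodies become SPECULATIVE WEB_PARAMETERs. A sketch of the kind of extraction this now gates (the real extract_params_json lives in the helpers):

import json

body = '{"user": "admin", "debug": false}'
for key, value in json.loads(body).items():
    print("SPECULATIVE", key, value)
# SPECULATIVE user admin
# SPECULATIVE debug False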
@@ -934,62 +1080,74 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                 results = extract_func(data)
                 if results:
                     for parameter_name, original_value in results:
-
-
+                        await self.emit_web_parameter(
+                            host=str(event.host),
+                            param_type="SPECULATIVE",
+                            name=parameter_name,
+                            original_value=original_value,
+                            url=str(event.data["url"]),
+                            description=f"HTTP Extracted Parameter (speculative from {source_type} content) [{parameter_name}]",
+                            additional_params={},
+                            event=event,
+                            context=f"excavate's Parameter extractor found a speculative WEB_PARAMETER: {parameter_name} by parsing {source_type} data from {str(event.host)}",
                         )
-                        data = {
-                            "host": str(event.host),
-                            "type": "SPECULATIVE",
-                            "name": parameter_name,
-                            "original_value": original_value,
-                            "url": str(event.data["url"]),
-                            "additional_params": {},
-                            "assigned_cookies": self.assigned_cookies,
-                            "description": description,
-                        }
-                        context = f"excavate's Parameter extractor found a speculative WEB_PARAMETER: {parameter_name} by parsing {source_type} data from {str(event.host)}"
-                        await self.emit_event(data, "WEB_PARAMETER", event, context=context)
                 return
 
-
-
-
-
-
-
+        # Initialize the list of data items to process
+        data_items = []
+
+        # Check if data and decoded_data are identical
+        if data == decoded_data:
+            data_items.append(("data", data))  # Add only one since both are the same
+        else:
+            data_items.append(("data", data))
+            data_items.append(("decoded_data", decoded_data))
+
+        for label, data_instance in data_items:
+            # Your existing processing code
+            for result in self.yara_rules.match(data=f"{data_instance}"):
+                rule_name = result.rule
+
+                # Skip specific operations for 'parameter_extraction' rule on decoded_data
+                if label == "decoded_data" and rule_name == "parameter_extraction":
+                    continue
+
+                # Check if rule processing function exists
+                if rule_name in self.yara_preprocess_dict:
+                    await self.yara_preprocess_dict[rule_name](result, event, discovery_context)
+                else:
+                    self.hugewarning(f"YARA Rule {rule_name} not found in pre-compiled rules")
 
     async def handle_event(self, event, **kwargs):
         if event.type == "HTTP_RESPONSE":
-
-
-            self.
-
-
-
-            self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            data = event.data
+            if self.parameter_extraction is True:
+                # if parameter extraction is enabled, and we have custom cookies or headers, emit them as WEB_PARAMETER events
+                await self.emit_custom_parameters(event, "http_cookies", "COOKIE", "Custom Cookie")
+                await self.emit_custom_parameters(event, "http_headers", "HEADER", "Custom Header")
+
+                # if parameter extraction is enabled, and querystring removal is disabled, and the event is directly from the TARGET, create a WEB_PARAMETER
+                if self.url_querystring_remove is False and str(event.parent.parent.module) == "TARGET":
+                    self.debug(f"Processing target URL [{urlunparse(event.parsed_url)}] for GET parameters")
+                    for (
+                        method,
+                        parsed_url,
+                        parameter_name,
+                        original_value,
+                        regex_name,
+                        additional_params,
+                    ) in extract_params_url(event.parsed_url):
+                        if self.in_bl(parameter_name) is False:
+                            await self.emit_web_parameter(
+                                host=parsed_url.hostname,
+                                param_type="GETPARAM",
+                                name=parameter_name,
+                                original_value=original_value,
+                                url=self.url_unparse("GETPARAM", parsed_url),
+                                description=f"HTTP Extracted Parameter [{parameter_name}] (Target URL)",
+                                additional_params=additional_params,
+                                event=event,
+                                context=f"Excavate parsed a URL directly from the scan target for parameters and found [GETPARAM] Parameter Name: [{parameter_name}] and emitted a WEB_PARAMETER for it",
+                            )
 
         # process response data
         body = event.data.get("body", "")
@@ -1003,29 +1161,31 @@ class excavate(BaseInternalModule, BaseInterceptModule):
 
             for header, header_values in headers.items():
                 for header_value in header_values:
+                    # Process 'set-cookie' headers to extract and emit cookies as WEB_PARAMETER events.
                     if header.lower() == "set-cookie" and self.parameter_extraction:
                         if "=" not in header_value:
                            self.debug(f"Cookie found without '=': {header_value}")
                             continue
                         else:
-                            cookie_name = header_value.
-                            cookie_value =
+                            cookie_name, _, remainder = header_value.partition("=")
+                            cookie_value = remainder.split(";")[0]
 
-                            if self.in_bl(
+                            if self.in_bl(cookie_name) is False:
                                 self.assigned_cookies[cookie_name] = cookie_value
-
-
-                                "
-
-
-                                "
-                                "
-
-
-
-
+                                await self.emit_web_parameter(
+                                    host=str(event.host),
+                                    param_type="COOKIE",
+                                    name=cookie_name,
+                                    original_value=cookie_value,
+                                    url=self.url_unparse("COOKIE", event.parsed_url),
+                                    description=f"Set-Cookie Assigned Cookie [{cookie_name}]",
+                                    additional_params={},
+                                    event=event,
+                                    context=f"Excavate noticed a set-cookie header for cookie [{cookie_name}] and emitted a WEB_PARAMETER for it",
+                                )
                             else:
                                 self.debug(f"blocked cookie parameter [{cookie_name}] due to BL match")
+                    # Handle 'location' headers to process and emit redirect URLs as URL_UNVERIFIED events.
                     if header.lower() == "location":
                         redirect_location = getattr(event, "redirect_location", "")
                         if redirect_location:
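
The partition-based cookie parse splits on the first "=" only, so values that themselves contain "=" (base64, for instance) survive intact, and attributes after ";" are dropped:

header_value = "token=dXNlcj1hZG1pbg==; Path=/; HttpOnly"
cookie_name, _, remainder = header_value.partition("=")
cookie_value = remainder.split(";")[0]
print(cookie_name, cookie_value)  # token dXNlcj1hZG1pbg==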
@@ -1056,18 +1216,17 @@ class excavate(BaseInternalModule, BaseInterceptModule):
                                     additional_params,
                                 ) in extract_params_location(header_value, event.parsed_url):
                                     if self.in_bl(parameter_name) is False:
-
-
-                                        "
-
-
-                                        "
-                                        "
-
-
-
-
-                                        await self.emit_event(data, "WEB_PARAMETER", event, context=context)
+                                        await self.emit_web_parameter(
+                                            host=parsed_url.hostname,
+                                            param_type="GETPARAM",
+                                            name=parameter_name,
+                                            original_value=original_value,
+                                            url=self.url_unparse("GETPARAM", parsed_url),
+                                            description=f"HTTP Extracted Parameter [{parameter_name}] (Location Header)",
+                                            additional_params=additional_params,
+                                            event=event,
+                                            context=f"Excavate parsed a location header for parameters and found [GETPARAM] Parameter Name: [{parameter_name}] and emitted a WEB_PARAMETER for it",
+                                        )
                                 else:
                                     self.warning("location header found but missing redirect_location in HTTP_RESPONSE")
                                 if header.lower() == "content-type":
|