bbot 2.4.2__py3-none-any.whl → 2.4.2.6590rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of bbot has been flagged as possibly problematic.
- bbot/__init__.py +1 -1
- bbot/core/event/base.py +64 -4
- bbot/core/helpers/diff.py +10 -7
- bbot/core/helpers/helper.py +5 -1
- bbot/core/helpers/misc.py +48 -11
- bbot/core/helpers/regex.py +4 -0
- bbot/core/helpers/regexes.py +45 -8
- bbot/core/helpers/url.py +21 -5
- bbot/core/helpers/web/client.py +25 -5
- bbot/core/helpers/web/engine.py +9 -1
- bbot/core/helpers/web/envelopes.py +352 -0
- bbot/core/helpers/web/web.py +10 -2
- bbot/core/helpers/yara_helper.py +50 -0
- bbot/core/modules.py +23 -7
- bbot/defaults.yml +26 -1
- bbot/modules/base.py +4 -2
- bbot/modules/{deadly/dastardly.py → dastardly.py} +1 -1
- bbot/modules/{deadly/ffuf.py → ffuf.py} +1 -1
- bbot/modules/ffuf_shortnames.py +1 -1
- bbot/modules/httpx.py +14 -0
- bbot/modules/hunt.py +24 -6
- bbot/modules/internal/aggregate.py +1 -0
- bbot/modules/internal/excavate.py +356 -197
- bbot/modules/lightfuzz/lightfuzz.py +203 -0
- bbot/modules/lightfuzz/submodules/__init__.py +0 -0
- bbot/modules/lightfuzz/submodules/base.py +312 -0
- bbot/modules/lightfuzz/submodules/cmdi.py +106 -0
- bbot/modules/lightfuzz/submodules/crypto.py +474 -0
- bbot/modules/lightfuzz/submodules/nosqli.py +183 -0
- bbot/modules/lightfuzz/submodules/path.py +154 -0
- bbot/modules/lightfuzz/submodules/serial.py +179 -0
- bbot/modules/lightfuzz/submodules/sqli.py +187 -0
- bbot/modules/lightfuzz/submodules/ssti.py +39 -0
- bbot/modules/lightfuzz/submodules/xss.py +191 -0
- bbot/modules/{deadly/nuclei.py → nuclei.py} +1 -1
- bbot/modules/paramminer_headers.py +2 -0
- bbot/modules/reflected_parameters.py +80 -0
- bbot/modules/{deadly/vhost.py → vhost.py} +2 -2
- bbot/presets/web/lightfuzz-heavy.yml +16 -0
- bbot/presets/web/lightfuzz-light.yml +20 -0
- bbot/presets/web/lightfuzz-medium.yml +14 -0
- bbot/presets/web/lightfuzz-superheavy.yml +13 -0
- bbot/presets/web/lightfuzz-xss.yml +21 -0
- bbot/presets/web/paramminer.yml +8 -5
- bbot/scanner/preset/args.py +26 -0
- bbot/scanner/scanner.py +6 -0
- bbot/test/test_step_1/test__module__tests.py +1 -1
- bbot/test/test_step_1/test_helpers.py +7 -0
- bbot/test/test_step_1/test_presets.py +2 -2
- bbot/test/test_step_1/test_web.py +20 -0
- bbot/test/test_step_1/test_web_envelopes.py +343 -0
- bbot/test/test_step_2/module_tests/test_module_excavate.py +404 -29
- bbot/test/test_step_2/module_tests/test_module_httpx.py +29 -0
- bbot/test/test_step_2/module_tests/test_module_hunt.py +18 -1
- bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +1947 -0
- bbot/test/test_step_2/module_tests/test_module_paramminer_getparams.py +4 -1
- bbot/test/test_step_2/module_tests/test_module_paramminer_headers.py +46 -2
- bbot/test/test_step_2/module_tests/test_module_reflected_parameters.py +226 -0
- bbot/wordlists/paramminer_parameters.txt +0 -8
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/METADATA +2 -1
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/RECORD +64 -42
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/LICENSE +0 -0
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/WHEEL +0 -0
- {bbot-2.4.2.dist-info → bbot-2.4.2.6590rc0.dist-info}/entry_points.txt +0 -0
bbot/__init__.py
CHANGED
bbot/core/event/base.py
CHANGED
```diff
@@ -9,9 +9,9 @@ import datetime
 import ipaddress
 import traceback

-from copy import copy
 from pathlib import Path
 from typing import Optional
+from copy import copy, deepcopy
 from contextlib import suppress
 from radixtarget import RadixTarget
 from pydantic import BaseModel, field_validator
@@ -40,6 +40,7 @@ from bbot.core.helpers import (
     validators,
     get_file_extension,
 )
+from bbot.core.helpers.web.envelopes import BaseEnvelope


 log = logging.getLogger("bbot.core.event")
@@ -633,6 +634,10 @@ class BaseEvent:
         elif not self._dummy:
             log.warning(f"Tried to set invalid parent on {self}: (got: {repr(parent)} ({type(parent)}))")

+    @property
+    def children(self):
+        return []
+
     @property
     def parent_id(self):
         parent_id = getattr(self.get_parent(), "id", None)
@@ -687,6 +692,13 @@ class BaseEvent:
             e = parent
         return parents

+    def clone(self):
+        # Create a shallow copy of the event first
+        cloned_event = copy(self)
+        # Re-assign a new UUID
+        cloned_event._uuid = uuid.uuid4()
+        return cloned_event
+
     def _host(self):
         return ""

@@ -868,7 +880,13 @@ class BaseEvent:
         j["discovery_path"] = self.discovery_path
         j["parent_chain"] = self.parent_chain

+        # parameter envelopes
+        parameter_envelopes = getattr(self, "envelopes", None)
+        if parameter_envelopes is not None:
+            j["envelopes"] = parameter_envelopes.to_dict()
+
         # normalize non-primitive python objects
+
         for k, v in list(j.items()):
             if k == "data":
                 continue
@@ -1368,12 +1386,56 @@ class URL_HINT(URL_UNVERIFIED):


 class WEB_PARAMETER(DictHostEvent):
+    @property
+    def children(self):
+        # if we have any subparams, raise a new WEB_PARAMETER for each one
+        children = []
+        envelopes = getattr(self, "envelopes", None)
+        if envelopes is not None:
+            subparams = sorted(list(self.envelopes.get_subparams()))
+
+            if envelopes.selected_subparam is None:
+                current_subparam = subparams[0]
+                envelopes.selected_subparam = current_subparam[0]
+                if len(subparams) > 1:
+                    for subparam, _ in subparams[1:]:
+                        clone = self.clone()
+                        clone.envelopes = deepcopy(envelopes)
+                        clone.envelopes.selected_subparam = subparam
+                        clone.parent = self
+                        children.append(clone)
+        return children
+
+    def sanitize_data(self, data):
+        original_value = data.get("original_value", None)
+        if original_value is not None:
+            try:
+                envelopes = BaseEnvelope.detect(original_value)
+                setattr(self, "envelopes", envelopes)
+            except ValueError as e:
+                log.verbose(f"Error detecting envelopes for {self}: {e}")
+        return data
+
     def _data_id(self):
         # dedupe by url:name:param_type
         url = self.data.get("url", "")
         name = self.data.get("name", "")
         param_type = self.data.get("type", "")
-        return f"{url}:{name}:{param_type}"
+        envelopes = getattr(self, "envelopes", "")
+        subparam = getattr(envelopes, "selected_subparam", "")
+
+        return f"{url}:{name}:{param_type}:{subparam}"
+
+    def _outgoing_dedup_hash(self, event):
+        return hash(
+            (
+                str(event.host),
+                event.data["url"],
+                event.data.get("name", ""),
+                event.data.get("type", ""),
+                event.data.get("envelopes", ""),
+            )
+        )

     def _url(self):
         return self.data["url"]
@@ -1810,7 +1872,6 @@ def make_event(
         data = net.network_address

     event_class = globals().get(event_type, DefaultEvent)
-
     return event_class(
         data,
         event_type=event_type,
@@ -1868,7 +1929,6 @@ def event_from_json(j, siem_friendly=False):

     resolved_hosts = j.get("resolved_hosts", [])
     event._resolved_hosts = set(resolved_hosts)
-
     event.timestamp = datetime.datetime.fromisoformat(j["timestamp"])
     event.scope_distance = j["scope_distance"]
     parent_id = j.get("parent", None)
```
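Taken together, the new `clone()` helper and `WEB_PARAMETER.children` fan a multi-value envelope out into one event per subparameter: the original event claims the first subparam, and each remaining subparam gets a deep-copied envelope on a cloned child. A minimal sketch of that selection logic, using a hypothetical stand-in envelope (the real `BaseEnvelope` lives in the new `bbot/core/helpers/web/envelopes.py`, which this release adds but this hunk does not show):

```python
from copy import deepcopy

# Hypothetical stand-in for BaseEnvelope; only the two attributes
# the children property touches are modeled here.
class FakeEnvelope:
    def __init__(self, subparams):
        self._subparams = subparams
        self.selected_subparam = None

    def get_subparams(self):
        # (name, value) pairs, matching the sorted() call in the diff
        return list(self._subparams.items())

envelope = FakeEnvelope({"user": "admin", "role": "guest", "token": "abc"})
subparams = sorted(envelope.get_subparams())

# the original event keeps the first subparam...
envelope.selected_subparam = subparams[0][0]
# ...and every remaining subparam becomes a cloned child
children = []
for subparam, _ in subparams[1:]:
    child = deepcopy(envelope)
    child.selected_subparam = subparam
    children.append(child)

print(envelope.selected_subparam)               # role
print([c.selected_subparam for c in children])  # ['token', 'user']
```

Because `selected_subparam` now feeds `_data_id()`, each child deduplicates independently of its siblings.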
bbot/core/helpers/diff.py
CHANGED
```diff
@@ -15,22 +15,24 @@ class HttpCompare:
         parent_helper,
         method="GET",
         data=None,
+        json=None,
         allow_redirects=False,
         include_cache_buster=True,
         headers=None,
         cookies=None,
-        timeout=
+        timeout=10,
     ):
         self.parent_helper = parent_helper
         self.baseline_url = baseline_url
         self.include_cache_buster = include_cache_buster
         self.method = method
         self.data = data
+        self.json = json
         self.allow_redirects = allow_redirects
         self._baselined = False
         self.headers = headers
         self.cookies = cookies
-        self.timeout =
+        self.timeout = 10

     @staticmethod
     def merge_dictionaries(headers1, headers2):
@@ -53,12 +55,13 @@ class HttpCompare:
             follow_redirects=self.allow_redirects,
             method=self.method,
             data=self.data,
+            json=self.json,
             headers=self.headers,
             cookies=self.cookies,
             retries=2,
             timeout=self.timeout,
         )
-        await self.parent_helper.sleep(
+        await self.parent_helper.sleep(0.5)
         # put random parameters in URL, headers, and cookies
         get_params = {self.parent_helper.rand_string(6): self.parent_helper.rand_string(6)}

@@ -76,12 +79,12 @@ class HttpCompare:
             follow_redirects=self.allow_redirects,
             method=self.method,
             data=self.data,
+            json=self.json,
             retries=2,
             timeout=self.timeout,
         )

         self.baseline = baseline_1
-
         if baseline_1 is None or baseline_2 is None:
             log.debug("HTTP error while establishing baseline, aborting")
             raise HttpCompareError(
@@ -90,6 +93,7 @@ class HttpCompare:
         if baseline_1.status_code != baseline_2.status_code:
             log.debug("Status code not stable during baseline, aborting")
             raise HttpCompareError("Can't get baseline from source URL")
+
         try:
             baseline_1_json = xmltodict.parse(baseline_1.text)
             baseline_2_json = xmltodict.parse(baseline_2.text)
@@ -105,11 +109,9 @@ class HttpCompare:

         for k in ddiff.keys():
             for x in list(ddiff[k]):
-                log.debug(f"Added {k} filter for path: {x.path()}")
                 self.ddiff_filters.append(x.path())

         self.baseline_json = baseline_1_json
-
         self.baseline_ignore_headers = [
             h.lower()
             for h in [
@@ -167,7 +169,6 @@ class HttpCompare:
         if len(ddiff.keys()) == 0:
             return True
         else:
-            log.debug(ddiff)
             return False

     async def compare(
@@ -178,6 +179,7 @@ class HttpCompare:
         check_reflection=False,
         method="GET",
         data=None,
+        json=None,
         allow_redirects=False,
         timeout=None,
     ):
@@ -208,6 +210,7 @@ class HttpCompare:
             follow_redirects=allow_redirects,
             method=method,
             data=data,
+            json=json,
             timeout=timeout,
         )

```
bbot/core/helpers/helper.py
CHANGED
```diff
@@ -12,6 +12,7 @@ from .diff import HttpCompare
 from .regex import RegexHelper
 from .wordcloud import WordCloud
 from .interactsh import Interactsh
+from .yara_helper import YaraHelper
 from .depsinstaller import DepsInstaller
 from .async_helpers import get_event_loop

@@ -85,6 +86,7 @@ class ConfigAwareHelper:
         self._cloud = None

         self.re = RegexHelper(self)
+        self.yara = YaraHelper(self)
         self._dns = None
         self._web = None
         self.config_aware_validators = self.validators.Validators(self)
@@ -129,7 +131,8 @@ class ConfigAwareHelper:
         cookies=None,
         method="GET",
         data=None,
-
+        json=None,
+        timeout=10,
     ):
         return HttpCompare(
             url,
@@ -141,6 +144,7 @@ class ConfigAwareHelper:
             timeout=timeout,
             method=method,
             data=data,
+            json=json,
         )

     def temp_filename(self, extension=None):
```
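With `json=` now threaded from `ConfigAwareHelper.http_compare()` through `HttpCompare` into every request, JSON request bodies can be baselined and diffed the same way form data already is. A hedged sketch of module-side usage, assuming the usual `self.helpers` access pattern and the `(match, reasons, reflection, response)` tuple that existing `compare()` callers unpack:

```python
# Hedged sketch from inside a BBOT module; names outside this diff are assumptions.
compare_helper = self.helpers.http_compare(
    "http://example.com/api/login",
    method="POST",
    json={"username": "admin", "password": "test"},  # JSON body instead of form data
)
match, reasons, reflection, response = await compare_helper.compare(
    "http://example.com/api/login",
    method="POST",
    json={"username": "admin'", "password": "test"},  # mutated body vs. the baseline
)
```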
bbot/core/helpers/misc.py
CHANGED
```diff
@@ -2,6 +2,7 @@ import os
 import sys
 import copy
 import json
+import math
 import random
 import string
 import asyncio
@@ -9,6 +10,7 @@ import logging
 import ipaddress
 import regex as re
 import subprocess as sp
+
 from pathlib import Path
 from contextlib import suppress
 from unidecode import unidecode  # noqa F401
@@ -797,17 +799,14 @@ def recursive_decode(data, max_depth=5):
     return data


-
-rand_pool_digits = rand_pool + string.digits
-
-
-def rand_string(length=10, digits=True):
+def rand_string(length=10, digits=True, numeric_only=False):
     """
     Generates a random string of specified length.

     Args:
         length (int, optional): The length of the random string. Defaults to 10.
         digits (bool, optional): Whether to include digits in the string. Defaults to True.
+        numeric_only (bool, optional): Whether to generate a numeric-only string. Defaults to False.

     Returns:
         str: A random string of the specified length.
@@ -819,11 +818,17 @@ def rand_string(length=10, digits=True):
     'ap4rsdtg5iw7ey7y3oa5'
     >>> rand_string(30, digits=False)
     'xdmyxtglqfzqktngkesyulwbfrihva'
+    >>> rand_string(15, numeric_only=True)
+    '934857349857395'
     """
-
-
-
-
+    if numeric_only:
+        pool = string.digits
+    elif digits:
+        pool = string.ascii_lowercase + string.digits
+    else:
+        pool = string.ascii_lowercase
+
+    return "".join(random.choice(pool) for _ in range(length))


 def truncate_string(s, n):
```
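The module-level `rand_pool*` globals are gone; the pool is now chosen per call, and `numeric_only` takes precedence over `digits` (digit-only strings are presumably useful to the new fuzzing submodules). For example:

```python
import random
import string

# Condensed copy of the new pool selection, for illustration.
def rand_string(length=10, digits=True, numeric_only=False):
    if numeric_only:
        pool = string.digits  # numeric_only wins over digits
    elif digits:
        pool = string.ascii_lowercase + string.digits
    else:
        pool = string.ascii_lowercase
    return "".join(random.choice(pool) for _ in range(length))

print(rand_string(8, numeric_only=True))  # e.g. '49271053', digits only
print(rand_string(8, digits=False))       # e.g. 'qkzmwnch', lowercase only
```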
```diff
@@ -885,7 +890,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
         xml_data (str): XML-formatted string containing elements.

     Returns:
-        set: A set of tuples containing the tags and their corresponding text values present in the XML object.
+        set: A set of tuples containing the tags and their corresponding sanitized text values present in the XML object.

     Raises:
         Returns an empty set if ParseError occurs.
@@ -907,7 +912,10 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
     while stack:
         current_element = stack.pop()
         if validate_parameter(current_element.tag, compare_mode):
-
+            # Sanitize the text value
+            text_value = current_element.text.strip() if current_element.text else None
+            sanitized_value = quote(text_value, safe="") if text_value else None
+            tag_value_pairs.add((current_element.tag, sanitized_value))
         for child in current_element:
             stack.append(child)
     return tag_value_pairs
@@ -921,6 +929,7 @@ valid_chars_dict = {
     "getparam": {chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="},
     "postparam": {chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="},
     "cookie": {chr(c) for c in range(33, 127) if chr(c) not in '()<>@,;:"/[]?={} \t'},
+    "bodyjson": set(chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="),
 }


@@ -1874,6 +1883,7 @@ def make_table(rows, header, **kwargs):
     | row2      | row2      |
     +-----------+-----------+
     """
+
     from tabulate import tabulate

     # fix IndexError: list index out of range
```
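The XML extractor now percent-encodes extracted text values with `urllib.parse.quote(..., safe="")`; with an empty `safe` set, even `/` and `=` are encoded, so extracted values cannot smuggle URL metacharacters into later requests:

```python
from urllib.parse import quote

# safe="" exempts nothing: every reserved character is percent-encoded
print(quote("a b&c=d/e", safe=""))  # a%20b%26c%3Dd%2Fe
```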
```diff
@@ -2772,6 +2782,21 @@ def clean_dict(d, *key_names, fuzzy=False, exclude_keys=None, _prev_key=None):
     return d


+def calculate_entropy(data):
+    """Calculate the Shannon entropy of a byte sequence"""
+    if not data:
+        return 0
+    frequency = {}
+    for byte in data:
+        if byte in frequency:
+            frequency[byte] += 1
+        else:
+            frequency[byte] = 1
+    data_len = len(data)
+    entropy = -sum((count / data_len) * math.log2(count / data_len) for count in frequency.values())
+    return entropy
+
+
 top_ports_cache = None


```
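`calculate_entropy` is the standard Shannon formula H = -sum(p(b) * log2(p(b))) over byte frequencies; high-entropy values are a useful signal for random or encrypted data, presumably what the new lightfuzz crypto submodule looks for. A quick sanity check:

```python
import math

# Condensed copy of the helper above, for a standalone check.
def calculate_entropy(data):
    if not data:
        return 0
    frequency = {}
    for byte in data:
        frequency[byte] = frequency.get(byte, 0) + 1
    data_len = len(data)
    return -sum((c / data_len) * math.log2(c / data_len) for c in frequency.values())

print(calculate_entropy(b"aabb"))            # 1.0, two equally likely symbols
print(calculate_entropy(bytes(range(256))))  # 8.0, uniform over all 256 byte values
print(calculate_entropy(b"aaaa"))            # -0.0, one repeated byte, no uncertainty
```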
```diff
@@ -2825,3 +2850,15 @@ def get_python_constraints():

     dist = distribution("bbot")
     return [clean_requirement(r) for r in dist.requires]
+
+
+def is_printable(s):
+    """
+    Check if a string is printable
+    """
+    if not isinstance(s, str):
+        raise ValueError(f"Expected a string, got {type(s)}")
+
+    # Exclude control characters that break display/printing
+    s = set(s)
+    return all(ord(c) >= 32 or c in "\t\n\r" for c in s)
```
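Note that `is_printable` deliberately allows tab, newline, and carriage return, and that any character at or above code point 32 (including non-ASCII text) passes:

```python
def is_printable(s):
    # condensed copy of the helper added above
    if not isinstance(s, str):
        raise ValueError(f"Expected a string, got {type(s)}")
    return all(ord(c) >= 32 or c in "\t\n\r" for c in set(s))

print(is_printable("hello\tworld\n"))  # True, tab and newline are allowed
print(is_printable("hello\x00world"))  # False, NUL is a control character
print(is_printable("héllo"))           # True, non-ASCII is fine (ord >= 32)
```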
bbot/core/helpers/regex.py
CHANGED
```diff
@@ -31,6 +31,10 @@ class RegexHelper:
         self.ensure_compiled_regex(compiled_regex)
         return await self.parent_helper.run_in_executor(compiled_regex.search, *args, **kwargs)

+    async def match(self, compiled_regex, *args, **kwargs):
+        self.ensure_compiled_regex(compiled_regex)
+        return await self.parent_helper.run_in_executor(compiled_regex.match, *args, **kwargs)
+
     async def sub(self, compiled_regex, *args, **kwargs):
         self.ensure_compiled_regex(compiled_regex)
         return await self.parent_helper.run_in_executor(compiled_regex.sub, *args, **kwargs)
```
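`match` joins the existing `search` and `sub` wrappers in pushing the blocking regex call into an executor, so a slow pattern or a huge input can't stall the scanner's event loop. A standalone sketch of the same offloading pattern (the names here are illustrative, not BBOT APIs):

```python
import asyncio
import re
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)
pattern = re.compile(r"eyJ[A-Za-z0-9_-]+\.")  # a JWT-ish prefix

async def async_match(compiled, text):
    # run the blocking match() off the event loop, like RegexHelper does
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, compiled.match, text)

async def main():
    m = await async_match(pattern, "eyJhbGciOiJIUzI1NiJ9.payload.sig")
    print(bool(m))  # True

asyncio.run(main())
```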
bbot/core/helpers/regexes.py
CHANGED
```diff
@@ -114,27 +114,64 @@ scan_name_regex = re.compile(r"[a-z]{3,20}_[a-z]{3,20}")

 # For use with excavate parameters extractor
 input_tag_regex = re.compile(
-    r"<input[^>]
+    r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>"
 )
-
-
+input_tag_regex2 = re.compile(
+    r"<input[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w\s]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
+)
+input_tag_novalue_regex = re.compile(r"<input(?![^>]*\b\svalue=)[^>]*?\sname=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>")
+# jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=")
+# jquery_get_regex = re.compile(r"\$.get\([\'\"].+[\'\"].+\{(.+)\}")
+# jquery_post_regex = re.compile(r"\$.post\([\'\"].+[\'\"].+\{(.+)\}")
 a_tag_regex = re.compile(r"<a[^>]*href=[\"\']([^\"\'?>]*)\?([^&\"\'=]+)=([^&\"\'=]+)")
 img_tag_regex = re.compile(r"<img[^>]*src=[\"\']([^\"\'?>]*)\?([^&\"\'=]+)=([^&\"\'=]+)")
 get_form_regex = re.compile(
-    r"<form[^>]
+    r"<form[^>]*\bmethod=[\"']?[gG][eE][tT][\"']?[^>]*\baction=[\"']?([^\s\"'<>]+)[\"']?[^>]*>([\s\S]*?)<\/form>",
+    re.DOTALL,
+)
+get_form_regex2 = re.compile(
+    r"<form[^>]*\baction=[\"']?([^\s\"'<>]+)[\"']?[^>]*\bmethod=[\"']?[gG][eE][tT][\"']?[^>]*>([\s\S]*?)<\/form>",
     re.DOTALL,
 )
 post_form_regex = re.compile(
-    r"<form[^>]
+    r"<form[^>]*\bmethod=[\"']?[pP][oO][sS][tT][\"']?[^>]*\baction=[\"']?([^\s\"'<>]+)[\"']?[^>]*>([\s\S]*?)<\/form>",
+    re.DOTALL,
+)
+post_form_regex2 = re.compile(
+    r"<form[^>]*\baction=[\"']?([^\s\"'<>]+)[\"']?[^>]*\bmethod=[\"']?[pP][oO][sS][tT][\"']?[^>]*>([\s\S]*?)<\/form>",
+    re.DOTALL,
+)
+post_form_regex_noaction = re.compile(
+    r"<form[^>]*(?:\baction=[\"']?([^\s\"'<>]+)[\"']?)?[^>]*\bmethod=[\"']?[pP][oO][sS][tT][\"']?[^>]*>([\s\S]*?)<\/form>",
     re.DOTALL,
 )
+generic_form_regex = re.compile(
+    r"<form(?![^>]*\bmethod=)[^>]+(?:\baction=[\"']?([^\s\"'<>]+)[\"']?)[^>]*>([\s\S]*?)<\/form>",
+    re.IGNORECASE | re.DOTALL,
+)
+
 select_tag_regex = re.compile(
-    r"<select[^>]+?name=[\"\']?(
+    r"<select[^>]+?name=[\"\']?([_\-\.\w]+)[\"\']?[^>]*>(?:\s*<option[^>]*?value=[\"\']?([_\.\-\w]*)[\"\']?[^>]*>)?",
+    re.IGNORECASE | re.DOTALL,
 )
+
 textarea_tag_regex = re.compile(
-    r
+    r"<textarea[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w]*)[\"\']?[^>]*?>"
+)
+textarea_tag_regex2 = re.compile(
+    r"<textarea[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
+)
+textarea_tag_novalue_regex = re.compile(
+    r'<textarea[^>]*\bname=["\']?([_\-\.\w]+)["\']?[^>]*>(.*?)</textarea>', re.IGNORECASE | re.DOTALL
+)
+
+button_tag_regex = re.compile(
+    r"<button[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?value=[\"\']?([%\-\._=+\/\w]*)[\"\']?[^>]*?>"
+)
+button_tag_regex2 = re.compile(
+    r"<button[^>]*?value=[\"\']?([\-%\._=+\/\w]*)[\"\']?[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
 )
-tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\
+tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\'\"\>]+)[\"\']?[^>]*>")

 valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+"

```
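The rewritten `input_tag_regex` captures `(name, value)`, and the new `*2` variants handle the reversed attribute order; a quick check:

```python
import regex as re  # regexes.py uses the `regex` package, not stdlib re

input_tag_regex = re.compile(
    r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>"
)
input_tag_regex2 = re.compile(
    r"<input[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w\s]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
)

print(input_tag_regex.search('<input type="text" name="username" value="admin">').groups())
# ('username', 'admin')
print(input_tag_regex2.search('<input value="admin" name="username">').groups())
# ('admin', 'username')
```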
bbot/core/helpers/url.py
CHANGED
```diff
@@ -32,7 +32,10 @@ def parse_url(url):
     return urlparse(url)


-def add_get_params(url, params):
+def add_get_params(url, params, encode=True):
+    def _no_encode_quote(s, safe="/", encoding=None, errors=None):
+        return s
+
     """
     Add or update query parameters to the given URL.

@@ -53,10 +56,23 @@ def add_get_params(url, params):
     >>> add_get_params('https://www.evilcorp.com?foo=1', {'foo': 2})
     ParseResult(scheme='https', netloc='www.evilcorp.com', path='', params='', query='foo=2', fragment='')
     """
-    parsed =
-
-
-
+    parsed = urlparse(url)
+    query_params = parsed.query.split("&")
+
+    existing_params = {}
+    for param in query_params:
+        if "=" in param:
+            k, v = param.split("=", 1)
+            existing_params[k] = v
+
+    existing_params.update(params)
+
+    if encode:
+        new_query = urlencode(existing_params, doseq=True)
+    else:
+        new_query = urlencode(existing_params, doseq=True, quote_via=_no_encode_quote)
+
+    return parsed._replace(query=new_query)


 def get_get_params(url):
```
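`add_get_params` now merges parameters manually and, with `encode=False`, passes values through `urlencode` untouched via a no-op `quote_via`, which is handy when a fuzzer needs its payload sent verbatim. A condensed, standalone copy for illustration:

```python
from urllib.parse import urlencode, urlparse

def add_get_params(url, params, encode=True):
    # condensed from the hunk above, behavior-preserving
    def _no_encode_quote(s, safe="/", encoding=None, errors=None):
        return s  # pass the value through untouched

    parsed = urlparse(url)
    existing = dict(p.split("=", 1) for p in parsed.query.split("&") if "=" in p)
    existing.update(params)
    if encode:
        new_query = urlencode(existing, doseq=True)
    else:
        new_query = urlencode(existing, doseq=True, quote_via=_no_encode_quote)
    return parsed._replace(query=new_query)

print(add_get_params("http://example.com/?a=1", {"payload": "{{7*7}}"}).query)
# a=1&payload=%7B%7B7%2A7%7D%7D   (encoded)
print(add_get_params("http://example.com/?a=1", {"payload": "{{7*7}}"}, encode=False).query)
# a=1&payload={{7*7}}             (verbatim)
```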
bbot/core/helpers/web/client.py
CHANGED
```diff
@@ -52,7 +52,7 @@ class BBOTAsyncClient(httpx.AsyncClient):
         if http_debug:
             log.trace(f"Creating AsyncClient: {args}, {kwargs}")

-        self._persist_cookies = kwargs.pop("persist_cookies",
+        self._persist_cookies = kwargs.pop("persist_cookies", False)

         # timeout
         http_timeout = self._web_config.get("http_timeout", 20)
@@ -63,11 +63,18 @@ class BBOTAsyncClient(httpx.AsyncClient):
         headers = kwargs.get("headers", None)
         if headers is None:
             headers = {}
+
+        # cookies
+        cookies = kwargs.get("cookies", None)
+        if cookies is None:
+            cookies = {}
+
         # user agent
         user_agent = self._web_config.get("user_agent", "BBOT")
         if "User-Agent" not in headers:
             headers["User-Agent"] = user_agent
         kwargs["headers"] = headers
+        kwargs["cookies"] = cookies
         # proxy
         proxies = self._web_config.get("http_proxy", None)
         kwargs["proxy"] = proxies
@@ -78,10 +85,23 @@ class BBOTAsyncClient(httpx.AsyncClient):
             self._cookies = DummyCookies()

     def build_request(self, *args, **kwargs):
-
-
-
-
+        if args:
+            url = args[0]
+            kwargs["url"] = url
+        url = kwargs["url"]
+
+        target_in_scope = self._target.in_scope(str(url))
+
+        if target_in_scope:
+            if not kwargs.get("cookies", None):
+                kwargs["cookies"] = {}
+            for ck, cv in self._web_config.get("http_cookies", {}).items():
+                if ck not in kwargs["cookies"]:
+                    kwargs["cookies"][ck] = cv
+
+        request = super().build_request(**kwargs)
+
+        if target_in_scope:
             for hk, hv in self._web_config.get("http_headers", {}).items():
                 hv = str(hv)
                 # don't clobber headers
```
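The cookie injection mirrors the existing in-scope `http_headers` handling. A hedged sketch of supplying custom cookies, assuming (based on the `self._web_config` lookup and the `defaults.yml` changes in this release) that the option lives under the `web` section of the scan config:

```python
from bbot import Scanner

# Hedged sketch: the exact config path for http_cookies is an assumption.
scan = Scanner(
    "evilcorp.com",
    config={
        "web": {
            "http_cookies": {"session": "deadbeef"},  # injected only for in-scope URLs
            "http_headers": {"X-Custom": "value"},    # the existing analogous option
        }
    },
)
```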
bbot/core/helpers/web/engine.py
CHANGED
```diff
@@ -8,7 +8,7 @@ from socksio.exceptions import SOCKSError
 from contextlib import asynccontextmanager

 from bbot.core.engine import EngineServer
-from bbot.core.helpers.misc import bytes_to_human, human_to_bytes, get_exception_chain
+from bbot.core.helpers.misc import bytes_to_human, human_to_bytes, get_exception_chain, truncate_string

 log = logging.getLogger("bbot.core.helpers.web.engine")

@@ -203,6 +203,14 @@ class HTTPEngine(EngineServer):
             else:
                 log.trace(f"Error with request to URL: {url}: {e}")
                 log.trace(traceback.format_exc())
+        except httpx.InvalidURL as e:
+            if raise_error:
+                raise
+            else:
+                log.warning(
+                    f"Invalid URL (possibly due to dangerous redirect) on request to : {url}: {truncate_string(e, 200)}"
+                )
+                log.trace(traceback.format_exc())
         except ssl.SSLError as e:
             msg = f"SSL error with request to URL: {url}: {e}"
             if raise_error:
```
|