bbot 2.4.0.6045rc0-py3-none-any.whl → 2.4.0.6067rc0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- bbot/__init__.py +1 -1
- bbot/cli.py +1 -1
- bbot/core/event/base.py +60 -20
- bbot/core/event/helpers.py +222 -36
- bbot/core/helpers/helper.py +1 -1
- bbot/core/helpers/regexes.py +1 -1
- bbot/core/helpers/web/web.py +1 -1
- bbot/modules/deadly/nuclei.py +3 -1
- bbot/modules/internal/excavate.py +1 -1
- bbot/modules/output/asset_inventory.py +1 -1
- bbot/modules/portscan.py +2 -13
- bbot/scanner/manager.py +30 -16
- bbot/scanner/preset/preset.py +1 -5
- bbot/scanner/scanner.py +7 -2
- bbot/scanner/target.py +90 -148
- bbot/test/test_step_1/test_cli.py +1 -1
- bbot/test/test_step_1/test_event_seeds.py +160 -0
- bbot/test/test_step_1/test_presets.py +3 -3
- bbot/test/test_step_1/test_python_api.py +1 -1
- bbot/test/test_step_1/test_regexes.py +20 -13
- bbot/test/test_step_1/test_scan.py +1 -1
- bbot/test/test_step_1/test_target.py +12 -14
- bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py +2 -1
- {bbot-2.4.0.6045rc0.dist-info → bbot-2.4.0.6067rc0.dist-info}/METADATA +1 -1
- {bbot-2.4.0.6045rc0.dist-info → bbot-2.4.0.6067rc0.dist-info}/RECORD +28 -27
- {bbot-2.4.0.6045rc0.dist-info → bbot-2.4.0.6067rc0.dist-info}/LICENSE +0 -0
- {bbot-2.4.0.6045rc0.dist-info → bbot-2.4.0.6067rc0.dist-info}/WHEEL +0 -0
- {bbot-2.4.0.6045rc0.dist-info → bbot-2.4.0.6067rc0.dist-info}/entry_points.txt +0 -0
bbot/__init__.py
CHANGED
bbot/cli.py
CHANGED
@@ -199,7 +199,7 @@ async def _main():
         if sys.stdin.isatty():
             # warn if any targets belong directly to a cloud provider
             if not scan.preset.strict_scope:
-                for event in scan.target.seeds.
+                for event in scan.target.seeds.event_seeds:
                     if event.type == "DNS_NAME":
                         cloudcheck_result = scan.helpers.cloudcheck(event.host)
                         if cloudcheck_result:
bbot/core/event/base.py
CHANGED
@@ -18,8 +18,8 @@ from pydantic import BaseModel, field_validator
 from urllib.parse import urlparse, urljoin, parse_qs


-from .helpers import *
 from bbot.errors import *
+from .helpers import EventSeed
 from bbot.core.helpers import (
     extract_words,
     is_domain,
@@ -109,18 +109,66 @@ class BaseEvent:
     # Bypass scope checking and dns resolution, distribute immediately to modules
     # This is useful for "end-of-line" events like FINDING and VULNERABILITY
     _quick_emit = False
-    # Whether this event has been retroactively marked as part of an important discovery chain
-    _graph_important = False
-    # Disables certain data validations
-    _dummy = False
     # Data validation, if data is a dictionary
     _data_validator = None
     # Whether to increment scope distance if the child and parent hosts are the same
+    # Normally we don't want this, since scope distance only increases if the host changes
+    # But for some events like SOCIAL media profiles, this is required to prevent spidering all of facebook.com
     _scope_distance_increment_same_host = False
     # Don't allow duplicates to occur within a parent chain
     # In other words, don't emit the event if the same one already exists in its discovery context
     _suppress_chain_dupes = False

+    # using __slots__ dramatically reduces memory usage in large scans
+    __slots__ = [
+        # Core identification attributes
+        "_uuid",
+        "_id",
+        "_hash",
+        "_data",
+        "_data_hash",
+        # Host-related attributes
+        "__host",
+        "_host_original",
+        "_port",
+        # Parent-related attributes
+        "_parent",
+        "_parent_id",
+        "_parent_uuid",
+        # Event metadata
+        "_type",
+        "_tags",
+        "_omit",
+        "__words",
+        "_priority",
+        "_scope_distance",
+        "_module_priority",
+        "_graph_important",
+        "_resolved_hosts",
+        "_discovery_context",
+        "_discovery_context_regex",
+        "_stats_recorded",
+        "_internal",
+        "_confidence",
+        "_dummy",
+        "_module",
+        # DNS-related attributes
+        "dns_children",
+        "raw_dns_records",
+        "dns_resolve_distance",
+        # Web-related attributes
+        "web_spider_distance",
+        "parsed_url",
+        "url_extension",
+        "num_redirects",
+        # File-related attributes
+        "_data_path",
+        # Public attributes
+        "module",
+        "scan",
+        "timestamp",
+    ]
+
     def __init__(
         self,
         data,
@@ -129,7 +177,6 @@ class BaseEvent:
         context=None,
         module=None,
         scan=None,
-        scans=None,
         tags=None,
         confidence=100,
         timestamp=None,
@@ -148,7 +195,6 @@ class BaseEvent:
            parent (BaseEvent, optional): Parent event that led to this event's discovery. Defaults to None.
            module (str, optional): Module that discovered the event. Defaults to None.
            scan (Scan, optional): BBOT Scan object. Required unless _dummy is True. Defaults to None.
-           scans (list of Scan, optional): BBOT Scan objects, used primarily when unserializing an Event from the database. Defaults to None.
            tags (list of str, optional): Descriptive tags for the event. Defaults to None.
            confidence (int, optional): Confidence level for the event, on a scale of 1-100. Defaults to 100.
            timestamp (datetime, optional): Time of event discovery. Defaults to current UTC time.
@@ -174,6 +220,7 @@ class BaseEvent:
         self._host_original = None
         self._scope_distance = None
         self._module_priority = None
+        self._graph_important = False
         self._resolved_hosts = set()
         self.dns_children = {}
         self.raw_dns_records = {}
@@ -204,12 +251,6 @@ class BaseEvent:
         self.scan = scan
         if (not self.scan) and (not self._dummy):
             raise ValidationError("Must specify scan")
-        # self.scans holds a list of scan IDs from scans that encountered this event
-        self.scans = []
-        if scans is not None:
-            self.scans = scans
-        if self.scan:
-            self.scans = list(set([self.scan.id] + self.scans))

         try:
             self.data = self._sanitize_data(data)
@@ -1348,7 +1389,7 @@ class EMAIL_ADDRESS(BaseEvent):
        return validators.validate_email(data)

    def _host(self):
-       data = str(self.data).
+       data = str(self.data).rsplit("@", 1)[-1]
        host, self._port = split_host_port(data)
        return host

@@ -1652,7 +1693,6 @@ def make_event(
    context=None,
    module=None,
    scan=None,
-   scans=None,
    tags=None,
    confidence=100,
    dummy=False,
@@ -1712,12 +1752,11 @@ def make_event(
        tags = [tags]
    tags = set(tags)

+   # if data is already an event, update it with the user's kwargs
    if is_event(data):
        event = copy(data)
        if scan is not None and not event.scan:
            event.scan = scan
-       if scans is not None and not event.scans:
-           event.scans = scans
        if module is not None:
            event.module = module
        if parent is not None:
@@ -1731,8 +1770,11 @@ def make_event(
            event_type = data.type
        return event
    else:
+       # if event_type is not provided, autodetect it
        if event_type is None:
-
+           event_seed = EventSeed(data)
+           event_type = event_seed.type
+           data = event_seed.data
        if not dummy:
            log.debug(f'Autodetected event type "{event_type}" based on data: "{data}"')

@@ -1776,7 +1818,6 @@ def make_event(
        context=context,
        module=module,
        scan=scan,
-       scans=scans,
        tags=tags,
        confidence=confidence,
        _dummy=dummy,
@@ -1810,7 +1851,6 @@ def event_from_json(j, siem_friendly=False):
    event_type = j["type"]
    kwargs = {
        "event_type": event_type,
-       "scans": j.get("scans", []),
        "tags": j.get("tags", []),
        "confidence": j.get("confidence", 100),
        "context": j.get("discovery_context", None),
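With the scans parameter gone and autodetection now routed through EventSeed, the simplest way to exercise the new make_event path is a dummy event. A hedged sketch (assumes a bbot install matching this diff; "www.evilcorp.com" is only a placeholder host):

    from bbot.core.event.base import make_event

    # dummy=True skips the requirement for a live Scan object
    event = make_event("https://www.evilcorp.com:8443/login", dummy=True)
    print(event.type)  # expected: URL_UNVERIFIED (autodetected via EventSeed)
    print(event.host)  # expected: www.evilcorp.com
    print(event.port)  # expected: 8443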
bbot/core/event/helpers.py
CHANGED
@@ -1,52 +1,238 @@
-import logging
 import ipaddress
-
-
+import regex as re
+from functools import cached_property
 from bbot.errors import ValidationError
-from bbot.core.helpers
-from bbot.core.helpers import
+from bbot.core.helpers import validators
+from bbot.core.helpers.misc import split_host_port, make_ip_type
+from bbot.core.helpers import regexes, smart_decode, smart_encode_punycode
+
+bbot_event_seeds = {}
+
+
+"""
+An "Event Seed" is a lightweight event containing only the minimum logic required to:
+- parse input to determine the event type + data
+- validate+sanitize the data
+- extract the host for scope purposes

+It's useful for quickly parsing target lists without the cpu+memory overhead of creating full-fledged BBOT events

-
+Not every type of BBOT event needs to be represented here. Only ones that are meant to be targets.
+"""


-
+class EventSeedRegistry(type):
+    """
+    Metaclass for EventSeed that registers all subclasses in a registry.
    """
-    Determines the type of event based on the given data.

-
-
+    def __new__(mcs, name, bases, attrs):
+        global bbot_event_seeds
+        cls = super().__new__(mcs, name, bases, attrs)
+        # Don't register the base EventSeed class
+        if name != "BaseEventSeed":
+            bbot_event_seeds[cls.__name__] = cls
+        return cls

-    Returns:
-        str: The type of event such as "IP_ADDRESS", "IP_RANGE", or "URL_UNVERIFIED".

-
-
+def EventSeed(input):
+    input = smart_encode_punycode(smart_decode(input).strip())
+    for _, event_class in bbot_event_seeds.items():
+        if hasattr(event_class, "precheck"):
+            if event_class.precheck(input):
+                return event_class(input)
+        else:
+            for regex in event_class.regexes:
+                match = regex.match(input)
+                if match:
+                    data = event_class.handle_match(match)
+                    return event_class(data)
+    raise ValidationError(f'Unable to autodetect data type from "{input}"')

-    Notes:
-        - Utilizes `smart_decode_punycode` and `smart_decode` to preprocess the data.
-        - Makes use of `ipaddress` standard library to check for IP and network types.
-        - Checks against a set of predefined regular expressions stored in `event_type_regexes`.
-    """

-
-
-
-
+class BaseEventSeed(metaclass=EventSeedRegistry):
+    regexes = []
+    _target_type = "TARGET"
+
+    __slots__ = ["data", "host", "port", "input"]
+
+    def __init__(self, data):
+        self.data, self.host, self.port = self._sanitize_and_extract_host(data)
+        self.input = self._override_input(data)
+
+    @staticmethod
+    def handle_match(match):
+        """
+        Given a regex match, returns the event data
+        """
+        return match.group(0)
+
+    def _sanitize_and_extract_host(self, data):
+        """
+        Given the event data, returns the host
+
+        Returns:
+            tuple: (data, host, port)
+        """
+        return data, None, None
+
+    def _override_input(self, input):
+        return self.data
+
+    @property
+    def type(self):
+        return self.__class__.__name__
+
+    @cached_property
+    def _hash(self):
+        return hash(self.input)
+
+    def __hash__(self):
+        return self._hash
+
+    def __eq__(self, other):
+        return hash(self) == hash(other)
+
+    def __str__(self):
+        return f"EventSeed({self.input})"
+
+    def __repr__(self):
+        return str(self)
+
+
+class IP_ADDRESS(BaseEventSeed):
+    regexes = regexes.event_type_regexes["IP_ADDRESS"]
+
+    @staticmethod
+    def precheck(data):
+        try:
+            return ipaddress.ip_address(data)
+        except ValueError:
+            return False
+
+    @staticmethod
+    def _sanitize_and_extract_host(data):
+        validated = ipaddress.ip_address(data)
+        return str(validated), validated, None
+
+
+class DNS_NAME(BaseEventSeed):
+    regexes = regexes.event_type_regexes["DNS_NAME"]
+
+    @staticmethod
+    def _sanitize_and_extract_host(data):
+        validated = validators.validate_host(data)
+        return validated, validated, None
+
+
+class IP_RANGE(BaseEventSeed):
+    regexes = regexes.event_type_regexes["IP_RANGE"]
+
+    @staticmethod
+    def precheck(data):
+        try:
+            return ipaddress.ip_network(str(data), strict=False)
+        except ValueError:
+            return False
+
+    @staticmethod
+    def _sanitize_and_extract_host(data):
+        validated = ipaddress.ip_network(str(data), strict=False)
+        return str(validated), validated, None
+
+
+class OPEN_TCP_PORT(BaseEventSeed):
+    regexes = regexes.event_type_regexes["OPEN_TCP_PORT"]
+
+    @staticmethod
+    def _sanitize_and_extract_host(data):
+        validated = validators.validate_open_port(data)
+        host, port = split_host_port(validated)
+        host = make_ip_type(host)
+        return str(validated), host, port
+
+
+class URL_UNVERIFIED(BaseEventSeed):
+    regexes = regexes.event_type_regexes["URL"]
+
+    _scheme_to_port = {
+        "https": 443,
+        "http": 80,
+    }
+
+    @staticmethod
+    def _sanitize_and_extract_host(data):
+        parsed_url = validators.clean_url(data, url_querystring_remove=False)
+        scheme = parsed_url.scheme
+        host = make_ip_type(validators.validate_host(parsed_url.hostname))
+        port = parsed_url.port
+        if port is None:
+            port = URL_UNVERIFIED._scheme_to_port.get(scheme, None)
+        return parsed_url.geturl(), host, port
+
+
+class EMAIL_ADDRESS(BaseEventSeed):
+    regexes = regexes.event_type_regexes["EMAIL_ADDRESS"]
+
+    @staticmethod
+    def _sanitize_and_extract_host(data):
+        validated = validators.validate_email(data)
+        host = validated.rsplit("@", 1)[-1]
+        host, port = split_host_port(host)
+        return validated, host, port
+
+
+class ORG_STUB(BaseEventSeed):
+    regexes = (re.compile(r"^(?:ORG|ORG_STUB):(.*)"),)
+
+    def _override_input(self, input):
+        return f"ORG_STUB:{self.data}"
+
+    @staticmethod
+    def handle_match(match):
+        return match.group(1)
+
+
+class USERNAME(BaseEventSeed):
+    regexes = (re.compile(r"^(?:USER|USERNAME):(.*)"),)
+
+    def _override_input(self, input):
+        return f"USERNAME:{self.data}"
+
+    @staticmethod
+    def handle_match(match):
+        return match.group(1)
+
+
+class FILESYSTEM(BaseEventSeed):
+    regexes = (re.compile(r"^(?:FILESYSTEM|FILE|FOLDER|DIR|PATH):(.*)"),)
+
+    def _override_input(self, input):
+        return f"FILESYSTEM:{self.data['path']}"
+
+    @staticmethod
+    def handle_match(match):
+        return {"path": match.group(1)}
+
+
+class MOBILE_APP(BaseEventSeed):
+    regexes = (re.compile(r"^(?:MOBILE_APP|APK|IPA|APP):(.*)"),)
+
+    def _override_input(self, input):
+        return f"MOBILE_APP:{self.data['url']}"
+
+    @staticmethod
+    def handle_match(match):
+        return {"url": match.group(1)}

-    # IP network
-    with suppress(Exception):
-        ipaddress.ip_network(data, strict=False)
-        return "IP_RANGE", data

-
+class BLACKLIST_REGEX(BaseEventSeed):
+    regexes = (re.compile(r"^(?:RE|REGEX):(.*)"),)
+    _target_type = "BLACKLIST"

-
-
-        for r in regexes:
-            if r.match(data):
-                if t == "URL":
-                    return "URL_UNVERIFIED", data
-                return t, data
+    def _override_input(self, input):
+        return f"REGEX:{self.data}"

-
+    @staticmethod
+    def handle_match(match):
+        return match.group(1)
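Since the whole new module is visible above, a short usage sketch of the EventSeed factory follows (hedged: it assumes bbot's packaged regexes behave as the class names suggest, and the example targets are placeholders):

    from bbot.core.event.helpers import EventSeed
    from bbot.errors import ValidationError

    seed = EventSeed("evilcorp.com:443")
    print(seed.type, seed.host, seed.port)  # expected: OPEN_TCP_PORT evilcorp.com 443

    seed = EventSeed("USER:j.smith")
    print(seed.type, seed.data)  # expected: USERNAME j.smith

    try:
        EventSeed("not a recognizable target !!")
    except ValidationError as e:
        print(e)  # autodetection failed, as expected for unparseable input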
bbot/core/helpers/helper.py
CHANGED
@@ -159,7 +159,7 @@ class ConfigAwareHelper:
         self.clean_old(self.scans_dir, keep=self.keep_old_scans, filter=_filter)

     def make_target(self, *targets, **kwargs):
-        return BaseTarget(*targets,
+        return BaseTarget(*targets, **kwargs)

     @property
     def config(self):
bbot/core/helpers/regexes.py
CHANGED
@@ -46,7 +46,7 @@ _dns_name_regex_with_period = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_
 dns_name_extraction_regex = re.compile(_dns_name_regex_with_period, re.I)
 dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

-_email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex
+_email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex + r"(?::[0-9]{1,5})?"
 email_regex = re.compile(_email_regex, re.I)

 _ptr_regex = r"(?:[0-9]{1,3}[-_\.]){3}[0-9]{1,3}"
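The only change here appends an optional ":port" group to the email pattern, which is what lets the new EMAIL_ADDRESS event seed carry a port. A quick hedged check (assumes the installed bbot matches this diff):

    from bbot.core.helpers.regexes import email_regex

    for candidate in ("bob@evilcorp.com", "bob@evilcorp.com:25"):
        # with the added (?::[0-9]{1,5})? group, both forms should now fully match
        print(candidate, bool(email_regex.fullmatch(candidate)))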
bbot/core/helpers/web/web.py
CHANGED
@@ -57,7 +57,7 @@ class WebHelper(EngineClient):
         self.ssl_verify = self.config.get("ssl_verify", False)
         engine_debug = self.config.get("engine", {}).get("debug", False)
         super().__init__(
-            server_kwargs={"config": self.config, "target": self.parent_helper.preset.target
+            server_kwargs={"config": self.config, "target": self.parent_helper.preset.target},
             debug=engine_debug,
         )

bbot/modules/deadly/nuclei.py
CHANGED
@@ -139,7 +139,9 @@ class nuclei(BaseModule):
         return True

     async def handle_batch(self, *events):
-        temp_target = self.helpers.make_target(
+        temp_target = self.helpers.make_target()
+        for e in events:
+            temp_target.add(e.data, e)
         nuclei_input = [str(e.data) for e in events]
         async for severity, template, tags, host, url, name, extracted_results in self.execute_nuclei(nuclei_input):
             # this is necessary because sometimes nuclei is inconsistent about the data returned in the host field
bbot/modules/internal/excavate.py
CHANGED
@@ -776,7 +776,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
        event_draft = self.excavate.make_event(event_data, event_type, parent=event)
        if not event_draft:
            return None
-       url_in_scope = self.excavate.scan.in_scope(event_draft)
+       url_in_scope = self.excavate.scan.in_scope(event_draft.host_filterable)
        urls_found = kwargs.get("urls_found", None)
        if urls_found:
            exceeds_max_links = urls_found > self.excavate.scan.web_spider_links_per_page and url_in_scope
bbot/modules/output/asset_inventory.py
CHANGED
@@ -77,7 +77,7 @@ class asset_inventory(CSV):
            return False, "event is internal"
        if event.type not in self.watched_events:
            return False, "event type is not in watched_events"
-       if not self.scan.in_scope(event):
+       if not self.scan.in_scope(event.host):
            return False, "event is not in scope"
        if "unresolved" in event.tags:
            return False, "event is unresolved"
bbot/modules/portscan.py
CHANGED
@@ -1,7 +1,7 @@
 import json
 import ipaddress
 from contextlib import suppress
-from radixtarget import RadixTarget
+from radixtarget import RadixTarget, host_size_key

 from bbot.modules.base import BaseModule

@@ -65,8 +65,6 @@ class portscan(BaseModule):
         except ValueError as e:
             return False, f"Error parsing ports '{self.ports}': {e}"

-        # whether we've finished scanning our original scan targets
-        self.scanned_initial_targets = False
         # keeps track of individual scanned IPs and their open ports
         # this is necessary because we may encounter more hosts with the same IP
         # and we want to avoid scanning them again
@@ -93,14 +91,6 @@ class portscan(BaseModule):
         return True

     async def handle_batch(self, *events):
-        # on our first run, we automatically include all our initial scan targets
-        if not self.scanned_initial_targets:
-            self.scanned_initial_targets = True
-            events = set(events)
-            events.update(
-                {e for e in self.scan.target.seeds.events if e.type in ("DNS_NAME", "IP_ADDRESS", "IP_RANGE")}
-            )
-
         # ping scan
         if self.ping_scan:
             ping_targets, ping_correlator = await self.make_targets(events, self.ping_scanned)
@@ -160,14 +150,13 @@ class portscan(BaseModule):
         """
         correlator = RadixTarget()
         targets = set()
-        for event in sorted(events, key=lambda e: e.
+        for event in sorted(events, key=lambda e: host_size_key(e.host)):
             # skip events without host
             if not event.host:
                 continue
             ips = set()
             try:
                 # first assume it's an ip address / ip range
-                # False == it's not a hostname
                 ips.add(ipaddress.ip_network(event.host, strict=False))
             except Exception:
                 # if it's a hostname, get its IPs from resolved_hosts
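host_size_key replaces the old inline sort key and is imported from the radixtarget package, as shown in the first hunk. A hedged sketch of the sort it is used for here (the exact ordering is radixtarget's concern; the point is that batches are ordered deterministically by host/network size before correlation):

    import ipaddress
    from radixtarget import host_size_key

    # placeholder hosts of the kinds portscan handles: hostname, single IP, IP range
    hosts = ["evilcorp.com", ipaddress.ip_address("1.2.3.4"), ipaddress.ip_network("1.2.3.0/24")]
    print(sorted(hosts, key=host_size_key))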
bbot/scanner/manager.py
CHANGED
@@ -1,5 +1,6 @@
 import asyncio
 from contextlib import suppress
+from radixtarget.helpers import host_size_key

 from bbot.modules.base import BaseInterceptModule

@@ -29,7 +30,7 @@ class ScanIngress(BaseInterceptModule):
         # track incoming duplicates module-by-module (for `suppress_dupes` attribute of modules)
         self.incoming_dup_tracker = set()

-    async def init_events(self,
+    async def init_events(self, event_seeds=None):
         """
         Initializes events by seeding the scanner with target events and distributing them for further processing.

@@ -37,21 +38,31 @@ class ScanIngress(BaseInterceptModule):
         - This method populates the event queue with initial target events.
         - It also marks the Scan object as finished with initialization by setting `_finished_init` to True.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        async with (
+            self.scan._acatch(self.init_events, unhandled_is_critical=True),
+            self._task_counter.count(self.init_events),
+        ):
+            if event_seeds is None:
+                event_seeds = self.scan.target.seeds.event_seeds
+            root_event = self.scan.root_event
+            event_seeds = sorted(event_seeds, key=lambda e: (host_size_key(str(e.host)), e.data))
+            # queue root scan event
+            await self.queue_event(root_event, {})
+            target_module = self.scan._make_dummy_module(name="TARGET", _type="TARGET")
+            # queue each target in turn
+            for event_seed in event_seeds:
+                event = self.scan.make_event(
+                    event_seed.data,
+                    event_seed.type,
+                    parent=root_event,
+                    module=target_module,
+                    context=f"Scan {self.scan.name} seeded with " + "{event.type}: {event.data}",
+                    tags=["target"],
+                )
                 self.verbose(f"Target: {event}")
+                # don't fill up the queue with too many events
+                while self.incoming_event_queue.qsize() > 100:
+                    await asyncio.sleep(0.2)
                 await self.queue_event(event, {})
                 await asyncio.sleep(0.1)
         self.scan._finished_init = True
@@ -95,7 +106,10 @@ class ScanIngress(BaseInterceptModule):
             event.add_tag("blacklisted")

         # main scan blacklist
-
+        host_filterable = getattr(event, "host_filterable", None)
+        event_blacklisted = False
+        if host_filterable:
+            event_blacklisted = self.scan.blacklisted(host_filterable)

         # reject all blacklisted events
         if event_blacklisted or "blacklisted" in event.tags:
bbot/scanner/preset/preset.py
CHANGED
@@ -483,11 +483,7 @@ class Preset(metaclass=BasePreset):
         from bbot.scanner.target import BBOTTarget

         baked_preset._target = BBOTTarget(
-            *list(self._seeds),
-            whitelist=self._whitelist,
-            blacklist=self._blacklist,
-            strict_scope=self.strict_scope,
-            scan=scan,
+            *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, strict_scope=self.strict_scope
         )

         # evaluate conditions
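For reference, the baked call above now collapses to a single line and no longer passes scan=. A hedged construction sketch with placeholder seeds, whitelist, and blacklist (assumes BBOTTarget's signature matches the call in this diff):

    from bbot.scanner.target import BBOTTarget

    target = BBOTTarget(
        "evilcorp.com",
        "1.2.3.0/24",
        whitelist=["evilcorp.com"],
        blacklist=["admin.evilcorp.com"],
        strict_scope=False,
    )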