bbot 2.4.0.6045rc0__py3-none-any.whl → 2.4.0.6067rc0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

bbot/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # version placeholder (replaced by poetry-dynamic-versioning)
2
- __version__ = "v2.4.0.6045rc"
2
+ __version__ = "v2.4.0.6067rc"
3
3
 
4
4
  from .scanner import Scanner, Preset
bbot/cli.py CHANGED
@@ -199,7 +199,7 @@ async def _main():
199
199
  if sys.stdin.isatty():
200
200
  # warn if any targets belong directly to a cloud provider
201
201
  if not scan.preset.strict_scope:
202
- for event in scan.target.seeds.events:
202
+ for event in scan.target.seeds.event_seeds:
203
203
  if event.type == "DNS_NAME":
204
204
  cloudcheck_result = scan.helpers.cloudcheck(event.host)
205
205
  if cloudcheck_result:
bbot/core/event/base.py CHANGED
@@ -18,8 +18,8 @@ from pydantic import BaseModel, field_validator
18
18
  from urllib.parse import urlparse, urljoin, parse_qs
19
19
 
20
20
 
21
- from .helpers import *
22
21
  from bbot.errors import *
22
+ from .helpers import EventSeed
23
23
  from bbot.core.helpers import (
24
24
  extract_words,
25
25
  is_domain,
@@ -109,18 +109,66 @@ class BaseEvent:
109
109
  # Bypass scope checking and dns resolution, distribute immediately to modules
110
110
  # This is useful for "end-of-line" events like FINDING and VULNERABILITY
111
111
  _quick_emit = False
112
- # Whether this event has been retroactively marked as part of an important discovery chain
113
- _graph_important = False
114
- # Disables certain data validations
115
- _dummy = False
116
112
  # Data validation, if data is a dictionary
117
113
  _data_validator = None
118
114
  # Whether to increment scope distance if the child and parent hosts are the same
115
+ # Normally we don't want this, since scope distance only increases if the host changes
116
+ # But for some events like SOCIAL media profiles, this is required to prevent spidering all of facebook.com
119
117
  _scope_distance_increment_same_host = False
120
118
  # Don't allow duplicates to occur within a parent chain
121
119
  # In other words, don't emit the event if the same one already exists in its discovery context
122
120
  _suppress_chain_dupes = False
123
121
 
122
+ # using __slots__ dramatically reduces memory usage in large scans
123
+ __slots__ = [
124
+ # Core identification attributes
125
+ "_uuid",
126
+ "_id",
127
+ "_hash",
128
+ "_data",
129
+ "_data_hash",
130
+ # Host-related attributes
131
+ "__host",
132
+ "_host_original",
133
+ "_port",
134
+ # Parent-related attributes
135
+ "_parent",
136
+ "_parent_id",
137
+ "_parent_uuid",
138
+ # Event metadata
139
+ "_type",
140
+ "_tags",
141
+ "_omit",
142
+ "__words",
143
+ "_priority",
144
+ "_scope_distance",
145
+ "_module_priority",
146
+ "_graph_important",
147
+ "_resolved_hosts",
148
+ "_discovery_context",
149
+ "_discovery_context_regex",
150
+ "_stats_recorded",
151
+ "_internal",
152
+ "_confidence",
153
+ "_dummy",
154
+ "_module",
155
+ # DNS-related attributes
156
+ "dns_children",
157
+ "raw_dns_records",
158
+ "dns_resolve_distance",
159
+ # Web-related attributes
160
+ "web_spider_distance",
161
+ "parsed_url",
162
+ "url_extension",
163
+ "num_redirects",
164
+ # File-related attributes
165
+ "_data_path",
166
+ # Public attributes
167
+ "module",
168
+ "scan",
169
+ "timestamp",
170
+ ]
171
+
124
172
  def __init__(
125
173
  self,
126
174
  data,
@@ -129,7 +177,6 @@ class BaseEvent:
129
177
  context=None,
130
178
  module=None,
131
179
  scan=None,
132
- scans=None,
133
180
  tags=None,
134
181
  confidence=100,
135
182
  timestamp=None,
@@ -148,7 +195,6 @@ class BaseEvent:
148
195
  parent (BaseEvent, optional): Parent event that led to this event's discovery. Defaults to None.
149
196
  module (str, optional): Module that discovered the event. Defaults to None.
150
197
  scan (Scan, optional): BBOT Scan object. Required unless _dummy is True. Defaults to None.
151
- scans (list of Scan, optional): BBOT Scan objects, used primarily when unserializing an Event from the database. Defaults to None.
152
198
  tags (list of str, optional): Descriptive tags for the event. Defaults to None.
153
199
  confidence (int, optional): Confidence level for the event, on a scale of 1-100. Defaults to 100.
154
200
  timestamp (datetime, optional): Time of event discovery. Defaults to current UTC time.
@@ -174,6 +220,7 @@ class BaseEvent:
174
220
  self._host_original = None
175
221
  self._scope_distance = None
176
222
  self._module_priority = None
223
+ self._graph_important = False
177
224
  self._resolved_hosts = set()
178
225
  self.dns_children = {}
179
226
  self.raw_dns_records = {}
@@ -204,12 +251,6 @@ class BaseEvent:
204
251
  self.scan = scan
205
252
  if (not self.scan) and (not self._dummy):
206
253
  raise ValidationError("Must specify scan")
207
- # self.scans holds a list of scan IDs from scans that encountered this event
208
- self.scans = []
209
- if scans is not None:
210
- self.scans = scans
211
- if self.scan:
212
- self.scans = list(set([self.scan.id] + self.scans))
213
254
 
214
255
  try:
215
256
  self.data = self._sanitize_data(data)
@@ -1348,7 +1389,7 @@ class EMAIL_ADDRESS(BaseEvent):
1348
1389
  return validators.validate_email(data)
1349
1390
 
1350
1391
  def _host(self):
1351
- data = str(self.data).split("@")[-1]
1392
+ data = str(self.data).rsplit("@", 1)[-1]
1352
1393
  host, self._port = split_host_port(data)
1353
1394
  return host
1354
1395
 
@@ -1652,7 +1693,6 @@ def make_event(
1652
1693
  context=None,
1653
1694
  module=None,
1654
1695
  scan=None,
1655
- scans=None,
1656
1696
  tags=None,
1657
1697
  confidence=100,
1658
1698
  dummy=False,
@@ -1712,12 +1752,11 @@ def make_event(
1712
1752
  tags = [tags]
1713
1753
  tags = set(tags)
1714
1754
 
1755
+ # if data is already an event, update it with the user's kwargs
1715
1756
  if is_event(data):
1716
1757
  event = copy(data)
1717
1758
  if scan is not None and not event.scan:
1718
1759
  event.scan = scan
1719
- if scans is not None and not event.scans:
1720
- event.scans = scans
1721
1760
  if module is not None:
1722
1761
  event.module = module
1723
1762
  if parent is not None:
@@ -1731,8 +1770,11 @@ def make_event(
1731
1770
  event_type = data.type
1732
1771
  return event
1733
1772
  else:
1773
+ # if event_type is not provided, autodetect it
1734
1774
  if event_type is None:
1735
- event_type, data = get_event_type(data)
1775
+ event_seed = EventSeed(data)
1776
+ event_type = event_seed.type
1777
+ data = event_seed.data
1736
1778
  if not dummy:
1737
1779
  log.debug(f'Autodetected event type "{event_type}" based on data: "{data}"')
1738
1780
 
@@ -1776,7 +1818,6 @@ def make_event(
1776
1818
  context=context,
1777
1819
  module=module,
1778
1820
  scan=scan,
1779
- scans=scans,
1780
1821
  tags=tags,
1781
1822
  confidence=confidence,
1782
1823
  _dummy=dummy,
@@ -1810,7 +1851,6 @@ def event_from_json(j, siem_friendly=False):
1810
1851
  event_type = j["type"]
1811
1852
  kwargs = {
1812
1853
  "event_type": event_type,
1813
- "scans": j.get("scans", []),
1814
1854
  "tags": j.get("tags", []),
1815
1855
  "confidence": j.get("confidence", 100),
1816
1856
  "context": j.get("discovery_context", None),
bbot/core/event/helpers.py CHANGED
@@ -1,52 +1,238 @@
1
- import logging
2
1
  import ipaddress
3
- from contextlib import suppress
4
-
2
+ import regex as re
3
+ from functools import cached_property
5
4
  from bbot.errors import ValidationError
6
- from bbot.core.helpers.regexes import event_type_regexes
7
- from bbot.core.helpers import smart_decode, smart_encode_punycode
5
+ from bbot.core.helpers import validators
6
+ from bbot.core.helpers.misc import split_host_port, make_ip_type
7
+ from bbot.core.helpers import regexes, smart_decode, smart_encode_punycode
8
+
9
+ bbot_event_seeds = {}
10
+
11
+
12
+ """
13
+ An "Event Seed" is a lightweight event containing only the minimum logic required to:
14
+ - parse input to determine the event type + data
15
+ - validate+sanitize the data
16
+ - extract the host for scope purposes
8
17
 
18
+ It's useful for quickly parsing target lists without the cpu+memory overhead of creating full-fledged BBOT events
9
19
 
10
- log = logging.getLogger("bbot.core.event.helpers")
20
+ Not every type of BBOT event needs to be represented here. Only ones that are meant to be targets.
21
+ """
11
22
 
12
23
 
13
- def get_event_type(data):
24
+ class EventSeedRegistry(type):
25
+ """
26
+ Metaclass for EventSeed that registers all subclasses in a registry.
14
27
  """
15
- Determines the type of event based on the given data.
16
28
 
17
- Args:
18
- data (str): The data to be used for determining the event type.
29
+ def __new__(mcs, name, bases, attrs):
30
+ global bbot_event_seeds
31
+ cls = super().__new__(mcs, name, bases, attrs)
32
+ # Don't register the base EventSeed class
33
+ if name != "BaseEventSeed":
34
+ bbot_event_seeds[cls.__name__] = cls
35
+ return cls
19
36
 
20
- Returns:
21
- str: The type of event such as "IP_ADDRESS", "IP_RANGE", or "URL_UNVERIFIED".
22
37
 
23
- Raises:
24
- ValidationError: If the event type could not be determined.
38
+ def EventSeed(input):
39
+ input = smart_encode_punycode(smart_decode(input).strip())
40
+ for _, event_class in bbot_event_seeds.items():
41
+ if hasattr(event_class, "precheck"):
42
+ if event_class.precheck(input):
43
+ return event_class(input)
44
+ else:
45
+ for regex in event_class.regexes:
46
+ match = regex.match(input)
47
+ if match:
48
+ data = event_class.handle_match(match)
49
+ return event_class(data)
50
+ raise ValidationError(f'Unable to autodetect data type from "{input}"')
25
51
 
26
- Notes:
27
- - Utilizes `smart_decode_punycode` and `smart_decode` to preprocess the data.
28
- - Makes use of `ipaddress` standard library to check for IP and network types.
29
- - Checks against a set of predefined regular expressions stored in `event_type_regexes`.
30
- """
31
52
 
32
- # IP address
33
- with suppress(Exception):
34
- ipaddress.ip_address(data)
35
- return "IP_ADDRESS", data
53
+ class BaseEventSeed(metaclass=EventSeedRegistry):
54
+ regexes = []
55
+ _target_type = "TARGET"
56
+
57
+ __slots__ = ["data", "host", "port", "input"]
58
+
59
+ def __init__(self, data):
60
+ self.data, self.host, self.port = self._sanitize_and_extract_host(data)
61
+ self.input = self._override_input(data)
62
+
63
+ @staticmethod
64
+ def handle_match(match):
65
+ """
66
+ Given a regex match, returns the event data
67
+ """
68
+ return match.group(0)
69
+
70
+ def _sanitize_and_extract_host(self, data):
71
+ """
72
+ Given the event data, returns the host
73
+
74
+ Returns:
75
+ tuple: (data, host, port)
76
+ """
77
+ return data, None, None
78
+
79
+ def _override_input(self, input):
80
+ return self.data
81
+
82
+ @property
83
+ def type(self):
84
+ return self.__class__.__name__
85
+
86
+ @cached_property
87
+ def _hash(self):
88
+ return hash(self.input)
89
+
90
+ def __hash__(self):
91
+ return self._hash
92
+
93
+ def __eq__(self, other):
94
+ return hash(self) == hash(other)
95
+
96
+ def __str__(self):
97
+ return f"EventSeed({self.input})"
98
+
99
+ def __repr__(self):
100
+ return str(self)
101
+
102
+
103
+ class IP_ADDRESS(BaseEventSeed):
104
+ regexes = regexes.event_type_regexes["IP_ADDRESS"]
105
+
106
+ @staticmethod
107
+ def precheck(data):
108
+ try:
109
+ return ipaddress.ip_address(data)
110
+ except ValueError:
111
+ return False
112
+
113
+ @staticmethod
114
+ def _sanitize_and_extract_host(data):
115
+ validated = ipaddress.ip_address(data)
116
+ return str(validated), validated, None
117
+
118
+
119
+ class DNS_NAME(BaseEventSeed):
120
+ regexes = regexes.event_type_regexes["DNS_NAME"]
121
+
122
+ @staticmethod
123
+ def _sanitize_and_extract_host(data):
124
+ validated = validators.validate_host(data)
125
+ return validated, validated, None
126
+
127
+
128
+ class IP_RANGE(BaseEventSeed):
129
+ regexes = regexes.event_type_regexes["IP_RANGE"]
130
+
131
+ @staticmethod
132
+ def precheck(data):
133
+ try:
134
+ return ipaddress.ip_network(str(data), strict=False)
135
+ except ValueError:
136
+ return False
137
+
138
+ @staticmethod
139
+ def _sanitize_and_extract_host(data):
140
+ validated = ipaddress.ip_network(str(data), strict=False)
141
+ return str(validated), validated, None
142
+
143
+
144
+ class OPEN_TCP_PORT(BaseEventSeed):
145
+ regexes = regexes.event_type_regexes["OPEN_TCP_PORT"]
146
+
147
+ @staticmethod
148
+ def _sanitize_and_extract_host(data):
149
+ validated = validators.validate_open_port(data)
150
+ host, port = split_host_port(validated)
151
+ host = make_ip_type(host)
152
+ return str(validated), host, port
153
+
154
+
155
+ class URL_UNVERIFIED(BaseEventSeed):
156
+ regexes = regexes.event_type_regexes["URL"]
157
+
158
+ _scheme_to_port = {
159
+ "https": 443,
160
+ "http": 80,
161
+ }
162
+
163
+ @staticmethod
164
+ def _sanitize_and_extract_host(data):
165
+ parsed_url = validators.clean_url(data, url_querystring_remove=False)
166
+ scheme = parsed_url.scheme
167
+ host = make_ip_type(validators.validate_host(parsed_url.hostname))
168
+ port = parsed_url.port
169
+ if port is None:
170
+ port = URL_UNVERIFIED._scheme_to_port.get(scheme, None)
171
+ return parsed_url.geturl(), host, port
172
+
173
+
174
+ class EMAIL_ADDRESS(BaseEventSeed):
175
+ regexes = regexes.event_type_regexes["EMAIL_ADDRESS"]
176
+
177
+ @staticmethod
178
+ def _sanitize_and_extract_host(data):
179
+ validated = validators.validate_email(data)
180
+ host = validated.rsplit("@", 1)[-1]
181
+ host, port = split_host_port(host)
182
+ return validated, host, port
183
+
184
+
185
+ class ORG_STUB(BaseEventSeed):
186
+ regexes = (re.compile(r"^(?:ORG|ORG_STUB):(.*)"),)
187
+
188
+ def _override_input(self, input):
189
+ return f"ORG_STUB:{self.data}"
190
+
191
+ @staticmethod
192
+ def handle_match(match):
193
+ return match.group(1)
194
+
195
+
196
+ class USERNAME(BaseEventSeed):
197
+ regexes = (re.compile(r"^(?:USER|USERNAME):(.*)"),)
198
+
199
+ def _override_input(self, input):
200
+ return f"USERNAME:{self.data}"
201
+
202
+ @staticmethod
203
+ def handle_match(match):
204
+ return match.group(1)
205
+
206
+
207
+ class FILESYSTEM(BaseEventSeed):
208
+ regexes = (re.compile(r"^(?:FILESYSTEM|FILE|FOLDER|DIR|PATH):(.*)"),)
209
+
210
+ def _override_input(self, input):
211
+ return f"FILESYSTEM:{self.data['path']}"
212
+
213
+ @staticmethod
214
+ def handle_match(match):
215
+ return {"path": match.group(1)}
216
+
217
+
218
+ class MOBILE_APP(BaseEventSeed):
219
+ regexes = (re.compile(r"^(?:MOBILE_APP|APK|IPA|APP):(.*)"),)
220
+
221
+ def _override_input(self, input):
222
+ return f"MOBILE_APP:{self.data['url']}"
223
+
224
+ @staticmethod
225
+ def handle_match(match):
226
+ return {"url": match.group(1)}
36
227
 
37
- # IP network
38
- with suppress(Exception):
39
- ipaddress.ip_network(data, strict=False)
40
- return "IP_RANGE", data
41
228
 
42
- data = smart_encode_punycode(smart_decode(data).strip())
229
+ class BLACKLIST_REGEX(BaseEventSeed):
230
+ regexes = (re.compile(r"^(?:RE|REGEX):(.*)"),)
231
+ _target_type = "BLACKLIST"
43
232
 
44
- # Strict regexes
45
- for t, regexes in event_type_regexes.items():
46
- for r in regexes:
47
- if r.match(data):
48
- if t == "URL":
49
- return "URL_UNVERIFIED", data
50
- return t, data
233
+ def _override_input(self, input):
234
+ return f"REGEX:{self.data}"
51
235
 
52
- raise ValidationError(f'Unable to autodetect event type from "{data}"')
236
+ @staticmethod
237
+ def handle_match(match):
238
+ return match.group(1)
@@ -159,7 +159,7 @@ class ConfigAwareHelper:
159
159
  self.clean_old(self.scans_dir, keep=self.keep_old_scans, filter=_filter)
160
160
 
161
161
  def make_target(self, *targets, **kwargs):
162
- return BaseTarget(*targets, scan=self.scan, **kwargs)
162
+ return BaseTarget(*targets, **kwargs)
163
163
 
164
164
  @property
165
165
  def config(self):
@@ -46,7 +46,7 @@ _dns_name_regex_with_period = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_
46
46
  dns_name_extraction_regex = re.compile(_dns_name_regex_with_period, re.I)
47
47
  dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)
48
48
 
49
- _email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex
49
+ _email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex + r"(?::[0-9]{1,5})?"
50
50
  email_regex = re.compile(_email_regex, re.I)
51
51
 
52
52
  _ptr_regex = r"(?:[0-9]{1,3}[-_\.]){3}[0-9]{1,3}"
@@ -57,7 +57,7 @@ class WebHelper(EngineClient):
57
57
  self.ssl_verify = self.config.get("ssl_verify", False)
58
58
  engine_debug = self.config.get("engine", {}).get("debug", False)
59
59
  super().__init__(
60
- server_kwargs={"config": self.config, "target": self.parent_helper.preset.target.minimal},
60
+ server_kwargs={"config": self.config, "target": self.parent_helper.preset.target},
61
61
  debug=engine_debug,
62
62
  )
63
63
 
@@ -139,7 +139,9 @@ class nuclei(BaseModule):
139
139
  return True
140
140
 
141
141
  async def handle_batch(self, *events):
142
- temp_target = self.helpers.make_target(*events)
142
+ temp_target = self.helpers.make_target()
143
+ for e in events:
144
+ temp_target.add(e.data, e)
143
145
  nuclei_input = [str(e.data) for e in events]
144
146
  async for severity, template, tags, host, url, name, extracted_results in self.execute_nuclei(nuclei_input):
145
147
  # this is necessary because sometimes nuclei is inconsistent about the data returned in the host field
@@ -776,7 +776,7 @@ class excavate(BaseInternalModule, BaseInterceptModule):
776
776
  event_draft = self.excavate.make_event(event_data, event_type, parent=event)
777
777
  if not event_draft:
778
778
  return None
779
- url_in_scope = self.excavate.scan.in_scope(event_draft)
779
+ url_in_scope = self.excavate.scan.in_scope(event_draft.host_filterable)
780
780
  urls_found = kwargs.get("urls_found", None)
781
781
  if urls_found:
782
782
  exceeds_max_links = urls_found > self.excavate.scan.web_spider_links_per_page and url_in_scope
@@ -77,7 +77,7 @@ class asset_inventory(CSV):
77
77
  return False, "event is internal"
78
78
  if event.type not in self.watched_events:
79
79
  return False, "event type is not in watched_events"
80
- if not self.scan.in_scope(event):
80
+ if not self.scan.in_scope(event.host):
81
81
  return False, "event is not in scope"
82
82
  if "unresolved" in event.tags:
83
83
  return False, "event is unresolved"
bbot/modules/portscan.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import ipaddress
3
3
  from contextlib import suppress
4
- from radixtarget import RadixTarget
4
+ from radixtarget import RadixTarget, host_size_key
5
5
 
6
6
  from bbot.modules.base import BaseModule
7
7
 
@@ -65,8 +65,6 @@ class portscan(BaseModule):
65
65
  except ValueError as e:
66
66
  return False, f"Error parsing ports '{self.ports}': {e}"
67
67
 
68
- # whether we've finished scanning our original scan targets
69
- self.scanned_initial_targets = False
70
68
  # keeps track of individual scanned IPs and their open ports
71
69
  # this is necessary because we may encounter more hosts with the same IP
72
70
  # and we want to avoid scanning them again
@@ -93,14 +91,6 @@ class portscan(BaseModule):
93
91
  return True
94
92
 
95
93
  async def handle_batch(self, *events):
96
- # on our first run, we automatically include all our initial scan targets
97
- if not self.scanned_initial_targets:
98
- self.scanned_initial_targets = True
99
- events = set(events)
100
- events.update(
101
- {e for e in self.scan.target.seeds.events if e.type in ("DNS_NAME", "IP_ADDRESS", "IP_RANGE")}
102
- )
103
-
104
94
  # ping scan
105
95
  if self.ping_scan:
106
96
  ping_targets, ping_correlator = await self.make_targets(events, self.ping_scanned)
@@ -160,14 +150,13 @@ class portscan(BaseModule):
160
150
  """
161
151
  correlator = RadixTarget()
162
152
  targets = set()
163
- for event in sorted(events, key=lambda e: e._host_size):
153
+ for event in sorted(events, key=lambda e: host_size_key(e.host)):
164
154
  # skip events without host
165
155
  if not event.host:
166
156
  continue
167
157
  ips = set()
168
158
  try:
169
159
  # first assume it's an ip address / ip range
170
- # False == it's not a hostname
171
160
  ips.add(ipaddress.ip_network(event.host, strict=False))
172
161
  except Exception:
173
162
  # if it's a hostname, get its IPs from resolved_hosts
bbot/scanner/manager.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  from contextlib import suppress
3
+ from radixtarget.helpers import host_size_key
3
4
 
4
5
  from bbot.modules.base import BaseInterceptModule
5
6
 
@@ -29,7 +30,7 @@ class ScanIngress(BaseInterceptModule):
29
30
  # track incoming duplicates module-by-module (for `suppress_dupes` attribute of modules)
30
31
  self.incoming_dup_tracker = set()
31
32
 
32
- async def init_events(self, events=None):
33
+ async def init_events(self, event_seeds=None):
33
34
  """
34
35
  Initializes events by seeding the scanner with target events and distributing them for further processing.
35
36
 
@@ -37,21 +38,31 @@ class ScanIngress(BaseInterceptModule):
37
38
  - This method populates the event queue with initial target events.
38
39
  - It also marks the Scan object as finished with initialization by setting `_finished_init` to True.
39
40
  """
40
- if events is None:
41
- events = self.scan.target.seeds.events
42
- async with self.scan._acatch(self.init_events), self._task_counter.count(self.init_events):
43
- sorted_events = sorted(events, key=lambda e: len(e.data))
44
- for event in [self.scan.root_event] + sorted_events:
45
- event._dummy = False
46
- event.web_spider_distance = 0
47
- event.scan = self.scan
48
- if event.parent is None:
49
- event.parent = self.scan.root_event
50
- if event.module is None:
51
- event.module = self.scan._make_dummy_module(name="TARGET", _type="TARGET")
52
- if event != self.scan.root_event:
53
- event.discovery_context = f"Scan {self.scan.name} seeded with " + "{event.type}: {event.data}"
41
+ async with (
42
+ self.scan._acatch(self.init_events, unhandled_is_critical=True),
43
+ self._task_counter.count(self.init_events),
44
+ ):
45
+ if event_seeds is None:
46
+ event_seeds = self.scan.target.seeds.event_seeds
47
+ root_event = self.scan.root_event
48
+ event_seeds = sorted(event_seeds, key=lambda e: (host_size_key(str(e.host)), e.data))
49
+ # queue root scan event
50
+ await self.queue_event(root_event, {})
51
+ target_module = self.scan._make_dummy_module(name="TARGET", _type="TARGET")
52
+ # queue each target in turn
53
+ for event_seed in event_seeds:
54
+ event = self.scan.make_event(
55
+ event_seed.data,
56
+ event_seed.type,
57
+ parent=root_event,
58
+ module=target_module,
59
+ context=f"Scan {self.scan.name} seeded with " + "{event.type}: {event.data}",
60
+ tags=["target"],
61
+ )
54
62
  self.verbose(f"Target: {event}")
63
+ # don't fill up the queue with too many events
64
+ while self.incoming_event_queue.qsize() > 100:
65
+ await asyncio.sleep(0.2)
55
66
  await self.queue_event(event, {})
56
67
  await asyncio.sleep(0.1)
57
68
  self.scan._finished_init = True
@@ -95,7 +106,10 @@ class ScanIngress(BaseInterceptModule):
95
106
  event.add_tag("blacklisted")
96
107
 
97
108
  # main scan blacklist
98
- event_blacklisted = self.scan.blacklisted(event)
109
+ host_filterable = getattr(event, "host_filterable", None)
110
+ event_blacklisted = False
111
+ if host_filterable:
112
+ event_blacklisted = self.scan.blacklisted(host_filterable)
99
113
 
100
114
  # reject all blacklisted events
101
115
  if event_blacklisted or "blacklisted" in event.tags:
@@ -483,11 +483,7 @@ class Preset(metaclass=BasePreset):
483
483
  from bbot.scanner.target import BBOTTarget
484
484
 
485
485
  baked_preset._target = BBOTTarget(
486
- *list(self._seeds),
487
- whitelist=self._whitelist,
488
- blacklist=self._blacklist,
489
- strict_scope=self.strict_scope,
490
- scan=scan,
486
+ *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, strict_scope=self.strict_scope
491
487
  )
492
488
 
493
489
  # evaluate conditions