bbot 2.6.0.6840rc0__py3-none-any.whl → 2.7.2.7424rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +22 -8
  3. bbot/core/engine.py +1 -1
  4. bbot/core/event/__init__.py +2 -2
  5. bbot/core/event/base.py +138 -110
  6. bbot/core/flags.py +1 -0
  7. bbot/core/helpers/bloom.py +6 -7
  8. bbot/core/helpers/depsinstaller/installer.py +21 -2
  9. bbot/core/helpers/dns/dns.py +0 -1
  10. bbot/core/helpers/dns/engine.py +0 -2
  11. bbot/core/helpers/files.py +2 -2
  12. bbot/core/helpers/git.py +17 -0
  13. bbot/core/helpers/helper.py +6 -5
  14. bbot/core/helpers/misc.py +8 -23
  15. bbot/core/helpers/ntlm.py +0 -2
  16. bbot/core/helpers/regex.py +1 -1
  17. bbot/core/helpers/regexes.py +25 -8
  18. bbot/core/helpers/web/web.py +2 -1
  19. bbot/core/modules.py +22 -60
  20. bbot/defaults.yml +4 -2
  21. bbot/modules/apkpure.py +1 -1
  22. bbot/modules/baddns.py +1 -1
  23. bbot/modules/baddns_direct.py +1 -1
  24. bbot/modules/baddns_zone.py +1 -1
  25. bbot/modules/badsecrets.py +1 -1
  26. bbot/modules/base.py +123 -38
  27. bbot/modules/bucket_amazon.py +1 -1
  28. bbot/modules/bucket_digitalocean.py +1 -1
  29. bbot/modules/bucket_firebase.py +1 -1
  30. bbot/modules/bucket_google.py +1 -1
  31. bbot/modules/{bucket_azure.py → bucket_microsoft.py} +2 -2
  32. bbot/modules/builtwith.py +4 -2
  33. bbot/modules/dnsbimi.py +1 -4
  34. bbot/modules/dnsbrute.py +6 -1
  35. bbot/modules/dnsdumpster.py +35 -52
  36. bbot/modules/dnstlsrpt.py +0 -6
  37. bbot/modules/docker_pull.py +1 -1
  38. bbot/modules/emailformat.py +17 -1
  39. bbot/modules/ffuf.py +4 -1
  40. bbot/modules/ffuf_shortnames.py +6 -3
  41. bbot/modules/filedownload.py +7 -4
  42. bbot/modules/git_clone.py +47 -22
  43. bbot/modules/gitdumper.py +4 -14
  44. bbot/modules/github_workflows.py +6 -5
  45. bbot/modules/gitlab_com.py +31 -0
  46. bbot/modules/gitlab_onprem.py +84 -0
  47. bbot/modules/gowitness.py +0 -6
  48. bbot/modules/graphql_introspection.py +5 -2
  49. bbot/modules/httpx.py +2 -0
  50. bbot/modules/iis_shortnames.py +0 -7
  51. bbot/modules/internal/cloudcheck.py +65 -72
  52. bbot/modules/internal/unarchive.py +9 -3
  53. bbot/modules/lightfuzz/lightfuzz.py +6 -2
  54. bbot/modules/lightfuzz/submodules/esi.py +42 -0
  55. bbot/modules/medusa.py +4 -7
  56. bbot/modules/nuclei.py +1 -1
  57. bbot/modules/otx.py +9 -2
  58. bbot/modules/output/base.py +3 -11
  59. bbot/modules/paramminer_headers.py +10 -7
  60. bbot/modules/portfilter.py +2 -0
  61. bbot/modules/postman_download.py +1 -1
  62. bbot/modules/retirejs.py +232 -0
  63. bbot/modules/securitytxt.py +0 -3
  64. bbot/modules/sslcert.py +2 -2
  65. bbot/modules/subdomaincenter.py +1 -16
  66. bbot/modules/telerik.py +7 -2
  67. bbot/modules/templates/bucket.py +24 -4
  68. bbot/modules/templates/gitlab.py +98 -0
  69. bbot/modules/trufflehog.py +6 -3
  70. bbot/modules/wafw00f.py +2 -2
  71. bbot/presets/web/lightfuzz-heavy.yml +1 -1
  72. bbot/presets/web/lightfuzz-medium.yml +1 -1
  73. bbot/presets/web/lightfuzz-superheavy.yml +1 -1
  74. bbot/scanner/manager.py +44 -37
  75. bbot/scanner/scanner.py +12 -4
  76. bbot/scripts/benchmark_report.py +433 -0
  77. bbot/test/benchmarks/__init__.py +2 -0
  78. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  79. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  80. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  81. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  82. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  83. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  84. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  85. bbot/test/test_step_1/test_events.py +22 -21
  86. bbot/test/test_step_1/test_helpers.py +1 -0
  87. bbot/test/test_step_1/test_manager_scope_accuracy.py +45 -0
  88. bbot/test/test_step_1/test_modules_basic.py +40 -15
  89. bbot/test/test_step_1/test_python_api.py +2 -2
  90. bbot/test/test_step_1/test_regexes.py +21 -4
  91. bbot/test/test_step_1/test_scan.py +7 -8
  92. bbot/test/test_step_1/test_web.py +46 -0
  93. bbot/test/test_step_2/module_tests/base.py +6 -1
  94. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +52 -18
  95. bbot/test/test_step_2/module_tests/test_module_bucket_google.py +1 -1
  96. bbot/test/test_step_2/module_tests/{test_module_bucket_azure.py → test_module_bucket_microsoft.py} +7 -5
  97. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +19 -31
  98. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  99. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  100. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  101. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  102. bbot/test/test_step_2/module_tests/test_module_excavate.py +57 -4
  103. bbot/test/test_step_2/module_tests/test_module_github_workflows.py +10 -1
  104. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  105. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  106. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +71 -3
  107. bbot/test/test_step_2/module_tests/test_module_nuclei.py +1 -2
  108. bbot/test/test_step_2/module_tests/test_module_otx.py +3 -0
  109. bbot/test/test_step_2/module_tests/test_module_portfilter.py +2 -0
  110. bbot/test/test_step_2/module_tests/test_module_retirejs.py +161 -0
  111. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  112. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +10 -1
  113. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/METADATA +10 -7
  114. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/RECORD +117 -106
  115. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/WHEEL +1 -1
  116. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info/licenses}/LICENSE +98 -58
  117. bbot/modules/censys.py +0 -98
  118. bbot/modules/gitlab.py +0 -141
  119. bbot/modules/zoomeye.py +0 -77
  120. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  121. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  122. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/entry_points.txt +0 -0
@@ -65,7 +65,7 @@ class RegexHelper:
65
65
 
66
66
  while tasks: # While there are tasks pending
67
67
  # Wait for the first task to complete
68
- done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
68
+ done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
69
69
 
70
70
  for task in done:
71
71
  result = task.result()
@@ -23,13 +23,28 @@ num_regex = re.compile(r"\d+")
23
23
  _ipv4_regex = r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"
24
24
  ipv4_regex = re.compile(_ipv4_regex, re.I)
25
25
 
26
- # IPv6 is complicated, so we have accommodate alternative patterns,
27
- # :(:[A-F0-9]{1,4}){1,7} == ::1, ::ffff:1
28
- # ([A-F0-9]{1,4}:){1,7}: == 2001::, 2001:db8::, 2001:db8:0:1:2:3::
29
- # ([A-F0-9]{1,4}:){1,6}:([A-F0-9]{1,4}) == 2001::1, 2001:db8::1, 2001:db8:0:1:2:3::1
30
- # ([A-F0-9]{1,4}:){7,7}([A-F0-9]{1,4}) == 1:1:1:1:1:1:1:1, ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
31
-
32
- _ipv6_regex = r"(:(:[A-F0-9]{1,4}){1,7}|([A-F0-9]{1,4}:){1,7}:|([A-F0-9]{1,4}:){1,6}:([A-F0-9]{1,4})|([A-F0-9]{1,4}:){7,7}([A-F0-9]{1,4}))"
26
+ # IPv6 regex breakdown:
27
+ #
28
+ # (?: # —— address body ——
29
+ # We have to individually account for all possible variations of: "N left hextets :: M right hextets" with N+M ≤ 8 or fully expanded 8 hextets.
30
+ # (?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4} # 8 hextets, no compression.
31
+ # | (?:[A-F0-9]{1,4}:){1,7}: # 1–7 left, then "::" (0 right).
32
+ # | (?:[A-F0-9]{1,4}:){1,6}:[A-F0-9]{1,4} # 1–6 left, "::", 1 right.
33
+ # | (?:[A-F0-9]{1,4}:){1,5}(?::[A-F0-9]{1,4}){1,2} # 1–5 left, "::", 1–2 right.
34
+ # | (?:[A-F0-9]{1,4}:){1,4}(?::[A-F0-9]{1,4}){1,3} # 1–4 left, "::", 1–3 right.
35
+ # | (?:[A-F0-9]{1,4}:){1,3}(?::[A-F0-9]{1,4}){1,4} # 1–3 left, "::", 1–4 right.
36
+ # | (?:[A-F0-9]{1,4}:){1,2}(?::[A-F0-9]{1,4}){1,5} # 1–2 left, "::", 1–5 right.
37
+ # | [A-F0-9]{1,4}:(?::[A-F0-9]{1,4}){1,6} # 1 left, "::", 1–6 right.
38
+ # | :(?::[A-F0-9]{1,4}){1,7} # 0 left, "::", 1–7 right.
39
+ # | :: # all zeros.
40
+ # )
41
+ #
42
+ # Notes:
43
+ # - Does not match IPv4-embedded forms (e.g., ::ffff:192.0.2.1).
44
+ # - Does not match zone IDs (e.g., %eth0).
45
+ # - Pure syntax check; will not validate special ranges.
46
+
47
+ _ipv6_regex = r"(?:(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}|(?:[A-F0-9]{1,4}:){1,7}:|(?:[A-F0-9]{1,4}:){1,6}:[A-F0-9]{1,4}|(?:[A-F0-9]{1,4}:){1,5}(?::[A-F0-9]{1,4}){1,2}|(?:[A-F0-9]{1,4}:){1,4}(?::[A-F0-9]{1,4}){1,3}|(?:[A-F0-9]{1,4}:){1,3}(?::[A-F0-9]{1,4}){1,4}|(?:[A-F0-9]{1,4}:){1,2}(?::[A-F0-9]{1,4}){1,5}|[A-F0-9]{1,4}:(?::[A-F0-9]{1,4}){1,6}|:(?::[A-F0-9]{1,4}){1,7}|::)"
33
48
  ipv6_regex = re.compile(_ipv6_regex, re.I)
34
49
 
35
50
  _ip_range_regexes = (
@@ -173,7 +188,9 @@ button_tag_regex2 = re.compile(
173
188
  )
174
189
  tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\'\"\>]+)[\"\']?[^>]*>")
175
190
 
176
- valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+"
191
+ _invalid_netloc_chars = r"\s!@#$%^&()=/?\\'\";~`<>"
192
+ # first char must not be a colon, even though it's a valid char for a netloc
193
+ valid_netloc = r"[^" + (_invalid_netloc_chars + ":") + r"]{1}[^" + _invalid_netloc_chars + "]*"
177
194
 
178
195
  _split_host_port_regex = r"(?:(?P<scheme>[a-z0-9]{1,20})://)?(?:[^?]*@)?(?P<netloc>" + valid_netloc + ")"
179
196
  split_host_port_regex = re.compile(_split_host_port_regex, re.I)
@@ -267,7 +267,8 @@ class WebHelper(EngineClient):
267
267
  if not path:
268
268
  raise WordlistError(f"Invalid wordlist: {path}")
269
269
  if "cache_hrs" not in kwargs:
270
- kwargs["cache_hrs"] = 720
270
+ # 4320 hrs = 180 days = 6 months
271
+ kwargs["cache_hrs"] = 4320
271
272
  if self.parent_helper.is_url(path):
272
273
  filename = await self.download(str(path), **kwargs)
273
274
  if filename is None:
bbot/core/modules.py CHANGED
@@ -56,7 +56,6 @@ class ModuleLoader:
56
56
  self._shared_deps = dict(SHARED_DEPS)
57
57
 
58
58
  self.__preloaded = {}
59
- self._modules = {}
60
59
  self._configs = {}
61
60
  self.flag_choices = set()
62
61
  self.all_module_choices = set()
@@ -165,8 +164,10 @@ class ModuleLoader:
165
164
  if module_dir.name in ("output", "internal"):
166
165
  module_type = str(module_dir.name)
167
166
 
167
+ disable_auto_module_deps = preloaded.get("disable_auto_module_deps", False)
168
+
168
169
  # derive module dependencies from watched event types (only for scan modules)
169
- if module_type == "scan":
170
+ if module_type == "scan" and not disable_auto_module_deps:
170
171
  for event_type in preloaded["watched_events"]:
171
172
  if event_type in self.default_module_deps:
172
173
  deps_modules = set(preloaded.get("deps", {}).get("modules", []))
@@ -329,6 +330,7 @@ class ModuleLoader:
329
330
  ansible_tasks = []
330
331
  config = {}
331
332
  options_desc = {}
333
+ disable_auto_module_deps = False
332
334
  python_code = open(module_file).read()
333
335
  # take a hash of the code so we can keep track of when it changes
334
336
  module_hash = sha1(python_code).hexdigest()
@@ -353,8 +355,11 @@ class ModuleLoader:
353
355
  # look for classes
354
356
  if type(root_element) == ast.ClassDef:
355
357
  for class_attr in root_element.body:
358
+ if not type(class_attr) == ast.Assign:
359
+ continue
360
+
356
361
  # class attributes that are dictionaries
357
- if type(class_attr) == ast.Assign and type(class_attr.value) == ast.Dict:
362
+ if type(class_attr.value) == ast.Dict:
358
363
  # module options
359
364
  if any(target.id == "options" for target in class_attr.targets):
360
365
  config.update(ast.literal_eval(class_attr.value))
@@ -366,7 +371,7 @@ class ModuleLoader:
366
371
  meta = ast.literal_eval(class_attr.value)
367
372
 
368
373
  # class attributes that are lists
369
- if type(class_attr) == ast.Assign and type(class_attr.value) == ast.List:
374
+ if type(class_attr.value) == ast.List:
370
375
  # flags
371
376
  if any(target.id == "flags" for target in class_attr.targets):
372
377
  for flag in class_attr.value.elts:
@@ -415,6 +420,12 @@ class ModuleLoader:
415
420
  if type(dep_common.value) == str:
416
421
  deps_common.append(dep_common.value)
417
422
 
423
+ # class attributes that are booleans
424
+ if type(class_attr.value) == ast.Constant:
425
+ if any(target.id == "_disable_auto_module_deps" for target in class_attr.targets):
426
+ if type(class_attr.value.value) == bool:
427
+ disable_auto_module_deps = class_attr.value.value
428
+
418
429
  for task in ansible_tasks:
419
430
  if "become" not in task:
420
431
  task["become"] = False
@@ -441,6 +452,7 @@ class ModuleLoader:
441
452
  "common": deps_common,
442
453
  },
443
454
  "sudo": len(deps_apt) > 0,
455
+ "disable_auto_module_deps": disable_auto_module_deps,
444
456
  }
445
457
  ansible_task_list = list(ansible_tasks)
446
458
  for dep_common in deps_common:
@@ -461,9 +473,13 @@ class ModuleLoader:
461
473
  def load_modules(self, module_names):
462
474
  modules = {}
463
475
  for module_name in module_names:
464
- module = self.load_module(module_name)
476
+ try:
477
+ module = self.load_module(module_name)
478
+ except ModuleNotFoundError as e:
479
+ raise BBOTError(
480
+ f"Error loading module {module_name}: {e}. You may have leftover artifacts from an older version of BBOT. Try deleting/renaming your '~/.bbot' directory."
481
+ ) from e
465
482
  modules[module_name] = module
466
- self._modules[module_name] = module
467
483
  return modules
468
484
 
469
485
  def load_module(self, module_name):
@@ -512,60 +528,6 @@ class ModuleLoader:
512
528
  # then we have a module
513
529
  return value
514
530
 
515
- def recommend_dependencies(self, modules):
516
- """
517
- Returns a dictionary containing missing dependencies and their suggested resolutions
518
-
519
- Needs work. For this we should probably be building a dependency graph
520
- """
521
- resolve_choices = {}
522
- # step 1: build a dictionary containing event types and their associated modules
523
- # {"IP_ADDRESS": set("masscan", "ipneighbor", ...)}
524
- watched = {}
525
- produced = {}
526
- for modname in modules:
527
- preloaded = self._preloaded.get(modname)
528
- if preloaded:
529
- for event_type in preloaded.get("watched_events", []):
530
- self.add_or_create(watched, event_type, modname)
531
- for event_type in preloaded.get("produced_events", []):
532
- self.add_or_create(produced, event_type, modname)
533
- watched_all = {}
534
- produced_all = {}
535
- for modname, preloaded in self.preloaded().items():
536
- if preloaded:
537
- for event_type in preloaded.get("watched_events", []):
538
- self.add_or_create(watched_all, event_type, modname)
539
- for event_type in preloaded.get("produced_events", []):
540
- self.add_or_create(produced_all, event_type, modname)
541
-
542
- # step 2: check to see if there are missing dependencies
543
- for modname in modules:
544
- preloaded = self._preloaded.get(modname)
545
- module_type = preloaded.get("type", "unknown")
546
- if module_type != "scan":
547
- continue
548
- watched_events = preloaded.get("watched_events", [])
549
- missing_deps = {e: not self.check_dependency(e, modname, produced) for e in watched_events}
550
- if all(missing_deps.values()):
551
- for event_type in watched_events:
552
- if event_type == "SCAN":
553
- continue
554
- choices = produced_all.get(event_type, [])
555
- choices = set(choices)
556
- with suppress(KeyError):
557
- choices.remove(modname)
558
- if event_type not in resolve_choices:
559
- resolve_choices[event_type] = {}
560
- deps = resolve_choices[event_type]
561
- self.add_or_create(deps, "required_by", modname)
562
- for c in choices:
563
- choice_type = self._preloaded.get(c, {}).get("type", "unknown")
564
- if choice_type == "scan":
565
- self.add_or_create(deps, "recommended", c)
566
-
567
- return resolve_choices
568
-
569
531
  def check_dependency(self, event_type, modname, produced):
570
532
  if event_type not in produced:
571
533
  return False
bbot/defaults.yml CHANGED
@@ -187,8 +187,10 @@ url_extension_blacklist:
187
187
  - mov
188
188
  - flv
189
189
  - webm
190
- # Distribute URLs with these extensions only to httpx (these are omitted from output)
191
- url_extension_httpx_only:
190
+
191
+ # URLs with these extensions are not distributed to modules unless the module opts in via `accept_url_special = True`
192
+ # They are also excluded from output. If you want to see them in output, remove them from this list.
193
+ url_extension_special:
192
194
  - js
193
195
 
194
196
  # These url extensions are almost always static, so we exclude them from modules that fuzz things
bbot/modules/apkpure.py CHANGED
@@ -6,7 +6,7 @@ from bbot.modules.base import BaseModule
6
6
  class apkpure(BaseModule):
7
7
  watched_events = ["MOBILE_APP"]
8
8
  produced_events = ["FILESYSTEM"]
9
- flags = ["passive", "safe", "code-enum"]
9
+ flags = ["passive", "safe", "code-enum", "download"]
10
10
  meta = {
11
11
  "description": "Download android applications from apkpure.com",
12
12
  "created_date": "2024-10-11",
bbot/modules/baddns.py CHANGED
@@ -22,7 +22,7 @@ class baddns(BaseModule):
22
22
  "enabled_submodules": "A list of submodules to enable. Empty list (default) enables CNAME, TXT and MX Only",
23
23
  }
24
24
  module_threads = 8
25
- deps_pip = ["baddns~=1.9.130"]
25
+ deps_pip = ["baddns~=1.12.294"]
26
26
 
27
27
  def select_modules(self):
28
28
  selected_submodules = []
@@ -19,7 +19,7 @@ class baddns_direct(BaseModule):
19
19
  "custom_nameservers": "Force BadDNS to use a list of custom nameservers",
20
20
  }
21
21
  module_threads = 8
22
- deps_pip = ["baddns~=1.9.130"]
22
+ deps_pip = ["baddns~=1.12.294"]
23
23
 
24
24
  scope_distance_modifier = 1
25
25
 
@@ -16,7 +16,7 @@ class baddns_zone(baddns_module):
16
16
  "only_high_confidence": "Do not emit low-confidence or generic detections",
17
17
  }
18
18
  module_threads = 8
19
- deps_pip = ["baddns~=1.9.130"]
19
+ deps_pip = ["baddns~=1.12.294"]
20
20
 
21
21
  def set_modules(self):
22
22
  self.enabled_submodules = ["NSEC", "zonetransfer"]
@@ -17,7 +17,7 @@ class badsecrets(BaseModule):
17
17
  options_desc = {
18
18
  "custom_secrets": "Include custom secrets loaded from a local file",
19
19
  }
20
- deps_pip = ["badsecrets~=0.9.29"]
20
+ deps_pip = ["badsecrets~=0.13.47"]
21
21
 
22
22
  async def setup(self):
23
23
  self.custom_secrets = None
bbot/modules/base.py CHANGED
@@ -4,9 +4,10 @@ import traceback
4
4
  from sys import exc_info
5
5
  from contextlib import suppress
6
6
 
7
- from ..errors import ValidationError
8
7
  from ..core.helpers.misc import get_size # noqa
8
+ from ..errors import ValidationError, WebError
9
9
  from ..core.helpers.async_helpers import TaskCounter, ShuffleQueue
10
+ from ..core.event import is_event
10
11
 
11
12
 
12
13
  class BaseModule:
@@ -53,6 +54,8 @@ class BaseModule:
53
54
 
54
55
  in_scope_only (bool): Accept only explicitly in-scope events, regardless of the scan's search distance. Default is False.
55
56
 
57
+ accept_url_special (bool): Accept "special" URLs not typically distributed to web modules, e.g. JS URLs. Default is False.
58
+
56
59
  options (Dict): Customizable options for the module, e.g., {"api_key": ""}. Empty dict by default.
57
60
 
58
61
  options_desc (Dict): Descriptions for options, e.g., {"api_key": "API Key"}. Empty dict by default.
@@ -67,6 +70,8 @@ class BaseModule:
67
70
 
68
71
  _stats_exclude (bool): Whether to exclude this module from scan statistics. Default is False.
69
72
 
73
+ _disable_auto_module_deps (bool): Whether to disable automatic module dependencies. This is useful e.g. if the module consumes URLs, but you don't want to automatically enable the httpx module. Default is False.
74
+
70
75
  _qsize (int): Outgoing queue size (0 for infinite). Default is 0.
71
76
 
72
77
  _priority (int): Priority level of the module. Lower values are higher priority. Default is 3.
@@ -97,7 +102,7 @@ class BaseModule:
97
102
  scope_distance_modifier = 0
98
103
  target_only = False
99
104
  in_scope_only = False
100
-
105
+ accept_url_special = False
101
106
  _module_threads = 1
102
107
  _batch_size = 1
103
108
 
@@ -110,6 +115,7 @@ class BaseModule:
110
115
 
111
116
  _preserve_graph = False
112
117
  _stats_exclude = False
118
+ _disable_auto_module_deps = False
113
119
  _qsize = 1000
114
120
  _priority = 3
115
121
  _name = "base"
@@ -163,7 +169,6 @@ class BaseModule:
163
169
  self._default_handle_batch_timeout = self.scan.config.get(
164
170
  "module_handle_batch_timeout", 60 * 60 * 2
165
171
  ) # 2 hours
166
- self._event_handler_watchdog_task = None
167
172
  self._event_handler_watchdog_interval = self.event_handler_timeout / 10
168
173
 
169
174
  # used for optional "per host" tracking
@@ -211,6 +216,14 @@ class BaseModule:
211
216
 
212
217
  return True
213
218
 
219
+ async def setup_deps(self):
220
+ """
221
+ Similar to setup(), but reserved for installing dependencies not covered by Ansible.
222
+
223
+ This should always be used to install static dependencies like AI models, wordlists, etc.
224
+ """
225
+ return True
226
+
214
227
  async def handle_event(self, event, **kwargs):
215
228
  """Asynchronously handles incoming events that the module is configured to watch.
216
229
 
@@ -510,6 +523,12 @@ class BaseModule:
510
523
  if (not args) or getattr(args[0], "module", None) is None:
511
524
  kwargs["module"] = self
512
525
  try:
526
+ if args and is_event(args[0]):
527
+ raise ValidationError(
528
+ f"{self.__class__.__name__}.make_event() does not accept an existing event "
529
+ f"({type(args[0]).__name__}) as the first argument. "
530
+ "Use update_event(event, ...) or emit_event(event, ...) instead."
531
+ )
513
532
  event = self.scan.make_event(*args, **kwargs)
514
533
  except ValidationError as e:
515
534
  if raise_error:
@@ -518,6 +537,39 @@ class BaseModule:
518
537
  return
519
538
  return event
520
539
 
540
+ def update_event(self, event, **kwargs):
541
+ """Update an existing event for the scan.
542
+
543
+ This is the counterpart to :meth:`make_event` for modifying an existing
544
+ :class:`bbot.core.event.base.BaseEvent` instance.
545
+
546
+ Raises a validation error if the update could not be applied, unless
547
+ ``raise_error`` is set to False.
548
+
549
+ Args:
550
+ event: The event object to update.
551
+ **kwargs: Keyword arguments to be passed to the scan's update_event method.
552
+ raise_error (bool, optional): Whether to raise a validation error if the event could not be updated. Defaults to False.
553
+
554
+ Returns:
555
+ Event or None: The updated event, or None if a validation error occurred and raise_error was False.
556
+
557
+ Raises:
558
+ ValidationError: If the event could not be validated and raise_error is True.
559
+ """
560
+ raise_error = kwargs.pop("raise_error", False)
561
+ module = kwargs.pop("module", None)
562
+ if module is None and getattr(event, "module", None) is None:
563
+ kwargs["module"] = self
564
+ try:
565
+ updated = self.scan.update_event(event, **kwargs)
566
+ except ValidationError as e:
567
+ if raise_error:
568
+ raise
569
+ self.warning(f"{e}")
570
+ return
571
+ return updated
572
+
521
573
  async def emit_event(self, *args, **kwargs):
522
574
  """Emit an event to the event queue and distribute it to interested modules.
523
575
 
@@ -553,7 +605,23 @@ class BaseModule:
553
605
  v = event_kwargs.pop(o, None)
554
606
  if v is not None:
555
607
  emit_kwargs[o] = v
556
- event = self.make_event(*args, **event_kwargs)
608
+
609
+ # Two entry points:
610
+ # - emit_event(data, ...) -> create a new event via make_event()
611
+ # - emit_event(existing_event, ...) -> update and re‑emit that event
612
+ if args and is_event(args[0]):
613
+ event, *rest = args
614
+ if rest:
615
+ self.warning(
616
+ f"emit_event() was called on {self.name} with an existing event and extra "
617
+ f"positional args ({rest}); extra args are ignored. "
618
+ "Pass only the event plus keyword arguments, or call make_event() explicitly."
619
+ )
620
+ # Update the existing event (e.g. tags/context/module) before emitting
621
+ event = self.update_event(event, **event_kwargs)
622
+ else:
623
+ event = self.make_event(*args, **event_kwargs)
624
+
557
625
  if event is not None:
558
626
  children = event.children
559
627
  for e in [event] + children:
@@ -613,44 +681,32 @@ class BaseModule:
613
681
  asyncio.create_task(self._worker(), name=f"{self.scan.name}.{self.name}._worker()")
614
682
  for _ in range(self.module_threads)
615
683
  ]
616
- self._event_handler_watchdog_task = asyncio.create_task(
684
+ watchdog_task = asyncio.create_task(
617
685
  self._event_handler_watchdog(),
618
686
  name=f"{self.scan.name}.{self.name}._event_handler_watchdog()",
619
687
  )
688
+ self._tasks.append(watchdog_task)
620
689
 
621
- async def _setup(self):
622
- """
623
- Asynchronously sets up the module by invoking its 'setup()' method.
624
-
625
- This method catches exceptions during setup, sets the module's error state if necessary, and determines the
626
- status code based on the result of the setup process.
627
-
628
- Args:
629
- None
630
-
631
- Returns:
632
- tuple: A tuple containing the module's name, status (True for success, False for hard-fail, None for soft-fail),
633
- and an optional status message.
634
-
635
- Raises:
636
- Exception: Captured exceptions from the 'setup()' method are logged, but not propagated.
637
-
638
- Notes:
639
- - The 'setup()' method can return either a simple boolean status or a tuple of status and message.
640
- - A WordlistError exception triggers a soft-fail status.
641
- - The debug log will contain setup status information for the module.
642
- """
690
+ async def _setup(self, deps_only=False):
691
+ """ """
643
692
  status_codes = {False: "hard-fail", None: "soft-fail", True: "success"}
644
693
 
645
694
  status = False
646
695
  self.debug(f"Setting up module {self.name}")
647
696
  try:
648
- result = await self.setup()
649
- if type(result) == tuple and len(result) == 2:
650
- status, msg = result
651
- else:
652
- status = result
653
- msg = status_codes[status]
697
+ funcs = [self.setup_deps]
698
+ if not deps_only:
699
+ funcs.append(self.setup)
700
+ for func in funcs:
701
+ self.debug(f"Running {self.name}.{func.__name__}()")
702
+ result = await func()
703
+ if type(result) == tuple and len(result) == 2:
704
+ status, msg = result
705
+ else:
706
+ status = result
707
+ msg = status_codes[status]
708
+ if status is False:
709
+ break
654
710
  self.debug(f"Finished setting up module {self.name}")
655
711
  except Exception as e:
656
712
  self.set_error_state(f"Unexpected error during module setup: {e}", critical=True)
@@ -738,6 +794,9 @@ class BaseModule:
738
794
 
739
795
  @property
740
796
  def max_scope_distance(self):
797
+ """
798
+ Maximum scope distance for events that are accepted by the module.
799
+ """
741
800
  if self.in_scope_only or self.target_only:
742
801
  return 0
743
802
  if self.scope_distance_modifier is None:
@@ -785,10 +844,14 @@ class BaseModule:
785
844
  if "target" not in event.tags:
786
845
  return False, "it did not meet target_only filter criteria"
787
846
 
788
- # exclude certain URLs (e.g. javascript):
789
- # TODO: revisit this after httpx rework
790
- if event.type.startswith("URL") and self.name != "httpx" and "httpx-only" in event.tags:
791
- return False, "its extension was listed in url_extension_httpx_only"
847
+ # limit js URLs to modules that opt in to receive them
848
+ if (not self.accept_url_special) and event.type.startswith("URL"):
849
+ extension = getattr(event, "url_extension", "")
850
+ if extension in self.scan.url_extension_special:
851
+ return (
852
+ False,
853
+ f"it is a special URL (extension {extension}) but the module does not opt in to receive special URLs",
854
+ )
792
855
 
793
856
  return True, "precheck succeeded"
794
857
 
@@ -1248,6 +1311,24 @@ class BaseModule:
1248
1311
 
1249
1312
  return r
1250
1313
 
1314
+ async def api_download(self, url, **kwargs):
1315
+ """
1316
+ A wrapper around the `download()` web helper that incorporates API key cycling.
1317
+ """
1318
+ error = None
1319
+ raise_error = kwargs.pop("raise_error", False)
1320
+ for _ in range(self.api_retries):
1321
+ new_url, kwargs = self.prepare_api_request(url, kwargs)
1322
+ if "raise_error" not in kwargs:
1323
+ kwargs["raise_error"] = True
1324
+ try:
1325
+ return await self.helpers.download(new_url, **kwargs)
1326
+ except WebError as e:
1327
+ error = e
1328
+ self.cycle_api_key()
1329
+ if raise_error:
1330
+ raise error
1331
+
1251
1332
  def _get_retry_after(self, r):
1252
1333
  # try to get retry_after from headers first
1253
1334
  headers = getattr(r, "headers", {})
@@ -1259,7 +1340,10 @@ class BaseModule:
1259
1340
  if isinstance(body_json, dict):
1260
1341
  retry_after = body_json.get("retry_after", None)
1261
1342
  if retry_after is not None:
1262
- return float(retry_after)
1343
+ # we don't allow retry-after smaller than 1 second
1344
+ # this is to prevent cases where APIs erroneously return a retry-after value of 0
1345
+ # e.g. https://github.com/blacklanternsecurity/bbot/issues/2826
1346
+ return max(1.0, float(retry_after))
1263
1347
 
1264
1348
  def _prepare_api_iter_req(self, url, page, page_size, offset, **requests_kwargs):
1265
1349
  """
@@ -1715,6 +1799,7 @@ class BaseInterceptModule(BaseModule):
1715
1799
  """
1716
1800
 
1717
1801
  accept_dupes = True
1802
+ accept_url_special = True
1718
1803
  _intercept = True
1719
1804
 
1720
1805
  async def _worker(self):
@@ -16,7 +16,7 @@ class bucket_amazon(bucket_template):
16
16
  }
17
17
  scope_distance_modifier = 3
18
18
 
19
- cloud_helper_name = "amazon"
19
+ cloudcheck_provider_name = "Amazon"
20
20
  delimiters = ("", ".", "-")
21
21
  base_domains = ["s3.amazonaws.com"]
22
22
  regions = [None]
@@ -15,7 +15,7 @@ class bucket_digitalocean(bucket_template):
15
15
  "permutations": "Whether to try permutations",
16
16
  }
17
17
 
18
- cloud_helper_name = "digitalocean"
18
+ cloudcheck_provider_name = "DigitalOcean"
19
19
  delimiters = ("", "-")
20
20
  base_domains = ["digitaloceanspaces.com"]
21
21
  regions = ["ams3", "fra1", "nyc3", "sfo2", "sfo3", "sgp1"]
@@ -15,7 +15,7 @@ class bucket_firebase(bucket_template):
15
15
  "permutations": "Whether to try permutations",
16
16
  }
17
17
 
18
- cloud_helper_name = "google"
18
+ cloudcheck_provider_name = "Google"
19
19
  delimiters = ("", "-")
20
20
  base_domains = ["firebaseio.com"]
21
21
 
@@ -19,7 +19,7 @@ class bucket_google(bucket_template):
19
19
  "permutations": "Whether to try permutations",
20
20
  }
21
21
 
22
- cloud_helper_name = "google"
22
+ cloudcheck_provider_name = "Google"
23
23
  delimiters = ("", "-", ".", "_")
24
24
  base_domains = ["storage.googleapis.com"]
25
25
  bad_permissions = [
@@ -1,7 +1,7 @@
1
1
  from bbot.modules.templates.bucket import bucket_template
2
2
 
3
3
 
4
- class bucket_azure(bucket_template):
4
+ class bucket_microsoft(bucket_template):
5
5
  watched_events = ["DNS_NAME", "STORAGE_BUCKET"]
6
6
  produced_events = ["STORAGE_BUCKET", "FINDING"]
7
7
  flags = ["active", "safe", "cloud-enum", "web-basic"]
@@ -15,7 +15,7 @@ class bucket_azure(bucket_template):
15
15
  "permutations": "Whether to try permutations",
16
16
  }
17
17
 
18
- cloud_helper_name = "azure"
18
+ cloudcheck_provider_name = "Microsoft"
19
19
  delimiters = ("", "-")
20
20
  base_domains = ["blob.core.windows.net"]
21
21
  # Dirbusting is required to know whether a bucket is public
bbot/modules/builtwith.py CHANGED
@@ -33,7 +33,8 @@ class builtwith(subdomain_enum_apikey):
33
33
  subdomains = await self.query(query, parse_fn=self.parse_domains, request_fn=self.request_domains)
34
34
  if subdomains:
35
35
  for s in subdomains:
36
- if s != event:
36
+ # `s` is a hostname string; compare against the event's data, not the Event object itself.
37
+ if s != event.data:
37
38
  await self.emit_event(
38
39
  s,
39
40
  "DNS_NAME",
@@ -45,7 +46,8 @@ class builtwith(subdomain_enum_apikey):
45
46
  redirects = await self.query(query, parse_fn=self.parse_redirects, request_fn=self.request_redirects)
46
47
  if redirects:
47
48
  for r in redirects:
48
- if r != event:
49
+ # `r` is a hostname string; compare against the event's data, not the Event object itself.
50
+ if r != event.data:
49
51
  await self.emit_event(
50
52
  r,
51
53
  "DNS_NAME",
bbot/modules/dnsbimi.py CHANGED
@@ -39,7 +39,7 @@ import re
39
39
  # Handle "v=BIMI1; l=https://bimi.entrust.net/example.com/logo.svg;"
40
40
  # Handle "v=BIMI1;l=https://bimi.entrust.net/example.com/logo.svg;a=https://bimi.entrust.net/example.com/certchain.pem"
41
41
  # Handle "v=BIMI1; l=https://bimi.entrust.net/example.com/logo.svg;a=https://bimi.entrust.net/example.com/certchain.pem;"
42
- _bimi_regex = r"^v=(?P<v>BIMI1);* *(l=(?P<l>https*://[^;]*|)|);*( *a=((?P<a>https://[^;]*|)|);*)*$"
42
+ _bimi_regex = r"^v=(?P<v>BIMI1);\s?(?:l=(?P<l>https?://[^;\s]{1,255})?)?;?(?:\s?a=(?P<a>https://[^;\s]{1,255})?;?)?$"
43
43
  bimi_regex = re.compile(_bimi_regex, re.I)
44
44
 
45
45
 
@@ -140,6 +140,3 @@ class dnsbimi(BaseModule):
140
140
 
141
141
  async def handle_event(self, event):
142
142
  await self.inspectBIMI(event, event.host)
143
-
144
-
145
- # EOF