bbot 2.3.0.5546rc0__py3-none-any.whl → 2.3.1.5815rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic; see the registry's advisory for details.

Files changed (116)
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +1 -1
  3. bbot/core/engine.py +1 -1
  4. bbot/core/event/base.py +7 -5
  5. bbot/core/helpers/async_helpers.py +7 -1
  6. bbot/core/helpers/depsinstaller/installer.py +7 -2
  7. bbot/core/helpers/diff.py +13 -4
  8. bbot/core/helpers/dns/brute.py +8 -2
  9. bbot/core/helpers/dns/engine.py +3 -2
  10. bbot/core/helpers/ratelimiter.py +8 -2
  11. bbot/core/helpers/regexes.py +5 -2
  12. bbot/core/helpers/web/engine.py +1 -1
  13. bbot/core/helpers/web/web.py +1 -1
  14. bbot/core/shared_deps.py +14 -0
  15. bbot/defaults.yml +44 -0
  16. bbot/modules/ajaxpro.py +64 -37
  17. bbot/modules/baddns.py +23 -15
  18. bbot/modules/baddns_direct.py +2 -2
  19. bbot/modules/badsecrets.py +2 -2
  20. bbot/modules/base.py +49 -15
  21. bbot/modules/censys.py +1 -1
  22. bbot/modules/deadly/dastardly.py +3 -3
  23. bbot/modules/deadly/nuclei.py +1 -1
  24. bbot/modules/dehashed.py +2 -2
  25. bbot/modules/dnsbrute_mutations.py +3 -1
  26. bbot/modules/docker_pull.py +1 -1
  27. bbot/modules/dockerhub.py +2 -2
  28. bbot/modules/dotnetnuke.py +12 -12
  29. bbot/modules/extractous.py +1 -1
  30. bbot/modules/ffuf_shortnames.py +107 -48
  31. bbot/modules/filedownload.py +6 -0
  32. bbot/modules/generic_ssrf.py +54 -40
  33. bbot/modules/github_codesearch.py +2 -2
  34. bbot/modules/github_org.py +16 -20
  35. bbot/modules/github_workflows.py +6 -2
  36. bbot/modules/gowitness.py +6 -0
  37. bbot/modules/hunt.py +1 -1
  38. bbot/modules/hunterio.py +1 -1
  39. bbot/modules/iis_shortnames.py +23 -7
  40. bbot/modules/internal/excavate.py +5 -3
  41. bbot/modules/internal/unarchive.py +82 -0
  42. bbot/modules/jadx.py +2 -2
  43. bbot/modules/output/asset_inventory.py +1 -1
  44. bbot/modules/output/base.py +1 -1
  45. bbot/modules/output/discord.py +2 -1
  46. bbot/modules/output/slack.py +2 -1
  47. bbot/modules/output/teams.py +10 -25
  48. bbot/modules/output/web_parameters.py +55 -0
  49. bbot/modules/paramminer_headers.py +15 -10
  50. bbot/modules/portfilter.py +41 -0
  51. bbot/modules/portscan.py +1 -22
  52. bbot/modules/postman.py +61 -43
  53. bbot/modules/postman_download.py +10 -147
  54. bbot/modules/sitedossier.py +1 -1
  55. bbot/modules/skymem.py +1 -1
  56. bbot/modules/templates/postman.py +163 -1
  57. bbot/modules/templates/subdomain_enum.py +1 -1
  58. bbot/modules/templates/webhook.py +17 -26
  59. bbot/modules/trufflehog.py +3 -3
  60. bbot/modules/wappalyzer.py +1 -1
  61. bbot/modules/zoomeye.py +1 -1
  62. bbot/presets/kitchen-sink.yml +1 -1
  63. bbot/presets/nuclei/nuclei-budget.yml +19 -0
  64. bbot/presets/nuclei/nuclei-intense.yml +28 -0
  65. bbot/presets/nuclei/nuclei-technology.yml +23 -0
  66. bbot/presets/nuclei/nuclei.yml +34 -0
  67. bbot/presets/spider-intense.yml +13 -0
  68. bbot/scanner/preset/args.py +29 -3
  69. bbot/scanner/preset/preset.py +43 -24
  70. bbot/scanner/scanner.py +17 -7
  71. bbot/test/bbot_fixtures.py +7 -7
  72. bbot/test/test_step_1/test_bloom_filter.py +2 -2
  73. bbot/test/test_step_1/test_cli.py +5 -5
  74. bbot/test/test_step_1/test_dns.py +33 -0
  75. bbot/test/test_step_1/test_events.py +15 -5
  76. bbot/test/test_step_1/test_modules_basic.py +21 -21
  77. bbot/test/test_step_1/test_presets.py +94 -4
  78. bbot/test/test_step_1/test_regexes.py +13 -13
  79. bbot/test/test_step_1/test_scan.py +78 -0
  80. bbot/test/test_step_1/test_web.py +4 -4
  81. bbot/test/test_step_2/module_tests/test_module_ajaxpro.py +43 -23
  82. bbot/test/test_step_2/module_tests/test_module_azure_realm.py +3 -3
  83. bbot/test/test_step_2/module_tests/test_module_baddns.py +3 -3
  84. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +6 -6
  85. bbot/test/test_step_2/module_tests/test_module_bufferoverrun.py +3 -3
  86. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +3 -3
  87. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +3 -3
  88. bbot/test/test_step_2/module_tests/test_module_dnscaa.py +6 -6
  89. bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py +9 -9
  90. bbot/test/test_step_2/module_tests/test_module_dnstlsrpt.py +12 -12
  91. bbot/test/test_step_2/module_tests/test_module_excavate.py +15 -15
  92. bbot/test/test_step_2/module_tests/test_module_extractous.py +3 -3
  93. bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py +8 -8
  94. bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py +3 -1
  95. bbot/test/test_step_2/module_tests/test_module_github_codesearch.py +3 -3
  96. bbot/test/test_step_2/module_tests/test_module_gowitness.py +9 -9
  97. bbot/test/test_step_2/module_tests/test_module_iis_shortnames.py +1 -1
  98. bbot/test/test_step_2/module_tests/test_module_paramminer_getparams.py +35 -1
  99. bbot/test/test_step_2/module_tests/test_module_paramminer_headers.py +3 -3
  100. bbot/test/test_step_2/module_tests/test_module_portfilter.py +48 -0
  101. bbot/test/test_step_2/module_tests/test_module_postman.py +338 -3
  102. bbot/test/test_step_2/module_tests/test_module_postman_download.py +4 -161
  103. bbot/test/test_step_2/module_tests/test_module_securitytxt.py +12 -12
  104. bbot/test/test_step_2/module_tests/test_module_teams.py +10 -1
  105. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +1 -1
  106. bbot/test/test_step_2/module_tests/test_module_unarchive.py +229 -0
  107. bbot/test/test_step_2/module_tests/test_module_viewdns.py +3 -3
  108. bbot/test/test_step_2/module_tests/test_module_web_parameters.py +59 -0
  109. bbot/test/test_step_2/module_tests/test_module_websocket.py +5 -4
  110. {bbot-2.3.0.5546rc0.dist-info → bbot-2.3.1.5815rc0.dist-info}/METADATA +7 -7
  111. {bbot-2.3.0.5546rc0.dist-info → bbot-2.3.1.5815rc0.dist-info}/RECORD +115 -105
  112. {bbot-2.3.0.5546rc0.dist-info → bbot-2.3.1.5815rc0.dist-info}/WHEEL +1 -1
  113. bbot/wordlists/ffuf_shortname_candidates.txt +0 -107982
  114. /bbot/presets/{baddns-thorough.yml → baddns-intense.yml} +0 -0
  115. {bbot-2.3.0.5546rc0.dist-info → bbot-2.3.1.5815rc0.dist-info}/LICENSE +0 -0
  116. {bbot-2.3.0.5546rc0.dist-info → bbot-2.3.1.5815rc0.dist-info}/entry_points.txt +0 -0
bbot/scanner/scanner.py CHANGED
@@ -334,7 +334,7 @@ class Scanner:
334
334
  await self._prep()
335
335
 
336
336
  self._start_log_handlers()
337
- self.trace(f'Ran BBOT {__version__} at {self.start_time}, command: {" ".join(sys.argv)}')
337
+ self.trace(f"Ran BBOT {__version__} at {self.start_time}, command: {' '.join(sys.argv)}")
338
338
  self.trace(f"Target: {self.preset.target.json}")
339
339
  self.trace(f"Preset: {self.preset.to_dict(redact_secrets=True)}")
340
340
 
@@ -683,14 +683,14 @@ class Scanner:
683
683
  event_type_summary = sorted(self.stats.events_emitted_by_type.items(), key=lambda x: x[-1], reverse=True)
684
684
  if event_type_summary:
685
685
  self.info(
686
- f'{self.name}: Events produced so far: {", ".join([f"{k}: {v}" for k,v in event_type_summary])}'
686
+ f"{self.name}: Events produced so far: {', '.join([f'{k}: {v}' for k, v in event_type_summary])}"
687
687
  )
688
688
  else:
689
689
  self.info(f"{self.name}: No events produced yet")
690
690
 
691
691
  if modules_errored:
692
692
  self.verbose(
693
- f'{self.name}: Modules errored: {len(modules_errored):,} ({", ".join(list(modules_errored))})'
693
+ f"{self.name}: Modules errored: {len(modules_errored):,} ({', '.join(list(modules_errored))})"
694
694
  )
695
695
 
696
696
  num_queued_events = self.num_queued_events
@@ -1089,9 +1089,19 @@ class Scanner:
1089
1089
  regexes_component_list = []
1090
1090
  for i, r in enumerate(self.dns_regexes_yara):
1091
1091
  regexes_component_list.append(rf"$dns_name_{i} = /\b{r.pattern}/ nocase")
1092
- if regexes_component_list:
1093
- regexes_component = " ".join(regexes_component_list)
1094
- self._dns_yara_rules_uncompiled = f'rule hostname_extraction {{meta: description = "matches DNS hostname pattern derived from target(s)" strings: {regexes_component} condition: any of them}}'
1092
+
1093
+ # Chunk the regexes into groups of 10,000
1094
+ chunk_size = 10000
1095
+ rules = {}
1096
+ for chunk_index in range(0, len(regexes_component_list), chunk_size):
1097
+ chunk = regexes_component_list[chunk_index : chunk_index + chunk_size]
1098
+ if chunk:
1099
+ regexes_component = " ".join(chunk)
1100
+ rule_name = f"hostname_extraction_{chunk_index // chunk_size}"
1101
+ rule = f'rule {rule_name} {{meta: description = "matches DNS hostname pattern derived from target(s)" strings: {regexes_component} condition: any of them}}'
1102
+ rules[rule_name] = rule
1103
+
1104
+ self._dns_yara_rules_uncompiled = rules
1095
1105
  return self._dns_yara_rules_uncompiled
1096
1106
 
1097
1107
  async def dns_yara_rules(self):
@@ -1100,7 +1110,7 @@ class Scanner:
1100
1110
  import yara
1101
1111
 
1102
1112
  self._dns_yara_rules = await self.helpers.run_in_executor(
1103
- yara.compile, source=self.dns_yara_rules_uncompiled
1113
+ yara.compile, source="\n".join(self.dns_yara_rules_uncompiled.values())
1104
1114
  )
1105
1115
  return self._dns_yara_rules
1106
1116
 
@@ -224,12 +224,12 @@ def events(scan):
224
224
  return bbot_events
225
225
 
226
226
 
227
- @pytest.fixture(scope="session", autouse=True)
228
- def install_all_python_deps():
229
- deps_pip = set()
230
- for module in DEFAULT_PRESET.module_loader.preloaded().values():
231
- deps_pip.update(set(module.get("deps", {}).get("pip", [])))
227
+ # @pytest.fixture(scope="session", autouse=True)
228
+ # def install_all_python_deps():
229
+ # deps_pip = set()
230
+ # for module in DEFAULT_PRESET.module_loader.preloaded().values():
231
+ # deps_pip.update(set(module.get("deps", {}).get("pip", [])))
232
232
 
233
- constraint_file = tempwordlist(get_python_constraints())
233
+ # constraint_file = tempwordlist(get_python_constraints())
234
234
 
235
- subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip))
235
+ # subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip))
@@ -38,7 +38,7 @@ async def test_bloom_filter():
38
38
  test_set.add(hash(item))
39
39
  end = time.time()
40
40
  elapsed = end - start
41
- print(f"elapsed: {elapsed:.2f} ({int(n_items_to_test/elapsed)}/s)")
41
+ print(f"elapsed: {elapsed:.2f} ({int(n_items_to_test / elapsed)}/s)")
42
42
  # this shouldn't take longer than 5 seconds
43
43
  assert elapsed < 5
44
44
 
@@ -48,7 +48,7 @@ async def test_bloom_filter():
48
48
  assert item in bloom_filter
49
49
  end = time.time()
50
50
  elapsed = end - start
51
- print(f"elapsed: {elapsed:.2f} ({int(n_items_to_test/elapsed)}/s)")
51
+ print(f"elapsed: {elapsed:.2f} ({int(n_items_to_test / elapsed)}/s)")
52
52
  # this shouldn't take longer than 5 seconds
53
53
  assert elapsed < 5
54
54
 
@@ -271,7 +271,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
271
271
  result = await cli._main()
272
272
  out, err = capsys.readouterr()
273
273
  assert result is None
274
- assert "Target:\n -t TARGET [TARGET ...]" in out
274
+ assert "-t TARGET [TARGET ...]" in out
275
275
 
276
276
  # list modules
277
277
  monkeypatch.setattr("sys.argv", ["bbot", "-l"])
@@ -342,17 +342,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
342
342
  monkeypatch.setattr("sys.argv", ["bbot", "-y"])
343
343
  result = await cli._main()
344
344
  assert result is True
345
- assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate)" in caplog.text
345
+ assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate,unarchive)" in caplog.text
346
346
  caplog.clear()
347
347
  monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
348
348
  result = await cli._main()
349
349
  assert result is True
350
- assert "Loaded 3/3 internal modules (aggregate,cloudcheck,dnsresolve)" in caplog.text
350
+ assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,unarchive)" in caplog.text
351
351
  caplog.clear()
352
352
  monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"])
353
353
  result = await cli._main()
354
354
  assert result is True
355
- assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,excavate)" in caplog.text
355
+ assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive)" in caplog.text
356
356
 
357
357
  # custom target type
358
358
  out, err = capsys.readouterr()
@@ -607,7 +607,7 @@ def test_cli_presets(monkeypatch, capsys, caplog):
607
607
  assert " http_proxy: currentpresettest" in captured.out
608
608
 
609
609
  # show current preset (full)
610
- monkeypatch.setattr("sys.argv", ["bbot", "-c" "modules.c99.api_key=asdf", "--current-preset-full"])
610
+ monkeypatch.setattr("sys.argv", ["bbot", "-cmodules.c99.api_key=asdf", "--current-preset-full"])
611
611
  cli.main()
612
612
  captured = capsys.readouterr()
613
613
  assert " api_key: asdf" in captured.out
@@ -776,6 +776,39 @@ async def test_dns_graph_structure(bbot_scanner):
776
776
  assert str(events_by_data["evilcorp.com"].module) == "host"
777
777
 
778
778
 
779
+ @pytest.mark.asyncio
780
+ async def test_hostname_extraction(bbot_scanner):
781
+ scan = bbot_scanner("evilcorp.com", config={"dns": {"minimal": False}})
782
+ await scan.helpers.dns._mock_dns(
783
+ {
784
+ "evilcorp.com": {
785
+ "A": ["127.0.0.1"],
786
+ "TXT": [
787
+ "v=spf1 include:spf-a.evilcorp.com include:spf-b.evilcorp.com include:icpbounce.com include:shops.shopify.com include:_spf.qemailserver.com include:spf.mandrillapp.com include:spf.protection.office365.us include:spf-003ea501.gpphosted.com 127.0.0.1 -all"
788
+ ],
789
+ }
790
+ }
791
+ )
792
+ events = [e async for e in scan.async_start()]
793
+ dns_name_events = [e for e in events if e.type == "DNS_NAME"]
794
+ main_dns_event = [e for e in dns_name_events if e.data == "evilcorp.com"]
795
+ assert len(main_dns_event) == 1
796
+ main_dns_event = main_dns_event[0]
797
+ dns_children = main_dns_event.dns_children
798
+ assert dns_children["A"] == {"127.0.0.1"}
799
+ assert dns_children["TXT"] == {
800
+ "spf-a.evilcorp.com",
801
+ "spf-b.evilcorp.com",
802
+ "icpbounce.com",
803
+ "shops.shopify.com",
804
+ "_spf.qemailserver.com",
805
+ "spf.mandrillapp.com",
806
+ "spf.protection.office365.us",
807
+ "spf-003ea501.gpphosted.com",
808
+ "127.0.0.1",
809
+ }
810
+
811
+
779
812
  @pytest.mark.asyncio
780
813
  async def test_dns_helpers(bbot_scanner):
781
814
  assert service_record("") is False
@@ -964,15 +964,25 @@ def test_event_closest_host():
964
964
  assert vuln.data["path"] == "/tmp/asdf.txt"
965
965
  assert vuln.host == "www.evilcorp.com"
966
966
 
967
- # no host == not allowed
967
+ # no host and no path == not allowed
968
968
  event3 = scan.make_event("wat", "ASDF", parent=scan.root_event)
969
969
  assert not event3.host
970
970
  with pytest.raises(ValueError):
971
- finding = scan.make_event({"path": "/tmp/asdf.txt", "description": "test"}, "FINDING", parent=event3)
971
+ finding = scan.make_event({"description": "test"}, "FINDING", parent=event3)
972
+ finding = scan.make_event({"path": "/tmp/asdf.txt", "description": "test"}, "FINDING", parent=event3)
973
+ assert finding is not None
974
+ finding = scan.make_event({"host": "evilcorp.com", "description": "test"}, "FINDING", parent=event3)
975
+ assert finding is not None
972
976
  with pytest.raises(ValueError):
973
- vuln = scan.make_event(
974
- {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3
975
- )
977
+ vuln = scan.make_event({"description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3)
978
+ vuln = scan.make_event(
979
+ {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3
980
+ )
981
+ assert vuln is not None
982
+ vuln = scan.make_event(
983
+ {"host": "evilcorp.com", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3
984
+ )
985
+ assert vuln is not None
976
986
 
977
987
 
978
988
  def test_event_magic():
@@ -147,15 +147,15 @@ async def test_modules_basic_checks(events, httpx_mock):
147
147
  for flag in flags:
148
148
  all_flags.add(flag)
149
149
  if preloaded["type"] == "scan":
150
- assert ("active" in flags and "passive" not in flags) or (
151
- "active" not in flags and "passive" in flags
152
- ), f'module "{module_name}" must have either "active" or "passive" flag'
150
+ assert ("active" in flags and "passive" not in flags) or ("active" not in flags and "passive" in flags), (
151
+ f'module "{module_name}" must have either "active" or "passive" flag'
152
+ )
153
153
  assert ("safe" in flags and "aggressive" not in flags) or (
154
154
  "safe" not in flags and "aggressive" in flags
155
155
  ), f'module "{module_name}" must have either "safe" or "aggressive" flag'
156
- assert not (
157
- "web-basic" in flags and "web-thorough" in flags
158
- ), f'module "{module_name}" should have either "web-basic" or "web-thorough" flags, not both'
156
+ assert not ("web-basic" in flags and "web-thorough" in flags), (
157
+ f'module "{module_name}" should have either "web-basic" or "web-thorough" flags, not both'
158
+ )
159
159
  meta = preloaded.get("meta", {})
160
160
  # make sure every module has a description
161
161
  assert meta.get("description", ""), f"{module_name} must have a description"
@@ -176,29 +176,29 @@ async def test_modules_basic_checks(events, httpx_mock):
176
176
  assert watched_events, f"{module_name}.watched_events must not be empty"
177
177
  assert type(watched_events) == list, f"{module_name}.watched_events must be of type list"
178
178
  assert type(produced_events) == list, f"{module_name}.produced_events must be of type list"
179
- assert all(
180
- type(t) == str for t in watched_events
181
- ), f"{module_name}.watched_events entries must be of type string"
182
- assert all(
183
- type(t) == str for t in produced_events
184
- ), f"{module_name}.produced_events entries must be of type string"
179
+ assert all(type(t) == str for t in watched_events), (
180
+ f"{module_name}.watched_events entries must be of type string"
181
+ )
182
+ assert all(type(t) == str for t in produced_events), (
183
+ f"{module_name}.produced_events entries must be of type string"
184
+ )
185
185
 
186
186
  assert type(preloaded.get("deps_pip", [])) == list, f"{module_name}.deps_pip must be of type list"
187
- assert (
188
- type(preloaded.get("deps_pip_constraints", [])) == list
189
- ), f"{module_name}.deps_pip_constraints must be of type list"
187
+ assert type(preloaded.get("deps_pip_constraints", [])) == list, (
188
+ f"{module_name}.deps_pip_constraints must be of type list"
189
+ )
190
190
  assert type(preloaded.get("deps_apt", [])) == list, f"{module_name}.deps_apt must be of type list"
191
191
  assert type(preloaded.get("deps_shell", [])) == list, f"{module_name}.deps_shell must be of type list"
192
192
  assert type(preloaded.get("config", None)) == dict, f"{module_name}.options must be of type list"
193
193
  assert type(preloaded.get("options_desc", None)) == dict, f"{module_name}.options_desc must be of type list"
194
194
  # options must have descriptions
195
- assert set(preloaded.get("config", {})) == set(
196
- preloaded.get("options_desc", {})
197
- ), f"{module_name}.options do not match options_desc"
195
+ assert set(preloaded.get("config", {})) == set(preloaded.get("options_desc", {})), (
196
+ f"{module_name}.options do not match options_desc"
197
+ )
198
198
  # descriptions most not be blank
199
- assert all(
200
- o for o in preloaded.get("options_desc", {}).values()
201
- ), f"{module_name}.options_desc descriptions must not be blank"
199
+ assert all(o for o in preloaded.get("options_desc", {}).values()), (
200
+ f"{module_name}.options_desc descriptions must not be blank"
201
+ )
202
202
 
203
203
  from bbot.core.flags import flag_descriptions
204
204
 
@@ -17,9 +17,9 @@ def test_preset_descriptions():
17
17
  # ensure very preset has a description
18
18
  preset = Preset()
19
19
  for loaded_preset, category, preset_path, original_filename in preset.all_presets.values():
20
- assert (
21
- loaded_preset.description
22
- ), f'Preset "{loaded_preset.name}" at {original_filename} does not have a description.'
20
+ assert loaded_preset.description, (
21
+ f'Preset "{loaded_preset.name}" at {original_filename} does not have a description.'
22
+ )
23
23
 
24
24
 
25
25
  def test_core():
@@ -493,7 +493,14 @@ def test_preset_module_resolution(clean_default_config):
493
493
  # make sure we have the expected defaults
494
494
  assert not preset.scan_modules
495
495
  assert set(preset.output_modules) == {"python", "csv", "txt", "json"}
496
- assert set(preset.internal_modules) == {"aggregate", "excavate", "speculate", "cloudcheck", "dnsresolve"}
496
+ assert set(preset.internal_modules) == {
497
+ "aggregate",
498
+ "excavate",
499
+ "unarchive",
500
+ "speculate",
501
+ "cloudcheck",
502
+ "dnsresolve",
503
+ }
497
504
  assert preset.modules == set(preset.output_modules).union(set(preset.internal_modules))
498
505
 
499
506
  # make sure dependency resolution works as expected
@@ -553,6 +560,7 @@ def test_preset_module_resolution(clean_default_config):
553
560
  "dnsresolve",
554
561
  "aggregate",
555
562
  "excavate",
563
+ "unarchive",
556
564
  "txt",
557
565
  "httpx",
558
566
  "csv",
@@ -878,6 +886,88 @@ def test_preset_module_disablement(clean_default_config):
878
886
  assert set(preset.output_modules) == {"json"}
879
887
 
880
888
 
889
+ def test_preset_override():
890
+ # tests to make sure a preset's config settings override others it includes
891
+ preset_1_yaml = """
892
+ name: override1
893
+ scan_name: override1
894
+ target: ["evilcorp1.com"]
895
+ silent: True
896
+ modules:
897
+ - robots
898
+ config:
899
+ modules:
900
+ asdf:
901
+ option1: asdf
902
+ """
903
+ preset_2_yaml = """
904
+ name: override2
905
+ scan_name: override2
906
+ target: ["evilcorp2.com"]
907
+ debug: true
908
+ modules:
909
+ - c99
910
+ config:
911
+ modules:
912
+ asdf:
913
+ option1: fdsa
914
+ """
915
+ preset_3_yaml = """
916
+ name: override3
917
+ scan_name: override3
918
+ target: ["evilcorp3.com"]
919
+ modules:
920
+ - securitytrails
921
+ # test ordering priority
922
+ include:
923
+ - override1
924
+ - override2
925
+ config:
926
+ web:
927
+ spider_distance: 2
928
+ spider_depth: 3
929
+ """
930
+ preset_4_yaml = """
931
+ name: override4
932
+ scan_name: override4
933
+ target: ["evilcorp4.com"]
934
+ modules:
935
+ - virustotal
936
+ include:
937
+ - override3
938
+ config:
939
+ web:
940
+ spider_distance: 1
941
+ spider_depth: 2
942
+ """
943
+ custom_preset_dir = bbot_test_dir / "custom_preset_dir_override"
944
+ custom_preset_dir.mkdir(parents=True, exist_ok=True)
945
+ preset_1_file = custom_preset_dir / "override1.yml"
946
+ preset_1_file.write_text(preset_1_yaml)
947
+ preset_2_file = custom_preset_dir / "override2.yml"
948
+ preset_2_file.write_text(preset_2_yaml)
949
+ preset_3_file = custom_preset_dir / "override3.yml"
950
+ preset_3_file.write_text(preset_3_yaml)
951
+ preset_4_file = custom_preset_dir / "override4.yml"
952
+ preset_4_file.write_text(preset_4_yaml)
953
+
954
+ preset = Preset.from_yaml_file(preset_4_file.resolve())
955
+ assert preset.debug is True
956
+ assert preset.silent is True
957
+ assert preset.name == "override4"
958
+ preset = preset.bake()
959
+ assert preset.debug is False
960
+ assert preset.silent is True
961
+ assert preset.name == "override4"
962
+ assert preset.scan_name == "override4"
963
+ targets = set([str(e.data) for e in preset.target.seeds])
964
+ assert targets == {"evilcorp1.com", "evilcorp2.com", "evilcorp3.com", "evilcorp4.com"}
965
+ assert preset.config["web"]["spider_distance"] == 1
966
+ assert preset.config["web"]["spider_depth"] == 2
967
+ assert preset.config["modules"]["asdf"]["option1"] == "fdsa"
968
+ assert set(preset.scan_modules) == {"httpx", "c99", "robots", "virustotal", "securitytrails"}
969
+
970
+
881
971
  def test_preset_require_exclude():
882
972
  def get_module_flags(p):
883
973
  for m in p.scan_modules:
@@ -83,13 +83,13 @@ def test_ip_regexes():
83
83
  event_type, _ = get_event_type(ip)
84
84
  if not event_type == "IP_ADDRESS":
85
85
  if ip.endswith("/24"):
86
- assert (
87
- ip == "203.0.113.0/24" and event_type == "IP_RANGE"
88
- ), f"Event type for IP_ADDRESS {ip} was not properly detected"
86
+ assert ip == "203.0.113.0/24" and event_type == "IP_RANGE", (
87
+ f"Event type for IP_ADDRESS {ip} was not properly detected"
88
+ )
89
89
  else:
90
- assert (
91
- ip == "2001:db8::1/128" and event_type == "IP_RANGE"
92
- ), f"Event type for IP_ADDRESS {ip} was not properly detected"
90
+ assert ip == "2001:db8::1/128" and event_type == "IP_RANGE", (
91
+ f"Event type for IP_ADDRESS {ip} was not properly detected"
92
+ )
93
93
  else:
94
94
  matches = [r.match(ip) for r in ip_address_regexes]
95
95
  assert any(matches), f"Good IP ADDRESS {ip} did not match regexes"
@@ -195,9 +195,9 @@ def test_dns_name_regexes():
195
195
  assert any(matches), f"Good DNS_NAME {dns} did not match regexes"
196
196
  event_type, _ = get_event_type(dns)
197
197
  if not event_type == "DNS_NAME":
198
- assert (
199
- dns == "1.2.3.4" and event_type == "IP_ADDRESS"
200
- ), f"Event type for DNS_NAME {dns} was not properly detected"
198
+ assert dns == "1.2.3.4" and event_type == "IP_ADDRESS", (
199
+ f"Event type for DNS_NAME {dns} was not properly detected"
200
+ )
201
201
 
202
202
 
203
203
  def test_open_port_regexes():
@@ -279,7 +279,7 @@ def test_url_regexes():
279
279
  "http://-evilcorp-.com",
280
280
  "http://evilcorp-.com/path",
281
281
  "http://evilcorp.com-/path",
282
- "evilcorp.com/path" "asdfasdfasdfasdfgsdgasdfs.asdfasdfasdfasdfasdf.evilcorp.com/path",
282
+ "evilcorp.com/pathasdfasdfasdfasdfgsdgasdfs.asdfasdfasdfasdfasdf.evilcorp.com/path",
283
283
  "rhttps://evilcorp.com",
284
284
  "https://[e]",
285
285
  "https://[1]:80",
@@ -320,9 +320,9 @@ def test_url_regexes():
320
320
  for good_url in good_urls:
321
321
  matches = [r.match(good_url) for r in url_regexes]
322
322
  assert any(matches), f"Good URL {good_url} did not match regexes"
323
- assert (
324
- get_event_type(good_url)[0] == "URL_UNVERIFIED"
325
- ), f"Event type for URL {good_url} was not properly detected"
323
+ assert get_event_type(good_url)[0] == "URL_UNVERIFIED", (
324
+ f"Event type for URL {good_url} was not properly detected"
325
+ )
326
326
 
327
327
 
328
328
  @pytest.mark.asyncio
@@ -149,3 +149,81 @@ async def test_python_output_matches_json(bbot_scanner):
149
149
  assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1
150
150
  assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1
151
151
  assert events == json_events
152
+
153
+
154
+ @pytest.mark.asyncio
155
+ async def test_huge_target_list(bbot_scanner, monkeypatch):
156
+ # single target should only have one rule
157
+ scan = bbot_scanner("evilcorp.com", config={"excavate": True})
158
+ await scan._prep()
159
+ assert "hostname_extraction_0" in scan.modules["excavate"].yara_rules_dict
160
+ assert "hostname_extraction_1" not in scan.modules["excavate"].yara_rules_dict
161
+
162
+ # over 10000 targets should be broken into two rules
163
+ num_targets = 10005
164
+ targets = [f"evil{i}.com" for i in range(num_targets)]
165
+ scan = bbot_scanner(*targets, config={"excavate": True})
166
+ await scan._prep()
167
+ assert "hostname_extraction_0" in scan.modules["excavate"].yara_rules_dict
168
+ assert "hostname_extraction_1" in scan.modules["excavate"].yara_rules_dict
169
+ assert "hostname_extraction_2" not in scan.modules["excavate"].yara_rules_dict
170
+
171
+
172
+ @pytest.mark.asyncio
173
+ async def test_exclude_cdn(bbot_scanner, monkeypatch):
174
+ # test that CDN exclusion works
175
+
176
+ from bbot import Preset
177
+
178
+ dns_mock = {
179
+ "evilcorp.com": {"A": ["127.0.0.1"]},
180
+ "www.evilcorp.com": {"A": ["127.0.0.1"]},
181
+ }
182
+
183
+ # first, run a scan with no CDN exclusion
184
+ scan = bbot_scanner("evilcorp.com")
185
+ await scan.helpers._mock_dns(dns_mock)
186
+
187
+ from bbot.modules.base import BaseModule
188
+
189
+ class DummyModule(BaseModule):
190
+ watched_events = ["DNS_NAME"]
191
+
192
+ async def handle_event(self, event):
193
+ if event.type == "DNS_NAME" and event.data == "evilcorp.com":
194
+ await self.emit_event("www.evilcorp.com", "DNS_NAME", parent=event, tags=["cdn-cloudflare"])
195
+ if event.type == "DNS_NAME" and event.data == "www.evilcorp.com":
196
+ await self.emit_event("www.evilcorp.com:80", "OPEN_TCP_PORT", parent=event, tags=["cdn-cloudflare"])
197
+ await self.emit_event("www.evilcorp.com:443", "OPEN_TCP_PORT", parent=event, tags=["cdn-cloudflare"])
198
+ await self.emit_event("www.evilcorp.com:8080", "OPEN_TCP_PORT", parent=event, tags=["cdn-cloudflare"])
199
+
200
+ dummy = DummyModule(scan=scan)
201
+ await scan._prep()
202
+ scan.modules["dummy"] = dummy
203
+ events = [e async for e in scan.async_start() if e.type in ("DNS_NAME", "OPEN_TCP_PORT")]
204
+ assert set(e.data for e in events) == {
205
+ "evilcorp.com",
206
+ "www.evilcorp.com",
207
+ "www.evilcorp.com:80",
208
+ "www.evilcorp.com:443",
209
+ "www.evilcorp.com:8080",
210
+ }
211
+
212
+ monkeypatch.setattr("sys.argv", ["bbot", "-t", "evilcorp.com", "--exclude-cdn"])
213
+
214
+ # then run a scan with --exclude-cdn enabled
215
+ preset = Preset("evilcorp.com")
216
+ preset.parse_args()
217
+ assert preset.bake().to_yaml() == "modules:\n- portfilter\n"
218
+ scan = bbot_scanner("evilcorp.com", preset=preset)
219
+ await scan.helpers._mock_dns(dns_mock)
220
+ dummy = DummyModule(scan=scan)
221
+ await scan._prep()
222
+ scan.modules["dummy"] = dummy
223
+ events = [e async for e in scan.async_start() if e.type in ("DNS_NAME", "OPEN_TCP_PORT")]
224
+ assert set(e.data for e in events) == {
225
+ "evilcorp.com",
226
+ "www.evilcorp.com",
227
+ "www.evilcorp.com:80",
228
+ "www.evilcorp.com:443",
229
+ }
@@ -263,7 +263,7 @@ async def test_web_helpers(bbot_scanner, bbot_httpserver, httpx_mock):
263
263
  finally:
264
264
  await agen.aclose()
265
265
  assert not results
266
- agen = module.api_page_iter(template_url, json=False)
266
+ agen = module.api_page_iter(template_url, _json=False)
267
267
  try:
268
268
  async for result in agen:
269
269
  if result and result.text.startswith("page"):
@@ -407,9 +407,9 @@ async def test_http_proxy(bbot_scanner, bbot_httpserver, proxy_server):
407
407
 
408
408
  r = await scan.helpers.request(url)
409
409
 
410
- assert (
411
- len(proxy_server.RequestHandlerClass.urls) == 1
412
- ), f"Request to {url} did not go through proxy {proxy_address}"
410
+ assert len(proxy_server.RequestHandlerClass.urls) == 1, (
411
+ f"Request to {url} did not go through proxy {proxy_address}"
412
+ )
413
413
  visited_url = proxy_server.RequestHandlerClass.urls[0]
414
414
  assert visited_url.endswith(endpoint), f"There was a problem with request to {url}: {visited_url}"
415
415
  assert r.status_code == 200 and r.text == "test_http_proxy_yep"