bbot 2.0.1.4720rc0__py3-none-any.whl → 2.3.0.5401rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

Files changed (278)
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +3 -7
  3. bbot/core/config/files.py +0 -1
  4. bbot/core/config/logger.py +34 -4
  5. bbot/core/core.py +21 -4
  6. bbot/core/engine.py +9 -8
  7. bbot/core/event/base.py +131 -52
  8. bbot/core/helpers/bloom.py +10 -3
  9. bbot/core/helpers/command.py +8 -7
  10. bbot/core/helpers/depsinstaller/installer.py +31 -13
  11. bbot/core/helpers/diff.py +10 -10
  12. bbot/core/helpers/dns/brute.py +7 -4
  13. bbot/core/helpers/dns/dns.py +1 -2
  14. bbot/core/helpers/dns/engine.py +4 -6
  15. bbot/core/helpers/dns/helpers.py +2 -2
  16. bbot/core/helpers/dns/mock.py +0 -1
  17. bbot/core/helpers/files.py +1 -1
  18. bbot/core/helpers/helper.py +7 -4
  19. bbot/core/helpers/interactsh.py +3 -3
  20. bbot/core/helpers/libmagic.py +65 -0
  21. bbot/core/helpers/misc.py +65 -22
  22. bbot/core/helpers/names_generator.py +17 -3
  23. bbot/core/helpers/process.py +0 -20
  24. bbot/core/helpers/regex.py +1 -1
  25. bbot/core/helpers/regexes.py +12 -6
  26. bbot/core/helpers/validators.py +1 -2
  27. bbot/core/helpers/web/client.py +1 -1
  28. bbot/core/helpers/web/engine.py +1 -2
  29. bbot/core/helpers/web/web.py +4 -114
  30. bbot/core/helpers/wordcloud.py +5 -5
  31. bbot/core/modules.py +36 -27
  32. bbot/core/multiprocess.py +58 -0
  33. bbot/core/shared_deps.py +46 -3
  34. bbot/db/sql/models.py +147 -0
  35. bbot/defaults.yml +12 -10
  36. bbot/modules/anubisdb.py +2 -2
  37. bbot/modules/apkpure.py +63 -0
  38. bbot/modules/azure_tenant.py +2 -2
  39. bbot/modules/baddns.py +35 -19
  40. bbot/modules/baddns_direct.py +92 -0
  41. bbot/modules/baddns_zone.py +3 -8
  42. bbot/modules/badsecrets.py +4 -3
  43. bbot/modules/base.py +195 -51
  44. bbot/modules/bevigil.py +7 -7
  45. bbot/modules/binaryedge.py +7 -4
  46. bbot/modules/bufferoverrun.py +47 -0
  47. bbot/modules/builtwith.py +6 -10
  48. bbot/modules/bypass403.py +5 -5
  49. bbot/modules/c99.py +10 -7
  50. bbot/modules/censys.py +9 -13
  51. bbot/modules/certspotter.py +5 -3
  52. bbot/modules/chaos.py +9 -7
  53. bbot/modules/code_repository.py +1 -0
  54. bbot/modules/columbus.py +3 -3
  55. bbot/modules/crt.py +5 -3
  56. bbot/modules/deadly/dastardly.py +1 -1
  57. bbot/modules/deadly/ffuf.py +9 -9
  58. bbot/modules/deadly/nuclei.py +3 -3
  59. bbot/modules/deadly/vhost.py +4 -3
  60. bbot/modules/dehashed.py +1 -1
  61. bbot/modules/digitorus.py +1 -1
  62. bbot/modules/dnsbimi.py +145 -0
  63. bbot/modules/dnscaa.py +3 -3
  64. bbot/modules/dnsdumpster.py +4 -4
  65. bbot/modules/dnstlsrpt.py +144 -0
  66. bbot/modules/docker_pull.py +7 -5
  67. bbot/modules/dockerhub.py +2 -2
  68. bbot/modules/dotnetnuke.py +20 -21
  69. bbot/modules/emailformat.py +1 -1
  70. bbot/modules/extractous.py +122 -0
  71. bbot/modules/filedownload.py +9 -7
  72. bbot/modules/fullhunt.py +7 -4
  73. bbot/modules/generic_ssrf.py +5 -5
  74. bbot/modules/github_codesearch.py +3 -2
  75. bbot/modules/github_org.py +4 -4
  76. bbot/modules/github_workflows.py +4 -4
  77. bbot/modules/gitlab.py +2 -5
  78. bbot/modules/google_playstore.py +93 -0
  79. bbot/modules/gowitness.py +48 -50
  80. bbot/modules/hackertarget.py +5 -3
  81. bbot/modules/host_header.py +5 -5
  82. bbot/modules/httpx.py +1 -4
  83. bbot/modules/hunterio.py +3 -9
  84. bbot/modules/iis_shortnames.py +19 -30
  85. bbot/modules/internal/cloudcheck.py +29 -12
  86. bbot/modules/internal/dnsresolve.py +22 -22
  87. bbot/modules/internal/excavate.py +97 -59
  88. bbot/modules/internal/speculate.py +41 -32
  89. bbot/modules/internetdb.py +4 -2
  90. bbot/modules/ip2location.py +3 -5
  91. bbot/modules/ipneighbor.py +1 -1
  92. bbot/modules/ipstack.py +3 -8
  93. bbot/modules/jadx.py +87 -0
  94. bbot/modules/leakix.py +11 -10
  95. bbot/modules/myssl.py +2 -2
  96. bbot/modules/newsletters.py +2 -2
  97. bbot/modules/otx.py +5 -3
  98. bbot/modules/output/asset_inventory.py +7 -7
  99. bbot/modules/output/base.py +1 -1
  100. bbot/modules/output/csv.py +1 -1
  101. bbot/modules/output/http.py +20 -14
  102. bbot/modules/output/mysql.py +51 -0
  103. bbot/modules/output/neo4j.py +7 -2
  104. bbot/modules/output/postgres.py +49 -0
  105. bbot/modules/output/slack.py +0 -1
  106. bbot/modules/output/sqlite.py +29 -0
  107. bbot/modules/output/stdout.py +2 -2
  108. bbot/modules/output/teams.py +107 -6
  109. bbot/modules/paramminer_headers.py +8 -11
  110. bbot/modules/passivetotal.py +13 -13
  111. bbot/modules/portscan.py +32 -6
  112. bbot/modules/postman.py +50 -126
  113. bbot/modules/postman_download.py +220 -0
  114. bbot/modules/rapiddns.py +3 -8
  115. bbot/modules/report/asn.py +18 -11
  116. bbot/modules/robots.py +3 -3
  117. bbot/modules/securitytrails.py +7 -10
  118. bbot/modules/securitytxt.py +1 -1
  119. bbot/modules/shodan_dns.py +7 -9
  120. bbot/modules/sitedossier.py +1 -1
  121. bbot/modules/skymem.py +2 -2
  122. bbot/modules/social.py +2 -1
  123. bbot/modules/subdomaincenter.py +1 -1
  124. bbot/modules/subdomainradar.py +160 -0
  125. bbot/modules/telerik.py +8 -8
  126. bbot/modules/templates/bucket.py +1 -1
  127. bbot/modules/templates/github.py +22 -14
  128. bbot/modules/templates/postman.py +21 -0
  129. bbot/modules/templates/shodan.py +14 -13
  130. bbot/modules/templates/sql.py +95 -0
  131. bbot/modules/templates/subdomain_enum.py +51 -16
  132. bbot/modules/templates/webhook.py +2 -4
  133. bbot/modules/trickest.py +8 -37
  134. bbot/modules/trufflehog.py +10 -12
  135. bbot/modules/url_manipulation.py +3 -3
  136. bbot/modules/urlscan.py +1 -1
  137. bbot/modules/viewdns.py +1 -1
  138. bbot/modules/virustotal.py +8 -30
  139. bbot/modules/wafw00f.py +1 -1
  140. bbot/modules/wayback.py +1 -1
  141. bbot/modules/wpscan.py +17 -11
  142. bbot/modules/zoomeye.py +11 -6
  143. bbot/presets/baddns-thorough.yml +12 -0
  144. bbot/presets/fast.yml +16 -0
  145. bbot/presets/kitchen-sink.yml +1 -2
  146. bbot/presets/spider.yml +4 -0
  147. bbot/presets/subdomain-enum.yml +7 -7
  148. bbot/presets/web/dotnet-audit.yml +0 -1
  149. bbot/scanner/manager.py +5 -16
  150. bbot/scanner/preset/args.py +46 -26
  151. bbot/scanner/preset/environ.py +7 -2
  152. bbot/scanner/preset/path.py +7 -4
  153. bbot/scanner/preset/preset.py +36 -23
  154. bbot/scanner/scanner.py +172 -62
  155. bbot/scanner/target.py +236 -434
  156. bbot/scripts/docs.py +1 -1
  157. bbot/test/bbot_fixtures.py +13 -3
  158. bbot/test/conftest.py +132 -100
  159. bbot/test/fastapi_test.py +17 -0
  160. bbot/test/owasp_mastg.apk +0 -0
  161. bbot/test/run_tests.sh +4 -4
  162. bbot/test/test.conf +2 -0
  163. bbot/test/test_step_1/test__module__tests.py +0 -1
  164. bbot/test/test_step_1/test_bbot_fastapi.py +79 -0
  165. bbot/test/test_step_1/test_bloom_filter.py +2 -1
  166. bbot/test/test_step_1/test_cli.py +138 -64
  167. bbot/test/test_step_1/test_dns.py +61 -27
  168. bbot/test/test_step_1/test_engine.py +17 -19
  169. bbot/test/test_step_1/test_events.py +183 -30
  170. bbot/test/test_step_1/test_helpers.py +64 -29
  171. bbot/test/test_step_1/test_manager_deduplication.py +1 -1
  172. bbot/test/test_step_1/test_manager_scope_accuracy.py +333 -330
  173. bbot/test/test_step_1/test_modules_basic.py +68 -70
  174. bbot/test/test_step_1/test_presets.py +183 -100
  175. bbot/test/test_step_1/test_python_api.py +7 -2
  176. bbot/test/test_step_1/test_regexes.py +35 -5
  177. bbot/test/test_step_1/test_scan.py +39 -5
  178. bbot/test/test_step_1/test_scope.py +4 -3
  179. bbot/test/test_step_1/test_target.py +242 -145
  180. bbot/test/test_step_1/test_web.py +14 -10
  181. bbot/test/test_step_2/module_tests/base.py +15 -7
  182. bbot/test/test_step_2/module_tests/test_module_anubisdb.py +1 -1
  183. bbot/test/test_step_2/module_tests/test_module_apkpure.py +71 -0
  184. bbot/test/test_step_2/module_tests/test_module_asset_inventory.py +0 -1
  185. bbot/test/test_step_2/module_tests/test_module_azure_realm.py +1 -1
  186. bbot/test/test_step_2/module_tests/test_module_baddns.py +6 -6
  187. bbot/test/test_step_2/module_tests/test_module_baddns_direct.py +62 -0
  188. bbot/test/test_step_2/module_tests/test_module_bevigil.py +29 -2
  189. bbot/test/test_step_2/module_tests/test_module_binaryedge.py +4 -2
  190. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +2 -2
  191. bbot/test/test_step_2/module_tests/test_module_bucket_azure.py +1 -1
  192. bbot/test/test_step_2/module_tests/test_module_bufferoverrun.py +35 -0
  193. bbot/test/test_step_2/module_tests/test_module_builtwith.py +2 -2
  194. bbot/test/test_step_2/module_tests/test_module_bypass403.py +1 -1
  195. bbot/test/test_step_2/module_tests/test_module_c99.py +126 -0
  196. bbot/test/test_step_2/module_tests/test_module_censys.py +4 -1
  197. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +4 -0
  198. bbot/test/test_step_2/module_tests/test_module_code_repository.py +11 -1
  199. bbot/test/test_step_2/module_tests/test_module_columbus.py +1 -1
  200. bbot/test/test_step_2/module_tests/test_module_credshed.py +3 -3
  201. bbot/test/test_step_2/module_tests/test_module_dastardly.py +2 -1
  202. bbot/test/test_step_2/module_tests/test_module_dehashed.py +2 -2
  203. bbot/test/test_step_2/module_tests/test_module_digitorus.py +1 -1
  204. bbot/test/test_step_2/module_tests/test_module_discord.py +1 -1
  205. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +103 -0
  206. bbot/test/test_step_2/module_tests/test_module_dnsbrute.py +9 -10
  207. bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py +1 -2
  208. bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py +1 -2
  209. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +4 -4
  210. bbot/test/test_step_2/module_tests/test_module_dnstlsrpt.py +64 -0
  211. bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py +0 -8
  212. bbot/test/test_step_2/module_tests/test_module_excavate.py +28 -48
  213. bbot/test/test_step_2/module_tests/test_module_extractous.py +54 -0
  214. bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py +1 -1
  215. bbot/test/test_step_2/module_tests/test_module_filedownload.py +14 -14
  216. bbot/test/test_step_2/module_tests/test_module_git_clone.py +2 -2
  217. bbot/test/test_step_2/module_tests/test_module_github_org.py +19 -8
  218. bbot/test/test_step_2/module_tests/test_module_github_workflows.py +1 -1
  219. bbot/test/test_step_2/module_tests/test_module_gitlab.py +9 -4
  220. bbot/test/test_step_2/module_tests/test_module_google_playstore.py +83 -0
  221. bbot/test/test_step_2/module_tests/test_module_gowitness.py +4 -6
  222. bbot/test/test_step_2/module_tests/test_module_host_header.py +1 -1
  223. bbot/test/test_step_2/module_tests/test_module_http.py +4 -4
  224. bbot/test/test_step_2/module_tests/test_module_httpx.py +10 -8
  225. bbot/test/test_step_2/module_tests/test_module_hunterio.py +68 -4
  226. bbot/test/test_step_2/module_tests/test_module_jadx.py +55 -0
  227. bbot/test/test_step_2/module_tests/test_module_json.py +22 -9
  228. bbot/test/test_step_2/module_tests/test_module_leakix.py +7 -3
  229. bbot/test/test_step_2/module_tests/test_module_mysql.py +76 -0
  230. bbot/test/test_step_2/module_tests/test_module_myssl.py +1 -1
  231. bbot/test/test_step_2/module_tests/test_module_neo4j.py +1 -1
  232. bbot/test/test_step_2/module_tests/test_module_newsletters.py +16 -16
  233. bbot/test/test_step_2/module_tests/test_module_ntlm.py +8 -7
  234. bbot/test/test_step_2/module_tests/test_module_oauth.py +1 -1
  235. bbot/test/test_step_2/module_tests/test_module_otx.py +1 -1
  236. bbot/test/test_step_2/module_tests/test_module_paramminer_cookies.py +1 -2
  237. bbot/test/test_step_2/module_tests/test_module_paramminer_getparams.py +0 -6
  238. bbot/test/test_step_2/module_tests/test_module_paramminer_headers.py +2 -9
  239. bbot/test/test_step_2/module_tests/test_module_passivetotal.py +3 -1
  240. bbot/test/test_step_2/module_tests/test_module_pgp.py +2 -2
  241. bbot/test/test_step_2/module_tests/test_module_portscan.py +9 -8
  242. bbot/test/test_step_2/module_tests/test_module_postgres.py +74 -0
  243. bbot/test/test_step_2/module_tests/test_module_postman.py +84 -253
  244. bbot/test/test_step_2/module_tests/test_module_postman_download.py +439 -0
  245. bbot/test/test_step_2/module_tests/test_module_rapiddns.py +93 -1
  246. bbot/test/test_step_2/module_tests/test_module_shodan_dns.py +20 -1
  247. bbot/test/test_step_2/module_tests/test_module_sitedossier.py +2 -2
  248. bbot/test/test_step_2/module_tests/test_module_smuggler.py +14 -14
  249. bbot/test/test_step_2/module_tests/test_module_social.py +11 -1
  250. bbot/test/test_step_2/module_tests/test_module_speculate.py +4 -8
  251. bbot/test/test_step_2/module_tests/test_module_splunk.py +4 -4
  252. bbot/test/test_step_2/module_tests/test_module_sqlite.py +18 -0
  253. bbot/test/test_step_2/module_tests/test_module_sslcert.py +1 -1
  254. bbot/test/test_step_2/module_tests/test_module_stdout.py +5 -3
  255. bbot/test/test_step_2/module_tests/test_module_subdomaincenter.py +1 -1
  256. bbot/test/test_step_2/module_tests/test_module_subdomainradar.py +208 -0
  257. bbot/test/test_step_2/module_tests/test_module_subdomains.py +1 -1
  258. bbot/test/test_step_2/module_tests/test_module_teams.py +8 -6
  259. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  260. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +317 -14
  261. bbot/test/test_step_2/module_tests/test_module_viewdns.py +1 -1
  262. bbot/test/test_step_2/module_tests/test_module_wayback.py +1 -1
  263. bbot/test/test_step_2/template_tests/test_template_subdomain_enum.py +2 -2
  264. bbot/wordlists/devops_mutations.txt +1 -1
  265. bbot/wordlists/ffuf_shortname_candidates.txt +1 -1
  266. bbot/wordlists/nameservers.txt +1 -1
  267. bbot/wordlists/paramminer_headers.txt +1 -1
  268. bbot/wordlists/paramminer_parameters.txt +1 -1
  269. bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt +1 -1
  270. bbot/wordlists/valid_url_schemes.txt +1 -1
  271. {bbot-2.0.1.4720rc0.dist-info → bbot-2.3.0.5401rc0.dist-info}/METADATA +48 -18
  272. bbot-2.3.0.5401rc0.dist-info/RECORD +421 -0
  273. {bbot-2.0.1.4720rc0.dist-info → bbot-2.3.0.5401rc0.dist-info}/WHEEL +1 -1
  274. bbot/modules/unstructured.py +0 -163
  275. bbot/test/test_step_2/module_tests/test_module_unstructured.py +0 -102
  276. bbot-2.0.1.4720rc0.dist-info/RECORD +0 -387
  277. {bbot-2.0.1.4720rc0.dist-info → bbot-2.3.0.5401rc0.dist-info}/LICENSE +0 -0
  278. {bbot-2.0.1.4720rc0.dist-info → bbot-2.3.0.5401rc0.dist-info}/entry_points.txt +0 -0
@@ -3,11 +3,11 @@ from contextlib import suppress
3
3
 
4
4
  from bbot.errors import ValidationError
5
5
  from bbot.core.helpers.dns.engine import all_rdtypes
6
- from bbot.modules.base import InterceptModule, BaseModule
7
6
  from bbot.core.helpers.dns.helpers import extract_targets
7
+ from bbot.modules.base import BaseInterceptModule, BaseModule
8
8
 
9
9
 
10
- class DNSResolve(InterceptModule):
10
+ class DNSResolve(BaseInterceptModule):
11
11
  watched_events = ["*"]
12
12
  _priority = 1
13
13
  scope_distance_modifier = None
@@ -16,12 +16,6 @@ class DNSResolve(InterceptModule):
16
16
  _name = "host"
17
17
  _type = "internal"
18
18
 
19
- def _outgoing_dedup_hash(self, event):
20
- # this exists to ensure a second, more interesting host isn't passed up
21
- # because its ugly cousin spent its one dedup token before it arrived
22
- # by removing those race conditions, this makes for more consistent results
23
- return hash((event, self.name, event.always_emit))
24
-
25
19
  @property
26
20
  def module_threads(self):
27
21
  return self.dns_config.get("threads", 25)
@@ -85,11 +79,21 @@ class DNSResolve(InterceptModule):
85
79
  await self.resolve_event(main_host_event, types=non_minimal_rdtypes)
86
80
  # check for wildcards if the event is within the scan's search distance
87
81
  if new_event and main_host_event.scope_distance <= self.scan.scope_search_distance:
88
- await self.handle_wildcard_event(main_host_event)
82
+ event_data_changed = await self.handle_wildcard_event(main_host_event)
83
+ if event_data_changed:
84
+ # since data has changed, we check again whether it's a duplicate
85
+ if self.scan.ingress_module.is_incoming_duplicate(event, add=True):
86
+ if not event._graph_important:
87
+ return False, "event was already emitted by its module"
88
+ else:
89
+ self.debug(
90
+ f"Event {event} was already emitted by its module, but it's graph-important so it gets a pass"
91
+ )
89
92
 
90
93
  # if there weren't any DNS children and it's not an IP address, tag as unresolved
91
94
  if not main_host_event.raw_dns_records and not event_is_ip:
92
95
  main_host_event.add_tag("unresolved")
96
+ main_host_event.type = "DNS_NAME_UNRESOLVED"
93
97
 
94
98
  # main_host_event.add_tag(f"resolve-distance-{main_host_event.dns_resolve_distance}")
95
99
 
@@ -109,10 +113,6 @@ class DNSResolve(InterceptModule):
109
113
  if not self.minimal:
110
114
  await self.emit_dns_children(main_host_event)
111
115
 
112
- # If the event is unresolved, change its type to DNS_NAME_UNRESOLVED
113
- if main_host_event.type == "DNS_NAME" and "unresolved" in main_host_event.tags:
114
- main_host_event.type = "DNS_NAME_UNRESOLVED"
115
-
116
116
  # emit the main DNS_NAME or IP_ADDRESS
117
117
  if (
118
118
  new_event
@@ -131,9 +131,9 @@ class DNSResolve(InterceptModule):
131
131
  event.host, rdtypes=rdtypes, raw_dns_records=event.raw_dns_records
132
132
  )
133
133
  for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items():
134
- if is_wildcard == False:
134
+ if is_wildcard is False:
135
135
  continue
136
- elif is_wildcard == True:
136
+ elif is_wildcard is True:
137
137
  event.add_tag("wildcard")
138
138
  wildcard_tag = "wildcard"
139
139
  else:
@@ -142,16 +142,16 @@ class DNSResolve(InterceptModule):
142
142
  event.add_tag(f"{rdtype}-{wildcard_tag}")
143
143
 
144
144
  # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com)
145
- if wildcard_rdtypes and not "target" in event.tags:
145
+ if wildcard_rdtypes and "target" not in event.tags:
146
146
  # these are the rdtypes that have wildcards
147
147
  wildcard_rdtypes_set = set(wildcard_rdtypes)
148
148
  # consider the event a full wildcard if all its records are wildcards
149
149
  event_is_wildcard = False
150
150
  if wildcard_rdtypes_set:
151
- event_is_wildcard = all(r[0] == True for r in wildcard_rdtypes.values())
151
+ event_is_wildcard = all(r[0] is True for r in wildcard_rdtypes.values())
152
152
 
153
153
  if event_is_wildcard:
154
- if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."):
154
+ if event.type in ("DNS_NAME",) and "_wildcard" not in event.data.split("."):
155
155
  wildcard_parent = self.helpers.parent_domain(event.host)
156
156
  for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items():
157
157
  if _is_wildcard:
@@ -161,6 +161,8 @@ class DNSResolve(InterceptModule):
161
161
  if wildcard_data != event.data:
162
162
  self.debug(f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"')
163
163
  event.data = wildcard_data
164
+ return True
165
+ return False
164
166
 
165
167
  async def emit_dns_children(self, event):
166
168
  for rdtype, children in event.dns_children.items():
@@ -271,7 +273,7 @@ class DNSResolve(InterceptModule):
271
273
  # tag event with errors
272
274
  for rdtype, errors in dns_errors.items():
273
275
  # only consider it an error if there weren't any results for that rdtype
274
- if errors and not rdtype in event.dns_children:
276
+ if errors and rdtype not in event.dns_children:
275
277
  event.add_tag(f"{rdtype}-error")
276
278
 
277
279
  def get_dns_parent(self, event):
@@ -304,9 +306,7 @@ class DNSResolve(InterceptModule):
304
306
  @property
305
307
  def emit_raw_records(self):
306
308
  if self._emit_raw_records is None:
307
- watching_raw_records = any(
308
- ["RAW_DNS_RECORD" in m.get_watched_events() for m in self.scan.modules.values()]
309
- )
309
+ watching_raw_records = any("RAW_DNS_RECORD" in m.get_watched_events() for m in self.scan.modules.values())
310
310
  omitted_event_types = self.scan.config.get("omit_event_types", [])
311
311
  omit_raw_records = "RAW_DNS_RECORD" in omitted_event_types
312
312
  self._emit_raw_records = watching_raw_records or not omit_raw_records
@@ -6,6 +6,7 @@ import regex as re
6
6
  from pathlib import Path
7
7
  from bbot.errors import ExcavateError
8
8
  import bbot.core.helpers.regexes as bbot_regexes
9
+ from bbot.modules.base import BaseInterceptModule
9
10
  from bbot.modules.internal.base import BaseInternalModule
10
11
  from urllib.parse import urlparse, urljoin, parse_qs, urlunparse
11
12
 
@@ -61,7 +62,6 @@ def _exclude_key(original_dict, key_to_exclude):
61
62
 
62
63
 
63
64
  def extract_params_url(parsed_url):
64
-
65
65
  params = parse_qs(parsed_url.query)
66
66
  flat_params = {k: v[0] for k, v in params.items()}
67
67
 
@@ -93,7 +93,6 @@ def extract_params_location(location_header_value, original_parsed_url):
93
93
 
94
94
 
95
95
  class YaraRuleSettings:
96
-
97
96
  def __init__(self, description, tags, emit_match):
98
97
  self.description = description
99
98
  self.tags = tags
@@ -153,7 +152,9 @@ class ExcavateRule:
153
152
  yara_rule_settings = YaraRuleSettings(description, tags, emit_match)
154
153
  yara_results = {}
155
154
  for h in r.strings:
156
- yara_results[h.identifier.lstrip("$")] = sorted(set([i.matched_data.decode("utf-8") for i in h.instances]))
155
+ yara_results[h.identifier.lstrip("$")] = sorted(
156
+ {i.matched_data.decode("utf-8", errors="ignore") for i in h.instances}
157
+ )
157
158
  await self.process(yara_results, event, yara_rule_settings, discovery_context)
158
159
 
159
160
  async def process(self, yara_results, event, yara_rule_settings, discovery_context):
@@ -179,7 +180,7 @@ class ExcavateRule:
179
180
  Returns:
180
181
  None
181
182
  """
182
- for identifier, results in yara_results.items():
183
+ for results in yara_results.values():
183
184
  for result in results:
184
185
  event_data = {"description": f"{discovery_context} {yara_rule_settings.description}"}
185
186
  if yara_rule_settings.emit_match:
@@ -260,7 +261,6 @@ class ExcavateRule:
260
261
 
261
262
 
262
263
  class CustomExtractor(ExcavateRule):
263
-
264
264
  def __init__(self, excavate):
265
265
  super().__init__(excavate)
266
266
 
@@ -279,7 +279,7 @@ class CustomExtractor(ExcavateRule):
279
279
  await self.report(event_data, event, yara_rule_settings, discovery_context)
280
280
 
281
281
 
282
- class excavate(BaseInternalModule):
282
+ class excavate(BaseInternalModule, BaseInterceptModule):
283
283
  """
284
284
  Example (simple) Excavate Rules:
285
285
 
@@ -310,10 +310,11 @@ class excavate(BaseInternalModule):
310
310
  "custom_yara_rules": "Include custom Yara rules",
311
311
  }
312
312
  scope_distance_modifier = None
313
+ accept_dupes = False
313
314
 
314
315
  _module_threads = 8
315
316
 
316
- parameter_blacklist = set(
317
+ parameter_blacklist = {
317
318
  p.lower()
318
319
  for p in [
319
320
  "__VIEWSTATE",
@@ -328,7 +329,7 @@ class excavate(BaseInternalModule):
328
329
  "JSESSIONID",
329
330
  "PHPSESSID",
330
331
  ]
331
- )
332
+ }
332
333
 
333
334
  yara_rule_name_regex = re.compile(r"rule\s(\w+)\s{")
334
335
  yara_rule_regex = re.compile(r"(?s)((?:rule\s+\w+\s*{[^{}]*(?:{[^{}]*}[^{}]*)*[^{}]*(?:/\S*?}[^/]*?/)*)*})")
@@ -354,7 +355,6 @@ class excavate(BaseInternalModule):
354
355
  )
355
356
 
356
357
  class ParameterExtractor(ExcavateRule):
357
-
358
358
  yara_rules = {}
359
359
 
360
360
  class ParameterExtractorRule:
@@ -368,7 +368,6 @@ class excavate(BaseInternalModule):
368
368
  self.result = result
369
369
 
370
370
  class GetJquery(ParameterExtractorRule):
371
-
372
371
  name = "GET jquery"
373
372
  discovery_regex = r"/\$.get\([^\)].+\)/ nocase"
374
373
  extraction_regex = re.compile(r"\$.get\([\'\"](.+)[\'\"].+(\{.+\})\)")
@@ -389,8 +388,12 @@ class excavate(BaseInternalModule):
389
388
  for action, extracted_parameters in extracted_results:
390
389
  extracted_parameters_dict = self.convert_to_dict(extracted_parameters)
391
390
  for parameter_name, original_value in extracted_parameters_dict.items():
392
- yield self.output_type, parameter_name, original_value, action, _exclude_key(
393
- extracted_parameters_dict, parameter_name
391
+ yield (
392
+ self.output_type,
393
+ parameter_name,
394
+ original_value,
395
+ action,
396
+ _exclude_key(extracted_parameters_dict, parameter_name),
394
397
  )
395
398
 
396
399
  class PostJquery(GetJquery):
@@ -414,8 +417,12 @@ class excavate(BaseInternalModule):
414
417
  k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in query_strings.items()
415
418
  }
416
419
  for parameter_name, original_value in query_strings_dict.items():
417
- yield self.output_type, parameter_name, original_value, url, _exclude_key(
418
- query_strings_dict, parameter_name
420
+ yield (
421
+ self.output_type,
422
+ parameter_name,
423
+ original_value,
424
+ url,
425
+ _exclude_key(query_strings_dict, parameter_name),
419
426
  )
420
427
 
421
428
  class GetForm(ParameterExtractorRule):
@@ -440,8 +447,12 @@ class excavate(BaseInternalModule):
440
447
  form_parameters[parameter_name] = original_value
441
448
 
442
449
  for parameter_name, original_value in form_parameters.items():
443
- yield self.output_type, parameter_name, original_value, form_action, _exclude_key(
444
- form_parameters, parameter_name
450
+ yield (
451
+ self.output_type,
452
+ parameter_name,
453
+ original_value,
454
+ form_action,
455
+ _exclude_key(form_parameters, parameter_name),
445
456
  )
446
457
 
447
458
  class PostForm(GetForm):
@@ -460,7 +471,7 @@ class excavate(BaseInternalModule):
460
471
  self.parameterExtractorCallbackDict[r.__name__] = r
461
472
  regexes_component_list.append(f"${r.__name__} = {r.discovery_regex}")
462
473
  regexes_component = " ".join(regexes_component_list)
463
- self.yara_rules[f"parameter_extraction"] = (
474
+ self.yara_rules["parameter_extraction"] = (
464
475
  rf'rule parameter_extraction {{meta: description = "contains POST form" strings: {regexes_component} condition: any of them}}'
465
476
  )
466
477
 
@@ -481,7 +492,6 @@ class excavate(BaseInternalModule):
481
492
  endpoint,
482
493
  additional_params,
483
494
  ) in extracted_params:
484
-
485
495
  self.excavate.debug(
486
496
  f"Found Parameter [{parameter_name}] in [{parameterExtractorSubModule.name}] ParameterExtractor Submodule"
487
497
  )
@@ -493,8 +503,7 @@ class excavate(BaseInternalModule):
493
503
  )
494
504
 
495
505
  if self.excavate.helpers.validate_parameter(parameter_name, parameter_type):
496
-
497
- if self.excavate.in_bl(parameter_name) == False:
506
+ if self.excavate.in_bl(parameter_name) is False:
498
507
  parsed_url = urlparse(url)
499
508
  description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)"
500
509
  data = {
@@ -523,13 +532,11 @@ class excavate(BaseInternalModule):
523
532
  async def process(self, yara_results, event, yara_rule_settings, discovery_context):
524
533
  for identifier in yara_results.keys():
525
534
  for csp_str in yara_results[identifier]:
526
- domains = await self.helpers.re.findall(bbot_regexes.dns_name_regex, csp_str)
527
- unique_domains = set(domains)
528
- for domain in unique_domains:
535
+ domains = await self.excavate.scan.extract_in_scope_hostnames(csp_str)
536
+ for domain in domains:
529
537
  await self.report(domain, event, yara_rule_settings, discovery_context, event_type="DNS_NAME")
530
538
 
531
539
  class EmailExtractor(ExcavateRule):
532
-
533
540
  yara_rules = {
534
541
  "email": 'rule email { meta: description = "contains email address" strings: $email = /[^\\W_][\\w\\-\\.\\+\']{0,100}@[a-zA-Z0-9\\-]{1,100}(\\.[a-zA-Z0-9\\-]{1,100})*\\.[a-zA-Z]{2,63}/ nocase fullword condition: $email }',
535
542
  }
@@ -548,7 +555,6 @@ class excavate(BaseInternalModule):
548
555
  }
549
556
 
550
557
  class ErrorExtractor(ExcavateRule):
551
-
552
558
  signatures = {
553
559
  "PHP_1": r"/\.php on line [0-9]+/",
554
560
  "PHP_2": r"/\.php<\/b> on line <b>[0-9]+/",
@@ -573,7 +579,7 @@ class excavate(BaseInternalModule):
573
579
  for signature_name, signature in self.signatures.items():
574
580
  signature_component_list.append(rf"${signature_name} = {signature}")
575
581
  signature_component = " ".join(signature_component_list)
576
- self.yara_rules[f"error_detection"] = (
582
+ self.yara_rules["error_detection"] = (
577
583
  f'rule error_detection {{meta: description = "contains a verbose error message" strings: {signature_component} condition: any of them}}'
578
584
  )
579
585
 
@@ -586,7 +592,6 @@ class excavate(BaseInternalModule):
586
592
  await self.report(event_data, event, yara_rule_settings, discovery_context, event_type="FINDING")
587
593
 
588
594
  class SerializationExtractor(ExcavateRule):
589
-
590
595
  regexes = {
591
596
  "Java": re.compile(r"[^a-zA-Z0-9\/+]rO0[a-zA-Z0-9+\/]+={0,2}"),
592
597
  "DOTNET": re.compile(r"[^a-zA-Z0-9\/+]AAEAAAD\/\/[a-zA-Z0-9\/+]+={0,2}"),
@@ -603,7 +608,7 @@ class excavate(BaseInternalModule):
603
608
  for regex_name, regex in self.regexes.items():
604
609
  regexes_component_list.append(rf"${regex_name} = /\b{regex.pattern}/ nocase")
605
610
  regexes_component = " ".join(regexes_component_list)
606
- self.yara_rules[f"serialization_detection"] = (
611
+ self.yara_rules["serialization_detection"] = (
607
612
  f'rule serialization_detection {{meta: description = "contains a possible serialized object" strings: {regexes_component} condition: any of them}}'
608
613
  )
609
614
 
@@ -616,7 +621,6 @@ class excavate(BaseInternalModule):
616
621
  await self.report(event_data, event, yara_rule_settings, discovery_context, event_type="FINDING")
617
622
 
618
623
  class FunctionalityExtractor(ExcavateRule):
619
-
620
624
  yara_rules = {
621
625
  "File_Upload_Functionality": r'rule File_Upload_Functionality { meta: description = "contains file upload functionality" strings: $fileuploadfunc = /<input[^>]+type=["\']?file["\']?[^>]+>/ nocase condition: $fileuploadfunc }',
622
626
  "Web_Service_WSDL": r'rule Web_Service_WSDL { meta: emit_match = "True" description = "contains a web service WSDL URL" strings: $wsdl = /https?:\/\/[^\s]*\.(wsdl)/ nocase condition: $wsdl }',
@@ -630,7 +634,7 @@ class excavate(BaseInternalModule):
630
634
  scheme_blacklist = ["javascript", "mailto", "tel", "data", "vbscript", "about", "file"]
631
635
 
632
636
  async def process(self, yara_results, event, yara_rule_settings, discovery_context):
633
- for identifier, results in yara_results.items():
637
+ for results in yara_results.values():
634
638
  for url_str in results:
635
639
  scheme = url_str.split("://")[0]
636
640
  if scheme in self.scheme_blacklist:
@@ -652,7 +656,8 @@ class excavate(BaseInternalModule):
652
656
  continue
653
657
  if parsed_url.scheme in ["http", "https"]:
654
658
  continue
655
- abort_if = lambda e: e.scope_distance > 0
659
+ def abort_if(e):
660
+ return e.scope_distance > 0
656
661
  finding_data = {"host": str(host), "description": f"Non-HTTP URI: {parsed_url.geturl()}"}
657
662
  await self.report(finding_data, event, yara_rule_settings, discovery_context, abort_if=abort_if)
658
663
  protocol_data = {"protocol": parsed_url.scheme, "host": str(host)}
@@ -669,15 +674,38 @@ class excavate(BaseInternalModule):
669
674
 
670
675
  class URLExtractor(ExcavateRule):
671
676
  yara_rules = {
672
- "url_full": r'rule url_full { meta: tags = "spider-danger" description = "contains full URL" strings: $url_full = /https?:\/\/([\w\.-]+)([:\/\w\.-]*)/ condition: $url_full }',
673
- "url_attr": r'rule url_attr { meta: tags = "spider-danger" description = "contains tag with src or href attribute" strings: $url_attr = /<[^>]+(href|src)=["\'][^"\']*["\'][^>]*>/ condition: $url_attr }',
677
+ "url_full": (
678
+ r"""
679
+ rule url_full {
680
+ meta:
681
+ tags = "spider-danger"
682
+ description = "contains full URL"
683
+ strings:
684
+ $url_full = /https?:\/\/([\w\.-]+)(:\d{1,5})?([\/\w\.-]*)/
685
+ condition:
686
+ $url_full
687
+ }
688
+ """
689
+ ),
690
+ "url_attr": (
691
+ r"""
692
+ rule url_attr {
693
+ meta:
694
+ tags = "spider-danger"
695
+ description = "contains tag with src or href attribute"
696
+ strings:
697
+ $url_attr = /<[^>]+(href|src)=["\'][^"\']*["\'][^>]*>/
698
+ condition:
699
+ $url_attr
700
+ }
701
+ """
702
+ ),
674
703
  }
675
704
  full_url_regex = re.compile(r"(https?)://((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)")
676
705
  full_url_regex_strict = re.compile(r"^(https?):\/\/([\w.-]+)(?::\d{1,5})?(\/[\w\/\.-]*)?(\?[^\s]+)?$")
677
706
  tag_attribute_regex = bbot_regexes.tag_attribute_regex
678
707
 
679
708
  async def process(self, yara_results, event, yara_rule_settings, discovery_context):
680
-
681
709
  for identifier, results in yara_results.items():
682
710
  urls_found = 0
683
711
  final_url = ""
@@ -741,20 +769,33 @@ class excavate(BaseInternalModule):
741
769
 
742
770
  def __init__(self, excavate):
743
771
  super().__init__(excavate)
744
- regexes_component_list = []
745
- if excavate.scan.dns_regexes_yara:
746
- for i, r in enumerate(excavate.scan.dns_regexes_yara):
747
- regexes_component_list.append(rf"$dns_name_{i} = /\b{r.pattern}/ nocase")
748
- regexes_component = " ".join(regexes_component_list)
749
- self.yara_rules[f"hostname_extraction"] = (
750
- f'rule hostname_extraction {{meta: description = "matches DNS hostname pattern derived from target(s)" strings: {regexes_component} condition: any of them}}'
751
- )
772
+ if excavate.scan.dns_yara_rules_uncompiled:
773
+ self.yara_rules["hostname_extraction"] = excavate.scan.dns_yara_rules_uncompiled
752
774
 
753
775
  async def process(self, yara_results, event, yara_rule_settings, discovery_context):
754
776
  for identifier in yara_results.keys():
755
777
  for domain_str in yara_results[identifier]:
756
778
  await self.report(domain_str, event, yara_rule_settings, discovery_context, event_type="DNS_NAME")
757
779
 
780
+ class LoginPageExtractor(ExcavateRule):
781
+ yara_rules = {
782
+ "login_page": r"""
783
+ rule login_page {
784
+ meta:
785
+ description = "Detects login pages with username and password fields"
786
+ strings:
787
+ $username_field = /<input[^>]+name=["']?(user|login|email)/ nocase
788
+ $password_field = /<input[^>]+name=["']?passw?/ nocase
789
+ condition:
790
+ $username_field and $password_field
791
+ }
792
+ """
793
+ }
794
+
795
+ async def process(self, yara_results, event, yara_rule_settings, discovery_context):
796
+ if yara_results:
797
+ event.add_tag("login-page")
798
+
758
799
  def add_yara_rule(self, rule_name, rule_content, rule_instance):
759
800
  rule_instance.name = rule_name
760
801
  self.yara_rules_dict[rule_name] = rule_content
@@ -777,7 +818,7 @@ class excavate(BaseInternalModule):
777
818
  self.parameter_extraction = bool(modules_WEB_PARAMETER)
778
819
 
779
820
  self.retain_querystring = False
780
- if self.config.get("retain_querystring", False) == True:
821
+ if self.config.get("retain_querystring", False) is True:
781
822
  self.retain_querystring = True
782
823
 
783
824
  for module in self.scan.modules.values():
@@ -805,9 +846,9 @@ class excavate(BaseInternalModule):
805
846
  if Path(self.custom_yara_rules).is_file():
806
847
  with open(self.custom_yara_rules) as f:
807
848
  rules_content = f.read()
808
- self.debug(f"Successfully loaded secrets file [{self.custom_yara_rules}]")
849
+ self.debug(f"Successfully loaded custom yara rules file [{self.custom_yara_rules}]")
809
850
  else:
810
- self.debug(f"Custom secrets is NOT a file. Will attempt to treat it as rule content")
851
+ self.debug("Custom yara rules file is NOT a file. Will attempt to treat it as rule content")
811
852
  rules_content = self.custom_yara_rules
812
853
 
813
854
  self.debug(f"Final combined yara rule contents: {rules_content}")
@@ -816,13 +857,11 @@ class excavate(BaseInternalModule):
816
857
  try:
817
858
  yara.compile(source=rule_content)
818
859
  except yara.SyntaxError as e:
819
- self.hugewarning(f"Custom Yara rule failed to compile: {e}")
820
- return False
860
+ return False, f"Custom Yara rule failed to compile: {e}"
821
861
 
822
862
  rule_match = await self.helpers.re.search(self.yara_rule_name_regex, rule_content)
823
863
  if not rule_match:
824
- self.hugewarning(f"Custom Yara formatted incorrectly: could not find rule name")
825
- return False
864
+ return False, "Custom Yara formatted incorrectly: could not find rule name"
826
865
 
827
866
  rule_name = rule_match.groups(1)[0]
828
867
  c = CustomExtractor(self)
@@ -836,11 +875,13 @@ class excavate(BaseInternalModule):
836
875
  yara.set_config(max_match_data=yara_max_match_data)
837
876
  yara_rules_combined = "\n".join(self.yara_rules_dict.values())
838
877
  try:
878
+ self.info(f"Compiling {len(self.yara_rules_dict):,} YARA rules")
879
+ for rule_name, rule_content in self.yara_rules_dict.items():
880
+ self.debug(f" - {rule_name}")
839
881
  self.yara_rules = yara.compile(source=yara_rules_combined)
840
882
  except yara.SyntaxError as e:
841
- self.hugewarning(f"Yara Rules failed to compile with error: [{e}]")
842
883
  self.debug(yara_rules_combined)
843
- return False
884
+ return False, f"Yara Rules failed to compile with error: [{e}]"
844
885
 
845
886
  # pre-load valid URL schemes
846
887
  valid_schemes_filename = self.helpers.wordlist_dir / "valid_url_schemes.txt"
@@ -857,7 +898,6 @@ class excavate(BaseInternalModule):
857
898
  decoded_data = await self.helpers.re.recursive_decode(data)
858
899
 
859
900
  if self.parameter_extraction:
860
-
861
901
  content_type_lower = content_type.lower() if content_type else ""
862
902
  extraction_map = {
863
903
  "json": self.helpers.extract_params_json,
@@ -894,12 +934,11 @@ class excavate(BaseInternalModule):
894
934
  self.hugewarning(f"YARA Rule {rule_name} not found in pre-compiled rules")
895
935
 
896
936
  async def handle_event(self, event):
897
-
898
937
  if event.type == "HTTP_RESPONSE":
899
938
  # Harvest GET parameters from URL, if it came directly from the target, and parameter extraction is enabled
900
939
  if (
901
- self.parameter_extraction == True
902
- and self.url_querystring_remove == False
940
+ self.parameter_extraction is True
941
+ and self.url_querystring_remove is False
903
942
  and str(event.parent.parent.module) == "TARGET"
904
943
  ):
905
944
  self.debug(f"Processing target URL [{urlunparse(event.parsed_url)}] for GET parameters")
@@ -911,7 +950,7 @@ class excavate(BaseInternalModule):
911
950
  regex_name,
912
951
  additional_params,
913
952
  ) in extract_params_url(event.parsed_url):
914
- if self.in_bl(parameter_name) == False:
953
+ if self.in_bl(parameter_name) is False:
915
954
  description = f"HTTP Extracted Parameter [{parameter_name}] (Target URL)"
916
955
  data = {
917
956
  "host": parsed_url.hostname,
@@ -947,7 +986,7 @@ class excavate(BaseInternalModule):
947
986
  cookie_name = header_value.split("=")[0]
948
987
  cookie_value = header_value.split("=")[1].split(";")[0]
949
988
 
950
- if self.in_bl(cookie_value) == False:
989
+ if self.in_bl(cookie_value) is False:
951
990
  self.assigned_cookies[cookie_name] = cookie_value
952
991
  description = f"Set-Cookie Assigned Cookie [{cookie_name}]"
953
992
  data = {
@@ -983,7 +1022,6 @@ class excavate(BaseInternalModule):
983
1022
 
984
1023
  # Try to extract parameters from the redirect URL
985
1024
  if self.parameter_extraction:
986
-
987
1025
  for (
988
1026
  method,
989
1027
  parsed_url,
@@ -992,7 +1030,7 @@ class excavate(BaseInternalModule):
992
1030
  regex_name,
993
1031
  additional_params,
994
1032
  ) in extract_params_location(header_value, event.parsed_url):
995
- if self.in_bl(parameter_name) == False:
1033
+ if self.in_bl(parameter_name) is False:
996
1034
  description = f"HTTP Extracted Parameter [{parameter_name}] (Location Header)"
997
1035
  data = {
998
1036
  "host": parsed_url.hostname,