bbot 2.6.0.6879rc0__py3-none-any.whl → 2.7.2.7254rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. See the associated advisory on the registry's package page for more details.

Files changed (75)
  1. bbot/__init__.py +1 -1
  2. bbot/core/engine.py +1 -1
  3. bbot/core/flags.py +1 -0
  4. bbot/core/helpers/bloom.py +6 -7
  5. bbot/core/helpers/dns/dns.py +0 -1
  6. bbot/core/helpers/dns/engine.py +0 -2
  7. bbot/core/helpers/files.py +2 -2
  8. bbot/core/helpers/git.py +17 -0
  9. bbot/core/helpers/misc.py +1 -0
  10. bbot/core/helpers/ntlm.py +0 -2
  11. bbot/core/helpers/regex.py +1 -1
  12. bbot/core/modules.py +0 -54
  13. bbot/defaults.yml +4 -2
  14. bbot/modules/apkpure.py +1 -1
  15. bbot/modules/base.py +11 -5
  16. bbot/modules/dnsbimi.py +1 -4
  17. bbot/modules/dnsdumpster.py +35 -52
  18. bbot/modules/dnstlsrpt.py +0 -6
  19. bbot/modules/docker_pull.py +1 -1
  20. bbot/modules/emailformat.py +17 -1
  21. bbot/modules/filedownload.py +1 -1
  22. bbot/modules/git_clone.py +47 -22
  23. bbot/modules/gitdumper.py +4 -14
  24. bbot/modules/github_workflows.py +1 -1
  25. bbot/modules/gitlab_com.py +31 -0
  26. bbot/modules/gitlab_onprem.py +84 -0
  27. bbot/modules/gowitness.py +0 -6
  28. bbot/modules/graphql_introspection.py +5 -2
  29. bbot/modules/httpx.py +2 -0
  30. bbot/modules/iis_shortnames.py +0 -7
  31. bbot/modules/internal/unarchive.py +9 -3
  32. bbot/modules/lightfuzz/lightfuzz.py +5 -1
  33. bbot/modules/nuclei.py +1 -1
  34. bbot/modules/output/base.py +0 -5
  35. bbot/modules/postman_download.py +1 -1
  36. bbot/modules/retirejs.py +232 -0
  37. bbot/modules/securitytxt.py +0 -3
  38. bbot/modules/subdomaincenter.py +1 -16
  39. bbot/modules/telerik.py +6 -1
  40. bbot/modules/templates/gitlab.py +98 -0
  41. bbot/modules/trufflehog.py +1 -1
  42. bbot/scanner/manager.py +7 -4
  43. bbot/scanner/scanner.py +1 -1
  44. bbot/scripts/benchmark_report.py +433 -0
  45. bbot/test/benchmarks/__init__.py +2 -0
  46. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  47. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  48. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  49. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  50. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  51. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  52. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  53. bbot/test/test_step_1/test_events.py +0 -1
  54. bbot/test/test_step_1/test_scan.py +1 -8
  55. bbot/test/test_step_2/module_tests/base.py +6 -1
  56. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  57. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  58. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  59. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  60. bbot/test/test_step_2/module_tests/test_module_excavate.py +35 -6
  61. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  62. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  63. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +2 -2
  64. bbot/test/test_step_2/module_tests/test_module_retirejs.py +159 -0
  65. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  66. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/METADATA +7 -4
  67. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/RECORD +70 -60
  68. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/WHEEL +1 -1
  69. bbot/modules/censys.py +0 -98
  70. bbot/modules/gitlab.py +0 -141
  71. bbot/modules/zoomeye.py +0 -77
  72. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  73. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  74. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/entry_points.txt +0 -0
  75. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info/licenses}/LICENSE +0 -0
bbot/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # version placeholder (replaced by poetry-dynamic-versioning)
2
- __version__ = "v2.6.0.6879rc"
2
+ __version__ = "v2.7.2.7254rc"
3
3
 
4
4
  from .scanner import Scanner, Preset
5
5
 
bbot/core/engine.py CHANGED
@@ -636,7 +636,7 @@ class EngineServer(EngineBase):
636
636
  """
637
637
  if tasks:
638
638
  try:
639
- done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout)
639
+ done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout)
640
640
  return done
641
641
  except BaseException as e:
642
642
  if isinstance(e, (TimeoutError, asyncio.exceptions.TimeoutError)):
bbot/core/flags.py CHANGED
@@ -6,6 +6,7 @@ flag_descriptions = {
6
6
  "cloud-enum": "Enumerates cloud resources",
7
7
  "code-enum": "Find public code repositories and search them for secrets etc.",
8
8
  "deadly": "Highly aggressive",
9
+ "download": "Modules that download files, apps, or repositories",
9
10
  "email-enum": "Enumerates email addresses",
10
11
  "iis-shortnames": "Scans for IIS Shortname vulnerability",
11
12
  "passive": "Never connects to target systems",
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  import mmh3
3
3
  import mmap
4
+ import xxhash
4
5
 
5
6
 
6
7
  class BloomFilter:
@@ -55,14 +56,12 @@ class BloomFilter:
55
56
  if not isinstance(item, str):
56
57
  item = str(item)
57
58
  item = item.encode("utf-8")
58
- return [abs(hash(item)) % self.size, abs(mmh3.hash(item)) % self.size, abs(self._fnv1a_hash(item)) % self.size]
59
59
 
60
- def _fnv1a_hash(self, data):
61
- hash = 0x811C9DC5 # 2166136261
62
- for byte in data:
63
- hash ^= byte
64
- hash = (hash * 0x01000193) % 2**32 # 16777619
65
- return hash
60
+ return [
61
+ abs(hash(item)) % self.size,
62
+ abs(mmh3.hash(item)) % self.size,
63
+ abs(xxhash.xxh32(item).intdigest()) % self.size,
64
+ ]
66
65
 
67
66
  def close(self):
68
67
  """Explicitly close the memory-mapped file."""
@@ -38,7 +38,6 @@ class DNSHelper(EngineClient):
38
38
  _wildcard_cache (dict): Cache for wildcard detection results.
39
39
  _dns_cache (LRUCache): Cache for DNS resolution results, limited in size.
40
40
  resolver_file (Path): File containing system's current resolver nameservers.
41
- filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True.
42
41
 
43
42
  Args:
44
43
  parent_helper: The parent helper object with configuration details and utilities.
@@ -86,8 +86,6 @@ class DNSEngine(EngineServer):
86
86
  self._debug = self.dns_config.get("debug", False)
87
87
  self._dns_cache = LRUCache(maxsize=10000)
88
88
 
89
- self.filter_bad_ptrs = self.dns_config.get("filter_ptrs", True)
90
-
91
89
  async def resolve(self, query, **kwargs):
92
90
  """Resolve DNS names and IP addresses to their corresponding results.
93
91
 
@@ -9,7 +9,7 @@ from .misc import rm_at_exit
9
9
  log = logging.getLogger("bbot.core.helpers.files")
10
10
 
11
11
 
12
- def tempfile(self, content, pipe=True):
12
+ def tempfile(self, content, pipe=True, extension=None):
13
13
  """
14
14
  Creates a temporary file or named pipe and populates it with content.
15
15
 
@@ -29,7 +29,7 @@ def tempfile(self, content, pipe=True):
29
29
  >>> tempfile(["Another", "temp", "file"], pipe=False)
30
30
  '/home/user/.bbot/temp/someotherfile'
31
31
  """
32
- filename = self.temp_filename()
32
+ filename = self.temp_filename(extension)
33
33
  rm_at_exit(filename)
34
34
  try:
35
35
  if type(content) not in (set, list, tuple):
@@ -0,0 +1,17 @@
1
+ from pathlib import Path
2
+
3
+
4
+ def sanitize_git_repo(repo_folder: Path):
5
+ # sanitizing the git config is infeasible since there are too many different ways to do evil things
6
+ # instead, we move it out of .git and into the repo folder, so we don't miss any secrets etc. inside
7
+ config_file = repo_folder / ".git" / "config"
8
+ if config_file.exists():
9
+ config_file.rename(repo_folder / "git_config_original")
10
+ # move the index file
11
+ index_file = repo_folder / ".git" / "index"
12
+ if index_file.exists():
13
+ index_file.rename(repo_folder / "git_index_original")
14
+ # move the hooks folder
15
+ hooks_folder = repo_folder / ".git" / "hooks"
16
+ if hooks_folder.exists():
17
+ hooks_folder.rename(repo_folder / "git_hooks_original")
bbot/core/helpers/misc.py CHANGED
@@ -17,6 +17,7 @@ from unidecode import unidecode # noqa F401
17
17
  from asyncio import create_task, gather, sleep, wait_for # noqa
18
18
  from urllib.parse import urlparse, quote, unquote, urlunparse, urljoin # noqa F401
19
19
 
20
+ from .git import * # noqa F401
20
21
  from .url import * # noqa F401
21
22
  from ... import errors
22
23
  from . import regexes as bbot_regexes
bbot/core/helpers/ntlm.py CHANGED
@@ -17,11 +17,9 @@ class StrStruct(object):
17
17
  self.alloc = alloc
18
18
  self.offset = offset
19
19
  self.raw = raw[offset : offset + length]
20
- self.utf16 = False
21
20
 
22
21
  if len(self.raw) >= 2 and self.raw[1] == "\0":
23
22
  self.string = self.raw.decode("utf-16")
24
- self.utf16 = True
25
23
  else:
26
24
  self.string = self.raw
27
25
 
@@ -65,7 +65,7 @@ class RegexHelper:
65
65
 
66
66
  while tasks: # While there are tasks pending
67
67
  # Wait for the first task to complete
68
- done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
68
+ done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
69
69
 
70
70
  for task in done:
71
71
  result = task.result()
bbot/core/modules.py CHANGED
@@ -512,60 +512,6 @@ class ModuleLoader:
512
512
  # then we have a module
513
513
  return value
514
514
 
515
- def recommend_dependencies(self, modules):
516
- """
517
- Returns a dictionary containing missing dependencies and their suggested resolutions
518
-
519
- Needs work. For this we should probably be building a dependency graph
520
- """
521
- resolve_choices = {}
522
- # step 1: build a dictionary containing event types and their associated modules
523
- # {"IP_ADDRESS": set("masscan", "ipneighbor", ...)}
524
- watched = {}
525
- produced = {}
526
- for modname in modules:
527
- preloaded = self._preloaded.get(modname)
528
- if preloaded:
529
- for event_type in preloaded.get("watched_events", []):
530
- self.add_or_create(watched, event_type, modname)
531
- for event_type in preloaded.get("produced_events", []):
532
- self.add_or_create(produced, event_type, modname)
533
- watched_all = {}
534
- produced_all = {}
535
- for modname, preloaded in self.preloaded().items():
536
- if preloaded:
537
- for event_type in preloaded.get("watched_events", []):
538
- self.add_or_create(watched_all, event_type, modname)
539
- for event_type in preloaded.get("produced_events", []):
540
- self.add_or_create(produced_all, event_type, modname)
541
-
542
- # step 2: check to see if there are missing dependencies
543
- for modname in modules:
544
- preloaded = self._preloaded.get(modname)
545
- module_type = preloaded.get("type", "unknown")
546
- if module_type != "scan":
547
- continue
548
- watched_events = preloaded.get("watched_events", [])
549
- missing_deps = {e: not self.check_dependency(e, modname, produced) for e in watched_events}
550
- if all(missing_deps.values()):
551
- for event_type in watched_events:
552
- if event_type == "SCAN":
553
- continue
554
- choices = produced_all.get(event_type, [])
555
- choices = set(choices)
556
- with suppress(KeyError):
557
- choices.remove(modname)
558
- if event_type not in resolve_choices:
559
- resolve_choices[event_type] = {}
560
- deps = resolve_choices[event_type]
561
- self.add_or_create(deps, "required_by", modname)
562
- for c in choices:
563
- choice_type = self._preloaded.get(c, {}).get("type", "unknown")
564
- if choice_type == "scan":
565
- self.add_or_create(deps, "recommended", c)
566
-
567
- return resolve_choices
568
-
569
515
  def check_dependency(self, event_type, modname, produced):
570
516
  if event_type not in produced:
571
517
  return False
bbot/defaults.yml CHANGED
@@ -187,8 +187,10 @@ url_extension_blacklist:
187
187
  - mov
188
188
  - flv
189
189
  - webm
190
- # Distribute URLs with these extensions only to httpx (these are omitted from output)
191
- url_extension_httpx_only:
190
+
191
+ # URLs with these extensions are not distributed to modules unless the module opts in via `accept_url_special = True`
192
+ # They are also excluded from output. If you want to see them in output, remove them from this list.
193
+ url_extension_special:
192
194
  - js
193
195
 
194
196
  # These url extensions are almost always static, so we exclude them from modules that fuzz things
bbot/modules/apkpure.py CHANGED
@@ -6,7 +6,7 @@ from bbot.modules.base import BaseModule
6
6
  class apkpure(BaseModule):
7
7
  watched_events = ["MOBILE_APP"]
8
8
  produced_events = ["FILESYSTEM"]
9
- flags = ["passive", "safe", "code-enum"]
9
+ flags = ["passive", "safe", "code-enum", "download"]
10
10
  meta = {
11
11
  "description": "Download android applications from apkpure.com",
12
12
  "created_date": "2024-10-11",
bbot/modules/base.py CHANGED
@@ -53,6 +53,8 @@ class BaseModule:
53
53
 
54
54
  in_scope_only (bool): Accept only explicitly in-scope events, regardless of the scan's search distance. Default is False.
55
55
 
56
+ accept_url_special (bool): Accept "special" URLs not typically distributed to web modules, e.g. JS URLs. Default is False.
57
+
56
58
  options (Dict): Customizable options for the module, e.g., {"api_key": ""}. Empty dict by default.
57
59
 
58
60
  options_desc (Dict): Descriptions for options, e.g., {"api_key": "API Key"}. Empty dict by default.
@@ -97,7 +99,7 @@ class BaseModule:
97
99
  scope_distance_modifier = 0
98
100
  target_only = False
99
101
  in_scope_only = False
100
-
102
+ accept_url_special = False
101
103
  _module_threads = 1
102
104
  _batch_size = 1
103
105
 
@@ -785,10 +787,14 @@ class BaseModule:
785
787
  if "target" not in event.tags:
786
788
  return False, "it did not meet target_only filter criteria"
787
789
 
788
- # exclude certain URLs (e.g. javascript):
789
- # TODO: revisit this after httpx rework
790
- if event.type.startswith("URL") and self.name != "httpx" and "httpx-only" in event.tags:
791
- return False, "its extension was listed in url_extension_httpx_only"
790
+ # limit js URLs to modules that opt in to receive them
791
+ if (not self.accept_url_special) and event.type.startswith("URL"):
792
+ extension = getattr(event, "url_extension", "")
793
+ if extension in self.scan.url_extension_special:
794
+ return (
795
+ False,
796
+ f"it is a special URL (extension {extension}) but the module does not opt in to receive special URLs",
797
+ )
792
798
 
793
799
  return True, "precheck succeeded"
794
800
 
bbot/modules/dnsbimi.py CHANGED
@@ -39,7 +39,7 @@ import re
39
39
  # Handle "v=BIMI1; l=https://bimi.entrust.net/example.com/logo.svg;"
40
40
  # Handle "v=BIMI1;l=https://bimi.entrust.net/example.com/logo.svg;a=https://bimi.entrust.net/example.com/certchain.pem"
41
41
  # Handle "v=BIMI1; l=https://bimi.entrust.net/example.com/logo.svg;a=https://bimi.entrust.net/example.com/certchain.pem;"
42
- _bimi_regex = r"^v=(?P<v>BIMI1);* *(l=(?P<l>https*://[^;]*|)|);*( *a=((?P<a>https://[^;]*|)|);*)*$"
42
+ _bimi_regex = r"^v=(?P<v>BIMI1);\s?(?:l=(?P<l>https?://[^;\s]{1,255})?)?;?(?:\s?a=(?P<a>https://[^;\s]{1,255})?;?)?$"
43
43
  bimi_regex = re.compile(_bimi_regex, re.I)
44
44
 
45
45
 
@@ -140,6 +140,3 @@ class dnsbimi(BaseModule):
140
140
 
141
141
  async def handle_event(self, event):
142
142
  await self.inspectBIMI(event, event.host)
143
-
144
-
145
- # EOF
@@ -1,4 +1,4 @@
1
- import re
1
+ import json
2
2
 
3
3
  from bbot.modules.templates.subdomain_enum import subdomain_enum
4
4
 
@@ -15,78 +15,61 @@ class dnsdumpster(subdomain_enum):
15
15
 
16
16
  base_url = "https://dnsdumpster.com"
17
17
 
18
+ async def setup(self):
19
+ self.apikey_regex = self.helpers.re.compile(r'<form[^>]*data-form-id="mainform"[^>]*hx-headers=\'([^\']*)\'')
20
+ return True
21
+
18
22
  async def query(self, domain):
19
23
  ret = []
20
- # first, get the CSRF tokens
24
+ # first, get the JWT token from the main page
21
25
  res1 = await self.api_request(self.base_url)
22
26
  status_code = getattr(res1, "status_code", 0)
23
- if status_code in [429]:
24
- self.verbose(f'Too many requests "{status_code}"')
25
- return ret
26
- elif status_code not in [200]:
27
+ if status_code not in [200]:
27
28
  self.verbose(f'Bad response code "{status_code}" from DNSDumpster')
28
29
  return ret
29
- else:
30
- self.debug(f'Valid response code "{status_code}" from DNSDumpster')
31
-
32
- html = self.helpers.beautifulsoup(res1.content, "html.parser")
33
- if html is False:
34
- self.verbose("BeautifulSoup returned False")
35
- return ret
36
30
 
37
- csrftoken = None
38
- csrfmiddlewaretoken = None
31
+ # Extract JWT token from the form's hx-headers attribute using regex
32
+ jwt_token = None
39
33
  try:
40
- for cookie in res1.headers.get("set-cookie", "").split(";"):
41
- try:
42
- k, v = cookie.split("=", 1)
43
- except ValueError:
44
- self.verbose("Error retrieving cookie")
45
- return ret
46
- if k == "csrftoken":
47
- csrftoken = str(v)
48
- csrfmiddlewaretoken = html.find("input", {"name": "csrfmiddlewaretoken"}).attrs.get("value", None)
49
- except AttributeError:
50
- pass
34
+ # Look for the form with data-form-id="mainform" and extract hx-headers
35
+ form_match = await self.helpers.re.search(self.apikey_regex, res1.text)
36
+ if form_match:
37
+ headers_json = form_match.group(1)
38
+ headers_data = json.loads(headers_json)
39
+ jwt_token = headers_data.get("Authorization")
40
+ except (AttributeError, json.JSONDecodeError, KeyError):
41
+ self.log.warning("Error obtaining JWT token")
42
+ return ret
51
43
 
52
- # Abort if we didn't get the tokens
53
- if not csrftoken or not csrfmiddlewaretoken:
54
- self.verbose("Error obtaining CSRF tokens")
44
+ # Abort if we didn't get the JWT token
45
+ if not jwt_token:
46
+ self.verbose("Error obtaining JWT token")
55
47
  self.errorState = True
56
48
  return ret
57
49
  else:
58
- self.debug("Successfully obtained CSRF tokens")
50
+ self.debug("Successfully obtained JWT token")
59
51
 
60
52
  if self.scan.stopping:
61
- return
53
+ return ret
62
54
 
63
- # Otherwise, do the needful
64
- subdomains = set()
55
+ # Query the API with the JWT token
65
56
  res2 = await self.api_request(
66
- f"{self.base_url}/",
57
+ "https://api.dnsdumpster.com/htmld/",
67
58
  method="POST",
68
- cookies={"csrftoken": csrftoken},
69
- data={
70
- "csrfmiddlewaretoken": csrfmiddlewaretoken,
71
- "targetip": str(domain).lower(),
72
- "user": "free",
73
- },
59
+ data={"target": str(domain).lower()},
74
60
  headers={
75
- "origin": "https://dnsdumpster.com",
76
- "referer": "https://dnsdumpster.com/",
61
+ "Authorization": jwt_token,
62
+ "Content-Type": "application/x-www-form-urlencoded",
63
+ "Origin": "https://dnsdumpster.com",
64
+ "Referer": "https://dnsdumpster.com/",
65
+ "HX-Request": "true",
66
+ "HX-Target": "results",
67
+ "HX-Current-URL": "https://dnsdumpster.com/",
77
68
  },
78
69
  )
79
70
  status_code = getattr(res2, "status_code", 0)
80
71
  if status_code not in [200]:
81
- self.verbose(f'Bad response code "{status_code}" from DNSDumpster')
82
- return ret
83
- html = self.helpers.beautifulsoup(res2.content, "html.parser")
84
- if html is False:
85
- self.verbose("BeautifulSoup returned False")
72
+ self.verbose(f'Bad response code "{status_code}" from DNSDumpster API')
86
73
  return ret
87
- escaped_domain = re.escape(domain)
88
- match_pattern = re.compile(r"^[\w\.-]+\." + escaped_domain + r"$")
89
- for subdomain in html.findAll(text=match_pattern):
90
- subdomains.add(str(subdomain).strip().lower())
91
74
 
92
- return list(subdomains)
75
+ return await self.scan.extract_in_scope_hostnames(res2.text)
bbot/modules/dnstlsrpt.py CHANGED
@@ -44,20 +44,17 @@ class dnstlsrpt(BaseModule):
44
44
  "emit_emails": True,
45
45
  "emit_raw_dns_records": False,
46
46
  "emit_urls": True,
47
- "emit_vulnerabilities": True,
48
47
  }
49
48
  options_desc = {
50
49
  "emit_emails": "Emit EMAIL_ADDRESS events",
51
50
  "emit_raw_dns_records": "Emit RAW_DNS_RECORD events",
52
51
  "emit_urls": "Emit URL_UNVERIFIED events",
53
- "emit_vulnerabilities": "Emit VULNERABILITY events",
54
52
  }
55
53
 
56
54
  async def setup(self):
57
55
  self.emit_emails = self.config.get("emit_emails", True)
58
56
  self.emit_raw_dns_records = self.config.get("emit_raw_dns_records", False)
59
57
  self.emit_urls = self.config.get("emit_urls", True)
60
- self.emit_vulnerabilities = self.config.get("emit_vulnerabilities", True)
61
58
  return await super().setup()
62
59
 
63
60
  def _incoming_dedup_hash(self, event):
@@ -139,6 +136,3 @@ class dnstlsrpt(BaseModule):
139
136
  tags=tags.append(f"tlsrpt-record-{key}"),
140
137
  parent=event,
141
138
  )
142
-
143
-
144
- # EOF
@@ -8,7 +8,7 @@ from bbot.modules.base import BaseModule
8
8
  class docker_pull(BaseModule):
9
9
  watched_events = ["CODE_REPOSITORY"]
10
10
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "slow", "code-enum"]
11
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
12
12
  meta = {
13
13
  "description": "Download images from a docker repository",
14
14
  "created_date": "2024-03-24",
@@ -15,13 +15,29 @@ class emailformat(BaseModule):
15
15
 
16
16
  base_url = "https://www.email-format.com"
17
17
 
18
+ async def setup(self):
19
+ self.cfemail_regex = self.helpers.re.compile(r'data-cfemail="([0-9a-z]+)"')
20
+ return True
21
+
18
22
  async def handle_event(self, event):
19
23
  _, query = self.helpers.split_domain(event.data)
20
24
  url = f"{self.base_url}/d/{self.helpers.quote(query)}/"
21
25
  r = await self.api_request(url)
22
26
  if not r:
23
27
  return
24
- for email in await self.helpers.re.extract_emails(r.text):
28
+
29
+ encrypted_emails = await self.helpers.re.findall(self.cfemail_regex, r.text)
30
+
31
+ for enc in encrypted_emails:
32
+ enc_len = len(enc)
33
+
34
+ if enc_len < 2 or enc_len % 2 != 0:
35
+ continue
36
+
37
+ key = int(enc[:2], 16)
38
+
39
+ email = "".join([chr(int(enc[i : i + 2], 16) ^ key) for i in range(2, enc_len, 2)]).lower()
40
+
25
41
  if email.endswith(query):
26
42
  await self.emit_event(
27
43
  email,
@@ -14,7 +14,7 @@ class filedownload(BaseModule):
14
14
 
15
15
  watched_events = ["URL_UNVERIFIED", "HTTP_RESPONSE"]
16
16
  produced_events = ["FILESYSTEM"]
17
- flags = ["active", "safe", "web-basic"]
17
+ flags = ["active", "safe", "web-basic", "download"]
18
18
  meta = {
19
19
  "description": "Download common filetypes such as PDF, DOCX, PPTX, etc.",
20
20
  "created_date": "2023-10-11",
bbot/modules/git_clone.py CHANGED
@@ -6,7 +6,7 @@ from bbot.modules.templates.github import github
6
6
  class git_clone(github):
7
7
  watched_events = ["CODE_REPOSITORY"]
8
8
  produced_events = ["FILESYSTEM"]
9
- flags = ["passive", "safe", "slow", "code-enum"]
9
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
10
10
  meta = {
11
11
  "description": "Clone code github repositories",
12
12
  "created_date": "2024-03-08",
@@ -24,44 +24,69 @@ class git_clone(github):
24
24
 
25
25
  async def setup(self):
26
26
  output_folder = self.config.get("output_folder")
27
- if output_folder:
28
- self.output_dir = Path(output_folder) / "git_repos"
29
- else:
30
- self.output_dir = self.scan.temp_dir / "git_repos"
27
+ self.output_dir = Path(output_folder) / "git_repos" if output_folder else self.scan.temp_dir / "git_repos"
31
28
  self.helpers.mkdir(self.output_dir)
32
29
  return await super().setup()
33
30
 
34
31
  async def filter_event(self, event):
35
- if event.type == "CODE_REPOSITORY":
36
- if "git" not in event.tags:
37
- return False, "event is not a git repository"
32
+ if event.type == "CODE_REPOSITORY" and "git" not in event.tags:
33
+ return False, "event is not a git repository"
38
34
  return True
39
35
 
40
36
  async def handle_event(self, event):
41
- repo_url = event.data.get("url")
42
- repo_path = await self.clone_git_repository(repo_url)
43
- if repo_path:
44
- self.verbose(f"Cloned {repo_url} to {repo_path}")
45
- codebase_event = self.make_event({"path": str(repo_path)}, "FILESYSTEM", tags=["git"], parent=event)
37
+ repository_url = event.data.get("url")
38
+ repository_path = await self.clone_git_repository(repository_url)
39
+ if repository_path:
40
+ self.verbose(f"Cloned {repository_url} to {repository_path}")
41
+ codebase_event = self.make_event({"path": str(repository_path)}, "FILESYSTEM", tags=["git"], parent=event)
46
42
  await self.emit_event(
47
43
  codebase_event,
48
- context=f"{{module}} downloaded git repo at {repo_url} to {{event.type}}: {repo_path}",
44
+ context=f"{{module}} cloned git repository at {repository_url} to {{event.type}}: {repository_path}",
49
45
  )
50
46
 
51
47
  async def clone_git_repository(self, repository_url):
52
48
  owner = repository_url.split("/")[-2]
53
49
  folder = self.output_dir / owner
54
50
  self.helpers.mkdir(folder)
55
- if self.api_key:
56
- url = repository_url.replace("https://github.com", f"https://user:{self.api_key}@github.com")
57
- else:
58
- url = repository_url
59
- command = ["git", "-C", folder, "clone", url]
51
+
52
+ command = ["git", "-C", folder, "clone", repository_url]
53
+ env = {"GIT_TERMINAL_PROMPT": "0"}
54
+
60
55
  try:
61
- output = await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, check=True)
56
+ hostname = self.helpers.urlparse(repository_url).hostname
57
+ if hostname and self.api_key:
58
+ _, domain = self.helpers.split_domain(hostname)
59
+ # only use the api key if the domain is github.com
60
+ if domain == "github.com":
61
+ env["GIT_HELPER"] = (
62
+ f'!f() {{ case "$1" in get) '
63
+ f"echo username=x-access-token; "
64
+ f"echo password={self.api_key};; "
65
+ f'esac; }}; f "$@"'
66
+ )
67
+ command = (
68
+ command[:1]
69
+ + [
70
+ "-c",
71
+ "credential.helper=",
72
+ "-c",
73
+ "credential.useHttpPath=true",
74
+ "--config-env=credential.helper=GIT_HELPER",
75
+ ]
76
+ + command[1:]
77
+ )
78
+
79
+ output = await self.run_process(command, env=env, check=True)
62
80
  except CalledProcessError as e:
63
- self.debug(f"Error cloning {url}. STDERR: {repr(e.stderr)}")
81
+ self.debug(f"Error cloning {repository_url}. STDERR: {repr(e.stderr)}")
64
82
  return
65
83
 
66
84
  folder_name = output.stderr.split("Cloning into '")[1].split("'")[0]
67
- return folder / folder_name
85
+ repo_folder = folder / folder_name
86
+
87
+ # sanitize the repo
88
+ # this moves the git config, index file, and hooks folder out of the .git folder to prevent nasty things
89
+ # Note: the index file can be regenerated by running "git checkout HEAD -- ."
90
+ self.helpers.sanitize_git_repo(repo_folder)
91
+
92
+ return repo_folder
bbot/modules/gitdumper.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import asyncio
2
- import regex as re
3
2
  from pathlib import Path
4
3
  from subprocess import CalledProcessError
5
4
  from bbot.modules.base import BaseModule
@@ -8,7 +7,7 @@ from bbot.modules.base import BaseModule
8
7
  class gitdumper(BaseModule):
9
8
  watched_events = ["CODE_REPOSITORY"]
10
9
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "slow", "code-enum"]
10
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
12
11
  meta = {
13
12
  "description": "Download a leaked .git folder recursively or by fuzzing common names",
14
13
  "created_date": "2025-02-11",
@@ -35,7 +34,6 @@ class gitdumper(BaseModule):
35
34
  else:
36
35
  self.output_dir = self.scan.temp_dir / "git_repos"
37
36
  self.helpers.mkdir(self.output_dir)
38
- self.unsafe_regex = self.helpers.re.compile(r"^\s*fsmonitor|sshcommand|askpass|editor|pager", re.IGNORECASE)
39
37
  self.ref_regex = self.helpers.re.compile(r"ref: refs/heads/([a-zA-Z\d_-]+)")
40
38
  self.obj_regex = self.helpers.re.compile(r"[a-f0-9]{40}")
41
39
  self.pack_regex = self.helpers.re.compile(r"pack-([a-f0-9]{40})\.pack")
@@ -131,7 +129,6 @@ class gitdumper(BaseModule):
131
129
  else:
132
130
  result = await self.git_fuzz(repo_url, repo_folder)
133
131
  if result:
134
- await self.sanitize_config(repo_folder)
135
132
  await self.git_checkout(repo_folder)
136
133
  codebase_event = self.make_event({"path": str(repo_folder)}, "FILESYSTEM", tags=["git"], parent=event)
137
134
  await self.emit_event(
@@ -251,15 +248,6 @@ class gitdumper(BaseModule):
251
248
  self.debug(f"Unable to download git files to {folder}")
252
249
  return False
253
250
 
254
- async def sanitize_config(self, folder):
255
- config_file = folder / ".git/config"
256
- if config_file.exists():
257
- with config_file.open("r", encoding="utf-8", errors="ignore") as file:
258
- content = file.read()
259
- sanitized = await self.helpers.re.sub(self.unsafe_regex, r"# \g<0>", content)
260
- with config_file.open("w", encoding="utf-8") as file:
261
- file.write(sanitized)
262
-
263
251
  async def git_catfile(self, hash, option="-t", folder=Path()):
264
252
  command = ["git", "cat-file", option, hash]
265
253
  try:
@@ -270,8 +258,10 @@ class gitdumper(BaseModule):
270
258
  return output.stdout
271
259
 
272
260
  async def git_checkout(self, folder):
261
+ self.helpers.sanitize_git_repo(folder)
273
262
  self.verbose(f"Running git checkout to reconstruct the git repository at {folder}")
274
- command = ["git", "checkout", "."]
263
+ # we do "checkout head -- ." because the sanitization deletes the index file, and it needs to be reconstructed
264
+ command = ["git", "checkout", "HEAD", "--", "."]
275
265
  try:
276
266
  await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, cwd=folder, check=True)
277
267
  except CalledProcessError as e: