bbot 2.6.0.6840rc0__py3-none-any.whl → 2.7.2.7424rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +22 -8
  3. bbot/core/engine.py +1 -1
  4. bbot/core/event/__init__.py +2 -2
  5. bbot/core/event/base.py +138 -110
  6. bbot/core/flags.py +1 -0
  7. bbot/core/helpers/bloom.py +6 -7
  8. bbot/core/helpers/depsinstaller/installer.py +21 -2
  9. bbot/core/helpers/dns/dns.py +0 -1
  10. bbot/core/helpers/dns/engine.py +0 -2
  11. bbot/core/helpers/files.py +2 -2
  12. bbot/core/helpers/git.py +17 -0
  13. bbot/core/helpers/helper.py +6 -5
  14. bbot/core/helpers/misc.py +8 -23
  15. bbot/core/helpers/ntlm.py +0 -2
  16. bbot/core/helpers/regex.py +1 -1
  17. bbot/core/helpers/regexes.py +25 -8
  18. bbot/core/helpers/web/web.py +2 -1
  19. bbot/core/modules.py +22 -60
  20. bbot/defaults.yml +4 -2
  21. bbot/modules/apkpure.py +1 -1
  22. bbot/modules/baddns.py +1 -1
  23. bbot/modules/baddns_direct.py +1 -1
  24. bbot/modules/baddns_zone.py +1 -1
  25. bbot/modules/badsecrets.py +1 -1
  26. bbot/modules/base.py +123 -38
  27. bbot/modules/bucket_amazon.py +1 -1
  28. bbot/modules/bucket_digitalocean.py +1 -1
  29. bbot/modules/bucket_firebase.py +1 -1
  30. bbot/modules/bucket_google.py +1 -1
  31. bbot/modules/{bucket_azure.py → bucket_microsoft.py} +2 -2
  32. bbot/modules/builtwith.py +4 -2
  33. bbot/modules/dnsbimi.py +1 -4
  34. bbot/modules/dnsbrute.py +6 -1
  35. bbot/modules/dnsdumpster.py +35 -52
  36. bbot/modules/dnstlsrpt.py +0 -6
  37. bbot/modules/docker_pull.py +1 -1
  38. bbot/modules/emailformat.py +17 -1
  39. bbot/modules/ffuf.py +4 -1
  40. bbot/modules/ffuf_shortnames.py +6 -3
  41. bbot/modules/filedownload.py +7 -4
  42. bbot/modules/git_clone.py +47 -22
  43. bbot/modules/gitdumper.py +4 -14
  44. bbot/modules/github_workflows.py +6 -5
  45. bbot/modules/gitlab_com.py +31 -0
  46. bbot/modules/gitlab_onprem.py +84 -0
  47. bbot/modules/gowitness.py +0 -6
  48. bbot/modules/graphql_introspection.py +5 -2
  49. bbot/modules/httpx.py +2 -0
  50. bbot/modules/iis_shortnames.py +0 -7
  51. bbot/modules/internal/cloudcheck.py +65 -72
  52. bbot/modules/internal/unarchive.py +9 -3
  53. bbot/modules/lightfuzz/lightfuzz.py +6 -2
  54. bbot/modules/lightfuzz/submodules/esi.py +42 -0
  55. bbot/modules/medusa.py +4 -7
  56. bbot/modules/nuclei.py +1 -1
  57. bbot/modules/otx.py +9 -2
  58. bbot/modules/output/base.py +3 -11
  59. bbot/modules/paramminer_headers.py +10 -7
  60. bbot/modules/portfilter.py +2 -0
  61. bbot/modules/postman_download.py +1 -1
  62. bbot/modules/retirejs.py +232 -0
  63. bbot/modules/securitytxt.py +0 -3
  64. bbot/modules/sslcert.py +2 -2
  65. bbot/modules/subdomaincenter.py +1 -16
  66. bbot/modules/telerik.py +7 -2
  67. bbot/modules/templates/bucket.py +24 -4
  68. bbot/modules/templates/gitlab.py +98 -0
  69. bbot/modules/trufflehog.py +6 -3
  70. bbot/modules/wafw00f.py +2 -2
  71. bbot/presets/web/lightfuzz-heavy.yml +1 -1
  72. bbot/presets/web/lightfuzz-medium.yml +1 -1
  73. bbot/presets/web/lightfuzz-superheavy.yml +1 -1
  74. bbot/scanner/manager.py +44 -37
  75. bbot/scanner/scanner.py +12 -4
  76. bbot/scripts/benchmark_report.py +433 -0
  77. bbot/test/benchmarks/__init__.py +2 -0
  78. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  79. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  80. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  81. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  82. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  83. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  84. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  85. bbot/test/test_step_1/test_events.py +22 -21
  86. bbot/test/test_step_1/test_helpers.py +1 -0
  87. bbot/test/test_step_1/test_manager_scope_accuracy.py +45 -0
  88. bbot/test/test_step_1/test_modules_basic.py +40 -15
  89. bbot/test/test_step_1/test_python_api.py +2 -2
  90. bbot/test/test_step_1/test_regexes.py +21 -4
  91. bbot/test/test_step_1/test_scan.py +7 -8
  92. bbot/test/test_step_1/test_web.py +46 -0
  93. bbot/test/test_step_2/module_tests/base.py +6 -1
  94. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +52 -18
  95. bbot/test/test_step_2/module_tests/test_module_bucket_google.py +1 -1
  96. bbot/test/test_step_2/module_tests/{test_module_bucket_azure.py → test_module_bucket_microsoft.py} +7 -5
  97. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +19 -31
  98. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  99. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  100. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  101. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  102. bbot/test/test_step_2/module_tests/test_module_excavate.py +57 -4
  103. bbot/test/test_step_2/module_tests/test_module_github_workflows.py +10 -1
  104. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  105. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  106. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +71 -3
  107. bbot/test/test_step_2/module_tests/test_module_nuclei.py +1 -2
  108. bbot/test/test_step_2/module_tests/test_module_otx.py +3 -0
  109. bbot/test/test_step_2/module_tests/test_module_portfilter.py +2 -0
  110. bbot/test/test_step_2/module_tests/test_module_retirejs.py +161 -0
  111. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  112. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +10 -1
  113. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/METADATA +10 -7
  114. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/RECORD +117 -106
  115. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/WHEEL +1 -1
  116. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info/licenses}/LICENSE +98 -58
  117. bbot/modules/censys.py +0 -98
  118. bbot/modules/gitlab.py +0 -141
  119. bbot/modules/zoomeye.py +0 -77
  120. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  121. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  122. {bbot-2.6.0.6840rc0.dist-info → bbot-2.7.2.7424rc0.dist-info}/entry_points.txt +0 -0
bbot/modules/dnsbrute.py CHANGED
@@ -23,9 +23,14 @@ class dnsbrute(subdomain_enum):
23
23
  dedup_strategy = "lowest_parent"
24
24
  _qsize = 10000
25
25
 
26
+ async def setup_deps(self):
27
+ self.subdomain_file = await self.helpers.wordlist(self.config.get("wordlist"))
28
+ # tell the dnsbrute helper to fetch the resolver file
29
+ await self.helpers.dns.brute.resolver_file()
30
+ return True
31
+
26
32
  async def setup(self):
27
33
  self.max_depth = max(1, self.config.get("max_depth", 5))
28
- self.subdomain_file = await self.helpers.wordlist(self.config.get("wordlist"))
29
34
  self.subdomain_list = set(self.helpers.read_file(self.subdomain_file))
30
35
  self.wordlist_size = len(self.subdomain_list)
31
36
  return await super().setup()
@@ -1,4 +1,4 @@
1
- import re
1
+ import json
2
2
 
3
3
  from bbot.modules.templates.subdomain_enum import subdomain_enum
4
4
 
@@ -15,78 +15,61 @@ class dnsdumpster(subdomain_enum):
15
15
 
16
16
  base_url = "https://dnsdumpster.com"
17
17
 
18
+ async def setup(self):
19
+ self.apikey_regex = self.helpers.re.compile(r'<form[^>]*data-form-id="mainform"[^>]*hx-headers=\'([^\']*)\'')
20
+ return True
21
+
18
22
  async def query(self, domain):
19
23
  ret = []
20
- # first, get the CSRF tokens
24
+ # first, get the JWT token from the main page
21
25
  res1 = await self.api_request(self.base_url)
22
26
  status_code = getattr(res1, "status_code", 0)
23
- if status_code in [429]:
24
- self.verbose(f'Too many requests "{status_code}"')
25
- return ret
26
- elif status_code not in [200]:
27
+ if status_code not in [200]:
27
28
  self.verbose(f'Bad response code "{status_code}" from DNSDumpster')
28
29
  return ret
29
- else:
30
- self.debug(f'Valid response code "{status_code}" from DNSDumpster')
31
-
32
- html = self.helpers.beautifulsoup(res1.content, "html.parser")
33
- if html is False:
34
- self.verbose("BeautifulSoup returned False")
35
- return ret
36
30
 
37
- csrftoken = None
38
- csrfmiddlewaretoken = None
31
+ # Extract JWT token from the form's hx-headers attribute using regex
32
+ jwt_token = None
39
33
  try:
40
- for cookie in res1.headers.get("set-cookie", "").split(";"):
41
- try:
42
- k, v = cookie.split("=", 1)
43
- except ValueError:
44
- self.verbose("Error retrieving cookie")
45
- return ret
46
- if k == "csrftoken":
47
- csrftoken = str(v)
48
- csrfmiddlewaretoken = html.find("input", {"name": "csrfmiddlewaretoken"}).attrs.get("value", None)
49
- except AttributeError:
50
- pass
34
+ # Look for the form with data-form-id="mainform" and extract hx-headers
35
+ form_match = await self.helpers.re.search(self.apikey_regex, res1.text)
36
+ if form_match:
37
+ headers_json = form_match.group(1)
38
+ headers_data = json.loads(headers_json)
39
+ jwt_token = headers_data.get("Authorization")
40
+ except (AttributeError, json.JSONDecodeError, KeyError):
41
+ self.log.warning("Error obtaining JWT token")
42
+ return ret
51
43
 
52
- # Abort if we didn't get the tokens
53
- if not csrftoken or not csrfmiddlewaretoken:
54
- self.verbose("Error obtaining CSRF tokens")
44
+ # Abort if we didn't get the JWT token
45
+ if not jwt_token:
46
+ self.verbose("Error obtaining JWT token")
55
47
  self.errorState = True
56
48
  return ret
57
49
  else:
58
- self.debug("Successfully obtained CSRF tokens")
50
+ self.debug("Successfully obtained JWT token")
59
51
 
60
52
  if self.scan.stopping:
61
- return
53
+ return ret
62
54
 
63
- # Otherwise, do the needful
64
- subdomains = set()
55
+ # Query the API with the JWT token
65
56
  res2 = await self.api_request(
66
- f"{self.base_url}/",
57
+ "https://api.dnsdumpster.com/htmld/",
67
58
  method="POST",
68
- cookies={"csrftoken": csrftoken},
69
- data={
70
- "csrfmiddlewaretoken": csrfmiddlewaretoken,
71
- "targetip": str(domain).lower(),
72
- "user": "free",
73
- },
59
+ data={"target": str(domain).lower()},
74
60
  headers={
75
- "origin": "https://dnsdumpster.com",
76
- "referer": "https://dnsdumpster.com/",
61
+ "Authorization": jwt_token,
62
+ "Content-Type": "application/x-www-form-urlencoded",
63
+ "Origin": "https://dnsdumpster.com",
64
+ "Referer": "https://dnsdumpster.com/",
65
+ "HX-Request": "true",
66
+ "HX-Target": "results",
67
+ "HX-Current-URL": "https://dnsdumpster.com/",
77
68
  },
78
69
  )
79
70
  status_code = getattr(res2, "status_code", 0)
80
71
  if status_code not in [200]:
81
- self.verbose(f'Bad response code "{status_code}" from DNSDumpster')
82
- return ret
83
- html = self.helpers.beautifulsoup(res2.content, "html.parser")
84
- if html is False:
85
- self.verbose("BeautifulSoup returned False")
72
+ self.verbose(f'Bad response code "{status_code}" from DNSDumpster API')
86
73
  return ret
87
- escaped_domain = re.escape(domain)
88
- match_pattern = re.compile(r"^[\w\.-]+\." + escaped_domain + r"$")
89
- for subdomain in html.findAll(text=match_pattern):
90
- subdomains.add(str(subdomain).strip().lower())
91
74
 
92
- return list(subdomains)
75
+ return await self.scan.extract_in_scope_hostnames(res2.text)
bbot/modules/dnstlsrpt.py CHANGED
@@ -44,20 +44,17 @@ class dnstlsrpt(BaseModule):
44
44
  "emit_emails": True,
45
45
  "emit_raw_dns_records": False,
46
46
  "emit_urls": True,
47
- "emit_vulnerabilities": True,
48
47
  }
49
48
  options_desc = {
50
49
  "emit_emails": "Emit EMAIL_ADDRESS events",
51
50
  "emit_raw_dns_records": "Emit RAW_DNS_RECORD events",
52
51
  "emit_urls": "Emit URL_UNVERIFIED events",
53
- "emit_vulnerabilities": "Emit VULNERABILITY events",
54
52
  }
55
53
 
56
54
  async def setup(self):
57
55
  self.emit_emails = self.config.get("emit_emails", True)
58
56
  self.emit_raw_dns_records = self.config.get("emit_raw_dns_records", False)
59
57
  self.emit_urls = self.config.get("emit_urls", True)
60
- self.emit_vulnerabilities = self.config.get("emit_vulnerabilities", True)
61
58
  return await super().setup()
62
59
 
63
60
  def _incoming_dedup_hash(self, event):
@@ -139,6 +136,3 @@ class dnstlsrpt(BaseModule):
139
136
  tags=tags.append(f"tlsrpt-record-{key}"),
140
137
  parent=event,
141
138
  )
142
-
143
-
144
- # EOF
@@ -8,7 +8,7 @@ from bbot.modules.base import BaseModule
8
8
  class docker_pull(BaseModule):
9
9
  watched_events = ["CODE_REPOSITORY"]
10
10
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "slow", "code-enum"]
11
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
12
12
  meta = {
13
13
  "description": "Download images from a docker repository",
14
14
  "created_date": "2024-03-24",
@@ -15,13 +15,29 @@ class emailformat(BaseModule):
15
15
 
16
16
  base_url = "https://www.email-format.com"
17
17
 
18
+ async def setup(self):
19
+ self.cfemail_regex = self.helpers.re.compile(r'data-cfemail="([0-9a-z]+)"')
20
+ return True
21
+
18
22
  async def handle_event(self, event):
19
23
  _, query = self.helpers.split_domain(event.data)
20
24
  url = f"{self.base_url}/d/{self.helpers.quote(query)}/"
21
25
  r = await self.api_request(url)
22
26
  if not r:
23
27
  return
24
- for email in await self.helpers.re.extract_emails(r.text):
28
+
29
+ encrypted_emails = await self.helpers.re.findall(self.cfemail_regex, r.text)
30
+
31
+ for enc in encrypted_emails:
32
+ enc_len = len(enc)
33
+
34
+ if enc_len < 2 or enc_len % 2 != 0:
35
+ continue
36
+
37
+ key = int(enc[:2], 16)
38
+
39
+ email = "".join([chr(int(enc[i : i + 2], 16) ^ key) for i in range(2, enc_len, 2)]).lower()
40
+
25
41
  if email.endswith(query):
26
42
  await self.emit_event(
27
43
  email,
bbot/modules/ffuf.py CHANGED
@@ -37,12 +37,15 @@ class ffuf(BaseModule):
37
37
 
38
38
  in_scope_only = True
39
39
 
40
+ async def setup_deps(self):
41
+ self.wordlist = await self.helpers.wordlist(self.config.get("wordlist"))
42
+ return True
43
+
40
44
  async def setup(self):
41
45
  self.proxy = self.scan.web_config.get("http_proxy", "")
42
46
  self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10))
43
47
  wordlist_url = self.config.get("wordlist", "")
44
48
  self.debug(f"Using wordlist [{wordlist_url}]")
45
- self.wordlist = await self.helpers.wordlist(wordlist_url)
46
49
  self.wordlist_lines = self.generate_wordlist(self.wordlist)
47
50
  self.tempfile, tempfile_len = self.generate_templist()
48
51
  self.rate = self.config.get("rate", 0)
@@ -87,14 +87,17 @@ class ffuf_shortnames(ffuf):
87
87
  found_prefixes.add(prefix)
88
88
  return list(found_prefixes)
89
89
 
90
- async def setup(self):
91
- self.proxy = self.scan.web_config.get("http_proxy", "")
92
- self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10))
90
+ async def setup_deps(self):
93
91
  wordlist_extensions = self.config.get("wordlist_extensions", "")
94
92
  if not wordlist_extensions:
95
93
  wordlist_extensions = f"{self.helpers.wordlist_dir}/raft-small-extensions-lowercase_CLEANED.txt"
96
94
  self.debug(f"Using [{wordlist_extensions}] for shortname candidate extension list")
97
95
  self.wordlist_extensions = await self.helpers.wordlist(wordlist_extensions)
96
+ return True
97
+
98
+ async def setup(self):
99
+ self.proxy = self.scan.web_config.get("http_proxy", "")
100
+ self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10))
98
101
  self.ignore_redirects = self.config.get("ignore_redirects")
99
102
  self.max_predictions = self.config.get("max_predictions")
100
103
  self.find_subwords = self.config.get("find_subwords")
@@ -14,7 +14,7 @@ class filedownload(BaseModule):
14
14
 
15
15
  watched_events = ["URL_UNVERIFIED", "HTTP_RESPONSE"]
16
16
  produced_events = ["FILESYSTEM"]
17
- flags = ["active", "safe", "web-basic"]
17
+ flags = ["active", "safe", "web-basic", "download"]
18
18
  meta = {
19
19
  "description": "Download common filetypes such as PDF, DOCX, PPTX, etc.",
20
20
  "created_date": "2023-10-11",
@@ -94,6 +94,12 @@ class filedownload(BaseModule):
94
94
 
95
95
  scope_distance_modifier = 3
96
96
 
97
+ async def setup_deps(self):
98
+ self.mime_db_file = await self.helpers.wordlist(
99
+ "https://raw.githubusercontent.com/jshttp/mime-db/master/db.json"
100
+ )
101
+ return True
102
+
97
103
  async def setup(self):
98
104
  self.extensions = list({e.lower().strip(".") for e in self.config.get("extensions", [])})
99
105
  self.max_filesize = self.config.get("max_filesize", "10MB")
@@ -105,9 +111,6 @@ class filedownload(BaseModule):
105
111
  else:
106
112
  self.download_dir = self.scan.temp_dir / "filedownload"
107
113
  self.helpers.mkdir(self.download_dir)
108
- self.mime_db_file = await self.helpers.wordlist(
109
- "https://raw.githubusercontent.com/jshttp/mime-db/master/db.json"
110
- )
111
114
  self.mime_db = {}
112
115
  with open(self.mime_db_file) as f:
113
116
  mime_db = json.load(f)
bbot/modules/git_clone.py CHANGED
@@ -6,7 +6,7 @@ from bbot.modules.templates.github import github
6
6
  class git_clone(github):
7
7
  watched_events = ["CODE_REPOSITORY"]
8
8
  produced_events = ["FILESYSTEM"]
9
- flags = ["passive", "safe", "slow", "code-enum"]
9
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
10
10
  meta = {
11
11
  "description": "Clone code github repositories",
12
12
  "created_date": "2024-03-08",
@@ -24,44 +24,69 @@ class git_clone(github):
24
24
 
25
25
  async def setup(self):
26
26
  output_folder = self.config.get("output_folder")
27
- if output_folder:
28
- self.output_dir = Path(output_folder) / "git_repos"
29
- else:
30
- self.output_dir = self.scan.temp_dir / "git_repos"
27
+ self.output_dir = Path(output_folder) / "git_repos" if output_folder else self.scan.temp_dir / "git_repos"
31
28
  self.helpers.mkdir(self.output_dir)
32
29
  return await super().setup()
33
30
 
34
31
  async def filter_event(self, event):
35
- if event.type == "CODE_REPOSITORY":
36
- if "git" not in event.tags:
37
- return False, "event is not a git repository"
32
+ if event.type == "CODE_REPOSITORY" and "git" not in event.tags:
33
+ return False, "event is not a git repository"
38
34
  return True
39
35
 
40
36
  async def handle_event(self, event):
41
- repo_url = event.data.get("url")
42
- repo_path = await self.clone_git_repository(repo_url)
43
- if repo_path:
44
- self.verbose(f"Cloned {repo_url} to {repo_path}")
45
- codebase_event = self.make_event({"path": str(repo_path)}, "FILESYSTEM", tags=["git"], parent=event)
37
+ repository_url = event.data.get("url")
38
+ repository_path = await self.clone_git_repository(repository_url)
39
+ if repository_path:
40
+ self.verbose(f"Cloned {repository_url} to {repository_path}")
41
+ codebase_event = self.make_event({"path": str(repository_path)}, "FILESYSTEM", tags=["git"], parent=event)
46
42
  await self.emit_event(
47
43
  codebase_event,
48
- context=f"{{module}} downloaded git repo at {repo_url} to {{event.type}}: {repo_path}",
44
+ context=f"{{module}} cloned git repository at {repository_url} to {{event.type}}: {repository_path}",
49
45
  )
50
46
 
51
47
  async def clone_git_repository(self, repository_url):
52
48
  owner = repository_url.split("/")[-2]
53
49
  folder = self.output_dir / owner
54
50
  self.helpers.mkdir(folder)
55
- if self.api_key:
56
- url = repository_url.replace("https://github.com", f"https://user:{self.api_key}@github.com")
57
- else:
58
- url = repository_url
59
- command = ["git", "-C", folder, "clone", url]
51
+
52
+ command = ["git", "-C", folder, "clone", repository_url]
53
+ env = {"GIT_TERMINAL_PROMPT": "0"}
54
+
60
55
  try:
61
- output = await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, check=True)
56
+ hostname = self.helpers.urlparse(repository_url).hostname
57
+ if hostname and self.api_key:
58
+ _, domain = self.helpers.split_domain(hostname)
59
+ # only use the api key if the domain is github.com
60
+ if domain == "github.com":
61
+ env["GIT_HELPER"] = (
62
+ f'!f() {{ case "$1" in get) '
63
+ f"echo username=x-access-token; "
64
+ f"echo password={self.api_key};; "
65
+ f'esac; }}; f "$@"'
66
+ )
67
+ command = (
68
+ command[:1]
69
+ + [
70
+ "-c",
71
+ "credential.helper=",
72
+ "-c",
73
+ "credential.useHttpPath=true",
74
+ "--config-env=credential.helper=GIT_HELPER",
75
+ ]
76
+ + command[1:]
77
+ )
78
+
79
+ output = await self.run_process(command, env=env, check=True)
62
80
  except CalledProcessError as e:
63
- self.debug(f"Error cloning {url}. STDERR: {repr(e.stderr)}")
81
+ self.debug(f"Error cloning {repository_url}. STDERR: {repr(e.stderr)}")
64
82
  return
65
83
 
66
84
  folder_name = output.stderr.split("Cloning into '")[1].split("'")[0]
67
- return folder / folder_name
85
+ repo_folder = folder / folder_name
86
+
87
+ # sanitize the repo
88
+ # this moves the git config, index file, and hooks folder out of the .git folder to prevent nasty things
89
+ # Note: the index file can be regenerated by running "git checkout HEAD -- ."
90
+ self.helpers.sanitize_git_repo(repo_folder)
91
+
92
+ return repo_folder
bbot/modules/gitdumper.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import asyncio
2
- import regex as re
3
2
  from pathlib import Path
4
3
  from subprocess import CalledProcessError
5
4
  from bbot.modules.base import BaseModule
@@ -8,7 +7,7 @@ from bbot.modules.base import BaseModule
8
7
  class gitdumper(BaseModule):
9
8
  watched_events = ["CODE_REPOSITORY"]
10
9
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "slow", "code-enum"]
10
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
12
11
  meta = {
13
12
  "description": "Download a leaked .git folder recursively or by fuzzing common names",
14
13
  "created_date": "2025-02-11",
@@ -35,7 +34,6 @@ class gitdumper(BaseModule):
35
34
  else:
36
35
  self.output_dir = self.scan.temp_dir / "git_repos"
37
36
  self.helpers.mkdir(self.output_dir)
38
- self.unsafe_regex = self.helpers.re.compile(r"^\s*fsmonitor|sshcommand|askpass|editor|pager", re.IGNORECASE)
39
37
  self.ref_regex = self.helpers.re.compile(r"ref: refs/heads/([a-zA-Z\d_-]+)")
40
38
  self.obj_regex = self.helpers.re.compile(r"[a-f0-9]{40}")
41
39
  self.pack_regex = self.helpers.re.compile(r"pack-([a-f0-9]{40})\.pack")
@@ -131,7 +129,6 @@ class gitdumper(BaseModule):
131
129
  else:
132
130
  result = await self.git_fuzz(repo_url, repo_folder)
133
131
  if result:
134
- await self.sanitize_config(repo_folder)
135
132
  await self.git_checkout(repo_folder)
136
133
  codebase_event = self.make_event({"path": str(repo_folder)}, "FILESYSTEM", tags=["git"], parent=event)
137
134
  await self.emit_event(
@@ -251,15 +248,6 @@ class gitdumper(BaseModule):
251
248
  self.debug(f"Unable to download git files to {folder}")
252
249
  return False
253
250
 
254
- async def sanitize_config(self, folder):
255
- config_file = folder / ".git/config"
256
- if config_file.exists():
257
- with config_file.open("r", encoding="utf-8", errors="ignore") as file:
258
- content = file.read()
259
- sanitized = await self.helpers.re.sub(self.unsafe_regex, r"# \g<0>", content)
260
- with config_file.open("w", encoding="utf-8") as file:
261
- file.write(sanitized)
262
-
263
251
  async def git_catfile(self, hash, option="-t", folder=Path()):
264
252
  command = ["git", "cat-file", option, hash]
265
253
  try:
@@ -270,8 +258,10 @@ class gitdumper(BaseModule):
270
258
  return output.stdout
271
259
 
272
260
  async def git_checkout(self, folder):
261
+ self.helpers.sanitize_git_repo(folder)
273
262
  self.verbose(f"Running git checkout to reconstruct the git repository at {folder}")
274
- command = ["git", "checkout", "."]
263
+ # we do "checkout head -- ." because the sanitization deletes the index file, and it needs to be reconstructed
264
+ command = ["git", "checkout", "HEAD", "--", "."]
275
265
  try:
276
266
  await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, cwd=folder, check=True)
277
267
  except CalledProcessError as e:
@@ -8,11 +8,12 @@ from bbot.modules.templates.github import github
8
8
  class github_workflows(github):
9
9
  watched_events = ["CODE_REPOSITORY"]
10
10
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "code-enum"]
11
+ flags = ["passive", "safe", "code-enum", "download"]
12
12
  meta = {
13
13
  "description": "Download a github repositories workflow logs and workflow artifacts",
14
14
  "created_date": "2024-04-29",
15
15
  "author": "@domwhewell-sage",
16
+ "auth_required": True,
16
17
  }
17
18
  options = {"api_key": "", "num_logs": 1, "output_folder": ""}
18
19
  options_desc = {
@@ -152,7 +153,7 @@ class github_workflows(github):
152
153
  filename = f"run_{run_id}.zip"
153
154
  file_destination = folder / filename
154
155
  try:
155
- await self.helpers.download(
156
+ await self.api_download(
156
157
  f"{self.base_url}/repos/{owner}/{repo}/actions/runs/{run_id}/logs",
157
158
  filename=file_destination,
158
159
  headers=self.headers,
@@ -166,7 +167,7 @@ class github_workflows(github):
166
167
  status_code = getattr(response, "status_code", 0)
167
168
  if status_code == 403:
168
169
  self.warning(
169
- f"The current access key does not have access to workflow {owner}/{repo}/{run_id} (status: {status_code})"
170
+ f"The current access key does not have access to workflow {owner}/{repo}/{run_id}, The API key must have the 'repo' scope or read 'Actions' repository permissions (status: {status_code})"
170
171
  )
171
172
  else:
172
173
  self.info(
@@ -212,7 +213,7 @@ class github_workflows(github):
212
213
  self.helpers.mkdir(folder)
213
214
  file_destination = folder / artifact_name
214
215
  try:
215
- await self.helpers.download(
216
+ await self.api_download(
216
217
  f"{self.base_url}/repos/{owner}/{repo}/actions/artifacts/{artifact_id}/zip",
217
218
  filename=file_destination,
218
219
  headers=self.headers,
@@ -228,6 +229,6 @@ class github_workflows(github):
228
229
  status_code = getattr(response, "status_code", 0)
229
230
  if status_code == 403:
230
231
  self.warning(
231
- f"The current access key does not have access to workflow artifacts {owner}/{repo}/{artifact_id} (status: {status_code})"
232
+ f"The current access key does not have access to workflow artifacts {owner}/{repo}/{artifact_id}, The API key must have the 'repo' scope or read 'Actions' repository permissions (status: {status_code})"
232
233
  )
233
234
  return file_destination
@@ -0,0 +1,31 @@
1
+ from bbot.modules.templates.gitlab import GitLabBaseModule
2
+
3
+
4
+ class gitlab_com(GitLabBaseModule):
5
+ watched_events = ["SOCIAL"]
6
+ produced_events = [
7
+ "CODE_REPOSITORY",
8
+ ]
9
+ flags = ["active", "safe", "code-enum"]
10
+ meta = {
11
+ "description": "Enumerate GitLab SaaS (gitlab.com/org) for projects and groups",
12
+ "created_date": "2024-03-11",
13
+ "author": "@TheTechromancer",
14
+ }
15
+
16
+ options = {"api_key": ""}
17
+ options_desc = {"api_key": "GitLab access token (for gitlab.com/org only)"}
18
+
19
+ # This is needed because we are consuming SOCIAL events, which aren't in scope
20
+ scope_distance_modifier = 2
21
+
22
+ async def handle_event(self, event):
23
+ await self.handle_social(event)
24
+
25
+ async def filter_event(self, event):
26
+ if event.data["platform"] != "gitlab":
27
+ return False, "platform is not gitlab"
28
+ _, domain = self.helpers.split_domain(event.host)
29
+ if domain not in self.saas_domains:
30
+ return False, "gitlab instance is not gitlab.com/org"
31
+ return True
@@ -0,0 +1,84 @@
1
+ from bbot.modules.templates.gitlab import GitLabBaseModule
2
+
3
+
4
+ class gitlab_onprem(GitLabBaseModule):
5
+ watched_events = ["HTTP_RESPONSE", "TECHNOLOGY", "SOCIAL"]
6
+ produced_events = [
7
+ "TECHNOLOGY",
8
+ "SOCIAL",
9
+ "CODE_REPOSITORY",
10
+ "FINDING",
11
+ ]
12
+ flags = ["active", "safe", "code-enum"]
13
+ meta = {
14
+ "description": "Detect self-hosted GitLab instances and query them for repositories",
15
+ "created_date": "2024-03-11",
16
+ "author": "@TheTechromancer",
17
+ }
18
+
19
+ # Optional GitLab access token (only required for gitlab.com, but still
20
+ # supported for on-prem installations that expose private projects).
21
+ options = {"api_key": ""}
22
+ options_desc = {"api_key": "GitLab access token (for self-hosted instances only)"}
23
+
24
+ # Allow accepting events slightly beyond configured max distance so we can
25
+ # discover repos on neighbouring infrastructure.
26
+ scope_distance_modifier = 2
27
+
28
+ async def handle_event(self, event):
29
+ if event.type == "HTTP_RESPONSE":
30
+ await self.handle_http_response(event)
31
+ elif event.type == "TECHNOLOGY":
32
+ await self.handle_technology(event)
33
+ elif event.type == "SOCIAL":
34
+ await self.handle_social(event)
35
+
36
+ async def filter_event(self, event):
37
+ # only accept out-of-scope SOCIAL events
38
+ if event.type == "HTTP_RESPONSE":
39
+ if event.scope_distance > self.scan.scope_search_distance:
40
+ return False, "event is out of scope distance"
41
+ elif event.type == "TECHNOLOGY":
42
+ if not event.data["technology"].lower().startswith("gitlab"):
43
+ return False, "technology is not gitlab"
44
+ if not self.helpers.is_ip(event.host) and self.helpers.tldextract(event.host).domain == "gitlab":
45
+ return False, "gitlab instance is not self-hosted"
46
+ elif event.type == "SOCIAL":
47
+ if event.data["platform"] != "gitlab":
48
+ return False, "platform is not gitlab"
49
+ _, domain = self.helpers.split_domain(event.host)
50
+ if domain in self.saas_domains:
51
+ return False, "gitlab instance is not self-hosted"
52
+ return True
53
+
54
+ async def handle_http_response(self, event):
55
+ """Identify GitLab servers from HTTP responses."""
56
+ headers = event.data.get("header", {})
57
+ if "x_gitlab_meta" in headers:
58
+ url = event.parsed_url._replace(path="/").geturl()
59
+ await self.emit_event(
60
+ {"host": str(event.host), "technology": "GitLab", "url": url},
61
+ "TECHNOLOGY",
62
+ parent=event,
63
+ context=f"{{module}} detected {{event.type}}: GitLab at {url}",
64
+ )
65
+ description = f"GitLab server at {event.host}"
66
+ await self.emit_event(
67
+ {"host": str(event.host), "description": description},
68
+ "FINDING",
69
+ parent=event,
70
+ context=f"{{module}} detected {{event.type}}: {description}",
71
+ )
72
+
73
+ async def handle_technology(self, event):
74
+ """Enumerate projects & groups once we know a host is GitLab."""
75
+ base_url = self.get_base_url(event)
76
+
77
+ # Projects owned by the authenticated user (or public projects if no
78
+ # authentication).
79
+ projects_url = self.helpers.urljoin(base_url, "api/v4/projects?simple=true")
80
+ await self.handle_projects_url(projects_url, event)
81
+
82
+ # Group enumeration.
83
+ groups_url = self.helpers.urljoin(base_url, "api/v4/groups?simple=true")
84
+ await self.handle_groups_url(groups_url, event)
bbot/modules/gowitness.py CHANGED
@@ -161,7 +161,6 @@ class gowitness(BaseModule):
161
161
  key = e.data["url"]
162
162
  event_dict[key] = e
163
163
  stdin = "\n".join(list(event_dict))
164
- self.hugeinfo(f"Gowitness input: {stdin}")
165
164
 
166
165
  try:
167
166
  async for line in self.run_process_live(self.command, input=stdin, idle_timeout=self.idle_timeout):
@@ -182,7 +181,6 @@ class gowitness(BaseModule):
182
181
  # NOTE: this prevents long filenames from causing problems in BBOT, but gowitness will still fail to save it.
183
182
  filename = self.helpers.truncate_filename(filename)
184
183
  webscreenshot_data = {"path": str(filename), "url": final_url}
185
- self.hugewarning(event_dict)
186
184
  parent_event = event_dict[url]
187
185
  await self.emit_event(
188
186
  webscreenshot_data,
@@ -259,9 +257,7 @@ class gowitness(BaseModule):
259
257
  con.row_factory = aiosqlite.Row
260
258
  con.text_factory = self.helpers.smart_decode
261
259
  async with con.execute("SELECT * FROM results") as cur:
262
- self.critical(f"CUR: {cur}")
263
260
  async for row in cur:
264
- self.critical(f"SCREENSHOT: {row}")
265
261
  row = dict(row)
266
262
  _id = row["id"]
267
263
  if _id not in self.screenshots_taken:
@@ -276,7 +272,6 @@ class gowitness(BaseModule):
276
272
  con.row_factory = aiosqlite.Row
277
273
  async with con.execute("SELECT * FROM network_logs") as cur:
278
274
  async for row in cur:
279
- self.critical(f"NETWORK LOG: {row}")
280
275
  row = dict(row)
281
276
  url = row["url"]
282
277
  if url not in self.connections_logged:
@@ -291,7 +286,6 @@ class gowitness(BaseModule):
291
286
  con.row_factory = aiosqlite.Row
292
287
  async with con.execute("SELECT * FROM technologies") as cur:
293
288
  async for row in cur:
294
- self.critical(f"TECHNOLOGY: {row}")
295
289
  _id = row["id"]
296
290
  if _id not in self.technologies_found:
297
291
  self.technologies_found.add(_id)