bbot 2.5.0__py3-none-any.whl → 2.7.2.7424rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +22 -8
  3. bbot/core/engine.py +1 -1
  4. bbot/core/event/__init__.py +2 -2
  5. bbot/core/event/base.py +138 -110
  6. bbot/core/flags.py +1 -0
  7. bbot/core/helpers/bloom.py +6 -7
  8. bbot/core/helpers/command.py +5 -2
  9. bbot/core/helpers/depsinstaller/installer.py +78 -7
  10. bbot/core/helpers/dns/dns.py +0 -1
  11. bbot/core/helpers/dns/engine.py +0 -2
  12. bbot/core/helpers/files.py +2 -2
  13. bbot/core/helpers/git.py +17 -0
  14. bbot/core/helpers/helper.py +6 -5
  15. bbot/core/helpers/misc.py +15 -28
  16. bbot/core/helpers/names_generator.py +5 -0
  17. bbot/core/helpers/ntlm.py +0 -2
  18. bbot/core/helpers/regex.py +1 -1
  19. bbot/core/helpers/regexes.py +25 -8
  20. bbot/core/helpers/web/engine.py +1 -1
  21. bbot/core/helpers/web/web.py +2 -1
  22. bbot/core/modules.py +22 -60
  23. bbot/core/shared_deps.py +38 -0
  24. bbot/defaults.yml +4 -2
  25. bbot/modules/apkpure.py +2 -2
  26. bbot/modules/aspnet_bin_exposure.py +80 -0
  27. bbot/modules/baddns.py +1 -1
  28. bbot/modules/baddns_direct.py +1 -1
  29. bbot/modules/baddns_zone.py +1 -1
  30. bbot/modules/badsecrets.py +1 -1
  31. bbot/modules/base.py +129 -40
  32. bbot/modules/bucket_amazon.py +1 -1
  33. bbot/modules/bucket_digitalocean.py +1 -1
  34. bbot/modules/bucket_firebase.py +1 -1
  35. bbot/modules/bucket_google.py +1 -1
  36. bbot/modules/{bucket_azure.py → bucket_microsoft.py} +2 -2
  37. bbot/modules/builtwith.py +4 -2
  38. bbot/modules/c99.py +1 -1
  39. bbot/modules/dnsbimi.py +1 -4
  40. bbot/modules/dnsbrute.py +6 -1
  41. bbot/modules/dnscommonsrv.py +1 -0
  42. bbot/modules/dnsdumpster.py +35 -52
  43. bbot/modules/dnstlsrpt.py +0 -6
  44. bbot/modules/docker_pull.py +2 -2
  45. bbot/modules/emailformat.py +17 -1
  46. bbot/modules/ffuf.py +4 -1
  47. bbot/modules/ffuf_shortnames.py +6 -3
  48. bbot/modules/filedownload.py +8 -5
  49. bbot/modules/fullhunt.py +1 -1
  50. bbot/modules/git_clone.py +47 -22
  51. bbot/modules/gitdumper.py +5 -15
  52. bbot/modules/github_workflows.py +6 -5
  53. bbot/modules/gitlab_com.py +31 -0
  54. bbot/modules/gitlab_onprem.py +84 -0
  55. bbot/modules/gowitness.py +60 -30
  56. bbot/modules/graphql_introspection.py +145 -0
  57. bbot/modules/httpx.py +2 -0
  58. bbot/modules/hunt.py +10 -3
  59. bbot/modules/iis_shortnames.py +16 -7
  60. bbot/modules/internal/cloudcheck.py +65 -72
  61. bbot/modules/internal/unarchive.py +9 -3
  62. bbot/modules/lightfuzz/lightfuzz.py +6 -2
  63. bbot/modules/lightfuzz/submodules/esi.py +42 -0
  64. bbot/modules/{deadly/medusa.py → medusa.py} +4 -7
  65. bbot/modules/nuclei.py +2 -2
  66. bbot/modules/otx.py +9 -2
  67. bbot/modules/output/base.py +3 -11
  68. bbot/modules/paramminer_headers.py +10 -7
  69. bbot/modules/passivetotal.py +1 -1
  70. bbot/modules/portfilter.py +2 -0
  71. bbot/modules/portscan.py +1 -1
  72. bbot/modules/postman_download.py +2 -2
  73. bbot/modules/retirejs.py +232 -0
  74. bbot/modules/securitytxt.py +0 -3
  75. bbot/modules/sslcert.py +2 -2
  76. bbot/modules/subdomaincenter.py +1 -16
  77. bbot/modules/telerik.py +7 -2
  78. bbot/modules/templates/bucket.py +24 -4
  79. bbot/modules/templates/gitlab.py +98 -0
  80. bbot/modules/trufflehog.py +7 -4
  81. bbot/modules/wafw00f.py +2 -2
  82. bbot/presets/web/dotnet-audit.yml +1 -0
  83. bbot/presets/web/lightfuzz-heavy.yml +1 -1
  84. bbot/presets/web/lightfuzz-medium.yml +1 -1
  85. bbot/presets/web/lightfuzz-superheavy.yml +1 -1
  86. bbot/scanner/manager.py +44 -37
  87. bbot/scanner/scanner.py +17 -4
  88. bbot/scripts/benchmark_report.py +433 -0
  89. bbot/test/benchmarks/__init__.py +2 -0
  90. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  91. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  92. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  93. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  94. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  95. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  96. bbot/test/conftest.py +1 -1
  97. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  98. bbot/test/test_step_1/test_events.py +22 -21
  99. bbot/test/test_step_1/test_helpers.py +20 -0
  100. bbot/test/test_step_1/test_manager_scope_accuracy.py +45 -0
  101. bbot/test/test_step_1/test_modules_basic.py +40 -15
  102. bbot/test/test_step_1/test_python_api.py +2 -2
  103. bbot/test/test_step_1/test_regexes.py +21 -4
  104. bbot/test/test_step_1/test_scan.py +7 -8
  105. bbot/test/test_step_1/test_web.py +46 -0
  106. bbot/test/test_step_2/module_tests/base.py +6 -1
  107. bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py +73 -0
  108. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +52 -18
  109. bbot/test/test_step_2/module_tests/test_module_bucket_google.py +1 -1
  110. bbot/test/test_step_2/module_tests/{test_module_bucket_azure.py → test_module_bucket_microsoft.py} +7 -5
  111. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +19 -31
  112. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  113. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  114. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  115. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  116. bbot/test/test_step_2/module_tests/test_module_excavate.py +64 -5
  117. bbot/test/test_step_2/module_tests/test_module_extractous.py +13 -1
  118. bbot/test/test_step_2/module_tests/test_module_github_workflows.py +10 -1
  119. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  120. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  121. bbot/test/test_step_2/module_tests/test_module_gowitness.py +5 -5
  122. bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py +34 -0
  123. bbot/test/test_step_2/module_tests/test_module_iis_shortnames.py +46 -1
  124. bbot/test/test_step_2/module_tests/test_module_jadx.py +9 -0
  125. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +71 -3
  126. bbot/test/test_step_2/module_tests/test_module_nuclei.py +8 -6
  127. bbot/test/test_step_2/module_tests/test_module_otx.py +3 -0
  128. bbot/test/test_step_2/module_tests/test_module_portfilter.py +2 -0
  129. bbot/test/test_step_2/module_tests/test_module_retirejs.py +161 -0
  130. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  131. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +10 -1
  132. bbot/test/test_step_2/module_tests/test_module_unarchive.py +9 -0
  133. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/METADATA +12 -9
  134. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/RECORD +137 -124
  135. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/WHEEL +1 -1
  136. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info/licenses}/LICENSE +98 -58
  137. bbot/modules/binaryedge.py +0 -42
  138. bbot/modules/censys.py +0 -98
  139. bbot/modules/gitlab.py +0 -141
  140. bbot/modules/zoomeye.py +0 -77
  141. bbot/test/test_step_2/module_tests/test_module_binaryedge.py +0 -33
  142. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  143. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  144. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/entry_points.txt +0 -0
@@ -87,14 +87,17 @@ class ffuf_shortnames(ffuf):
87
87
  found_prefixes.add(prefix)
88
88
  return list(found_prefixes)
89
89
 
90
- async def setup(self):
91
- self.proxy = self.scan.web_config.get("http_proxy", "")
92
- self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10))
90
+ async def setup_deps(self):
93
91
  wordlist_extensions = self.config.get("wordlist_extensions", "")
94
92
  if not wordlist_extensions:
95
93
  wordlist_extensions = f"{self.helpers.wordlist_dir}/raft-small-extensions-lowercase_CLEANED.txt"
96
94
  self.debug(f"Using [{wordlist_extensions}] for shortname candidate extension list")
97
95
  self.wordlist_extensions = await self.helpers.wordlist(wordlist_extensions)
96
+ return True
97
+
98
+ async def setup(self):
99
+ self.proxy = self.scan.web_config.get("http_proxy", "")
100
+ self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10))
98
101
  self.ignore_redirects = self.config.get("ignore_redirects")
99
102
  self.max_predictions = self.config.get("max_predictions")
100
103
  self.find_subwords = self.config.get("find_subwords")
@@ -14,7 +14,7 @@ class filedownload(BaseModule):
14
14
 
15
15
  watched_events = ["URL_UNVERIFIED", "HTTP_RESPONSE"]
16
16
  produced_events = ["FILESYSTEM"]
17
- flags = ["active", "safe", "web-basic"]
17
+ flags = ["active", "safe", "web-basic", "download"]
18
18
  meta = {
19
19
  "description": "Download common filetypes such as PDF, DOCX, PPTX, etc.",
20
20
  "created_date": "2023-10-11",
@@ -94,6 +94,12 @@ class filedownload(BaseModule):
94
94
 
95
95
  scope_distance_modifier = 3
96
96
 
97
+ async def setup_deps(self):
98
+ self.mime_db_file = await self.helpers.wordlist(
99
+ "https://raw.githubusercontent.com/jshttp/mime-db/master/db.json"
100
+ )
101
+ return True
102
+
97
103
  async def setup(self):
98
104
  self.extensions = list({e.lower().strip(".") for e in self.config.get("extensions", [])})
99
105
  self.max_filesize = self.config.get("max_filesize", "10MB")
@@ -103,11 +109,8 @@ class filedownload(BaseModule):
103
109
  if output_dir:
104
110
  self.download_dir = Path(output_dir) / "filedownload"
105
111
  else:
106
- self.download_dir = self.helpers.temp_dir / "filedownload"
112
+ self.download_dir = self.scan.temp_dir / "filedownload"
107
113
  self.helpers.mkdir(self.download_dir)
108
- self.mime_db_file = await self.helpers.wordlist(
109
- "https://raw.githubusercontent.com/jshttp/mime-db/master/db.json"
110
- )
111
114
  self.mime_db = {}
112
115
  with open(self.mime_db_file) as f:
113
116
  mime_db = json.load(f)
bbot/modules/fullhunt.py CHANGED
@@ -22,7 +22,7 @@ class fullhunt(subdomain_enum_apikey):
22
22
 
23
23
  async def ping(self):
24
24
  url = f"{self.base_url}/auth/status"
25
- j = (await self.api_request(url)).json()
25
+ j = (await self.api_request(url, retry_on_http_429=False)).json()
26
26
  remaining = j["user_credits"]["remaining_credits"]
27
27
  assert remaining > 0, "No credits remaining"
28
28
 
bbot/modules/git_clone.py CHANGED
@@ -6,7 +6,7 @@ from bbot.modules.templates.github import github
6
6
  class git_clone(github):
7
7
  watched_events = ["CODE_REPOSITORY"]
8
8
  produced_events = ["FILESYSTEM"]
9
- flags = ["passive", "safe", "slow", "code-enum"]
9
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
10
10
  meta = {
11
11
  "description": "Clone code github repositories",
12
12
  "created_date": "2024-03-08",
@@ -24,44 +24,69 @@ class git_clone(github):
24
24
 
25
25
  async def setup(self):
26
26
  output_folder = self.config.get("output_folder")
27
- if output_folder:
28
- self.output_dir = Path(output_folder) / "git_repos"
29
- else:
30
- self.output_dir = self.helpers.temp_dir / "git_repos"
27
+ self.output_dir = Path(output_folder) / "git_repos" if output_folder else self.scan.temp_dir / "git_repos"
31
28
  self.helpers.mkdir(self.output_dir)
32
29
  return await super().setup()
33
30
 
34
31
  async def filter_event(self, event):
35
- if event.type == "CODE_REPOSITORY":
36
- if "git" not in event.tags:
37
- return False, "event is not a git repository"
32
+ if event.type == "CODE_REPOSITORY" and "git" not in event.tags:
33
+ return False, "event is not a git repository"
38
34
  return True
39
35
 
40
36
  async def handle_event(self, event):
41
- repo_url = event.data.get("url")
42
- repo_path = await self.clone_git_repository(repo_url)
43
- if repo_path:
44
- self.verbose(f"Cloned {repo_url} to {repo_path}")
45
- codebase_event = self.make_event({"path": str(repo_path)}, "FILESYSTEM", tags=["git"], parent=event)
37
+ repository_url = event.data.get("url")
38
+ repository_path = await self.clone_git_repository(repository_url)
39
+ if repository_path:
40
+ self.verbose(f"Cloned {repository_url} to {repository_path}")
41
+ codebase_event = self.make_event({"path": str(repository_path)}, "FILESYSTEM", tags=["git"], parent=event)
46
42
  await self.emit_event(
47
43
  codebase_event,
48
- context=f"{{module}} downloaded git repo at {repo_url} to {{event.type}}: {repo_path}",
44
+ context=f"{{module}} cloned git repository at {repository_url} to {{event.type}}: {repository_path}",
49
45
  )
50
46
 
51
47
  async def clone_git_repository(self, repository_url):
52
48
  owner = repository_url.split("/")[-2]
53
49
  folder = self.output_dir / owner
54
50
  self.helpers.mkdir(folder)
55
- if self.api_key:
56
- url = repository_url.replace("https://github.com", f"https://user:{self.api_key}@github.com")
57
- else:
58
- url = repository_url
59
- command = ["git", "-C", folder, "clone", url]
51
+
52
+ command = ["git", "-C", folder, "clone", repository_url]
53
+ env = {"GIT_TERMINAL_PROMPT": "0"}
54
+
60
55
  try:
61
- output = await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, check=True)
56
+ hostname = self.helpers.urlparse(repository_url).hostname
57
+ if hostname and self.api_key:
58
+ _, domain = self.helpers.split_domain(hostname)
59
+ # only use the api key if the domain is github.com
60
+ if domain == "github.com":
61
+ env["GIT_HELPER"] = (
62
+ f'!f() {{ case "$1" in get) '
63
+ f"echo username=x-access-token; "
64
+ f"echo password={self.api_key};; "
65
+ f'esac; }}; f "$@"'
66
+ )
67
+ command = (
68
+ command[:1]
69
+ + [
70
+ "-c",
71
+ "credential.helper=",
72
+ "-c",
73
+ "credential.useHttpPath=true",
74
+ "--config-env=credential.helper=GIT_HELPER",
75
+ ]
76
+ + command[1:]
77
+ )
78
+
79
+ output = await self.run_process(command, env=env, check=True)
62
80
  except CalledProcessError as e:
63
- self.debug(f"Error cloning {url}. STDERR: {repr(e.stderr)}")
81
+ self.debug(f"Error cloning {repository_url}. STDERR: {repr(e.stderr)}")
64
82
  return
65
83
 
66
84
  folder_name = output.stderr.split("Cloning into '")[1].split("'")[0]
67
- return folder / folder_name
85
+ repo_folder = folder / folder_name
86
+
87
+ # sanitize the repo
88
+ # this moves the git config, index file, and hooks folder out of the .git folder to prevent nasty things
89
+ # Note: the index file can be regenerated by running "git checkout HEAD -- ."
90
+ self.helpers.sanitize_git_repo(repo_folder)
91
+
92
+ return repo_folder
bbot/modules/gitdumper.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import asyncio
2
- import regex as re
3
2
  from pathlib import Path
4
3
  from subprocess import CalledProcessError
5
4
  from bbot.modules.base import BaseModule
@@ -8,7 +7,7 @@ from bbot.modules.base import BaseModule
8
7
  class gitdumper(BaseModule):
9
8
  watched_events = ["CODE_REPOSITORY"]
10
9
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "slow", "code-enum"]
10
+ flags = ["passive", "safe", "slow", "code-enum", "download"]
12
11
  meta = {
13
12
  "description": "Download a leaked .git folder recursively or by fuzzing common names",
14
13
  "created_date": "2025-02-11",
@@ -33,9 +32,8 @@ class gitdumper(BaseModule):
33
32
  if output_folder:
34
33
  self.output_dir = Path(output_folder) / "git_repos"
35
34
  else:
36
- self.output_dir = self.helpers.temp_dir / "git_repos"
35
+ self.output_dir = self.scan.temp_dir / "git_repos"
37
36
  self.helpers.mkdir(self.output_dir)
38
- self.unsafe_regex = self.helpers.re.compile(r"^\s*fsmonitor|sshcommand|askpass|editor|pager", re.IGNORECASE)
39
37
  self.ref_regex = self.helpers.re.compile(r"ref: refs/heads/([a-zA-Z\d_-]+)")
40
38
  self.obj_regex = self.helpers.re.compile(r"[a-f0-9]{40}")
41
39
  self.pack_regex = self.helpers.re.compile(r"pack-([a-f0-9]{40})\.pack")
@@ -131,7 +129,6 @@ class gitdumper(BaseModule):
131
129
  else:
132
130
  result = await self.git_fuzz(repo_url, repo_folder)
133
131
  if result:
134
- await self.sanitize_config(repo_folder)
135
132
  await self.git_checkout(repo_folder)
136
133
  codebase_event = self.make_event({"path": str(repo_folder)}, "FILESYSTEM", tags=["git"], parent=event)
137
134
  await self.emit_event(
@@ -251,15 +248,6 @@ class gitdumper(BaseModule):
251
248
  self.debug(f"Unable to download git files to {folder}")
252
249
  return False
253
250
 
254
- async def sanitize_config(self, folder):
255
- config_file = folder / ".git/config"
256
- if config_file.exists():
257
- with config_file.open("r", encoding="utf-8", errors="ignore") as file:
258
- content = file.read()
259
- sanitized = await self.helpers.re.sub(self.unsafe_regex, r"# \g<0>", content)
260
- with config_file.open("w", encoding="utf-8") as file:
261
- file.write(sanitized)
262
-
263
251
  async def git_catfile(self, hash, option="-t", folder=Path()):
264
252
  command = ["git", "cat-file", option, hash]
265
253
  try:
@@ -270,8 +258,10 @@ class gitdumper(BaseModule):
270
258
  return output.stdout
271
259
 
272
260
  async def git_checkout(self, folder):
261
+ self.helpers.sanitize_git_repo(folder)
273
262
  self.verbose(f"Running git checkout to reconstruct the git repository at {folder}")
274
- command = ["git", "checkout", "."]
263
+ # we do "checkout head -- ." because the sanitization deletes the index file, and it needs to be reconstructed
264
+ command = ["git", "checkout", "HEAD", "--", "."]
275
265
  try:
276
266
  await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, cwd=folder, check=True)
277
267
  except CalledProcessError as e:
@@ -8,11 +8,12 @@ from bbot.modules.templates.github import github
8
8
  class github_workflows(github):
9
9
  watched_events = ["CODE_REPOSITORY"]
10
10
  produced_events = ["FILESYSTEM"]
11
- flags = ["passive", "safe", "code-enum"]
11
+ flags = ["passive", "safe", "code-enum", "download"]
12
12
  meta = {
13
13
  "description": "Download a github repositories workflow logs and workflow artifacts",
14
14
  "created_date": "2024-04-29",
15
15
  "author": "@domwhewell-sage",
16
+ "auth_required": True,
16
17
  }
17
18
  options = {"api_key": "", "num_logs": 1, "output_folder": ""}
18
19
  options_desc = {
@@ -152,7 +153,7 @@ class github_workflows(github):
152
153
  filename = f"run_{run_id}.zip"
153
154
  file_destination = folder / filename
154
155
  try:
155
- await self.helpers.download(
156
+ await self.api_download(
156
157
  f"{self.base_url}/repos/{owner}/{repo}/actions/runs/{run_id}/logs",
157
158
  filename=file_destination,
158
159
  headers=self.headers,
@@ -166,7 +167,7 @@ class github_workflows(github):
166
167
  status_code = getattr(response, "status_code", 0)
167
168
  if status_code == 403:
168
169
  self.warning(
169
- f"The current access key does not have access to workflow {owner}/{repo}/{run_id} (status: {status_code})"
170
+ f"The current access key does not have access to workflow {owner}/{repo}/{run_id}, The API key must have the 'repo' scope or read 'Actions' repository permissions (status: {status_code})"
170
171
  )
171
172
  else:
172
173
  self.info(
@@ -212,7 +213,7 @@ class github_workflows(github):
212
213
  self.helpers.mkdir(folder)
213
214
  file_destination = folder / artifact_name
214
215
  try:
215
- await self.helpers.download(
216
+ await self.api_download(
216
217
  f"{self.base_url}/repos/{owner}/{repo}/actions/artifacts/{artifact_id}/zip",
217
218
  filename=file_destination,
218
219
  headers=self.headers,
@@ -228,6 +229,6 @@ class github_workflows(github):
228
229
  status_code = getattr(response, "status_code", 0)
229
230
  if status_code == 403:
230
231
  self.warning(
231
- f"The current access key does not have access to workflow artifacts {owner}/{repo}/{artifact_id} (status: {status_code})"
232
+ f"The current access key does not have access to workflow artifacts {owner}/{repo}/{artifact_id}, The API key must have the 'repo' scope or read 'Actions' repository permissions (status: {status_code})"
232
233
  )
233
234
  return file_destination
@@ -0,0 +1,31 @@
1
+ from bbot.modules.templates.gitlab import GitLabBaseModule
2
+
3
+
4
+ class gitlab_com(GitLabBaseModule):
5
+ watched_events = ["SOCIAL"]
6
+ produced_events = [
7
+ "CODE_REPOSITORY",
8
+ ]
9
+ flags = ["active", "safe", "code-enum"]
10
+ meta = {
11
+ "description": "Enumerate GitLab SaaS (gitlab.com/org) for projects and groups",
12
+ "created_date": "2024-03-11",
13
+ "author": "@TheTechromancer",
14
+ }
15
+
16
+ options = {"api_key": ""}
17
+ options_desc = {"api_key": "GitLab access token (for gitlab.com/org only)"}
18
+
19
+ # This is needed because we are consuming SOCIAL events, which aren't in scope
20
+ scope_distance_modifier = 2
21
+
22
+ async def handle_event(self, event):
23
+ await self.handle_social(event)
24
+
25
+ async def filter_event(self, event):
26
+ if event.data["platform"] != "gitlab":
27
+ return False, "platform is not gitlab"
28
+ _, domain = self.helpers.split_domain(event.host)
29
+ if domain not in self.saas_domains:
30
+ return False, "gitlab instance is not gitlab.com/org"
31
+ return True
@@ -0,0 +1,84 @@
1
+ from bbot.modules.templates.gitlab import GitLabBaseModule
2
+
3
+
4
+ class gitlab_onprem(GitLabBaseModule):
5
+ watched_events = ["HTTP_RESPONSE", "TECHNOLOGY", "SOCIAL"]
6
+ produced_events = [
7
+ "TECHNOLOGY",
8
+ "SOCIAL",
9
+ "CODE_REPOSITORY",
10
+ "FINDING",
11
+ ]
12
+ flags = ["active", "safe", "code-enum"]
13
+ meta = {
14
+ "description": "Detect self-hosted GitLab instances and query them for repositories",
15
+ "created_date": "2024-03-11",
16
+ "author": "@TheTechromancer",
17
+ }
18
+
19
+ # Optional GitLab access token (only required for gitlab.com, but still
20
+ # supported for on-prem installations that expose private projects).
21
+ options = {"api_key": ""}
22
+ options_desc = {"api_key": "GitLab access token (for self-hosted instances only)"}
23
+
24
+ # Allow accepting events slightly beyond configured max distance so we can
25
+ # discover repos on neighbouring infrastructure.
26
+ scope_distance_modifier = 2
27
+
28
+ async def handle_event(self, event):
29
+ if event.type == "HTTP_RESPONSE":
30
+ await self.handle_http_response(event)
31
+ elif event.type == "TECHNOLOGY":
32
+ await self.handle_technology(event)
33
+ elif event.type == "SOCIAL":
34
+ await self.handle_social(event)
35
+
36
+ async def filter_event(self, event):
37
+ # only accept out-of-scope SOCIAL events
38
+ if event.type == "HTTP_RESPONSE":
39
+ if event.scope_distance > self.scan.scope_search_distance:
40
+ return False, "event is out of scope distance"
41
+ elif event.type == "TECHNOLOGY":
42
+ if not event.data["technology"].lower().startswith("gitlab"):
43
+ return False, "technology is not gitlab"
44
+ if not self.helpers.is_ip(event.host) and self.helpers.tldextract(event.host).domain == "gitlab":
45
+ return False, "gitlab instance is not self-hosted"
46
+ elif event.type == "SOCIAL":
47
+ if event.data["platform"] != "gitlab":
48
+ return False, "platform is not gitlab"
49
+ _, domain = self.helpers.split_domain(event.host)
50
+ if domain in self.saas_domains:
51
+ return False, "gitlab instance is not self-hosted"
52
+ return True
53
+
54
+ async def handle_http_response(self, event):
55
+ """Identify GitLab servers from HTTP responses."""
56
+ headers = event.data.get("header", {})
57
+ if "x_gitlab_meta" in headers:
58
+ url = event.parsed_url._replace(path="/").geturl()
59
+ await self.emit_event(
60
+ {"host": str(event.host), "technology": "GitLab", "url": url},
61
+ "TECHNOLOGY",
62
+ parent=event,
63
+ context=f"{{module}} detected {{event.type}}: GitLab at {url}",
64
+ )
65
+ description = f"GitLab server at {event.host}"
66
+ await self.emit_event(
67
+ {"host": str(event.host), "description": description},
68
+ "FINDING",
69
+ parent=event,
70
+ context=f"{{module}} detected {{event.type}}: {description}",
71
+ )
72
+
73
+ async def handle_technology(self, event):
74
+ """Enumerate projects & groups once we know a host is GitLab."""
75
+ base_url = self.get_base_url(event)
76
+
77
+ # Projects owned by the authenticated user (or public projects if no
78
+ # authentication).
79
+ projects_url = self.helpers.urljoin(base_url, "api/v4/projects?simple=true")
80
+ await self.handle_projects_url(projects_url, event)
81
+
82
+ # Group enumeration.
83
+ groups_url = self.helpers.urljoin(base_url, "api/v4/groups?simple=true")
84
+ await self.handle_groups_url(groups_url, event)
bbot/modules/gowitness.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  import asyncio
3
3
  import aiosqlite
4
4
  import multiprocessing
5
+ import platform
5
6
  from pathlib import Path
6
7
  from contextlib import suppress
7
8
  from shutil import copyfile, copymode
@@ -15,7 +16,7 @@ class gowitness(BaseModule):
15
16
  flags = ["active", "safe", "web-screenshots"]
16
17
  meta = {"description": "Take screenshots of webpages", "created_date": "2022-07-08", "author": "@TheTechromancer"}
17
18
  options = {
18
- "version": "2.4.2",
19
+ "version": "3.0.5",
19
20
  "threads": 0,
20
21
  "timeout": 10,
21
22
  "resolution_x": 1440,
@@ -23,6 +24,7 @@ class gowitness(BaseModule):
23
24
  "output_path": "",
24
25
  "social": False,
25
26
  "idle_timeout": 1800,
27
+ "chrome_path": "",
26
28
  }
27
29
  options_desc = {
28
30
  "version": "Gowitness version",
@@ -33,6 +35,7 @@ class gowitness(BaseModule):
33
35
  "output_path": "Where to save screenshots",
34
36
  "social": "Whether to screenshot social media webpages",
35
37
  "idle_timeout": "Skip the current gowitness batch if it stalls for longer than this many seconds",
38
+ "chrome_path": "Path to chrome executable",
36
39
  }
37
40
  deps_common = ["chromium"]
38
41
  deps_pip = ["aiosqlite"]
@@ -67,29 +70,54 @@ class gowitness(BaseModule):
67
70
  self.base_path = Path(output_path) / "gowitness"
68
71
  else:
69
72
  self.base_path = self.scan.home / "gowitness"
73
+
70
74
  self.chrome_path = None
71
- custom_chrome_path = self.helpers.tools_dir / "chrome-linux" / "chrome"
72
- if custom_chrome_path.is_file():
73
- self.chrome_path = custom_chrome_path
75
+ config_chrome_path = self.config.get("chrome_path")
76
+ if config_chrome_path:
77
+ config_chrome_path = Path(config_chrome_path)
78
+ if not config_chrome_path.is_file():
79
+ return False, f"Could not find custom Chrome path at {config_chrome_path}"
80
+ self.chrome_path = config_chrome_path
81
+ else:
82
+ if platform.system() == "Darwin":
83
+ bbot_chrome_path = (
84
+ self.helpers.tools_dir / "chrome-mac" / "Chromium.app" / "Contents" / "MacOS" / "Chromium"
85
+ )
86
+ else:
87
+ bbot_chrome_path = self.helpers.tools_dir / "chrome-linux" / "chrome"
88
+ if bbot_chrome_path.is_file():
89
+ self.chrome_path = bbot_chrome_path
90
+
91
+ # make sure our chrome path works
92
+ chrome_test_pass = False
93
+ if self.chrome_path and self.chrome_path.is_file():
94
+ chrome_test_proc = await self.run_process([str(self.chrome_path), "--version"])
95
+ if getattr(chrome_test_proc, "returncode", 1) == 0:
96
+ self.verbose(f"Found chrome executable at {self.chrome_path}")
97
+ chrome_test_pass = True
98
+
99
+ if not chrome_test_pass:
100
+ # last resort - try to find a working chrome install
101
+ for binary in ("Google Chrome", "chrome", "chromium", "chromium-browser"):
102
+ binary_path = self.helpers.which(binary)
103
+ if binary_path and Path(binary_path).is_file():
104
+ chrome_test_proc = await self.run_process([str(binary_path), "--version"])
105
+ if getattr(chrome_test_proc, "returncode", 1) == 0:
106
+ self.verbose(f"Found chrome executable at {binary_path}")
107
+ chrome_test_pass = True
108
+ break
109
+
110
+ if not chrome_test_pass:
111
+ return (
112
+ False,
113
+ "Failed to set up Google chrome. Please install manually and set `chrome_path`, or try again with --force-deps.",
114
+ )
74
115
 
75
116
  # fix ubuntu-specific sandbox bug
76
117
  chrome_devel_sandbox = self.helpers.tools_dir / "chrome-linux" / "chrome_sandbox"
77
118
  if chrome_devel_sandbox.is_file():
78
119
  os.environ["CHROME_DEVEL_SANDBOX"] = str(chrome_devel_sandbox)
79
120
 
80
- # make sure we have a working chrome install
81
- chrome_test_pass = False
82
- for binary in ("chrome", "chromium", "chromium-browser", custom_chrome_path):
83
- binary_path = self.helpers.which(binary)
84
- if binary_path and Path(binary_path).is_file():
85
- chrome_test_proc = await self.run_process([binary_path, "--version"])
86
- if getattr(chrome_test_proc, "returncode", 1) == 0:
87
- self.verbose(f"Found chrome executable at {binary_path}")
88
- chrome_test_pass = True
89
- break
90
- if not chrome_test_pass:
91
- return False, "Failed to set up Google chrome. Please install manually or try again with --force-deps."
92
-
93
121
  self.db_path = self.base_path / "gowitness.sqlite3"
94
122
  self.screenshot_path = self.base_path / "screenshots"
95
123
  self.command = self.construct_command()
@@ -146,6 +174,7 @@ class gowitness(BaseModule):
146
174
  new_screenshots = await self.get_new_screenshots()
147
175
  for filename, screenshot in new_screenshots.items():
148
176
  url = screenshot["url"]
177
+ url = self.helpers.clean_url(url).geturl()
149
178
  final_url = screenshot["final_url"]
150
179
  filename = self.screenshot_path / screenshot["filename"]
151
180
  filename = filename.relative_to(self.scan.home)
@@ -163,11 +192,11 @@ class gowitness(BaseModule):
163
192
  # emit URLs
164
193
  new_network_logs = await self.get_new_network_logs()
165
194
  for url, row in new_network_logs.items():
166
- ip = row["ip"]
195
+ ip = row["remote_ip"]
167
196
  status_code = row["status_code"]
168
197
  tags = [f"status-{status_code}", f"ip-{ip}", "spider-danger"]
169
198
 
170
- _id = row["url_id"]
199
+ _id = row["result_id"]
171
200
  parent_url = self.screenshots_taken[_id]
172
201
  parent_event = event_dict[parent_url]
173
202
  if url and url.startswith("http"):
@@ -182,7 +211,7 @@ class gowitness(BaseModule):
182
211
  # emit technologies
183
212
  new_technologies = await self.get_new_technologies()
184
213
  for row in new_technologies.values():
185
- parent_id = row["url_id"]
214
+ parent_id = row["result_id"]
186
215
  parent_url = self.screenshots_taken[parent_id]
187
216
  parent_event = event_dict[parent_url]
188
217
  technology = row["value"]
@@ -196,28 +225,29 @@ class gowitness(BaseModule):
196
225
 
197
226
  def construct_command(self):
198
227
  # base executable
199
- command = ["gowitness"]
228
+ command = ["gowitness", "scan"]
200
229
  # chrome path
201
230
  if self.chrome_path is not None:
202
231
  command += ["--chrome-path", str(self.chrome_path)]
203
232
  # db path
204
- command += ["--db-path", str(self.db_path)]
233
+ command += ["--write-db"]
234
+ command += ["--write-db-uri", f"sqlite://{self.db_path}"]
205
235
  # screenshot path
206
236
  command += ["--screenshot-path", str(self.screenshot_path)]
207
237
  # user agent
208
- command += ["--user-agent", f"{self.scan.useragent}"]
238
+ command += ["--chrome-user-agent", f"{self.scan.useragent}"]
209
239
  # proxy
210
240
  if self.proxy:
211
- command += ["--proxy", str(self.proxy)]
241
+ command += ["--chrome-proxy", str(self.proxy)]
212
242
  # resolution
213
- command += ["--resolution-x", str(self.resolution_x)]
214
- command += ["--resolution-y", str(self.resolution_y)]
215
- # input
216
- command += ["file", "-f", "-"]
243
+ command += ["--chrome-window-x", str(self.resolution_x)]
244
+ command += ["--chrome-window-y", str(self.resolution_y)]
217
245
  # threads
218
246
  command += ["--threads", str(self.threads)]
219
247
  # timeout
220
248
  command += ["--timeout", str(self.timeout)]
249
+ # input
250
+ command += ["file", "-f", "-"]
221
251
  return command
222
252
 
223
253
  async def get_new_screenshots(self):
@@ -226,7 +256,7 @@ class gowitness(BaseModule):
226
256
  async with aiosqlite.connect(str(self.db_path)) as con:
227
257
  con.row_factory = aiosqlite.Row
228
258
  con.text_factory = self.helpers.smart_decode
229
- async with con.execute("SELECT * FROM urls") as cur:
259
+ async with con.execute("SELECT * FROM results") as cur:
230
260
  async for row in cur:
231
261
  row = dict(row)
232
262
  _id = row["id"]
@@ -243,7 +273,7 @@ class gowitness(BaseModule):
243
273
  async with con.execute("SELECT * FROM network_logs") as cur:
244
274
  async for row in cur:
245
275
  row = dict(row)
246
- url = row["final_url"]
276
+ url = row["url"]
247
277
  if url not in self.connections_logged:
248
278
  self.connections_logged.add(url)
249
279
  network_logs[url] = row