bbot-2.1.2.5158rc0-py3-none-any.whl → bbot-2.1.2.5171rc0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

bbot/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # version placeholder (replaced by poetry-dynamic-versioning)
2
- __version__ = "v2.1.2.5158rc"
2
+ __version__ = "v2.1.2.5171rc"
3
3
 
4
4
  from .scanner import Scanner, Preset
bbot/core/event/base.py CHANGED
@@ -503,12 +503,13 @@ class BaseEvent:
503
503
  for t in list(self.tags):
504
504
  if t.startswith("distance-"):
505
505
  self.remove_tag(t)
506
- if scope_distance == 0:
507
- self.add_tag("in-scope")
508
- self.remove_tag("affiliate")
509
- else:
510
- self.remove_tag("in-scope")
511
- self.add_tag(f"distance-{new_scope_distance}")
506
+ if self.host:
507
+ if scope_distance == 0:
508
+ self.add_tag("in-scope")
509
+ self.remove_tag("affiliate")
510
+ else:
511
+ self.remove_tag("in-scope")
512
+ self.add_tag(f"distance-{new_scope_distance}")
512
513
  self._scope_distance = new_scope_distance
513
514
  # apply recursively to parent events
514
515
  parent_scope_distance = getattr(self.parent, "scope_distance", None)
@@ -1018,20 +1019,21 @@ class ClosestHostEvent(DictHostEvent):
1018
1019
  class DictPathEvent(DictEvent):
1019
1020
  def sanitize_data(self, data):
1020
1021
  new_data = dict(data)
1022
+ new_data["path"] = str(new_data["path"])
1021
1023
  file_blobs = getattr(self.scan, "_file_blobs", False)
1022
1024
  folder_blobs = getattr(self.scan, "_folder_blobs", False)
1023
1025
  blob = None
1024
1026
  try:
1025
- data_path = Path(data["path"])
1026
- if data_path.is_file():
1027
+ self._data_path = Path(data["path"])
1028
+ if self._data_path.is_file():
1027
1029
  self.add_tag("file")
1028
1030
  if file_blobs:
1029
- with open(data_path, "rb") as file:
1031
+ with open(self._data_path, "rb") as file:
1030
1032
  blob = file.read()
1031
- elif data_path.is_dir():
1033
+ elif self._data_path.is_dir():
1032
1034
  self.add_tag("folder")
1033
1035
  if folder_blobs:
1034
- blob = self._tar_directory(data_path)
1036
+ blob = self._tar_directory(self._data_path)
1035
1037
  except KeyError:
1036
1038
  pass
1037
1039
  if blob:
@@ -1540,7 +1542,23 @@ class WAF(DictHostEvent):
1540
1542
 
1541
1543
 
1542
1544
  class FILESYSTEM(DictPathEvent):
1543
- pass
1545
+ def __init__(self, *args, **kwargs):
1546
+ super().__init__(*args, **kwargs)
1547
+ if self._data_path.is_file():
1548
+ # detect type of file content using magic
1549
+ from bbot.core.helpers.libmagic import get_magic_info, get_compression
1550
+
1551
+ extension, mime_type, description, confidence = get_magic_info(self.data["path"])
1552
+ self.data["magic_extension"] = extension
1553
+ self.data["magic_mime_type"] = mime_type
1554
+ self.data["magic_description"] = description
1555
+ self.data["magic_confidence"] = confidence
1556
+ # detection compression
1557
+ compression = get_compression(mime_type)
1558
+ if compression:
1559
+ self.add_tag("compressed")
1560
+ self.add_tag(f"{compression}-archive")
1561
+ self.data["compression"] = compression
1544
1562
 
1545
1563
 
1546
1564
  class RAW_DNS_RECORD(DictHostEvent, DnsEvent):
bbot/core/helpers/libmagic.py ADDED
@@ -0,0 +1,68 @@
1
+ import puremagic
2
+
3
+
4
+ def get_magic_info(file):
5
+
6
+ magic_detections = puremagic.magic_file(file)
7
+ if magic_detections:
8
+ magic_detections.sort(key=lambda x: x.confidence, reverse=True)
9
+ detection = magic_detections[0]
10
+ return detection.extension, detection.mime_type, detection.name, detection.confidence
11
+ return "", "", "", 0
12
+
13
+
14
+ def get_compression(mime_type):
15
+ mime_type = mime_type.lower()
16
+ # from https://github.com/cdgriffith/puremagic/blob/master/puremagic/magic_data.json
17
+ compression_map = {
18
+ "application/gzip": "gzip", # Gzip compressed file
19
+ "application/zip": "zip", # Zip archive
20
+ "application/x-bzip2": "bzip2", # Bzip2 compressed file
21
+ "application/x-xz": "xz", # XZ compressed file
22
+ "application/x-7z-compressed": "7z", # 7-Zip archive
23
+ "application/vnd.rar": "rar", # RAR archive
24
+ "application/x-lzma": "lzma", # LZMA compressed file
25
+ "application/x-compress": "compress", # Unix compress file
26
+ "application/zstd": "zstd", # Zstandard compressed file
27
+ "application/x-lz4": "lz4", # LZ4 compressed file
28
+ "application/x-tar": "tar", # Tar archive
29
+ "application/x-zip-compressed-fb2": "zip", # Zip archive (FB2)
30
+ "application/epub+zip": "zip", # EPUB book (Zip archive)
31
+ "application/pak": "pak", # PAK archive
32
+ "application/x-lha": "lha", # LHA archive
33
+ "application/arj": "arj", # ARJ archive
34
+ "application/vnd.ms-cab-compressed": "cab", # Microsoft Cabinet archive
35
+ "application/x-sit": "sit", # StuffIt archive
36
+ "application/binhex": "binhex", # BinHex encoded file
37
+ "application/x-lrzip": "lrzip", # Long Range ZIP
38
+ "application/x-alz": "alz", # ALZip archive
39
+ "application/x-tgz": "tgz", # Gzip compressed Tar archive
40
+ "application/x-gzip": "gzip", # Gzip compressed file
41
+ "application/x-lzip": "lzip", # Lzip compressed file
42
+ "application/x-zstd-compressed-tar": "zstd", # Zstandard compressed Tar archive
43
+ "application/x-lz4-compressed-tar": "lz4", # LZ4 compressed Tar archive
44
+ "application/vnd.comicbook+zip": "zip", # Comic book archive (Zip)
45
+ "application/vnd.palm": "palm", # Palm OS data
46
+ "application/fictionbook2+zip": "zip", # FictionBook 2.0 (Zip)
47
+ "application/fictionbook3+zip": "zip", # FictionBook 3.0 (Zip)
48
+ "application/x-cpio": "cpio", # CPIO archive
49
+ "application/x-java-pack200": "pack200", # Java Pack200 archive
50
+ "application/x-par2": "par2", # PAR2 recovery file
51
+ "application/x-rar-compressed": "rar", # RAR archive
52
+ "application/java-archive": "zip", # Java Archive (JAR)
53
+ "application/x-webarchive": "zip", # Web archive (Zip)
54
+ "application/vnd.android.package-archive": "zip", # Android package (APK)
55
+ "application/x-itunes-ipa": "zip", # iOS application archive (IPA)
56
+ "application/x-stuffit": "sit", # StuffIt archive
57
+ "application/x-archive": "ar", # Unix archive
58
+ "application/x-qpress": "qpress", # Qpress archive
59
+ "application/x-xar": "xar", # XAR archive
60
+ "application/x-ace": "ace", # ACE archive
61
+ "application/x-zoo": "zoo", # Zoo archive
62
+ "application/x-arc": "arc", # ARC archive
63
+ "application/x-zstd-compressed-tar": "zstd", # Zstandard compressed Tar archive
64
+ "application/x-lz4-compressed-tar": "lz4", # LZ4 compressed Tar archive
65
+ "application/vnd.comicbook-rar": "rar", # Comic book archive (RAR)
66
+ }
67
+
68
+ return compression_map.get(mime_type, "")
bbot/core/shared_deps.py CHANGED
@@ -81,7 +81,7 @@ DEP_CHROMIUM = [
81
81
  {
82
82
  "name": "Install Chromium dependencies (Debian)",
83
83
  "package": {
84
- "name": "libasound2,libatk-bridge2.0-0,libatk1.0-0,libcairo2,libcups2,libdrm2,libgbm1,libnss3,libpango-1.0-0,libxcomposite1,libxdamage1,libxfixes3,libxkbcommon0,libxrandr2",
84
+ "name": "libasound2,libatk-bridge2.0-0,libatk1.0-0,libcairo2,libcups2,libdrm2,libgbm1,libnss3,libpango-1.0-0,libglib2.0-0,libxcomposite1,libxdamage1,libxfixes3,libxkbcommon0,libxrandr2",
85
85
  "state": "present",
86
86
  },
87
87
  "become": True,
bbot/modules/unstructured.py → bbot/modules/extractous.py RENAMED
@@ -1,9 +1,9 @@
1
- import os
1
+ from extractous import Extractor
2
2
 
3
3
  from bbot.modules.base import BaseModule
4
4
 
5
5
 
6
- class unstructured(BaseModule):
6
+ class extractous(BaseModule):
7
7
  watched_events = ["FILESYSTEM"]
8
8
  produced_events = ["RAW_TEXT"]
9
9
  flags = ["passive", "safe"]
@@ -63,15 +63,11 @@ class unstructured(BaseModule):
63
63
  "extensions": "File extensions to parse",
64
64
  }
65
65
 
66
- deps_apt = ["libmagic-dev", "poppler-utils", "tesseract-ocr", "libreoffice", "pandoc"]
67
- deps_pip = ["unstructured[all-docs]>=0.15.7,<1.0", "nltk>=3.9.0,<4.0"]
68
-
66
+ deps_pip = ["extractous"]
69
67
  scope_distance_modifier = 1
70
68
 
71
69
  async def setup(self):
72
70
  self.extensions = list(set([e.lower().strip(".") for e in self.config.get("extensions", [])]))
73
- # Do not send user statistics to the unstructured library
74
- os.environ["SCARF_NO_ANALYTICS"] = "true"
75
71
  return True
76
72
 
77
73
  async def filter_event(self, event):
@@ -94,22 +90,16 @@ class unstructured(BaseModule):
94
90
  )
95
91
  await self.emit_event(raw_text_event)
96
92
 
97
- async def finish(self):
98
- del os.environ["SCARF_NO_ANALYTICS"]
99
- return
100
-
101
93
 
102
94
  def extract_text(file_path):
103
95
  """
104
- extract_text Extracts plaintext from a document path using unstructured.
96
+ extract_text Extracts plaintext from a document path using extractous.
105
97
 
106
98
  :param file_path: The path of the file to extract text from.
107
99
  :return: ASCII-encoded plaintext extracted from the document.
108
100
  """
109
101
 
110
- from unstructured.partition.auto import partition
111
-
112
- unstructured_file_types = [
102
+ extractable_file_types = [
113
103
  ".csv",
114
104
  ".eml",
115
105
  ".msg",
@@ -135,12 +125,21 @@ def extract_text(file_path):
135
125
  ".xml",
136
126
  ]
137
127
 
138
- # If the file can be extracted with unstructured use its partition function or try and read it
139
- if any(file_path.lower().endswith(file_type) for file_type in unstructured_file_types):
128
+ # If the file can be extracted with extractous use its partition function or try and read it
129
+ if any(file_path.lower().endswith(file_type) for file_type in extractable_file_types):
140
130
  try:
141
- elements = partition(filename=file_path)
142
- return "\n\n".join(element.text for element in elements)
143
- except ValueError:
131
+ extractor = Extractor()
132
+ reader = extractor.extract_file(str(file_path))
133
+
134
+ result = ""
135
+ buffer = reader.read(4096)
136
+ while len(buffer) > 0:
137
+ result += buffer.decode("utf-8")
138
+ buffer = reader.read(4096)
139
+
140
+ return result.strip()
141
+
142
+ except Exception:
144
143
  with open(file_path, "rb") as file:
145
144
  return file.read().decode("utf-8", errors="ignore")
146
145
  else:
bbot/scanner/preset/preset.py CHANGED
@@ -376,6 +376,9 @@ class Preset:
376
376
  # misc
377
377
  self.force_start = self.force_start | other.force_start
378
378
  self._cli = self._cli | other._cli
379
+ # transfer args
380
+ if other._args is not None:
381
+ self._args = other._args
379
382
 
380
383
  def bake(self, scan=None):
381
384
  """
bbot/test/test_step_1/test_events.py CHANGED
@@ -925,3 +925,44 @@ def test_event_closest_host():
925
925
  vuln = scan.make_event(
926
926
  {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3
927
927
  )
928
+
929
+
930
+ def test_event_magic():
931
+ from bbot.core.helpers.libmagic import get_magic_info, get_compression
932
+
933
+ import base64
934
+
935
+ zip_base64 = "UEsDBAoDAAAAAOMmZ1lR4FaHBQAAAAUAAAAIAAAAYXNkZi50eHRhc2RmClBLAQI/AwoDAAAAAOMmZ1lR4FaHBQAAAAUAAAAIACQAAAAAAAAAIICkgQAAAABhc2RmLnR4dAoAIAAAAAAAAQAYAICi2B77MNsBgKLYHvsw2wGAotge+zDbAVBLBQYAAAAAAQABAFoAAAArAAAAAAA="
936
+ zip_bytes = base64.b64decode(zip_base64)
937
+ zip_file = Path("/tmp/.bbottestzipasdkfjalsdf.zip")
938
+ with open(zip_file, "wb") as f:
939
+ f.write(zip_bytes)
940
+
941
+ # test magic helpers
942
+ extension, mime_type, description, confidence = get_magic_info(zip_file)
943
+ assert extension == ".zip"
944
+ assert mime_type == "application/zip"
945
+ assert description == "PKZIP Archive file"
946
+ assert confidence > 0
947
+ assert get_compression(mime_type) == "zip"
948
+
949
+ # test filesystem event - file
950
+ scan = Scanner()
951
+ event = scan.make_event({"path": zip_file}, "FILESYSTEM", parent=scan.root_event)
952
+ assert event.data == {
953
+ "path": "/tmp/.bbottestzipasdkfjalsdf.zip",
954
+ "magic_extension": ".zip",
955
+ "magic_mime_type": "application/zip",
956
+ "magic_description": "PKZIP Archive file",
957
+ "magic_confidence": 0.9,
958
+ "compression": "zip",
959
+ }
960
+ assert event.tags == {"file", "zip-archive", "compressed"}
961
+
962
+ # test filesystem event - folder
963
+ scan = Scanner()
964
+ event = scan.make_event({"path": "/tmp"}, "FILESYSTEM", parent=scan.root_event)
965
+ assert event.data == {"path": "/tmp"}
966
+ assert event.tags == {"folder"}
967
+
968
+ zip_file.unlink()
bbot/test/test_step_2/module_tests/test_module_excavate.py CHANGED
@@ -894,7 +894,7 @@ class TestExcavateHeaders(ModuleTestBase):
894
894
 
895
895
  class TestExcavateRAWTEXT(ModuleTestBase):
896
896
  targets = ["http://127.0.0.1:8888/", "test.notreal"]
897
- modules_overrides = ["excavate", "httpx", "filedownload", "unstructured"]
897
+ modules_overrides = ["excavate", "httpx", "filedownload", "extractous"]
898
898
  config_overrides = {"scope": {"report_distance": 1}, "web": {"spider_distance": 2, "spider_depth": 2}}
899
899
 
900
900
  pdf_data = r"""%PDF-1.3
@@ -965,7 +965,7 @@ trailer
965
965
  startxref
966
966
  1669
967
967
  %%EOF"""
968
- unstructured_response = """This is an email example@blacklanternsecurity.notreal
968
+ extractous_response = """This is an email example@blacklanternsecurity.notreal
969
969
 
970
970
  An example JWT eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c
971
971
 
@@ -995,13 +995,13 @@ A href <a href='/donot_detect.js'>Click me</a>"""
995
995
  raw_text_events = [e for e in events if e.type == "RAW_TEXT"]
996
996
  assert 1 == len(raw_text_events), "Failed to emit RAW_TEXT event"
997
997
  assert (
998
- raw_text_events[0].data == self.unstructured_response
998
+ raw_text_events[0].data == self.extractous_response
999
999
  ), f"Text extracted from PDF is incorrect, got {raw_text_events[0].data}"
1000
1000
  email_events = [e for e in events if e.type == "EMAIL_ADDRESS"]
1001
1001
  assert 1 == len(email_events), "Failed to emit EMAIL_ADDRESS event"
1002
1002
  assert (
1003
1003
  email_events[0].data == "example@blacklanternsecurity.notreal"
1004
- ), f"Email extracted from unstructured text is incorrect, got {email_events[0].data}"
1004
+ ), f"Email extracted from extractous text is incorrect, got {email_events[0].data}"
1005
1005
  finding_events = [e for e in events if e.type == "FINDING"]
1006
1006
  assert 2 == len(finding_events), "Failed to emit FINDING events"
1007
1007
  assert any(
@@ -1026,7 +1026,7 @@ A href <a href='/donot_detect.js'>Click me</a>"""
1026
1026
  url_events = [e.data for e in events if e.type == "URL_UNVERIFIED"]
1027
1027
  assert (
1028
1028
  "https://www.test.notreal/about" in url_events
1029
- ), f"URL extracted from unstructured text is incorrect, got {url_events}"
1029
+ ), f"URL extracted from extractous text is incorrect, got {url_events}"
1030
1030
  assert (
1031
1031
  "/donot_detect.js" not in url_events
1032
- ), f"URL extracted from unstructured text is incorrect, got {url_events}"
1032
+ ), f"URL extracted from extractous text is incorrect, got {url_events}"
bbot/test/test_step_2/module_tests/test_module_extractous.py ADDED
@@ -0,0 +1,54 @@
1
+ import base64
2
+ from pathlib import Path
3
+ from .base import ModuleTestBase
4
+
5
+
6
+ class TestExtractous(ModuleTestBase):
7
+ targets = ["http://127.0.0.1:8888"]
8
+ modules_overrides = ["extractous", "filedownload", "httpx", "excavate", "speculate"]
9
+ config_overrides = {"web": {"spider_distance": 2, "spider_depth": 2}}
10
+
11
+ pdf_data = base64.b64decode(
12
+ "JVBERi0xLjMKJe+/ve+/ve+/ve+/vSBSZXBvcnRMYWIgR2VuZXJhdGVkIFBERiBkb2N1bWVudCBodHRwOi8vd3d3LnJlcG9ydGxhYi5jb20KMSAwIG9iago8PAovRjEgMiAwIFIKPj4KZW5kb2JqCjIgMCBvYmoKPDwKL0Jhc2VGb250IC9IZWx2ZXRpY2EgL0VuY29kaW5nIC9XaW5BbnNpRW5jb2RpbmcgL05hbWUgL0YxIC9TdWJ0eXBlIC9UeXBlMSAvVHlwZSAvRm9udAo+PgplbmRvYmoKMyAwIG9iago8PAovQ29udGVudHMgNyAwIFIgL01lZGlhQm94IFsgMCAwIDU5NS4yNzU2IDg0MS44ODk4IF0gL1BhcmVudCA2IDAgUiAvUmVzb3VyY2VzIDw8Ci9Gb250IDEgMCBSIC9Qcm9jU2V0IFsgL1BERiAvVGV4dCAvSW1hZ2VCIC9JbWFnZUMgL0ltYWdlSSBdCj4+IC9Sb3RhdGUgMCAvVHJhbnMgPDwKCj4+IAogIC9UeXBlIC9QYWdlCj4+CmVuZG9iago0IDAgb2JqCjw8Ci9QYWdlTW9kZSAvVXNlTm9uZSAvUGFnZXMgNiAwIFIgL1R5cGUgL0NhdGFsb2cKPj4KZW5kb2JqCjUgMCBvYmoKPDwKL0F1dGhvciAoYW5vbnltb3VzKSAvQ3JlYXRpb25EYXRlIChEOjIwMjQwNjAzMTg1ODE2KzAwJzAwJykgL0NyZWF0b3IgKFJlcG9ydExhYiBQREYgTGlicmFyeSAtIHd3dy5yZXBvcnRsYWIuY29tKSAvS2V5d29yZHMgKCkgL01vZERhdGUgKEQ6MjAyNDA2MDMxODU4MTYrMDAnMDAnKSAvUHJvZHVjZXIgKFJlcG9ydExhYiBQREYgTGlicmFyeSAtIHd3dy5yZXBvcnRsYWIuY29tKSAKICAvU3ViamVjdCAodW5zcGVjaWZpZWQpIC9UaXRsZSAodW50aXRsZWQpIC9UcmFwcGVkIC9GYWxzZQo+PgplbmRvYmoKNiAwIG9iago8PAovQ291bnQgMSAvS2lkcyBbIDMgMCBSIF0gL1R5cGUgL1BhZ2VzCj4+CmVuZG9iago3IDAgb2JqCjw8Ci9GaWx0ZXIgWyAvQVNDSUk4NURlY29kZSAvRmxhdGVEZWNvZGUgXSAvTGVuZ3RoIDEwNwo+PgpzdHJlYW0KR2FwUWgwRT1GLDBVXEgzVFxwTllUXlFLaz90Yz5JUCw7VyNVMV4yM2loUEVNXz9DVzRLSVNpOTBNakdeMixGUyM8UkM1K2MsbilaOyRiSyRiIjVJWzwhXlREI2dpXSY9NVgsWzVAWUBWfj5lbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA4CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDA3MyAwMDAwMCBuIAowMDAwMDAwMTA0IDAwMDAwIG4gCjAwMDAwMDAyMTEgMDAwMDAgbiAKMDAwMDAwMDQxNCAwMDAwMCBuIAowMDAwMDAwNDgyIDAwMDAwIG4gCjAwMDAwMDA3NzggMDAwMDAgbiAKMDAwMDAwMDgzNyAwMDAwMCBuIAp0cmFpbGVyCjw8Ci9JRCAKWzw4MGQ5ZjViOTY0ZmM5OTI4NDUwMWRlYjdhNmE2MzdmNz48ODBkOWY1Yjk2NGZjOTkyODQ1MDFkZWI3YTZhNjM3Zjc+XQolIFJlcG9ydExhYiBnZW5lcmF0ZWQgUERGIGRvY3VtZW50IC0tIGRpZ2VzdCAoaHR0cDovL3d3dy5yZXBvcnRsYWIuY29tKQoKL0luZm8gNSAwIFIKL1Jvb3QgNCAwIFIKL1NpemUgOAo+PgpzdGFydHhyZWYKMTAzNAolJUVPRg=="
13
+ )
14
+
15
+ docx_data = base64.b64decode(
16
+ ""
17
+ )
18
+
19
+ expected_result_pdf = "Hello, World!"
20
+ expected_result_docx = "Hello, World!!"
21
+
22
+ async def setup_after_prep(self, module_test):
23
+ module_test.set_expect_requests(
24
+ dict(uri="/"),
25
+ dict(response_data='<a href="/Test_PDF"/><a href="/Test_DOCX"/>'),
26
+ )
27
+ module_test.set_expect_requests(
28
+ dict(uri="/Test_PDF"),
29
+ dict(response_data=self.pdf_data, headers={"Content-Type": "application/pdf"}),
30
+ )
31
+ module_test.set_expect_requests(
32
+ dict(uri="/Test_DOCX"),
33
+ dict(
34
+ response_data=self.docx_data,
35
+ headers={"Content-Type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
36
+ ),
37
+ )
38
+
39
+ def check(self, module_test, events):
40
+ filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
41
+ assert 2 == len(filesystem_events), filesystem_events
42
+ for filesystem_event in filesystem_events:
43
+ file = Path(filesystem_event.data["path"])
44
+ assert file.is_file(), "Destination file doesn't exist"
45
+ assert (
46
+ open(file, "rb").read() == self.pdf_data or open(file, "rb").read() == self.docx_data
47
+ ), f"File at {file} does not contain the correct content"
48
+ raw_text_events = [e for e in events if e.type == "RAW_TEXT"]
49
+ assert 2 == len(raw_text_events), "Failed to emit RAW_TEXT event"
50
+ for raw_text_event in raw_text_events:
51
+ assert raw_text_event.data in [
52
+ self.expected_result_pdf,
53
+ self.expected_result_docx,
54
+ ], f"Text extracted from {raw_text_event.data['path']} is incorrect, got {raw_text_event.data}"
bbot-2.1.2.5158rc0.dist-info/METADATA → bbot-2.1.2.5171rc0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bbot
3
- Version: 2.1.2.5158rc0
3
+ Version: 2.1.2.5171rc0
4
4
  Summary: OSINT automation for hackers.
5
5
  Home-page: https://github.com/blacklanternsecurity/bbot
6
6
  License: GPL-3.0
@@ -30,6 +30,7 @@ Requires-Dist: lxml (>=4.9.2,<6.0.0)
30
30
  Requires-Dist: mmh3 (>=4.1,<6.0)
31
31
  Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
32
32
  Requires-Dist: psutil (>=5.9.4,<7.0.0)
33
+ Requires-Dist: puremagic (>=1.28,<2.0)
33
34
  Requires-Dist: pycryptodome (>=3.17,<4.0)
34
35
  Requires-Dist: pydantic (>=2.4.2,<3.0.0)
35
36
  Requires-Dist: pyjwt (>=2.7.0,<3.0.0)
bbot-2.1.2.5158rc0.dist-info/RECORD → bbot-2.1.2.5171rc0.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
1
- bbot/__init__.py,sha256=XGv5tdX9jJAqzDOsvtlIiSepFtDm0gWOhPAnGSwzum0,130
1
+ bbot/__init__.py,sha256=Z54hdiCr1FhYAEWsmNTb4Q2V7SAiZkx2pqD434M5kMg,130
2
2
  bbot/cli.py,sha256=7S3a4eB-Dl8yodc5WC-927Z30CNlLl9EXimGvIVypJo,10434
3
3
  bbot/core/__init__.py,sha256=l255GJE_DvUnWvrRb0J5lG-iMztJ8zVvoweDOfegGtI,46
4
4
  bbot/core/config/__init__.py,sha256=zYNw2Me6tsEr8hOOkLb4BQ97GB7Kis2k--G81S8vofU,342
@@ -7,7 +7,7 @@ bbot/core/config/logger.py,sha256=zkD08_KNiIa8LTZkI4wiAeA4g0zVCiA7d7P5MmocXsk,10
7
7
  bbot/core/core.py,sha256=twd7-fiaaxzgcWTPwT1zbSWfAa_gHHfl7gAFvLYvFYg,6358
8
8
  bbot/core/engine.py,sha256=wGopKa2GNs61r16Pr_xtp6Si9AT6I-lE83iWhEgtxwA,29290
9
9
  bbot/core/event/__init__.py,sha256=8ut88ZUg0kbtWkOx2j3XzNr_3kTfgoM-3UdiWHFA_ag,56
10
- bbot/core/event/base.py,sha256=YHWevdDo5sHIIMKz_9TkWFYGTVtoD3fv957TpMO-6DQ,59498
10
+ bbot/core/event/base.py,sha256=vb4rPOEGILaWsaKG_DB7aZQqAuNsCHYFblK5gThZT1U,60459
11
11
  bbot/core/event/helpers.py,sha256=PUN4Trq5_wpKVuhmwUQWAr40apgMXhJ9Gz-VfZ0j3lA,1554
12
12
  bbot/core/flags.py,sha256=Ltvm8Bc4D65I55HuU5bzyjO1R3yMDNpVmreGU83ZBXE,1266
13
13
  bbot/core/helpers/__init__.py,sha256=0UNwcZjNsX41hbHdo3yZPuARkYWch-okI68DScexve4,86
@@ -28,6 +28,7 @@ bbot/core/helpers/dns/mock.py,sha256=Ztkp2aOuwDJ0NTQSlAk2H0s3Stx9wIM22Qm3VtqWMKM
28
28
  bbot/core/helpers/files.py,sha256=GqrwNGJljUvGSzaOW5-Y357hkt7j88dOYbzQxJGsdTc,5787
29
29
  bbot/core/helpers/helper.py,sha256=3O96peNBvSkaJosft8w9-nKjCscEdykTayGcUlHRqLw,8394
30
30
  bbot/core/helpers/interactsh.py,sha256=Q9IHUzH-T7e1s4YTHevHe-VJj1Mokv0EHY16UZJdl8M,12627
31
+ bbot/core/helpers/libmagic.py,sha256=a9tmL558cM5lzN69YahBc7JNHmPnBJgu9Wa8Q5bH1S0,3631
31
32
  bbot/core/helpers/misc.py,sha256=rvfZmm8UHCChmbMorjPMybaCZTkERrKZhxvY9S4dVPc,86873
32
33
  bbot/core/helpers/names_generator.py,sha256=Sj_Q-7KQyElEpalzlUadSwaniESqrIVVEle9ycPIiho,10322
33
34
  bbot/core/helpers/ntlm.py,sha256=P2Xj4-GPos2iAzw4dfk0FJp6oGyycGhu2x6sLDVjYjs,2573
@@ -44,7 +45,7 @@ bbot/core/helpers/web/ssl_context.py,sha256=aWVgl-d0HoE8B4EBKNxaa5UAzQmx79DjDByf
44
45
  bbot/core/helpers/web/web.py,sha256=K7BOts1c1bRjU5rpluD94jClwchmBMZQk8FZI1ljS94,22661
45
46
  bbot/core/helpers/wordcloud.py,sha256=WdQwboCNcCxcUdLuB6MMMDQBL4ZshFM_f6GW7nUZEBQ,19819
46
47
  bbot/core/modules.py,sha256=OOUSncr-EM6bJBrI3iH5wvfnpTXKQ-A8OL8UMvkL0CU,31432
47
- bbot/core/shared_deps.py,sha256=7WQxxlYOWCq6lb51sITz5-nePILu0ltwNI3UtCJXcL8,5174
48
+ bbot/core/shared_deps.py,sha256=IZgYbeJy20ToUNa8TnNAgzaKRK_c09W6rl-uxEhudd0,5187
48
49
  bbot/defaults.yml,sha256=_3sNH-2TWPaQHZ6ozBA1UKWLB7HuHK8vjZ534mb8cO4,6042
49
50
  bbot/errors.py,sha256=xwQcD26nU9oc7-o0kv5jmEDTInmi8_W8eKAgQZZxdVM,953
50
51
  bbot/logger.py,sha256=rLcLzNDvfR8rFj7_tZ-f5QB3Z8T0RVroact3W0ogjpA,1408
@@ -93,6 +94,7 @@ bbot/modules/docker_pull.py,sha256=Dp8de9UCCELcozwmZphA3lMh8qZaXyDo2kfwG45Wm3w,9
93
94
  bbot/modules/dockerhub.py,sha256=ruvTP8Uz5LEuX-_SrKDzByvSNtd1ofZbX-lRTeKUB24,3491
94
95
  bbot/modules/dotnetnuke.py,sha256=XZysDA99ahQSLXR8RPROlmUwDxqrxvBFvscZMYBmsmc,10539
95
96
  bbot/modules/emailformat.py,sha256=RLPJW-xitYB-VT4Lp08qVzFkXx_kMyV_035JT_Yf4fM,1082
97
+ bbot/modules/extractous.py,sha256=yPIM6UHYExGPNVDt8x_jE-UxRl_JbDrThFguIfBUuuY,5129
96
98
  bbot/modules/ffuf_shortnames.py,sha256=9Kh0kJsw7XXpXmCkiB5eAhG4h9rSo8Y-mB3p0EDa_l0,12624
97
99
  bbot/modules/filedownload.py,sha256=1prC84wAQO-W1HstitKPQ0-eYEApjzFn3RHFa9oaqLc,8185
98
100
  bbot/modules/fingerprintx.py,sha256=rdlR9d64AntAhbS_eJzh8bZCeLPTJPSKdkdKdhH_qAo,3269
@@ -180,7 +182,6 @@ bbot/modules/templates/subdomain_enum.py,sha256=lT5MZF66OuzsyFFrj20wKlsZflzL9MOk
180
182
  bbot/modules/templates/webhook.py,sha256=MYhKWrNYrsfM0a4PR6yVotudLyyCwgmy2eI-l9LvpBs,3706
181
183
  bbot/modules/trickest.py,sha256=HfAzjnawxXd9ypi3gumDHqImE5-C7uwNugo8d_b9HT0,1544
182
184
  bbot/modules/trufflehog.py,sha256=y99b6hPeiG00XK50HC8QELa6HE9MqEYNiGAVWpfWvnM,8554
183
- bbot/modules/unstructured.py,sha256=si3_Y__A36QOBdkIUocVXCHrmUqM0E-JSnoOeRpELYE,5311
184
185
  bbot/modules/url_manipulation.py,sha256=BI-OhlzNzP5xvwzHphL4qdehc4NiEYnL2BNK-JoEm90,4322
185
186
  bbot/modules/urlscan.py,sha256=ajhiX2sj-zZDlKU1q5rE8JTzxioj1mDLqZ9PRSQCpAw,3741
186
187
  bbot/modules/viewdns.py,sha256=f0vwoLpua2Ovw1gcrjoafUdaAP9fi4bHgTUiDOe8iWg,2596
@@ -213,7 +214,7 @@ bbot/scanner/preset/args.py,sha256=9Nmir2dHJWzN66m6N-mA0QEKiOgt8vWq23O8BG50eMA,1
213
214
  bbot/scanner/preset/conditions.py,sha256=hFL9cSIWGEsv2TfM5UGurf0c91cyaM8egb5IngBmIjA,1569
214
215
  bbot/scanner/preset/environ.py,sha256=-wbFk1YHpU8IJLKVw23Q3btQTICeX0iulURo7D673L0,4732
215
216
  bbot/scanner/preset/path.py,sha256=p9tZC7XcgZv2jXpbEJAg1lU2b4ZLX5COFnCxEUOXz2g,2234
216
- bbot/scanner/preset/preset.py,sha256=7q6PB9LalIzHyb4eiMDVKE6CapWBCKVw7350M0fSiwM,40083
217
+ bbot/scanner/preset/preset.py,sha256=-HH_nlr4VaXmKCooXMG5av39gOUdCVOO_y9Bhgbt_u4,40180
217
218
  bbot/scanner/scanner.py,sha256=62DKCjgV1uLxNAwpxjvE5h1uzQCxG-nzBxp1PBCSVKc,53674
218
219
  bbot/scanner/stats.py,sha256=re93sArKXZSiD0Owgqk2J3Kdvfm3RL4Y9Qy_VOcaVk8,3623
219
220
  bbot/scanner/target.py,sha256=X25gpgRv5HmqQjGADiSe6b8744yOkRhAGAvKKYbXnSI,19886
@@ -236,7 +237,7 @@ bbot/test/test_step_1/test_depsinstaller.py,sha256=zr9f-wJDotD1ZvKXGEuDRWzFYMAYB
236
237
  bbot/test/test_step_1/test_dns.py,sha256=YZtSbja-Z76KC9MWBieRExolVWHm0WqssL0WHUpUiC8,30932
237
238
  bbot/test/test_step_1/test_docs.py,sha256=YWVGNRfzcrvDmFekX0Cq9gutQplsqvhKTpZ0XK4tWvo,82
238
239
  bbot/test/test_step_1/test_engine.py,sha256=Bfid3-D9ziN93w4vym97tFEn_l2Iof08wjITTv_lAZw,4269
239
- bbot/test/test_step_1/test_events.py,sha256=D9W3zGxRWUIm0SYklsWRE3IeAPcMdWLAOIMWkI24Rpc,45130
240
+ bbot/test/test_step_1/test_events.py,sha256=_rMAxbyuSReZxNwwghL37p7HA9YNpptVcBmcuz74nKw,46669
240
241
  bbot/test/test_step_1/test_files.py,sha256=5Q_3jPpMXULxDHsanSDUaj8zF8bXzKdiJZHOmoYpLhQ,699
241
242
  bbot/test/test_step_1/test_helpers.py,sha256=oY2hWhgL-TCB67ve1bAyIwZO3wNRWpx4SjCHNUxHep8,38676
242
243
  bbot/test/test_step_1/test_manager_deduplication.py,sha256=hZQpDXzg6zvzxFolVOcJuY-ME8NXjZUsqS70BRNXp8A,15594
@@ -301,7 +302,8 @@ bbot/test/test_step_2/module_tests/test_module_dockerhub.py,sha256=9T8CFcFP32MOp
301
302
  bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py,sha256=qDh281o0Cixz_LvMDSX_y9jHTXeRpt50eRUb20tC8ig,8212
302
303
  bbot/test/test_step_2/module_tests/test_module_emailformat.py,sha256=cKxBPnEQ4AiRKV_-hSYEE6756ypst3hi6MN0L5RTukY,461
303
304
  bbot/test/test_step_2/module_tests/test_module_emails.py,sha256=bZjtO8N3GG2_g6SUEYprAFLcsi7SlwNPJJ0nODfrWYU,944
304
- bbot/test/test_step_2/module_tests/test_module_excavate.py,sha256=6XBe7JzKtSMbIN3VQLiR8wHXiqT6PfdL8nzt9h6gam0,42191
305
+ bbot/test/test_step_2/module_tests/test_module_excavate.py,sha256=Myq6xkLPueT4m-Rzp8PPC_zF5vzq9JbtV8N46yAhWYE,42179
306
+ bbot/test/test_step_2/module_tests/test_module_extractous.py,sha256=FiMSgddx2qnxGIDImvofrd5hPRQIIul6Y67wwNsgEqE,17973
305
307
  bbot/test/test_step_2/module_tests/test_module_ffuf.py,sha256=aSB49aN77sw-2LNTDHckiEEaHAn_85xCJno1shdOwus,2964
306
308
  bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py,sha256=QoIDYEY5R5HA3gJQyCEG0gHkgM0zItwsXc6oqDA1neA,7609
307
309
  bbot/test/test_step_2/module_tests/test_module_filedownload.py,sha256=d4jJWYqdfb9GYDSfBp3b6h2gQRdPfhmoZtm99RG9sVo,2609
@@ -368,7 +370,6 @@ bbot/test/test_step_2/module_tests/test_module_telerik.py,sha256=Fy02lF6q06dhc-u
368
370
  bbot/test/test_step_2/module_tests/test_module_trickest.py,sha256=6mTYH6fIah-WbKnFI-_WZBwRdKFi-oeWyVtl1n0nVAU,1630
369
371
  bbot/test/test_step_2/module_tests/test_module_trufflehog.py,sha256=2cbQo7839tPUJgat99w0O-_bXhONr-z6G1xsPgN6p20,79146
370
372
  bbot/test/test_step_2/module_tests/test_module_txt.py,sha256=R-EBfEZM0jwY2yuVyfYhoccDOl0Y2uQZSkXQ1HyinUA,247
371
- bbot/test/test_step_2/module_tests/test_module_unstructured.py,sha256=WT5yPx6qprYAuVfLWqrsCZh2VoBpnNY-K5YbGvd31O8,2922
372
373
  bbot/test/test_step_2/module_tests/test_module_url_manipulation.py,sha256=aP3nK2TQQOjk0ZeuHhHYfZm_e37qrrXbnufd7m-QeJU,1144
373
374
  bbot/test/test_step_2/module_tests/test_module_urlscan.py,sha256=H_og5fOQMLpDbEGOhcVcZcDXvodT6nfgCE6Rk8LTkas,2902
374
375
  bbot/test/test_step_2/module_tests/test_module_vhost.py,sha256=W-88CA-aVVZ0il0Mzji_3kFU4lhPF-_gPBdUaoJEc1A,2874
@@ -395,8 +396,8 @@ bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt,sha256=ruUQwVfia1_m2u
395
396
  bbot/wordlists/top_open_ports_nmap.txt,sha256=LmdFYkfapSxn1pVuQC2LkOIY2hMLgG-Xts7DVtYzweM,42727
396
397
  bbot/wordlists/valid_url_schemes.txt,sha256=VciB-ww0y-O8Ii1wpTR6rJzGDiC2r-dhVsIJApS1ZYU,3309
397
398
  bbot/wordlists/wordninja_dns.txt.gz,sha256=DYHvvfW0TvzrVwyprqODAk4tGOxv5ezNmCPSdPuDUnQ,570241
398
- bbot-2.1.2.5158rc0.dist-info/LICENSE,sha256=GzeCzK17hhQQDNow0_r0L8OfLpeTKQjFQwBQU7ZUymg,32473
399
- bbot-2.1.2.5158rc0.dist-info/METADATA,sha256=bLLthN7PSjwktPDH25JefcoGLcY9A04erMQJtWWQqRg,16964
400
- bbot-2.1.2.5158rc0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
401
- bbot-2.1.2.5158rc0.dist-info/entry_points.txt,sha256=cWjvcU_lLrzzJgjcjF7yeGuRA_eDS8pQ-kmPUAyOBfo,38
402
- bbot-2.1.2.5158rc0.dist-info/RECORD,,
399
+ bbot-2.1.2.5171rc0.dist-info/LICENSE,sha256=GzeCzK17hhQQDNow0_r0L8OfLpeTKQjFQwBQU7ZUymg,32473
400
+ bbot-2.1.2.5171rc0.dist-info/METADATA,sha256=E5cIla8VjE2978VgYdXW4qZWhUuFcJeI-LeRtp7EtJA,17003
401
+ bbot-2.1.2.5171rc0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
402
+ bbot-2.1.2.5171rc0.dist-info/entry_points.txt,sha256=cWjvcU_lLrzzJgjcjF7yeGuRA_eDS8pQ-kmPUAyOBfo,38
403
+ bbot-2.1.2.5171rc0.dist-info/RECORD,,
bbot/test/test_step_2/module_tests/test_module_unstructured.py REMOVED
@@ -1,102 +0,0 @@
1
- from pathlib import Path
2
- from .base import ModuleTestBase
3
-
4
-
5
- class TestUnstructured(ModuleTestBase):
6
- targets = ["http://127.0.0.1:8888"]
7
- modules_overrides = ["unstructured", "filedownload", "httpx", "excavate", "speculate"]
8
- config_overrides = {"web": {"spider_distance": 2, "spider_depth": 2}}
9
-
10
- pdf_data = r"""%PDF-1.3
11
- %���� ReportLab Generated PDF document http://www.reportlab.com
12
- 1 0 obj
13
- <<
14
- /F1 2 0 R
15
- >>
16
- endobj
17
- 2 0 obj
18
- <<
19
- /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
20
- >>
21
- endobj
22
- 3 0 obj
23
- <<
24
- /Contents 7 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 6 0 R /Resources <<
25
- /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
26
- >> /Rotate 0 /Trans <<
27
-
28
- >>
29
- /Type /Page
30
- >>
31
- endobj
32
- 4 0 obj
33
- <<
34
- /PageMode /UseNone /Pages 6 0 R /Type /Catalog
35
- >>
36
- endobj
37
- 5 0 obj
38
- <<
39
- /Author (anonymous) /CreationDate (D:20240603185816+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20240603185816+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
40
- /Subject (unspecified) /Title (untitled) /Trapped /False
41
- >>
42
- endobj
43
- 6 0 obj
44
- <<
45
- /Count 1 /Kids [ 3 0 R ] /Type /Pages
46
- >>
47
- endobj
48
- 7 0 obj
49
- <<
50
- /Filter [ /ASCII85Decode /FlateDecode ] /Length 107
51
- >>
52
- stream
53
- GapQh0E=F,0U\H3T\pNYT^QKk?tc>IP,;W#U1^23ihPEM_?CW4KISi90MjG^2,FS#<RC5+c,n)Z;$bK$b"5I[<!^TD#gi]&=5X,[5@Y@V~>endstream
54
- endobj
55
- xref
56
- 0 8
57
- 0000000000 65535 f
58
- 0000000073 00000 n
59
- 0000000104 00000 n
60
- 0000000211 00000 n
61
- 0000000414 00000 n
62
- 0000000482 00000 n
63
- 0000000778 00000 n
64
- 0000000837 00000 n
65
- trailer
66
- <<
67
- /ID
68
- [<80d9f5b964fc99284501deb7a6a637f7><80d9f5b964fc99284501deb7a6a637f7>]
69
- % ReportLab generated PDF document -- digest (http://www.reportlab.com)
70
-
71
- /Info 5 0 R
72
- /Root 4 0 R
73
- /Size 8
74
- >>
75
- startxref
76
- 1034
77
- %%EOF"""
78
-
79
- unstructured_response = "Hello, World!"
80
-
81
- async def setup_after_prep(self, module_test):
82
- module_test.set_expect_requests(
83
- dict(uri="/"),
84
- dict(response_data='<a href="/Test_PDF"/>'),
85
- )
86
- module_test.set_expect_requests(
87
- dict(uri="/Test_PDF"),
88
- dict(response_data=self.pdf_data, headers={"Content-Type": "application/pdf"}),
89
- )
90
-
91
- def check(self, module_test, events):
92
- filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
93
- assert 1 == len(filesystem_events), filesystem_events
94
- filesystem_event = filesystem_events[0]
95
- file = Path(filesystem_event.data["path"])
96
- assert file.is_file(), "Destination file doesn't exist"
97
- assert open(file).read() == self.pdf_data, f"File at {file} does not contain the correct content"
98
- raw_text_events = [e for e in events if e.type == "RAW_TEXT"]
99
- assert 1 == len(raw_text_events), "Failed to emit RAW_TEXT event"
100
- assert (
101
- raw_text_events[0].data == self.unstructured_response
102
- ), f"Text extracted from PDF is incorrect, got {raw_text_events[0].data}"