bbot 2.1.2.5238rc0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

bbot/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # version placeholder (replaced by poetry-dynamic-versioning)
2
- __version__ = "v2.1.2.5238rc"
2
+ __version__ = "v2.2.0"
3
3
 
4
4
  from .scanner import Scanner, Preset
bbot/modules/extractous.py CHANGED
@@ -81,6 +81,12 @@ class extractous(BaseModule):
81
81
  async def handle_event(self, event):
82
82
  file_path = event.data["path"]
83
83
  content = await self.scan.helpers.run_in_executor_mp(extract_text, file_path)
84
+ if isinstance(content, tuple):
85
+ error, traceback = content
86
+ self.error(f"Error extracting text from {file_path}: {error}")
87
+ self.trace(traceback)
88
+ return
89
+
84
90
  if content:
85
91
  raw_text_event = self.make_event(
86
92
  content,
@@ -99,49 +105,18 @@ def extract_text(file_path):
99
105
  :return: ASCII-encoded plaintext extracted from the document.
100
106
  """
101
107
 
102
- extractable_file_types = [
103
- ".csv",
104
- ".eml",
105
- ".msg",
106
- ".epub",
107
- ".xlsx",
108
- ".xls",
109
- ".html",
110
- ".htm",
111
- ".md",
112
- ".org",
113
- ".odt",
114
- ".pdf",
115
- ".txt",
116
- ".text",
117
- ".log",
118
- ".ppt",
119
- ".pptx",
120
- ".rst",
121
- ".rtf",
122
- ".tsv",
123
- ".doc",
124
- ".docx",
125
- ".xml",
126
- ]
127
-
128
- # If the file can be extracted with extractous use its partition function or try and read it
129
- if any(file_path.lower().endswith(file_type) for file_type in extractable_file_types):
130
- try:
131
- extractor = Extractor()
132
- reader = extractor.extract_file(str(file_path))
108
+ try:
109
+ extractor = Extractor()
110
+ reader, metadata = extractor.extract_file(str(file_path))
133
111
 
134
- result = ""
112
+ result = ""
113
+ buffer = reader.read(4096)
114
+ while len(buffer) > 0:
115
+ result += buffer.decode("utf-8")
135
116
  buffer = reader.read(4096)
136
- while len(buffer) > 0:
137
- result += buffer.decode("utf-8")
138
- buffer = reader.read(4096)
139
117
 
140
- return result.strip()
118
+ return result.strip()
119
+ except Exception as e:
120
+ import traceback
141
121
 
142
- except Exception:
143
- with open(file_path, "rb") as file:
144
- return file.read().decode("utf-8", errors="ignore")
145
- else:
146
- with open(file_path, "rb") as file:
147
- return file.read().decode("utf-8", errors="ignore")
122
+ return (str(e), traceback.format_exc())
bbot/scanner/scanner.py CHANGED
@@ -126,7 +126,7 @@ class Scanner:
126
126
  self._success = False
127
127
 
128
128
  if scan_id is not None:
129
- self.id = str(id)
129
+ self.id = str(scan_id)
130
130
  else:
131
131
  self.id = f"SCAN:{sha1(rand_string(20)).hexdigest()}"
132
132
 
bbot-2.1.2.5238rc0.dist-info/METADATA → bbot-2.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bbot
3
- Version: 2.1.2.5238rc0
3
+ Version: 2.2.0
4
4
  Summary: OSINT automation for hackers.
5
5
  Home-page: https://github.com/blacklanternsecurity/bbot
6
6
  License: GPL-3.0
bbot-2.1.2.5238rc0.dist-info/RECORD → bbot-2.2.0.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- bbot/__init__.py,sha256=JVjtsonS_He_khDQUwAqECN1ZBEtf2leD_lCLKF4TSc,130
1
+ bbot/__init__.py,sha256=N-qQdKMTzvNCQSwf552T4NkTPi0xe-5hDjEYCwbF5l0,123
2
2
  bbot/cli.py,sha256=7S3a4eB-Dl8yodc5WC-927Z30CNlLl9EXimGvIVypJo,10434
3
3
  bbot/core/__init__.py,sha256=l255GJE_DvUnWvrRb0J5lG-iMztJ8zVvoweDOfegGtI,46
4
4
  bbot/core/config/__init__.py,sha256=zYNw2Me6tsEr8hOOkLb4BQ97GB7Kis2k--G81S8vofU,342
@@ -96,7 +96,7 @@ bbot/modules/docker_pull.py,sha256=Dp8de9UCCELcozwmZphA3lMh8qZaXyDo2kfwG45Wm3w,9
96
96
  bbot/modules/dockerhub.py,sha256=ruvTP8Uz5LEuX-_SrKDzByvSNtd1ofZbX-lRTeKUB24,3491
97
97
  bbot/modules/dotnetnuke.py,sha256=XZysDA99ahQSLXR8RPROlmUwDxqrxvBFvscZMYBmsmc,10539
98
98
  bbot/modules/emailformat.py,sha256=RLPJW-xitYB-VT4Lp08qVzFkXx_kMyV_035JT_Yf4fM,1082
99
- bbot/modules/extractous.py,sha256=yPIM6UHYExGPNVDt8x_jE-UxRl_JbDrThFguIfBUuuY,5129
99
+ bbot/modules/extractous.py,sha256=22pc62jOs_4E3uZtBjUbUksLTadNr9sp_4r0-eU1un8,4551
100
100
  bbot/modules/ffuf_shortnames.py,sha256=9Kh0kJsw7XXpXmCkiB5eAhG4h9rSo8Y-mB3p0EDa_l0,12624
101
101
  bbot/modules/filedownload.py,sha256=1prC84wAQO-W1HstitKPQ0-eYEApjzFn3RHFa9oaqLc,8185
102
102
  bbot/modules/fingerprintx.py,sha256=rdlR9d64AntAhbS_eJzh8bZCeLPTJPSKdkdKdhH_qAo,3269
@@ -220,7 +220,7 @@ bbot/scanner/preset/conditions.py,sha256=hFL9cSIWGEsv2TfM5UGurf0c91cyaM8egb5IngB
220
220
  bbot/scanner/preset/environ.py,sha256=-wbFk1YHpU8IJLKVw23Q3btQTICeX0iulURo7D673L0,4732
221
221
  bbot/scanner/preset/path.py,sha256=p9tZC7XcgZv2jXpbEJAg1lU2b4ZLX5COFnCxEUOXz2g,2234
222
222
  bbot/scanner/preset/preset.py,sha256=-HH_nlr4VaXmKCooXMG5av39gOUdCVOO_y9Bhgbt_u4,40180
223
- bbot/scanner/scanner.py,sha256=n0jpHQ9tXqrPJTwg8DndMgNPas0NbNzV0FeoQDbQgJE,53692
223
+ bbot/scanner/scanner.py,sha256=dF3Pf2e02gjQ0hJafEUYI0aAgvDPcVo8uX7AHWQ-mMs,53697
224
224
  bbot/scanner/stats.py,sha256=re93sArKXZSiD0Owgqk2J3Kdvfm3RL4Y9Qy_VOcaVk8,3623
225
225
  bbot/scanner/target.py,sha256=X25gpgRv5HmqQjGADiSe6b8744yOkRhAGAvKKYbXnSI,19886
226
226
  bbot/scripts/docs.py,sha256=kg2CzovmUVGJx9hBZjAjUdE1hXeIwC7Ry3CyrnE8GL8,10782
@@ -404,8 +404,8 @@ bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt,sha256=ruUQwVfia1_m2u
404
404
  bbot/wordlists/top_open_ports_nmap.txt,sha256=LmdFYkfapSxn1pVuQC2LkOIY2hMLgG-Xts7DVtYzweM,42727
405
405
  bbot/wordlists/valid_url_schemes.txt,sha256=VciB-ww0y-O8Ii1wpTR6rJzGDiC2r-dhVsIJApS1ZYU,3309
406
406
  bbot/wordlists/wordninja_dns.txt.gz,sha256=DYHvvfW0TvzrVwyprqODAk4tGOxv5ezNmCPSdPuDUnQ,570241
407
- bbot-2.1.2.5238rc0.dist-info/LICENSE,sha256=GzeCzK17hhQQDNow0_r0L8OfLpeTKQjFQwBQU7ZUymg,32473
408
- bbot-2.1.2.5238rc0.dist-info/METADATA,sha256=HSSIhK3Gqj3mxD3gQoYAgdDlJInB3EdQUIy8eBPmuxQ,17109
409
- bbot-2.1.2.5238rc0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
410
- bbot-2.1.2.5238rc0.dist-info/entry_points.txt,sha256=cWjvcU_lLrzzJgjcjF7yeGuRA_eDS8pQ-kmPUAyOBfo,38
411
- bbot-2.1.2.5238rc0.dist-info/RECORD,,
407
+ bbot-2.2.0.dist-info/LICENSE,sha256=GzeCzK17hhQQDNow0_r0L8OfLpeTKQjFQwBQU7ZUymg,32473
408
+ bbot-2.2.0.dist-info/METADATA,sha256=smQwYhCFM3eBHGoKPvxkT44GFlLV5k-o-TCmYjGkxl4,17101
409
+ bbot-2.2.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
410
+ bbot-2.2.0.dist-info/entry_points.txt,sha256=cWjvcU_lLrzzJgjcjF7yeGuRA_eDS8pQ-kmPUAyOBfo,38
411
+ bbot-2.2.0.dist-info/RECORD,,