bbot 2.1.2.5238rc0__py3-none-any.whl → 2.1.2.5240rc0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

bbot/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # version placeholder (replaced by poetry-dynamic-versioning)
2
- __version__ = "v2.1.2.5238rc"
2
+ __version__ = "v2.1.2.5240rc"
3
3
 
4
4
  from .scanner import Scanner, Preset
@@ -81,6 +81,12 @@ class extractous(BaseModule):
81
81
  async def handle_event(self, event):
82
82
  file_path = event.data["path"]
83
83
  content = await self.scan.helpers.run_in_executor_mp(extract_text, file_path)
84
+ if isinstance(content, tuple):
85
+ error, traceback = content
86
+ self.error(f"Error extracting text from {file_path}: {error}")
87
+ self.trace(traceback)
88
+ return
89
+
84
90
  if content:
85
91
  raw_text_event = self.make_event(
86
92
  content,
@@ -99,49 +105,18 @@ def extract_text(file_path):
99
105
  :return: ASCII-encoded plaintext extracted from the document.
100
106
  """
101
107
 
102
- extractable_file_types = [
103
- ".csv",
104
- ".eml",
105
- ".msg",
106
- ".epub",
107
- ".xlsx",
108
- ".xls",
109
- ".html",
110
- ".htm",
111
- ".md",
112
- ".org",
113
- ".odt",
114
- ".pdf",
115
- ".txt",
116
- ".text",
117
- ".log",
118
- ".ppt",
119
- ".pptx",
120
- ".rst",
121
- ".rtf",
122
- ".tsv",
123
- ".doc",
124
- ".docx",
125
- ".xml",
126
- ]
127
-
128
- # If the file can be extracted with extractous use its partition function or try and read it
129
- if any(file_path.lower().endswith(file_type) for file_type in extractable_file_types):
130
- try:
131
- extractor = Extractor()
132
- reader = extractor.extract_file(str(file_path))
108
+ try:
109
+ extractor = Extractor()
110
+ reader, metadata = extractor.extract_file(str(file_path))
133
111
 
134
- result = ""
112
+ result = ""
113
+ buffer = reader.read(4096)
114
+ while len(buffer) > 0:
115
+ result += buffer.decode("utf-8")
135
116
  buffer = reader.read(4096)
136
- while len(buffer) > 0:
137
- result += buffer.decode("utf-8")
138
- buffer = reader.read(4096)
139
117
 
140
- return result.strip()
118
+ return result.strip()
119
+ except Exception as e:
120
+ import traceback
141
121
 
142
- except Exception:
143
- with open(file_path, "rb") as file:
144
- return file.read().decode("utf-8", errors="ignore")
145
- else:
146
- with open(file_path, "rb") as file:
147
- return file.read().decode("utf-8", errors="ignore")
122
+ return (str(e), traceback.format_exc())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bbot
3
- Version: 2.1.2.5238rc0
3
+ Version: 2.1.2.5240rc0
4
4
  Summary: OSINT automation for hackers.
5
5
  Home-page: https://github.com/blacklanternsecurity/bbot
6
6
  License: GPL-3.0
@@ -1,4 +1,4 @@
1
- bbot/__init__.py,sha256=JVjtsonS_He_khDQUwAqECN1ZBEtf2leD_lCLKF4TSc,130
1
+ bbot/__init__.py,sha256=9oJhyJvXyXbI4jsxhLDiobby05_5HUsSxccd9M__JTc,130
2
2
  bbot/cli.py,sha256=7S3a4eB-Dl8yodc5WC-927Z30CNlLl9EXimGvIVypJo,10434
3
3
  bbot/core/__init__.py,sha256=l255GJE_DvUnWvrRb0J5lG-iMztJ8zVvoweDOfegGtI,46
4
4
  bbot/core/config/__init__.py,sha256=zYNw2Me6tsEr8hOOkLb4BQ97GB7Kis2k--G81S8vofU,342
@@ -96,7 +96,7 @@ bbot/modules/docker_pull.py,sha256=Dp8de9UCCELcozwmZphA3lMh8qZaXyDo2kfwG45Wm3w,9
96
96
  bbot/modules/dockerhub.py,sha256=ruvTP8Uz5LEuX-_SrKDzByvSNtd1ofZbX-lRTeKUB24,3491
97
97
  bbot/modules/dotnetnuke.py,sha256=XZysDA99ahQSLXR8RPROlmUwDxqrxvBFvscZMYBmsmc,10539
98
98
  bbot/modules/emailformat.py,sha256=RLPJW-xitYB-VT4Lp08qVzFkXx_kMyV_035JT_Yf4fM,1082
99
- bbot/modules/extractous.py,sha256=yPIM6UHYExGPNVDt8x_jE-UxRl_JbDrThFguIfBUuuY,5129
99
+ bbot/modules/extractous.py,sha256=22pc62jOs_4E3uZtBjUbUksLTadNr9sp_4r0-eU1un8,4551
100
100
  bbot/modules/ffuf_shortnames.py,sha256=9Kh0kJsw7XXpXmCkiB5eAhG4h9rSo8Y-mB3p0EDa_l0,12624
101
101
  bbot/modules/filedownload.py,sha256=1prC84wAQO-W1HstitKPQ0-eYEApjzFn3RHFa9oaqLc,8185
102
102
  bbot/modules/fingerprintx.py,sha256=rdlR9d64AntAhbS_eJzh8bZCeLPTJPSKdkdKdhH_qAo,3269
@@ -404,8 +404,8 @@ bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt,sha256=ruUQwVfia1_m2u
404
404
  bbot/wordlists/top_open_ports_nmap.txt,sha256=LmdFYkfapSxn1pVuQC2LkOIY2hMLgG-Xts7DVtYzweM,42727
405
405
  bbot/wordlists/valid_url_schemes.txt,sha256=VciB-ww0y-O8Ii1wpTR6rJzGDiC2r-dhVsIJApS1ZYU,3309
406
406
  bbot/wordlists/wordninja_dns.txt.gz,sha256=DYHvvfW0TvzrVwyprqODAk4tGOxv5ezNmCPSdPuDUnQ,570241
407
- bbot-2.1.2.5238rc0.dist-info/LICENSE,sha256=GzeCzK17hhQQDNow0_r0L8OfLpeTKQjFQwBQU7ZUymg,32473
408
- bbot-2.1.2.5238rc0.dist-info/METADATA,sha256=HSSIhK3Gqj3mxD3gQoYAgdDlJInB3EdQUIy8eBPmuxQ,17109
409
- bbot-2.1.2.5238rc0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
410
- bbot-2.1.2.5238rc0.dist-info/entry_points.txt,sha256=cWjvcU_lLrzzJgjcjF7yeGuRA_eDS8pQ-kmPUAyOBfo,38
411
- bbot-2.1.2.5238rc0.dist-info/RECORD,,
407
+ bbot-2.1.2.5240rc0.dist-info/LICENSE,sha256=GzeCzK17hhQQDNow0_r0L8OfLpeTKQjFQwBQU7ZUymg,32473
408
+ bbot-2.1.2.5240rc0.dist-info/METADATA,sha256=-25uLYiERzYogB42LWOMPVT4217SpPPlAt7yv00ju5I,17109
409
+ bbot-2.1.2.5240rc0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
410
+ bbot-2.1.2.5240rc0.dist-info/entry_points.txt,sha256=cWjvcU_lLrzzJgjcjF7yeGuRA_eDS8pQ-kmPUAyOBfo,38
411
+ bbot-2.1.2.5240rc0.dist-info/RECORD,,