man-spider 1.1.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
man_spider/lib/util.py CHANGED
@@ -1,12 +1,66 @@
1
1
  import os
2
- import magic
2
+ import re
3
3
  import string
4
4
  import random
5
5
  import logging
6
6
  import ipaddress
7
7
  from pathlib import Path
8
+ from dataclasses import dataclass
9
+ from charset_normalizer import from_bytes
8
10
 
9
- log = logging.getLogger('manspider.util')
11
+ log = logging.getLogger("manspider.util")
12
+
13
+
14
@dataclass
class Target:
    """
    A target host together with the port to connect to.

    Two targets are equal, and hash equally, when both ``host`` and ``port``
    match, so instances can be de-duplicated in sets and used as dict keys.
    """

    # Hostname or IP address, stored exactly as given.
    host: str
    # Port number; 445 is used when none is supplied.
    port: int = 445

    def __str__(self):
        """Return ``host`` alone for the default port, otherwise ``host:port``."""
        if self.port == 445:
            return self.host
        return f"{self.host}:{self.port}"

    def __hash__(self):
        """Hash on the same (host, port) pair that ``__eq__`` compares."""
        return hash((self.host, self.port))

    def __eq__(self, other):
        """Compare by (host, port); defer to the other operand for foreign types."""
        if not isinstance(other, Target):
            # NotImplemented lets Python try the reflected comparison before
            # falling back to identity, so ``Target("a") == "a"`` is still False.
            return NotImplemented
        return (self.host, self.port) == (other.host, other.port)
33
+
34
+
35
def parse_host_port(s):
    """
    Split a target string into a (host, port) tuple.

    Surrounding whitespace is ignored. Accepted forms:
      host, 1.2.3.4, 10.0.0.0/24   -> port defaults to 445
      host:port, 1.2.3.4:port      -> explicit port
      [ipv6], [ipv6]:port          -> brackets are stripped from the host
      bare IPv6 such as fe80::1    -> never split, port defaults to 445

    A port is only accepted when it consists solely of ASCII digits and lies
    in the range 1-65535. Anything else is not treated as a port: the whole
    (stripped) string is returned as the host with the default port.
    """
    default_port = 445
    s = s.strip()

    def to_port(text):
        # int() on its own would also accept "-1", "+445", " 445" and "4_45".
        if not (text.isascii() and text.isdigit()):
            return None
        try:
            value = int(text)
        except ValueError:
            # Absurdly long digit strings can be rejected by int() itself.
            return None
        return value if 1 <= value <= 65535 else None

    # IPv6 with port: [::1]:445
    bracketed = re.fullmatch(r"\[([^\]]+)\]:([0-9]+)", s)
    if bracketed:
        port = to_port(bracketed.group(2))
        if port is not None:
            return bracketed.group(1), port
        return s, default_port

    # IPv6 without port, in brackets: [::1]
    if s.startswith("[") and s.endswith("]"):
        return s[1:-1], default_port

    # Bare IPv6 (more than one colon): a trailing port cannot be told apart
    # from the last address group, so the string is never split.
    if s.count(":") > 1:
        return s, default_port

    # IPv4/hostname/CIDR with port: 192.168.1.1:445 or host.com:445
    host, separator, port_text = s.rpartition(":")
    if separator:
        port = to_port(port_text)
        if port is not None:
            return host, port

    # No port, or not a valid port: treat the whole thing as the host
    return s, default_port
10
64
 
11
65
 
12
66
  def str_to_list(s):
@@ -26,46 +80,51 @@ def str_to_list(s):
26
80
 
27
81
 
28
82
  def make_targets(s):
29
- '''
83
+ """
30
84
  Accepts filename, CIDR, IP, hostname, file, or folder
31
- Returns list of targets as IPs, hostnames, or Path() objects
32
- '''
85
+ Supports host:port syntax (e.g., 192.168.1.1:4455)
86
+ Returns list of targets as Target objects or Path() objects
87
+ """
33
88
 
34
89
  targets = set()
35
90
 
36
91
  p = Path(s)
37
- if s.lower() == 'loot':
38
- targets.add(Path.home() / '.manspider' / 'loot')
92
+ if s.lower() == "loot":
93
+ targets.add(Path.home() / ".manspider" / "loot")
39
94
 
40
95
  elif p.is_dir():
41
96
  targets.add(p)
42
97
 
43
98
  else:
44
99
  for i in str_to_list(s):
100
+ # Parse host:port if present
101
+ host, port = parse_host_port(i)
45
102
  try:
46
- for ip in ipaddress.ip_network(i, strict=False):
47
- targets.add(str(ip))
103
+ # Try to expand as CIDR network
104
+ for ip in ipaddress.ip_network(host, strict=False):
105
+ targets.add(Target(str(ip), port))
48
106
  except ValueError:
49
- targets.add(i)
107
+ # Not a CIDR, treat as hostname
108
+ targets.add(Target(host, port))
50
109
 
51
110
  return list(targets)
52
111
 
53
112
 
54
113
  def human_to_int(h):
55
- '''
114
+ """
56
115
  converts human-readable number to integer
57
116
  e.g. 1K --> 1000
58
- '''
117
+ """
59
118
 
60
119
  if type(h) == int:
61
120
  return h
62
121
 
63
- units = {'': 1, 'K': 1024, 'M': 1024**2, 'G': 1024**3, 'T': 1024**4}
122
+ units = {"": 1, "K": 1024, "M": 1024**2, "G": 1024**3, "T": 1024**4}
64
123
 
65
124
  try:
66
125
  h = h.upper().strip()
67
- i = float(''.join(c for c in h if c in string.digits + '.'))
68
- unit = ''.join([c for c in h if c in units.keys()])
126
+ i = float("".join(c for c in h if c in string.digits + "."))
127
+ unit = "".join([c for c in h if c in units.keys()])
69
128
  except (ValueError, KeyError):
70
129
  raise ValueError(f'Invalid filesize "{h}"')
71
130
 
@@ -73,45 +132,52 @@ def human_to_int(h):
73
132
 
74
133
 
75
134
  def bytes_to_human(_bytes):
76
- '''
135
+ """
77
136
  converts bytes to human-readable filesize
78
137
  e.g. 1024 --> 1KB
79
- '''
138
+ """
80
139
 
81
- sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
140
+ sizes = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"]
82
141
  units = {}
83
142
  count = 0
84
143
  for size in sizes:
85
144
  units[size] = pow(1024, count)
86
- count +=1
145
+ count += 1
87
146
 
88
147
  for size in sizes:
89
148
  if abs(_bytes) < 1024.0:
90
149
  if size == sizes[0]:
91
150
  _bytes = str(int(_bytes))
92
151
  else:
93
- _bytes = '{:.2f}'.format(_bytes)
94
- return '{}{}'.format(_bytes, size)
152
+ _bytes = "{:.2f}".format(_bytes)
153
+ return "{}{}".format(_bytes, size)
95
154
  _bytes /= 1024
96
155
 
97
156
  raise ValueError
98
157
 
99
158
 
100
159
  def better_decode(b):
160
+ """
161
+ Decode bytes to string using charset-normalizer for encoding detection.
162
+ """
163
+ result = from_bytes(b)
164
+ best = result.best()
101
165
 
102
- # detect encoding with libmagic
103
- m = magic.Magic(mime_encoding=True)
104
- encoding = m.from_buffer(b)
166
+ if best is not None:
167
+ return str(best)
105
168
 
169
+ # Fallback if no encoding detected
106
170
  try:
107
- return b.decode(encoding)
171
+ return b.decode('utf-8', errors='ignore')
108
172
  except Exception:
109
173
  return str(b)[2:-1]
110
174
 
111
175
 
112
176
  def random_string(length):
113
177
 
114
- return ''.join(random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for i in range(length))
178
+ return "".join(
179
+ random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for i in range(length)
180
+ )
115
181
 
116
182
 
117
183
  def list_files(path):
@@ -130,13 +196,13 @@ def list_files(path):
130
196
 
131
197
 
132
198
  def rmdir(directory):
133
- '''
199
+ """
134
200
  Recursively remove directory
135
- '''
201
+ """
136
202
  directory = Path(directory)
137
203
  for item in directory.iterdir():
138
204
  if item.is_dir():
139
205
  rmdir(item)
140
206
  else:
141
207
  item.unlink()
142
- directory.rmdir()
208
+ directory.rmdir()
man_spider/manspider.py CHANGED
@@ -7,54 +7,56 @@ import argparse
7
7
  import traceback
8
8
  from time import sleep
9
9
  import multiprocessing
10
+ from datetime import datetime
10
11
 
11
12
  from man_spider.lib import *
12
13
 
13
14
 
14
15
  # set up logging
15
- log = logging.getLogger('manspider')
16
+ log = logging.getLogger("manspider")
16
17
  log.setLevel(logging.INFO)
17
18
 
18
19
 
19
20
  def go(options):
20
21
 
21
- log.info('MANSPIDER command executed: ' + ' '.join(sys.argv))
22
+ log.info("MANSPIDER command executed: " + " ".join(sys.argv))
22
23
 
23
24
  try:
24
-
25
25
  # warn if --or-logic is enabled
26
26
  if options.or_logic and options.content and not all([type(t) == pathlib.PosixPath for t in options.targets]):
27
- log.warning('WARNING: "--or-logic" causes files to be content-searched even if filename/extension filters do not match!!')
27
+ log.warning(
28
+ 'WARNING: "--or-logic" causes files to be content-searched even if filename/extension filters do not match!!'
29
+ )
28
30
  sleep(2)
29
31
 
30
32
  # exit if no filters were specified
31
33
  if not (options.filenames or options.extensions or options.exclude_extensions or options.content):
32
- log.error('Please specify at least one of --filenames, --content, --extensions, or --exclude-extensions')
34
+ log.error("Please specify at least one of --filenames, --content, --extensions, or --exclude-extensions")
33
35
  return
34
36
 
35
37
  # exit if --maxdepth is invalid
36
38
  if options.maxdepth <= 0:
37
- log.error('--maxdepth must be greater than zero')
39
+ log.error("--maxdepth must be greater than zero")
38
40
  return
39
41
 
40
- log.info(f'Skipping files larger than {bytes_to_human(options.max_filesize)}')
41
- log.info(f'Using {options.threads:,} threads')
42
+ log.info(f"Skipping files larger than {bytes_to_human(options.max_filesize)}")
43
+ log.info(f"Using {options.threads:,} threads")
42
44
 
43
45
  manspider = MANSPIDER(options)
44
46
  manspider.start()
45
47
 
46
48
  except KeyboardInterrupt:
47
- log.critical('Interrupted')
49
+ log.critical("Interrupted")
48
50
 
49
51
  except Exception as e:
50
52
  if log.level <= logging.DEBUG:
51
53
  log.critical(traceback.format_exc())
52
54
  else:
53
- log.critical(f'Critical error (-v to debug): {e}')
55
+ log.critical(f"Critical error (-v to debug): {e}")
54
56
 
55
57
  finally:
56
58
  # make sure temp files are cleaned up before exiting
57
- #rmdir(manspider.tmp_dir)
59
+ # rmdir(manspider.tmp_dir)
58
60
  pass
59
61
 
60
62
 
@@ -62,7 +64,7 @@ def main():
62
64
 
63
65
  interrupted = False
64
66
 
65
- examples = '''
67
+ examples = """
66
68
 
67
69
  # EXAMPLES
68
70
 
@@ -77,39 +79,132 @@ def main():
77
79
 
78
80
  Example 4: Search for finance-related files
79
81
  $ manspider share.evilcorp.local --dirnames bank financ payable payment reconcil remit voucher vendor eft swift -f '[0-9]{5,}' -d evilcorp -u bob -p Passw0rd
80
- '''
81
-
82
- parser = argparse.ArgumentParser(description='Scan for juicy data on SMB shares. Matching files and logs are stored in $HOME/.manspider. All filters are case-insensitive.')
83
- parser.add_argument('targets', nargs='+', type=make_targets, help='IPs, Hostnames, CIDR ranges, or files containing targets to spider (NOTE: local searching also supported, specify directory name or keyword "loot" to search downloaded files)')
84
- parser.add_argument('-u', '--username', default='', help='username for authentication')
85
- parser.add_argument('-p', '--password', default='', help='password for authentication')
86
- parser.add_argument('-d', '--domain', default='', help='domain for authentication')
87
- parser.add_argument('-l','--loot-dir', default='', help='loot directory (default ~/.manspider/)')
88
- parser.add_argument('-m', '--maxdepth', type=int, default=10, help='maximum depth to spider (default: 10)')
89
- parser.add_argument('-H', '--hash', default='', help='NTLM hash for authentication')
90
- parser.add_argument('-k', '--kerberos', action='store_true', help='Use Kerberos authentication. Grabs credentials from ccache file (KRB5CCNAME) based on target parameters')
91
- parser.add_argument('-aesKey', '--aes-key', action='store', metavar='HEX', help='AES key to use for Kerberos Authentication (128 or 256 bits)')
92
- parser.add_argument('-dc-ip', '--dc-ip', action='store', metavar='IP', help='IP Address of the domain controller. If omitted it will use the domain part (FQDN) specified in the target parameter')
93
- parser.add_argument('-t', '--threads', type=int, default=5, help='concurrent threads (default: 5)')
94
- parser.add_argument('-f', '--filenames', nargs='+', default=[], help=f'filter filenames using regex (space-separated)', metavar='REGEX')
95
- parser.add_argument('-e', '--extensions',nargs='+', default=[], help='only show filenames with these extensions (space-separated, e.g. `docx xlsx` for only word & excel docs)', metavar='EXT')
96
- parser.add_argument('--exclude-extensions',nargs='+', default=[], help='ignore files with these extensions', metavar='EXT')
97
- parser.add_argument('-c', '--content', nargs='+', default=[], help='search for file content using regex (multiple supported)', metavar='REGEX')
98
- parser.add_argument('--sharenames', nargs='+', default=[], help='only search shares with these names (multiple supported)', metavar='SHARE')
99
- parser.add_argument('--exclude-sharenames', nargs='*', default=['IPC$', 'C$', 'ADMIN$', 'PRINT$'],help='don\'t search shares with these names (multiple supported)', metavar='SHARE')
100
- parser.add_argument('--dirnames', nargs='+', default=[], help='only search directories containing these strings (multiple supported)', metavar='DIR')
101
- parser.add_argument('--exclude-dirnames', nargs='+', default=[], help='don\'t search directories containing these strings (multiple supported)', metavar='DIR')
102
- parser.add_argument('-q', '--quiet', action='store_true', help='don\'t display matching file content')
103
- parser.add_argument('-n', '--no-download', action='store_true', help='don\'t download matching files')
104
- parser.add_argument('-mfail', '--max-failed-logons', type=int, help='limit failed logons', metavar='INT')
105
- parser.add_argument('-o', '--or-logic', action='store_true', help=f'use OR logic instead of AND (files are downloaded if filename OR extension OR content match)')
106
- parser.add_argument('-s', '--max-filesize', type=human_to_int, default=human_to_int('10M'), help=f'don\'t retrieve files over this size, e.g. "500K" or ".5M" (default: 10M)', metavar='SIZE')
107
- parser.add_argument('-v', '--verbose', action='store_true', help='show debugging messages')
108
-
82
+ """
83
+
84
+ parser = argparse.ArgumentParser(
85
+ description="Scan for juicy data on SMB shares. Matching files and logs are stored in $HOME/.manspider. All filters are case-insensitive."
86
+ )
87
+ parser.add_argument(
88
+ "targets",
89
+ nargs="+",
90
+ type=make_targets,
91
+ help='IPs, Hostnames, CIDR ranges, or files containing targets to spider (NOTE: local searching also supported, specify directory name or keyword "loot" to search downloaded files)',
92
+ )
93
+ parser.add_argument("-u", "--username", default="", help="username for authentication")
94
+ parser.add_argument("-p", "--password", default="", help="password for authentication")
95
+ parser.add_argument("-d", "--domain", default="", help="domain for authentication")
96
+ parser.add_argument("-l", "--loot-dir", default="", help="loot directory (default ~/.manspider/)")
97
+ parser.add_argument("-m", "--maxdepth", type=int, default=10, help="maximum depth to spider (default: 10)")
98
+ parser.add_argument("-H", "--hash", default="", help="NTLM hash for authentication")
99
+ parser.add_argument(
100
+ "-k",
101
+ "--kerberos",
102
+ action="store_true",
103
+ help="Use Kerberos authentication. Grabs credentials from ccache file (KRB5CCNAME) based on target parameters",
104
+ )
105
+ parser.add_argument(
106
+ "-aesKey",
107
+ "--aes-key",
108
+ action="store",
109
+ metavar="HEX",
110
+ help="AES key to use for Kerberos Authentication (128 or 256 bits)",
111
+ )
112
+ parser.add_argument(
113
+ "-dc-ip",
114
+ "--dc-ip",
115
+ action="store",
116
+ metavar="IP",
117
+ help="IP Address of the domain controller. If omitted it will use the domain part (FQDN) specified in the target parameter",
118
+ )
119
+ parser.add_argument("-t", "--threads", type=int, default=5, help="concurrent threads (default: 5)")
120
+ parser.add_argument(
121
+ "-f",
122
+ "--filenames",
123
+ nargs="+",
124
+ default=[],
125
+ help="filter filenames using regex (space-separated)",
126
+ metavar="REGEX",
127
+ )
128
+ parser.add_argument(
129
+ "-e",
130
+ "--extensions",
131
+ nargs="+",
132
+ default=[],
133
+ help="only show filenames with these extensions (space-separated, e.g. `docx xlsx` for only word & excel docs)",
134
+ metavar="EXT",
135
+ )
136
+ parser.add_argument(
137
+ "--exclude-extensions", nargs="+", default=[], help="ignore files with these extensions", metavar="EXT"
138
+ )
139
+ parser.add_argument(
140
+ "-c",
141
+ "--content",
142
+ nargs="+",
143
+ default=[],
144
+ help="search for file content using regex (multiple supported)",
145
+ metavar="REGEX",
146
+ )
147
+ parser.add_argument(
148
+ "--sharenames",
149
+ nargs="+",
150
+ default=[],
151
+ help="only search shares with these names (multiple supported)",
152
+ metavar="SHARE",
153
+ )
154
+ parser.add_argument(
155
+ "--exclude-sharenames",
156
+ nargs="*",
157
+ default=["IPC$", "C$", "ADMIN$", "PRINT$"],
158
+ help="don't search shares with these names (multiple supported)",
159
+ metavar="SHARE",
160
+ )
161
+ parser.add_argument(
162
+ "--dirnames",
163
+ nargs="+",
164
+ default=[],
165
+ help="only search directories containing these strings (multiple supported)",
166
+ metavar="DIR",
167
+ )
168
+ parser.add_argument(
169
+ "--exclude-dirnames",
170
+ nargs="+",
171
+ default=[],
172
+ help="don't search directories containing these strings (multiple supported)",
173
+ metavar="DIR",
174
+ )
175
+ parser.add_argument("-q", "--quiet", action="store_true", help="don't display matching file content")
176
+ parser.add_argument("-n", "--no-download", action="store_true", help="don't download matching files")
177
+ parser.add_argument("-mfail", "--max-failed-logons", type=int, help="limit failed logons", metavar="INT")
178
+ parser.add_argument(
179
+ "-o",
180
+ "--or-logic",
181
+ action="store_true",
182
+ help="use OR logic instead of AND (files are downloaded if filename OR extension OR content match)",
183
+ )
184
+ parser.add_argument(
185
+ "-s",
186
+ "--max-filesize",
187
+ type=human_to_int,
188
+ default=human_to_int("10M"),
189
+ help='don\'t retrieve files over this size, e.g. "500K" or ".5M" (default: 10M)',
190
+ metavar="SIZE",
191
+ )
192
+ parser.add_argument("-v", "--verbose", action="store_true", help="show debugging messages")
193
+ parser.add_argument(
194
+ "--modified-after",
195
+ type=str,
196
+ metavar="DATE",
197
+ help="only show files modified after this date (format: YYYY-MM-DD)",
198
+ )
199
+ parser.add_argument(
200
+ "--modified-before",
201
+ type=str,
202
+ metavar="DATE",
203
+ help="only show files modified before this date (format: YYYY-MM-DD)",
204
+ )
109
205
 
110
206
  syntax_error = False
111
207
  try:
112
-
113
208
  if len(sys.argv) == 1:
114
209
  parser.print_help()
115
210
  sys.exit(1)
@@ -117,22 +212,41 @@ def main():
117
212
  options = parser.parse_args()
118
213
 
119
214
  if options.verbose:
120
- log.setLevel('DEBUG')
215
+ log.setLevel("DEBUG")
121
216
 
122
- if options.kerberos and not "KRB5CCNAME" in os.environ:
217
+ if options.kerberos and "KRB5CCNAME" not in os.environ:
123
218
  log.error("KRB5CCNAME is not set in the environment")
124
219
  sys.exit(1)
125
220
 
221
+ # Parse date filters
222
+ if options.modified_after:
223
+ try:
224
+ options.modified_after = datetime.strptime(options.modified_after, "%Y-%m-%d")
225
+ except ValueError:
226
+ log.error("Invalid date format for --modified-after. Use YYYY-MM-DD")
227
+ sys.exit(1)
228
+ else:
229
+ options.modified_after = None
230
+
231
+ if options.modified_before:
232
+ try:
233
+ options.modified_before = datetime.strptime(options.modified_before, "%Y-%m-%d")
234
+ except ValueError:
235
+ log.error("Invalid date format for --modified-before. Use YYYY-MM-DD")
236
+ sys.exit(1)
237
+ else:
238
+ options.modified_before = None
239
+
126
240
  # make sure extension formats are valid
127
241
  for i, extension in enumerate(options.extensions):
128
- if extension and not extension.startswith('.'):
129
- extension = f'.{extension}'
242
+ if extension and not extension.startswith("."):
243
+ extension = f".{extension}"
130
244
  options.extensions[i] = extension.lower()
131
245
 
132
246
  # make sure extension blacklist is valid
133
247
  for i, extension in enumerate(options.exclude_extensions):
134
- if not extension.startswith('.'):
135
- extension = f'.{extension}'
248
+ if not extension.startswith("."):
249
+ extension = f".{extension}"
136
250
  options.exclude_extensions[i] = extension.lower()
137
251
 
138
252
  # lowercase share names
@@ -155,11 +269,11 @@ def main():
155
269
  except argparse.ArgumentError as e:
156
270
  syntax_error = True
157
271
  log.error(e)
158
- log.error('Check your syntax')
272
+ log.error("Check your syntax")
159
273
  sys.exit(2)
160
274
 
161
275
  except KeyboardInterrupt:
162
- log.critical('Interrupted')
276
+ log.critical("Interrupted")
163
277
  sys.exit(1)
164
278
 
165
279
  # pretty format all errors if we're not debugging
@@ -167,10 +281,10 @@ def main():
167
281
  if log.level <= logging.DEBUG:
168
282
  log.critical(traceback.format_exc())
169
283
  else:
170
- log.critical(f'Critical error (-v to debug): {e}')
284
+ log.critical(f"Critical error (-v to debug): {e}")
171
285
 
172
286
  finally:
173
- if '-h' in sys.argv or '--help' in sys.argv or len(sys.argv) == 1 or syntax_error:
287
+ if "-h" in sys.argv or "--help" in sys.argv or len(sys.argv) == 1 or syntax_error:
174
288
  print(examples)
175
289
  sleep(1)
176
290
  try:
@@ -185,5 +299,5 @@ def main():
185
299
  pass
186
300
 
187
301
 
188
- if __name__ == '__main__':
189
- main()
302
+ if __name__ == "__main__":
303
+ main()