man-spider 1.1.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
man_spider/lib/util.py CHANGED
@@ -1,12 +1,66 @@
1
1
  import os
2
- import magic
2
+ import re
3
3
  import string
4
4
  import random
5
5
  import logging
6
6
  import ipaddress
7
7
  from pathlib import Path
8
+ from dataclasses import dataclass
9
+ from charset_normalizer import from_bytes
8
10
 
9
- log = logging.getLogger('manspider.util')
11
+ log = logging.getLogger("manspider.util")
12
+
13
+
@dataclass
class Target:
    """Represents a target host with optional port.

    Instances hash and compare on the (host, port) pair, so two targets with
    the same host and port are interchangeable as set members or dict keys.
    """

    # Hostname or IP address, stored exactly as given
    host: str
    # Port number; 445 is the default and is omitted by __str__
    port: int = 445

    def __str__(self):
        """Return ``host`` for the default port, otherwise ``host:port``."""
        if self.port == 445:
            return self.host
        return f"{self.host}:{self.port}"

    def __hash__(self):
        """Hash on the same (host, port) pair that __eq__ compares."""
        return hash((self.host, self.port))

    def __eq__(self, other):
        """Compare by host and port; defer to the other operand for foreign types."""
        if not isinstance(other, Target):
            # NotImplemented lets Python try the reflected comparison;
            # a plain `==` against an unrelated type still evaluates to False.
            return NotImplemented
        return self.host == other.host and self.port == other.port
33
+
34
+
def _validate_port(port, original):
    """Return ``port`` unchanged if it lies in 1-65535, otherwise raise ValueError."""
    if not 1 <= port <= 65535:
        raise ValueError(f'Invalid port "{port}" in target "{original}" (must be 1-65535)')
    return port


def parse_host_port(s):
    """
    Parse a host:port string. Returns (host, port) tuple.
    Port defaults to 445 if not specified.
    Handles IPv6 addresses in brackets: [::1]:445

    If the text after the last colon is not an integer, the whole string is
    returned as the host with the default port.

    Raises ValueError if a port is given but falls outside 1-65535.
    """
    # IPv6 with port: [::1]:445
    ipv6_match = re.match(r"^\[([^\]]+)\]:(\d+)$", s)
    if ipv6_match:
        return ipv6_match.group(1), _validate_port(int(ipv6_match.group(2)), s)

    # IPv6 without port: [::1] or ::1
    if s.startswith("[") and s.endswith("]"):
        return s[1:-1], 445
    if s.count(":") > 1:
        # Plain IPv6 address (multiple colons, no port)
        return s, 445

    # IPv4/hostname with port: 192.168.1.1:445 or host.com:445
    if ":" in s:
        host, port_str = s.rsplit(":", 1)
        try:
            port = int(port_str)
        except ValueError:
            # Not a valid port, treat whole thing as host
            return s, 445
        # Range check sits outside the try so its ValueError is not swallowed
        # by the "not a port" fallback above.
        return host, _validate_port(port, s)

    # No port specified
    return s, 445
10
64
 
11
65
 
12
66
  def str_to_list(s):
@@ -26,46 +80,51 @@ def str_to_list(s):
26
80
 
27
81
 
28
82
  def make_targets(s):
29
- '''
83
+ """
30
84
  Accepts filename, CIDR, IP, hostname, file, or folder
31
- Returns list of targets as IPs, hostnames, or Path() objects
32
- '''
85
+ Supports host:port syntax (e.g., 192.168.1.1:4455)
86
+ Returns list of targets as Target objects or Path() objects
87
+ """
33
88
 
34
89
  targets = set()
35
90
 
36
91
  p = Path(s)
37
- if s.lower() == 'loot':
38
- targets.add(Path.home() / '.manspider' / 'loot')
92
+ if s.lower() == "loot":
93
+ targets.add(Path.home() / ".manspider" / "loot")
39
94
 
40
95
  elif p.is_dir():
41
96
  targets.add(p)
42
97
 
43
98
  else:
44
99
  for i in str_to_list(s):
100
+ # Parse host:port if present
101
+ host, port = parse_host_port(i)
45
102
  try:
46
- for ip in ipaddress.ip_network(i, strict=False):
47
- targets.add(str(ip))
103
+ # Try to expand as CIDR network
104
+ for ip in ipaddress.ip_network(host, strict=False):
105
+ targets.add(Target(str(ip), port))
48
106
  except ValueError:
49
- targets.add(i)
107
+ # Not a CIDR, treat as hostname
108
+ targets.add(Target(host, port))
50
109
 
51
110
  return list(targets)
52
111
 
53
112
 
54
113
  def human_to_int(h):
55
- '''
114
+ """
56
115
  converts human-readable number to integer
57
116
  e.g. 1K --> 1000
58
- '''
117
+ """
59
118
 
60
119
  if type(h) == int:
61
120
  return h
62
121
 
63
- units = {'': 1, 'K': 1024, 'M': 1024**2, 'G': 1024**3, 'T': 1024**4}
122
+ units = {"": 1, "K": 1024, "M": 1024**2, "G": 1024**3, "T": 1024**4}
64
123
 
65
124
  try:
66
125
  h = h.upper().strip()
67
- i = float(''.join(c for c in h if c in string.digits + '.'))
68
- unit = ''.join([c for c in h if c in units.keys()])
126
+ i = float("".join(c for c in h if c in string.digits + "."))
127
+ unit = "".join([c for c in h if c in units.keys()])
69
128
  except (ValueError, KeyError):
70
129
  raise ValueError(f'Invalid filesize "{h}"')
71
130
 
@@ -73,45 +132,52 @@ def human_to_int(h):
73
132
 
74
133
 
75
134
  def bytes_to_human(_bytes):
76
- '''
135
+ """
77
136
  converts bytes to human-readable filesize
78
137
  e.g. 1024 --> 1KB
79
- '''
138
+ """
80
139
 
81
- sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
140
+ sizes = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"]
82
141
  units = {}
83
142
  count = 0
84
143
  for size in sizes:
85
144
  units[size] = pow(1024, count)
86
- count +=1
145
+ count += 1
87
146
 
88
147
  for size in sizes:
89
148
  if abs(_bytes) < 1024.0:
90
149
  if size == sizes[0]:
91
150
  _bytes = str(int(_bytes))
92
151
  else:
93
- _bytes = '{:.2f}'.format(_bytes)
94
- return '{}{}'.format(_bytes, size)
152
+ _bytes = "{:.2f}".format(_bytes)
153
+ return "{}{}".format(_bytes, size)
95
154
  _bytes /= 1024
96
155
 
97
156
  raise ValueError
98
157
 
99
158
 
100
159
  def better_decode(b):
160
+ """
161
+ Decode bytes to string using charset-normalizer for encoding detection.
162
+ """
163
+ result = from_bytes(b)
164
+ best = result.best()
101
165
 
102
- # detect encoding with libmagic
103
- m = magic.Magic(mime_encoding=True)
104
- encoding = m.from_buffer(b)
166
+ if best is not None:
167
+ return str(best)
105
168
 
169
+ # Fallback if no encoding detected
106
170
  try:
107
- return b.decode(encoding)
171
+ return b.decode('utf-8', errors='ignore')
108
172
  except Exception:
109
173
  return str(b)[2:-1]
110
174
 
111
175
 
112
176
  def random_string(length):
113
177
 
114
- return ''.join(random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for i in range(length))
178
+ return "".join(
179
+ random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for i in range(length)
180
+ )
115
181
 
116
182
 
117
183
  def list_files(path):
@@ -130,13 +196,13 @@ def list_files(path):
130
196
 
131
197
 
132
198
  def rmdir(directory):
133
- '''
199
+ """
134
200
  Recursively remove directory
135
- '''
201
+ """
136
202
  directory = Path(directory)
137
203
  for item in directory.iterdir():
138
204
  if item.is_dir():
139
205
  rmdir(item)
140
206
  else:
141
207
  item.unlink()
142
- directory.rmdir()
208
+ directory.rmdir()
man_spider/manspider.py CHANGED
@@ -5,55 +5,58 @@ import pathlib
5
5
  import logging
6
6
  import argparse
7
7
  import traceback
8
- from .lib import *
9
8
  from time import sleep
10
9
  import multiprocessing
10
+ from datetime import datetime
11
+
12
+ from man_spider.lib import *
11
13
 
12
14
 
13
15
  # set up logging
14
- log = logging.getLogger('manspider')
16
+ log = logging.getLogger("manspider")
15
17
  log.setLevel(logging.INFO)
16
18
 
17
19
 
18
20
  def go(options):
19
21
 
20
- log.info('MANSPIDER command executed: ' + ' '.join(sys.argv))
22
+ log.info("MANSPIDER command executed: " + " ".join(sys.argv))
21
23
 
22
24
  try:
23
-
24
25
  # warn if --or-logic is enabled
25
26
  if options.or_logic and options.content and not all([type(t) == pathlib.PosixPath for t in options.targets]):
26
- log.warning('WARNING: "--or-logic" causes files to be content-searched even if filename/extension filters do not match!!')
27
+ log.warning(
28
+ 'WARNING: "--or-logic" causes files to be content-searched even if filename/extension filters do not match!!'
29
+ )
27
30
  sleep(2)
28
31
 
29
32
  # exit if no filters were specified
30
33
  if not (options.filenames or options.extensions or options.exclude_extensions or options.content):
31
- log.error('Please specify at least one of --filenames, --content, --extensions, or --exclude-extensions')
34
+ log.error("Please specify at least one of --filenames, --content, --extensions, or --exclude-extensions")
32
35
  return
33
36
 
34
37
  # exit if --maxdepth is invalid
35
38
  if options.maxdepth <= 0:
36
- log.error('--maxdepth must be greater than zero')
39
+ log.error("--maxdepth must be greater than zero")
37
40
  return
38
41
 
39
- log.info(f'Skipping files larger than {bytes_to_human(options.max_filesize)}')
40
- log.info(f'Using {options.threads:,} threads')
42
+ log.info(f"Skipping files larger than {bytes_to_human(options.max_filesize)}")
43
+ log.info(f"Using {options.threads:,} threads")
41
44
 
42
45
  manspider = MANSPIDER(options)
43
46
  manspider.start()
44
47
 
45
48
  except KeyboardInterrupt:
46
- log.critical('Interrupted')
49
+ log.critical("Interrupted")
47
50
 
48
51
  except Exception as e:
49
52
  if log.level <= logging.DEBUG:
50
53
  log.critical(traceback.format_exc())
51
54
  else:
52
- log.critical(f'Critical error (-v to debug): {e}')
55
+ log.critical(f"Critical error (-v to debug): {e}")
53
56
 
54
57
  finally:
55
58
  # make sure temp files are cleaned up before exiting
56
- #rmdir(manspider.tmp_dir)
59
+ # rmdir(manspider.tmp_dir)
57
60
  pass
58
61
 
59
62
 
@@ -61,7 +64,7 @@ def main():
61
64
 
62
65
  interrupted = False
63
66
 
64
- examples = '''
67
+ examples = """
65
68
 
66
69
  # EXAMPLES
67
70
 
@@ -76,39 +79,132 @@ def main():
76
79
 
77
80
  Example 4: Search for finance-related files
78
81
  $ manspider share.evilcorp.local --dirnames bank financ payable payment reconcil remit voucher vendor eft swift -f '[0-9]{5,}' -d evilcorp -u bob -p Passw0rd
79
- '''
80
-
81
- parser = argparse.ArgumentParser(description='Scan for juicy data on SMB shares. Matching files and logs are stored in $HOME/.manspider. All filters are case-insensitive.')
82
- parser.add_argument('targets', nargs='+', type=make_targets, help='IPs, Hostnames, CIDR ranges, or files containing targets to spider (NOTE: local searching also supported, specify directory name or keyword "loot" to search downloaded files)')
83
- parser.add_argument('-u', '--username', default='', help='username for authentication')
84
- parser.add_argument('-p', '--password', default='', help='password for authentication')
85
- parser.add_argument('-d', '--domain', default='', help='domain for authentication')
86
- parser.add_argument('-l','--loot-dir', default='', help='loot directory (default ~/.manspider/)')
87
- parser.add_argument('-m', '--maxdepth', type=int, default=10, help='maximum depth to spider (default: 10)')
88
- parser.add_argument('-H', '--hash', default='', help='NTLM hash for authentication')
89
- parser.add_argument('-k', '--kerberos', action='store_true', help='Use Kerberos authentication. Grabs credentials from ccache file (KRB5CCNAME) based on target parameters')
90
- parser.add_argument('-aesKey', '--aes-key', action='store', metavar='HEX', help='AES key to use for Kerberos Authentication (128 or 256 bits)')
91
- parser.add_argument('-dc-ip', '--dc-ip', action='store', metavar='IP', help='IP Address of the domain controller. If omitted it will use the domain part (FQDN) specified in the target parameter')
92
- parser.add_argument('-t', '--threads', type=int, default=5, help='concurrent threads (default: 5)')
93
- parser.add_argument('-f', '--filenames', nargs='+', default=[], help=f'filter filenames using regex (space-separated)', metavar='REGEX')
94
- parser.add_argument('-e', '--extensions',nargs='+', default=[], help='only show filenames with these extensions (space-separated, e.g. `docx xlsx` for only word & excel docs)', metavar='EXT')
95
- parser.add_argument('--exclude-extensions',nargs='+', default=[], help='ignore files with these extensions', metavar='EXT')
96
- parser.add_argument('-c', '--content', nargs='+', default=[], help='search for file content using regex (multiple supported)', metavar='REGEX')
97
- parser.add_argument('--sharenames', nargs='+', default=[], help='only search shares with these names (multiple supported)', metavar='SHARE')
98
- parser.add_argument('--exclude-sharenames', nargs='*', default=['IPC$', 'C$', 'ADMIN$', 'PRINT$'],help='don\'t search shares with these names (multiple supported)', metavar='SHARE')
99
- parser.add_argument('--dirnames', nargs='+', default=[], help='only search directories containing these strings (multiple supported)', metavar='DIR')
100
- parser.add_argument('--exclude-dirnames', nargs='+', default=[], help='don\'t search directories containing these strings (multiple supported)', metavar='DIR')
101
- parser.add_argument('-q', '--quiet', action='store_true', help='don\'t display matching file content')
102
- parser.add_argument('-n', '--no-download', action='store_true', help='don\'t download matching files')
103
- parser.add_argument('-mfail', '--max-failed-logons', type=int, help='limit failed logons', metavar='INT')
104
- parser.add_argument('-o', '--or-logic', action='store_true', help=f'use OR logic instead of AND (files are downloaded if filename OR extension OR content match)')
105
- parser.add_argument('-s', '--max-filesize', type=human_to_int, default=human_to_int('10M'), help=f'don\'t retrieve files over this size, e.g. "500K" or ".5M" (default: 10M)', metavar='SIZE')
106
- parser.add_argument('-v', '--verbose', action='store_true', help='show debugging messages')
107
-
82
+ """
83
+
84
+ parser = argparse.ArgumentParser(
85
+ description="Scan for juicy data on SMB shares. Matching files and logs are stored in $HOME/.manspider. All filters are case-insensitive."
86
+ )
87
+ parser.add_argument(
88
+ "targets",
89
+ nargs="+",
90
+ type=make_targets,
91
+ help='IPs, Hostnames, CIDR ranges, or files containing targets to spider (NOTE: local searching also supported, specify directory name or keyword "loot" to search downloaded files)',
92
+ )
93
+ parser.add_argument("-u", "--username", default="", help="username for authentication")
94
+ parser.add_argument("-p", "--password", default="", help="password for authentication")
95
+ parser.add_argument("-d", "--domain", default="", help="domain for authentication")
96
+ parser.add_argument("-l", "--loot-dir", default="", help="loot directory (default ~/.manspider/)")
97
+ parser.add_argument("-m", "--maxdepth", type=int, default=10, help="maximum depth to spider (default: 10)")
98
+ parser.add_argument("-H", "--hash", default="", help="NTLM hash for authentication")
99
+ parser.add_argument(
100
+ "-k",
101
+ "--kerberos",
102
+ action="store_true",
103
+ help="Use Kerberos authentication. Grabs credentials from ccache file (KRB5CCNAME) based on target parameters",
104
+ )
105
+ parser.add_argument(
106
+ "-aesKey",
107
+ "--aes-key",
108
+ action="store",
109
+ metavar="HEX",
110
+ help="AES key to use for Kerberos Authentication (128 or 256 bits)",
111
+ )
112
+ parser.add_argument(
113
+ "-dc-ip",
114
+ "--dc-ip",
115
+ action="store",
116
+ metavar="IP",
117
+ help="IP Address of the domain controller. If omitted it will use the domain part (FQDN) specified in the target parameter",
118
+ )
119
+ parser.add_argument("-t", "--threads", type=int, default=5, help="concurrent threads (default: 5)")
120
+ parser.add_argument(
121
+ "-f",
122
+ "--filenames",
123
+ nargs="+",
124
+ default=[],
125
+ help="filter filenames using regex (space-separated)",
126
+ metavar="REGEX",
127
+ )
128
+ parser.add_argument(
129
+ "-e",
130
+ "--extensions",
131
+ nargs="+",
132
+ default=[],
133
+ help="only show filenames with these extensions (space-separated, e.g. `docx xlsx` for only word & excel docs)",
134
+ metavar="EXT",
135
+ )
136
+ parser.add_argument(
137
+ "--exclude-extensions", nargs="+", default=[], help="ignore files with these extensions", metavar="EXT"
138
+ )
139
+ parser.add_argument(
140
+ "-c",
141
+ "--content",
142
+ nargs="+",
143
+ default=[],
144
+ help="search for file content using regex (multiple supported)",
145
+ metavar="REGEX",
146
+ )
147
+ parser.add_argument(
148
+ "--sharenames",
149
+ nargs="+",
150
+ default=[],
151
+ help="only search shares with these names (multiple supported)",
152
+ metavar="SHARE",
153
+ )
154
+ parser.add_argument(
155
+ "--exclude-sharenames",
156
+ nargs="*",
157
+ default=["IPC$", "C$", "ADMIN$", "PRINT$"],
158
+ help="don't search shares with these names (multiple supported)",
159
+ metavar="SHARE",
160
+ )
161
+ parser.add_argument(
162
+ "--dirnames",
163
+ nargs="+",
164
+ default=[],
165
+ help="only search directories containing these strings (multiple supported)",
166
+ metavar="DIR",
167
+ )
168
+ parser.add_argument(
169
+ "--exclude-dirnames",
170
+ nargs="+",
171
+ default=[],
172
+ help="don't search directories containing these strings (multiple supported)",
173
+ metavar="DIR",
174
+ )
175
+ parser.add_argument("-q", "--quiet", action="store_true", help="don't display matching file content")
176
+ parser.add_argument("-n", "--no-download", action="store_true", help="don't download matching files")
177
+ parser.add_argument("-mfail", "--max-failed-logons", type=int, help="limit failed logons", metavar="INT")
178
+ parser.add_argument(
179
+ "-o",
180
+ "--or-logic",
181
+ action="store_true",
182
+ help="use OR logic instead of AND (files are downloaded if filename OR extension OR content match)",
183
+ )
184
+ parser.add_argument(
185
+ "-s",
186
+ "--max-filesize",
187
+ type=human_to_int,
188
+ default=human_to_int("10M"),
189
+ help='don\'t retrieve files over this size, e.g. "500K" or ".5M" (default: 10M)',
190
+ metavar="SIZE",
191
+ )
192
+ parser.add_argument("-v", "--verbose", action="store_true", help="show debugging messages")
193
+ parser.add_argument(
194
+ "--modified-after",
195
+ type=str,
196
+ metavar="DATE",
197
+ help="only show files modified after this date (format: YYYY-MM-DD)",
198
+ )
199
+ parser.add_argument(
200
+ "--modified-before",
201
+ type=str,
202
+ metavar="DATE",
203
+ help="only show files modified before this date (format: YYYY-MM-DD)",
204
+ )
108
205
 
109
206
  syntax_error = False
110
207
  try:
111
-
112
208
  if len(sys.argv) == 1:
113
209
  parser.print_help()
114
210
  sys.exit(1)
@@ -116,22 +212,41 @@ def main():
116
212
  options = parser.parse_args()
117
213
 
118
214
  if options.verbose:
119
- log.setLevel('DEBUG')
215
+ log.setLevel("DEBUG")
120
216
 
121
- if options.kerberos and not "KRB5CCNAME" in os.environ:
217
+ if options.kerberos and "KRB5CCNAME" not in os.environ:
122
218
  log.error("KRB5CCNAME is not set in the environment")
123
219
  sys.exit(1)
124
220
 
221
+ # Parse date filters
222
+ if options.modified_after:
223
+ try:
224
+ options.modified_after = datetime.strptime(options.modified_after, "%Y-%m-%d")
225
+ except ValueError:
226
+ log.error("Invalid date format for --modified-after. Use YYYY-MM-DD")
227
+ sys.exit(1)
228
+ else:
229
+ options.modified_after = None
230
+
231
+ if options.modified_before:
232
+ try:
233
+ options.modified_before = datetime.strptime(options.modified_before, "%Y-%m-%d")
234
+ except ValueError:
235
+ log.error("Invalid date format for --modified-before. Use YYYY-MM-DD")
236
+ sys.exit(1)
237
+ else:
238
+ options.modified_before = None
239
+
125
240
  # make sure extension formats are valid
126
241
  for i, extension in enumerate(options.extensions):
127
- if extension and not extension.startswith('.'):
128
- extension = f'.{extension}'
242
+ if extension and not extension.startswith("."):
243
+ extension = f".{extension}"
129
244
  options.extensions[i] = extension.lower()
130
245
 
131
246
  # make sure extension blacklist is valid
132
247
  for i, extension in enumerate(options.exclude_extensions):
133
- if not extension.startswith('.'):
134
- extension = f'.{extension}'
248
+ if not extension.startswith("."):
249
+ extension = f".{extension}"
135
250
  options.exclude_extensions[i] = extension.lower()
136
251
 
137
252
  # lowercase share names
@@ -154,11 +269,11 @@ def main():
154
269
  except argparse.ArgumentError as e:
155
270
  syntax_error = True
156
271
  log.error(e)
157
- log.error('Check your syntax')
272
+ log.error("Check your syntax")
158
273
  sys.exit(2)
159
274
 
160
275
  except KeyboardInterrupt:
161
- log.critical('Interrupted')
276
+ log.critical("Interrupted")
162
277
  sys.exit(1)
163
278
 
164
279
  # pretty format all errors if we're not debugging
@@ -166,10 +281,10 @@ def main():
166
281
  if log.level <= logging.DEBUG:
167
282
  log.critical(traceback.format_exc())
168
283
  else:
169
- log.critical(f'Critical error (-v to debug): {e}')
284
+ log.critical(f"Critical error (-v to debug): {e}")
170
285
 
171
286
  finally:
172
- if '-h' in sys.argv or '--help' in sys.argv or len(sys.argv) == 1 or syntax_error:
287
+ if "-h" in sys.argv or "--help" in sys.argv or len(sys.argv) == 1 or syntax_error:
173
288
  print(examples)
174
289
  sleep(1)
175
290
  try:
@@ -184,5 +299,5 @@ def main():
184
299
  pass
185
300
 
186
301
 
187
- if __name__ == '__main__':
188
- main()
302
+ if __name__ == "__main__":
303
+ main()