man-spider 1.1.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff compares the contents of two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
@@ -1,62 +1,62 @@
  import string
  import logging
  import pathlib
- from .smb import *
- from .file import *
- from .util import *
- from .errors import *
  import multiprocessing
  from shutil import move
- from .processpool import *
  from traceback import format_exc
+ from datetime import datetime
 
+ from man_spider.lib.smb import *
+ from man_spider.lib.file import *
+ from man_spider.lib.util import *
+ from man_spider.lib.errors import *
+ from man_spider.lib.processpool import *
 
- log = logging.getLogger('manspider.spiderling')
+
+ log = logging.getLogger("manspider.spiderling")
 
 
  class SpiderlingMessage:
-     '''
+     """
      Message which gets sent back to the parent through parent_queue
-     '''
+     """
 
      def __init__(self, message_type, target, content):
-         '''
+         """
          "message_type" is a string, and can be:
              "e" - error
              "a" - authentication failure
-         '''
+         """
          self.type = message_type
          self.target = target
          self.content = content
 
 
-
  class Spiderling:
-     '''
+     """
      Enumerates SMB shares and spiders all possible directories/filenames up to maxdepth
      Designed to be threadable
-     '''
+     """
 
      # these extensions don't get parsed for content, unless explicitly specified
      dont_parse = [
-         '.png',
-         '.gif',
-         '.tiff',
-         '.msi',
-         '.bmp',
-         '.jpg',
-         '.jpeg',
-         '.zip',
-         '.gz',
-         '.bz2',
-         '.7z',
-         '.xz',
+         ".png",
+         ".gif",
+         ".tiff",
+         ".msi",
+         ".bmp",
+         ".jpg",
+         ".jpeg",
+         ".zip",
+         ".gz",
+         ".bz2",
+         ".7z",
+         ".xz",
      ]
 
      def __init__(self, target, parent):
 
          try:
-
              self.parent = parent
              self.target = target
 
@@ -69,19 +69,20 @@ class Spiderling:
              self.local = False
 
              self.smb_client = SMBClient(
-                 target,
+                 target.host,
                  parent.username,
                  parent.password,
                  parent.domain,
                  parent.nthash,
                  parent.use_kerberos,
                  parent.aes_key,
-                 parent.dc_ip
+                 parent.dc_ip,
+                 port=target.port,
              )
 
              logon_result = self.smb_client.login()
              if logon_result not in [True, None]:
-                 self.message_parent('a', logon_result)
+                 self.message_parent("a", logon_result)
 
              if logon_result is not None:
                  self.go()
@@ -91,20 +92,19 @@ class Spiderling:
              self.parser_process = None
 
          except KeyboardInterrupt:
-             log.critical('Spiderling Interrupted')
+             log.critical("Spiderling Interrupted")
 
          # log all exceptions
          except Exception as e:
              if log.level <= logging.DEBUG:
                  log.error(format_exc())
              else:
-                 log.error(f'Error in spiderling: {e}')
-
+                 log.error(f"Error in spiderling: {e}")
 
      def go(self):
-         '''
+         """
          go spider go spider go
-         '''
+         """
 
          # local files
          if self.local:
@@ -117,7 +117,6 @@ class Spiderling:
          else:
              # remote files
              for file in self.files:
-
                  # if content searching is enabled, parse the file
                  if self.parent.parser.content_filters:
                      try:
@@ -129,33 +128,42 @@ class Spiderling:
 
                  # otherwise, just save it
                  elif not self.local:
-                     log.info(f'{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})')
+                     log.info(f"{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})")
                      if not self.parent.no_download:
                          self.save_file(file)
 
-         log.info(f'Finished spidering {self.target}')
-
-
+         log.info(f"Finished spidering {self.target}")
 
      @property
      def files(self):
-         '''
+         """
          Yields all files on the target to be parsed/downloaded
          Premptively download matching files into temp directory
-         '''
+         """
 
          if self.local:
              for file in list(list_files(self.target)):
                  if self.extension_blacklisted(file):
-                     log.debug(f'{self.target}: Skipping {file}: extension is blacklisted')
+                     log.debug(f"{self.target}: Skipping {file}: extension is blacklisted")
                      continue
+
+                 if self.parent.modified_after or self.parent.modified_before:
+                     try:
+                         mod_time = file.stat().st_mtime
+                     except Exception:
+                         mod_time = None
+
+                     if not self.date_match(mod_time):
+                         log.debug(f"Skipping {file}: does not match date filters")
+                         continue
+
                  if self.path_match(file) or (self.parent.or_logic and self.parent.parser.content_filters):
                      if self.path_match(file):
                          log.debug(pathlib.Path(file).relative_to(self.target))
                      if not self.is_binary_file(file):
                          yield file
                  else:
-                     log.debug(f'Skipping {file}: does not match filename/extension filters')
+                     log.debug(f"Skipping {file}: does not match filename/extension filters")
 
          else:
              for share in self.shares:
@@ -164,16 +172,13 @@ class Spiderling:
                          self.get_file(remote_file)
                      yield remote_file
 
-
-
      def parse_file(self, file):
-         '''
+         """
          Simple wrapper around self.parent.parser.parse_file()
          For sole purpose of threading
-         '''
+         """
 
          try:
-
              if type(file) == RemoteFile:
                  matches = self.parent.parser.parse_file(str(file.tmp_filename), pretty_filename=str(file))
                  if matches and not self.parent.no_download:
@@ -182,7 +187,7 @@ class Spiderling:
                      file.tmp_filename.unlink()
 
              else:
-                 log.debug(f'Found file: {file}')
+                 log.debug(f"Found file: {file}")
                  self.parent.parser.parse_file(file, file)
 
          # log all exceptions
@@ -190,73 +195,87 @@ class Spiderling:
              if log.level <= logging.DEBUG:
                  log.error(format_exc())
              else:
-                 log.error(f'Error parsing file {file}: {e}')
+                 log.error(f"Error parsing file {file}: {e}")
 
          except KeyboardInterrupt:
-             log.critical('File parsing interrupted')
-
+             log.critical("File parsing interrupted")
 
      @property
      def shares(self):
-         '''
+         """
          Lists all shares on single target
-         '''
+         Includes both enumerated shares and user-specified shares (which may be hidden from enumeration)
+         """
+
+         # Keep track of shares we've already yielded to avoid duplicates
+         yielded_shares = set()
 
+         # First, yield enumerated shares that match filters
          for share in self.smb_client.shares:
              if self.share_match(share):
+                 yielded_shares.add(share.lower())
                  yield share
 
-
-
-     def list_files(self, share, path='', depth=0, tries=2):
-         '''
+         # If user specified a share whitelist, also try those shares even if not enumerated
+         # (some shares are hidden from enumeration but still accessible)
+         if self.parent.share_whitelist:
+             for share in self.parent.share_whitelist:
+                 if share.lower() not in yielded_shares:
+                     # Check if it's blacklisted
+                     if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
+                         log.debug(f"{self.target}: Adding non-enumerated share from whitelist: {share}")
+                         yielded_shares.add(share.lower())
+                         yield share
+
+     def list_files(self, share, path="", depth=0, tries=2):
+         """
          List files inside a specific directory
          Only yield files which conform to all filters (except content)
-         '''
+         """
 
          if depth < self.parent.maxdepth and self.dir_match(path):
-
              files = []
              while tries > 0:
                  try:
                      files = list(self.smb_client.ls(share, path))
                      break
                  except FileListError as e:
-                     if 'ACCESS_DENIED' in str(e):
-                         log.debug(f'{self.target}: Error listing files: {e}')
+                     if "ACCESS_DENIED" in str(e):
+                         log.debug(f"{self.target}: Error listing files: {e}")
                          break
                      else:
                          tries -= 1
 
              if files:
-                 log.debug(f'{self.target}: {share}{path}: contains {len(files):,} items')
+                 log.debug(f"{self.target}: {share}{path}: contains {len(files):,} items")
 
              for f in files:
                  name = f.get_longname()
-                 full_path = f'{path}\\{name}'
+                 full_path = f"{path}\\{name}"
                  # if it's a directory, go deeper
                  if f.is_directory():
-                     for file in self.list_files(share, full_path, (depth+1)):
+                     for file in self.list_files(share, full_path, (depth + 1)):
                          yield file
 
                  else:
-
                      # skip the file if it didn't match extension filters
                      if self.extension_blacklisted(name):
-                         log.debug(f'{self.target}: Skipping {share}{full_path}: extension is blacklisted')
+                         log.debug(f"{self.target}: Skipping {share}{full_path}: extension is blacklisted")
                          continue
 
                      if not self.path_match(name):
                          if not (
-                             # all of these have to be true in order to get past this point
-                             # "or logic" is enabled
-                             self.parent.or_logic and
-                             # and file does not have a "don't parse" extension
-                             (not self.is_binary_file(name)) and
-                             # and content filters are enabled
-                             self.parent.parser.content_filters
-                         ):
-                             log.debug(f'{self.target}: Skipping {share}{full_path}: filename/extensions do not match')
+                             # all of these have to be true in order to get past this point
+                             # "or logic" is enabled
+                             self.parent.or_logic
+                             and
+                             # and file does not have a "don't parse" extension
+                             (not self.is_binary_file(name))
+                             and
+                             # and content filters are enabled
+                             self.parent.parser.content_filters
+                         ):
+                             log.debug(f"{self.target}: Skipping {share}{full_path}: filename/extensions do not match")
                              continue
 
                      # try to get the size of the file
@@ -266,19 +285,32 @@ class Spiderling:
                          self.smb_client.handle_impacket_error(e)
                          continue
 
+                     if self.parent.modified_after or self.parent.modified_before:
+                         try:
+                             mod_time = f.get_mtime_epoch()
+                         except Exception as e:
+                             self.smb_client.handle_impacket_error(e)
+                             mod_time = None
+
+                         # check if file matches date filters
+                         if not self.date_match(mod_time):
+                             log.debug(f"{self.target}: Skipping {share}{full_path}: does not match date filters")
+                             continue
+
                      # make the RemoteFile object (the file won't be read yet)
-                     full_path_fixed = full_path.lstrip('\\')
+                     full_path_fixed = full_path.lstrip("\\")
                      remote_file = RemoteFile(full_path_fixed, share, self.target, size=filesize)
 
                      # if it's a non-empty file that's smaller than the size limit
                      if filesize > 0 and filesize < self.parent.max_filesize:
-
                          # if it matched filename/extension filters and we're downloading files
-                         if (self.parent.file_extensions or self.parent.filename_filters) and not self.parent.no_download:
+                         if (
+                             self.parent.file_extensions or self.parent.filename_filters
+                         ) and not self.parent.no_download:
                              # but the extension is marked as "don't parse"
                              if self.is_binary_file(name):
                                  # don't parse it, instead save it and continue
-                                 log.info(f'{self.target}: {remote_file.share}\\{remote_file.name}')
+                                 log.info(f"{self.target}: {remote_file.share}\\{remote_file.name}")
                                  if self.get_file(remote_file):
                                      self.save_file(remote_file)
                                  continue
@@ -287,48 +319,46 @@ class Spiderling:
                          yield remote_file
 
                      else:
-                         log.debug(f'{self.target}: {full_path} is either empty or too large')
-
+                         log.debug(f"{self.target}: {full_path} is either empty or too large")
 
      def path_match(self, file):
-         '''
+         """
          Based on whether "or" logic is enabled, return True or False
          if the filename + extension meets the requirements
-         '''
+         """
          filename_match = self.filename_match(file)
          extension_match = self.extension_whitelisted(file)
          if self.parent.or_logic:
-             return (filename_match and self.parent.filename_filters) or (extension_match and self.parent.file_extensions)
+             return (filename_match and self.parent.filename_filters) or (
+                 extension_match and self.parent.file_extensions
+             )
          else:
              return filename_match and extension_match
 
-
-
      def share_match(self, share):
-         '''
+         """
          Return true if "share" matches any of the share filters
-         '''
+         """
 
          # if the share has been whitelisted
-         if ((not self.parent.share_whitelist) or (share.lower() in self.parent.share_whitelist)):
+         if (not self.parent.share_whitelist) or (share.lower() in self.parent.share_whitelist):
              # and hasn't been blacklisted
-             if ((not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist)):
+             if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
                  return True
              else:
-                 log.debug(f'{self.target}: Skipping blacklisted share: {share}')
+                 log.debug(f"{self.target}: Skipping blacklisted share: {share}")
          else:
-             log.debug(f'{self.target}: Skipping share {share}: not in whitelist')
+             log.debug(f"{self.target}: Skipping share {share}: not in whitelist")
 
          return False
 
-
      def dir_match(self, path):
-         '''
+         """
          Return true if "path" matches any of the directory filters
-         '''
+         """
 
          # convert forward slashes to backwards
-         dirname = str(path).lower().replace('/', '\\')
+         dirname = str(path).lower().replace("/", "\\")
 
          # root path always passes
          if not path:
@@ -340,44 +370,43 @@ class Spiderling:
              if (not self.parent.dir_blacklist) or not any([k.lower() in dirname for k in self.parent.dir_blacklist]):
                  return True
              else:
-                 log.debug(f'{self.target}: Skipping blacklisted dir: {path}')
+                 log.debug(f"{self.target}: Skipping blacklisted dir: {path}")
          else:
-             log.debug(f'{self.target}: Skipping dir {path}: not in whitelist')
+             log.debug(f"{self.target}: Skipping dir {path}: not in whitelist")
 
          return False
 
-
      def filename_match(self, filename):
-         '''
+         """
          Return true if "filename" matches any of the filename filters
-         '''
+         """
 
-         if (not self.parent.filename_filters) or any([f_regex.match(str(pathlib.Path(filename).stem)) for f_regex in self.parent.filename_filters]):
+         if (not self.parent.filename_filters) or any(
+             [f_regex.match(str(pathlib.Path(filename).stem)) for f_regex in self.parent.filename_filters]
+         ):
              return True
          else:
-             log.debug(f'{self.target}: {filename} does not match filename filters')
+             log.debug(f"{self.target}: {filename} does not match filename filters")
 
          return False
 
-
      def is_binary_file(self, filename):
-         '''
+         """
          Returns true if file is a bad extension type, e.g. encrypted or compressed
-         '''
+         """
 
-         extension = ''.join(pathlib.Path(filename).suffixes).lower()
+         extension = "".join(pathlib.Path(filename).suffixes).lower()
          if any([extension.endswith(e.lower()) for e in self.dont_parse]):
              if extension not in self.parent.file_extensions:
-                 log.debug(f'{self.target}: Not parsing {filename} due to undesirable extension')
+                 log.debug(f"{self.target}: Not parsing {filename} due to undesirable extension")
                  return True
          return False
 
-
      def extension_blacklisted(self, filename):
-         '''
+         """
          Return True if folder, file name, or extension has been blacklisted
-         '''
-         extension = ''.join(pathlib.Path(filename).suffixes).lower()
+         """
+         extension = "".join(pathlib.Path(filename).suffixes).lower()
          excluded_extensions = list(self.parent.extension_blacklist)
 
          if not excluded_extensions:
@@ -386,39 +415,34 @@ class Spiderling:
          if not any([extension.endswith(e) for e in excluded_extensions]):
              return False
          else:
-             log.debug(f'{self.target}: Skipping file with blacklisted extension: {filename}')
+             log.debug(f"{self.target}: Skipping file with blacklisted extension: {filename}")
              return True
 
-
      def extension_whitelisted(self, filename):
-         '''
+         """
          Return True if file extension has been whitelisted
-         '''
+         """
          # a .tar.gz file will match both filters ".gz" and ".tar.gz"
-         extension = ''.join(pathlib.Path(filename).suffixes).lower()
+         extension = "".join(pathlib.Path(filename).suffixes).lower()
          extensions = list(self.parent.file_extensions)
 
          if not extensions:
              return True
 
          # if whitelist check passes
-         if any([(extension.endswith(e) if e else extension == e) for e in extensions]):
-             log.debug(f'{self.target}: {filename} matches extension filters')
+         if any([(extension.endswith(e) if e else extension == e) for e in extensions]):
+             log.debug(f"{self.target}: {filename} matches extension filters")
              return True
          else:
-             log.debug(f'{self.target}: Skipping file {filename}, does not match extension filters')
+             log.debug(f"{self.target}: Skipping file {filename}, does not match extension filters")
              return False
 
-
-     def message_parent(self, message_type, content=''):
-         '''
+     def message_parent(self, message_type, content=""):
+         """
          Send a message to the parent spider
-         '''
-
-         self.parent.spiderling_queue.put(
-             SpiderlingMessage(message_type, self.target, content)
-         )
+         """
 
+         self.parent.spiderling_queue.put(SpiderlingMessage(message_type, self.target, content))
 
      def parse_local_files(self, files):
 
@@ -426,37 +450,60 @@ class Spiderling:
              for r in pool.map(self.parse_file, files):
                  pass
 
-
      def save_file(self, remote_file):
-         '''
+         """
          Moves a file from temp storage into the loot directory
-         '''
+         """
 
-         allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + '._ '
+         allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + "._ "
 
          # replace backslashes with underscores to preserve directory names
-         loot_filename = str(remote_file).replace('\\', '_')
+         loot_filename = str(remote_file).replace("\\", "_")
          # remove weird characters
-         loot_filename = ''.join([c for c in loot_filename if c in allowed_chars])
+         loot_filename = "".join([c for c in loot_filename if c in allowed_chars])
          loot_dest = self.parent.loot_dir / loot_filename
          try:
              move(str(remote_file.tmp_filename), str(loot_dest))
          except Exception:
-             log.warning(f'Error saving {remote_file}')
-
+             log.warning(f"Error saving {remote_file}")
 
      def get_file(self, remote_file):
-         '''
+         """
          Attempts to retrieve "remote_file" from share and returns True if successful
-         '''
+         """
 
          try:
              smb_client = self.parent.get_smb_client(self.target)
-             log.debug(f'{self.target}: Downloading {remote_file.share}\\{remote_file.name}')
+             log.debug(f"{self.target}: Downloading {remote_file.share}\\{remote_file.name}")
              remote_file.get(smb_client)
              return True
          except FileRetrievalError as e:
-             log.debug(f'{self.target}: {e}')
+             log.debug(f"{self.target}: {e}")
 
          return False
 
+     def date_match(self, file_time):
+         """
+         Return True if file modification time matches date filters
+         file_time is a unix timestamp
+         """
+
+         if file_time is None:
+             return True
+
+         # Convert timestamp to datetime
+         file_date = datetime.fromtimestamp(file_time)
+
+         # Check modified_after
+         if self.parent.modified_after:
+             if file_date < self.parent.modified_after:
+                 log.debug(f"{self.target}: File too old: {file_date.strftime('%Y-%m-%d')}")
+                 return False
+
+         # Check modified_before
+         if self.parent.modified_before:
+             if file_date > self.parent.modified_before:
+                 log.debug(f"{self.target}: File too new: {file_date.strftime('%Y-%m-%d')}")
+                 return False
+
+         return True