man-spider 1.1.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import pathlib
 import multiprocessing
 from shutil import move
 from traceback import format_exc
+from datetime import datetime
 
 from man_spider.lib.smb import *
 from man_spider.lib.file import *
@@ -12,52 +13,50 @@ from man_spider.lib.errors import *
 from man_spider.lib.processpool import *
 
 
-log = logging.getLogger('manspider.spiderling')
+log = logging.getLogger("manspider.spiderling")
 
 
 class SpiderlingMessage:
-    '''
+    """
     Message which gets sent back to the parent through parent_queue
-    '''
+    """
 
     def __init__(self, message_type, target, content):
-        '''
+        """
         "message_type" is a string, and can be:
             "e" - error
             "a" - authentication failure
-        '''
+        """
         self.type = message_type
         self.target = target
         self.content = content
 
 
-
 class Spiderling:
-    '''
+    """
     Enumerates SMB shares and spiders all possible directories/filenames up to maxdepth
     Designed to be threadable
-    '''
+    """
 
     # these extensions don't get parsed for content, unless explicitly specified
     dont_parse = [
-        '.png',
-        '.gif',
-        '.tiff',
-        '.msi',
-        '.bmp',
-        '.jpg',
-        '.jpeg',
-        '.zip',
-        '.gz',
-        '.bz2',
-        '.7z',
-        '.xz',
+        ".png",
+        ".gif",
+        ".tiff",
+        ".msi",
+        ".bmp",
+        ".jpg",
+        ".jpeg",
+        ".zip",
+        ".gz",
+        ".bz2",
+        ".7z",
+        ".xz",
     ]
 
     def __init__(self, target, parent):
 
         try:
-
             self.parent = parent
             self.target = target
 
@@ -70,19 +69,20 @@ class Spiderling:
                 self.local = False
 
                 self.smb_client = SMBClient(
-                    target,
+                    target.host,
                     parent.username,
                     parent.password,
                     parent.domain,
                     parent.nthash,
                     parent.use_kerberos,
                     parent.aes_key,
-                    parent.dc_ip
+                    parent.dc_ip,
+                    port=target.port,
                 )
 
                 logon_result = self.smb_client.login()
                 if logon_result not in [True, None]:
-                    self.message_parent('a', logon_result)
+                    self.message_parent("a", logon_result)
 
                 if logon_result is not None:
                     self.go()
@@ -92,20 +92,19 @@ class Spiderling:
             self.parser_process = None
 
         except KeyboardInterrupt:
-            log.critical('Spiderling Interrupted')
+            log.critical("Spiderling Interrupted")
 
         # log all exceptions
         except Exception as e:
             if log.level <= logging.DEBUG:
                 log.error(format_exc())
             else:
-                log.error(f'Error in spiderling: {e}')
-
+                log.error(f"Error in spiderling: {e}")
 
     def go(self):
-        '''
+        """
         go spider go spider go
-        '''
+        """
 
         # local files
         if self.local:
@@ -118,7 +117,6 @@ class Spiderling:
         else:
             # remote files
             for file in self.files:
-
                 # if content searching is enabled, parse the file
                 if self.parent.parser.content_filters:
                     try:
@@ -130,33 +128,42 @@ class Spiderling:
 
                 # otherwise, just save it
                 elif not self.local:
-                    log.info(f'{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})')
+                    log.info(f"{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})")
                     if not self.parent.no_download:
                         self.save_file(file)
 
-        log.info(f'Finished spidering {self.target}')
-
-
+        log.info(f"Finished spidering {self.target}")
 
     @property
     def files(self):
-        '''
+        """
         Yields all files on the target to be parsed/downloaded
         Premptively download matching files into temp directory
-        '''
+        """
 
         if self.local:
             for file in list(list_files(self.target)):
                 if self.extension_blacklisted(file):
-                    log.debug(f'{self.target}: Skipping {file}: extension is blacklisted')
+                    log.debug(f"{self.target}: Skipping {file}: extension is blacklisted")
                     continue
+
+                if self.parent.modified_after or self.parent.modified_before:
+                    try:
+                        mod_time = file.stat().st_mtime
+                    except Exception:
+                        mod_time = None
+
+                    if not self.date_match(mod_time):
+                        log.debug(f"Skipping {file}: does not match date filters")
+                        continue
+
                 if self.path_match(file) or (self.parent.or_logic and self.parent.parser.content_filters):
                     if self.path_match(file):
                         log.debug(pathlib.Path(file).relative_to(self.target))
                     if not self.is_binary_file(file):
                         yield file
                 else:
-                    log.debug(f'Skipping {file}: does not match filename/extension filters')
+                    log.debug(f"Skipping {file}: does not match filename/extension filters")
 
         else:
             for share in self.shares:
@@ -165,16 +172,13 @@ class Spiderling:
                     self.get_file(remote_file)
                     yield remote_file
 
-
-
     def parse_file(self, file):
-        '''
+        """
         Simple wrapper around self.parent.parser.parse_file()
         For sole purpose of threading
-        '''
+        """
 
         try:
-
             if type(file) == RemoteFile:
                 matches = self.parent.parser.parse_file(str(file.tmp_filename), pretty_filename=str(file))
                 if matches and not self.parent.no_download:
@@ -183,7 +187,7 @@ class Spiderling:
                     file.tmp_filename.unlink()
 
             else:
-                log.debug(f'Found file: {file}')
+                log.debug(f"Found file: {file}")
                 self.parent.parser.parse_file(file, file)
 
         # log all exceptions
@@ -191,73 +195,87 @@ class Spiderling:
             if log.level <= logging.DEBUG:
                 log.error(format_exc())
             else:
-                log.error(f'Error parsing file {file}: {e}')
+                log.error(f"Error parsing file {file}: {e}")
 
         except KeyboardInterrupt:
-            log.critical('File parsing interrupted')
-
+            log.critical("File parsing interrupted")
 
     @property
     def shares(self):
-        '''
+        """
         Lists all shares on single target
-        '''
+        Includes both enumerated shares and user-specified shares (which may be hidden from enumeration)
+        """
+
+        # Keep track of shares we've already yielded to avoid duplicates
+        yielded_shares = set()
 
+        # First, yield enumerated shares that match filters
         for share in self.smb_client.shares:
             if self.share_match(share):
+                yielded_shares.add(share.lower())
                 yield share
 
-
-
-    def list_files(self, share, path='', depth=0, tries=2):
-        '''
+        # If user specified a share whitelist, also try those shares even if not enumerated
+        # (some shares are hidden from enumeration but still accessible)
+        if self.parent.share_whitelist:
+            for share in self.parent.share_whitelist:
+                if share.lower() not in yielded_shares:
+                    # Check if it's blacklisted
+                    if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
+                        log.debug(f"{self.target}: Adding non-enumerated share from whitelist: {share}")
+                        yielded_shares.add(share.lower())
+                        yield share
+
+    def list_files(self, share, path="", depth=0, tries=2):
+        """
         List files inside a specific directory
         Only yield files which conform to all filters (except content)
-        '''
+        """
 
         if depth < self.parent.maxdepth and self.dir_match(path):
-
             files = []
             while tries > 0:
                 try:
                     files = list(self.smb_client.ls(share, path))
                     break
                 except FileListError as e:
-                    if 'ACCESS_DENIED' in str(e):
-                        log.debug(f'{self.target}: Error listing files: {e}')
+                    if "ACCESS_DENIED" in str(e):
+                        log.debug(f"{self.target}: Error listing files: {e}")
                         break
                     else:
                         tries -= 1
 
             if files:
-                log.debug(f'{self.target}: {share}{path}: contains {len(files):,} items')
+                log.debug(f"{self.target}: {share}{path}: contains {len(files):,} items")
 
             for f in files:
                 name = f.get_longname()
-                full_path = f'{path}\\{name}'
+                full_path = f"{path}\\{name}"
                 # if it's a directory, go deeper
                 if f.is_directory():
-                    for file in self.list_files(share, full_path, (depth+1)):
+                    for file in self.list_files(share, full_path, (depth + 1)):
                         yield file
 
                 else:
-
                     # skip the file if it didn't match extension filters
                     if self.extension_blacklisted(name):
-                        log.debug(f'{self.target}: Skipping {share}{full_path}: extension is blacklisted')
+                        log.debug(f"{self.target}: Skipping {share}{full_path}: extension is blacklisted")
                         continue
 
                     if not self.path_match(name):
                         if not (
-                            # all of these have to be true in order to get past this point
-                            # "or logic" is enabled
-                            self.parent.or_logic and
-                            # and file does not have a "don't parse" extension
-                            (not self.is_binary_file(name)) and
-                            # and content filters are enabled
-                            self.parent.parser.content_filters
-                        ):
-                            log.debug(f'{self.target}: Skipping {share}{full_path}: filename/extensions do not match')
+                            # all of these have to be true in order to get past this point
+                            # "or logic" is enabled
+                            self.parent.or_logic
+                            and
+                            # and file does not have a "don't parse" extension
+                            (not self.is_binary_file(name))
+                            and
+                            # and content filters are enabled
+                            self.parent.parser.content_filters
+                        ):
+                            log.debug(f"{self.target}: Skipping {share}{full_path}: filename/extensions do not match")
                            continue
 
                    # try to get the size of the file
@@ -267,19 +285,32 @@ class Spiderling:
                         self.smb_client.handle_impacket_error(e)
                         continue
 
+                    if self.parent.modified_after or self.parent.modified_before:
+                        try:
+                            mod_time = f.get_mtime_epoch()
+                        except Exception as e:
+                            self.smb_client.handle_impacket_error(e)
+                            mod_time = None
+
+                        # check if file matches date filters
+                        if not self.date_match(mod_time):
+                            log.debug(f"{self.target}: Skipping {share}{full_path}: does not match date filters")
+                            continue
+
                     # make the RemoteFile object (the file won't be read yet)
-                    full_path_fixed = full_path.lstrip('\\')
+                    full_path_fixed = full_path.lstrip("\\")
                     remote_file = RemoteFile(full_path_fixed, share, self.target, size=filesize)
 
                     # if it's a non-empty file that's smaller than the size limit
                     if filesize > 0 and filesize < self.parent.max_filesize:
-
                         # if it matched filename/extension filters and we're downloading files
-                        if (self.parent.file_extensions or self.parent.filename_filters) and not self.parent.no_download:
+                        if (
+                            self.parent.file_extensions or self.parent.filename_filters
+                        ) and not self.parent.no_download:
                             # but the extension is marked as "don't parse"
                             if self.is_binary_file(name):
                                 # don't parse it, instead save it and continue
-                                log.info(f'{self.target}: {remote_file.share}\\{remote_file.name}')
+                                log.info(f"{self.target}: {remote_file.share}\\{remote_file.name}")
                                 if self.get_file(remote_file):
                                     self.save_file(remote_file)
                                 continue
@@ -288,48 +319,46 @@ class Spiderling:
                         yield remote_file
 
                     else:
-                        log.debug(f'{self.target}: {full_path} is either empty or too large')
-
+                        log.debug(f"{self.target}: {full_path} is either empty or too large")
 
     def path_match(self, file):
-        '''
+        """
         Based on whether "or" logic is enabled, return True or False
         if the filename + extension meets the requirements
-        '''
+        """
         filename_match = self.filename_match(file)
         extension_match = self.extension_whitelisted(file)
         if self.parent.or_logic:
-            return (filename_match and self.parent.filename_filters) or (extension_match and self.parent.file_extensions)
+            return (filename_match and self.parent.filename_filters) or (
+                extension_match and self.parent.file_extensions
+            )
         else:
             return filename_match and extension_match
 
-
-
     def share_match(self, share):
-        '''
+        """
         Return true if "share" matches any of the share filters
-        '''
+        """
 
         # if the share has been whitelisted
-        if ((not self.parent.share_whitelist) or (share.lower() in self.parent.share_whitelist)):
+        if (not self.parent.share_whitelist) or (share.lower() in self.parent.share_whitelist):
             # and hasn't been blacklisted
-            if ((not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist)):
+            if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
                 return True
             else:
-                log.debug(f'{self.target}: Skipping blacklisted share: {share}')
+                log.debug(f"{self.target}: Skipping blacklisted share: {share}")
         else:
-            log.debug(f'{self.target}: Skipping share {share}: not in whitelist')
+            log.debug(f"{self.target}: Skipping share {share}: not in whitelist")
 
         return False
 
-
     def dir_match(self, path):
-        '''
+        """
         Return true if "path" matches any of the directory filters
-        '''
+        """
 
         # convert forward slashes to backwards
-        dirname = str(path).lower().replace('/', '\\')
+        dirname = str(path).lower().replace("/", "\\")
 
         # root path always passes
         if not path:
@@ -341,44 +370,43 @@ class Spiderling:
             if (not self.parent.dir_blacklist) or not any([k.lower() in dirname for k in self.parent.dir_blacklist]):
                 return True
             else:
-                log.debug(f'{self.target}: Skipping blacklisted dir: {path}')
+                log.debug(f"{self.target}: Skipping blacklisted dir: {path}")
         else:
-            log.debug(f'{self.target}: Skipping dir {path}: not in whitelist')
+            log.debug(f"{self.target}: Skipping dir {path}: not in whitelist")
 
         return False
 
-
     def filename_match(self, filename):
-        '''
+        """
         Return true if "filename" matches any of the filename filters
-        '''
+        """
 
-        if (not self.parent.filename_filters) or any([f_regex.match(str(pathlib.Path(filename).stem)) for f_regex in self.parent.filename_filters]):
+        if (not self.parent.filename_filters) or any(
+            [f_regex.match(str(pathlib.Path(filename).stem)) for f_regex in self.parent.filename_filters]
+        ):
             return True
         else:
-            log.debug(f'{self.target}: {filename} does not match filename filters')
+            log.debug(f"{self.target}: {filename} does not match filename filters")
 
         return False
 
-
     def is_binary_file(self, filename):
-        '''
+        """
         Returns true if file is a bad extension type, e.g. encrypted or compressed
-        '''
+        """
 
-        extension = ''.join(pathlib.Path(filename).suffixes).lower()
+        extension = "".join(pathlib.Path(filename).suffixes).lower()
         if any([extension.endswith(e.lower()) for e in self.dont_parse]):
             if extension not in self.parent.file_extensions:
-                log.debug(f'{self.target}: Not parsing {filename} due to undesirable extension')
+                log.debug(f"{self.target}: Not parsing {filename} due to undesirable extension")
                 return True
         return False
 
-
     def extension_blacklisted(self, filename):
-        '''
+        """
         Return True if folder, file name, or extension has been blacklisted
-        '''
-        extension = ''.join(pathlib.Path(filename).suffixes).lower()
+        """
+        extension = "".join(pathlib.Path(filename).suffixes).lower()
         excluded_extensions = list(self.parent.extension_blacklist)
 
         if not excluded_extensions:
@@ -387,39 +415,34 @@ class Spiderling:
         if not any([extension.endswith(e) for e in excluded_extensions]):
             return False
         else:
-            log.debug(f'{self.target}: Skipping file with blacklisted extension: {filename}')
+            log.debug(f"{self.target}: Skipping file with blacklisted extension: {filename}")
             return True
 
-
     def extension_whitelisted(self, filename):
-        '''
+        """
         Return True if file extension has been whitelisted
-        '''
+        """
         # a .tar.gz file will match both filters ".gz" and ".tar.gz"
-        extension = ''.join(pathlib.Path(filename).suffixes).lower()
+        extension = "".join(pathlib.Path(filename).suffixes).lower()
         extensions = list(self.parent.file_extensions)
 
         if not extensions:
             return True
 
         # if whitelist check passes
-        if any([(extension.endswith(e) if e else extension == e) for e in extensions]):
-            log.debug(f'{self.target}: {filename} matches extension filters')
+        if any([(extension.endswith(e) if e else extension == e) for e in extensions]):
+            log.debug(f"{self.target}: {filename} matches extension filters")
             return True
         else:
-            log.debug(f'{self.target}: Skipping file {filename}, does not match extension filters')
+            log.debug(f"{self.target}: Skipping file {filename}, does not match extension filters")
             return False
 
-
-    def message_parent(self, message_type, content=''):
-        '''
+    def message_parent(self, message_type, content=""):
+        """
         Send a message to the parent spider
-        '''
-
-        self.parent.spiderling_queue.put(
-            SpiderlingMessage(message_type, self.target, content)
-        )
+        """
 
+        self.parent.spiderling_queue.put(SpiderlingMessage(message_type, self.target, content))
 
     def parse_local_files(self, files):
 
@@ -427,37 +450,60 @@ class Spiderling:
         for r in pool.map(self.parse_file, files):
             pass
 
-
     def save_file(self, remote_file):
-        '''
+        """
         Moves a file from temp storage into the loot directory
-        '''
+        """
 
-        allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + '._ '
+        allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + "._ "
 
         # replace backslashes with underscores to preserve directory names
-        loot_filename = str(remote_file).replace('\\', '_')
+        loot_filename = str(remote_file).replace("\\", "_")
         # remove weird characters
-        loot_filename = ''.join([c for c in loot_filename if c in allowed_chars])
+        loot_filename = "".join([c for c in loot_filename if c in allowed_chars])
         loot_dest = self.parent.loot_dir / loot_filename
         try:
             move(str(remote_file.tmp_filename), str(loot_dest))
         except Exception:
-            log.warning(f'Error saving {remote_file}')
-
+            log.warning(f"Error saving {remote_file}")
 
     def get_file(self, remote_file):
-        '''
+        """
         Attempts to retrieve "remote_file" from share and returns True if successful
-        '''
+        """
 
         try:
             smb_client = self.parent.get_smb_client(self.target)
-            log.debug(f'{self.target}: Downloading {remote_file.share}\\{remote_file.name}')
+            log.debug(f"{self.target}: Downloading {remote_file.share}\\{remote_file.name}")
             remote_file.get(smb_client)
             return True
         except FileRetrievalError as e:
-            log.debug(f'{self.target}: {e}')
+            log.debug(f"{self.target}: {e}")
 
         return False
 
+    def date_match(self, file_time):
+        """
+        Return True if file modification time matches date filters
+        file_time is a unix timestamp
+        """
+
+        if file_time is None:
+            return True
+
+        # Convert timestamp to datetime
+        file_date = datetime.fromtimestamp(file_time)
+
+        # Check modified_after
+        if self.parent.modified_after:
+            if file_date < self.parent.modified_after:
+                log.debug(f"{self.target}: File too old: {file_date.strftime('%Y-%m-%d')}")
+                return False
+
+        # Check modified_before
+        if self.parent.modified_before:
+            if file_date > self.parent.modified_before:
+                log.debug(f"{self.target}: File too new: {file_date.strftime('%Y-%m-%d')}")
+                return False
+
+        return True
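The main functional addition visible in this diff is date filtering: both the local listing path and the SMB listing path now skip files whose modification time falls outside modified_after / modified_before, using the new date_match() method. Below is a minimal standalone sketch of that comparison; the modified_after and modified_before parameters are hypothetical stand-ins for the datetime values the spiderling reads from self.parent, not part of the package's API.

from datetime import datetime


def date_match(file_time, modified_after=None, modified_before=None):
    # Sketch of the comparison performed by the new Spiderling.date_match().
    # "modified_after" / "modified_before" are hypothetical stand-ins for the
    # datetime attributes stored on self.parent.
    if file_time is None:
        # files with an unreadable mtime are kept rather than dropped
        return True

    file_date = datetime.fromtimestamp(file_time)

    if modified_after and file_date < modified_after:
        return False  # older than the window
    if modified_before and file_date > modified_before:
        return False  # newer than the window
    return True


# e.g. keep only files touched during 2023
after, before = datetime(2023, 1, 1), datetime(2024, 1, 1)
print(date_match(datetime(2023, 6, 15).timestamp(), after, before))  # True
print(date_match(datetime(2021, 3, 1).timestamp(), after, before))   # False

As in the listing code (where mod_time falls back to None on error), a file whose modification time cannot be read passes the filter rather than being dropped.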
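The reworked shares property yields enumerated shares that pass the share filters first, then any whitelisted shares that never showed up in enumeration, since hidden shares can still be accessible. A rough standalone sketch of that ordering, using hypothetical enumerated, whitelist, and blacklist arguments in place of self.smb_client.shares, self.parent.share_whitelist, and self.parent.share_blacklist:

def iter_shares(enumerated, whitelist, blacklist):
    # Sketch of the de-duplicated ordering used by the new Spiderling.shares property;
    # the three arguments are hypothetical stand-ins for the real attributes.
    yielded = set()
    whitelist_lower = {s.lower() for s in whitelist}
    blacklist_lower = {s.lower() for s in blacklist}

    # enumerated shares that pass the whitelist/blacklist filters come first
    for share in enumerated:
        if whitelist_lower and share.lower() not in whitelist_lower:
            continue
        if share.lower() in blacklist_lower:
            continue
        yielded.add(share.lower())
        yield share

    # whitelisted shares that never appeared in enumeration are still attempted,
    # since hidden shares can be accessible even when they aren't listed
    for share in whitelist:
        if share.lower() not in yielded and share.lower() not in blacklist_lower:
            yielded.add(share.lower())
            yield share


print(list(iter_shares(["C$", "Public"], ["public", "secret$"], [])))
# ['Public', 'secret$'] -- enumeration is filtered, then the hidden share is added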