man-spider 1.1.2__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- man_spider/lib/errors.py +8 -4
- man_spider/lib/file.py +13 -16
- man_spider/lib/logger.py +27 -32
- man_spider/lib/parser/__init__.py +1 -1
- man_spider/lib/parser/parser.py +102 -55
- man_spider/lib/processpool.py +24 -31
- man_spider/lib/smb.py +69 -62
- man_spider/lib/spider.py +66 -66
- man_spider/lib/spiderling.py +182 -136
- man_spider/lib/util.py +95 -29
- man_spider/manspider.py +168 -54
- {man_spider-1.1.2.dist-info → man_spider-2.0.0.dist-info}/METADATA +100 -42
- man_spider-2.0.0.dist-info/RECORD +18 -0
- {man_spider-1.1.2.dist-info → man_spider-2.0.0.dist-info}/WHEEL +1 -1
- man_spider-2.0.0.dist-info/entry_points.txt +2 -0
- man_spider-1.1.2.dist-info/RECORD +0 -18
- man_spider-1.1.2.dist-info/entry_points.txt +0 -3
- {man_spider-1.1.2.dist-info → man_spider-2.0.0.dist-info/licenses}/LICENSE +0 -0
man_spider/lib/spiderling.py
CHANGED
|
@@ -4,6 +4,7 @@ import pathlib
|
|
|
4
4
|
import multiprocessing
|
|
5
5
|
from shutil import move
|
|
6
6
|
from traceback import format_exc
|
|
7
|
+
from datetime import datetime
|
|
7
8
|
|
|
8
9
|
from man_spider.lib.smb import *
|
|
9
10
|
from man_spider.lib.file import *
|
|
@@ -12,52 +13,50 @@ from man_spider.lib.errors import *
|
|
|
12
13
|
from man_spider.lib.processpool import *
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
log = logging.getLogger(
|
|
16
|
+
log = logging.getLogger("manspider.spiderling")
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class SpiderlingMessage:
|
|
19
|
-
|
|
20
|
+
"""
|
|
20
21
|
Message which gets sent back to the parent through parent_queue
|
|
21
|
-
|
|
22
|
+
"""
|
|
22
23
|
|
|
23
24
|
def __init__(self, message_type, target, content):
|
|
24
|
-
|
|
25
|
+
"""
|
|
25
26
|
"message_type" is a string, and can be:
|
|
26
27
|
"e" - error
|
|
27
28
|
"a" - authentication failure
|
|
28
|
-
|
|
29
|
+
"""
|
|
29
30
|
self.type = message_type
|
|
30
31
|
self.target = target
|
|
31
32
|
self.content = content
|
|
32
33
|
|
|
33
34
|
|
|
34
|
-
|
|
35
35
|
class Spiderling:
|
|
36
|
-
|
|
36
|
+
"""
|
|
37
37
|
Enumerates SMB shares and spiders all possible directories/filenames up to maxdepth
|
|
38
38
|
Designed to be threadable
|
|
39
|
-
|
|
39
|
+
"""
|
|
40
40
|
|
|
41
41
|
# these extensions don't get parsed for content, unless explicitly specified
|
|
42
42
|
dont_parse = [
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
43
|
+
".png",
|
|
44
|
+
".gif",
|
|
45
|
+
".tiff",
|
|
46
|
+
".msi",
|
|
47
|
+
".bmp",
|
|
48
|
+
".jpg",
|
|
49
|
+
".jpeg",
|
|
50
|
+
".zip",
|
|
51
|
+
".gz",
|
|
52
|
+
".bz2",
|
|
53
|
+
".7z",
|
|
54
|
+
".xz",
|
|
55
55
|
]
|
|
56
56
|
|
|
57
57
|
def __init__(self, target, parent):
|
|
58
58
|
|
|
59
59
|
try:
|
|
60
|
-
|
|
61
60
|
self.parent = parent
|
|
62
61
|
self.target = target
|
|
63
62
|
|
|
@@ -70,19 +69,20 @@ class Spiderling:
|
|
|
70
69
|
self.local = False
|
|
71
70
|
|
|
72
71
|
self.smb_client = SMBClient(
|
|
73
|
-
target,
|
|
72
|
+
target.host,
|
|
74
73
|
parent.username,
|
|
75
74
|
parent.password,
|
|
76
75
|
parent.domain,
|
|
77
76
|
parent.nthash,
|
|
78
77
|
parent.use_kerberos,
|
|
79
78
|
parent.aes_key,
|
|
80
|
-
parent.dc_ip
|
|
79
|
+
parent.dc_ip,
|
|
80
|
+
port=target.port,
|
|
81
81
|
)
|
|
82
82
|
|
|
83
83
|
logon_result = self.smb_client.login()
|
|
84
84
|
if logon_result not in [True, None]:
|
|
85
|
-
self.message_parent(
|
|
85
|
+
self.message_parent("a", logon_result)
|
|
86
86
|
|
|
87
87
|
if logon_result is not None:
|
|
88
88
|
self.go()
|
|
@@ -92,20 +92,19 @@ class Spiderling:
|
|
|
92
92
|
self.parser_process = None
|
|
93
93
|
|
|
94
94
|
except KeyboardInterrupt:
|
|
95
|
-
log.critical(
|
|
95
|
+
log.critical("Spiderling Interrupted")
|
|
96
96
|
|
|
97
97
|
# log all exceptions
|
|
98
98
|
except Exception as e:
|
|
99
99
|
if log.level <= logging.DEBUG:
|
|
100
100
|
log.error(format_exc())
|
|
101
101
|
else:
|
|
102
|
-
log.error(f
|
|
103
|
-
|
|
102
|
+
log.error(f"Error in spiderling: {e}")
|
|
104
103
|
|
|
105
104
|
def go(self):
|
|
106
|
-
|
|
105
|
+
"""
|
|
107
106
|
go spider go spider go
|
|
108
|
-
|
|
107
|
+
"""
|
|
109
108
|
|
|
110
109
|
# local files
|
|
111
110
|
if self.local:
|
|
@@ -118,7 +117,6 @@ class Spiderling:
|
|
|
118
117
|
else:
|
|
119
118
|
# remote files
|
|
120
119
|
for file in self.files:
|
|
121
|
-
|
|
122
120
|
# if content searching is enabled, parse the file
|
|
123
121
|
if self.parent.parser.content_filters:
|
|
124
122
|
try:
|
|
@@ -130,33 +128,42 @@ class Spiderling:
|
|
|
130
128
|
|
|
131
129
|
# otherwise, just save it
|
|
132
130
|
elif not self.local:
|
|
133
|
-
log.info(f
|
|
131
|
+
log.info(f"{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})")
|
|
134
132
|
if not self.parent.no_download:
|
|
135
133
|
self.save_file(file)
|
|
136
134
|
|
|
137
|
-
log.info(f
|
|
138
|
-
|
|
139
|
-
|
|
135
|
+
log.info(f"Finished spidering {self.target}")
|
|
140
136
|
|
|
141
137
|
@property
|
|
142
138
|
def files(self):
|
|
143
|
-
|
|
139
|
+
"""
|
|
144
140
|
Yields all files on the target to be parsed/downloaded
|
|
145
141
|
Premptively download matching files into temp directory
|
|
146
|
-
|
|
142
|
+
"""
|
|
147
143
|
|
|
148
144
|
if self.local:
|
|
149
145
|
for file in list(list_files(self.target)):
|
|
150
146
|
if self.extension_blacklisted(file):
|
|
151
|
-
log.debug(f
|
|
147
|
+
log.debug(f"{self.target}: Skipping {file}: extension is blacklisted")
|
|
152
148
|
continue
|
|
149
|
+
|
|
150
|
+
if self.parent.modified_after or self.parent.modified_before:
|
|
151
|
+
try:
|
|
152
|
+
mod_time = file.stat().st_mtime
|
|
153
|
+
except Exception:
|
|
154
|
+
mod_time = None
|
|
155
|
+
|
|
156
|
+
if not self.date_match(mod_time):
|
|
157
|
+
log.debug(f"Skipping {file}: does not match date filters")
|
|
158
|
+
continue
|
|
159
|
+
|
|
153
160
|
if self.path_match(file) or (self.parent.or_logic and self.parent.parser.content_filters):
|
|
154
161
|
if self.path_match(file):
|
|
155
162
|
log.debug(pathlib.Path(file).relative_to(self.target))
|
|
156
163
|
if not self.is_binary_file(file):
|
|
157
164
|
yield file
|
|
158
165
|
else:
|
|
159
|
-
log.debug(f
|
|
166
|
+
log.debug(f"Skipping {file}: does not match filename/extension filters")
|
|
160
167
|
|
|
161
168
|
else:
|
|
162
169
|
for share in self.shares:
|
|
@@ -165,16 +172,13 @@ class Spiderling:
|
|
|
165
172
|
self.get_file(remote_file)
|
|
166
173
|
yield remote_file
|
|
167
174
|
|
|
168
|
-
|
|
169
|
-
|
|
170
175
|
def parse_file(self, file):
|
|
171
|
-
|
|
176
|
+
"""
|
|
172
177
|
Simple wrapper around self.parent.parser.parse_file()
|
|
173
178
|
For sole purpose of threading
|
|
174
|
-
|
|
179
|
+
"""
|
|
175
180
|
|
|
176
181
|
try:
|
|
177
|
-
|
|
178
182
|
if type(file) == RemoteFile:
|
|
179
183
|
matches = self.parent.parser.parse_file(str(file.tmp_filename), pretty_filename=str(file))
|
|
180
184
|
if matches and not self.parent.no_download:
|
|
@@ -183,7 +187,7 @@ class Spiderling:
|
|
|
183
187
|
file.tmp_filename.unlink()
|
|
184
188
|
|
|
185
189
|
else:
|
|
186
|
-
log.debug(f
|
|
190
|
+
log.debug(f"Found file: {file}")
|
|
187
191
|
self.parent.parser.parse_file(file, file)
|
|
188
192
|
|
|
189
193
|
# log all exceptions
|
|
@@ -191,73 +195,87 @@ class Spiderling:
|
|
|
191
195
|
if log.level <= logging.DEBUG:
|
|
192
196
|
log.error(format_exc())
|
|
193
197
|
else:
|
|
194
|
-
log.error(f
|
|
198
|
+
log.error(f"Error parsing file {file}: {e}")
|
|
195
199
|
|
|
196
200
|
except KeyboardInterrupt:
|
|
197
|
-
log.critical(
|
|
198
|
-
|
|
201
|
+
log.critical("File parsing interrupted")
|
|
199
202
|
|
|
200
203
|
@property
|
|
201
204
|
def shares(self):
|
|
202
|
-
|
|
205
|
+
"""
|
|
203
206
|
Lists all shares on single target
|
|
204
|
-
|
|
207
|
+
Includes both enumerated shares and user-specified shares (which may be hidden from enumeration)
|
|
208
|
+
"""
|
|
209
|
+
|
|
210
|
+
# Keep track of shares we've already yielded to avoid duplicates
|
|
211
|
+
yielded_shares = set()
|
|
205
212
|
|
|
213
|
+
# First, yield enumerated shares that match filters
|
|
206
214
|
for share in self.smb_client.shares:
|
|
207
215
|
if self.share_match(share):
|
|
216
|
+
yielded_shares.add(share.lower())
|
|
208
217
|
yield share
|
|
209
218
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
219
|
+
# If user specified a share whitelist, also try those shares even if not enumerated
|
|
220
|
+
# (some shares are hidden from enumeration but still accessible)
|
|
221
|
+
if self.parent.share_whitelist:
|
|
222
|
+
for share in self.parent.share_whitelist:
|
|
223
|
+
if share.lower() not in yielded_shares:
|
|
224
|
+
# Check if it's blacklisted
|
|
225
|
+
if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
|
|
226
|
+
log.debug(f"{self.target}: Adding non-enumerated share from whitelist: {share}")
|
|
227
|
+
yielded_shares.add(share.lower())
|
|
228
|
+
yield share
|
|
229
|
+
|
|
230
|
+
def list_files(self, share, path="", depth=0, tries=2):
|
|
231
|
+
"""
|
|
214
232
|
List files inside a specific directory
|
|
215
233
|
Only yield files which conform to all filters (except content)
|
|
216
|
-
|
|
234
|
+
"""
|
|
217
235
|
|
|
218
236
|
if depth < self.parent.maxdepth and self.dir_match(path):
|
|
219
|
-
|
|
220
237
|
files = []
|
|
221
238
|
while tries > 0:
|
|
222
239
|
try:
|
|
223
240
|
files = list(self.smb_client.ls(share, path))
|
|
224
241
|
break
|
|
225
242
|
except FileListError as e:
|
|
226
|
-
if
|
|
227
|
-
log.debug(f
|
|
243
|
+
if "ACCESS_DENIED" in str(e):
|
|
244
|
+
log.debug(f"{self.target}: Error listing files: {e}")
|
|
228
245
|
break
|
|
229
246
|
else:
|
|
230
247
|
tries -= 1
|
|
231
248
|
|
|
232
249
|
if files:
|
|
233
|
-
log.debug(f
|
|
250
|
+
log.debug(f"{self.target}: {share}{path}: contains {len(files):,} items")
|
|
234
251
|
|
|
235
252
|
for f in files:
|
|
236
253
|
name = f.get_longname()
|
|
237
|
-
full_path = f
|
|
254
|
+
full_path = f"{path}\\{name}"
|
|
238
255
|
# if it's a directory, go deeper
|
|
239
256
|
if f.is_directory():
|
|
240
|
-
for file in self.list_files(share, full_path, (depth+1)):
|
|
257
|
+
for file in self.list_files(share, full_path, (depth + 1)):
|
|
241
258
|
yield file
|
|
242
259
|
|
|
243
260
|
else:
|
|
244
|
-
|
|
245
261
|
# skip the file if it didn't match extension filters
|
|
246
262
|
if self.extension_blacklisted(name):
|
|
247
|
-
log.debug(f
|
|
263
|
+
log.debug(f"{self.target}: Skipping {share}{full_path}: extension is blacklisted")
|
|
248
264
|
continue
|
|
249
265
|
|
|
250
266
|
if not self.path_match(name):
|
|
251
267
|
if not (
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
268
|
+
# all of these have to be true in order to get past this point
|
|
269
|
+
# "or logic" is enabled
|
|
270
|
+
self.parent.or_logic
|
|
271
|
+
and
|
|
272
|
+
# and file does not have a "don't parse" extension
|
|
273
|
+
(not self.is_binary_file(name))
|
|
274
|
+
and
|
|
275
|
+
# and content filters are enabled
|
|
276
|
+
self.parent.parser.content_filters
|
|
277
|
+
):
|
|
278
|
+
log.debug(f"{self.target}: Skipping {share}{full_path}: filename/extensions do not match")
|
|
261
279
|
continue
|
|
262
280
|
|
|
263
281
|
# try to get the size of the file
|
|
@@ -267,19 +285,32 @@ class Spiderling:
|
|
|
267
285
|
self.smb_client.handle_impacket_error(e)
|
|
268
286
|
continue
|
|
269
287
|
|
|
288
|
+
if self.parent.modified_after or self.parent.modified_before:
|
|
289
|
+
try:
|
|
290
|
+
mod_time = f.get_mtime_epoch()
|
|
291
|
+
except Exception as e:
|
|
292
|
+
self.smb_client.handle_impacket_error(e)
|
|
293
|
+
mod_time = None
|
|
294
|
+
|
|
295
|
+
# check if file matches date filters
|
|
296
|
+
if not self.date_match(mod_time):
|
|
297
|
+
log.debug(f"{self.target}: Skipping {share}{full_path}: does not match date filters")
|
|
298
|
+
continue
|
|
299
|
+
|
|
270
300
|
# make the RemoteFile object (the file won't be read yet)
|
|
271
|
-
full_path_fixed = full_path.lstrip(
|
|
301
|
+
full_path_fixed = full_path.lstrip("\\")
|
|
272
302
|
remote_file = RemoteFile(full_path_fixed, share, self.target, size=filesize)
|
|
273
303
|
|
|
274
304
|
# if it's a non-empty file that's smaller than the size limit
|
|
275
305
|
if filesize > 0 and filesize < self.parent.max_filesize:
|
|
276
|
-
|
|
277
306
|
# if it matched filename/extension filters and we're downloading files
|
|
278
|
-
if (
|
|
307
|
+
if (
|
|
308
|
+
self.parent.file_extensions or self.parent.filename_filters
|
|
309
|
+
) and not self.parent.no_download:
|
|
279
310
|
# but the extension is marked as "don't parse"
|
|
280
311
|
if self.is_binary_file(name):
|
|
281
312
|
# don't parse it, instead save it and continue
|
|
282
|
-
log.info(f
|
|
313
|
+
log.info(f"{self.target}: {remote_file.share}\\{remote_file.name}")
|
|
283
314
|
if self.get_file(remote_file):
|
|
284
315
|
self.save_file(remote_file)
|
|
285
316
|
continue
|
|
@@ -288,48 +319,46 @@ class Spiderling:
|
|
|
288
319
|
yield remote_file
|
|
289
320
|
|
|
290
321
|
else:
|
|
291
|
-
log.debug(f
|
|
292
|
-
|
|
322
|
+
log.debug(f"{self.target}: {full_path} is either empty or too large")
|
|
293
323
|
|
|
294
324
|
def path_match(self, file):
|
|
295
|
-
|
|
325
|
+
"""
|
|
296
326
|
Based on whether "or" logic is enabled, return True or False
|
|
297
327
|
if the filename + extension meets the requirements
|
|
298
|
-
|
|
328
|
+
"""
|
|
299
329
|
filename_match = self.filename_match(file)
|
|
300
330
|
extension_match = self.extension_whitelisted(file)
|
|
301
331
|
if self.parent.or_logic:
|
|
302
|
-
return (filename_match and self.parent.filename_filters) or (
|
|
332
|
+
return (filename_match and self.parent.filename_filters) or (
|
|
333
|
+
extension_match and self.parent.file_extensions
|
|
334
|
+
)
|
|
303
335
|
else:
|
|
304
336
|
return filename_match and extension_match
|
|
305
337
|
|
|
306
|
-
|
|
307
|
-
|
|
308
338
|
def share_match(self, share):
|
|
309
|
-
|
|
339
|
+
"""
|
|
310
340
|
Return true if "share" matches any of the share filters
|
|
311
|
-
|
|
341
|
+
"""
|
|
312
342
|
|
|
313
343
|
# if the share has been whitelisted
|
|
314
|
-
if (
|
|
344
|
+
if (not self.parent.share_whitelist) or (share.lower() in self.parent.share_whitelist):
|
|
315
345
|
# and hasn't been blacklisted
|
|
316
|
-
if (
|
|
346
|
+
if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
|
|
317
347
|
return True
|
|
318
348
|
else:
|
|
319
|
-
log.debug(f
|
|
349
|
+
log.debug(f"{self.target}: Skipping blacklisted share: {share}")
|
|
320
350
|
else:
|
|
321
|
-
log.debug(f
|
|
351
|
+
log.debug(f"{self.target}: Skipping share {share}: not in whitelist")
|
|
322
352
|
|
|
323
353
|
return False
|
|
324
354
|
|
|
325
|
-
|
|
326
355
|
def dir_match(self, path):
|
|
327
|
-
|
|
356
|
+
"""
|
|
328
357
|
Return true if "path" matches any of the directory filters
|
|
329
|
-
|
|
358
|
+
"""
|
|
330
359
|
|
|
331
360
|
# convert forward slashes to backwards
|
|
332
|
-
dirname = str(path).lower().replace(
|
|
361
|
+
dirname = str(path).lower().replace("/", "\\")
|
|
333
362
|
|
|
334
363
|
# root path always passes
|
|
335
364
|
if not path:
|
|
@@ -341,44 +370,43 @@ class Spiderling:
|
|
|
341
370
|
if (not self.parent.dir_blacklist) or not any([k.lower() in dirname for k in self.parent.dir_blacklist]):
|
|
342
371
|
return True
|
|
343
372
|
else:
|
|
344
|
-
log.debug(f
|
|
373
|
+
log.debug(f"{self.target}: Skipping blacklisted dir: {path}")
|
|
345
374
|
else:
|
|
346
|
-
log.debug(f
|
|
375
|
+
log.debug(f"{self.target}: Skipping dir {path}: not in whitelist")
|
|
347
376
|
|
|
348
377
|
return False
|
|
349
378
|
|
|
350
|
-
|
|
351
379
|
def filename_match(self, filename):
|
|
352
|
-
|
|
380
|
+
"""
|
|
353
381
|
Return true if "filename" matches any of the filename filters
|
|
354
|
-
|
|
382
|
+
"""
|
|
355
383
|
|
|
356
|
-
if (not self.parent.filename_filters) or any(
|
|
384
|
+
if (not self.parent.filename_filters) or any(
|
|
385
|
+
[f_regex.match(str(pathlib.Path(filename).stem)) for f_regex in self.parent.filename_filters]
|
|
386
|
+
):
|
|
357
387
|
return True
|
|
358
388
|
else:
|
|
359
|
-
log.debug(f
|
|
389
|
+
log.debug(f"{self.target}: {filename} does not match filename filters")
|
|
360
390
|
|
|
361
391
|
return False
|
|
362
392
|
|
|
363
|
-
|
|
364
393
|
def is_binary_file(self, filename):
|
|
365
|
-
|
|
394
|
+
"""
|
|
366
395
|
Returns true if file is a bad extension type, e.g. encrypted or compressed
|
|
367
|
-
|
|
396
|
+
"""
|
|
368
397
|
|
|
369
|
-
extension =
|
|
398
|
+
extension = "".join(pathlib.Path(filename).suffixes).lower()
|
|
370
399
|
if any([extension.endswith(e.lower()) for e in self.dont_parse]):
|
|
371
400
|
if extension not in self.parent.file_extensions:
|
|
372
|
-
log.debug(f
|
|
401
|
+
log.debug(f"{self.target}: Not parsing {filename} due to undesirable extension")
|
|
373
402
|
return True
|
|
374
403
|
return False
|
|
375
404
|
|
|
376
|
-
|
|
377
405
|
def extension_blacklisted(self, filename):
|
|
378
|
-
|
|
406
|
+
"""
|
|
379
407
|
Return True if folder, file name, or extension has been blacklisted
|
|
380
|
-
|
|
381
|
-
extension =
|
|
408
|
+
"""
|
|
409
|
+
extension = "".join(pathlib.Path(filename).suffixes).lower()
|
|
382
410
|
excluded_extensions = list(self.parent.extension_blacklist)
|
|
383
411
|
|
|
384
412
|
if not excluded_extensions:
|
|
@@ -387,39 +415,34 @@ class Spiderling:
|
|
|
387
415
|
if not any([extension.endswith(e) for e in excluded_extensions]):
|
|
388
416
|
return False
|
|
389
417
|
else:
|
|
390
|
-
log.debug(f
|
|
418
|
+
log.debug(f"{self.target}: Skipping file with blacklisted extension: {filename}")
|
|
391
419
|
return True
|
|
392
420
|
|
|
393
|
-
|
|
394
421
|
def extension_whitelisted(self, filename):
|
|
395
|
-
|
|
422
|
+
"""
|
|
396
423
|
Return True if file extension has been whitelisted
|
|
397
|
-
|
|
424
|
+
"""
|
|
398
425
|
# a .tar.gz file will match both filters ".gz" and ".tar.gz"
|
|
399
|
-
extension =
|
|
426
|
+
extension = "".join(pathlib.Path(filename).suffixes).lower()
|
|
400
427
|
extensions = list(self.parent.file_extensions)
|
|
401
428
|
|
|
402
429
|
if not extensions:
|
|
403
430
|
return True
|
|
404
431
|
|
|
405
432
|
# if whitelist check passes
|
|
406
|
-
if
|
|
407
|
-
log.debug(f
|
|
433
|
+
if any([(extension.endswith(e) if e else extension == e) for e in extensions]):
|
|
434
|
+
log.debug(f"{self.target}: {filename} matches extension filters")
|
|
408
435
|
return True
|
|
409
436
|
else:
|
|
410
|
-
log.debug(f
|
|
437
|
+
log.debug(f"{self.target}: Skipping file {filename}, does not match extension filters")
|
|
411
438
|
return False
|
|
412
439
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
'''
|
|
440
|
+
def message_parent(self, message_type, content=""):
|
|
441
|
+
"""
|
|
416
442
|
Send a message to the parent spider
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
self.parent.spiderling_queue.put(
|
|
420
|
-
SpiderlingMessage(message_type, self.target, content)
|
|
421
|
-
)
|
|
443
|
+
"""
|
|
422
444
|
|
|
445
|
+
self.parent.spiderling_queue.put(SpiderlingMessage(message_type, self.target, content))
|
|
423
446
|
|
|
424
447
|
def parse_local_files(self, files):
|
|
425
448
|
|
|
@@ -427,37 +450,60 @@ class Spiderling:
|
|
|
427
450
|
for r in pool.map(self.parse_file, files):
|
|
428
451
|
pass
|
|
429
452
|
|
|
430
|
-
|
|
431
453
|
def save_file(self, remote_file):
|
|
432
|
-
|
|
454
|
+
"""
|
|
433
455
|
Moves a file from temp storage into the loot directory
|
|
434
|
-
|
|
456
|
+
"""
|
|
435
457
|
|
|
436
|
-
allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits +
|
|
458
|
+
allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + "._ "
|
|
437
459
|
|
|
438
460
|
# replace backslashes with underscores to preserve directory names
|
|
439
|
-
loot_filename = str(remote_file).replace(
|
|
461
|
+
loot_filename = str(remote_file).replace("\\", "_")
|
|
440
462
|
# remove weird characters
|
|
441
|
-
loot_filename =
|
|
463
|
+
loot_filename = "".join([c for c in loot_filename if c in allowed_chars])
|
|
442
464
|
loot_dest = self.parent.loot_dir / loot_filename
|
|
443
465
|
try:
|
|
444
466
|
move(str(remote_file.tmp_filename), str(loot_dest))
|
|
445
467
|
except Exception:
|
|
446
|
-
log.warning(f
|
|
447
|
-
|
|
468
|
+
log.warning(f"Error saving {remote_file}")
|
|
448
469
|
|
|
449
470
|
def get_file(self, remote_file):
|
|
450
|
-
|
|
471
|
+
"""
|
|
451
472
|
Attempts to retrieve "remote_file" from share and returns True if successful
|
|
452
|
-
|
|
473
|
+
"""
|
|
453
474
|
|
|
454
475
|
try:
|
|
455
476
|
smb_client = self.parent.get_smb_client(self.target)
|
|
456
|
-
log.debug(f
|
|
477
|
+
log.debug(f"{self.target}: Downloading {remote_file.share}\\{remote_file.name}")
|
|
457
478
|
remote_file.get(smb_client)
|
|
458
479
|
return True
|
|
459
480
|
except FileRetrievalError as e:
|
|
460
|
-
log.debug(f
|
|
481
|
+
log.debug(f"{self.target}: {e}")
|
|
461
482
|
|
|
462
483
|
return False
|
|
463
484
|
|
|
485
|
+
def date_match(self, file_time):
|
|
486
|
+
"""
|
|
487
|
+
Return True if file modification time matches date filters
|
|
488
|
+
file_time is a unix timestamp
|
|
489
|
+
"""
|
|
490
|
+
|
|
491
|
+
if file_time is None:
|
|
492
|
+
return True
|
|
493
|
+
|
|
494
|
+
# Convert timestamp to datetime
|
|
495
|
+
file_date = datetime.fromtimestamp(file_time)
|
|
496
|
+
|
|
497
|
+
# Check modified_after
|
|
498
|
+
if self.parent.modified_after:
|
|
499
|
+
if file_date < self.parent.modified_after:
|
|
500
|
+
log.debug(f"{self.target}: File too old: {file_date.strftime('%Y-%m-%d')}")
|
|
501
|
+
return False
|
|
502
|
+
|
|
503
|
+
# Check modified_before
|
|
504
|
+
if self.parent.modified_before:
|
|
505
|
+
if file_date > self.parent.modified_before:
|
|
506
|
+
log.debug(f"{self.target}: File too new: {file_date.strftime('%Y-%m-%d')}")
|
|
507
|
+
return False
|
|
508
|
+
|
|
509
|
+
return True
|