man-spider 1.1.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- man_spider/lib/errors.py +8 -4
- man_spider/lib/file.py +16 -19
- man_spider/lib/logger.py +27 -32
- man_spider/lib/parser/__init__.py +1 -1
- man_spider/lib/parser/parser.py +102 -57
- man_spider/lib/processpool.py +24 -31
- man_spider/lib/smb.py +71 -63
- man_spider/lib/spider.py +69 -70
- man_spider/lib/spiderling.py +188 -141
- man_spider/lib/util.py +95 -29
- man_spider/manspider.py +170 -55
- {man_spider-1.1.1.dist-info → man_spider-2.0.0.dist-info}/METADATA +101 -44
- man_spider-2.0.0.dist-info/RECORD +18 -0
- {man_spider-1.1.1.dist-info → man_spider-2.0.0.dist-info}/WHEEL +1 -1
- man_spider-2.0.0.dist-info/entry_points.txt +2 -0
- man_spider-1.1.1.dist-info/RECORD +0 -18
- man_spider-1.1.1.dist-info/entry_points.txt +0 -3
- {man_spider-1.1.1.dist-info → man_spider-2.0.0.dist-info/licenses}/LICENSE +0 -0
man_spider/lib/spiderling.py
CHANGED
@@ -1,62 +1,62 @@
 import string
 import logging
 import pathlib
-from .smb import *
-from .file import *
-from .util import *
-from .errors import *
 import multiprocessing
 from shutil import move
-from .processpool import *
 from traceback import format_exc
+from datetime import datetime

+from man_spider.lib.smb import *
+from man_spider.lib.file import *
+from man_spider.lib.util import *
+from man_spider.lib.errors import *
+from man_spider.lib.processpool import *

-
+
+log = logging.getLogger("manspider.spiderling")


 class SpiderlingMessage:
-
+    """
     Message which gets sent back to the parent through parent_queue
-
+    """

     def __init__(self, message_type, target, content):
-
+        """
         "message_type" is a string, and can be:
             "e" - error
             "a" - authentication failure
-
+        """
         self.type = message_type
         self.target = target
         self.content = content


-
 class Spiderling:
-
+    """
     Enumerates SMB shares and spiders all possible directories/filenames up to maxdepth
     Designed to be threadable
-
+    """

     # these extensions don't get parsed for content, unless explicitly specified
     dont_parse = [
-
-
-
-
-
-
-
-
-
-
-
-
+        ".png",
+        ".gif",
+        ".tiff",
+        ".msi",
+        ".bmp",
+        ".jpg",
+        ".jpeg",
+        ".zip",
+        ".gz",
+        ".bz2",
+        ".7z",
+        ".xz",
     ]

     def __init__(self, target, parent):

         try:
-
             self.parent = parent
             self.target = target

@@ -69,19 +69,20 @@ class Spiderling:
             self.local = False

             self.smb_client = SMBClient(
-                target,
+                target.host,
                 parent.username,
                 parent.password,
                 parent.domain,
                 parent.nthash,
                 parent.use_kerberos,
                 parent.aes_key,
-                parent.dc_ip
+                parent.dc_ip,
+                port=target.port,
             )

             logon_result = self.smb_client.login()
             if logon_result not in [True, None]:
-                self.message_parent(
+                self.message_parent("a", logon_result)

             if logon_result is not None:
                 self.go()
@@ -91,20 +92,19 @@ class Spiderling:
             self.parser_process = None

         except KeyboardInterrupt:
-            log.critical(
+            log.critical("Spiderling Interrupted")

         # log all exceptions
         except Exception as e:
             if log.level <= logging.DEBUG:
                 log.error(format_exc())
             else:
-                log.error(f
-
+                log.error(f"Error in spiderling: {e}")

     def go(self):
-
+        """
         go spider go spider go
-
+        """

         # local files
         if self.local:
@@ -117,7 +117,6 @@ class Spiderling:
         else:
             # remote files
             for file in self.files:
-
                 # if content searching is enabled, parse the file
                 if self.parent.parser.content_filters:
                     try:
@@ -129,33 +128,42 @@ class Spiderling:

                 # otherwise, just save it
                 elif not self.local:
-                    log.info(f
+                    log.info(f"{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})")
                     if not self.parent.no_download:
                         self.save_file(file)

-        log.info(f
-
-
+        log.info(f"Finished spidering {self.target}")

     @property
     def files(self):
-
+        """
         Yields all files on the target to be parsed/downloaded
         Premptively download matching files into temp directory
-
+        """

         if self.local:
             for file in list(list_files(self.target)):
                 if self.extension_blacklisted(file):
-                    log.debug(f
+                    log.debug(f"{self.target}: Skipping {file}: extension is blacklisted")
                     continue
+
+                if self.parent.modified_after or self.parent.modified_before:
+                    try:
+                        mod_time = file.stat().st_mtime
+                    except Exception:
+                        mod_time = None
+
+                    if not self.date_match(mod_time):
+                        log.debug(f"Skipping {file}: does not match date filters")
+                        continue
+
                 if self.path_match(file) or (self.parent.or_logic and self.parent.parser.content_filters):
                     if self.path_match(file):
                         log.debug(pathlib.Path(file).relative_to(self.target))
                     if not self.is_binary_file(file):
                         yield file
                 else:
-                    log.debug(f
+                    log.debug(f"Skipping {file}: does not match filename/extension filters")

         else:
             for share in self.shares:
@@ -164,16 +172,13 @@ class Spiderling:
                         self.get_file(remote_file)
                     yield remote_file

-
-
     def parse_file(self, file):
-
+        """
         Simple wrapper around self.parent.parser.parse_file()
         For sole purpose of threading
-
+        """

         try:
-
             if type(file) == RemoteFile:
                 matches = self.parent.parser.parse_file(str(file.tmp_filename), pretty_filename=str(file))
                 if matches and not self.parent.no_download:
@@ -182,7 +187,7 @@ class Spiderling:
                 file.tmp_filename.unlink()

             else:
-                log.debug(f
+                log.debug(f"Found file: {file}")
                 self.parent.parser.parse_file(file, file)

         # log all exceptions
@@ -190,73 +195,87 @@ class Spiderling:
             if log.level <= logging.DEBUG:
                 log.error(format_exc())
             else:
-                log.error(f
+                log.error(f"Error parsing file {file}: {e}")

         except KeyboardInterrupt:
-            log.critical(
-
+            log.critical("File parsing interrupted")

     @property
     def shares(self):
-
+        """
         Lists all shares on single target
-
+        Includes both enumerated shares and user-specified shares (which may be hidden from enumeration)
+        """
+
+        # Keep track of shares we've already yielded to avoid duplicates
+        yielded_shares = set()

+        # First, yield enumerated shares that match filters
         for share in self.smb_client.shares:
             if self.share_match(share):
+                yielded_shares.add(share.lower())
                 yield share

-
-
-
-
+        # If user specified a share whitelist, also try those shares even if not enumerated
+        # (some shares are hidden from enumeration but still accessible)
+        if self.parent.share_whitelist:
+            for share in self.parent.share_whitelist:
+                if share.lower() not in yielded_shares:
+                    # Check if it's blacklisted
+                    if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
+                        log.debug(f"{self.target}: Adding non-enumerated share from whitelist: {share}")
+                        yielded_shares.add(share.lower())
+                        yield share
+
+    def list_files(self, share, path="", depth=0, tries=2):
+        """
         List files inside a specific directory
         Only yield files which conform to all filters (except content)
-
+        """

         if depth < self.parent.maxdepth and self.dir_match(path):
-
             files = []
             while tries > 0:
                 try:
                     files = list(self.smb_client.ls(share, path))
                     break
                 except FileListError as e:
-                    if
-                        log.debug(f
+                    if "ACCESS_DENIED" in str(e):
+                        log.debug(f"{self.target}: Error listing files: {e}")
                         break
                     else:
                         tries -= 1

             if files:
-                log.debug(f
+                log.debug(f"{self.target}: {share}{path}: contains {len(files):,} items")

             for f in files:
                 name = f.get_longname()
-                full_path = f
+                full_path = f"{path}\\{name}"
                 # if it's a directory, go deeper
                 if f.is_directory():
-                    for file in self.list_files(share, full_path, (depth+1)):
+                    for file in self.list_files(share, full_path, (depth + 1)):
                         yield file

                 else:
-
                     # skip the file if it didn't match extension filters
                     if self.extension_blacklisted(name):
-                        log.debug(f
+                        log.debug(f"{self.target}: Skipping {share}{full_path}: extension is blacklisted")
                         continue

                     if not self.path_match(name):
                         if not (
-
-
-
-
-
-
-
-
-
+                            # all of these have to be true in order to get past this point
+                            # "or logic" is enabled
+                            self.parent.or_logic
+                            and
+                            # and file does not have a "don't parse" extension
+                            (not self.is_binary_file(name))
+                            and
+                            # and content filters are enabled
+                            self.parent.parser.content_filters
+                        ):
+                            log.debug(f"{self.target}: Skipping {share}{full_path}: filename/extensions do not match")
                             continue

                         # try to get the size of the file
@@ -266,19 +285,32 @@ class Spiderling:
                         self.smb_client.handle_impacket_error(e)
                         continue

+                    if self.parent.modified_after or self.parent.modified_before:
+                        try:
+                            mod_time = f.get_mtime_epoch()
+                        except Exception as e:
+                            self.smb_client.handle_impacket_error(e)
+                            mod_time = None
+
+                        # check if file matches date filters
+                        if not self.date_match(mod_time):
+                            log.debug(f"{self.target}: Skipping {share}{full_path}: does not match date filters")
+                            continue
+
                     # make the RemoteFile object (the file won't be read yet)
-                    full_path_fixed = full_path.lstrip(
+                    full_path_fixed = full_path.lstrip("\\")
                     remote_file = RemoteFile(full_path_fixed, share, self.target, size=filesize)

                     # if it's a non-empty file that's smaller than the size limit
                     if filesize > 0 and filesize < self.parent.max_filesize:
-
                         # if it matched filename/extension filters and we're downloading files
-                        if (
+                        if (
+                            self.parent.file_extensions or self.parent.filename_filters
+                        ) and not self.parent.no_download:
                             # but the extension is marked as "don't parse"
                             if self.is_binary_file(name):
                                 # don't parse it, instead save it and continue
-                                log.info(f
+                                log.info(f"{self.target}: {remote_file.share}\\{remote_file.name}")
                                 if self.get_file(remote_file):
                                     self.save_file(remote_file)
                                 continue
@@ -287,48 +319,46 @@ class Spiderling:
                         yield remote_file

                     else:
-                        log.debug(f
-
+                        log.debug(f"{self.target}: {full_path} is either empty or too large")

     def path_match(self, file):
-
+        """
         Based on whether "or" logic is enabled, return True or False
         if the filename + extension meets the requirements
-
+        """
         filename_match = self.filename_match(file)
         extension_match = self.extension_whitelisted(file)
         if self.parent.or_logic:
-            return (filename_match and self.parent.filename_filters) or (
+            return (filename_match and self.parent.filename_filters) or (
+                extension_match and self.parent.file_extensions
+            )
         else:
             return filename_match and extension_match

-
-
     def share_match(self, share):
-
+        """
         Return true if "share" matches any of the share filters
-
+        """

         # if the share has been whitelisted
-        if (
+        if (not self.parent.share_whitelist) or (share.lower() in self.parent.share_whitelist):
             # and hasn't been blacklisted
-            if (
+            if (not self.parent.share_blacklist) or (share.lower() not in self.parent.share_blacklist):
                 return True
             else:
-                log.debug(f
+                log.debug(f"{self.target}: Skipping blacklisted share: {share}")
         else:
-            log.debug(f
+            log.debug(f"{self.target}: Skipping share {share}: not in whitelist")

         return False

-
     def dir_match(self, path):
-
+        """
         Return true if "path" matches any of the directory filters
-
+        """

         # convert forward slashes to backwards
-        dirname = str(path).lower().replace(
+        dirname = str(path).lower().replace("/", "\\")

         # root path always passes
         if not path:
@@ -340,44 +370,43 @@ class Spiderling:
             if (not self.parent.dir_blacklist) or not any([k.lower() in dirname for k in self.parent.dir_blacklist]):
                 return True
             else:
-                log.debug(f
+                log.debug(f"{self.target}: Skipping blacklisted dir: {path}")
         else:
-            log.debug(f
+            log.debug(f"{self.target}: Skipping dir {path}: not in whitelist")

         return False

-
     def filename_match(self, filename):
-
+        """
         Return true if "filename" matches any of the filename filters
-
+        """

-        if (not self.parent.filename_filters) or any(
+        if (not self.parent.filename_filters) or any(
+            [f_regex.match(str(pathlib.Path(filename).stem)) for f_regex in self.parent.filename_filters]
+        ):
             return True
         else:
-            log.debug(f
+            log.debug(f"{self.target}: {filename} does not match filename filters")

         return False

-
     def is_binary_file(self, filename):
-
+        """
         Returns true if file is a bad extension type, e.g. encrypted or compressed
-
+        """

-        extension =
+        extension = "".join(pathlib.Path(filename).suffixes).lower()
         if any([extension.endswith(e.lower()) for e in self.dont_parse]):
             if extension not in self.parent.file_extensions:
-                log.debug(f
+                log.debug(f"{self.target}: Not parsing {filename} due to undesirable extension")
                 return True
         return False

-
     def extension_blacklisted(self, filename):
-
+        """
         Return True if folder, file name, or extension has been blacklisted
-
-        extension =
+        """
+        extension = "".join(pathlib.Path(filename).suffixes).lower()
         excluded_extensions = list(self.parent.extension_blacklist)

         if not excluded_extensions:
@@ -386,39 +415,34 @@ class Spiderling:
         if not any([extension.endswith(e) for e in excluded_extensions]):
             return False
         else:
-            log.debug(f
+            log.debug(f"{self.target}: Skipping file with blacklisted extension: {filename}")
             return True

-
     def extension_whitelisted(self, filename):
-
+        """
         Return True if file extension has been whitelisted
-
+        """
         # a .tar.gz file will match both filters ".gz" and ".tar.gz"
-        extension =
+        extension = "".join(pathlib.Path(filename).suffixes).lower()
         extensions = list(self.parent.file_extensions)

         if not extensions:
             return True

         # if whitelist check passes
-        if
-            log.debug(f
+        if any([(extension.endswith(e) if e else extension == e) for e in extensions]):
+            log.debug(f"{self.target}: {filename} matches extension filters")
             return True
         else:
-            log.debug(f
+            log.debug(f"{self.target}: Skipping file {filename}, does not match extension filters")
             return False

-
-
-        '''
+    def message_parent(self, message_type, content=""):
+        """
         Send a message to the parent spider
-
-
-        self.parent.spiderling_queue.put(
-            SpiderlingMessage(message_type, self.target, content)
-        )
+        """

+        self.parent.spiderling_queue.put(SpiderlingMessage(message_type, self.target, content))

     def parse_local_files(self, files):

@@ -426,37 +450,60 @@ class Spiderling:
             for r in pool.map(self.parse_file, files):
                 pass

-
     def save_file(self, remote_file):
-
+        """
         Moves a file from temp storage into the loot directory
-
+        """

-        allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits +
+        allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + "._ "

         # replace backslashes with underscores to preserve directory names
-        loot_filename = str(remote_file).replace(
+        loot_filename = str(remote_file).replace("\\", "_")
         # remove weird characters
-        loot_filename =
+        loot_filename = "".join([c for c in loot_filename if c in allowed_chars])
         loot_dest = self.parent.loot_dir / loot_filename
         try:
             move(str(remote_file.tmp_filename), str(loot_dest))
         except Exception:
-            log.warning(f
-
+            log.warning(f"Error saving {remote_file}")

     def get_file(self, remote_file):
-
+        """
         Attempts to retrieve "remote_file" from share and returns True if successful
-
+        """

         try:
             smb_client = self.parent.get_smb_client(self.target)
-            log.debug(f
+            log.debug(f"{self.target}: Downloading {remote_file.share}\\{remote_file.name}")
             remote_file.get(smb_client)
             return True
         except FileRetrievalError as e:
-            log.debug(f
+            log.debug(f"{self.target}: {e}")

         return False

+    def date_match(self, file_time):
+        """
+        Return True if file modification time matches date filters
+        file_time is a unix timestamp
+        """
+
+        if file_time is None:
+            return True
+
+        # Convert timestamp to datetime
+        file_date = datetime.fromtimestamp(file_time)
+
+        # Check modified_after
+        if self.parent.modified_after:
+            if file_date < self.parent.modified_after:
+                log.debug(f"{self.target}: File too old: {file_date.strftime('%Y-%m-%d')}")
+                return False
+
+        # Check modified_before
+        if self.parent.modified_before:
+            if file_date > self.parent.modified_before:
+                log.debug(f"{self.target}: File too new: {file_date.strftime('%Y-%m-%d')}")
+                return False
+
+        return True