assemblyline-v4-service 4.4.0.24__py3-none-any.whl → 4.4.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

Files changed (42) hide show
  1. assemblyline_v4_service/VERSION +1 -1
  2. assemblyline_v4_service/common/api.py +3 -2
  3. assemblyline_v4_service/common/base.py +3 -4
  4. assemblyline_v4_service/common/helper.py +1 -2
  5. assemblyline_v4_service/common/{extractor/ocr.py → ocr.py} +0 -1
  6. assemblyline_v4_service/common/ontology_helper.py +7 -8
  7. assemblyline_v4_service/common/request.py +4 -5
  8. assemblyline_v4_service/common/result.py +3 -3
  9. assemblyline_v4_service/common/task.py +3 -3
  10. assemblyline_v4_service/common/utils.py +2 -2
  11. assemblyline_v4_service/updater/helper.py +4 -0
  12. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/METADATA +1 -1
  13. assemblyline_v4_service-4.4.0.26.dist-info/RECORD +28 -0
  14. assemblyline_v4_service/common/balbuzard/__init__.py +0 -0
  15. assemblyline_v4_service/common/balbuzard/balbuzard.py +0 -656
  16. assemblyline_v4_service/common/balbuzard/bbcrack.py +0 -830
  17. assemblyline_v4_service/common/balbuzard/patterns.py +0 -650
  18. assemblyline_v4_service/common/dynamic_service_helper.py +0 -3631
  19. assemblyline_v4_service/common/extractor/__init__.py +0 -1
  20. assemblyline_v4_service/common/extractor/base64.py +0 -86
  21. assemblyline_v4_service/common/extractor/pe_file.py +0 -51
  22. assemblyline_v4_service/common/icap.py +0 -149
  23. assemblyline_v4_service/common/keytool_parse.py +0 -66
  24. assemblyline_v4_service/common/pestudio/__init__.py +0 -0
  25. assemblyline_v4_service/common/pestudio/xml/__init__.py +0 -0
  26. assemblyline_v4_service/common/pestudio/xml/features.xml +0 -5607
  27. assemblyline_v4_service/common/pestudio/xml/functions.xml +0 -5824
  28. assemblyline_v4_service/common/pestudio/xml/languages.xml +0 -375
  29. assemblyline_v4_service/common/pestudio/xml/resources.xml +0 -511
  30. assemblyline_v4_service/common/pestudio/xml/signatures.xml +0 -29105
  31. assemblyline_v4_service/common/pestudio/xml/strings.xml +0 -2379
  32. assemblyline_v4_service/common/safelist_helper.py +0 -73
  33. assemblyline_v4_service/common/section_reducer.py +0 -43
  34. assemblyline_v4_service/common/tag_helper.py +0 -117
  35. assemblyline_v4_service/common/tag_reducer.py +0 -242
  36. assemblyline_v4_service/testing/__init__.py +0 -0
  37. assemblyline_v4_service/testing/helper.py +0 -463
  38. assemblyline_v4_service/testing/regenerate_results.py +0 -37
  39. assemblyline_v4_service-4.4.0.24.dist-info/RECORD +0 -53
  40. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/LICENCE.md +0 -0
  41. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/WHEEL +0 -0
  42. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/top_level.txt +0 -0
@@ -1,656 +0,0 @@
1
- #! /usr/bin/env python2
2
- """
3
- 2016-10-21:
4
- Modified version of balbuzard application for AL, original code found here:
5
- https://github.com/decalage2/balbuzard
6
- """
7
- """
8
- balbuzard - v0.20 2014-06-29 Philippe Lagadec
9
-
10
- Balbuzard is a tool to quickly extract patterns from suspicious files for
11
- malware analysis (IP addresses, domain names, known file headers and strings,
12
- etc).
13
-
14
- For more info and updates: http://www.decalage.info/balbuzard
15
- """
16
-
17
- # LICENSE:
18
- #
19
- # balbuzard is copyright (c) 2007-2014, Philippe Lagadec (http://www.decalage.info)
20
- # All rights reserved.
21
- #
22
- # Redistribution and use in source and binary forms, with or without modification,
23
- # are permitted provided that the following conditions are met:
24
- #
25
- # * Redistributions of source code must retain the above copyright notice, this
26
- # list of conditions and the following disclaimer.
27
- # * Redistributions in binary form must reproduce the above copyright notice,
28
- # this list of conditions and the following disclaimer in the documentation
29
- # and/or other materials provided with the distribution.
30
- #
31
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
32
- # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
34
- # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
35
- # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36
- # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
37
- # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38
- # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
39
- # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41
-
42
- __version__ = '0.20'
43
-
44
- #------------------------------------------------------------------------------
45
- # CHANGELOG:
46
- # 2007-07-11 v0.01 PL: - 1st version
47
- # 2007-07-30 v0.02 PL: - added list of patterns
48
- # 2007-07-31 v0.03 PL: - added patterns
49
- # - added hexadecimal dump
50
- # 2007-08-09 v0.04 PL: - improved some regexs, added Petite detection
51
- # 2008-06-06 v0.05 PL: - escape non-printable characters with '\xNN' when
52
- # displaying matches
53
- # - optional custom pattern list in reScan_custom.py
54
- # - optional call to magic.py to guess filetype
55
- # 2011-05-06 v0.06 PL: - added bruteforce functions
56
- # 2013-02-24 v0.07 PL: - renamed rescan to balbuzard
57
- # - changed license from CeCILL v2 to BSD
58
- # - added patterns for URL, e-mail, Flash
59
- # - new Pattern class to add patterns
60
- # - pattern can now be a regex or a string, with weight
61
- # - moved bruteforce functions to balbucrack
62
- # 2013-03-18 v0.08 PL: - a few more/improved patterns
63
- # - optionparser with option -s for short display
64
- # 2013-03-21 v0.09 PL: - open file from password-protected zip (inspired from
65
- # Didier Stevens' pdfid, thanks Didier! :-)
66
- # - improved plugin system
67
- # 2013-03-26 v0.10 PL: - improved Pattern and Pattern_re classes
68
- # 2013-07-31 v0.11 PL: - added support for Yara plugins
69
- # 2013-08-28 v0.12 PL: - plugins can now be in subfolders
70
- # - improved OLE2 pattern
71
- # 2013-12-03 v0.13 PL: - moved patterns to separate file patterns.py
72
- # - fixed issue when balbuzard launched from another dir
73
- # - added CSV output
74
- # 2013-12-04 v0.14 PL: - can now scan several files from command line args
75
- # - now short display is default, -v for hex view
76
- # 2013-12-09 v0.15 PL: - Pattern_re: added filter function to ignore false
77
- # positives
78
- # 2014-01-14 v0.16 PL: - added riglob, ziglob
79
- # - new option -r to find files recursively in subdirs
80
- # - new option -f to find files within zips with wildcards
81
- # 2014-01-23 v0.17 PL: - Pattern: added partial support for filter function
82
- # 2014-02-24 v0.18 PL: - fixed bug with main_dir when balbuzard is imported
83
- # 2014-03-21 v0.19 PL: - fixed bug when Yara-python is not installed
84
- # 2014-06-29 v0.20 PL: - simplified bbcrack transforms, added Yara signatures
85
-
86
-
87
- #------------------------------------------------------------------------------
88
- # TODO:
89
- # + add yara plugins support to Balbuzard.count and scan_profiling
90
- # + merge Balbuzard.scan_hexdump and short
91
- # + option to choose which plugins to load: all (default), none, python or yara
92
- # only
93
- # + option to use the Yara-python engine for searching (translating balbuzard
94
- # patterns to yara at runtime)
95
- # - Yara plugins: keep track of the filename containing each set of Yara rules
96
- # - option to support Unicode strings? (need to check 2 alignments and 2 byte
97
- # orders, or simply insert \x00 between all chars, e.g. 'T\x00E\x00S\x00T')
98
- # + improve patterns to avoid some false positives: maybe use pefile or magic.py ?
99
- # - HTML report with color highlighting
100
- # - GUI ?
101
- # - optional use of other magic libs (TrIDscan, pymagic, python-magic, etc: see PyPI)
102
- # - provide samples
103
- # - RTF hex object decoder?
104
- # - option to decode stream before searching: unicode, hex, base64, etc
105
- # - options for XML outputs
106
- # - export to OpenIOC?
107
- # ? zip file: open all files instead of only the 1st one, or add an option to
108
- # specify the filename(s) to open within the zip, with wildcards?
109
-
110
-
111
- # ISSUES:
112
- # - BUG: it seems that re ignores null bytes in patterns, despite what the doc says?
113
- # - BUG: the URL pattern is not fully correct, need to find a better one
114
- # - BUG: the e-mail pattern catches a lot of false positives.
115
-
116
-
117
- #--- IMPORTS ------------------------------------------------------------------
118
-
119
- import fnmatch
120
- import glob
121
- import optparse
122
- import os
123
- import os.path
124
- import regex as re
125
- import string
126
- import sys
127
- import time
128
- import zipfile
129
-
130
-
131
- #import csv
132
-
133
- # try to import yara-python:
134
- # try:
135
- # import yara
136
- # YARA = True
137
- # except:
138
- # YARA = False
139
-
140
-
141
- #--- CLASSES ------------------------------------------------------------------
142
-
143
class Pattern(object):
    """
    A Pattern object is a string or a list of strings to be searched in data.

    Attributes:
        - name: str, description of the pattern for display
        - pat: list of bytes to be searched (str values given to the
          constructor are converted with str.encode())
        - nocase: bool, if True, search is case-insensitive
        - pat_lower: list of lowercased entries of pat; only set when
          nocase is True
        - single: bool, if True count() stops at the first occurrence
        - weight: int, weight used by balbucrack
        - total_time: profiling accumulator, starts at 0
        - filter: function to filter out false positives, should be a function
          with arguments (value, index, pattern), returning True when
          acceptable or False when it is a false positive.
    """

    def __init__(self, name, pat=None, nocase=False, single=False, weight=1,
                 filt=None):
        """
        Store the pattern settings and normalise pat to a list of bytes.

        pat may be a single str/bytes value, a list or a tuple of them, or
        None (no search string at all).
        """
        self.name = name
        # self.pat should always be a list of byte strings:
        if pat is None:
            pat = []
        elif not isinstance(pat, (list, tuple)):
            pat = [pat]
        self.pat = [p if isinstance(p, bytes) else p.encode() for p in pat]

        self.nocase = nocase
        if nocase:
            # Lowercased copy of pat. This must be a real list: a generator
            # would be exhausted by the first search and every later call
            # would silently find nothing.
            self.pat_lower = [x.lower() for x in self.pat]
        self.single = single
        self.weight = weight
        # for profiling:
        self.total_time = 0
        self.filter = filt

    def _search_inputs(self, data, data_lower):
        """Return the (haystack, needles) pair matching the nocase setting."""
        if not self.nocase:
            return data, self.pat
        if data_lower is None:
            # caller did not precompute the lowercase copy: do it here
            data_lower = data.lower()
        return data_lower, self.pat_lower

    def find_all(self, data, data_lower=None):
        """
        Find all occurrences of the pattern in data.

        data_lower should be set to data.lower() if there are
        case-insensitive patterns (it's better to do it only once); it is
        computed here when omitted.

        Returns a list of tuples (index, matched string).
        """
        haystack, needles = self._search_inputs(data, data_lower)
        found = []
        for s in needles:
            length = len(s)
            for i in str_find_all(haystack, s):
                # the matched string is not always s, case can differ:
                match = data[i:i + length]
                if self.filter is None or self.filter(value=match, index=i,
                                                      pattern=self):
                    found.append((i, match))
                else:
                    # debug message:
                    print('Filtered out %s: %s' % (self.name, repr(match)))
        return found

    def count(self, data, data_lower=None):
        """
        Count all occurrences of the pattern in data.

        When single is True the result is 1 as soon as any of the strings
        is present, else 0. The filter function is not applied here.

        data_lower should be set to data.lower() if there are
        case-insensitive patterns (it's better to do it only once); it is
        computed here when omitted.

        Returns an integer.
        """
        # TODO: add support for filter? (will be much slower...)
        haystack, needles = self._search_inputs(data, data_lower)
        if self.single:
            return 1 if any(s in haystack for s in needles) else 0
        return sum(haystack.count(s) for s in needles)
236
-
237
-
238
-
239
class Pattern_re(Pattern):
    """
    A Pattern_re object is a regular expression to be searched in data.

    Attributes:
        - name: str, description of the pattern for display
        - pat: compiled regular expression to be searched
        - trigger: str or list of strings to be searched before pat
        - trigger_pat: Pattern built from trigger; only set when trigger is
          not None
        - nocase: bool, if True, search is case-insensitive
        - single: bool, stored by the Pattern constructor
        - weight: int, weight used by balbucrack
        - filter: function to filter out false positives, should be a function
          with arguments (value, index, pattern), returning True when
          acceptable or False when it is a false positive.
    """

    def __init__(self, name, pat=None, trigger=None, nocase=False, single=False,
                 weight=1, filt=None):
        """Initialise the base Pattern, then compile pat as a regex."""
        super(Pattern_re, self).__init__(name, pat, nocase, single, weight)
        # replace the list built by the base class with the compiled regex
        self.pat = re.compile(pat, re.IGNORECASE if nocase else 0)
        self.trigger = trigger
        if trigger is not None:
            # second, plain-string pattern used as a cheap pre-check:
            self.trigger_pat = Pattern(name, pat=trigger, nocase=nocase,
                                       single=True)
        self.filter = filt

    def _trigger_absent(self, data, data_lower):
        """Return True when a trigger is defined and it is not in data."""
        if self.trigger is None:
            return False
        return self.trigger_pat.count(data, data_lower) == 0

    def find_all(self, data, data_lower=None):
        """
        Find all occurrences of the regex in data.

        data_lower should be set to data.lower() if there are
        case-insensitive patterns (it's better to do it only once).

        Returns a list of tuples (index, matched string).
        """
        # when a trigger is specified and not found, skip the regex entirely
        if self._trigger_absent(data, data_lower):
            return []
        hits = []
        for m in self.pat.finditer(data):
            accepted = True
            if self.filter is not None:
                accepted = self.filter(value=m.group(), index=m.start(),
                                       pattern=self)
            if accepted:
                hits.append((m.start(), m.group()))
        return hits

    def count(self, data, data_lower=None):
        """
        Count all occurrences of the regex in data.

        data_lower should be set to data.lower() if there are
        case-insensitive patterns (it's better to do it only once).

        Returns an integer.
        """
        # when a trigger is specified and not found, skip the regex entirely
        if self._trigger_absent(data, data_lower):
            return 0
        # when no filter is defined, quickest way to count:
        if self.filter is None:
            return len(self.pat.findall(data))
        # otherwise the filter decides, match by match:
        return sum(
            1 for m in self.pat.finditer(data)
            if self.filter(value=m.group(), index=m.start(), pattern=self)
        )
315
-
316
-
317
- #------------------------------------------------------------------------------
318
class Balbuzard(object):
    """
    Class to scan a string of data, searching for a set of patterns (strings
    and regular expressions).

    Attributes:
        - patterns: list of pattern objects; each must provide
          find_all(data, data_lower) and count(data, data_lower)
        - time: duration of the last completed scan_profiling() run, as
          measured with time.perf_counter()
    """

    def __init__(self, patterns=None, yara_rules=None):
        """
        patterns: list of pattern objects to search for (default: empty).
        yara_rules: accepted but not used by this class.
        """
        self.patterns = [] if patterns is None else patterns

    def list_patterns(self):
        """
        Adding function for FrankenStrings to get regex patterns when needed
        """
        return self.patterns

    def scan(self, data):
        """
        Scans data for all patterns. This is an iterator: for each pattern
        found, yields the Pattern object and a list of matches as tuples
        (index in data, matched string).
        """
        # prep lowercase version of data for case-insensitive patterns
        data_lower = data.lower()
        for pattern in self.patterns:
            matches = pattern.find_all(data, data_lower)
            if len(matches) > 0:
                yield pattern, matches

    def scan_profiling(self, data):
        """
        Scans data for all patterns. This is an iterator: for each pattern
        found, yields the Pattern object and a list of matches as tuples
        (index in data, matched string).

        Version with profiling, to check which patterns take time: sets
        pattern.time, adds it to pattern.total_time, and sets self.time once
        the iterator is exhausted.
        """
        # time.clock() no longer exists on Python 3.8+, hence perf_counter()
        start = time.perf_counter()
        # prep lowercase version of data for case-insensitive patterns
        data_lower = data.lower()
        for pattern in self.patterns:
            start_pattern = time.perf_counter()
            matches = pattern.find_all(data, data_lower)
            pattern.time = time.perf_counter() - start_pattern
            pattern.total_time += pattern.time
            if len(matches) > 0:
                yield pattern, matches
        self.time = time.perf_counter() - start

    def count(self, data):
        """
        Scans data for all patterns. This is an iterator: for each pattern
        found, yields the Pattern object and the count as int.
        """
        # prep lowercase version of data for case-insensitive patterns
        data_lower = data.lower()
        for pattern in self.patterns:
            count = pattern.count(data, data_lower)
            if count:
                yield pattern, count

    def scan_display(self, data, filename, hexdump=False, csv_writer=None):
        """
        Scans data for all patterns, displaying an hexadecimal dump for each
        match on the console (if hexdump=True), or one line for each
        match (if hexdump=False).

        When csv_writer is not None, one row per match is also written:
        [filename, index, pattern name, found string, length].
        """
        for pattern, matches in self.scan(data):
            if hexdump:
                print("-"*79)
                print("%s:" % pattern.name)
            for index, match in matches:
                # limit matched string display to 50 chars:
                m = repr(match)
                if len(m) > 50:
                    m = m[:24]+'...'+m[-23:]
                if hexdump:
                    print("at %08X: %s" % (index, m))
                    # 5 lines of hexadecimal dump around the pattern:
                    # 2 lines = 32 bytes; the mask aligns on 16-byte rows
                    start = max(index-32, 0) & 0xFFFFFFF0
                    index_end = index + len(match)
                    end = min(index_end+32+15, len(data)) & 0xFFFFFFF0
                    print(hexdump3(data[start:end], length=16, startindex=start))
                    print("")
                else:
                    print("at %08X: %s - %s" % (index, pattern.name, m))
                if csv_writer is not None:
                    # ['Filename', 'Index', 'Pattern name', 'Found string', 'Length']
                    csv_writer.writerow([filename, '0x%08X' % index, pattern.name,
                                         m, len(match)])
        # blank line between each file:
        print('')
427
-
428
- ## if item == "EXE MZ headers" and MAGIC:
429
- ## # Check if it's really a EXE header
430
- ## print "Magic: %s\n" % magic.whatis(data[m.start():])
431
-
432
-
433
-
434
- #--- GLOBALS ------------------------------------------------------------------
435
-
436
# Module-level pattern list, created empty here.
# NOTE(review): presumably filled by the patterns.py code exec'd further
# down in this module -- confirm against patterns.py.
patterns = []
437
-
438
-
439
- #--- FUNCTIONS ----------------------------------------------------------------
440
-
441
- ##def add_pattern(name, regex=None, string=None, weight=1):
442
- ## patterns.append(Pattern(name, regex, string, weight))
443
-
444
-
445
- # HEXDUMP from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812
446
-
447
# 256-entry table for hexdump output: position x holds chr(x) when repr()
# shows that character unescaped (repr is exactly 3 characters long),
# otherwise a dot.
FILTER = ''.join(
    chr(code) if len(repr(chr(code))) == 3 else '.'
    for code in range(256)
)
448
-
449
- ##def hexdump(src, length=8):
450
- ## N=0; result=''
451
- ## while src:
452
- ## s,src = src[:length],src[length:]
453
- ## hexa = ' '.join(["%02X"%ord(x) for x in s])
454
- ## s = s.translate(FILTER)
455
- ## result += "%04X %-*s %s\n" % (N, length*3, hexa, s)
456
- ## N+=length
457
- ## return result
458
- ##
459
- ##def hexdump2(src, length=8):
460
- ## result=[]
461
- ## for i in xrange(0, len(src), length):
462
- ## s = src[i:i+length]
463
- ## hexa = ' '.join(["%02X"%ord(x) for x in s])
464
- ## printable = s.translate(FILTER)
465
- ## result.append("%04X %-*s %s\n" % (i, length*3, hexa, printable))
466
- ## return ''.join(result)
467
-
468
- # my improved hexdump, to add a start index:
469
# Printable-character table used by hexdump3, built with the same rule as
# the module-level FILTER table (kept private so that hexdump3 is
# self-contained).
_HEXDUMP_PRINTABLE = ''.join(
    chr(x) if len(repr(chr(x))) == 3 else '.' for x in range(256))


# my improved hexdump, to add a start index:
def hexdump3(src, length=8, startindex=0):
    """
    Returns a hexadecimal dump of a binary string.

    src: bytes (or str) to dump.
    length: number of bytes per row.
    startindex: index of 1st byte.

    Each row is "<offset> <hex values> <printable view>" followed by a
    newline; an empty src gives an empty string.
    """
    result = []
    for i in range(0, len(src), length):
        chunk = src[i:i+length]
        # iterating bytes yields ints on Python 3, iterating str yields
        # 1-character strings: normalise both to integer values
        values = [c if isinstance(c, int) else ord(c) for c in chunk]
        hexa = ' '.join("%02X" % v for v in values)
        # values outside the table (str input only) are shown unchanged
        printable = ''.join(
            _HEXDUMP_PRINTABLE[v] if v < 256 else chr(v) for v in values)
        result.append("%04X %-*s %s\n" % (i+startindex, length*3, hexa, printable))
    return ''.join(result)
482
-
483
-
484
def str_find_all(a_str, sub):
    """
    Yield the index of each non-overlapping occurrence of sub in a_str,
    from left to right.

    a_str and sub must be of the same kind (both bytes or both str).
    An empty sub yields nothing: the search position could never advance
    past an empty match, so the generator would otherwise never terminate.
    """
    step = len(sub)
    if step == 0:
        return
    start = a_str.find(sub)
    while start != -1:
        yield start
        # resume after the match, so occurrences never overlap
        start = a_str.find(sub, start + step)
491
-
492
-
493
- # recursive glob function to find plugin files in any subfolder:
494
- # inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python
495
def rglob(path, pattern='*.*'):
    """
    Recursive glob:
    similar to glob.glob, but finds files recursively in all subfolders of
    path.

    path: root directory where to search files
    pattern: pattern for filenames, using wildcards, e.g. *.txt

    Returns a list of paths, each one made of the directory where the file
    was found joined with the file name.
    """
    # TODO: more compatible API with glob: use single param, split path from pattern
    collected = []
    for folder, _subfolders, names in os.walk(path):
        for name in fnmatch.filter(names, pattern):
            collected.append(os.path.join(folder, name))
    return collected
506
-
507
-
508
def riglob(pathname):
    """
    Recursive iglob:
    similar to glob.iglob, but finds files recursively in all subfolders of
    path.

    pathname: root directory where to search files followed by pattern for
    filenames, using wildcards, e.g. *.txt

    When pathname has no directory part (e.g. '*.txt'), the search starts
    in the current directory; os.walk('') would otherwise yield nothing.

    Iterator: yields the path of each matching file.
    """
    path, filespec = os.path.split(pathname)
    if not path:
        path = os.curdir
    for dirpath, _dirnames, files in os.walk(path):
        for f in fnmatch.filter(files, filespec):
            yield os.path.join(dirpath, f)
519
-
520
-
521
def ziglob(zipfileobj, pathname):
    """
    iglob in a zip:
    similar to glob.iglob, but finds files within a zip archive.

    - zipfileobj: zipfile.ZipFile object
    - pathname: pattern for the member names, using wildcards, e.g. *.txt

    Prints the name of every member of the archive, then yields the names
    that match pathname.
    """
    members = zipfileobj.namelist()
    for member in members:
        print(member)
    yield from fnmatch.filter(members, pathname)
533
-
534
-
535
def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
    """
    Open each file provided as argument:
    - files is a list of arguments
    - if zip_password is None, each file is opened and read as-is. Wildcards
      are supported.
    - if not, then each file is opened as a zip archive with the provided
      password (a str password is encoded to bytes, as zipfile requires)
    - then files matching zip_fname are opened from the zip archive

    recursive: if True, wildcards are also searched in subdirectories.

    Iterator: yields (filename, data) for each file. In zip mode filename is
    the name of the member inside the archive.
    """
    # choose recursive or non-recursive iglob:
    iglob = riglob if recursive else glob.iglob
    if isinstance(zip_password, str):
        # ZipFile.read() only accepts the password as bytes on Python 3
        zip_password = zip_password.encode()
    for filespec in files:
        for filename in iglob(filespec):
            if zip_password is not None:
                # Each file is a zip archive:
                print('Opening zip archive %s with provided password' % filename)
                # the archive stays open while its members are yielded and
                # is closed when this generator moves on or is closed
                with zipfile.ZipFile(filename, 'r') as z:
                    print('Looking for file(s) matching "%s"' % zip_fname)
                    for member in ziglob(z, zip_fname):
                        print('Opening file in zip archive:', member)
                        data = z.read(member, zip_password)
                        yield member, data
            else:
                # normal file
                print('Opening file', filename)
                with open(filename, 'rb') as f:
                    data = f.read()
                yield filename, data
566
-
567
-
568
def relpath(path, start='.'):
    """
    Convert a path to a relative path with os.path.relpath.

    If os.path.relpath fails (for instance with ValueError on an empty
    path), the path is returned unchanged and no exception is raised.
    (this function is just for backward compatibility)
    """
    try:
        return os.path.relpath(path, start)
    except Exception:
        # Best effort by design; unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.
        return path
579
-
580
-
581
- #=== INITIALIZATION ===========================================================
582
-
583
- # get main directory where this script is located:
584
# get main directory where this script is located:
main_dir = os.path.dirname(__file__)

# load patterns: patterns.py is executed in this module's namespace
patfile = os.path.join(main_dir, 'patterns.py')
# save __doc__, else it seems to be overwritten:
d = __doc__
# NOTE(review): exec() runs whatever patterns.py contains with this module's
# globals; that is only acceptable because the file sits next to this module.
# Reading bytes lets compile() apply the source-encoding rules itself, the
# file name makes tracebacks point at patterns.py, and the handle is closed.
with open(patfile, 'rb') as _patfile_handle:
    exec(compile(_patfile_handle.read(), patfile, 'exec'))
__doc__ = d
del d, _patfile_handle
597
-
598
-
599
-
600
- #=== MAIN =====================================================================
601
-
602
if __name__ == '__main__':

    # Command-line entry point: declare the options, parse the command
    # line, and show the module documentation plus the usage text when no
    # file name is given.
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <filename> [filename2 ...]')
    ## ('-o', '--outfile'), dest='outfile', help='output file'
    option_specs = (
        (('-c', '--csv'),
         dict(dest='csv',
              help='export results to a CSV file')),
        (("-v",),
         dict(action="store_true", dest="verbose",
              help='verbose display, with hex view.')),
        (("-r",),
         dict(action="store_true", dest="recursive",
              help='find files recursively in subdirectories.')),
        (("-z", "--zip"),
         dict(dest='zip_password', type='str', default=None,
              help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')),
        (("-f", "--zipfname"),
         dict(dest='zip_fname', type='str', default='*',
              help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')),
    )
    # registered in declaration order, which is also the help display order
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    options, args = parser.parse_args()

    # Print help if no arguments are passed
    if not args:
        print(__doc__)
        parser.print_help()
        sys.exit()
626
-
627
- # load plugins
628
- #for f in rglob(plugins_dir, 'bbz*.py'): # glob.iglob('plugins/bbz*.py'):
629
- # print 'Loading plugin from', relpath(f, plugins_dir)
630
- # execfile(f)
631
-
632
- # load yara plugins
633
- # if YARA:
634
- # yara_rules = []
635
- # for f in rglob(plugins_dir, '*.yara'): #glob.iglob('plugins/*.yara'): # or bbz*.yara?
636
- # print 'Loading yara plugin from', relpath(f, plugins_dir)
637
- # yara_rules.append(yara.compile(f))
638
- # else:
639
- # yara_rules = None
640
-
641
- # open CSV file
642
- # if options.csv:
643
- # print 'Writing output to CSV file: %s' % options.csv
644
- # csvfile = open(options.csv, 'wb')
645
- # csv_writer = csv.writer(csvfile)
646
- # csv_writer.writerow(['Filename', 'Index', 'Pattern name',
647
- # 'Found string', 'Length'])
648
- # else:
649
- # csv_writer = None
650
- #
651
- # # close CSV file
652
- # if options.csv:
653
- # csvfile.close()
654
-
655
-
656
- # This was coded while listening to The National "Boxer".