fileshunter 0.1.0.20130725
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
module FilesHunter

  # Decoders that are based on begin patterns (such as Magic Numbers) inherit from this class.
  # They then have to implement the following methods:
  # * *get_begin_pattern*: To give the begin pattern and eventual options
  # * *decode*: To decode data starting a given offset that matches the begin pattern
  # * *check_begin_pattern*: Provide a quick check of the begin pattern when found [optional]
  # They can then use the following DSL in the decode method:
  # * *found_relevant_data*: Indicate that we are certain the beginning of data of the given extension has been found
  # * *invalid_data*: Indicate the data read is invalid for our Decoder
  # * *truncated_data*: Indicate the data should have continued if it were to be complete. This can happen even in the middle of a stream, if the data has been corrupted.
  # * *progress*: Indicate the progression of the scan: everything before the progression is considered valid for the given extension (if found_relevant_data was called previously)
  # * *metadata*: Set metadata properties
  class BeginPatternDecoder < Decoder

    # Error raised when decoded data stops before its expected end.
    class TruncatedDataError < RuntimeError

      # _Fixnum_: Offset at which the data was detected as truncated
      attr_reader :cursor_truncated

      # Constructor
      #
      # Parameters::
      # * *message* (_String_): The error message
      # * *cursor_truncated* (_Fixnum_): The exceeding offset
      def initialize(message, cursor_truncated)
        super(message)
        @cursor_truncated = cursor_truncated
      end

    end

    # Error raised when the data does not match this Decoder's format
    class InvalidDataError < RuntimeError
    end

    # Find segments from a given data
    def find_segments
      @begin_pattern, options = get_begin_pattern
      log_debug "Pattern to find: #{@begin_pattern.inspect}"
      # Cache whether the subclass implements the optional quick check
      @has_to_check_begin_pattern = self.respond_to?(:check_begin_pattern)
      # Parse options
      @max_regexp_size = 32
      @offset_inc = 1
      @begin_pattern_offset_in_segment = 0
      if (options != nil)
        @max_regexp_size = options[:max_regexp_size] if (options[:max_regexp_size] != nil)
        @offset_inc = options[:offset_inc] if (options[:offset_inc] != nil)
        @begin_pattern_offset_in_segment = options[:begin_pattern_offset_in_segment] if (options[:begin_pattern_offset_in_segment] != nil)
      end
      @metadata = {}
      @missing_previous_data = false
      foreach_begin_pattern do |begin_pattern_offset|
        next decode(begin_pattern_offset)
      end
    end

    protected

    # Mark the current decoding as being valid.
    # This is called when the decoder knows that it has valid data matching its specification.
    # Before calling this method, decoded data might still be junk.
    #
    # Parameters::
    # * *extension* (_Symbol_ or <em>list<Symbol></em>): Extension(s) this data belongs to
    def found_relevant_data(extension)
      @extension = extension
    end

    # Indicate that the data is invalid.
    # This will stop the decoding by raising an exception.
    #
    # Parameters::
    # * *message* (_String_): Message to give with the exception [default = '']
    def invalid_data(message = '')
      raise InvalidDataError.new(message)
    end

    # Indicate that the data is truncated.
    # This will stop the decoding by raising an exception.
    #
    # Parameters::
    # * *message* (_String_): Message to give with the exception [default = '']
    # * *cursor_truncated* (_Fixnum_): Cursor where data has been truncated [default = nil]
    def truncated_data(message = '', cursor_truncated = nil)
      # Fall back to the last progress offset, then to the end of the analyzed window.
      raise TruncatedDataError.new(message, ((cursor_truncated == nil) ? ((@last_offset_to_be_decoded == nil) ? @end_offset : @last_offset_to_be_decoded) : cursor_truncated))
    end

    # Indicate that the data is missing previous data.
    def missing_previous_data
      @missing_previous_data = true
    end

    # Indicate progression in the decoding
    #
    # Parameters::
    # * *offset_to_be_decoded* (_Fixnum_): Next to be decoded
    def progress(offset_to_be_decoded)
      @last_offset_to_be_decoded = offset_to_be_decoded
      raise TruncatedDataError.new("Progression @#{offset_to_be_decoded} is over limit (#{@end_offset})", @end_offset) if (@last_offset_to_be_decoded > @end_offset)
      # Also give the analyzer a chance to cancel parsing
      keep_alive
    end

    # Set metadata properties
    #
    # Parameters::
    # * *properties* (<em>map<Symbol,Object></em>): The properties to be set
    def metadata(properties)
      #log_debug "Add metadata: #{properties.inspect}"
      @metadata.merge!(properties)
    end

    private

    # Find a starting pattern and call a client block when it matches.
    # Client block decodes data, and calls the following methods to give progress on its decoding:
    # * *found_relevant_data*: Indicate that there is valid data to be decoded. If a TruncatedDataError occurs before this method is called, the data is ignored ; otherwise it will be marked as decoded but truncated to the end of the current segment.
    # * *progress*: Indicate progression
    # * *truncated_data*: Indicate that the data is truncated
    # * *invalid_data*: Indicate that the data is invalid
    #
    # Parameters::
    # * _Block_: Client code called when such a pattern matches. Its goal is to decode correctly at the given offset.
    #   * Parameters::
    #     * *begin_pattern_offset* (_Fixnum_): The offset of the pattern
    #     * *pattern_index* (_Fixnum_): The pattern index that matched the search. Always nil if begin_pattern is not a list.
    #   * Result::
    #     * *end_offset* (_Fixnum_): The ending offset (nil if could not be decoded). If the ending offset returned is greater than end_offset, segment will be considered as truncated.
    def foreach_begin_pattern
      # Loop to the end
      current_offset = @begin_offset
      while (current_offset < @end_offset)
        # Find the begin pattern
        log_debug "Find begin_pattern starting #{current_offset}..."
        # NOTE(review): @data.index is expected to return [offset, pattern_index] — IOBlockReader API, confirm.
        begin_pattern_offset, pattern_index = @data.index(@begin_pattern, current_offset, @max_regexp_size)
        if ((begin_pattern_offset == nil) or
            (begin_pattern_offset >= @end_offset))
          # No match
          current_offset = @end_offset
          log_debug "No more pattern."
        else
          if (begin_pattern_offset >= @begin_offset + @begin_pattern_offset_in_segment)
            # The pattern may live some bytes inside the real segment: rewind to the segment start
            begin_pattern_offset -= @begin_pattern_offset_in_segment
            log_debug "Found begin_pattern at #{begin_pattern_offset}."
            # We have a candidate
            # Try to decode it
            decoded_end_offset = nil
            truncated = false
            @missing_previous_data = false
            @extension = nil
            @last_offset_to_be_decoded = nil
            begin
              # If the decoder can perform additional tests, call them
              begin_pattern_valid = (@has_to_check_begin_pattern) ? check_begin_pattern(begin_pattern_offset, pattern_index) : true
              if begin_pattern_valid
                # Call the Decoder
                decoded_end_offset = yield(begin_pattern_offset, pattern_index)
              else
                log_debug 'Invalid pattern returned by the check.'
              end
            rescue InvalidDataError
              # If data was already validated, it means that the segment is truncated.
              log_debug "Got an invalid data exception while decoding data: #{$!}"
              #log_debug $!.backtrace.join("\n")
              # If not, drop everything.
              if ((@extension != nil) and
                  (@last_offset_to_be_decoded != nil))
                truncated = true
                # Use the last decoded offset as the truncated limit.
                decoded_end_offset = @last_offset_to_be_decoded
              else
                decoded_end_offset = nil
              end
            rescue TruncatedDataError, AccessAfterDataError
              # Data is truncated
              log_debug "Got a truncation exception while decoding data: #{$!}"
              #log_debug $!.backtrace.join("\n")
              # If we already got relevant data, mark it as truncated
              if (@extension != nil)
                truncated = true
                if ($!.is_a?(AccessAfterDataError))
                  decoded_end_offset = $!.exceeding_offset
                else
                  decoded_end_offset = $!.cursor_truncated
                end
              else
                decoded_end_offset = nil
              end
            rescue
              #log_err "Error while decoding data: #{$!}\n#{$!.backtrace.join("\n")}"
              #decoded_end_offset = nil
              raise
            end
            if ((decoded_end_offset == nil) or
                (@extension == nil))
              log_debug 'Invalid segment.'
              # Try searching from further: maybe another BEGIN_PATTERN might be found
              current_offset = begin_pattern_offset + @begin_pattern_offset_in_segment + @offset_inc
            else
              log_debug "Decoded segment in offsets [ #{begin_pattern_offset} - #{decoded_end_offset} ]"
              if (decoded_end_offset > @end_offset)
                log_debug "Decoded segment ends at #{decoded_end_offset} which is greater than #{@end_offset} => truncated"
                decoded_end_offset = @end_offset
                truncated = true
              end
              # Extract the segment and go on to the next
              found_segment(begin_pattern_offset, decoded_end_offset, @extension, truncated, @missing_previous_data, @metadata)
              current_offset = decoded_end_offset
            end
          else
            # Try searching from further: maybe another BEGIN_PATTERN might be found
            current_offset = begin_pattern_offset + @offset_inc
          end
        end
      end
    end

  end

end
module FilesHunter

  # Generic Decode class
  # All Decoders inherit from this class and have to implement the find_segments method, using @data, @begin_offset and @end_offset instance variables to parse data.
  # Here is the DSL Decoders can use in their find_segments method:
  # * *@data* (_IOBlockReader_): The data to be accessed
  # * *@begin_offset* (_Fixnum_): The begin offset
  # * *@end_offset* (_Fixnum_): The end offset
  # * *found_segment*: Method used to indicate a Segment was successfully parsed
  # * *keep_alive*: Method used to indicate progression
  class Decoder

    # Prepare for new search
    #
    # Parameters::
    # * *segments_analyzer* (_SegmentsAnalyzer_): The segments analyzer for which this Decoder is working
    # * *data* (_IOBlockReader_): Data being analyzed
    # * *begin_offset* (_Fixnum_): The begin offset
    # * *end_offset* (_Fixnum_): The end offset
    def setup(segments_analyzer, data, begin_offset, end_offset)
      @segments_analyzer, @data = segments_analyzer, data
      @begin_offset, @end_offset = begin_offset, end_offset
      # Start every search with an empty result list
      @segments = []
    end

    # Return found segments since last setup
    #
    # Result::
    # * <em>list<Segment></em>: The list of segments
    def segments_found
      @segments
    end

    protected

    # Callback called by decoders to notify a Segment has been found successfully
    #
    # Parameters::
    # * *segment_begin_offset* (_Fixnum_): The begin offset
    # * *segment_end_offset* (_Fixnum_): The end offset
    # * *extension* (_Symbol_ or <em>list<Symbol></em>): The extension (can be a list of possible extensions)
    # * *truncated* (_Boolean_): Is the data truncated in this segment?
    # * *missing_previous_data* (_Boolean_): Is some data missing before?
    # * *metadata* (<em>map<Symbol,Object></em>): Metadata associated to this segment (Decoder dependent)
    def found_segment(segment_begin_offset, segment_end_offset, extension, truncated, missing_previous_data, metadata)
      # A segment starting before the analyzed window is a programming error in the Decoder
      raise "Segment begin offset (#{segment_begin_offset}) is lower than data begin offset (#{@begin_offset})" if segment_begin_offset < @begin_offset
      if segment_end_offset > @end_offset
        # Clamp the segment to the analyzed window and flag it as truncated
        log_debug "Segment end offset (#{segment_end_offset}) is greater than data end offset (#{@end_offset}). Mark Segment as truncated."
        segment_end_offset = @end_offset
        truncated = true
      end
      new_segment = Segment.new(segment_begin_offset, segment_end_offset, extension, truncated, missing_previous_data, metadata)
      @segments << new_segment
      # Report progress to the analyzer
      @segments_analyzer.add_bytes_decoded(segment_end_offset - segment_begin_offset)
    end

    # Indicate progression in the decoding
    # This is used to eventually cancel the parsing
    def keep_alive
      raise CancelParsingError, 'Parsing cancelled while decoding' if @segments_analyzer.parsing_cancelled
    end

  end

end
module FilesHunter

  module Decoders

    # Decoder for ASF containers (.asf/.wmv/.wma).
    # An ASF file is a sequence of objects, each starting with a 16-byte GUID
    # followed by the object's total size as a 64-bit little-endian integer.
    class ASF < BeginPatternDecoder

      # GUID of the ASF Header Object: used as the begin pattern (magic number)
      BEGIN_PATTERN_ASF = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C".force_encoding(Encoding::ASCII_8BIT)
      # GUID of the ASF Data Object, expected right after the Header Object
      ASF_DATA_GUID = "\x36\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C".force_encoding(Encoding::ASCII_8BIT)
      # GUIDs of the optional index objects that may trail the Data Object
      ACCEPTABLE_INDEX_GUID = [
        "\x90\x08\x00\x33\xB1\xE5\xCF\x11\x89\xF4\x00\xA0\xC9\x03\x49\xCB".force_encoding(Encoding::ASCII_8BIT),
        "\xD3\x29\xE2\xD6\xDA\x35\xD1\x11\x90\x34\x00\xA0\xC9\x03\x49\xBE".force_encoding(Encoding::ASCII_8BIT),
        "\xF8\x03\xB1\xFE\xAD\x12\x64\x4C\x84\x0F\x2A\x1D\x2F\x7A\xD4\x8C".force_encoding(Encoding::ASCII_8BIT),
        "\xD0\x3F\xB7\x3C\x4A\x0C\x03\x48\x95\x3D\xED\xF7\xB6\x22\x8F\x0C".force_encoding(Encoding::ASCII_8BIT)
      ]

      # Give the begin pattern and its options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: The options
      def get_begin_pattern
        return BEGIN_PATTERN_ASF, { :offset_inc => 16 }
      end

      # Decode ASF data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the begin pattern
      # Result::
      # * _Fixnum_: The ending offset (nil if could not be decoded)
      def decode(offset)
        ending_offset = nil

        # Skip the Header Object: its size is stored at bytes 16..23 of the object.
        # Bug fix: the size was previously read at @data[cursor+16..cursor+23] while
        # cursor was still unassigned (nil) on the right-hand side of its own assignment,
        # which raised NoMethodError (nil + 16). It must be read relative to offset.
        cursor = offset + BinData::Uint64le.read(@data[offset+16..offset+23])
        progress(cursor)
        # Should be on the DATA object
        invalid_data("@#{cursor} - Missing Data object in ASF. GUID does not match.") if (@data[cursor..cursor+15] != ASF_DATA_GUID)
        found_relevant_data(:asf)
        # Skip the Data Object
        cursor += BinData::Uint64le.read(@data[cursor+16..cursor+23])
        progress(cursor)
        # Now cycle through optional Index objects
        while (ending_offset == nil)
          if (ACCEPTABLE_INDEX_GUID.include?(@data[cursor..cursor+15]))
            # There is an index object: skip it too
            cursor += BinData::Uint64le.read(@data[cursor+16..cursor+23])
            progress(cursor)
            # Stop if we reached exactly the end of the analyzed window
            ending_offset = cursor if (cursor == @end_offset)
          else
            # Finished
            ending_offset = cursor
          end
        end

        return ending_offset
      end

    end

  end

end
module FilesHunter

  module Decoders

    # Decoder for BMP bitmap images.
    # Parses the file header (14 bytes), the DIB header (several versions keyed on its size),
    # an optional color palette, and then skips over the pixel data.
    class BMP < BeginPatternDecoder

      # "BM" signature followed by a 32-bit size and 4 reserved zero bytes
      BEGIN_PATTERN_BMP = Regexp.new("BM....\x00\x00\x00\x00", nil, 'n')

      # Byte used to pad pixel data to a 32-bit boundary
      PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

      # Give the begin pattern and its options
      #
      # Result::
      # * _Regexp_: The begin pattern
      # * <em>map<Symbol,Object></em>: The options
      def get_begin_pattern
        return BEGIN_PATTERN_BMP, { :offset_inc => 2, :max_regexp_size => 10 }
      end

      # Decode BMP data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the begin pattern
      # Result::
      # * _Fixnum_: The ending offset (nil if could not be decoded)
      def decode(offset)
        ending_offset = nil

        # Skip the 14-byte file header; the DIB header size identifies the header version
        cursor = offset + 14
        header_size = BinData::Uint32le.read(@data[cursor..cursor+3])
        width = nil
        height = nil
        bpp = nil
        header_version = nil
        bitmap_size = nil
        compression = 0
        if (header_size == 12)
          # BMP v2 header
          header_version = 2
          width = BinData::Sint16le.read(@data[cursor+4..cursor+5])
          height = BinData::Sint16le.read(@data[cursor+6..cursor+7])
          nbr_planes = BinData::Uint16le.read(@data[cursor+8..cursor+9])
          invalid_data("@#{cursor} - Number of planes (#{nbr_planes}) should always be 1") if (nbr_planes != 1)
          bpp = BinData::Uint16le.read(@data[cursor+10..cursor+11])
          invalid_data("@#{cursor} - Invalid BPP: #{bpp}") if (![1,4,8,16,24,32].include?(bpp))
          cursor += header_size
          # Color palette
          cursor += 3*(1 << bpp) if (bpp != 24)
        else
          # BMP v3+ header
          header_version = 3
          width = BinData::Uint32le.read(@data[cursor+4..cursor+7])
          height = BinData::Uint32le.read(@data[cursor+8..cursor+11])
          nbr_planes = BinData::Uint16le.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Number of planes (#{nbr_planes}) should always be 1") if (nbr_planes != 1)
          bpp = BinData::Uint16le.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid BPP: #{bpp}") if (![1,4,8,16,24,32].include?(bpp))
          compression = BinData::Uint32le.read(@data[cursor+16..cursor+19])
          invalid_data("@#{cursor} - Invalid compression method: #{compression}") if (compression > 3)
          invalid_data("@#{cursor} - Invalid compression method: #{compression} for given bpp (#{bpp})") if ((compression != 3) and (bpp == 16))
          bitmap_size = BinData::Uint32le.read(@data[cursor+20..cursor+23])
          invalid_data("@#{cursor} - Empty bitmap size for compression method: #{compression}") if ((bitmap_size == 0) and ((compression == 1) or (compression == 2)))
          #ppm_horizontal = BinData::Uint32le.read(@data[cursor+24..cursor+27])
          #ppm_vertical = BinData::Uint32le.read(@data[cursor+28..cursor+31])
          nbr_colors_used = BinData::Uint32le.read(@data[cursor+32..cursor+35])
          invalid_data("@#{cursor} - Number of colors used specified (#{nbr_colors_used} whereas bpp is >= 16 (#{bpp})") if ((bpp >= 16) and (nbr_colors_used > 0))
          #nbr_colors_important = BinData::Uint32le.read(@data[cursor+36..cursor+39])
          if (header_size == 56)
            # BMP v? header
            header_version = 56
          elsif (header_size == 108)
            # BMP v4 header
            header_version = 4
            cstype = BinData::Uint32le.read(@data[cursor+56..cursor+59])
            invalid_data("@#{cursor} - Invalid cstype: #{cstype}") if (cstype > 2)
          end
          cursor += header_size
          # Color palette
          cursor += 4*(1 << bpp) if (bpp < 16)
          # Bit-field masks for 16/32 bpp with BI_BITFIELDS compression on a v3 header
          cursor += 12 if (((bpp == 16) or (bpp == 32)) and (compression == 3) and (header_version == 3))
        end
        progress(cursor)
        found_relevant_data(:bmp)
        metadata(
          :width => width,
          :height => height,
          :bpp => bpp,
          :header_version => header_version,
          :bitmap_size => bitmap_size,
          :compression => compression
        )
        log_debug "@#{cursor} - Decoding bitmap data: header_version=#{header_version} width=#{width} height=#{height} bpp=#{bpp} compression=#{compression} bitmap_size=#{bitmap_size}"
        if ((compression == 0) or
            (compression == 3))
          # Uncompressed (or bit-field) pixel data: size is computed from the dimensions
          # Compute the scanline size
          scanline_size = nil
          case bpp.to_i
          when 1, 4, 8
            scanline_size, extra = width.divmod(8/bpp)
            scanline_size += 1 if (extra > 0)
          when 16, 24, 32
            scanline_size = width * (bpp/8)
            # NOTE(review): doubling 16bpp scanlines for v4 headers — confirm against the format spec
            scanline_size *= 2 if ((bpp == 16) and (header_version == 4))
          end
          # Scanlines are padded to 32-bit boundaries
          rest = scanline_size % 4
          scanline_size += 4 - rest if (rest > 0)
          computed_bitmap_size = scanline_size * height
          cursor += computed_bitmap_size
        else
          # RLE-compressed data: trust the size declared in the header
          cursor += bitmap_size
        end
        progress(cursor)
        # Eventually pad to the next 32 bits with \x00
        rest = (cursor - offset) % 4
        if (rest > 0)
          # Check if we have padding
          possible_padding_size = 4 - rest
          cursor += possible_padding_size if ((cursor + possible_padding_size <= @end_offset) and (@data[cursor..cursor + possible_padding_size - 1] == PADDING_CHAR * possible_padding_size))
        end
        ending_offset = cursor

        return ending_offset
      end

    end

  end

end
module FilesHunter

  module Decoders

    # Decoder for Microsoft Cabinet archives (.cab and derived formats).
    # Walks the CFHEADER, CFFOLDER, CFFILE and CFDATA structures in order,
    # then checks for a trailing Authenticode signature.
    class CAB < BeginPatternDecoder

      # "MSCF" signature followed by the 4 reserved zero bytes
      BEGIN_PATTERN_CAB = "MSCF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)

      # NUL terminator of the inline cabinet/disk/file name strings
      END_STRING_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

      # First 2 bytes of a DER-encoded PKCS#7 (Authenticode) blob
      AUTHENTICODE_ID = "\x30\x82".force_encoding(Encoding::ASCII_8BIT)

      # Give the begin pattern and its options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: The options
      def get_begin_pattern
        return BEGIN_PATTERN_CAB, { :offset_inc => 4 }
      end

      # Decode CAB data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the begin pattern
      # Result::
      # * _Fixnum_: The ending offset (nil if could not be decoded)
      def decode(offset)

        # CFHEADER
        cabinet_size = BinData::Uint32le.read(@data[offset+8..offset+11])
        # Both reserved DWORDs must be 0
        invalid_data("@#{offset} - Invalid CAB header.") if (BinData::Uint32le.read(@data[offset+12..offset+15]) != 0)
        #cf_file_offset = BinData::Uint32le.read(@data[offset+16..offset+19])
        invalid_data("@#{offset} - Invalid CAB header.") if (BinData::Uint32le.read(@data[offset+20..offset+23]) != 0)
        minor_version = @data[offset+24].ord
        major_version = @data[offset+25].ord
        nbr_cf_folders = BinData::Uint16le.read(@data[offset+26..offset+27])
        nbr_cf_files = BinData::Uint16le.read(@data[offset+28..offset+29])
        flags = BinData::Uint16le.read(@data[offset+30..offset+31])
        flag_prev_cabinet = ((flags & 0b00000000_00000001) != 0)
        flag_next_cabinet = ((flags & 0b00000000_00000010) != 0)
        flag_reserve_present = ((flags & 0b00000000_00000100) != 0)
        set_id = BinData::Uint16le.read(@data[offset+32..offset+33])
        idx_cabinet = BinData::Uint16le.read(@data[offset+34..offset+35])
        cursor = offset + 36
        reserve_field_size_in_folder = 0
        reserve_field_size_in_data = 0
        if flag_reserve_present
          # Optional reserve sizes, plus an application-specific reserved area in the header
          reserve_field_size_in_header = BinData::Uint16le.read(@data[offset+36..offset+37])
          invalid_data("@#{offset} - Invalid reserve_field_size_in_header (#{reserve_field_size_in_header})") if (reserve_field_size_in_header > 60000)
          reserve_field_size_in_folder = @data[offset+38].ord
          reserve_field_size_in_data = @data[offset+39].ord
          cursor += 4 + reserve_field_size_in_header
        end
        if flag_prev_cabinet
          # Skip the NUL-terminated previous cabinet name and disk name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read previous cabinet name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read previous disk name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
        end
        if flag_next_cabinet
          # Skip the NUL-terminated next cabinet name and disk name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read next cabinet name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read next disk name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
        end
        progress(cursor)
        found_relevant_data([:cab, :msu, :mzz])
        metadata(
          :cabinet_size => cabinet_size,
          :minor_version => minor_version,
          :major_version => major_version,
          :nbr_cf_folders => nbr_cf_folders,
          :nbr_cf_files => nbr_cf_files,
          :set_id => set_id,
          :idx_cabinet => idx_cabinet,
          :flag_prev_cabinet => flag_prev_cabinet,
          :flag_next_cabinet => flag_next_cabinet,
          :flag_reserve_present => flag_reserve_present
        )

        # CFFOLDER
        # Remember each folder's [first data block offset, number of data blocks] for the CFDATA walk
        data_blocks = []
        log_debug "@#{cursor} - Beginning of #{nbr_cf_folders} CFFOLDER structures"
        nbr_cf_folders.times do |idx_cf_folder|
          first_data_offset = BinData::Uint32le.read(@data[cursor..cursor+3])
          nbr_data_blocks = BinData::Uint16le.read(@data[cursor+4..cursor+5])
          data_blocks << [ first_data_offset, nbr_data_blocks ]
          # compression_type = BinData::Uint16le.read(@data[cursor+6..cursor+7])
          cursor += 8 + reserve_field_size_in_folder
          progress(cursor)
        end

        # CFFILE
        log_debug "@#{cursor} - Beginning of #{nbr_cf_files} CFFILE structures"
        nbr_cf_files.times do |idx_cf_file|
          # file_size = BinData::Uint32le.read(@data[cursor..cursor+3])
          # file_offset = BinData::Uint32le.read(@data[cursor+4..cursor+7])
          # idx_file_in_folder = BinData::Uint16le.read(@data[cursor+8..cursor+9])
          # file_date = BinData::Uint16le.read(@data[cursor+10..cursor+11])
          # file_time = BinData::Uint16le.read(@data[cursor+12..cursor+13])
          # file_attrs = BinData::Uint16le.read(@data[cursor+14..cursor+15])
          cursor += 16
          # Skip the NUL-terminated file name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read file name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          progress(cursor)
        end

        # CFDATA
        log_debug "@#{cursor} - Beginning of CFDATA"
        while (!data_blocks.empty?)
          # We should be on the first data block
          first_datablock_offset, nbr_datablocks = data_blocks.shift
          invalid_data("@#{cursor} - We should be on the next data block offset (#{offset+first_datablock_offset})") if (cursor-offset != first_datablock_offset)
          nbr_datablocks.times do |idx_datablock|
            # data_crc = BinData::Uint32le.read(@data[cursor..cursor+3])
            nbr_compressed_bytes = BinData::Uint16le.read(@data[cursor+4..cursor+5])
            # nbr_uncompressed_bytes = BinData::Uint16le.read(@data[cursor+6..cursor+7])
            cursor += 8 + reserve_field_size_in_data + nbr_compressed_bytes
            progress(cursor)
          end
        end
        # The cursor must now match the total size declared in the CFHEADER
        invalid_data("@#{cursor} - We should be on at the end of the CAB file (#{offset+cabinet_size})") if (cursor-offset != cabinet_size)

        # Check if it is signed digitally using Authenticode
        if ((cursor+4 < @end_offset) and
            (@data[cursor..cursor+1] == AUTHENTICODE_ID))
          # Read the size
          authenticode_size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
          log_debug "@#{cursor} - Found authenticode data of size #{authenticode_size}"
          cursor += 4 + authenticode_size
          # Eat eventually up to 4 "\x00" bytes
          while ((cursor < @end_offset) and
                 (@data[cursor] == "\x00"))
            cursor += 1
          end
        end

        return cursor
      end

    end

  end

end
module FilesHunter

  module Decoders

    # Decoder for Compound File Binary Format documents (OLE2 storage),
    # the container used by legacy Microsoft Office files (.doc, .xls, .pps, .msi...).
    # The total size is derived from the highest sector ID referenced by the SAT;
    # the concrete extension is guessed by searching known tokens in the sectors.
    class CFBF < BeginPatternDecoder

      # CFBF magic number followed by the 16 reserved zero bytes
      BEGIN_PATTERN_CFBF = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)

      # Byte tokens searched in sector data to identify the embedded document type
      KNOWN_EXTENSIONS = {
        'MSWordDoc'.force_encoding(Encoding::ASCII_8BIT) => :doc,
        "P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00".force_encoding(Encoding::ASCII_8BIT) => :pps,
        'Microsoft Excel'.force_encoding(Encoding::ASCII_8BIT) => :xls,
        "C\x00a\x00t\x00a\x00l\x00o\x00g\x00".force_encoding(Encoding::ASCII_8BIT) => :db,
        'Install,MSI,Framework'.force_encoding(Encoding::ASCII_8BIT) => :msi
      }

      # Give the begin pattern and its options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: The options
      def get_begin_pattern
        return BEGIN_PATTERN_CFBF, { :offset_inc => 24 }
      end

      # Decode CFBF data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the begin pattern
      # Result::
      # * _Fixnum_: The ending offset (nil if could not be decoded)
      def decode(offset)
        # Know if we are little or big-endian
        big_endian = (@data[offset+28..offset+29] == "\xFF\xFE")
        bindata32 = big_endian ? BinData::Uint32be : BinData::Uint32le
        bindata16 = big_endian ? BinData::Uint16be : BinData::Uint16le
        # Read sector size
        vector_size = 1 << bindata16.read(@data[offset+30..offset+31])

        # Count the number of sectors
        # Read the MSAT (first 109 entries)
        msat = @data[offset+76..offset+511]
        found_relevant_data(:doc) # Default
        first_sector_offset = offset + 512
        # Check if there are additional MSAT sectors
        # Sector IDs >= 4294967292 (0xFFFFFFFC) are special markers, not real sectors
        next_msat_sector_id = bindata32.read(@data[offset+68..offset+71])
        while (next_msat_sector_id < 4294967292)
          # Read the MSAT
          msat.concat(@data[first_sector_offset+next_msat_sector_id*vector_size..first_sector_offset+(next_msat_sector_id+1)*vector_size-5])
          # The last sector ID is the next MSAT sector one
          next_msat_sector_id = bindata32.read(@data[first_sector_offset+(next_msat_sector_id+1)*vector_size-4..first_sector_offset+(next_msat_sector_id+1)*vector_size-1])
        end
        # Decode the MSAT and read each SAT sector
        sat_sector_ids = []
        log_debug "=== Size of MSAT: #{msat.size}"
        (msat.size / 4).times do |idx|
          sector_id = bindata32.read(msat[idx*4..idx*4+3])
          sat_sector_ids << sector_id if (sector_id < 4294967292)
        end
        # Read each SAT sector and get the maximum sector ID
        max_sector_id = -1
        sat_sector_ids.each do |container_sector_id|
          sector_offset = first_sector_offset + container_sector_id*vector_size
          (vector_size / 4).times do |idx|
            sector_id = bindata32.read(@data[sector_offset+idx*4..sector_offset+idx*4+3])
            if ((sector_id < 4294967292) and
                (sector_id > max_sector_id))
              max_sector_id = sector_id
            end
          end
        end
        # We got the number of sectors
        nbr_sectors = max_sector_id + 1
        log_debug "=== Number of sectors: #{nbr_sectors}"
        metadata(
          :msat_size => msat.size,
          :nbr_sectors => nbr_sectors
        )

        # Now find some info about the file extension
        found_extension = false
        nbr_sectors.times do |idx_sector|
          log_debug "=== Find extension @ sector #{idx_sector}"
          KNOWN_EXTENSIONS.each do |token, extension|
            if (@data[first_sector_offset+idx_sector*vector_size..first_sector_offset+(idx_sector+1)*vector_size-1].index(token) != nil)
              log_debug "=== Found extension #{extension}"
              found_relevant_data(extension)
              found_extension = true
              break
            end
          end
          break if found_extension
        end
        # Keep the :doc default set above when no token matched
        log_debug "@#{offset} - Unable to get extension from CFBF document." if (!found_extension)

        return first_sector_offset + nbr_sectors*vector_size
      end

    end

  end

end