fileshunter 0.1.0.20130725

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,218 @@
1
module FilesHunter

  # Decoders that are based on begin patterns (such as Magic Numbers) inherit from this class.
  # They then have to implement the following methods:
  # * *get_begin_pattern*: To give the begin pattern and eventual options
  # * *decode*: To decode data starting a given offset that matches the begin pattern
  # * *check_begin_pattern*: Provide a quick check of the begin pattern when found [optional]
  # They can then use the following DSL in the decode method:
  # * *found_relevant_data*: Indicate that we are certain the beginning of data of the given extension has been found
  # * *invalid_data*: Indicate the data read is invalid for our Decoder
  # * *truncated_data*: Indicate the data should have continued if it were to be complete. This can happen even in the middle of a stream, if the data has been corrupted.
  # * *progress*: Indicate the progression of the scan: everything before the progression is considered valid for the given extension (if found_relevant_data was called previously)
  # * *metadata*: Set metadata properties
  class BeginPatternDecoder < Decoder

    # Exception raised when the data ends before the decoder expected it to.
    # Carries the offset at which truncation was detected.
    class TruncatedDataError < RuntimeError

      attr_reader :cursor_truncated

      # Constructor
      #
      # Parameters::
      # * *message* (_String_): The error message
      # * *cursor_truncated* (_Fixnum_): The exceeding offset
      def initialize(message, cursor_truncated)
        super(message)
        @cursor_truncated = cursor_truncated
      end

    end

    # Exception raised when the data does not match the decoder's format.
    class InvalidDataError < RuntimeError
    end

    # Find segments from a given data
    def find_segments
      @begin_pattern, options = get_begin_pattern
      log_debug "Pattern to find: #{@begin_pattern.inspect}"
      # Cache whether the subclass implements the optional quick check
      @has_to_check_begin_pattern = self.respond_to?(:check_begin_pattern)
      # Parse options (defaults below apply when the subclass gives no options)
      @max_regexp_size = 32
      @offset_inc = 1
      @begin_pattern_offset_in_segment = 0
      if (options != nil)
        @max_regexp_size = options[:max_regexp_size] if (options[:max_regexp_size] != nil)
        @offset_inc = options[:offset_inc] if (options[:offset_inc] != nil)
        @begin_pattern_offset_in_segment = options[:begin_pattern_offset_in_segment] if (options[:begin_pattern_offset_in_segment] != nil)
      end
      @metadata = {}
      @missing_previous_data = false
      # Delegate each candidate offset to the subclass' decode method
      foreach_begin_pattern do |begin_pattern_offset|
        next decode(begin_pattern_offset)
      end
    end

    protected

    # Mark the current decoding as being valid.
    # This is called when the decoder knows that it has valid data matching its specification.
    # Before calling this method, decoded data might still be junk.
    #
    # Parameters::
    # * *extension* (_Symbol_ or <em>list<Symbol></em>): Extension(s) this data belongs to
    def found_relevant_data(extension)
      @extension = extension
    end

    # Indicate that the data is invalid.
    # This will stop the decoding by raising an exception.
    #
    # Parameters::
    # * *message* (_String_): Message to give with the exception [default = '']
    def invalid_data(message = '')
      raise InvalidDataError.new(message)
    end

    # Indicate that the data is truncated.
    # This will stop the decoding by raising an exception.
    #
    # Parameters::
    # * *message* (_String_): Message to give with the exception [default = '']
    # * *cursor_truncated* (_Fixnum_): Cursor where data has been truncated [default = nil]
    def truncated_data(message = '', cursor_truncated = nil)
      # When no explicit cursor is given, fall back to the last progress offset, then to the end of data
      raise TruncatedDataError.new(message, ((cursor_truncated == nil) ? ((@last_offset_to_be_decoded == nil) ? @end_offset : @last_offset_to_be_decoded) : cursor_truncated))
    end

    # Indicate that the data is missing previous data.
    def missing_previous_data
      @missing_previous_data = true
    end

    # Indicate progression in the decoding
    #
    # Parameters::
    # * *offset_to_be_decoded* (_Fixnum_): Next to be decoded
    def progress(offset_to_be_decoded)
      @last_offset_to_be_decoded = offset_to_be_decoded
      raise TruncatedDataError.new("Progression @#{offset_to_be_decoded} is over limit (#{@end_offset})", @end_offset) if (@last_offset_to_be_decoded > @end_offset)
      # keep_alive may raise CancelParsingError if the user cancelled the scan
      keep_alive
    end

    # Set metadata properties
    #
    # Parameters::
    # * *properties* (<em>map<Symbol,Object></em>): The properties to be set
    def metadata(properties)
      #log_debug "Add metadata: #{properties.inspect}"
      @metadata.merge!(properties)
    end

    private

    # Find a starting pattern and call a client block when it matches.
    # Client block decodes data, and calls the following methods to give progress on its decoding:
    # * *found_relevant_data*: Indicate that there is valid data to be decoded. If a TruncatedDataError occurs before this method is called, the data is ignored ; otherwise it will be marked as decoded but truncated to the end of the current segment.
    # * *progress*: Indicate progression
    # * *truncated_data*: Indicate that the data is truncated
    # * *invalid_data*: Indicate that the data is invalid
    #
    # Parameters::
    # * _Block_: Client code called when such a pattern matches. Its goal is to decode correctly at the given offset.
    #   * Parameters::
    #     * *begin_pattern_offset* (_Fixnum_): The offset of the pattern
    #     * *pattern_index* (_Fixnum_): The pattern index that matched the search. Always nil if begin_pattern is not a list.
    #   * Result::
    #     * *end_offset* (_Fixnum_): The ending offset (nil if could not be decoded). If the ending offset returned is greater than end_offset, segment will be considered as truncated.
    def foreach_begin_pattern
      # Loop to the end
      current_offset = @begin_offset
      while (current_offset < @end_offset)
        # Find the begin pattern
        log_debug "Find begin_pattern starting #{current_offset}..."
        begin_pattern_offset, pattern_index = @data.index(@begin_pattern, current_offset, @max_regexp_size)
        if ((begin_pattern_offset == nil) or
            (begin_pattern_offset >= @end_offset))
          # No match
          current_offset = @end_offset
          log_debug "No more pattern."
        else
          # Only accept the match if rewinding by begin_pattern_offset_in_segment stays within our data
          if (begin_pattern_offset >= @begin_offset + @begin_pattern_offset_in_segment)
            begin_pattern_offset -= @begin_pattern_offset_in_segment
            log_debug "Found begin_pattern at #{begin_pattern_offset}."
            # We have a candidate
            # Try to decode it
            decoded_end_offset = nil
            truncated = false
            # Reset per-candidate state used by the DSL methods
            @missing_previous_data = false
            @extension = nil
            @last_offset_to_be_decoded = nil
            begin
              # If the decoder can perform additional tests, call them
              begin_pattern_valid = (@has_to_check_begin_pattern) ? check_begin_pattern(begin_pattern_offset, pattern_index) : true
              if begin_pattern_valid
                # Call the Decoder
                decoded_end_offset = yield(begin_pattern_offset, pattern_index)
              else
                log_debug 'Invalid pattern returned by the check.'
              end
            rescue InvalidDataError
              # If data was already validated, it means that the segment is truncated.
              log_debug "Got an invalid data exception while decoding data: #{$!}"
              #log_debug $!.backtrace.join("\n")
              # If not, drop everything.
              if ((@extension != nil) and
                  (@last_offset_to_be_decoded != nil))
                truncated = true
                # Use the last decoded offset as the truncated limit.
                decoded_end_offset = @last_offset_to_be_decoded
              else
                decoded_end_offset = nil
              end
            rescue TruncatedDataError, AccessAfterDataError
              # Data is truncated
              log_debug "Got a truncation exception while decoding data: #{$!}"
              #log_debug $!.backtrace.join("\n")
              # If we already got relevant data, mark it as truncated
              if (@extension != nil)
                truncated = true
                # Both exception classes carry the offset where truncation happened, under different names
                if ($!.is_a?(AccessAfterDataError))
                  decoded_end_offset = $!.exceeding_offset
                else
                  decoded_end_offset = $!.cursor_truncated
                end
              else
                decoded_end_offset = nil
              end
            rescue
              # Unexpected errors are not swallowed: re-raise for the caller to handle
              #log_err "Error while decoding data: #{$!}\n#{$!.backtrace.join("\n")}"
              #decoded_end_offset = nil
              raise
            end
            if ((decoded_end_offset == nil) or
                (@extension == nil))
              log_debug 'Invalid segment.'
              # Try searching from further: maybe another BEGIN_PATTERN might be found
              current_offset = begin_pattern_offset + @begin_pattern_offset_in_segment + @offset_inc
            else
              log_debug "Decoded segment in offsets [ #{begin_pattern_offset} - #{decoded_end_offset} ]"
              if (decoded_end_offset > @end_offset)
                log_debug "Decoded segment ends at #{decoded_end_offset} which is greater than #{@end_offset} => truncated"
                decoded_end_offset = @end_offset
                truncated = true
              end
              # Extract the segment and go on to the next
              found_segment(begin_pattern_offset, decoded_end_offset, @extension, truncated, @missing_previous_data, @metadata)
              current_offset = decoded_end_offset
            end
          else
            # Try searching from further: maybe another BEGIN_PATTERN might be found
            current_offset = begin_pattern_offset + @offset_inc
          end
        end
      end
    end

  end

end
@@ -0,0 +1,66 @@
1
module FilesHunter

  # Generic Decoder class
  # All Decoders inherit from this class and have to implement the find_segments method, using @data, @begin_offset and @end_offset instance variables to parse data.
  # Here is the DSL Decoders can use in their find_segments method:
  # * *@data* (_IOBlockReader_): The data to be accessed
  # * *@begin_offset* (_Fixnum_): The begin offset
  # * *@end_offset* (_Fixnum_): The end offset
  # * *found_segment*: Method used to indicate a Segment was successfully parsed
  # * *keep_alive*: Method used to indicate progression
  class Decoder

    # Prepare for new search
    #
    # Parameters::
    # * *segments_analyzer* (_SegmentsAnalyzer_): The segments analyzer for which this Decoder is working
    # * *data* (_IOBlockReader_): Data being analyzed
    # * *begin_offset* (_Fixnum_): The begin offset
    # * *end_offset* (_Fixnum_): The end offset
    def setup(segments_analyzer, data, begin_offset, end_offset)
      @segments_analyzer = segments_analyzer
      @data = data
      @begin_offset = begin_offset
      @end_offset = end_offset
      @segments = []
    end

    # Return found segments since last setup
    #
    # Result::
    # * <em>list<Segment></em>: The list of segments
    def segments_found
      return @segments
    end

    protected

    # Callback called by decoders to notify a Segment has been found successfully
    #
    # Parameters::
    # * *segment_begin_offset* (_Fixnum_): The begin offset
    # * *segment_end_offset* (_Fixnum_): The end offset
    # * *extension* (_Symbol_ or <em>list<Symbol></em>): The extension (can be a list of possible extensions)
    # * *truncated* (_Boolean_): Is the data truncated in this segment?
    # * *missing_previous_data* (_Boolean_): Is some data missing before?
    # * *metadata* (<em>map<Symbol,Object></em>): Metadata associated to this segment (Decoder dependent) [default = {}]
    def found_segment(segment_begin_offset, segment_end_offset, extension, truncated, missing_previous_data, metadata = {})
      # Fix: the documentation always promised [default = {}] for metadata, but the signature did not provide it.
      raise "Segment begin offset (#{segment_begin_offset}) is lower than data begin offset (#{@begin_offset})" if (segment_begin_offset < @begin_offset)
      if (segment_end_offset > @end_offset)
        # Clamp the segment to our data window and flag it as truncated
        log_debug "Segment end offset (#{segment_end_offset}) is greater than data end offset (#{@end_offset}). Mark Segment as truncated."
        segment_end_offset = @end_offset
        truncated = true
      end
      @segments << Segment.new(segment_begin_offset, segment_end_offset, extension, truncated, missing_previous_data, metadata)
      @segments_analyzer.add_bytes_decoded(segment_end_offset - segment_begin_offset)
    end

    # Indicate progression in the decoding
    # This is used to eventually cancel the parsing
    def keep_alive
      raise CancelParsingError.new('Parsing cancelled while decoding') if (@segments_analyzer.parsing_cancelled)
    end

  end

end
@@ -0,0 +1,50 @@
1
module FilesHunter

  module Decoders

    # Decoder for ASF containers (WMV/WMA and the like), recognized through the ASF Header Object GUID.
    class ASF < BeginPatternDecoder

      # GUID marking the beginning of an ASF file (Header Object)
      BEGIN_PATTERN_ASF = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C".force_encoding(Encoding::ASCII_8BIT)
      # GUID of the Data object that must follow the Header object
      ASF_DATA_GUID = "\x36\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C".force_encoding(Encoding::ASCII_8BIT)
      # GUIDs of index objects that may optionally trail the Data object
      ACCEPTABLE_INDEX_GUID = [
        "\x90\x08\x00\x33\xB1\xE5\xCF\x11\x89\xF4\x00\xA0\xC9\x03\x49\xCB".force_encoding(Encoding::ASCII_8BIT),
        "\xD3\x29\xE2\xD6\xDA\x35\xD1\x11\x90\x34\x00\xA0\xC9\x03\x49\xBE".force_encoding(Encoding::ASCII_8BIT),
        "\xF8\x03\xB1\xFE\xAD\x12\x64\x4C\x84\x0F\x2A\x1D\x2F\x7A\xD4\x8C".force_encoding(Encoding::ASCII_8BIT),
        "\xD0\x3F\xB7\x3C\x4A\x0C\x03\x48\x95\x3D\xED\xF7\xB6\x22\x8F\x0C".force_encoding(Encoding::ASCII_8BIT)
      ]

      # Give the begin pattern and options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: Options (offset_inc = 16 as the GUID is 16 bytes long)
      def get_begin_pattern
        return BEGIN_PATTERN_ASF, { :offset_inc => 16 }
      end

      # Decode data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the ASF segment
      def decode(offset)
        ending_offset = nil

        # Bug fix: the size of the Header object is at bytes [offset+16..offset+23];
        # the previous code read @data[cursor+16..cursor+23] while cursor was still
        # unassigned (nil inside its own defining expression), raising NoMethodError.
        cursor = offset + BinData::Uint64le.read(@data[offset+16..offset+23])
        progress(cursor)
        # Should be on the DATA object
        invalid_data("@#{cursor} - Missing Data object in ASF. GUID does not match.") if (@data[cursor..cursor+15] != ASF_DATA_GUID)
        found_relevant_data(:asf)
        # Skip the whole Data object (its 64-bit size follows its GUID)
        cursor += BinData::Uint64le.read(@data[cursor+16..cursor+23])
        progress(cursor)
        # Now cycle through optional Index objects
        while (ending_offset == nil)
          if (ACCEPTABLE_INDEX_GUID.include?(@data[cursor..cursor+15]))
            # There is an index object: skip it
            cursor += BinData::Uint64le.read(@data[cursor+16..cursor+23])
            progress(cursor)
            # Stop cleanly if the index ends exactly at the end of our data window
            ending_offset = cursor if (cursor == @end_offset)
          else
            # Finished
            ending_offset = cursor
          end
        end

        return ending_offset
      end

    end

  end

end
@@ -0,0 +1,118 @@
1
module FilesHunter

  module Decoders

    # Decoder for BMP images, recognized through the "BM" signature.
    class BMP < BeginPatternDecoder

      # "BM", 4 bytes of file size (wildcarded), then 4 reserved zero bytes
      BEGIN_PATTERN_BMP = Regexp.new("BM....\x00\x00\x00\x00", nil, 'n')

      PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

      # Give the begin pattern and options
      #
      # Result::
      # * _Regexp_: The begin pattern
      # * <em>map<Symbol,Object></em>: Options
      def get_begin_pattern
        return BEGIN_PATTERN_BMP, { :offset_inc => 2, :max_regexp_size => 10 }
      end

      # Decode data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the BMP segment
      def decode(offset)
        ending_offset = nil

        # Skip the 14-byte file header; cursor is now on the DIB header whose first field is its own size
        cursor = offset + 14
        header_size = BinData::Uint32le.read(@data[cursor..cursor+3])
        width = nil
        height = nil
        bpp = nil
        header_version = nil
        bitmap_size = nil
        compression = 0
        if (header_size == 12)
          # BMP v2 header
          header_version = 2
          width = BinData::Sint16le.read(@data[cursor+4..cursor+5])
          height = BinData::Sint16le.read(@data[cursor+6..cursor+7])
          nbr_planes = BinData::Uint16le.read(@data[cursor+8..cursor+9])
          invalid_data("@#{cursor} - Number of planes (#{nbr_planes}) should always be 1") if (nbr_planes != 1)
          bpp = BinData::Uint16le.read(@data[cursor+10..cursor+11])
          invalid_data("@#{cursor} - Invalid BPP: #{bpp}") if (![1,4,8,16,24,32].include?(bpp))
          cursor += header_size
          # Color palette (3 bytes per entry in v2 headers)
          cursor += 3*(1 << bpp) if (bpp != 24)
        else
          # BMP v3+ header
          header_version = 3
          width = BinData::Uint32le.read(@data[cursor+4..cursor+7])
          height = BinData::Uint32le.read(@data[cursor+8..cursor+11])
          nbr_planes = BinData::Uint16le.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Number of planes (#{nbr_planes}) should always be 1") if (nbr_planes != 1)
          bpp = BinData::Uint16le.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid BPP: #{bpp}") if (![1,4,8,16,24,32].include?(bpp))
          compression = BinData::Uint32le.read(@data[cursor+16..cursor+19])
          invalid_data("@#{cursor} - Invalid compression method: #{compression}") if (compression > 3)
          invalid_data("@#{cursor} - Invalid compression method: #{compression} for given bpp (#{bpp})") if ((compression != 3) and (bpp == 16))
          bitmap_size = BinData::Uint32le.read(@data[cursor+20..cursor+23])
          invalid_data("@#{cursor} - Empty bitmap size for compression method: #{compression}") if ((bitmap_size == 0) and ((compression == 1) or (compression == 2)))
          #ppm_horizontal = BinData::Uint32le.read(@data[cursor+24..cursor+27])
          #ppm_vertical = BinData::Uint32le.read(@data[cursor+28..cursor+31])
          nbr_colors_used = BinData::Uint32le.read(@data[cursor+32..cursor+35])
          invalid_data("@#{cursor} - Number of colors used specified (#{nbr_colors_used} whereas bpp is >= 16 (#{bpp})") if ((bpp >= 16) and (nbr_colors_used > 0))
          #nbr_colors_important = BinData::Uint32le.read(@data[cursor+36..cursor+39])
          if (header_size == 56)
            # BMP v? header
            header_version = 56
          elsif (header_size == 108)
            # BMP v4 header
            header_version = 4
            cstype = BinData::Uint32le.read(@data[cursor+56..cursor+59])
            invalid_data("@#{cursor} - Invalid cstype: #{cstype}") if (cstype > 2)
          end
          cursor += header_size
          # Color palette (4 bytes per entry in v3+ headers)
          cursor += 4*(1 << bpp) if (bpp < 16)
          # Bitfield masks after a v3 header when compression is BI_BITFIELDS (3)
          cursor += 12 if (((bpp == 16) or (bpp == 32)) and (compression == 3) and (header_version == 3))
        end
        progress(cursor)
        found_relevant_data(:bmp)
        metadata(
          :width => width,
          :height => height,
          :bpp => bpp,
          :header_version => header_version,
          :bitmap_size => bitmap_size,
          :compression => compression
        )
        log_debug "@#{cursor} - Decoding bitmap data: header_version=#{header_version} width=#{width} height=#{height} bpp=#{bpp} compression=#{compression} bitmap_size=#{bitmap_size}"
        if ((compression == 0) or
            (compression == 3))
          # Uncompressed (or bitfields) data: compute the size from the geometry.
          # Compute the scanline size
          scanline_size = nil
          case bpp.to_i
          when 1, 4, 8
            scanline_size, extra = width.divmod(8/bpp)
            scanline_size += 1 if (extra > 0)
          when 16, 24, 32
            scanline_size = width * (bpp/8)
            scanline_size *= 2 if ((bpp == 16) and (header_version == 4))
          end
          # Scanlines are padded to 4-byte boundaries
          rest = scanline_size % 4
          scanline_size += 4 - rest if (rest > 0)
          computed_bitmap_size = scanline_size * height
          cursor += computed_bitmap_size
        else
          # RLE-compressed data: trust the declared bitmap size
          cursor += bitmap_size
        end
        progress(cursor)
        # Eventually pad to the next 32 bits with \x00
        rest = (cursor - offset) % 4
        if (rest > 0)
          # Check if we have padding
          possible_padding_size = 4 - rest
          cursor += possible_padding_size if ((cursor + possible_padding_size <= @end_offset) and (@data[cursor..cursor + possible_padding_size - 1] == PADDING_CHAR * possible_padding_size))
        end
        ending_offset = cursor

        return ending_offset
      end

    end

  end

end
@@ -0,0 +1,140 @@
1
module FilesHunter

  module Decoders

    # Decoder for Microsoft Cabinet files, recognized through the "MSCF" signature.
    class CAB < BeginPatternDecoder

      BEGIN_PATTERN_CAB = "MSCF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)

      # NUL byte terminating the variable-length strings of the header
      END_STRING_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

      # First 2 bytes of an eventual trailing Authenticode signature blob
      AUTHENTICODE_ID = "\x30\x82".force_encoding(Encoding::ASCII_8BIT)

      # Give the begin pattern and options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: Options
      def get_begin_pattern
        return BEGIN_PATTERN_CAB, { :offset_inc => 4 }
      end

      # Decode data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the CAB segment
      def decode(offset)

        # CFHEADER
        cabinet_size = BinData::Uint32le.read(@data[offset+8..offset+11])
        # Bytes [12..15] and [20..23] are reserved and must be 0
        invalid_data("@#{offset} - Invalid CAB header.") if (BinData::Uint32le.read(@data[offset+12..offset+15]) != 0)
        #cf_file_offset = BinData::Uint32le.read(@data[offset+16..offset+19])
        invalid_data("@#{offset} - Invalid CAB header.") if (BinData::Uint32le.read(@data[offset+20..offset+23]) != 0)
        minor_version = @data[offset+24].ord
        major_version = @data[offset+25].ord
        nbr_cf_folders = BinData::Uint16le.read(@data[offset+26..offset+27])
        nbr_cf_files = BinData::Uint16le.read(@data[offset+28..offset+29])
        flags = BinData::Uint16le.read(@data[offset+30..offset+31])
        flag_prev_cabinet = ((flags & 0b00000000_00000001) != 0)
        flag_next_cabinet = ((flags & 0b00000000_00000010) != 0)
        flag_reserve_present = ((flags & 0b00000000_00000100) != 0)
        set_id = BinData::Uint16le.read(@data[offset+32..offset+33])
        idx_cabinet = BinData::Uint16le.read(@data[offset+34..offset+35])
        cursor = offset + 36
        reserve_field_size_in_folder = 0
        reserve_field_size_in_data = 0
        if flag_reserve_present
          # Optional per-header/per-folder/per-datablock reserved areas
          reserve_field_size_in_header = BinData::Uint16le.read(@data[offset+36..offset+37])
          invalid_data("@#{offset} - Invalid reserve_field_size_in_header (#{reserve_field_size_in_header})") if (reserve_field_size_in_header > 60000)
          reserve_field_size_in_folder = @data[offset+38].ord
          reserve_field_size_in_data = @data[offset+39].ord
          cursor += 4 + reserve_field_size_in_header
        end
        if flag_prev_cabinet
          # Skip 2 NUL-terminated strings: previous cabinet name and previous disk name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read previous cabinet name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read previous disk name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
        end
        if flag_next_cabinet
          # Skip 2 NUL-terminated strings: next cabinet name and next disk name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read next cabinet name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read next disk name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
        end
        progress(cursor)
        found_relevant_data([:cab, :msu, :mzz])
        metadata(
          :cabinet_size => cabinet_size,
          :minor_version => minor_version,
          :major_version => major_version,
          :nbr_cf_folders => nbr_cf_folders,
          :nbr_cf_files => nbr_cf_files,
          :set_id => set_id,
          :idx_cabinet => idx_cabinet,
          :flag_prev_cabinet => flag_prev_cabinet,
          :flag_next_cabinet => flag_next_cabinet,
          :flag_reserve_present => flag_reserve_present
        )

        # CFFOLDER
        # Remember each folder's first datablock offset and datablock count for the CFDATA walk below
        data_blocks = []
        log_debug "@#{cursor} - Beginning of #{nbr_cf_folders} CFFOLDER structures"
        nbr_cf_folders.times do |idx_cf_folder|
          first_data_offset = BinData::Uint32le.read(@data[cursor..cursor+3])
          nbr_data_blocks = BinData::Uint16le.read(@data[cursor+4..cursor+5])
          data_blocks << [ first_data_offset, nbr_data_blocks ]
          # compression_type = BinData::Uint16le.read(@data[cursor+6..cursor+7])
          cursor += 8 + reserve_field_size_in_folder
          progress(cursor)
        end

        # CFFILE
        log_debug "@#{cursor} - Beginning of #{nbr_cf_files} CFFILE structures"
        nbr_cf_files.times do |idx_cf_file|
          # file_size = BinData::Uint32le.read(@data[cursor..cursor+3])
          # file_offset = BinData::Uint32le.read(@data[cursor+4..cursor+7])
          # idx_file_in_folder = BinData::Uint16le.read(@data[cursor+8..cursor+9])
          # file_date = BinData::Uint16le.read(@data[cursor+10..cursor+11])
          # file_time = BinData::Uint16le.read(@data[cursor+12..cursor+13])
          # file_attrs = BinData::Uint16le.read(@data[cursor+14..cursor+15])
          cursor += 16
          # Skip the NUL-terminated file name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read file name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          progress(cursor)
        end

        # CFDATA
        log_debug "@#{cursor} - Beginning of CFDATA"
        while (!data_blocks.empty?)
          # We should be on the first data block
          first_datablock_offset, nbr_datablocks = data_blocks.shift
          # Datablock offsets declared in CFFOLDER are relative to the cabinet start
          invalid_data("@#{cursor} - We should be on the next data block offset (#{offset+first_datablock_offset})") if (cursor-offset != first_datablock_offset)
          nbr_datablocks.times do |idx_datablock|
            # data_crc = BinData::Uint32le.read(@data[cursor..cursor+3])
            nbr_compressed_bytes = BinData::Uint16le.read(@data[cursor+4..cursor+5])
            # nbr_uncompressed_bytes = BinData::Uint16le.read(@data[cursor+6..cursor+7])
            cursor += 8 + reserve_field_size_in_data + nbr_compressed_bytes
            progress(cursor)
          end
        end
        invalid_data("@#{cursor} - We should be on at the end of the CAB file (#{offset+cabinet_size})") if (cursor-offset != cabinet_size)

        # Check if it is signed digitally using Authenticode
        if ((cursor+4 < @end_offset) and
            (@data[cursor..cursor+1] == AUTHENTICODE_ID))
          # Read the size
          authenticode_size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
          log_debug "@#{cursor} - Found authenticode data of size #{authenticode_size}"
          cursor += 4 + authenticode_size
          # Eat eventually up to 4 "\x00" bytes
          while ((cursor < @end_offset) and
                 (@data[cursor] == "\x00"))
            cursor += 1
          end
        end

        return cursor
      end

    end

  end

end
@@ -0,0 +1,92 @@
1
module FilesHunter

  module Decoders

    # Decoder for CFBF (Compound File Binary Format) documents: DOC, XLS, PPS, MSI...
    # Recognized through the D0CF11E0A1B11AE1 signature.
    class CFBF < BeginPatternDecoder

      BEGIN_PATTERN_CFBF = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)

      # Byte tokens searched in the sectors' content to guess a more precise file extension
      KNOWN_EXTENSIONS = {
        'MSWordDoc'.force_encoding(Encoding::ASCII_8BIT) => :doc,
        "P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00".force_encoding(Encoding::ASCII_8BIT) => :pps,
        'Microsoft Excel'.force_encoding(Encoding::ASCII_8BIT) => :xls,
        "C\x00a\x00t\x00a\x00l\x00o\x00g\x00".force_encoding(Encoding::ASCII_8BIT) => :db,
        'Install,MSI,Framework'.force_encoding(Encoding::ASCII_8BIT) => :msi
      }

      # Give the begin pattern and options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: Options
      def get_begin_pattern
        return BEGIN_PATTERN_CFBF, { :offset_inc => 24 }
      end

      # Decode data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the CFBF segment
      def decode(offset)
        # Know if we are little or big-endian
        big_endian = (@data[offset+28..offset+29] == "\xFF\xFE")
        bindata32 = big_endian ? BinData::Uint32be : BinData::Uint32le
        bindata16 = big_endian ? BinData::Uint16be : BinData::Uint16le
        # Read sector size
        vector_size = 1 << bindata16.read(@data[offset+30..offset+31])

        # Count the number of sectors
        # Read the MSAT (first 109 entries)
        msat = @data[offset+76..offset+511]
        found_relevant_data(:doc) # Default
        first_sector_offset = offset + 512
        # Check if there are additional MSAT sectors
        # Sector IDs >= 4294967292 (0xFFFFFFFC) are special markers, not real sectors
        next_msat_sector_id = bindata32.read(@data[offset+68..offset+71])
        while (next_msat_sector_id < 4294967292)
          # Read the MSAT (the sector minus its last 4 bytes)
          msat.concat(@data[first_sector_offset+next_msat_sector_id*vector_size..first_sector_offset+(next_msat_sector_id+1)*vector_size-5])
          # The last sector ID is the next MSAT sector one
          next_msat_sector_id = bindata32.read(@data[first_sector_offset+(next_msat_sector_id+1)*vector_size-4..first_sector_offset+(next_msat_sector_id+1)*vector_size-1])
        end
        # Decode the MSAT and read each SAT sector
        sat_sector_ids = []
        log_debug "=== Size of MSAT: #{msat.size}"
        (msat.size / 4).times do |idx|
          sector_id = bindata32.read(msat[idx*4..idx*4+3])
          sat_sector_ids << sector_id if (sector_id < 4294967292)
        end
        # Read each SAT sector and get the maximum sector ID
        max_sector_id = -1
        sat_sector_ids.each do |container_sector_id|
          sector_offset = first_sector_offset + container_sector_id*vector_size
          (vector_size / 4).times do |idx|
            sector_id = bindata32.read(@data[sector_offset+idx*4..sector_offset+idx*4+3])
            if ((sector_id < 4294967292) and
                (sector_id > max_sector_id))
              max_sector_id = sector_id
            end
          end
        end
        # We got the number of sectors
        nbr_sectors = max_sector_id + 1
        log_debug "=== Number of sectors: #{nbr_sectors}"
        metadata(
          :msat_size => msat.size,
          :nbr_sectors => nbr_sectors
        )

        # Now find some info about the file extension
        # Scan each sector for one of the known tokens; first match wins
        found_extension = false
        nbr_sectors.times do |idx_sector|
          log_debug "=== Find extension @ sector #{idx_sector}"
          KNOWN_EXTENSIONS.each do |token, extension|
            if (@data[first_sector_offset+idx_sector*vector_size..first_sector_offset+(idx_sector+1)*vector_size-1].index(token) != nil)
              log_debug "=== Found extension #{extension}"
              found_relevant_data(extension)
              found_extension = true
              break
            end
          end
          break if found_extension
        end
        log_debug "@#{offset} - Unable to get extension from CFBF document." if (!found_extension)

        return first_sector_offset + nbr_sectors*vector_size
      end

    end

  end

end