fileshunter 0.1.0.20130725

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
module FilesHunter

  # Decoders that are based on begin patterns (such as Magic Numbers) inherit from this class.
  # They then have to implement the following methods:
  # * *get_begin_pattern*: To give the begin pattern and eventual options
  # * *decode*: To decode data starting a given offset that matches the begin pattern
  # * *check_begin_pattern*: Provide a quick check of the begin pattern when found [optional]
  # They can then use the following DSL in the decode method:
  # * *found_relevant_data*: Indicate that we are certain the beginning of data of the given extension has been found
  # * *invalid_data*: Indicate the data read is invalid for our Decoder
  # * *truncated_data*: Indicate the data should have continued if it were to be complete. This can happen even in the middle of a stream, if the data has been corrupted.
  # * *progress*: Indicate the progression of the scan: everything before the progression is considered valid for the given extension (if found_relevant_data was called previously)
  # * *metadata*: Set metadata properties
  class BeginPatternDecoder < Decoder

    # Raised by decoders when data ends before the structure being decoded is complete.
    # Carries the offset at which the truncation was detected.
    class TruncatedDataError < RuntimeError

      attr_reader :cursor_truncated

      # Constructor
      #
      # Parameters::
      # * *message* (_String_): The error message
      # * *cursor_truncated* (_Fixnum_): The exceeding offset
      def initialize(message, cursor_truncated)
        super(message)
        @cursor_truncated = cursor_truncated
      end

    end

    # Raised by decoders when the data does not match the decoder's format at all.
    class InvalidDataError < RuntimeError
    end

    # Find segments from a given data
    # Entry point called by the framework: sets up the begin pattern and its
    # options, then delegates to foreach_begin_pattern, calling decode on each
    # candidate match.
    def find_segments
      @begin_pattern, options = get_begin_pattern
      log_debug "Pattern to find: #{@begin_pattern.inspect}"
      # Cache whether the subclass implements the optional quick check
      @has_to_check_begin_pattern = self.respond_to?(:check_begin_pattern)
      # Parse options (defaults below, overridden by get_begin_pattern's options)
      @max_regexp_size = 32
      @offset_inc = 1
      @begin_pattern_offset_in_segment = 0
      if (options != nil)
        @max_regexp_size = options[:max_regexp_size] if (options[:max_regexp_size] != nil)
        @offset_inc = options[:offset_inc] if (options[:offset_inc] != nil)
        @begin_pattern_offset_in_segment = options[:begin_pattern_offset_in_segment] if (options[:begin_pattern_offset_in_segment] != nil)
      end
      @metadata = {}
      @missing_previous_data = false
      foreach_begin_pattern do |begin_pattern_offset|
        next decode(begin_pattern_offset)
      end
    end

    protected

    # Mark the current decoding as being valid.
    # This is called when the decoder knows that it has valid data matching its specification.
    # Before calling this method, decoded data might still be junk.
    #
    # Parameters::
    # * *extension* (_Symbol_ or <em>list<Symbol></em>): Extension(s) this data belongs to
    def found_relevant_data(extension)
      @extension = extension
    end

    # Indicate that the data is invalid.
    # This will stop the decoding by raising an exception.
    #
    # Parameters::
    # * *message* (_String_): Message to give with the exception [default = '']
    def invalid_data(message = '')
      raise InvalidDataError.new(message)
    end

    # Indicate that the data is truncated.
    # This will stop the decoding by raising an exception.
    #
    # Parameters::
    # * *message* (_String_): Message to give with the exception [default = '']
    # * *cursor_truncated* (_Fixnum_): Cursor where data has been truncated [default = nil]
    def truncated_data(message = '', cursor_truncated = nil)
      # Fall back to the last progress offset, then to the end offset, when no explicit cursor is given
      raise TruncatedDataError.new(message, ((cursor_truncated == nil) ? ((@last_offset_to_be_decoded == nil) ? @end_offset : @last_offset_to_be_decoded) : cursor_truncated))
    end

    # Indicate that the data is missing previous data.
    def missing_previous_data
      @missing_previous_data = true
    end

    # Indicate progression in the decoding
    #
    # Parameters::
    # * *offset_to_be_decoded* (_Fixnum_): Next to be decoded
    def progress(offset_to_be_decoded)
      @last_offset_to_be_decoded = offset_to_be_decoded
      raise TruncatedDataError.new("Progression @#{offset_to_be_decoded} is over limit (#{@end_offset})", @end_offset) if (@last_offset_to_be_decoded > @end_offset)
      # keep_alive (from Decoder) allows the analyzer to cancel long parsings
      keep_alive
    end

    # Set metadata properties
    #
    # Parameters::
    # * *properties* (<em>map<Symbol,Object></em>): The properties to be set
    def metadata(properties)
      #log_debug "Add metadata: #{properties.inspect}"
      @metadata.merge!(properties)
    end

    private

    # Find a starting pattern and call a client block when it matches.
    # Client block decodes data, and calls the following methods to give progress on its decoding:
    # * *found_relevant_data*: Indicate that there is valid data to be decoded. If a TruncatedDataError occurs before this method is called, the data is ignored ; otherwise it will be marked as decoded but truncated to the end of the current segment.
    # * *progress*: Indicate progression
    # * *truncated_data*: Indicate that the data is truncated
    # * *invalid_data*: Indicate that the data is invalid
    #
    # Parameters::
    # * _Block_: Client code called when such a pattern matches. Its goal is to decode correctly at the given offset.
    #   * Parameters::
    #     * *begin_pattern_offset* (_Fixnum_): The offset of the pattern
    #     * *pattern_index* (_Fixnum_): The pattern index that matched the search. Always nil if begin_pattern is not a list.
    #   * Result::
    #     * *end_offset* (_Fixnum_): The ending offset (nil if could not be decoded). If the ending offset returned is greater than end_offset, segment will be considered as truncated.
    def foreach_begin_pattern
      # Loop to the end
      current_offset = @begin_offset
      while (current_offset < @end_offset)
        # Find the begin pattern
        log_debug "Find begin_pattern starting #{current_offset}..."
        # NOTE(review): @data.index presumably returns [offset, pattern_index] for
        # IOBlockReader — confirm against IOBlockReader's API.
        begin_pattern_offset, pattern_index = @data.index(@begin_pattern, current_offset, @max_regexp_size)
        if ((begin_pattern_offset == nil) or
            (begin_pattern_offset >= @end_offset))
          # No match
          current_offset = @end_offset
          log_debug "No more pattern."
        else
          if (begin_pattern_offset >= @begin_offset + @begin_pattern_offset_in_segment)
            # The pattern can sit some bytes inside the real segment start:
            # rewind to the actual segment beginning.
            begin_pattern_offset -= @begin_pattern_offset_in_segment
            log_debug "Found begin_pattern at #{begin_pattern_offset}."
            # We have a candidate
            # Try to decode it
            decoded_end_offset = nil
            truncated = false
            @missing_previous_data = false
            @extension = nil
            @last_offset_to_be_decoded = nil
            begin
              # If the decoder can perform additional tests, call them
              begin_pattern_valid = (@has_to_check_begin_pattern) ? check_begin_pattern(begin_pattern_offset, pattern_index) : true
              if begin_pattern_valid
                # Call the Decoder
                decoded_end_offset = yield(begin_pattern_offset, pattern_index)
              else
                log_debug 'Invalid pattern returned by the check.'
              end
            rescue InvalidDataError
              # If data was already validated, it means that the segment is truncated.
              log_debug "Got an invalid data exception while decoding data: #{$!}"
              #log_debug $!.backtrace.join("\n")
              # If not, drop everything.
              if ((@extension != nil) and
                  (@last_offset_to_be_decoded != nil))
                truncated = true
                # Use the last decoded offset as the truncated limit.
                decoded_end_offset = @last_offset_to_be_decoded
              else
                decoded_end_offset = nil
              end
            rescue TruncatedDataError, AccessAfterDataError
              # Data is truncated
              log_debug "Got a truncation exception while decoding data: #{$!}"
              #log_debug $!.backtrace.join("\n")
              # If we already got relevant data, mark it as truncated
              if (@extension != nil)
                truncated = true
                if ($!.is_a?(AccessAfterDataError))
                  decoded_end_offset = $!.exceeding_offset
                else
                  decoded_end_offset = $!.cursor_truncated
                end
              else
                decoded_end_offset = nil
              end
            rescue
              #log_err "Error while decoding data: #{$!}\n#{$!.backtrace.join("\n")}"
              #decoded_end_offset = nil
              # Unexpected errors are not swallowed: let them propagate
              raise
            end
            if ((decoded_end_offset == nil) or
                (@extension == nil))
              log_debug 'Invalid segment.'
              # Try searching from further: maybe another BEGIN_PATTERN might be found
              current_offset = begin_pattern_offset + @begin_pattern_offset_in_segment + @offset_inc
            else
              log_debug "Decoded segment in offsets [ #{begin_pattern_offset} - #{decoded_end_offset} ]"
              if (decoded_end_offset > @end_offset)
                log_debug "Decoded segment ends at #{decoded_end_offset} which is greater than #{@end_offset} => truncated"
                decoded_end_offset = @end_offset
                truncated = true
              end
              # Extract the segment and go on to the next
              found_segment(begin_pattern_offset, decoded_end_offset, @extension, truncated, @missing_previous_data, @metadata)
              current_offset = decoded_end_offset
            end
          else
            # Match too close to the data start to rewind by
            # @begin_pattern_offset_in_segment:
            # Try searching from further: maybe another BEGIN_PATTERN might be found
            current_offset = begin_pattern_offset + @offset_inc
          end
        end
      end
    end

  end

end
@@ -0,0 +1,66 @@
1
module FilesHunter

  # Generic Decoder base class.
  # All Decoders inherit from this class and have to implement the find_segments method, using @data, @begin_offset and @end_offset instance variables to parse data.
  # Here is the DSL Decoders can use in their find_segments method:
  # * *@data* (_IOBlockReader_): The data to be accessed
  # * *@begin_offset* (_Fixnum_): The begin offset
  # * *@end_offset* (_Fixnum_): The end offset
  # * *found_segment*: Method used to indicate a Segment was successfully parsed
  # * *keep_alive*: Method used to indicate progression
  class Decoder

    # Prepare for new search
    #
    # Parameters::
    # * *segments_analyzer* (_SegmentsAnalyzer_): The segments analyzer for which this Decoder is working
    # * *data* (_IOBlockReader_): Data being analyzed
    # * *begin_offset* (_Fixnum_): The begin offset
    # * *end_offset* (_Fixnum_): The end offset
    def setup(segments_analyzer, data, begin_offset, end_offset)
      # Reset the segment list first, then record the new search context
      @segments = []
      @segments_analyzer = segments_analyzer
      @data = data
      @begin_offset = begin_offset
      @end_offset = end_offset
    end

    # Return found segments since last setup
    #
    # Result::
    # * <em>list<Segment></em>: The list of segments
    def segments_found
      @segments
    end

    protected

    # Callback called by decoders to notify a Segment has been found successfully.
    # Clamps the segment to the analyzed window (marking it truncated if needed)
    # and reports decoding progress to the analyzer.
    #
    # Parameters::
    # * *segment_begin_offset* (_Fixnum_): The begin offset
    # * *segment_end_offset* (_Fixnum_): The end offset
    # * *extension* (_Symbol_ or <em>list<Symbol></em>): The extension (can be a list of possible extensions)
    # * *truncated* (_Boolean_): Is the data truncated in this segment?
    # * *missing_previous_data* (_Boolean_): Is some data missing before?
    # * *metadata* (<em>map<Symbol,Object></em>): Metadata associated to this segment (Decoder dependent)
    def found_segment(segment_begin_offset, segment_end_offset, extension, truncated, missing_previous_data, metadata)
      if segment_begin_offset < @begin_offset
        raise "Segment begin offset (#{segment_begin_offset}) is lower than data begin offset (#{@begin_offset})"
      end
      if segment_end_offset > @end_offset
        log_debug "Segment end offset (#{segment_end_offset}) is greater than data end offset (#{@end_offset}). Mark Segment as truncated."
        segment_end_offset = @end_offset
        truncated = true
      end
      new_segment = Segment.new(segment_begin_offset, segment_end_offset, extension, truncated, missing_previous_data, metadata)
      @segments << new_segment
      @segments_analyzer.add_bytes_decoded(segment_end_offset - segment_begin_offset)
    end

    # Indicate progression in the decoding.
    # This is used to eventually cancel the parsing.
    def keep_alive
      raise CancelParsingError.new('Parsing cancelled while decoding') if @segments_analyzer.parsing_cancelled
    end

  end

end
@@ -0,0 +1,50 @@
1
module FilesHunter

  module Decoders

    # Decoder for ASF containers (Advanced Systems Format: .asf/.wmv/.wma).
    # Matches the ASF Header Object GUID, then walks the mandatory Data object
    # and any trailing Index objects to compute the segment's ending offset.
    class ASF < BeginPatternDecoder

      # GUID of the ASF Header Object (magic number at the start of the file)
      BEGIN_PATTERN_ASF = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C".force_encoding(Encoding::ASCII_8BIT)
      # GUID of the ASF Data Object, expected right after the Header Object
      ASF_DATA_GUID = "\x36\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C".force_encoding(Encoding::ASCII_8BIT)
      # GUIDs of the optional top-level Index objects that may follow the Data object
      ACCEPTABLE_INDEX_GUID = [
        "\x90\x08\x00\x33\xB1\xE5\xCF\x11\x89\xF4\x00\xA0\xC9\x03\x49\xCB".force_encoding(Encoding::ASCII_8BIT),
        "\xD3\x29\xE2\xD6\xDA\x35\xD1\x11\x90\x34\x00\xA0\xC9\x03\x49\xBE".force_encoding(Encoding::ASCII_8BIT),
        "\xF8\x03\xB1\xFE\xAD\x12\x64\x4C\x84\x0F\x2A\x1D\x2F\x7A\xD4\x8C".force_encoding(Encoding::ASCII_8BIT),
        "\xD0\x3F\xB7\x3C\x4A\x0C\x03\x48\x95\x3D\xED\xF7\xB6\x22\x8F\x0C".force_encoding(Encoding::ASCII_8BIT)
      ]

      # Give the begin pattern and its options
      #
      # Result::
      # * _String_: The begin pattern
      # * <em>map<Symbol,Object></em>: The pattern options
      def get_begin_pattern
        return BEGIN_PATTERN_ASF, { :offset_inc => 16 }
      end

      # Decode ASF data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the ASF segment
      def decode(offset)
        ending_offset = nil

        # Each ASF object is: 16-byte GUID followed by a 64-bit little-endian
        # size of the whole object. Skip over the Header Object.
        # Bug fix: the header size must be read at offset+16..offset+23; the
        # original code read @data[cursor+16..cursor+23] while the local
        # variable cursor was still nil (Ruby initializes a local being
        # assigned to nil within its own assignment expression), which raised
        # a NoMethodError on every decode.
        cursor = offset + BinData::Uint64le.read(@data[offset+16..offset+23])
        progress(cursor)
        # Should be on the DATA object
        invalid_data("@#{cursor} - Missing Data object in ASF. GUID does not match.") if (@data[cursor..cursor+15] != ASF_DATA_GUID)
        found_relevant_data(:asf)
        # Skip over the Data object using its own size field
        cursor += BinData::Uint64le.read(@data[cursor+16..cursor+23])
        progress(cursor)
        # Now cycle through optional Index objects
        while (ending_offset == nil)
          if (ACCEPTABLE_INDEX_GUID.include?(@data[cursor..cursor+15]))
            # There is an index object: skip it
            cursor += BinData::Uint64le.read(@data[cursor+16..cursor+23])
            progress(cursor)
            ending_offset = cursor if (cursor == @end_offset)
          else
            # Finished: no more index objects
            ending_offset = cursor
          end
        end

        return ending_offset
      end

    end

  end

end
@@ -0,0 +1,118 @@
1
module FilesHunter

  module Decoders

    # Decoder for BMP bitmap images.
    # Parses the BITMAPFILEHEADER + info header (v2/v3/v4 variants), skips the
    # color palette and pixel data, and returns the ending offset of the image.
    class BMP < BeginPatternDecoder

      # "BM" signature followed by 4 arbitrary bytes (file size) and the 4
      # reserved zero bytes of the BITMAPFILEHEADER ('n' forces binary encoding)
      BEGIN_PATTERN_BMP = Regexp.new("BM....\x00\x00\x00\x00", nil, 'n')

      PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

      # Give the begin pattern and its options
      def get_begin_pattern
        return BEGIN_PATTERN_BMP, { :offset_inc => 2, :max_regexp_size => 10 }
      end

      # Decode BMP data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the BMP segment
      def decode(offset)
        ending_offset = nil

        # Skip the 14-byte BITMAPFILEHEADER; the info header starts with its own size
        cursor = offset + 14
        header_size = BinData::Uint32le.read(@data[cursor..cursor+3])
        width = nil
        height = nil
        bpp = nil
        header_version = nil
        bitmap_size = nil
        compression = 0
        if (header_size == 12)
          # BMP v2 header (BITMAPCOREHEADER): 16-bit dimensions
          header_version = 2
          width = BinData::Sint16le.read(@data[cursor+4..cursor+5])
          height = BinData::Sint16le.read(@data[cursor+6..cursor+7])
          nbr_planes = BinData::Uint16le.read(@data[cursor+8..cursor+9])
          invalid_data("@#{cursor} - Number of planes (#{nbr_planes}) should always be 1") if (nbr_planes != 1)
          bpp = BinData::Uint16le.read(@data[cursor+10..cursor+11])
          invalid_data("@#{cursor} - Invalid BPP: #{bpp}") if (![1,4,8,16,24,32].include?(bpp))
          cursor += header_size
          # Color palette: 3 bytes (RGB) per palette entry
          cursor += 3*(1 << bpp) if (bpp != 24)
        else
          # BMP v3+ header (BITMAPINFOHEADER and later): 32-bit dimensions
          header_version = 3
          width = BinData::Uint32le.read(@data[cursor+4..cursor+7])
          height = BinData::Uint32le.read(@data[cursor+8..cursor+11])
          nbr_planes = BinData::Uint16le.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Number of planes (#{nbr_planes}) should always be 1") if (nbr_planes != 1)
          bpp = BinData::Uint16le.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid BPP: #{bpp}") if (![1,4,8,16,24,32].include?(bpp))
          compression = BinData::Uint32le.read(@data[cursor+16..cursor+19])
          invalid_data("@#{cursor} - Invalid compression method: #{compression}") if (compression > 3)
          invalid_data("@#{cursor} - Invalid compression method: #{compression} for given bpp (#{bpp})") if ((compression != 3) and (bpp == 16))
          bitmap_size = BinData::Uint32le.read(@data[cursor+20..cursor+23])
          invalid_data("@#{cursor} - Empty bitmap size for compression method: #{compression}") if ((bitmap_size == 0) and ((compression == 1) or (compression == 2)))
          #ppm_horizontal = BinData::Uint32le.read(@data[cursor+24..cursor+27])
          #ppm_vertical = BinData::Uint32le.read(@data[cursor+28..cursor+31])
          nbr_colors_used = BinData::Uint32le.read(@data[cursor+32..cursor+35])
          invalid_data("@#{cursor} - Number of colors used specified (#{nbr_colors_used} whereas bpp is >= 16 (#{bpp})") if ((bpp >= 16) and (nbr_colors_used > 0))
          #nbr_colors_important = BinData::Uint32le.read(@data[cursor+36..cursor+39])
          if (header_size == 56)
            # BMP v? header (56-byte variant, sometimes called BITMAPV3INFOHEADER)
            header_version = 56
          elsif (header_size == 108)
            # BMP v4 header
            header_version = 4
            cstype = BinData::Uint32le.read(@data[cursor+56..cursor+59])
            invalid_data("@#{cursor} - Invalid cstype: #{cstype}") if (cstype > 2)
          end
          cursor += header_size
          # Color palette: 4 bytes (RGBA) per palette entry
          cursor += 4*(1 << bpp) if (bpp < 16)
          # BI_BITFIELDS masks after a v3 header (3 x 4 bytes)
          cursor += 12 if (((bpp == 16) or (bpp == 32)) and (compression == 3) and (header_version == 3))
        end
        progress(cursor)
        found_relevant_data(:bmp)
        metadata(
          :width => width,
          :height => height,
          :bpp => bpp,
          :header_version => header_version,
          :bitmap_size => bitmap_size,
          :compression => compression
        )
        log_debug "@#{cursor} - Decoding bitmap data: header_version=#{header_version} width=#{width} height=#{height} bpp=#{bpp} compression=#{compression} bitmap_size=#{bitmap_size}"
        if ((compression == 0) or
            (compression == 3))
          # Uncompressed (or bitfields) data: compute the size from the dimensions.
          # Compute the scanline size
          scanline_size = nil
          case bpp.to_i
          when 1, 4, 8
            # Several pixels per byte: round up to a whole byte
            scanline_size, extra = width.divmod(8/bpp)
            scanline_size += 1 if (extra > 0)
          when 16, 24, 32
            scanline_size = width * (bpp/8)
            # NOTE(review): doubling 16bpp scanlines for v4 headers looks
            # intentional here — confirm against the original format notes.
            scanline_size *= 2 if ((bpp == 16) and (header_version == 4))
          end
          # Scanlines are padded to 4-byte boundaries
          rest = scanline_size % 4
          scanline_size += 4 - rest if (rest > 0)
          computed_bitmap_size = scanline_size * height
          cursor += computed_bitmap_size
        else
          # RLE-compressed data: trust the bitmap_size field
          cursor += bitmap_size
        end
        progress(cursor)
        # Eventually pad to the next 32 bits with \x00
        rest = (cursor - offset) % 4
        if (rest > 0)
          # Check if we have padding
          possible_padding_size = 4 - rest
          cursor += possible_padding_size if ((cursor + possible_padding_size <= @end_offset) and (@data[cursor..cursor + possible_padding_size - 1] == PADDING_CHAR * possible_padding_size))
        end
        ending_offset = cursor

        return ending_offset
      end

    end

  end

end
@@ -0,0 +1,140 @@
1
module FilesHunter

  module Decoders

    # Decoder for Microsoft Cabinet (CAB) archives.
    # Walks the CFHEADER, CFFOLDER, CFFILE and CFDATA structures, and also
    # consumes an optional trailing Authenticode signature.
    class CAB < BeginPatternDecoder

      # "MSCF" signature followed by the 4 reserved zero bytes of CFHEADER
      BEGIN_PATTERN_CAB = "MSCF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)

      # NUL terminator of the C strings embedded in the cabinet
      END_STRING_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

      # First 2 bytes of a DER-encoded PKCS#7 SEQUENCE (Authenticode blob)
      AUTHENTICODE_ID = "\x30\x82".force_encoding(Encoding::ASCII_8BIT)

      # Give the begin pattern and its options
      def get_begin_pattern
        return BEGIN_PATTERN_CAB, { :offset_inc => 4 }
      end

      # Decode CAB data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the CAB segment
      def decode(offset)

        # CFHEADER
        cabinet_size = BinData::Uint32le.read(@data[offset+8..offset+11])
        # Reserved fields at +12 and +20 must be zero
        invalid_data("@#{offset} - Invalid CAB header.") if (BinData::Uint32le.read(@data[offset+12..offset+15]) != 0)
        #cf_file_offset = BinData::Uint32le.read(@data[offset+16..offset+19])
        invalid_data("@#{offset} - Invalid CAB header.") if (BinData::Uint32le.read(@data[offset+20..offset+23]) != 0)
        minor_version = @data[offset+24].ord
        major_version = @data[offset+25].ord
        nbr_cf_folders = BinData::Uint16le.read(@data[offset+26..offset+27])
        nbr_cf_files = BinData::Uint16le.read(@data[offset+28..offset+29])
        flags = BinData::Uint16le.read(@data[offset+30..offset+31])
        flag_prev_cabinet = ((flags & 0b00000000_00000001) != 0)
        flag_next_cabinet = ((flags & 0b00000000_00000010) != 0)
        flag_reserve_present = ((flags & 0b00000000_00000100) != 0)
        set_id = BinData::Uint16le.read(@data[offset+32..offset+33])
        idx_cabinet = BinData::Uint16le.read(@data[offset+34..offset+35])
        cursor = offset + 36
        reserve_field_size_in_folder = 0
        reserve_field_size_in_data = 0
        if flag_reserve_present
          # Optional per-header/per-folder/per-datablock reserved areas
          reserve_field_size_in_header = BinData::Uint16le.read(@data[offset+36..offset+37])
          invalid_data("@#{offset} - Invalid reserve_field_size_in_header (#{reserve_field_size_in_header})") if (reserve_field_size_in_header > 60000)
          reserve_field_size_in_folder = @data[offset+38].ord
          reserve_field_size_in_data = @data[offset+39].ord
          cursor += 4 + reserve_field_size_in_header
        end
        if flag_prev_cabinet
          # Skip the NUL-terminated previous cabinet and disk names
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read previous cabinet name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read previous disk name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
        end
        if flag_next_cabinet
          # Skip the NUL-terminated next cabinet and disk names
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read next cabinet name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read next disk name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
        end
        progress(cursor)
        found_relevant_data([:cab, :msu, :mzz])
        metadata(
          :cabinet_size => cabinet_size,
          :minor_version => minor_version,
          :major_version => major_version,
          :nbr_cf_folders => nbr_cf_folders,
          :nbr_cf_files => nbr_cf_files,
          :set_id => set_id,
          :idx_cabinet => idx_cabinet,
          :flag_prev_cabinet => flag_prev_cabinet,
          :flag_next_cabinet => flag_next_cabinet,
          :flag_reserve_present => flag_reserve_present
        )

        # CFFOLDER
        # Remember each folder's first data block offset and block count for the CFDATA pass
        data_blocks = []
        log_debug "@#{cursor} - Beginning of #{nbr_cf_folders} CFFOLDER structures"
        nbr_cf_folders.times do |idx_cf_folder|
          first_data_offset = BinData::Uint32le.read(@data[cursor..cursor+3])
          nbr_data_blocks = BinData::Uint16le.read(@data[cursor+4..cursor+5])
          data_blocks << [ first_data_offset, nbr_data_blocks ]
          # compression_type = BinData::Uint16le.read(@data[cursor+6..cursor+7])
          cursor += 8 + reserve_field_size_in_folder
          progress(cursor)
        end

        # CFFILE
        log_debug "@#{cursor} - Beginning of #{nbr_cf_files} CFFILE structures"
        nbr_cf_files.times do |idx_cf_file|
          # file_size = BinData::Uint32le.read(@data[cursor..cursor+3])
          # file_offset = BinData::Uint32le.read(@data[cursor+4..cursor+7])
          # idx_file_in_folder = BinData::Uint16le.read(@data[cursor+8..cursor+9])
          # file_date = BinData::Uint16le.read(@data[cursor+10..cursor+11])
          # file_time = BinData::Uint16le.read(@data[cursor+12..cursor+13])
          # file_attrs = BinData::Uint16le.read(@data[cursor+14..cursor+15])
          cursor += 16
          # Skip the NUL-terminated file name
          idx_terminator = @data.index(END_STRING_TERMINATOR, cursor)
          invalid_data("@#{cursor} - Unable to read file name") if (idx_terminator == nil)
          cursor = idx_terminator + 1
          progress(cursor)
        end

        # CFDATA
        log_debug "@#{cursor} - Beginning of CFDATA"
        while (!data_blocks.empty?)
          # We should be on the first data block
          first_datablock_offset, nbr_datablocks = data_blocks.shift
          invalid_data("@#{cursor} - We should be on the next data block offset (#{offset+first_datablock_offset})") if (cursor-offset != first_datablock_offset)
          nbr_datablocks.times do |idx_datablock|
            # data_crc = BinData::Uint32le.read(@data[cursor..cursor+3])
            nbr_compressed_bytes = BinData::Uint16le.read(@data[cursor+4..cursor+5])
            # nbr_uncompressed_bytes = BinData::Uint16le.read(@data[cursor+6..cursor+7])
            cursor += 8 + reserve_field_size_in_data + nbr_compressed_bytes
            progress(cursor)
          end
        end
        invalid_data("@#{cursor} - We should be on at the end of the CAB file (#{offset+cabinet_size})") if (cursor-offset != cabinet_size)

        # Check if it is signed digitally using Authenticode
        if ((cursor+4 < @end_offset) and
            (@data[cursor..cursor+1] == AUTHENTICODE_ID))
          # Read the size (big-endian DER length following \x30\x82)
          authenticode_size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
          log_debug "@#{cursor} - Found authenticode data of size #{authenticode_size}"
          cursor += 4 + authenticode_size
          # Eat eventually up to 4 "\x00" bytes
          while ((cursor < @end_offset) and
                 (@data[cursor] == "\x00"))
            cursor += 1
          end
        end

        return cursor
      end

    end

  end

end
@@ -0,0 +1,92 @@
1
module FilesHunter

  module Decoders

    # Decoder for CFBF (Compound File Binary Format / OLE2) documents:
    # .doc, .xls, .pps, .msi, etc.
    # Computes the file size from the MSAT/SAT sector tables, then scans
    # sector contents for tokens identifying the concrete file extension.
    class CFBF < BeginPatternDecoder

      # CFBF magic number followed by the 16 zero bytes of the header's CLSID
      BEGIN_PATTERN_CFBF = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)

      # Byte tokens searched in sector data to recognize the concrete extension
      # (some are UTF-16LE encoded, hence the interleaved \x00 bytes)
      KNOWN_EXTENSIONS = {
        'MSWordDoc'.force_encoding(Encoding::ASCII_8BIT) => :doc,
        "P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00".force_encoding(Encoding::ASCII_8BIT) => :pps,
        'Microsoft Excel'.force_encoding(Encoding::ASCII_8BIT) => :xls,
        "C\x00a\x00t\x00a\x00l\x00o\x00g\x00".force_encoding(Encoding::ASCII_8BIT) => :db,
        'Install,MSI,Framework'.force_encoding(Encoding::ASCII_8BIT) => :msi
      }

      # Give the begin pattern and its options
      def get_begin_pattern
        return BEGIN_PATTERN_CFBF, { :offset_inc => 24 }
      end

      # Decode CFBF data starting at the given offset
      #
      # Parameters::
      # * *offset* (_Fixnum_): Offset of the matched begin pattern
      # Result::
      # * _Fixnum_: The ending offset of the CFBF segment
      def decode(offset)
        # Know if we are little or big-endian (byte-order mark at header offset 28)
        big_endian = (@data[offset+28..offset+29] == "\xFF\xFE")
        bindata32 = big_endian ? BinData::Uint32be : BinData::Uint32le
        bindata16 = big_endian ? BinData::Uint16be : BinData::Uint16le
        # Read sector size (stored as a power of 2)
        vector_size = 1 << bindata16.read(@data[offset+30..offset+31])

        # Count the number of sectors
        # Read the MSAT (first 109 entries, stored in the 512-byte header)
        msat = @data[offset+76..offset+511]
        found_relevant_data(:doc) # Default
        first_sector_offset = offset + 512
        # Check if there are additional MSAT sectors
        # (sector IDs >= 4294967292 (0xFFFFFFFC) are special end/free markers)
        next_msat_sector_id = bindata32.read(@data[offset+68..offset+71])
        while (next_msat_sector_id < 4294967292)
          # Read the MSAT (all of the sector except its last 4 bytes)
          msat.concat(@data[first_sector_offset+next_msat_sector_id*vector_size..first_sector_offset+(next_msat_sector_id+1)*vector_size-5])
          # The last sector ID is the next MSAT sector one
          next_msat_sector_id = bindata32.read(@data[first_sector_offset+(next_msat_sector_id+1)*vector_size-4..first_sector_offset+(next_msat_sector_id+1)*vector_size-1])
        end
        # Decode the MSAT and read each SAT sector
        sat_sector_ids = []
        log_debug "=== Size of MSAT: #{msat.size}"
        (msat.size / 4).times do |idx|
          sector_id = bindata32.read(msat[idx*4..idx*4+3])
          sat_sector_ids << sector_id if (sector_id < 4294967292)
        end
        # Read each SAT sector and get the maximum sector ID
        max_sector_id = -1
        sat_sector_ids.each do |container_sector_id|
          sector_offset = first_sector_offset + container_sector_id*vector_size
          (vector_size / 4).times do |idx|
            sector_id = bindata32.read(@data[sector_offset+idx*4..sector_offset+idx*4+3])
            if ((sector_id < 4294967292) and
                (sector_id > max_sector_id))
              max_sector_id = sector_id
            end
          end
        end
        # We got the number of sectors
        nbr_sectors = max_sector_id + 1
        log_debug "=== Number of sectors: #{nbr_sectors}"
        metadata(
          :msat_size => msat.size,
          :nbr_sectors => nbr_sectors
        )

        # Now find some info about the file extension
        # (scan each sector for one of the known tokens; stop at first match)
        found_extension = false
        nbr_sectors.times do |idx_sector|
          log_debug "=== Find extension @ sector #{idx_sector}"
          KNOWN_EXTENSIONS.each do |token, extension|
            if (@data[first_sector_offset+idx_sector*vector_size..first_sector_offset+(idx_sector+1)*vector_size-1].index(token) != nil)
              log_debug "=== Found extension #{extension}"
              found_relevant_data(extension)
              found_extension = true
              break
            end
          end
          break if found_extension
        end
        log_debug "@#{offset} - Unable to get extension from CFBF document." if (!found_extension)

        # File = 512-byte header + all sectors
        return first_sector_offset + nbr_sectors*vector_size
      end

    end

  end

end