fileshunter 0.1.0.20130725

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,247 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
+ # JPEG decoder has to be among the last ones to be decoded, as a truncated JPEG followed by other files can consume all files in its truncated data.
6
+ # JPEG files can contain TIFF files
7
+
8
+ class JPEG < BeginPatternDecoder
9
+
10
# All marker bytes are one-byte binary (ASCII-8BIT) strings, so that they can be
# compared directly with the bytes read from the data being decoded.

# First byte of every marker
MARKER_PREFIX = "\xFF".force_encoding(Encoding::ASCII_8BIT)
# Second byte of the marker ending the file
END_MARKER = "\xD9".force_encoding(Encoding::ASCII_8BIT)
# Markers that are not followed by a size and a payload
MARKERS_WITHOUT_PAYLOAD = [
  "\xD8".force_encoding(Encoding::ASCII_8BIT),
  "\xD9".force_encoding(Encoding::ASCII_8BIT)
]
# Marker whose payload is followed by entropy-coded data (same byte as MARKER_SOS)
MARKER_WITH_ENTROPY_DATA = "\xDA".force_encoding(Encoding::ASCII_8BIT)
MARKER_APP0 = "\xE0".force_encoding(Encoding::ASCII_8BIT)
MARKER_APP1 = "\xE1".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF0 = "\xC0".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF3 = "\xC3".force_encoding(Encoding::ASCII_8BIT)
MARKER_DHT = "\xC4".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOS = "\xDA".force_encoding(Encoding::ASCII_8BIT)
MARKER_DQT = "\xDB".force_encoding(Encoding::ASCII_8BIT)
# Bytes that, when following FF inside entropy data, do not start a new segment
MARKERS_IGNORED_IN_ENTROPY_DATA = [
  "\x00".force_encoding(Encoding::ASCII_8BIT),
  "\xD0".force_encoding(Encoding::ASCII_8BIT),
  "\xD1".force_encoding(Encoding::ASCII_8BIT),
  "\xD2".force_encoding(Encoding::ASCII_8BIT),
  "\xD3".force_encoding(Encoding::ASCII_8BIT),
  "\xD4".force_encoding(Encoding::ASCII_8BIT),
  "\xD5".force_encoding(Encoding::ASCII_8BIT),
  "\xD6".force_encoding(Encoding::ASCII_8BIT),
  "\xD7".force_encoding(Encoding::ASCII_8BIT),
  "\xFF".force_encoding(Encoding::ASCII_8BIT)
]
# Matches FF followed by any byte that is not in MARKERS_IGNORED_IN_ENTROPY_DATA.
# The 'n' (no encoding) option is given through Regexp::NOENCODING: the 3-arguments form
# Regexp.new(string, nil, 'n') is deprecated since Ruby 3.2 and rejected by Ruby 3.3.
MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP = Regexp.new(
  "#{MARKER_PREFIX}[^#{MARKERS_IGNORED_IN_ENTROPY_DATA.join}]".force_encoding(Encoding::ASCII_8BIT),
  Regexp::NOENCODING
)

# Identifiers read at the beginning of APP0 and APP1 payloads
JFIF_HEADER = "JFIF\x00".force_encoding(Encoding::ASCII_8BIT)
JFXX_HEADER = "JFXX\x00".force_encoding(Encoding::ASCII_8BIT)
EXIF_HEADER = "Exif\x00\x00".force_encoding(Encoding::ASCII_8BIT)

# Extension codes accepted in a JFXX segment
VALID_EXTENSION_CODES = [ 16, 17, 19 ]
43
+
44
# Give the pattern marking the beginning of a JPEG file.
#
# Result::
# * _String_: The 3 bytes FF D8 FF, as a binary (ASCII-8BIT) string
def get_begin_pattern
  begin_pattern = "\xFF\xD8\xFF"
  # force_encoding returns its receiver, which is then the method's result
  begin_pattern.force_encoding(Encoding::ASCII_8BIT)
end
47
+
48
# Decode a JPEG file, segment after segment, starting from the offset of its begin pattern.
# Each segment is validated (through invalid_data / truncated_data when something is wrong),
# and metadata is reported for JFIF, JFXX, Exif and SOF segments.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in @data
# Result::
# * _Fixnum_: Offset following the end marker (FF D9)
def decode(offset)
  ending_offset = nil

  # Skip the 2 first bytes of the begin pattern (FF D8)
  cursor = offset + 2
  nbr_segments = 0
  quantisation_tables_id = []
  huffman_ac_tables_id = []
  huffman_dc_tables_id = []
  found_sos = false
  found_sof = false
  while (ending_offset == nil)
    # Here cursor is at the beginning of the next marker
    # Read the 2 next bytes: they should be FF ??
    log_debug "@#{cursor} Decoding next offset: #{@data[cursor..cursor+1].inspect}"
    invalid_data("@#{cursor} - Did not get a valid marker definition: #{@data[cursor..cursor+1].inspect}") if (@data[cursor] != MARKER_PREFIX)
    c_1 = @data[cursor+1]
    invalid_data("@#{cursor} - Invalid marker: #{c_1.ord}") if (c_1.ord < 192)
    # Does this marker have a payload?
    if (MARKERS_WITHOUT_PAYLOAD.include?(c_1))
      # No payload
      log_debug "=== No payload"
      # Get to the next bytes
      cursor += 2
      # Check if we arrived at the end
      ending_offset = cursor if (c_1 == END_MARKER)
    else
      # There is a payload
      # Read its length (it is counted from cursor+2, as the next segment is looked for at cursor+2+size)
      size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
      log_debug "=== Payload of size #{size}"
      case c_1
      when MARKER_APP0
        # Application specific data
        # Usually used for JFIF
        case @data[cursor+4..cursor+8]
        when JFIF_HEADER
          invalid_data("@#{cursor} - Invalid size for JFIF marker: #{size}") if (size < 16)
          version_major = @data[cursor+9].ord
          version_minor = @data[cursor+10].ord
          units = @data[cursor+11].ord
          invalid_data("@#{cursor} - Invalid units: #{units}") if (units > 2)
          # NOTE(review): the JFIF specification seems to store densities (not dimensions) in these
          # 2 fields, and 1-byte thumbnail dimensions after them - confirm before relying on this metadata.
          width = BinData::Uint16be.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Invalid width: #{width}") if (width == 0)
          height = BinData::Uint16be.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid height: #{height}") if (height == 0)
          jfif_metadata = {
            :version_major => version_major,
            :version_minor => version_minor,
            :units => units,
            :width => width,
            :height => height
          }
          if (size > 16)
            width_thumb = BinData::Uint16be.read(@data[cursor+16..cursor+17])
            height_thumb = BinData::Uint16be.read(@data[cursor+18..cursor+19])
            jfif_metadata.merge!(
              :width_thumb => width_thumb,
              :height_thumb => height_thumb
            )
          end
          metadata( :jfif_metadata => jfif_metadata )
        when JFXX_HEADER
          extension_code = @data[cursor+9].ord
          invalid_data("@#{cursor} - Invalid extension code: #{extension_code}") if (!VALID_EXTENSION_CODES.include?(extension_code))
          metadata( :jfxx_metadata => { :extension_code => extension_code } )
        end
      when MARKER_APP1
        # Application specific data
        # Usually used for Exif
        case @data[cursor+4..cursor+9]
        when EXIF_HEADER
          # Read a TIFF file from cursor+10
          # The TIFF decoder is required here only, when an Exif segment is really met
          require 'fileshunter/Decoders/TIFF'
          invalid_data("@#{cursor} - Invalid TIFF header") if (@data[cursor+10..cursor+13].index(FilesHunter::Decoders::TIFF::BEGIN_PATTERN_TIFF) != 0)
          tiff_decoder = FilesHunter::Decoders::TIFF.new
          tiff_decoder.setup(FilesHunter::get_segments_analyzer, @data, cursor+10, cursor+2+size)
          tiff_decoder.accept_no_image_data
          begin
            tiff_decoder.find_segments
          rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
            # Invalid TIFF data
            invalid_data("@#{cursor} - Invalid TIFF data: #{$!}")
          end
          segments = tiff_decoder.segments_found
          invalid_data("@#{cursor} - No valid TIFF segment found for Exif") if segments.empty?
          invalid_data("@#{cursor} - Not a valid TIFF segment found for Exif. Found #{segments[0].extensions.inspect}.") if (!segments[0].extensions.include?(:tif))
          invalid_data("@#{cursor} - Truncated TIFF segment found for Exif.") if (segments[0].truncated)
          invalid_data("@#{cursor} - TIFF segment (@#{segments[0].begin_offset}) not found at the beginning of Exif (#{cursor+10}).") if (segments[0].begin_offset != cursor+10)
          #invalid_data("@#{cursor} - TIFF segment not ending (#{segments[0].end_offset}) at the end of Exif (#{cursor+2+size}).") if (segments[0].end_offset != cursor+2+size)
          metadata( :exif_metadata => segments[0].metadata )
          found_relevant_data([:jpg, :thm])
        end
      when MARKER_SOF0..MARKER_SOF3
        # SOF: Start of Frame
        invalid_data("@#{cursor} - Found several SOF markers") if found_sof
        invalid_data("@#{cursor} - Found a SOF marker after the SOS marker") if found_sos
        found_sof = true
        sample_precision = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid sample precision: #{sample_precision}") if ((sample_precision != 8) and (sample_precision != 12))
        image_height = BinData::Uint16be.read(@data[cursor+5..cursor+6])
        image_width = BinData::Uint16be.read(@data[cursor+7..cursor+8])
        metadata(
          :image_height => image_height,
          :image_width => image_width
        )
        nbr_components = @data[cursor+9].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Check that quantisation tables have been defined
        # Each component is described on 3 bytes, starting at cursor+10
        nbr_components.times do |idx_component|
          sampling = @data[cursor+11+idx_component*3].ord
          horizontal_sampling = ((sampling & 0b11110000) >> 4)
          vertical_sampling = (sampling & 0b00001111)
          invalid_data("@#{cursor} - Invalid horizontal sampling: #{horizontal_sampling}") if (horizontal_sampling == 0)
          invalid_data("@#{cursor} - Invalid vertical sampling: #{vertical_sampling}") if (vertical_sampling == 0)
          dqt_id = @data[cursor+12+idx_component*3].ord
          invalid_data("@#{cursor} - Missing quantisation table ID #{dqt_id}") if (!quantisation_tables_id.include?(dqt_id))
        end
      when MARKER_DHT
        # DHT: Define Huffman tables
        end_cursor = cursor + 2 + size
        dht_cursor = cursor + 4
        while (dht_cursor < end_cursor)
          header_byte = @data[dht_cursor].ord
          huffman_type = ((header_byte & 0b11110000) >> 4)
          invalid_data("@#{cursor} - Unknown Huffman table type: #{huffman_type}") if (huffman_type > 1)
          if (huffman_type == 0)
            huffman_dc_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman DC table id #{huffman_dc_table_id} already defined.") if (huffman_dc_tables_id.include?(huffman_dc_table_id))
            huffman_dc_tables_id << huffman_dc_table_id
            log_debug "@#{cursor} - Found Huffman DC table: #{huffman_dc_table_id}"
          else
            huffman_ac_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman AC table id #{huffman_ac_table_id} already defined.") if (huffman_ac_tables_id.include?(huffman_ac_table_id))
            huffman_ac_tables_id << huffman_ac_table_id
            log_debug "@#{cursor} - Found Huffman AC table: #{huffman_ac_table_id}"
          end
          # The 16 bytes following the header byte give the number of elements for each depth
          nbr_elements = 0
          @data[dht_cursor+1..dht_cursor+16].bytes.each do |nbr_element_for_depth|
            nbr_elements += nbr_element_for_depth
          end
          dht_cursor += 17 + nbr_elements
          # Report the Huffman cursor here: dqt_cursor is not defined in this branch,
          # and using it raised a NameError instead of reporting the invalid data.
          invalid_data("@#{dht_cursor} - End of Huffman table was supposed to be @#{end_cursor}.") if (dht_cursor > end_cursor)
        end
      when MARKER_SOS
        # SOS: Start of Scan
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman DC table has been defined.") if (huffman_dc_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman AC table has been defined.") if (huffman_ac_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no quantisation table has been defined.") if (quantisation_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no SOF marker has been encountered.") if (!found_sof)
        found_sos = true
        nbr_components = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Each component is described on 2 bytes, starting at cursor+5
        nbr_components.times do |idx_component|
          huffman_table_ids = @data[cursor+6+2*idx_component].ord
          huffman_dc_table_id = ((huffman_table_ids & 0b11110000) >> 4)
          huffman_ac_table_id = (huffman_table_ids & 0b00001111)
          invalid_data("@#{cursor} - Unknown DC Huffman table: #{huffman_dc_table_id}") if (!huffman_dc_tables_id.include?(huffman_dc_table_id))
          invalid_data("@#{cursor} - Unknown AC Huffman table: #{huffman_ac_table_id}") if (!huffman_ac_tables_id.include?(huffman_ac_table_id))
        end
      when MARKER_DQT
        # DQT: Define quantisation tables
        end_cursor = cursor + 2 + size
        dqt_cursor = cursor + 4
        while (dqt_cursor < end_cursor)
          header_byte = @data[dqt_cursor].ord
          precision = ((header_byte & 0b11110000) >> 4)
          quantisation_table_id = (header_byte & 0b00001111)
          invalid_data("@#{cursor} - Quantisation table id #{quantisation_table_id} already defined.") if (quantisation_tables_id.include?(quantisation_table_id))
          quantisation_tables_id << quantisation_table_id
          log_debug "@#{cursor} - Found quantisation table: #{quantisation_table_id}"
          # 64 values follow the header byte, on 1 byte each if precision is 0, 2 bytes otherwise
          dqt_cursor += 1 + 64*((precision == 0) ? 1 : 2)
          invalid_data("@#{dqt_cursor} - End of quantisation table was supposed to be @#{end_cursor}.") if (dqt_cursor > end_cursor)
        end
      end
      # Does it have entropy data?
      if (c_1 == MARKER_WITH_ENTROPY_DATA)
        # There is entropy data
        found_relevant_data([:jpg, :thm])
        # Find the next marker that is FF xx, with xx being different than 00, D0..D7 and FF
        cursor = @data.index(MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP, cursor + 2 + size, 2)
        log_debug "=== Entropy data gets to cursor #{cursor.inspect}"
        truncated_data("@#{cursor} - Truncated entropy data segment", @end_offset) if (cursor == nil)
      else
        # No entropy data: just get to the next segment
        cursor += 2 + size
      end
    end
    nbr_segments += 1
    progress(cursor)
  end
  metadata( :nbr_segments => nbr_segments )

  return ending_offset
end
242
+
243
+ end
244
+
245
+ end
246
+
247
+ end
@@ -0,0 +1,30 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
# Decoder for data beginning with BEGIN_PATTERN_M2V and ending with END_PATTERN_M2V.
# NOTE(review): these patterns look like an MPEG program stream pack start code and
# program end code - confirm against the MPEG-2 systems specification.
class M2V < BeginPatternDecoder

  BEGIN_PATTERN_M2V = "\x00\x00\x01\xBA\x44\x00\x04\x00\x14\x01".force_encoding(Encoding::ASCII_8BIT)
  END_PATTERN_M2V = "\x00\x00\x01\xB9".force_encoding(Encoding::ASCII_8BIT)

  # Give the begin pattern, along with its options.
  #
  # Result::
  # * _String_: The begin pattern
  # * <em>map<Symbol,Object></em>: Options going with the pattern
  def get_begin_pattern
    pattern_options = { :offset_inc => 10 }
    [BEGIN_PATTERN_M2V, pattern_options]
  end

  # Look for the end pattern following the begin pattern found at a given offset.
  #
  # Parameters::
  # * *offset* (_Fixnum_): Offset of the begin pattern in @data
  # Result::
  # * _Fixnum_: Offset following the 4 bytes of the end pattern
  def decode(offset)
    found_relevant_data(:m2v)
    # Search after the 10 bytes of the begin pattern
    end_pattern_offset = @data.index(END_PATTERN_M2V, offset + 10)
    log_debug "=== @#{offset} - Found ending offset: #{end_pattern_offset.inspect}"
    # No end pattern, or an end pattern going past the end of the data
    truncated_data if end_pattern_offset.nil? || (end_pattern_offset + 4 > @end_offset)

    end_pattern_offset + 4
  end

end
27
+
28
+ end
29
+
30
+ end
@@ -0,0 +1,341 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
+ # MP3 files can contain JPEG files
6
+
7
+ class MP3 < BeginPatternDecoder
8
+
9
# Magic strings beginning the different kinds of tags that can be met
BEGIN_PATTERN_ID3V1 = 'TAG'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V1E = 'TAG+'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V2 = 'ID3'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_APEV2 = 'APETAGEX'.force_encoding(Encoding::ASCII_8BIT)
# Matches either the 3 first bytes of a frame header, an ID3v2 tag or an APEv2 tag.
# The pattern contains raw bytes above 7F: it is explicitly marked as binary and compiled
# with Regexp::NOENCODING. The 3-arguments form Regexp.new(string, nil, 'n') is deprecated
# since Ruby 3.2 and rejected by Ruby 3.3.
BEGIN_PATTERN_MP3 = Regexp.new(
  "(\xFF[\xE2-\xFF][\x00-\xEF]|#{BEGIN_PATTERN_ID3V2}|#{BEGIN_PATTERN_APEV2})".force_encoding(Encoding::ASCII_8BIT),
  Regexp::NOENCODING
)

# Bit rates (in kbps), indexed by the header's bit rate index - 1.
# Columns are selected in decode_mp3_frame_header as:
# version 1 layer 1, version 1 layer 2, version 1 layer 3, other versions layer 1, other versions layers 2 and 3.
BITRATE_INDEX = [
  [ 32, 32, 32, 32, 8 ],
  [ 64, 48, 40, 48, 16 ],
  [ 96, 56, 48, 56, 24 ],
  [ 128, 64, 56, 64, 32 ],
  [ 160, 80, 64, 80, 40 ],
  [ 192, 96, 80, 96, 48 ],
  [ 224, 112, 96, 112, 56 ],
  [ 256, 128, 112, 128, 64 ],
  [ 288, 160, 128, 144, 80 ],
  [ 320, 192, 160, 160, 96 ],
  [ 352, 224, 192, 176, 112 ],
  [ 384, 256, 224, 192, 128 ],
  [ 416, 320, 256, 224, 144 ],
  [ 448, 384, 320, 256, 160 ]
]
# Sample rates (in Hz), indexed by the header's sample rate index, then by version - 1
SAMPLE_RATE_INDEX = [
  [ 44100, 22050, 11025 ],
  [ 48000, 24000, 12000 ],
  [ 32000, 16000, 8000 ]
]

# Minimal duration of decoded frames needed to consider the data as being a real MP3
MIN_ACCEPTABLE_TIME_MS = 1000

# Maximal number of bytes of an ID3v2 frame that are kept in metadata
MAX_ID3V2_FRAME_SIZE = 256

APE_ITEM_KEY_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

ID3V2_PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Matches the 00 bytes ending a line
TRAILING_00_REGEXP = Regexp.new("\x00*$".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)
46
+
47
# Give the begin pattern, along with its options.
#
# Result::
# * _Regexp_: The begin pattern
# * <em>map<Symbol,Object></em>: Options going with the pattern
def get_begin_pattern
  pattern_options = { :max_regexp_size => 8 }
  [BEGIN_PATTERN_MP3, pattern_options]
end
50
+
51
# Check that a begin pattern found at a given offset is worth being decoded.
# Tags are always accepted. A frame header (beginning with FF) is accepted only
# if none of its version, layer, sample rate and emphasis fields has a reserved value.
#
# Parameters::
# * *begin_pattern_offset* (_Fixnum_): Offset of the begin pattern in @data
# * *pattern_index* (_Fixnum_): Index of the matched pattern (unused here)
# Result::
# * _Boolean_: Can the data at this offset be decoded?
def check_begin_pattern(begin_pattern_offset, pattern_index)
  # Compare byte values: comparing the character with a "\xFF" literal is always false
  # as soon as the literal and the data do not have the same encoding.
  first_byte = @data[begin_pattern_offset].to_s.bytes.to_a[0]
  return true unless first_byte == 0xFF

  header_bytes = @data[begin_pattern_offset+1..begin_pattern_offset+3].bytes.to_a
  # A header cut by the end of the data can't be validated
  return false if header_bytes.size < 3

  # Version bits 01 are the reserved ones (10 is version 2, as mapped in decode_mp3_frame_header)
  ((header_bytes[0] & 24) != 8) &&
    ((header_bytes[0] & 6) != 0) &&
    ((header_bytes[1] & 12) != 12) &&
    ((header_bytes[2] & 3) != 2)
end
62
+
63
# Decode an MP3 file from a given offset: a succession of ID3v1, ID3v2 and APE tags,
# and of MP3 frames. Frames are considered as relevant data only once
# MIN_ACCEPTABLE_TIME_MS milliseconds of them have been decoded.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in @data
# Result::
# * _Fixnum_: Offset following the last decoded byte
def decode(offset)
  ending_offset = nil

  cursor = offset
  nbr_ms = 0
  valid = false
  while ending_offset.nil?
    tag_magic = @data[cursor..cursor+2]
    if tag_magic == BEGIN_PATTERN_ID3V1
      if @data[cursor..cursor+3] == BEGIN_PATTERN_ID3V1E
        # Extended tag: 227 bytes
        log_debug "=== @#{cursor} - Found ID3v1 extended tag"
        id3v1e_metadata = {
          :title => read_ascii(cursor+4, cursor+63),
          :artist => read_ascii(cursor+64, cursor+123),
          :album => read_ascii(cursor+124, cursor+183),
          :speed => @data[cursor+184].ord,
          :genre => read_ascii(cursor+185, cursor+214),
          :start_time => read_ascii(cursor+215, cursor+220),
          :end_time => read_ascii(cursor+221, cursor+226)
        }
        metadata( :id3v1e_metadata => id3v1e_metadata )
        cursor += 227
      else
        # Plain ID3v1 tag: 128 bytes
        log_debug "=== @#{cursor} - Found ID3v1 tag"
        id3v1_metadata = {
          :title => read_ascii(cursor+3, cursor+32),
          :artist => read_ascii(cursor+33, cursor+62),
          :album => read_ascii(cursor+63, cursor+92),
          :year => read_ascii(cursor+93, cursor+96),
          :comments => read_ascii(cursor+97, cursor+126),
          :genre => @data[cursor+127].ord
        }
        metadata( :id3v1_metadata => id3v1_metadata )
        cursor += 128
        # An ID3v1 tag is always the last part of the file: decoding is over
        ending_offset = cursor
      end
    elsif tag_magic == BEGIN_PATTERN_ID3V2
      log_debug "=== @#{cursor} - Found ID3v2 tag"
      invalid_data("@#{cursor} - Invalid ID3v2 header") if (@data[cursor+3].ord == 255) || (@data[cursor+4].ord == 255)
      # The tag's size is made of the 10 bytes of the header, plus 4 bytes contributing 7 bits each
      size = 10
      @data[cursor+6..cursor+9].bytes.each_with_index do |byte, idx|
        invalid_data("@#{cursor} - Invalid ID3v2 header in size specification (#{idx})") if byte >= 128
        size += (byte << (7 * (3 - idx)))
      end
      # A footer adds 10 bytes
      size += 10 if (@data[cursor+5].ord & 16) == 16
      cursor_end = cursor + size
      cursor += 10
      # An extended header is considered to be there if its declared size is a valid one
      padding_size = 0
      extended_header_size = BinData::Uint32be.read(@data[cursor..cursor+3])
      if (extended_header_size == 6) || (extended_header_size == 10)
        extended_header_flags = BinData::Uint16be.read(@data[cursor+4..cursor+5])
        invalid_data("@#{cursor} - Invalid extended header flags.") if (extended_header_flags & 0x7FFF) != 0
        has_crc = ((extended_header_flags & 0x8000) != 0)
        invalid_data("@#{cursor} - Extended header declared size and CRC flag do not match.") if ((extended_header_size == 10) && !has_crc) || ((extended_header_size == 6) && has_crc)
        padding_size = BinData::Uint32be.read(@data[cursor+6..cursor+9])
        cursor += 10
        cursor += 4 if has_crc
      end
      # Frames follow each other until the end of the tag, or until padding is met
      id3v2_metadata = {}
      while (cursor < cursor_end) && (@data[cursor] != ID3V2_PADDING_CHAR)
        frame_id = @data[cursor..cursor+3]
        frame_size = BinData::Uint32be.read(@data[cursor+4..cursor+7])
        frame_flags = BinData::Uint16be.read(@data[cursor+8..cursor+9])
        invalid_data("@#{cursor} - Invalid ID3v2 frame flags: #{frame_flags}.") if (frame_flags & 0x1F1F) != 0
        cursor += 10
        # Do not keep more than MAX_ID3V2_FRAME_SIZE bytes of the frame in metadata
        id3v2_metadata[frame_id] = read_ascii(cursor, cursor+((frame_size > MAX_ID3V2_FRAME_SIZE) ? MAX_ID3V2_FRAME_SIZE : frame_size)-1)
        cursor += frame_size
      end
      metadata( :id3v2_metadata => id3v2_metadata )
      # Padding is skipped by jumping to the end computed from the header
      log_debug("@#{cursor} - Padding size (#{padding_size}) is different from what is being read (#{cursor_end-cursor}).") if padding_size != cursor_end-cursor
      cursor = cursor_end
    elsif @data[cursor..cursor+7] == BEGIN_PATTERN_APEV2
      log_debug "=== @#{cursor} - Found APEv2 tag"
      info = decode_ape_tag_header(cursor)
      invalid_data("@#{cursor} - APE tag header indicates no header whereas we have one.") unless info[:has_header]
      invalid_data("@#{cursor} - APE tag header indicates it is a footer whereas we are on the header.") if info[:on_footer]
      cursor += 32
      cursor_end_tag = cursor + info[:tag_size]
      ape_metadata = {}
      info[:nbr_items].times do
        item_key, item_value, cursor = decode_ape_tag_item(cursor)
        ape_metadata[item_key] = item_value
      end
      invalid_data("@#{cursor} - APE tag header is inconsistent. We should be at cursor #{cursor_end_tag-(info[:has_footer] ? 32 : 0)}") if cursor != cursor_end_tag-(info[:has_footer] ? 32 : 0)
      metadata( :apev2_metadata => ape_metadata )
      if info[:has_footer]
        invalid_data("@#{cursor} - Invalid APE tag footer magic.") if @data[cursor..cursor+7] != BEGIN_PATTERN_APEV2
        footer_info = decode_ape_tag_header(cursor)
        invalid_data("@#{cursor} - APEv2 tag footer indicates no footer whereas we have one.") unless footer_info[:has_footer]
        invalid_data("@#{cursor} - APEv2 tag footer indicates it is a header whereas we are on the footer.") unless footer_info[:on_footer]
        cursor += 32
      end
    else
      # Here is either an APEv1 tag (items without any header, followed by a footer),
      # an MP3 frame, or the end of the file.
      # An APEv1 tag is looked for only once MP3 frames have been decoded.
      ape_tag_decoded = false
      if nbr_ms != 0
        cursor_begin = cursor
        begin
          ape_metadata = {}
          nbr_items = 0
          until @data[cursor..cursor+7] == BEGIN_PATTERN_APEV2
            item_key, item_value, cursor = decode_ape_tag_item(cursor)
            ape_metadata[item_key] = item_value
            nbr_items += 1
            log_debug "=== @#{cursor} - Decoded APEv1 tag item: #{item_key.inspect} => #{item_value[0..31].inspect}"
          end
          # The cursor is now on the footer
          footer_info = decode_ape_tag_header(cursor)
          invalid_data("@#{cursor} - APEv1 tag footer indicates no footer whereas we have one.") unless footer_info[:has_footer]
          invalid_data("@#{cursor} - APEv1 tag footer indicates it is a header whereas we are on the footer.") unless footer_info[:on_footer]
          invalid_data("@#{cursor} - APEv1 tag footer indicates #{footer_info[:nbr_items]} tag items, whereas we read #{nbr_items}") if footer_info[:nbr_items] != nbr_items
          log_debug "=== @#{cursor} - Found APEv1 tag"
          cursor += 32
          ape_tag_decoded = true
          metadata( :apev1_metadata => ape_metadata )
        rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
          # This was not an APEv1 tag: get back to where we were, and try an MP3 frame
          cursor = cursor_begin
          ape_tag_decoded = false
        end
      end
      unless ape_tag_decoded
        info = nil
        begin
          info = decode_mp3_frame_header(cursor)
        rescue InvalidDataError
          # Without enough valid frames before, this is a real error
          raise if nbr_ms < MIN_ACCEPTABLE_TIME_MS
          # Otherwise consider that the file ends here
          ending_offset = cursor
        end
        if ending_offset.nil?
          # Get to the next frame
          cursor += info[:size]
          # The data is considered as valid only once enough milliseconds have been decoded
          nbr_ms += info[:nbr_ms]
          if !valid && (nbr_ms >= MIN_ACCEPTABLE_TIME_MS)
            valid = true
            found_relevant_data(:mp3)
          end
          metadata( :nbr_ms => nbr_ms )
        end
      end
    end
    # Reaching the end of the data with enough decoded frames ends the file too
    ending_offset = cursor if (nbr_ms >= MIN_ACCEPTABLE_TIME_MS) && (cursor == @end_offset)
    progress(cursor)
  end

  ending_offset
end
239
+
240
+ private
241
+
242
+ # Decode an MP3 frame header
243
+ #
244
+ # Parameters::
245
+ # * *cursor* (_Fixnum_): The cursor
246
+ # Result::
247
+ # * <em>map<Symbol,Object></em>: Corresponding header info
248
def decode_mp3_frame_header(cursor)
  # The returned map has 2 keys:
  # * :size: The size of the frame, in bytes
  # * :nbr_ms: The duration of the frame, in milliseconds
  # invalid_data is called for headers having reserved values, or a free bit rate.
  info = {}
  # Check the header's values
  header_bytes = @data[cursor..cursor+3].bytes.to_a
  # A header cut by the end of the data is invalid too.
  # Version bits 01 are the reserved ones: 10 is version 2, as mapped below.
  invalid_data("@#{cursor} - Invalid MP3 header") if ((header_bytes.size < 4) or
                                                     (header_bytes[0] != 255) or
                                                     ((header_bytes[1] & 224) != 224) or
                                                     ((header_bytes[1] & 24) == 8) or
                                                     ((header_bytes[1] & 6) == 0) or
                                                     ((header_bytes[2] & 240) == 240) or
                                                     ((header_bytes[2] & 12) == 12) or
                                                     ((header_bytes[3] & 3) == 2))
  invalid_data("@#{cursor} - Invalid MP3 header: can't compute size of free bitrates") if ((header_bytes[2] & 240) == 0)
  # Read header values to compute the size
  # version is 1 or 2 for MPEG versions 1 and 2, and 3 for version 2.5
  version = nil
  case ((header_bytes[1] & 24) >> 3)
  when 0
    version = 3
  when 2
    version = 2
  when 3
    version = 1
  else
    invalid_data("@#{cursor} - Unknown version in header: #{((header_bytes[1] & 24) >> 3)}")
  end
  layer = 4 - ((header_bytes[1] & 6) >> 1)
  bit_rate = BITRATE_INDEX[((header_bytes[2] & 240) >> 4)-1][(version == 1) ? layer - 1 : ((layer == 1) ? 3 : 4)] * 1000
  sample_rate = SAMPLE_RATE_INDEX[(header_bytes[2] & 12) >> 2][version - 1]
  padding = ((header_bytes[2] & 2) >> 1)
  # Compute the size and the duration from the number of samples stored in a frame:
  # 384 for layer 1, 1152 for layer 2 and for layer 3 of version 1,
  # and 576 for layer 3 of versions 2 and 2.5.
  if (layer == 1)
    samples_per_frame = 384
    # Layer 1 frames are made of 4 bytes slots
    info[:size] = ((12 * bit_rate) / sample_rate + padding) * 4
  else
    samples_per_frame = ((layer == 3) and (version != 1)) ? 576 : 1152
    info[:size] = ((samples_per_frame / 8) * bit_rate) / sample_rate + padding
  end
  info[:nbr_ms] = (samples_per_frame * 1000) / sample_rate
  #log_debug "=== @#{cursor} - Read MP3 frame: Version=#{version} Layer=#{layer} BitRate=#{bit_rate} SampleRate=#{sample_rate} Padding=#{padding} FrameLength=#{info[:size]} Milliseconds=#{info[:nbr_ms]}"
  return info
end
282
+
283
+ # Decode an APE tag header
284
+ #
285
+ # Parameters::
286
+ # * *cursor* (_Fixnum_): Current cursor
287
+ # Result::
288
+ # * <em>map<Symbol,Object></em>: The APE tag info
289
def decode_ape_tag_header(cursor)
  # The 32 bytes read here are: 8 bytes of magic, 4 bytes of version (ignored),
  # then the tag size, the number of items, the flags (4 bytes each, little endian),
  # and 8 reserved bytes.
  tag_size = BinData::Uint32le.read(@data[cursor+12..cursor+15])
  nbr_items = BinData::Uint32le.read(@data[cursor+16..cursor+19])
  flags = BinData::Uint32le.read(@data[cursor+20..cursor+23])
  info = {
    :tag_size => tag_size,
    :nbr_items => nbr_items,
    # Bit 31 set: the tag has a header
    :has_header => ((flags & 0x80000000) != 0),
    # Bit 30 unset: the tag has a footer
    :has_footer => ((flags & 0x40000000) == 0),
    # Bit 29 unset: we are on the footer
    :on_footer => ((flags & 0x20000000) == 0)
  }
  # Bits 3 to 28 have to be 0
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if (flags & 0x1FFFFFF8) != 0
  reserved = BinData::Uint64le.read(@data[cursor+24..cursor+31])
  invalid_data("@#{cursor} - Invalid reserved bytes in APE Tag header: #{reserved} should be 0.") if reserved != 0

  info
end
303
+
304
+ # Decode an APE tag item
305
+ #
306
+ # Parameters::
307
+ # * *cursor* (_Fixnum_): The cursor
308
+ # Result::
309
+ # * _String_: Item key
310
+ # * _String_: Item value
311
+ # * _Fixnum_: New cursor
312
def decode_ape_tag_item(cursor)
  # An item is made of the value's size and flags (4 bytes each, little endian),
  # then the key terminated by APE_ITEM_KEY_TERMINATOR, then the value.
  value_size = BinData::Uint32le.read(@data[cursor..cursor+3])
  flags = BinData::Uint32le.read(@data[cursor+4..cursor+7])
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if (flags & 0x1FFFFFF8) != 0
  key_begin = cursor + 8
  cursor_terminator = @data.index(APE_ITEM_KEY_TERMINATOR, key_begin)
  invalid_data("@#{cursor} - Could not find the end of APE tag item key.") if cursor_terminator.nil?
  invalid_data("@#{cursor} - Empty APE tag item key.") if cursor_terminator == key_begin
  item_key = @data[key_begin..cursor_terminator-1]
  # The value begins right after the key's terminator
  value_begin = cursor_terminator + 1
  item_value = @data[value_begin..value_begin+value_size-1]

  [item_key, item_value, value_begin + value_size]
end
325
+
326
+ # Read an ASCII value
327
+ #
328
+ # Parameters::
329
+ # * *cursor_begin* (_Fixnum_): The cursor to read from
330
+ # * *cursor_end* (_Fixnum_): The end of the cursor
331
+ # Result::
332
+ # * _String_ or <em>list<String></em>: Resulting string or list of strings if several.
333
def read_ascii(cursor_begin, cursor_end)
  # Both cursors are included in the data being read
  raw_value = @data[cursor_begin..cursor_end]
  # Remove the 00 bytes matched by TRAILING_00_REGEXP, then the surrounding whitespace
  without_padding = raw_value.gsub(TRAILING_00_REGEXP, '')
  without_padding.strip
end
336
+
337
+ end
338
+
339
+ end
340
+
341
+ end