fileshunter 0.1.0.20130725

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
+ # JPEG decoder has to be among the last ones to be decoded, as a truncated JPEG followed by other files can consume all files in its truncated data.
6
+ # JPEG files can contain TIFF files
7
+
8
+ class JPEG < BeginPatternDecoder
9
+
10
# Byte introducing every JPEG marker
MARKER_PREFIX = "\xFF".force_encoding(Encoding::ASCII_8BIT)
# Second byte of the End Of Image marker: decoding stops right after it
END_MARKER = "\xD9".force_encoding(Encoding::ASCII_8BIT)
# Markers made of their 2 bytes only (no length, no payload)
MARKERS_WITHOUT_PAYLOAD = [
  "\xD8".force_encoding(Encoding::ASCII_8BIT),
  "\xD9".force_encoding(Encoding::ASCII_8BIT)
].freeze
# Marker whose payload is followed by entropy-coded data (same byte as MARKER_SOS)
MARKER_WITH_ENTROPY_DATA = "\xDA".force_encoding(Encoding::ASCII_8BIT)
# Second byte of the markers that get a dedicated treatment while decoding
MARKER_APP0 = "\xE0".force_encoding(Encoding::ASCII_8BIT)
MARKER_APP1 = "\xE1".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF0 = "\xC0".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF3 = "\xC3".force_encoding(Encoding::ASCII_8BIT)
MARKER_DHT = "\xC4".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOS = "\xDA".force_encoding(Encoding::ASCII_8BIT)
MARKER_DQT = "\xDB".force_encoding(Encoding::ASCII_8BIT)
# Bytes that, when following MARKER_PREFIX inside entropy data, do not end the entropy data
MARKERS_IGNORED_IN_ENTROPY_DATA = [
  "\x00".force_encoding(Encoding::ASCII_8BIT),
  "\xD0".force_encoding(Encoding::ASCII_8BIT),
  "\xD1".force_encoding(Encoding::ASCII_8BIT),
  "\xD2".force_encoding(Encoding::ASCII_8BIT),
  "\xD3".force_encoding(Encoding::ASCII_8BIT),
  "\xD4".force_encoding(Encoding::ASCII_8BIT),
  "\xD5".force_encoding(Encoding::ASCII_8BIT),
  "\xD6".force_encoding(Encoding::ASCII_8BIT),
  "\xD7".force_encoding(Encoding::ASCII_8BIT),
  "\xFF".force_encoding(Encoding::ASCII_8BIT)
].freeze
# Matches MARKER_PREFIX followed by any byte that is not part of MARKERS_IGNORED_IN_ENTROPY_DATA.
# The binary behaviour is requested through Regexp::NOENCODING: the 3-argument form of Regexp.new
# (language code 'n') is not accepted anymore by recent Ruby versions.
MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP = Regexp.new(
  "#{MARKER_PREFIX}[^#{MARKERS_IGNORED_IN_ENTROPY_DATA.join}]".force_encoding(Encoding::ASCII_8BIT),
  Regexp::NOENCODING
)

# Identifiers found at the beginning of APP0 (JFIF, JFXX) and APP1 (Exif) payloads
JFIF_HEADER = "JFIF\x00".force_encoding(Encoding::ASCII_8BIT)
JFXX_HEADER = "JFXX\x00".force_encoding(Encoding::ASCII_8BIT)
EXIF_HEADER = "Exif\x00\x00".force_encoding(Encoding::ASCII_8BIT)

# Extension codes accepted in a JFXX segment
VALID_EXTENSION_CODES = [ 16, 17, 19 ].freeze
43
+
44
# Give the pattern announcing the beginning of a JPEG stream: bytes FF D8 FF.
#
# Result::
# * _String_: The begin pattern, as a binary string
def get_begin_pattern
  [0xFF, 0xD8, 0xFF].pack('C*')
end
47
+
48
# Decode a JPEG stream by walking its segments, marker after marker, until the End Of Image marker.
# Along the way, JFIF/JFXX/Exif information, image size and the number of segments are reported
# through metadata, and inconsistencies are reported through invalid_data.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in the data
# Result::
# * _Fixnum_: Offset of the first byte following the End Of Image marker
def decode(offset)
  ending_offset = nil

  # Skip the first 2 bytes of the begin pattern: the cursor then sits on the next marker prefix
  cursor = offset + 2
  nbr_segments = 0
  quantisation_tables_id = []
  huffman_ac_tables_id = []
  huffman_dc_tables_id = []
  found_sos = false
  found_sof = false
  while ending_offset.nil?
    # Here cursor is at the beginning of the next marker
    # Read the 2 next bytes: they should be FF ??
    log_debug "@#{cursor} Decoding next offset: #{@data[cursor..cursor+1].inspect}"
    invalid_data("@#{cursor} - Did not get a valid marker definition: #{@data[cursor..cursor+1].inspect}") if (@data[cursor] != MARKER_PREFIX)
    c_1 = @data[cursor+1]
    invalid_data("@#{cursor} - Invalid marker: #{c_1.ord}") if (c_1.ord < 192)
    # Does this marker have a payload?
    if MARKERS_WITHOUT_PAYLOAD.include?(c_1)
      # No payload
      log_debug "=== No payload"
      # Get to the next bytes
      cursor += 2
      # Check if we arrived at the end
      ending_offset = cursor if (c_1 == END_MARKER)
    else
      # There is a payload
      # Read its length (it counts the 2 length bytes themselves, but not the 2 marker bytes)
      size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
      log_debug "=== Payload of size #{size}"
      case c_1
      when MARKER_APP0
        # Application specific data
        # Usually used for JFIF
        case @data[cursor+4..cursor+8]
        when JFIF_HEADER
          invalid_data("@#{cursor} - Invalid size for JFIF marker: #{size}") if (size < 16)
          version_major = @data[cursor+9].ord
          version_minor = @data[cursor+10].ord
          units = @data[cursor+11].ord
          invalid_data("@#{cursor} - Invalid units: #{units}") if (units > 2)
          # NOTE(review): the JFIF specification seems to define the 2 following 16 bits values as
          # X and Y densities, and the thumbnail dimensions as 1 byte each - confirm before relying
          # on the :width, :height, :width_thumb and :height_thumb metadata.
          width = BinData::Uint16be.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Invalid width: #{width}") if (width == 0)
          height = BinData::Uint16be.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid height: #{height}") if (height == 0)
          jfif_metadata = {
            :version_major => version_major,
            :version_minor => version_minor,
            :units => units,
            :width => width,
            :height => height
          }
          if (size > 16)
            width_thumb = BinData::Uint16be.read(@data[cursor+16..cursor+17])
            height_thumb = BinData::Uint16be.read(@data[cursor+18..cursor+19])
            jfif_metadata.merge!(
              :width_thumb => width_thumb,
              :height_thumb => height_thumb
            )
          end
          metadata( :jfif_metadata => jfif_metadata )
        when JFXX_HEADER
          extension_code = @data[cursor+9].ord
          invalid_data("@#{cursor} - Invalid extension code: #{extension_code}") if (!VALID_EXTENSION_CODES.include?(extension_code))
          metadata( :jfxx_metadata => { :extension_code => extension_code } )
        end
      when MARKER_APP1
        # Application specific data
        # Usually used for Exif
        case @data[cursor+4..cursor+9]
        when EXIF_HEADER
          # Read a TIFF file from cursor+10
          # The TIFF decoder is loaded lazily, only when an Exif segment is met
          require 'fileshunter/Decoders/TIFF'
          invalid_data("@#{cursor} - Invalid TIFF header") if (@data[cursor+10..cursor+13].index(FilesHunter::Decoders::TIFF::BEGIN_PATTERN_TIFF) != 0)
          tiff_decoder = FilesHunter::Decoders::TIFF.new
          tiff_decoder.setup(FilesHunter::get_segments_analyzer, @data, cursor+10, cursor+2+size)
          tiff_decoder.accept_no_image_data
          begin
            tiff_decoder.find_segments
          rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
            # Invalid TIFF data
            invalid_data("@#{cursor} - Invalid TIFF data: #{$!}")
          end
          segments = tiff_decoder.segments_found
          invalid_data("@#{cursor} - No valid TIFF segment found for Exif") if segments.empty?
          invalid_data("@#{cursor} - Not a valid TIFF segment found for Exif. Found #{segments[0].extensions.inspect}.") if (!segments[0].extensions.include?(:tif))
          invalid_data("@#{cursor} - Truncated TIFF segment found for Exif.") if (segments[0].truncated)
          invalid_data("@#{cursor} - TIFF segment (@#{segments[0].begin_offset}) not found at the beginning of Exif (#{cursor+10}).") if (segments[0].begin_offset != cursor+10)
          #invalid_data("@#{cursor} - TIFF segment not ending (#{segments[0].end_offset}) at the end of Exif (#{cursor+2+size}).") if (segments[0].end_offset != cursor+2+size)
          metadata( :exif_metadata => segments[0].metadata )
          found_relevant_data([:jpg, :thm])
        end
      when MARKER_SOF0..MARKER_SOF3
        # SOF: Start of Frame
        invalid_data("@#{cursor} - Found several SOF markers") if found_sof
        invalid_data("@#{cursor} - Found a SOF marker after the SOS marker") if found_sos
        found_sof = true
        sample_precision = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid sample precision: #{sample_precision}") if ((sample_precision != 8) && (sample_precision != 12))
        image_height = BinData::Uint16be.read(@data[cursor+5..cursor+6])
        image_width = BinData::Uint16be.read(@data[cursor+7..cursor+8])
        metadata(
          :image_height => image_height,
          :image_width => image_width
        )
        nbr_components = @data[cursor+9].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Check that quantisation tables have been defined
        # Each component is described on 3 bytes: the sampling byte is the 2nd one, the table id the 3rd one
        nbr_components.times do |idx_component|
          sampling = @data[cursor+11+idx_component*3].ord
          horizontal_sampling = ((sampling & 0b11110000) >> 4)
          vertical_sampling = (sampling & 0b00001111)
          invalid_data("@#{cursor} - Invalid horizontal sampling: #{horizontal_sampling}") if (horizontal_sampling == 0)
          invalid_data("@#{cursor} - Invalid vertical sampling: #{vertical_sampling}") if (vertical_sampling == 0)
          dqt_id = @data[cursor+12+idx_component*3].ord
          invalid_data("@#{cursor} - Missing quantisation table ID #{dqt_id}") if (!quantisation_tables_id.include?(dqt_id))
        end
      when MARKER_DHT
        # DHT: Define Huffman tables
        end_cursor = cursor + 2 + size
        dht_cursor = cursor + 4
        while (dht_cursor < end_cursor)
          header_byte = @data[dht_cursor].ord
          # High nibble: table type (0 is handled as DC, 1 as AC). Low nibble: table id.
          huffman_type = ((header_byte & 0b11110000) >> 4)
          invalid_data("@#{cursor} - Unknown Huffman table type: #{huffman_type}") if (huffman_type > 1)
          if (huffman_type == 0)
            huffman_dc_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman DC table id #{huffman_dc_table_id} already defined.") if (huffman_dc_tables_id.include?(huffman_dc_table_id))
            huffman_dc_tables_id << huffman_dc_table_id
            log_debug "@#{cursor} - Found Huffman DC table: #{huffman_dc_table_id}"
          else
            huffman_ac_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman AC table id #{huffman_ac_table_id} already defined.") if (huffman_ac_tables_id.include?(huffman_ac_table_id))
            huffman_ac_tables_id << huffman_ac_table_id
            log_debug "@#{cursor} - Found Huffman AC table: #{huffman_ac_table_id}"
          end
          # The 16 bytes following the header byte are summed to get the number of elements following them
          nbr_elements = 0
          @data[dht_cursor+1..dht_cursor+16].bytes.each do |nbr_element_for_depth|
            nbr_elements += nbr_element_for_depth
          end
          dht_cursor += 17 + nbr_elements
          # Report the Huffman cursor here (the quantisation cursor does not exist in this branch)
          invalid_data("@#{dht_cursor} - End of Huffman table was supposed to be @#{end_cursor}.") if (dht_cursor > end_cursor)
        end
      when MARKER_SOS
        # SOS: Start of Scan
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman DC table has been defined.") if (huffman_dc_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman AC table has been defined.") if (huffman_ac_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no quantisation table has been defined.") if (quantisation_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no SOF marker has been encountered.") if (!found_sof)
        found_sos = true
        nbr_components = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Each component is described on 2 bytes: the 2nd one packs the DC (high nibble) and AC (low nibble) table ids
        nbr_components.times do |idx_component|
          huffman_table_ids = @data[cursor+6+2*idx_component].ord
          huffman_dc_table_id = ((huffman_table_ids & 0b11110000) >> 4)
          huffman_ac_table_id = (huffman_table_ids & 0b00001111)
          invalid_data("@#{cursor} - Unknown DC Huffman table: #{huffman_dc_table_id}") if (!huffman_dc_tables_id.include?(huffman_dc_table_id))
          invalid_data("@#{cursor} - Unknown AC Huffman table: #{huffman_ac_table_id}") if (!huffman_ac_tables_id.include?(huffman_ac_table_id))
        end
      when MARKER_DQT
        # DQT: Define quantisation tables
        end_cursor = cursor + 2 + size
        dqt_cursor = cursor + 4
        while (dqt_cursor < end_cursor)
          header_byte = @data[dqt_cursor].ord
          precision = ((header_byte & 0b11110000) >> 4)
          quantisation_table_id = (header_byte & 0b00001111)
          invalid_data("@#{cursor} - Quantisation table id #{quantisation_table_id} already defined.") if (quantisation_tables_id.include?(quantisation_table_id))
          quantisation_tables_id << quantisation_table_id
          log_debug "@#{cursor} - Found quantisation table: #{quantisation_table_id}"
          # 64 entries of 1 byte (precision 0) or 2 bytes (any other precision), plus the header byte
          dqt_cursor += 1 + 64*((precision == 0) ? 1 : 2)
          invalid_data("@#{dqt_cursor} - End of quantisation table was supposed to be @#{end_cursor}.") if (dqt_cursor > end_cursor)
        end
      end
      # Does it have entropy data?
      if (c_1 == MARKER_WITH_ENTROPY_DATA)
        # There is entropy data
        found_relevant_data([:jpg, :thm])
        # Find the next marker that is FF xx, with xx being different than 00, D0..D7 and FF
        cursor = @data.index(MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP, cursor + 2 + size, 2)
        log_debug "=== Entropy data gets to cursor #{cursor.inspect}"
        truncated_data("@#{cursor} - Truncated entropy data segment", @end_offset) if cursor.nil?
      else
        # No entropy data: just get to the next segment
        cursor += 2 + size
      end
    end
    nbr_segments += 1
    progress(cursor)
  end
  metadata( :nbr_segments => nbr_segments )

  return ending_offset
end
242
+
243
+ end
244
+
245
+ end
246
+
247
+ end
@@ -0,0 +1,30 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
+ class M2V < BeginPatternDecoder
6
+
7
# Bytes expected at the beginning of an M2V stream (binary string of 10 bytes)
BEGIN_PATTERN_M2V = [0x00, 0x00, 0x01, 0xBA, 0x44, 0x00, 0x04, 0x00, 0x14, 0x01].pack('C*')
# Bytes marking the end of an M2V stream (binary string of 4 bytes)
END_PATTERN_M2V = [0x00, 0x00, 0x01, 0xB9].pack('C*')
9
+
10
# Give the pattern announcing the beginning of an M2V stream, along with its search options.
#
# Result::
# * _String_: The begin pattern
# * <em>map<Symbol,Object></em>: Options: :offset_inc is set to 10
def get_begin_pattern
  search_options = { :offset_inc => 10 }
  [BEGIN_PATTERN_M2V, search_options]
end
13
+
14
# Decode an M2V stream: look for the end pattern and stop right after it.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in the data
# Result::
# * _Fixnum_: Offset of the first byte following the end pattern
def decode(offset)
  found_relevant_data(:m2v)
  # The search starts 10 bytes after the beginning of the begin pattern
  terminator_offset = @data.index(END_PATTERN_M2V, offset + 10)
  log_debug "=== @#{offset} - Found ending offset: #{terminator_offset.inspect}"
  # The 4 bytes of the end pattern have to fit before the end of the data
  truncated_data if terminator_offset.nil? || (terminator_offset + 4 > @end_offset)
  terminator_offset + 4
end
25
+
26
+ end
27
+
28
+ end
29
+
30
+ end
@@ -0,0 +1,341 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
+ # MP3 files can contain JPEG files
6
+
7
+ class MP3 < BeginPatternDecoder
8
+
9
# Magic strings of the tags that can be met in MP3 files
BEGIN_PATTERN_ID3V1 = 'TAG'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V1E = 'TAG+'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V2 = 'ID3'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_APEV2 = 'APETAGEX'.force_encoding(Encoding::ASCII_8BIT)
# Matches either the first 3 bytes of an MP3 frame header, an ID3v2 magic or an APEv2 magic.
# The pattern is forced to binary and compiled with Regexp::NOENCODING: the 3-argument form of
# Regexp.new (language code 'n') is not accepted anymore by recent Ruby versions.
BEGIN_PATTERN_MP3 = Regexp.new(
  "(\xFF[\xE2-\xFF][\x00-\xEF]|#{BEGIN_PATTERN_ID3V2}|#{BEGIN_PATTERN_APEV2})".force_encoding(Encoding::ASCII_8BIT),
  Regexp::NOENCODING
)

# Bit rates (in kbps), indexed by [bit rate index from the frame header - 1].
# Columns are selected by decode_mp3_frame_header:
# * 0, 1, 2: version 1, layers 1, 2 and 3
# * 3: other versions, layer 1
# * 4: other versions, layers 2 and 3
BITRATE_INDEX = [
  [ 32, 32, 32, 32, 8 ],
  [ 64, 48, 40, 48, 16 ],
  [ 96, 56, 48, 56, 24 ],
  [ 128, 64, 56, 64, 32 ],
  [ 160, 80, 64, 80, 40 ],
  [ 192, 96, 80, 96, 48 ],
  [ 224, 112, 96, 112, 56 ],
  [ 256, 128, 112, 128, 64 ],
  [ 288, 160, 128, 144, 80 ],
  [ 320, 192, 160, 160, 96 ],
  [ 352, 224, 192, 176, 112 ],
  [ 384, 256, 224, 192, 128 ],
  [ 416, 320, 256, 224, 144 ],
  [ 448, 384, 320, 256, 160 ]
].freeze
# Sample rates (in Hz), indexed by [sample rate index from the frame header][version - 1]
SAMPLE_RATE_INDEX = [
  [ 44100, 22050, 11025 ],
  [ 48000, 24000, 12000 ],
  [ 32000, 16000, 8000 ]
].freeze

# Minimal duration of MP3 frames (in milliseconds) needed to consider the data as a real MP3 file
MIN_ACCEPTABLE_TIME_MS = 1000

# Maximal number of bytes of an ID3v2 frame kept in the metadata
MAX_ID3V2_FRAME_SIZE = 256

# Byte ending the key of an APE tag item
APE_ITEM_KEY_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Byte used to pad ID3v2 tags after their last frame
ID3V2_PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Matches the null bytes ending a line
TRAILING_00_REGEXP = Regexp.new("\x00*$".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)
46
+
47
# Give the pattern announcing the beginning of an MP3 stream, along with its search options.
#
# Result::
# * _Regexp_: The begin pattern
# * <em>map<Symbol,Object></em>: Options: :max_regexp_size is set to 8
def get_begin_pattern
  search_options = { :max_regexp_size => 8 }
  [BEGIN_PATTERN_MP3, search_options]
end
50
+
51
# Check that a match of the begin pattern can really be the beginning of an MP3 stream.
# Matches beginning with byte FF are MP3 frame headers: their 3 following bytes are checked
# against reserved values. Other matches (tag magics) are always accepted.
#
# Parameters::
# * *begin_pattern_offset* (_Fixnum_): Offset of the match
# * *pattern_index* (_Fixnum_): Index of the matched pattern (unused here)
# Result::
# * _Boolean_: Can decoding be attempted from this offset?
def check_begin_pattern(begin_pattern_offset, pattern_index)
  # Compare byte values: comparing the binary data with a non-binary "\xFF" literal never matches
  return true unless (@data[begin_pattern_offset].getbyte(0) == 0xFF)
  header_bytes = @data[begin_pattern_offset+1..begin_pattern_offset+3].bytes.to_a
  # Version bits 01 (mask 24, value 8) are the reserved ones: bits 10 (value 16) are used by version 2
  version_ok = ((header_bytes[0] & 24) != 8)
  # Layer bits 00 are reserved
  layer_ok = ((header_bytes[0] & 6) != 0)
  # Sample rate index 3 is reserved
  sample_rate_ok = ((header_bytes[1] & 12) != 12)
  # Emphasis value 2 is reserved
  emphasis_ok = ((header_bytes[2] & 3) != 2)
  version_ok && layer_ok && sample_rate_ok && emphasis_ok
end
62
+
63
# Decode an MP3 stream: a sequence of tags (ID3v1, extended ID3v1, ID3v2, APEv1, APEv2) and MP3 frames.
# Decoding ends after an ID3v1 tag, when the end offset is reached, or when data that is not
# an MP3 frame is met (in the 2 last cases, only if enough milliseconds of frames have been read).
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in the data
# Result::
# * _Fixnum_: Offset of the first byte following the decoded stream
def decode(offset)
  ending_offset = nil

  cursor = offset
  elapsed_ms = 0
  relevance_reported = false
  while ending_offset.nil?
    #log_debug "=== @#{cursor} - Reading what's here"
    magic = @data[cursor..cursor+2]
    if (magic == BEGIN_PATTERN_ID3V1)
      if (@data[cursor..cursor+3] == BEGIN_PATTERN_ID3V1E)
        # Extended ID3v1 tag: 227 bytes, decoding goes on after it
        log_debug "=== @#{cursor} - Found ID3v1 extended tag"
        id3v1e_metadata = {
          :title => read_ascii(cursor+4, cursor+63),
          :artist => read_ascii(cursor+64, cursor+123),
          :album => read_ascii(cursor+124, cursor+183),
          :speed => @data[cursor+184].ord,
          :genre => read_ascii(cursor+185, cursor+214),
          :start_time => read_ascii(cursor+215, cursor+220),
          :end_time => read_ascii(cursor+221, cursor+226)
        }
        metadata( :id3v1e_metadata => id3v1e_metadata )
        cursor += 227
      else
        # Plain ID3v1 tag: 128 bytes
        log_debug "=== @#{cursor} - Found ID3v1 tag"
        id3v1_metadata = {
          :title => read_ascii(cursor+3, cursor+32),
          :artist => read_ascii(cursor+33, cursor+62),
          :album => read_ascii(cursor+63, cursor+92),
          :year => read_ascii(cursor+93, cursor+96),
          :comments => read_ascii(cursor+97, cursor+126),
          :genre => @data[cursor+127].ord
        }
        metadata( :id3v1_metadata => id3v1_metadata )
        cursor += 128
        # An ID3v1 tag is considered to always close the file
        ending_offset = cursor
      end
    elsif (magic == BEGIN_PATTERN_ID3V2)
      log_debug "=== @#{cursor} - Found ID3v2 tag"
      invalid_data("@#{cursor} - Invalid ID3v2 header") if ((@data[cursor+3].ord == 255) || (@data[cursor+4].ord == 255))
      # The size is stored on 4 bytes of 7 significant bits each. Start with the 10 bytes of the header.
      tag_size = 10
      @data[cursor+6..cursor+9].bytes.each_with_index do |size_byte, idx|
        invalid_data("@#{cursor} - Invalid ID3v2 header in size specification (#{idx})") if (size_byte >= 128)
        tag_size += (size_byte << (7*(3-idx)))
      end
      # A footer adds 10 more bytes
      tag_size += 10 if ((@data[cursor+5].ord & 16) == 16)
      tag_end = cursor + tag_size
      cursor += 10
      # An extended header is detected from its declared size (6 or 10)
      padding_size = 0
      extended_header_size = BinData::Uint32be.read(@data[cursor..cursor+3])
      if ((extended_header_size == 6) || (extended_header_size == 10))
        extended_header_flags = BinData::Uint16be.read(@data[cursor+4..cursor+5])
        invalid_data("@#{cursor} - Invalid extended header flags.") if ((extended_header_flags & 0b01111111_11111111) != 0)
        has_crc = ((extended_header_flags & 0b10000000_00000000) != 0)
        invalid_data("@#{cursor} - Extended header declared size and CRC flag do not match.") if (((extended_header_size == 10) && (!has_crc)) || ((extended_header_size == 6) && (has_crc)))
        padding_size = BinData::Uint32be.read(@data[cursor+6..cursor+9])
        cursor += 10
        cursor += 4 if has_crc
      end
      # Frames follow each other until the end of the tag or the first padding byte
      id3v2_metadata = {}
      while ((cursor < tag_end) && (@data[cursor] != ID3V2_PADDING_CHAR))
        frame_id = @data[cursor..cursor+3]
        frame_size = BinData::Uint32be.read(@data[cursor+4..cursor+7])
        frame_flags = BinData::Uint16be.read(@data[cursor+8..cursor+9])
        invalid_data("@#{cursor} - Invalid ID3v2 frame flags: #{frame_flags}.") if ((frame_flags & 0b00011111_00011111) != 0)
        cursor += 10
        # Only the first MAX_ID3V2_FRAME_SIZE bytes of the frame are kept as metadata
        kept_size = (frame_size > MAX_ID3V2_FRAME_SIZE) ? MAX_ID3V2_FRAME_SIZE : frame_size
        id3v2_metadata[frame_id] = read_ascii(cursor, cursor+kept_size-1)
        cursor += frame_size
      end
      metadata( :id3v2_metadata => id3v2_metadata )
      # Jump to the end computed from the header: this skips any padding
      log_debug("@#{cursor} - Padding size (#{padding_size}) is different from what is being read (#{tag_end-cursor}).") if (padding_size != tag_end-cursor)
      cursor = tag_end
    elsif (@data[cursor..cursor+7] == BEGIN_PATTERN_APEV2)
      log_debug "=== @#{cursor} - Found APEv2 tag"
      header_info = decode_ape_tag_header(cursor)
      invalid_data("@#{cursor} - APE tag header indicates no header whereas we have one.") if (!header_info[:has_header])
      invalid_data("@#{cursor} - APE tag header indicates it is a footer whereas we are on the header.") if (header_info[:on_footer])
      cursor += 32
      cursor_end_tag = cursor + header_info[:tag_size]
      apev2_items = {}
      header_info[:nbr_items].times do |_idx_item|
        item_key, item_value, cursor = decode_ape_tag_item(cursor)
        apev2_items[item_key] = item_value
      end
      # Items have to end where the header said, leaving room for the 32 bytes of the footer if any
      expected_cursor = cursor_end_tag-(header_info[:has_footer] ? 32 : 0)
      invalid_data("@#{cursor} - APE tag header is inconsistent. We should be at cursor #{expected_cursor}") if (cursor != expected_cursor)
      metadata( :apev2_metadata => apev2_items )
      if (header_info[:has_footer])
        invalid_data("@#{cursor} - Invalid APE tag footer magic.") if (@data[cursor..cursor+7] != BEGIN_PATTERN_APEV2)
        footer_info = decode_ape_tag_header(cursor)
        invalid_data("@#{cursor} - APEv2 tag footer indicates no footer whereas we have one.") if (!footer_info[:has_footer])
        invalid_data("@#{cursor} - APEv2 tag footer indicates it is a header whereas we are on the footer.") if (!footer_info[:on_footer])
        cursor += 32
      end
    else
      # We might be on a APEv1 tag, or real MP3 data, or at the end of our file.
      # APEv1 tag occurs only after the last MP3 frame, and before any ID3v1 tag.
      # APEv1 tag has no header, but a footer.
      ape_tag_decoded = false
      if (elapsed_ms != 0)
        # Some frames have already been read: try an APEv1 tag first, and remember where to come back
        cursor_begin = cursor
        begin
          apev1_items = {}
          nbr_items = 0
          while (@data[cursor..cursor+7] != BEGIN_PATTERN_APEV2)
            item_key, item_value, cursor = decode_ape_tag_item(cursor)
            apev1_items[item_key] = item_value
            nbr_items += 1
            log_debug "=== @#{cursor} - Decoded APEv1 tag item: #{item_key.inspect} => #{item_value[0..31].inspect}"
          end
          # The magic has been found: this is the footer
          footer_info = decode_ape_tag_header(cursor)
          invalid_data("@#{cursor} - APEv1 tag footer indicates no footer whereas we have one.") if (!footer_info[:has_footer])
          invalid_data("@#{cursor} - APEv1 tag footer indicates it is a header whereas we are on the footer.") if (!footer_info[:on_footer])
          invalid_data("@#{cursor} - APEv1 tag footer indicates #{footer_info[:nbr_items]} tag items, whereas we read #{nbr_items}") if (footer_info[:nbr_items] != nbr_items)
          log_debug "=== @#{cursor} - Found APEv1 tag"
          cursor += 32
          ape_tag_decoded = true
          metadata( :apev1_metadata => apev1_items )
        rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
          # Not an APEv1 tag after all: rewind and try an MP3 frame
          #log_debug("=== @#{cursor_begin} - Failed to decode as APEv1 tag: #{$!}")
          cursor = cursor_begin
          ape_tag_decoded = false
        end
      end
      if (!ape_tag_decoded)
        # Real MP3 data or end of file
        frame_info = nil
        begin
          frame_info = decode_mp3_frame_header(cursor)
        rescue InvalidDataError
          # Garbage after enough frames means the file is finished. Otherwise this is an error.
          raise if (elapsed_ms < MIN_ACCEPTABLE_TIME_MS)
          #log_debug "=== @#{cursor} - Garbage data found. Should be end of file."
          ending_offset = cursor
        end
        if ending_offset.nil?
          #log_debug "=== @#{cursor} - Found MP3 data"
          # Jump over the frame
          cursor += frame_info[:size]
          # Data is reported as relevant only once enough milliseconds have been read
          elapsed_ms += frame_info[:nbr_ms]
          if ((!relevance_reported) && (elapsed_ms >= MIN_ACCEPTABLE_TIME_MS))
            relevance_reported = true
            found_relevant_data(:mp3)
          end
          metadata( :nbr_ms => elapsed_ms )
        end
      end
    end
    # Reaching exactly the end of the data with enough frames also ends the file
    ending_offset = cursor if ((elapsed_ms >= MIN_ACCEPTABLE_TIME_MS) && (cursor == @end_offset))
    progress(cursor)
  end

  return ending_offset
end
239
+
240
+ private
241
+
242
# Decode an MP3 frame header: validate its 4 bytes, then compute the size and the duration of the frame.
# Headers using reserved values, the invalid bit rate index 15 or the free bit rate index 0
# are reported through invalid_data.
#
# Parameters::
# * *cursor* (_Fixnum_): Offset of the frame header
# Result::
# * <em>map<Symbol,Object></em>: Corresponding header info:
#   * *:size* (_Fixnum_): Size of the frame, in bytes
#   * *:nbr_ms* (_Fixnum_): Duration of the frame, in milliseconds
def decode_mp3_frame_header(cursor)
  info = {}
  # Check the header's values
  header_bytes = @data[cursor..cursor+3].bytes.to_a
  # Version bits 01 (mask 24, value 8) are the reserved ones. Bits 10 (value 16) identify version 2
  # and have to be accepted for the version selection below to be able to reach it.
  invalid_data("@#{cursor} - Invalid MP3 header") if ((header_bytes[0] != 255) ||
    ((header_bytes[1] & 224) != 224) ||
    ((header_bytes[1] & 24) == 8) ||
    ((header_bytes[1] & 6) == 0) ||
    ((header_bytes[2] & 240) == 240) ||
    ((header_bytes[2] & 12) == 12) ||
    ((header_bytes[3] & 3) == 2))
  invalid_data("@#{cursor} - Invalid MP3 header: can't compute size of free bitrates") if ((header_bytes[2] & 240) == 0)
  # Read header values to compute the size
  # version is used as an index: 1 and 2 for versions 1 and 2, 3 for version bits 00
  version = nil
  case ((header_bytes[1] & 24) >> 3)
  when 0
    version = 3
  when 2
    version = 2
  when 3
    version = 1
  else
    invalid_data("@#{cursor} - Unknown version in header: #{((header_bytes[1] & 24) >> 3)}")
  end
  layer = 4 - ((header_bytes[1] & 6) >> 1)
  bit_rate = BITRATE_INDEX[((header_bytes[2] & 240) >> 4)-1][(version == 1) ? layer - 1 : ((layer == 1) ? 3 : 4)] * 1000
  sample_rate = SAMPLE_RATE_INDEX[(header_bytes[2] & 12) >> 2][version - 1]
  padding = ((header_bytes[2] & 2) >> 1)
  # Compute the size and the duration from the number of samples stored in a frame:
  # * 384 for layer 1 (sizes are counted in slots of 4 bytes),
  # * 576 for layer 3 of versions other than 1,
  # * 1152 otherwise.
  if (layer == 1)
    nbr_samples = 384
    info[:size] = ((12 * bit_rate) / sample_rate + padding) * 4
  elsif ((layer == 3) && (version != 1))
    nbr_samples = 576
    info[:size] = (72 * bit_rate) / sample_rate + padding
  else
    nbr_samples = 1152
    info[:size] = (144 * bit_rate) / sample_rate + padding
  end
  info[:nbr_ms] = (nbr_samples * 1000) / sample_rate
  #log_debug "=== @#{cursor} - Read MP3 frame: Version=#{version} Layer=#{layer} BitRate=#{bit_rate} SampleRate=#{sample_rate} Padding=#{padding} FrameLength=#{info[:size]} Milliseconds=#{info[:nbr_ms]}"
  return info
end
282
+
283
# Decode the 32 bytes of an APE tag header or footer (both share the same layout).
# Flags using undefined bits and non-zero reserved bytes are reported through invalid_data.
#
# Parameters::
# * *cursor* (_Fixnum_): Offset of the header (or footer)
# Result::
# * <em>map<Symbol,Object></em>: The APE tag info:
#   * *:tag_size* (_Fixnum_): Size of the tag, as declared
#   * *:nbr_items* (_Fixnum_): Number of items, as declared
#   * *:has_header* (_Boolean_): Do the flags declare a header?
#   * *:has_footer* (_Boolean_): Do the flags declare a footer?
#   * *:on_footer* (_Boolean_): Do the flags declare these 32 bytes as being the footer?
def decode_ape_tag_header(cursor)
  #ape_version = BinData::Uint32le.read(@data[cursor+8..cursor+11])
  tag_size = BinData::Uint32le.read(@data[cursor+12..cursor+15])
  nbr_items = BinData::Uint32le.read(@data[cursor+16..cursor+19])
  flags = BinData::Uint32le.read(@data[cursor+20..cursor+23])
  info = {
    :tag_size => tag_size,
    :nbr_items => nbr_items,
    # Bit 31 set: the tag has a header
    :has_header => ((flags & 0x80000000) != 0),
    # Bit 30 unset: the tag has a footer
    :has_footer => ((flags & 0x40000000) == 0),
    # Bit 29 unset: we are on the footer
    :on_footer => ((flags & 0x20000000) == 0)
  }
  # Bits 3 to 28 have to be 0
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if ((flags & 0x1FFFFFF8) != 0)
  reserved = BinData::Uint64le.read(@data[cursor+24..cursor+31])
  invalid_data("@#{cursor} - Invalid reserved bytes in APE Tag header: #{reserved} should be 0.") if (reserved != 0)
  info
end
303
+
304
# Decode an APE tag item: 4 bytes of value size, 4 bytes of flags, a key ended by
# APE_ITEM_KEY_TERMINATOR, then the value.
#
# Parameters::
# * *cursor* (_Fixnum_): Offset of the item
# Result::
# * _String_: Item key
# * _String_: Item value
# * _Fixnum_: Offset of the first byte following the item
def decode_ape_tag_item(cursor)
  value_size = BinData::Uint32le.read(@data[cursor..cursor+3])
  flags = BinData::Uint32le.read(@data[cursor+4..cursor+7])
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if ((flags & 0b00011111_11111111_11111111_11111000) != 0)
  # The key starts right after the 8 bytes of size and flags
  key_begin = cursor+8
  key_end = @data.index(APE_ITEM_KEY_TERMINATOR, key_begin)
  invalid_data("@#{cursor} - Could not find the end of APE tag item key.") if key_end.nil?
  invalid_data("@#{cursor} - Empty APE tag item key.") if (key_end == key_begin)
  item_key = @data[key_begin..key_end-1]
  # The value starts right after the key terminator
  value_begin = key_end + 1
  item_value = @data[value_begin..value_begin+value_size-1]
  [item_key, item_value, value_begin + value_size]
end
325
+
326
# Read a string from the data, removing the null bytes matched by TRAILING_00_REGEXP and
# then stripping the result.
#
# Parameters::
# * *cursor_begin* (_Fixnum_): Offset of the first byte to read
# * *cursor_end* (_Fixnum_): Offset of the last byte to read (included)
# Result::
# * _String_: The resulting string
def read_ascii(cursor_begin, cursor_end)
  raw_value = @data[cursor_begin..cursor_end]
  without_trailing_nulls = raw_value.gsub(TRAILING_00_REGEXP, '')
  without_trailing_nulls.strip
end
336
+
337
+ end
338
+
339
+ end
340
+
341
+ end