fileshunter 0.1.0.20130725
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
@@ -0,0 +1,247 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
# JPEG decoder has to be among the last ones to be decoded, as a truncated JPEG followed by other files can consume all files in its truncated data.
|
6
|
+
# JPEG files can contain TIFF files
|
7
|
+
|
8
|
+
class JPEG < BeginPatternDecoder
|
9
|
+
|
10
|
+
# First byte of every JPEG marker
MARKER_PREFIX = "\xFF".force_encoding(Encoding::ASCII_8BIT)
# Second byte of the marker ending a JPEG stream
END_MARKER = "\xD9".force_encoding(Encoding::ASCII_8BIT)
# Second byte of the markers that are not followed by a size and a payload
MARKERS_WITHOUT_PAYLOAD = [
  "\xD8".force_encoding(Encoding::ASCII_8BIT),
  "\xD9".force_encoding(Encoding::ASCII_8BIT)
].freeze
# Second byte of the marker whose payload is followed by entropy-coded data
MARKER_WITH_ENTROPY_DATA = "\xDA".force_encoding(Encoding::ASCII_8BIT)
# Second byte of the markers decoded specifically
MARKER_APP0 = "\xE0".force_encoding(Encoding::ASCII_8BIT)
MARKER_APP1 = "\xE1".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF0 = "\xC0".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF3 = "\xC3".force_encoding(Encoding::ASCII_8BIT)
MARKER_DHT = "\xC4".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOS = "\xDA".force_encoding(Encoding::ASCII_8BIT)
MARKER_DQT = "\xDB".force_encoding(Encoding::ASCII_8BIT)
# Bytes that, when following FF inside entropy data, do not start a new segment (00, D0..D7 and FF)
MARKERS_IGNORED_IN_ENTROPY_DATA = [
  "\x00".force_encoding(Encoding::ASCII_8BIT),
  "\xD0".force_encoding(Encoding::ASCII_8BIT),
  "\xD1".force_encoding(Encoding::ASCII_8BIT),
  "\xD2".force_encoding(Encoding::ASCII_8BIT),
  "\xD3".force_encoding(Encoding::ASCII_8BIT),
  "\xD4".force_encoding(Encoding::ASCII_8BIT),
  "\xD5".force_encoding(Encoding::ASCII_8BIT),
  "\xD6".force_encoding(Encoding::ASCII_8BIT),
  "\xD7".force_encoding(Encoding::ASCII_8BIT),
  "\xFF".force_encoding(Encoding::ASCII_8BIT)
].freeze
# Matches FF followed by any byte that is not ignored in entropy data, that is the next real marker.
# The binary (no encoding) flag is given as a regular option: the legacy 3rd argument of Regexp.new
# is deprecated since Ruby 3.2 and rejected since Ruby 3.3.
MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP = Regexp.new(
  "#{MARKER_PREFIX}[^#{MARKERS_IGNORED_IN_ENTROPY_DATA.join}]".force_encoding(Encoding::ASCII_8BIT),
  Regexp::NOENCODING
)

# Identifiers found at the beginning of APP0 and APP1 payloads
JFIF_HEADER = "JFIF\x00".force_encoding(Encoding::ASCII_8BIT)
JFXX_HEADER = "JFXX\x00".force_encoding(Encoding::ASCII_8BIT)
EXIF_HEADER = "Exif\x00\x00".force_encoding(Encoding::ASCII_8BIT)

# Extension codes accepted in a JFXX segment
VALID_EXTENSION_CODES = [ 16, 17, 19 ].freeze
|
43
|
+
|
44
|
+
# Give the pattern beginning a JPEG stream: the bytes FF D8 followed by the FF of the next marker.
#
# Result::
# * _String_: The begin pattern, as a binary string
def get_begin_pattern
  [0xFF, 0xD8, 0xFF].pack('C*')
end
|
47
|
+
|
48
|
+
# Decode a JPEG stream, one marker segment after the other, until the end marker (FF D9) is met.
# Metadata found on the way (JFIF, JFXX, Exif, image size, number of segments) is reported through metadata.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in the data
# Result::
# * _Fixnum_: Offset following the end marker
def decode(offset)
  ending_offset = nil

  # Skip the 2 first bytes of the begin pattern (FF D8)
  cursor = offset + 2
  nbr_segments = 0
  # IDs of the tables defined so far: SOF and SOS segments can only reference already defined tables
  quantisation_tables_id = []
  huffman_ac_tables_id = []
  huffman_dc_tables_id = []
  found_sos = false
  found_sof = false
  while (ending_offset == nil)
    # Here cursor is at the beginning of the next marker
    # Read the 2 next bytes: they should be FF ??
    log_debug "@#{cursor} Decoding next offset: #{@data[cursor..cursor+1].inspect}"
    invalid_data("@#{cursor} - Did not get a valid marker definition: #{@data[cursor..cursor+1].inspect}") if (@data[cursor] != MARKER_PREFIX)
    c_1 = @data[cursor+1]
    invalid_data("@#{cursor} - Invalid marker: #{c_1.ord}") if (c_1.ord < 192)
    # Does this marker have a payload?
    if (MARKERS_WITHOUT_PAYLOAD.include?(c_1))
      # No payload
      log_debug "=== No payload"
      # Get to the next bytes
      cursor += 2
      # Check if we arrived at the end
      ending_offset = cursor if (c_1 == END_MARKER)
    else
      # There is a payload
      # Read its length (it includes the 2 bytes encoding the length itself)
      size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
      log_debug "=== Payload of size #{size}"
      case c_1
      when MARKER_APP0
        # Application specific data
        # Usually used for JFIF
        case @data[cursor+4..cursor+8]
        when JFIF_HEADER
          invalid_data("@#{cursor} - Invalid size for JFIF marker: #{size}") if (size < 16)
          version_major = @data[cursor+9].ord
          version_minor = @data[cursor+10].ord
          units = @data[cursor+11].ord
          invalid_data("@#{cursor} - Invalid units: #{units}") if (units > 2)
          width = BinData::Uint16be.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Invalid width: #{width}") if (width == 0)
          height = BinData::Uint16be.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid height: #{height}") if (height == 0)
          jfif_metadata = {
            :version_major => version_major,
            :version_minor => version_minor,
            :units => units,
            :width => width,
            :height => height
          }
          # NOTE(review): the JFIF specification seems to define the 2 thumbnail dimensions as single bytes
          # at cursor+16 and cursor+17, whereas 2 16-bit values are read here - to be confirmed before changing.
          if (size > 16)
            width_thumb = BinData::Uint16be.read(@data[cursor+16..cursor+17])
            height_thumb = BinData::Uint16be.read(@data[cursor+18..cursor+19])
            jfif_metadata.merge!(
              :width_thumb => width_thumb,
              :height_thumb => height_thumb
            )
          end
          metadata( :jfif_metadata => jfif_metadata )
        when JFXX_HEADER
          extension_code = @data[cursor+9].ord
          invalid_data("@#{cursor} - Invalid extension code: #{extension_code}") if (!VALID_EXTENSION_CODES.include?(extension_code))
          metadata( :jfxx_metadata => { :extension_code => extension_code } )
        end
      when MARKER_APP1
        # Application specific data
        # Usually used for Exif
        case @data[cursor+4..cursor+9]
        when EXIF_HEADER
          # Read a TIFF file from cursor+10
          # The TIFF decoder is loaded lazily, only when Exif data is met
          require 'fileshunter/Decoders/TIFF'
          invalid_data("@#{cursor} - Invalid TIFF header") if (@data[cursor+10..cursor+13].index(FilesHunter::Decoders::TIFF::BEGIN_PATTERN_TIFF) != 0)
          tiff_decoder = FilesHunter::Decoders::TIFF.new
          tiff_decoder.setup(FilesHunter::get_segments_analyzer, @data, cursor+10, cursor+2+size)
          tiff_decoder.accept_no_image_data
          begin
            tiff_decoder.find_segments
          rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
            # Invalid TIFF data
            invalid_data("@#{cursor} - Invalid TIFF data: #{$!}")
          end
          segments = tiff_decoder.segments_found
          invalid_data("@#{cursor} - No valid TIFF segment found for Exif") if segments.empty?
          invalid_data("@#{cursor} - Not a valid TIFF segment found for Exif. Found #{segments[0].extensions.inspect}.") if (!segments[0].extensions.include?(:tif))
          invalid_data("@#{cursor} - Truncated TIFF segment found for Exif.") if (segments[0].truncated)
          invalid_data("@#{cursor} - TIFF segment (@#{segments[0].begin_offset}) not found at the beginning of Exif (#{cursor+10}).") if (segments[0].begin_offset != cursor+10)
          #invalid_data("@#{cursor} - TIFF segment not ending (#{segments[0].end_offset}) at the end of Exif (#{cursor+2+size}).") if (segments[0].end_offset != cursor+2+size)
          metadata( :exif_metadata => segments[0].metadata )
          found_relevant_data([:jpg, :thm])
        end
      when MARKER_SOF0..MARKER_SOF3
        # SOF: Start of Frame
        invalid_data("@#{cursor} - Found several SOF markers") if found_sof
        invalid_data("@#{cursor} - Found a SOF marker after the SOS marker") if found_sos
        found_sof = true
        sample_precision = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid sample precision: #{sample_precision}") if ((sample_precision != 8) and (sample_precision != 12))
        image_height = BinData::Uint16be.read(@data[cursor+5..cursor+6])
        image_width = BinData::Uint16be.read(@data[cursor+7..cursor+8])
        metadata(
          :image_height => image_height,
          :image_width => image_width
        )
        nbr_components = @data[cursor+9].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Check that quantisation tables have been defined
        # Each component is described on 3 bytes: its ID, its sampling factors and its quantisation table ID
        nbr_components.times do |idx_component|
          sampling = @data[cursor+11+idx_component*3].ord
          horizontal_sampling = ((sampling & 0b11110000) >> 4)
          vertical_sampling = (sampling & 0b00001111)
          invalid_data("@#{cursor} - Invalid horizontal sampling: #{horizontal_sampling}") if (horizontal_sampling == 0)
          invalid_data("@#{cursor} - Invalid vertical sampling: #{vertical_sampling}") if (vertical_sampling == 0)
          dqt_id = @data[cursor+12+idx_component*3].ord
          invalid_data("@#{cursor} - Missing quantisation table ID #{dqt_id}") if (!quantisation_tables_id.include?(dqt_id))
        end
      when MARKER_DHT
        # DHT: Define Huffman tables
        end_cursor = cursor + 2 + size
        dht_cursor = cursor + 4
        while (dht_cursor < end_cursor)
          # High nibble: table type (0 = DC, 1 = AC). Low nibble: table ID.
          header_byte = @data[dht_cursor].ord
          huffman_type = ((header_byte & 0b11110000) >> 4)
          invalid_data("@#{cursor} - Unknown Huffman table type: #{huffman_type}") if (huffman_type > 1)
          if (huffman_type == 0)
            huffman_dc_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman DC table id #{huffman_dc_table_id} already defined.") if (huffman_dc_tables_id.include?(huffman_dc_table_id))
            huffman_dc_tables_id << huffman_dc_table_id
            log_debug "@#{cursor} - Found Huffman DC table: #{huffman_dc_table_id}"
          else
            huffman_ac_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman AC table id #{huffman_ac_table_id} already defined.") if (huffman_ac_tables_id.include?(huffman_ac_table_id))
            huffman_ac_tables_id << huffman_ac_table_id
            log_debug "@#{cursor} - Found Huffman AC table: #{huffman_ac_table_id}"
          end
          # The 16 next bytes give the number of elements of each depth: their sum is the number of bytes following them
          nbr_elements = 0
          @data[dht_cursor+1..dht_cursor+16].bytes.each do |nbr_element_for_depth|
            nbr_elements += nbr_element_for_depth
          end
          dht_cursor += 17 + nbr_elements
          # Report the Huffman cursor: the quantisation one does not exist in this branch
          invalid_data("@#{dht_cursor} - End of Huffman table was supposed to be @#{end_cursor}.") if (dht_cursor > end_cursor)
        end
      when MARKER_SOS
        # SOS: Start of Scan
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman DC table has been defined.") if (huffman_dc_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman AC table has been defined.") if (huffman_ac_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no quantisation table has been defined.") if (quantisation_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no SOF marker has been encountered.") if (!found_sof)
        found_sos = true
        nbr_components = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Each component is described on 2 bytes: its ID and the IDs of its DC (high nibble) and AC (low nibble) tables
        nbr_components.times do |idx_component|
          huffman_table_ids = @data[cursor+6+2*idx_component].ord
          huffman_dc_table_id = ((huffman_table_ids & 0b11110000) >> 4)
          huffman_ac_table_id = (huffman_table_ids & 0b00001111)
          invalid_data("@#{cursor} - Unknown DC Huffman table: #{huffman_dc_table_id}") if (!huffman_dc_tables_id.include?(huffman_dc_table_id))
          invalid_data("@#{cursor} - Unknown AC Huffman table: #{huffman_ac_table_id}") if (!huffman_ac_tables_id.include?(huffman_ac_table_id))
        end
      when MARKER_DQT
        # DQT: Define quantisation tables
        end_cursor = cursor + 2 + size
        dqt_cursor = cursor + 4
        while (dqt_cursor < end_cursor)
          # High nibble: precision. Low nibble: table ID.
          header_byte = @data[dqt_cursor].ord
          precision = ((header_byte & 0b11110000) >> 4)
          quantisation_table_id = (header_byte & 0b00001111)
          invalid_data("@#{cursor} - Quantisation table id #{quantisation_table_id} already defined.") if (quantisation_tables_id.include?(quantisation_table_id))
          quantisation_tables_id << quantisation_table_id
          log_debug "@#{cursor} - Found quantisation table: #{quantisation_table_id}"
          # 64 values, stored on 1 byte each for precision 0 and on 2 bytes each otherwise
          dqt_cursor += 1 + 64*((precision == 0) ? 1 : 2)
          invalid_data("@#{dqt_cursor} - End of quantisation table was supposed to be @#{end_cursor}.") if (dqt_cursor > end_cursor)
        end
      end
      # Does it have entropy data?
      if (c_1 == MARKER_WITH_ENTROPY_DATA)
        # There is entropy data
        found_relevant_data([:jpg, :thm])
        # Find the next marker that is FF xx, with xx being different than 00, D0..D7 and FF
        cursor = @data.index(MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP, cursor + 2 + size, 2)
        log_debug "=== Entropy data gets to cursor #{cursor.inspect}"
        truncated_data("@#{cursor} - Truncated entropy data segment", @end_offset) if (cursor == nil)
      else
        # No entropy data: just get to the next segment
        cursor += 2 + size
      end
    end
    nbr_segments += 1
    progress(cursor)
  end
  metadata( :nbr_segments => nbr_segments )

  return ending_offset
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
end
|
246
|
+
|
247
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
class M2V < BeginPatternDecoder
|
6
|
+
|
7
|
+
# Bytes expected at the very beginning of an M2V stream (presumably an MPEG-2 pack header - to be confirmed)
BEGIN_PATTERN_M2V = [0x00, 0x00, 0x01, 0xBA, 0x44, 0x00, 0x04, 0x00, 0x14, 0x01].pack('C*')
# Bytes closing an M2V stream
END_PATTERN_M2V = [0x00, 0x00, 0x01, 0xB9].pack('C*')
|
9
|
+
|
10
|
+
# Give the pattern beginning an M2V stream, along with its search options.
#
# Result::
# * _String_: The begin pattern
# * <em>map<Symbol,Object></em>: Options: the :offset_inc of 10 equals the length of the begin pattern
def get_begin_pattern
  [BEGIN_PATTERN_M2V, { :offset_inc => 10 }]
end
|
13
|
+
|
14
|
+
# Decode an M2V stream: it ends right after the first end pattern following the begin pattern.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in the data
# Result::
# * _Fixnum_: Offset following the end pattern
def decode(offset)
  found_relevant_data(:m2v)
  # Look for the end pattern after the 10 bytes of the begin pattern
  closing_offset = @data.index(END_PATTERN_M2V, offset + 10)
  log_debug "=== @#{offset} - Found ending offset: #{closing_offset.inspect}"
  # The 4 bytes of the end pattern are part of the stream
  truncated_data if closing_offset.nil? || (closing_offset + 4 > @end_offset)
  closing_offset + 4
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,341 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
# MP3 files can contain JPEG files
|
6
|
+
|
7
|
+
class MP3 < BeginPatternDecoder
|
8
|
+
|
9
|
+
# Identifiers of the tags that can be found in MP3 files
BEGIN_PATTERN_ID3V1 = 'TAG'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V1E = 'TAG+'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V2 = 'ID3'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_APEV2 = 'APETAGEX'.force_encoding(Encoding::ASCII_8BIT)
# Matches the beginning of an MP3 frame header, an ID3v2 tag or an APEv2 tag.
# The source is forced to binary and the binary (no encoding) flag is given as a regular option:
# the legacy 3rd argument of Regexp.new is deprecated since Ruby 3.2 and rejected since Ruby 3.3.
BEGIN_PATTERN_MP3 = Regexp.new(
  "(\xFF[\xE2-\xFF][\x00-\xEF]|#{BEGIN_PATTERN_ID3V2}|#{BEGIN_PATTERN_APEV2})".force_encoding(Encoding::ASCII_8BIT),
  Regexp::NOENCODING
)

# Bit rates (multiplied by 1000 when decoding frame headers).
# Rows: bit rate index of the frame header, minus 1.
# Columns: version 1 layers 1, 2 and 3, then other versions layer 1, then other versions layers 2 and 3.
BITRATE_INDEX = [
  [ 32, 32, 32, 32, 8 ],
  [ 64, 48, 40, 48, 16 ],
  [ 96, 56, 48, 56, 24 ],
  [ 128, 64, 56, 64, 32 ],
  [ 160, 80, 64, 80, 40 ],
  [ 192, 96, 80, 96, 48 ],
  [ 224, 112, 96, 112, 56 ],
  [ 256, 128, 112, 128, 64 ],
  [ 288, 160, 128, 144, 80 ],
  [ 320, 192, 160, 160, 96 ],
  [ 352, 224, 192, 176, 112 ],
  [ 384, 256, 224, 192, 128 ],
  [ 416, 320, 256, 224, 144 ],
  [ 448, 384, 320, 256, 160 ]
].freeze
# Sample rates.
# Rows: sample rate index of the frame header.
# Columns: version, minus 1.
SAMPLE_RATE_INDEX = [
  [ 44100, 22050, 11025 ],
  [ 48000, 24000, 12000 ],
  [ 32000, 16000, 8000 ]
].freeze

# Cumulated duration of MP3 frames needed to consider the data as a real MP3 stream
MIN_ACCEPTABLE_TIME_MS = 1000

# Maximal number of bytes of an ID3v2 frame content kept in the metadata
MAX_ID3V2_FRAME_SIZE = 256

# Byte ending the key of an APE tag item
APE_ITEM_KEY_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Byte used to pad ID3v2 tags after their last frame
ID3V2_PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Matches the null bytes ending a value
TRAILING_00_REGEXP = Regexp.new("\x00*$".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)
|
46
|
+
|
47
|
+
# Give the pattern beginning an MP3 stream, along with its search options.
#
# Result::
# * _Regexp_: The begin pattern
# * <em>map<Symbol,Object></em>: Options: :max_regexp_size is set to 8
def get_begin_pattern
  [BEGIN_PATTERN_MP3, { :max_regexp_size => 8 }]
end
|
50
|
+
|
51
|
+
# Check that a begin pattern that has been found is worth decoding.
# Tags are always accepted. MP3 frame headers (beginning with FF) are accepted only if they use
# no reserved version, layer, sample rate or emphasis.
#
# Parameters::
# * *begin_pattern_offset* (_Fixnum_): Offset of the pattern that has been found
# * *pattern_index* (_Fixnum_): Index of the matched pattern (unused here)
# Result::
# * _Boolean_: Is the begin pattern valid?
def check_begin_pattern(begin_pattern_offset, pattern_index)
  # Compare the byte value, and not strings: the encoding of a "\xFF" literal depends on the source
  # encoding, which made it never equal to the binary data read.
  first_char = @data[begin_pattern_offset]
  return true unless first_char && (first_char.getbyte(0) == 0xFF)

  header_bytes = (@data[begin_pattern_offset+1..begin_pattern_offset+3] || '').bytes.to_a
  # A frame header missing some bytes can't be validated
  return false if header_bytes.size < 3

  # Version bits 01 (mask value 8) are the reserved ones: bits 10 (mask value 16) stand for MPEG version 2
  ((header_bytes[0] & 24) != 8) &&
    ((header_bytes[0] & 6) != 0) &&
    ((header_bytes[1] & 12) != 12) &&
    ((header_bytes[2] & 3) != 2)
end
|
62
|
+
|
63
|
+
# Decode an MP3 stream: a sequence of ID3v1, ID3v2, APEv1 and APEv2 tags and of MP3 frames.
# Decoding stops after an ID3v1 tag, on data that is not an MP3 frame anymore or at the end offset,
# the 2 last cases needing at least MIN_ACCEPTABLE_TIME_MS of MP3 frames to have been decoded.
#
# Parameters::
# * *offset* (_Fixnum_): Offset to begin decoding from
# Result::
# * _Fixnum_: Offset following the decoded stream
def decode(offset)
  ending_offset = nil
  cursor = offset
  # Cumulated duration of the MP3 frames decoded so far
  nbr_ms = 0
  valid = false
  while ending_offset.nil?
    #log_debug "=== @#{cursor} - Reading what's here"
    tag_id = @data[cursor..cursor+2]
    if tag_id == BEGIN_PATTERN_ID3V1
      if @data[cursor..cursor+3] == BEGIN_PATTERN_ID3V1E
        # Extended ID3v1 tag: 227 bytes
        log_debug "=== @#{cursor} - Found ID3v1 extended tag"
        id3v1e_metadata = {
          :title => read_ascii(cursor+4, cursor+63),
          :artist => read_ascii(cursor+64, cursor+123),
          :album => read_ascii(cursor+124, cursor+183),
          :speed => @data[cursor+184].ord,
          :genre => read_ascii(cursor+185, cursor+214),
          :start_time => read_ascii(cursor+215, cursor+220),
          :end_time => read_ascii(cursor+221, cursor+226)
        }
        metadata( :id3v1e_metadata => id3v1e_metadata )
        cursor += 227
      else
        # ID3v1 tag: 128 bytes
        log_debug "=== @#{cursor} - Found ID3v1 tag"
        id3v1_metadata = {
          :title => read_ascii(cursor+3, cursor+32),
          :artist => read_ascii(cursor+33, cursor+62),
          :album => read_ascii(cursor+63, cursor+92),
          :year => read_ascii(cursor+93, cursor+96),
          :comments => read_ascii(cursor+97, cursor+126),
          :genre => @data[cursor+127].ord
        }
        metadata( :id3v1_metadata => id3v1_metadata )
        cursor += 128
        # An ID3v1 tag is always the last part of the stream
        ending_offset = cursor
      end
    elsif tag_id == BEGIN_PATTERN_ID3V2
      log_debug "=== @#{cursor} - Found ID3v2 tag"
      invalid_data("@#{cursor} - Invalid ID3v2 header") if (@data[cursor+3].ord == 255) || (@data[cursor+4].ord == 255)
      # The tag size is the 10 bytes of the header, plus a size encoded on 4 bytes of 7 bits each
      size = 10
      @data[cursor+6..cursor+9].bytes.each_with_index do |byte, idx|
        invalid_data("@#{cursor} - Invalid ID3v2 header in size specification (#{idx})") if byte >= 128
        size += byte << (7 * (3 - idx))
      end
      # A flag of the header tells if a footer of 10 bytes is present
      size += 10 unless (@data[cursor+5].ord & 16) == 0
      cursor_end = cursor + size
      cursor += 10
      # An extended header is recognized by its declared size: 6, or 10 when it embeds a CRC
      padding_size = 0
      extended_header_size = BinData::Uint32be.read(@data[cursor..cursor+3])
      if (extended_header_size == 6) || (extended_header_size == 10)
        extended_header_flags = BinData::Uint16be.read(@data[cursor+4..cursor+5])
        invalid_data("@#{cursor} - Invalid extended header flags.") if (extended_header_flags & 0b01111111_11111111) != 0
        has_crc = ((extended_header_flags & 0b10000000_00000000) != 0)
        size_mismatches_crc = ((extended_header_size == 10) && !has_crc) || ((extended_header_size == 6) && has_crc)
        invalid_data("@#{cursor} - Extended header declared size and CRC flag do not match.") if size_mismatches_crc
        padding_size = BinData::Uint32be.read(@data[cursor+6..cursor+9])
        cursor += 10
        cursor += 4 if has_crc
      end
      # Read frames until the end of the tag or the beginning of its padding
      id3v2_metadata = {}
      while (cursor < cursor_end) && (@data[cursor] != ID3V2_PADDING_CHAR)
        # Frame header: ID on 4 bytes, size on 4 bytes and flags on 2 bytes
        frame_id = @data[cursor..cursor+3]
        frame_size = BinData::Uint32be.read(@data[cursor+4..cursor+7])
        frame_flags = BinData::Uint16be.read(@data[cursor+8..cursor+9])
        invalid_data("@#{cursor} - Invalid ID3v2 frame flags: #{frame_flags}.") if (frame_flags & 0b00011111_00011111) != 0
        cursor += 10
        # Keep the beginning of big frames only
        kept_size = (frame_size > MAX_ID3V2_FRAME_SIZE) ? MAX_ID3V2_FRAME_SIZE : frame_size
        id3v2_metadata[frame_id] = read_ascii(cursor, cursor+kept_size-1)
        cursor += frame_size
      end
      metadata( :id3v2_metadata => id3v2_metadata )
      # Jump to the end of the tag computed from its header, skipping any padding
      log_debug("@#{cursor} - Padding size (#{padding_size}) is different from what is being read (#{cursor_end-cursor}).") if padding_size != cursor_end-cursor
      cursor = cursor_end
    elsif @data[cursor..cursor+7] == BEGIN_PATTERN_APEV2
      log_debug "=== @#{cursor} - Found APEv2 tag"
      header_info = decode_ape_tag_header(cursor)
      invalid_data("@#{cursor} - APE tag header indicates no header whereas we have one.") unless header_info[:has_header]
      invalid_data("@#{cursor} - APE tag header indicates it is a footer whereas we are on the header.") if header_info[:on_footer]
      cursor += 32
      cursor_end_tag = cursor + header_info[:tag_size]
      # The declared tag size counts the 32 bytes of the footer, if any
      expected_items_end = cursor_end_tag-(header_info[:has_footer] ? 32 : 0)
      ape_metadata = {}
      header_info[:nbr_items].times do
        item_key, item_value, cursor = decode_ape_tag_item(cursor)
        ape_metadata[item_key] = item_value
      end
      invalid_data("@#{cursor} - APE tag header is inconsistent. We should be at cursor #{expected_items_end}") if cursor != expected_items_end
      metadata( :apev2_metadata => ape_metadata )
      if header_info[:has_footer]
        invalid_data("@#{cursor} - Invalid APE tag footer magic.") if @data[cursor..cursor+7] != BEGIN_PATTERN_APEV2
        footer_info = decode_ape_tag_header(cursor)
        invalid_data("@#{cursor} - APEv2 tag footer indicates no footer whereas we have one.") unless footer_info[:has_footer]
        invalid_data("@#{cursor} - APEv2 tag footer indicates it is a header whereas we are on the footer.") unless footer_info[:on_footer]
        cursor += 32
      end
    else
      # We might be on a APEv1 tag, or real MP3 data, or at the end of our file.
      # APEv1 tag occurs only after the last MP3 frame, and before any ID3v1 tag.
      # APEv1 tag has no header, but a footer.
      ape_tag_decoded = false
      if nbr_ms != 0
        # Some MP3 frames have been decoded already: try an APEv1 tag first
        cursor_begin = cursor
        begin
          ape_metadata = {}
          nbr_items = 0
          # Items are read until the magic of the footer is met
          while @data[cursor..cursor+7] != BEGIN_PATTERN_APEV2
            item_key, item_value, cursor = decode_ape_tag_item(cursor)
            ape_metadata[item_key] = item_value
            nbr_items += 1
            log_debug "=== @#{cursor} - Decoded APEv1 tag item: #{item_key.inspect} => #{item_value[0..31].inspect}"
          end
          footer_info = decode_ape_tag_header(cursor)
          invalid_data("@#{cursor} - APEv1 tag footer indicates no footer whereas we have one.") unless footer_info[:has_footer]
          invalid_data("@#{cursor} - APEv1 tag footer indicates it is a header whereas we are on the footer.") unless footer_info[:on_footer]
          invalid_data("@#{cursor} - APEv1 tag footer indicates #{footer_info[:nbr_items]} tag items, whereas we read #{nbr_items}") if footer_info[:nbr_items] != nbr_items
          log_debug "=== @#{cursor} - Found APEv1 tag"
          cursor += 32
          ape_tag_decoded = true
          metadata( :apev1_metadata => ape_metadata )
        rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
          # Not an APEv1 tag after all: rewind and try an MP3 frame instead
          #log_debug("=== @#{cursor_begin} - Failed to decode as APEv1 tag: #{$!}")
          cursor = cursor_begin
          ape_tag_decoded = false
        end
      end
      unless ape_tag_decoded
        # Real MP3 data or end of file
        frame_info = nil
        begin
          frame_info = decode_mp3_frame_header(cursor)
        rescue InvalidDataError
          # Garbage after enough MP3 data means the stream is finished; otherwise it is a real error
          #log_debug "=== @#{cursor} - Garbage data found. Should be end of file."
          raise if nbr_ms < MIN_ACCEPTABLE_TIME_MS
          ending_offset = cursor
        end
        if ending_offset.nil?
          #log_debug "=== @#{cursor} - Found MP3 data"
          cursor += frame_info[:size]
          nbr_ms += frame_info[:nbr_ms]
          # Data becomes relevant as soon as enough milliseconds have been decoded
          if !valid && (nbr_ms >= MIN_ACCEPTABLE_TIME_MS)
            valid = true
            found_relevant_data(:mp3)
          end
          metadata( :nbr_ms => nbr_ms )
        end
      end
    end
    # Reaching exactly the end offset with enough MP3 data also ends the stream
    ending_offset = cursor if (nbr_ms >= MIN_ACCEPTABLE_TIME_MS) && (cursor == @end_offset)
    progress(cursor)
  end

  ending_offset
end
|
239
|
+
|
240
|
+
private
|
241
|
+
|
242
|
+
# Decode an MP3 frame header
# Headers using reserved values or a free bit rate are reported as invalid data.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor
# Result::
# * <em>map<Symbol,Object></em>: Corresponding header info:
#   * *:size* (_Fixnum_): Size of the frame, in bytes
#   * *:nbr_ms* (_Fixnum_): Duration of the frame, in milliseconds
def decode_mp3_frame_header(cursor)
  info = {}
  # Check the header's values
  header_bytes = @data[cursor..cursor+3].bytes.to_a
  # Version bits 01 (mask value 8) are the reserved ones: bits 10 (mask value 16) stand for MPEG version 2
  invalid_data("@#{cursor} - Invalid MP3 header") if ((header_bytes[0] != 255) ||
    ((header_bytes[1] & 224) != 224) ||
    ((header_bytes[1] & 24) == 8) ||
    ((header_bytes[1] & 6) == 0) ||
    ((header_bytes[2] & 240) == 240) ||
    ((header_bytes[2] & 12) == 12) ||
    ((header_bytes[3] & 3) == 2))
  invalid_data("@#{cursor} - Invalid MP3 header: can't compute size of free bitrates") if ((header_bytes[2] & 240) == 0)
  # Read header values to compute the size
  # Version 3 stands for MPEG 2.5
  version = nil
  case ((header_bytes[1] & 24) >> 3)
  when 0
    version = 3
  when 2
    version = 2
  when 3
    version = 1
  else
    invalid_data("@#{cursor} - Unknown version in header: #{((header_bytes[1] & 24) >> 3)}")
  end
  layer = 4 - ((header_bytes[1] & 6) >> 1)
  bit_rate = BITRATE_INDEX[((header_bytes[2] & 240) >> 4)-1][(version == 1) ? layer - 1 : ((layer == 1) ? 3 : 4)] * 1000
  sample_rate = SAMPLE_RATE_INDEX[(header_bytes[2] & 12) >> 2][version - 1]
  padding = ((header_bytes[2] & 2) >> 1)
  # Number of samples of a frame: layer 3 frames of versions 2 and 2.5 are half the size of version 1 ones
  samples_per_frame =
    if (layer == 1)
      384
    elsif ((layer == 3) && (version != 1))
      576
    else
      1152
    end
  # Compute the size
  # Layer 1 frames are made of slots of 4 bytes, others of slots of 1 byte
  info[:size] = (layer == 1) ? ((12 * bit_rate) / sample_rate + padding) * 4 : ((samples_per_frame / 8) * bit_rate) / sample_rate + padding
  info[:nbr_ms] = (samples_per_frame * 1000) / sample_rate
  #log_debug "=== @#{cursor} - Read MP3 frame: Version=#{version} Layer=#{layer} BitRate=#{bit_rate} SampleRate=#{sample_rate} Padding=#{padding} FrameLength=#{info[:size]} Milliseconds=#{info[:nbr_ms]}"
  return info
end
|
282
|
+
|
283
|
+
# Decode an APE tag header (or footer, as both share the same layout of 32 bytes)
#
# Parameters::
# * *cursor* (_Fixnum_): Current cursor, on the first byte of the header
# Result::
# * <em>map<Symbol,Object></em>: The APE tag info:
#   * *:tag_size*: Declared size of the tag
#   * *:nbr_items*: Declared number of items
#   * *:has_header* (_Boolean_): Do the flags declare a header?
#   * *:has_footer* (_Boolean_): Do the flags declare a footer?
#   * *:on_footer* (_Boolean_): Do the flags declare this part as being the footer?
def decode_ape_tag_header(cursor)
  # The version, stored at cursor+8..cursor+11, is not used
  #ape_version = BinData::Uint32le.read(@data[cursor+8..cursor+11])
  tag_size = BinData::Uint32le.read(@data[cursor+12..cursor+15])
  nbr_items = BinData::Uint32le.read(@data[cursor+16..cursor+19])
  flags = BinData::Uint32le.read(@data[cursor+20..cursor+23])
  # Bits 3 to 28 of the flags have to be 0
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if ((flags & 0x1FFFFFF8) != 0)
  reserved = BinData::Uint64le.read(@data[cursor+24..cursor+31])
  invalid_data("@#{cursor} - Invalid reserved bytes in APE Tag header: #{reserved} should be 0.") if (reserved != 0)

  {
    :tag_size => tag_size,
    :nbr_items => nbr_items,
    # Bit 31 set: the tag has a header
    :has_header => ((flags & 0x80000000) != 0),
    # Bit 30 cleared: the tag has a footer
    :has_footer => ((flags & 0x40000000) == 0),
    # Bit 29 cleared: we are on the footer
    :on_footer => ((flags & 0x20000000) == 0)
  }
end
|
303
|
+
|
304
|
+
# Decode an APE tag item: the size of its value (4 bytes), its flags (4 bytes), its key ended by
# a null byte, and then its value.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor, on the first byte of the item
# Result::
# * _String_: Item key
# * _String_: Item value
# * _Fixnum_: New cursor, following the item
def decode_ape_tag_item(cursor)
  value_size = BinData::Uint32le.read(@data[cursor..cursor+3])
  flags = BinData::Uint32le.read(@data[cursor+4..cursor+7])
  # Bits 3 to 28 of the flags have to be 0
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if ((flags & 0x1FFFFFF8) != 0)
  # The key follows the 8 first bytes, up to its terminator
  key_begin = cursor + 8
  key_end = @data.index(APE_ITEM_KEY_TERMINATOR, key_begin)
  invalid_data("@#{cursor} - Could not find the end of APE tag item key.") if key_end.nil?
  invalid_data("@#{cursor} - Empty APE tag item key.") if (key_end == key_begin)
  item_key = @data[key_begin..key_end-1]
  # The value follows the terminator of the key
  value_begin = key_end + 1
  item_value = @data[value_begin..value_begin+value_size-1]

  [item_key, item_value, value_begin + value_size]
end
|
325
|
+
|
326
|
+
# Read an ASCII value: trailing null bytes and surrounding white spaces are removed from it.
#
# Parameters::
# * *cursor_begin* (_Fixnum_): Offset of the first byte to read
# * *cursor_end* (_Fixnum_): Offset of the last byte to read (included)
# Result::
# * _String_: Resulting string
def read_ascii(cursor_begin, cursor_end)
  raw_value = @data[cursor_begin..cursor_end]
  without_trailing_zeros = raw_value.gsub(TRAILING_00_REGEXP, '')
  without_trailing_zeros.strip
end
|
336
|
+
|
337
|
+
end
|
338
|
+
|
339
|
+
end
|
340
|
+
|
341
|
+
end
|