fileshunter 0.1.0.20130725
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
@@ -0,0 +1,247 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
# JPEG decoder has to be among the last ones to be decoded, as a truncated JPEG followed by other files can consume all files in its truncated data.
|
6
|
+
# JPEG files can contain TIFF files
|
7
|
+
|
8
|
+
class JPEG < BeginPatternDecoder
|
9
|
+
|
10
|
+
# Byte introducing every marker
MARKER_PREFIX = "\xFF".force_encoding(Encoding::ASCII_8BIT)
# Marker ending the JPEG file
END_MARKER = "\xD9".force_encoding(Encoding::ASCII_8BIT)
# Markers that are followed by neither a size nor a payload
MARKERS_WITHOUT_PAYLOAD = [
  "\xD8".force_encoding(Encoding::ASCII_8BIT),
  "\xD9".force_encoding(Encoding::ASCII_8BIT)
]
# Marker whose payload is followed by entropy data
MARKER_WITH_ENTROPY_DATA = "\xDA".force_encoding(Encoding::ASCII_8BIT)
# Application specific markers
MARKER_APP0 = "\xE0".force_encoding(Encoding::ASCII_8BIT)
MARKER_APP1 = "\xE1".force_encoding(Encoding::ASCII_8BIT)
# SOF: Start of Frame (range SOF0..SOF3)
MARKER_SOF0 = "\xC0".force_encoding(Encoding::ASCII_8BIT)
MARKER_SOF3 = "\xC3".force_encoding(Encoding::ASCII_8BIT)
# DHT: Define Huffman tables
MARKER_DHT = "\xC4".force_encoding(Encoding::ASCII_8BIT)
# SOS: Start of Scan
MARKER_SOS = "\xDA".force_encoding(Encoding::ASCII_8BIT)
# DQT: Define quantisation tables
MARKER_DQT = "\xDB".force_encoding(Encoding::ASCII_8BIT)
# Bytes that, when following FF inside entropy data, do not start a new segment (00, D0..D7 and FF)
MARKERS_IGNORED_IN_ENTROPY_DATA = [
  "\x00".force_encoding(Encoding::ASCII_8BIT),
  "\xD0".force_encoding(Encoding::ASCII_8BIT),
  "\xD1".force_encoding(Encoding::ASCII_8BIT),
  "\xD2".force_encoding(Encoding::ASCII_8BIT),
  "\xD3".force_encoding(Encoding::ASCII_8BIT),
  "\xD4".force_encoding(Encoding::ASCII_8BIT),
  "\xD5".force_encoding(Encoding::ASCII_8BIT),
  "\xD6".force_encoding(Encoding::ASCII_8BIT),
  "\xD7".force_encoding(Encoding::ASCII_8BIT),
  "\xFF".force_encoding(Encoding::ASCII_8BIT)
]
# Matches FF followed by any byte that is not ignored in entropy data.
# The binary (no encoding) flag is given as a regular option: the 3rd argument of Regexp.new is not accepted by recent Ruby versions.
MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP = Regexp.new("#{MARKER_PREFIX}[^#{MARKERS_IGNORED_IN_ENTROPY_DATA.join}]".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)

# Identifiers found at the beginning of APP0 and APP1 payloads
JFIF_HEADER = "JFIF\x00".force_encoding(Encoding::ASCII_8BIT)
JFXX_HEADER = "JFXX\x00".force_encoding(Encoding::ASCII_8BIT)
EXIF_HEADER = "Exif\x00\x00".force_encoding(Encoding::ASCII_8BIT)

# Extension codes accepted in a JFXX segment
VALID_EXTENSION_CODES = [ 16, 17, 19 ]
|
43
|
+
|
44
|
+
# Get the pattern marking the beginning of a JPEG file (bytes FF D8 FF).
#
# Result::
# * _String_: The begin pattern, as a binary string
def get_begin_pattern
  [0xFF, 0xD8, 0xFF].pack('C*')
end
|
47
|
+
|
48
|
+
# Decode a JPEG file beginning at a given offset.
# Markers are read one after the other, starting 2 bytes after the offset, until the end marker is met.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern
# Result::
# * _Fixnum_: Offset following the end marker
def decode(offset)
  ending_offset = nil

  cursor = offset + 2
  nbr_segments = 0
  quantisation_tables_id = []
  huffman_ac_tables_id = []
  huffman_dc_tables_id = []
  found_sos = false
  found_sof = false
  while ending_offset.nil?
    # Here cursor is at the beginning of the next marker
    # Read the 2 next bytes: they should be FF ??
    log_debug "@#{cursor} Decoding next offset: #{@data[cursor..cursor+1].inspect}"
    invalid_data("@#{cursor} - Did not get a valid marker definition: #{@data[cursor..cursor+1].inspect}") if (@data[cursor] != MARKER_PREFIX)
    c_1 = @data[cursor+1]
    invalid_data("@#{cursor} - Invalid marker: #{c_1.ord}") if (c_1.ord < 192)
    # Does this marker have a payload?
    if (MARKERS_WITHOUT_PAYLOAD.include?(c_1))
      # No payload
      log_debug "=== No payload"
      # Get to the next bytes
      cursor += 2
      # Check if we arrived at the end
      ending_offset = cursor if (c_1 == END_MARKER)
    else
      # There is a payload
      # Read its length
      size = BinData::Uint16be.read(@data[cursor+2..cursor+3])
      log_debug "=== Payload of size #{size}"
      case c_1
      when MARKER_APP0
        # Application specific data
        # Usually used for JFIF
        case @data[cursor+4..cursor+8]
        when JFIF_HEADER
          invalid_data("@#{cursor} - Invalid size for JFIF marker: #{size}") if (size < 16)
          version_major = @data[cursor+9].ord
          version_minor = @data[cursor+10].ord
          units = @data[cursor+11].ord
          invalid_data("@#{cursor} - Invalid units: #{units}") if (units > 2)
          width = BinData::Uint16be.read(@data[cursor+12..cursor+13])
          invalid_data("@#{cursor} - Invalid width: #{width}") if (width == 0)
          height = BinData::Uint16be.read(@data[cursor+14..cursor+15])
          invalid_data("@#{cursor} - Invalid height: #{height}") if (height == 0)
          jfif_metadata = {
            :version_major => version_major,
            :version_minor => version_minor,
            :units => units,
            :width => width,
            :height => height
          }
          if (size > 16)
            width_thumb = BinData::Uint16be.read(@data[cursor+16..cursor+17])
            height_thumb = BinData::Uint16be.read(@data[cursor+18..cursor+19])
            jfif_metadata.merge!(
              :width_thumb => width_thumb,
              :height_thumb => height_thumb
            )
          end
          metadata( :jfif_metadata => jfif_metadata )
        when JFXX_HEADER
          extension_code = @data[cursor+9].ord
          invalid_data("@#{cursor} - Invalid extension code: #{extension_code}") if (!VALID_EXTENSION_CODES.include?(extension_code))
          metadata( :jfxx_metadata => { :extension_code => extension_code } )
        end
      when MARKER_APP1
        # Application specific data
        # Usually used for Exif
        case @data[cursor+4..cursor+9]
        when EXIF_HEADER
          # Read a TIFF file from cursor+10
          # The TIFF decoder is loaded lazily, only when Exif data is met
          require 'fileshunter/Decoders/TIFF'
          invalid_data("@#{cursor} - Invalid TIFF header") if (@data[cursor+10..cursor+13].index(FilesHunter::Decoders::TIFF::BEGIN_PATTERN_TIFF) != 0)
          tiff_decoder = FilesHunter::Decoders::TIFF.new
          tiff_decoder.setup(FilesHunter::get_segments_analyzer, @data, cursor+10, cursor+2+size)
          tiff_decoder.accept_no_image_data
          begin
            tiff_decoder.find_segments
          rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
            # Invalid TIFF data
            invalid_data("@#{cursor} - Invalid TIFF data: #{$!}")
          end
          segments = tiff_decoder.segments_found
          invalid_data("@#{cursor} - No valid TIFF segment found for Exif") if segments.empty?
          invalid_data("@#{cursor} - Not a valid TIFF segment found for Exif. Found #{segments[0].extensions.inspect}.") if (!segments[0].extensions.include?(:tif))
          invalid_data("@#{cursor} - Truncated TIFF segment found for Exif.") if (segments[0].truncated)
          invalid_data("@#{cursor} - TIFF segment (@#{segments[0].begin_offset}) not found at the beginning of Exif (#{cursor+10}).") if (segments[0].begin_offset != cursor+10)
          #invalid_data("@#{cursor} - TIFF segment not ending (#{segments[0].end_offset}) at the end of Exif (#{cursor+2+size}).") if (segments[0].end_offset != cursor+2+size)
          metadata( :exif_metadata => segments[0].metadata )
          found_relevant_data([:jpg, :thm])
        end
      when MARKER_SOF0..MARKER_SOF3
        # SOF: Start of Frame
        invalid_data("@#{cursor} - Found several SOF markers") if found_sof
        invalid_data("@#{cursor} - Found a SOF marker after the SOS marker") if found_sos
        found_sof = true
        sample_precision = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid sample precision: #{sample_precision}") if ((sample_precision != 8) && (sample_precision != 12))
        image_height = BinData::Uint16be.read(@data[cursor+5..cursor+6])
        image_width = BinData::Uint16be.read(@data[cursor+7..cursor+8])
        metadata(
          :image_height => image_height,
          :image_width => image_width
        )
        nbr_components = @data[cursor+9].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        # Check that quantisation tables have been defined
        nbr_components.times do |idx_component|
          # Each component takes 3 bytes: id, sampling factors, quantisation table id
          sampling = @data[cursor+11+idx_component*3].ord
          horizontal_sampling = ((sampling & 0b11110000) >> 4)
          vertical_sampling = (sampling & 0b00001111)
          invalid_data("@#{cursor} - Invalid horizontal sampling: #{horizontal_sampling}") if (horizontal_sampling == 0)
          invalid_data("@#{cursor} - Invalid vertical sampling: #{vertical_sampling}") if (vertical_sampling == 0)
          dqt_id = @data[cursor+12+idx_component*3].ord
          invalid_data("@#{cursor} - Missing quantisation table ID #{dqt_id}") if (!quantisation_tables_id.include?(dqt_id))
        end
      when MARKER_DHT
        # DHT: Define Huffman tables
        end_cursor = cursor + 2 + size
        dht_cursor = cursor + 4
        while (dht_cursor < end_cursor)
          header_byte = @data[dht_cursor].ord
          huffman_type = ((header_byte & 0b11110000) >> 4)
          invalid_data("@#{cursor} - Unknown Huffman table type: #{huffman_type}") if (huffman_type > 1)
          if (huffman_type == 0)
            huffman_dc_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman DC table id #{huffman_dc_table_id} already defined.") if (huffman_dc_tables_id.include?(huffman_dc_table_id))
            huffman_dc_tables_id << huffman_dc_table_id
            log_debug "@#{cursor} - Found Huffman DC table: #{huffman_dc_table_id}"
          else
            huffman_ac_table_id = (header_byte & 0b00001111)
            invalid_data("@#{cursor} - Huffman AC table id #{huffman_ac_table_id} already defined.") if (huffman_ac_tables_id.include?(huffman_ac_table_id))
            huffman_ac_tables_id << huffman_ac_table_id
            log_debug "@#{cursor} - Found Huffman AC table: #{huffman_ac_table_id}"
          end
          # The 16 bytes following the header give the number of elements for each depth
          nbr_elements = @data[dht_cursor+1..dht_cursor+16].bytes.to_a.inject(0) { |sum, nbr_element_for_depth| sum + nbr_element_for_depth }
          dht_cursor += 17 + nbr_elements
          # Report the Huffman table cursor here (the quantisation table cursor does not exist in this branch)
          invalid_data("@#{dht_cursor} - End of Huffman table was supposed to be @#{end_cursor}.") if (dht_cursor > end_cursor)
        end
      when MARKER_SOS
        # SOS: Start of Scan
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman DC table has been defined.") if (huffman_dc_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no Huffman AC table has been defined.") if (huffman_ac_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no quantisation table has been defined.") if (quantisation_tables_id.empty?)
        invalid_data("@#{cursor} - SOS marker begins whereas no SOF marker has been encountered.") if (!found_sof)
        found_sos = true
        nbr_components = @data[cursor+4].ord
        invalid_data("@#{cursor} - Invalid number of components: #{nbr_components}") if (nbr_components == 0)
        nbr_components.times do |idx_component|
          # Each component takes 2 bytes: id, then DC (high nibble) and AC (low nibble) table ids
          huffman_table_ids = @data[cursor+6+2*idx_component].ord
          huffman_dc_table_id = ((huffman_table_ids & 0b11110000) >> 4)
          huffman_ac_table_id = (huffman_table_ids & 0b00001111)
          invalid_data("@#{cursor} - Unknown DC Huffman table: #{huffman_dc_table_id}") if (!huffman_dc_tables_id.include?(huffman_dc_table_id))
          invalid_data("@#{cursor} - Unknown AC Huffman table: #{huffman_ac_table_id}") if (!huffman_ac_tables_id.include?(huffman_ac_table_id))
        end
      when MARKER_DQT
        # DQT: Define quantisation tables
        end_cursor = cursor + 2 + size
        dqt_cursor = cursor + 4
        while (dqt_cursor < end_cursor)
          header_byte = @data[dqt_cursor].ord
          precision = ((header_byte & 0b11110000) >> 4)
          quantisation_table_id = (header_byte & 0b00001111)
          invalid_data("@#{cursor} - Quantisation table id #{quantisation_table_id} already defined.") if (quantisation_tables_id.include?(quantisation_table_id))
          quantisation_tables_id << quantisation_table_id
          log_debug "@#{cursor} - Found quantisation table: #{quantisation_table_id}"
          # 64 values of 1 byte (precision 0) or 2 bytes each
          dqt_cursor += 1 + 64*((precision == 0) ? 1 : 2)
          invalid_data("@#{dqt_cursor} - End of quantisation table was supposed to be @#{end_cursor}.") if (dqt_cursor > end_cursor)
        end
      end
      # Does it have entropy data?
      if (c_1 == MARKER_WITH_ENTROPY_DATA)
        # There is entropy data
        found_relevant_data([:jpg, :thm])
        # Remember where the scan began: cursor becomes nil below when no further marker is found
        scan_cursor = cursor
        # Find the next marker that is FF xx, with xx being different than 00, D0..D7 and FF
        cursor = @data.index(MARKERS_IGNORED_IN_ENTROPY_DATA_REGEXP, cursor + 2 + size, 2)
        log_debug "=== Entropy data gets to cursor #{cursor.inspect}"
        truncated_data("@#{scan_cursor} - Truncated entropy data segment", @end_offset) if cursor.nil?
      else
        # No entropy data: just get to the next segment
        cursor += 2 + size
      end
    end
    nbr_segments += 1
    progress(cursor)
  end
  metadata( :nbr_segments => nbr_segments )

  return ending_offset
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
end
|
246
|
+
|
247
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
class M2V < BeginPatternDecoder
|
6
|
+
|
7
|
+
# Bytes expected at the beginning of an M2V stream
BEGIN_PATTERN_M2V = [0x00, 0x00, 0x01, 0xBA, 0x44, 0x00, 0x04, 0x00, 0x14, 0x01].pack('C*')
# Bytes ending an M2V stream
END_PATTERN_M2V = [0x00, 0x00, 0x01, 0xB9].pack('C*')
|
9
|
+
|
10
|
+
# Get the pattern marking the beginning of an M2V file, along with its search options.
#
# Result::
# * _String_: The begin pattern
# * <em>map<Symbol,Object></em>: Options (:offset_inc is the length of the begin pattern)
def get_begin_pattern
  search_options = { :offset_inc => 10 }
  [BEGIN_PATTERN_M2V, search_options]
end
|
13
|
+
|
14
|
+
# Decode an M2V file beginning at a given offset.
# The file is considered to run until the first end pattern found after the begin pattern.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern
# Result::
# * _Fixnum_: Offset following the end pattern
def decode(offset)
  found_relevant_data(:m2v)
  end_marker_offset = @data.index(END_PATTERN_M2V, offset + 10)
  log_debug "=== @#{offset} - Found ending offset: #{end_marker_offset.inspect}"
  # No end pattern, or an end pattern crossing the end of data
  truncated_data if end_marker_offset.nil? || (end_marker_offset + 4 > @end_offset)

  # The end pattern (4 bytes) is part of the file
  end_marker_offset + 4
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,341 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
# MP3 files can contain JPEG files
|
6
|
+
|
7
|
+
class MP3 < BeginPatternDecoder
|
8
|
+
|
9
|
+
# Magic strings of the tags that can surround MP3 data
BEGIN_PATTERN_ID3V1 = 'TAG'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V1E = 'TAG+'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_ID3V2 = 'ID3'.force_encoding(Encoding::ASCII_8BIT)
BEGIN_PATTERN_APEV2 = 'APETAGEX'.force_encoding(Encoding::ASCII_8BIT)
# An MP3 file begins with either the first 3 bytes of a frame header, an ID3v2 tag or an APEv2 tag.
# The pattern is explicitly made binary, and the no-encoding flag is given as a regular option:
# the 3rd argument of Regexp.new is not accepted by recent Ruby versions.
BEGIN_PATTERN_MP3 = Regexp.new("(\xFF[\xE2-\xFF][\x00-\xEF]|#{BEGIN_PATTERN_ID3V2}|#{BEGIN_PATTERN_APEV2})".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)

# Bit rates (in kbps), indexed by (bit rate index from the frame header - 1).
# Columns: version 1 layer 1, version 1 layer 2, version 1 layer 3, other versions layer 1, other versions layers 2 and 3.
BITRATE_INDEX = [
  [  32,  32,  32,  32,   8 ],
  [  64,  48,  40,  48,  16 ],
  [  96,  56,  48,  56,  24 ],
  [ 128,  64,  56,  64,  32 ],
  [ 160,  80,  64,  80,  40 ],
  [ 192,  96,  80,  96,  48 ],
  [ 224, 112,  96, 112,  56 ],
  [ 256, 128, 112, 128,  64 ],
  [ 288, 160, 128, 144,  80 ],
  [ 320, 192, 160, 160,  96 ],
  [ 352, 224, 192, 176, 112 ],
  [ 384, 256, 224, 192, 128 ],
  [ 416, 320, 256, 224, 144 ],
  [ 448, 384, 320, 256, 160 ]
]
# Sample rates (in Hz), indexed by the sample rate index from the frame header.
# Columns are indexed by (version - 1), version being the one computed by decode_mp3_frame_header.
SAMPLE_RATE_INDEX = [
  [ 44100, 22050, 11025 ],
  [ 48000, 24000, 12000 ],
  [ 32000, 16000,  8000 ]
]

# Minimal cumulated duration of MP3 frames needed to consider data as a valid MP3 file
MIN_ACCEPTABLE_TIME_MS = 1000

# Maximal number of bytes of an ID3v2 frame that are kept as metadata
MAX_ID3V2_FRAME_SIZE = 256

# Byte ending an APE tag item key
APE_ITEM_KEY_TERMINATOR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Byte used to pad ID3v2 tags after their last frame
ID3V2_PADDING_CHAR = "\x00".force_encoding(Encoding::ASCII_8BIT)

# Matches the run of 00 bytes found at the end of a line
TRAILING_00_REGEXP = Regexp.new("\x00*$".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)
|
46
|
+
|
47
|
+
# Get the pattern marking the beginning of an MP3 file, along with its search options.
#
# Result::
# * _Regexp_: The begin pattern
# * <em>map<Symbol,Object></em>: Options (:max_regexp_size is the length of the longest alternative of the pattern)
def get_begin_pattern
  search_options = { :max_regexp_size => 8 }
  [BEGIN_PATTERN_MP3, search_options]
end
|
50
|
+
|
51
|
+
# Check that a begin pattern found at a given offset can really begin an MP3 file.
# Tags are always accepted. An MP3 frame header is accepted only if it uses no reserved value
# for its version, layer, sample rate and emphasis.
#
# Parameters::
# * *begin_pattern_offset* (_Fixnum_): Offset of the found pattern
# * *pattern_index* (_Fixnum_): Index of the found pattern (not used by this decoder)
# Result::
# * _Boolean_: Can the pattern begin an MP3 file?
def check_begin_pattern(begin_pattern_offset, pattern_index)
  first_byte = @data[begin_pattern_offset]
  # Compare the byte value: comparing strings of different encodings would never match
  return true if first_byte.nil? || (first_byte.getbyte(0) != 0xFF)
  header_bytes = @data[begin_pattern_offset+1..begin_pattern_offset+3].bytes.to_a
  # A frame header is 4 bytes long: fewer bytes can't be a frame
  return false if (header_bytes.size < 3)
  return (((header_bytes[0] & 24) != 8) &&   # Version: 01 is reserved
          ((header_bytes[0] & 6) != 0) &&    # Layer: 00 is reserved
          ((header_bytes[1] & 12) != 12) &&  # Sample rate: 11 is reserved
          ((header_bytes[2] & 3) != 2))      # Emphasis: 10 is reserved
end
|
62
|
+
|
63
|
+
# Decode an MP3 file beginning at a given offset.
# ID3v1, ID3v2 and APE tags and MP3 frames are read one after the other. Decoding stops after an ID3v1 tag,
# on data that is not an MP3 frame once enough milliseconds have been read, or at the end of data.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern
# Result::
# * _Fixnum_: Offset following the last decoded byte
def decode(offset)
  ending_offset = nil
  cursor = offset
  nbr_ms = 0
  valid = false

  while ending_offset.nil?
    magic = @data[cursor..cursor+2]
    if magic == BEGIN_PATTERN_ID3V1
      if @data[cursor..cursor+3] == BEGIN_PATTERN_ID3V1E
        # Extended ID3v1 tag: 227 bytes, followed by something else
        log_debug "=== @#{cursor} - Found ID3v1 extended tag"
        id3v1e_metadata = {
          :title => read_ascii(cursor+4, cursor+63),
          :artist => read_ascii(cursor+64, cursor+123),
          :album => read_ascii(cursor+124, cursor+183),
          :speed => @data[cursor+184].ord,
          :genre => read_ascii(cursor+185, cursor+214),
          :start_time => read_ascii(cursor+215, cursor+220),
          :end_time => read_ascii(cursor+221, cursor+226)
        }
        metadata( :id3v1e_metadata => id3v1e_metadata )
        cursor += 227
      else
        # Plain ID3v1 tag: 128 bytes
        log_debug "=== @#{cursor} - Found ID3v1 tag"
        id3v1_metadata = {
          :title => read_ascii(cursor+3, cursor+32),
          :artist => read_ascii(cursor+33, cursor+62),
          :album => read_ascii(cursor+63, cursor+92),
          :year => read_ascii(cursor+93, cursor+96),
          :comments => read_ascii(cursor+97, cursor+126),
          :genre => @data[cursor+127].ord
        }
        metadata( :id3v1_metadata => id3v1_metadata )
        cursor += 128
        # An ID3v1 tag always ends the MP3 file
        ending_offset = cursor
      end
    elsif magic == BEGIN_PATTERN_ID3V2
      log_debug "=== @#{cursor} - Found ID3v2 tag"
      invalid_data("@#{cursor} - Invalid ID3v2 header") if (@data[cursor+3].ord == 255) || (@data[cursor+4].ord == 255)
      # The tag size is stored on 4 bytes of 7 significant bits each. The header (10 bytes) is not part of it.
      tag_size = 10
      @data[cursor+6..cursor+9].bytes.each_with_index do |size_byte, idx_byte|
        invalid_data("@#{cursor} - Invalid ID3v2 header in size specification (#{idx_byte})") if size_byte >= 128
        tag_size += (size_byte << (7 * (3 - idx_byte)))
      end
      # A flag tells whether a footer (10 bytes) is present
      tag_size += 10 if (@data[cursor+5].ord & 16) == 16
      tag_end = cursor + tag_size
      cursor += 10
      # An extended header is detected from its size: 6 bytes, or 10 bytes with a CRC
      padding_size = 0
      extended_header_size = BinData::Uint32be.read(@data[cursor..cursor+3])
      if (extended_header_size == 6) || (extended_header_size == 10)
        extended_header_flags = BinData::Uint16be.read(@data[cursor+4..cursor+5])
        invalid_data("@#{cursor} - Invalid extended header flags.") if (extended_header_flags & 0x7FFF) != 0
        has_crc = ((extended_header_flags & 0x8000) != 0)
        invalid_data("@#{cursor} - Extended header declared size and CRC flag do not match.") if ((extended_header_size == 10) && !has_crc) || ((extended_header_size == 6) && has_crc)
        padding_size = BinData::Uint32be.read(@data[cursor+6..cursor+9])
        cursor += 10
        cursor += 4 if has_crc
      end
      # Frames follow each other until the end of the tag or the first padding byte
      id3v2_metadata = {}
      while (cursor < tag_end) && (@data[cursor] != ID3V2_PADDING_CHAR)
        frame_id = @data[cursor..cursor+3]
        frame_size = BinData::Uint32be.read(@data[cursor+4..cursor+7])
        frame_flags = BinData::Uint16be.read(@data[cursor+8..cursor+9])
        invalid_data("@#{cursor} - Invalid ID3v2 frame flags: #{frame_flags}.") if (frame_flags & 0x1F1F) != 0
        cursor += 10
        # Only the beginning of big frames is kept as metadata
        kept_size = (frame_size > MAX_ID3V2_FRAME_SIZE) ? MAX_ID3V2_FRAME_SIZE : frame_size
        id3v2_metadata[frame_id] = read_ascii(cursor, cursor+kept_size-1)
        cursor += frame_size
      end
      metadata( :id3v2_metadata => id3v2_metadata )
      # Jump to the end of the tag, skipping any padding
      remaining_size = tag_end-cursor
      log_debug("@#{cursor} - Padding size (#{padding_size}) is different from what is being read (#{remaining_size}).") if padding_size != remaining_size
      cursor = tag_end
    elsif @data[cursor..cursor+7] == BEGIN_PATTERN_APEV2
      log_debug "=== @#{cursor} - Found APEv2 tag"
      header_info = decode_ape_tag_header(cursor)
      invalid_data("@#{cursor} - APE tag header indicates no header whereas we have one.") unless header_info[:has_header]
      invalid_data("@#{cursor} - APE tag header indicates it is a footer whereas we are on the header.") if header_info[:on_footer]
      cursor += 32
      # The tag size counts the items and the footer (32 bytes), if any
      items_end = cursor + header_info[:tag_size]-(header_info[:has_footer] ? 32 : 0)
      ape_metadata = {}
      header_info[:nbr_items].times do
        item_key, item_value, cursor = decode_ape_tag_item(cursor)
        ape_metadata[item_key] = item_value
      end
      invalid_data("@#{cursor} - APE tag header is inconsistent. We should be at cursor #{items_end}") if cursor != items_end
      metadata( :apev2_metadata => ape_metadata )
      if header_info[:has_footer]
        invalid_data("@#{cursor} - Invalid APE tag footer magic.") if @data[cursor..cursor+7] != BEGIN_PATTERN_APEV2
        footer_info = decode_ape_tag_header(cursor)
        invalid_data("@#{cursor} - APEv2 tag footer indicates no footer whereas we have one.") unless footer_info[:has_footer]
        invalid_data("@#{cursor} - APEv2 tag footer indicates it is a header whereas we are on the footer.") unless footer_info[:on_footer]
        cursor += 32
      end
    else
      # Here we can have an APEv1 tag, an MP3 frame, or the end of the file.
      # An APEv1 tag has no header but a footer. It comes after the last MP3 frame and before any ID3v1 tag,
      # so it is looked for only once MP3 frames have been read.
      ape_tag_decoded = false
      unless nbr_ms == 0
        cursor_begin = cursor
        begin
          ape_metadata = {}
          nbr_items = 0
          # Read items until the footer is met
          while @data[cursor..cursor+7] != BEGIN_PATTERN_APEV2
            item_key, item_value, cursor = decode_ape_tag_item(cursor)
            ape_metadata[item_key] = item_value
            nbr_items += 1
            log_debug "=== @#{cursor} - Decoded APEv1 tag item: #{item_key.inspect} => #{item_value[0..31].inspect}"
          end
          footer_info = decode_ape_tag_header(cursor)
          invalid_data("@#{cursor} - APEv1 tag footer indicates no footer whereas we have one.") unless footer_info[:has_footer]
          invalid_data("@#{cursor} - APEv1 tag footer indicates it is a header whereas we are on the footer.") unless footer_info[:on_footer]
          invalid_data("@#{cursor} - APEv1 tag footer indicates #{footer_info[:nbr_items]} tag items, whereas we read #{nbr_items}") if footer_info[:nbr_items] != nbr_items
          log_debug "=== @#{cursor} - Found APEv1 tag"
          cursor += 32
          ape_tag_decoded = true
          metadata( :apev1_metadata => ape_metadata )
        rescue InvalidDataError, TruncatedDataError, AccessAfterDataError
          # Not an APEv1 tag: forget what was read and try an MP3 frame from the same place
          cursor = cursor_begin
          ape_tag_decoded = false
        end
      end
      unless ape_tag_decoded
        frame_info = nil
        begin
          frame_info = decode_mp3_frame_header(cursor)
        rescue InvalidDataError
          # Garbage is an error unless enough MP3 data has already been read: then the file is just finished
          raise if nbr_ms < MIN_ACCEPTABLE_TIME_MS
          ending_offset = cursor
        end
        if ending_offset.nil?
          # Skip the frame
          cursor += frame_info[:size]
          # Data is relevant only once enough milliseconds have been read
          nbr_ms += frame_info[:nbr_ms]
          if !valid && (nbr_ms >= MIN_ACCEPTABLE_TIME_MS)
            valid = true
            found_relevant_data(:mp3)
          end
          metadata( :nbr_ms => nbr_ms )
        end
      end
    end
    # Reaching the end of data with enough MP3 data also ends the file
    ending_offset = cursor if (nbr_ms >= MIN_ACCEPTABLE_TIME_MS) && (cursor == @end_offset)
    progress(cursor)
  end

  ending_offset
end
|
239
|
+
|
240
|
+
private
|
241
|
+
|
242
|
+
# Decode an MP3 frame header
|
243
|
+
#
|
244
|
+
# Parameters::
|
245
|
+
# * *cursor* (_Fixnum_): The cursor
|
246
|
+
# Result::
|
247
|
+
# * <em>map<Symbol,Object></em>: Corresponding header info
|
248
|
+
def decode_mp3_frame_header(cursor)
  info = {}
  # Check the header's values
  header_bytes = @data[cursor..cursor+3].bytes.to_a
  invalid_data("@#{cursor} - Invalid MP3 header") if ((header_bytes.size < 4) ||          # A header is 4 bytes long
                                                      (header_bytes[0] != 255) ||         # Frame sync
                                                      ((header_bytes[1] & 224) != 224) || # Frame sync
                                                      ((header_bytes[1] & 24) == 8) ||    # Version: 01 is reserved (10 is MPEG 2)
                                                      ((header_bytes[1] & 6) == 0) ||     # Layer: 00 is reserved
                                                      ((header_bytes[2] & 240) == 240) || # Bit rate: 1111 is forbidden
                                                      ((header_bytes[2] & 12) == 12) ||   # Sample rate: 11 is reserved
                                                      ((header_bytes[3] & 3) == 2))       # Emphasis: 10 is reserved
  invalid_data("@#{cursor} - Invalid MP3 header: can't compute size of free bitrates") if ((header_bytes[2] & 240) == 0)
  # Read header values to compute the size
  # version is 1 for MPEG 1, 2 for MPEG 2 and 3 for MPEG 2.5: it indexes columns of SAMPLE_RATE_INDEX
  version_bits = ((header_bytes[1] & 24) >> 3)
  version = nil
  case version_bits
  when 0
    version = 3
  when 2
    version = 2
  when 3
    version = 1
  else
    invalid_data("@#{cursor} - Unknown version in header: #{version_bits}")
  end
  layer = 4 - ((header_bytes[1] & 6) >> 1)
  bit_rate = BITRATE_INDEX[((header_bytes[2] & 240) >> 4)-1][(version == 1) ? layer - 1 : ((layer == 1) ? 3 : 4)] * 1000
  sample_rate = SAMPLE_RATE_INDEX[(header_bytes[2] & 12) >> 2][version - 1]
  padding = ((header_bytes[2] & 2) >> 1)
  # Number of samples per frame: 384 for layer 1, 1152 for layers 2 and 3,
  # except layer 3 of MPEG 2 and 2.5 that has 576 samples only
  nbr_samples = if (layer == 1)
                  384
                elsif ((layer == 3) && (version != 1))
                  576
                else
                  1152
                end
  # Compute the size (layer 1 is counted in slots of 4 bytes)
  info[:size] = (layer == 1) ? ((12 * bit_rate) / sample_rate + padding) * 4 : ((nbr_samples / 8) * bit_rate) / sample_rate + padding
  info[:nbr_ms] = (nbr_samples * 1000) / sample_rate
  #log_debug "=== @#{cursor} - Read MP3 frame: Version=#{version} Layer=#{layer} BitRate=#{bit_rate} SampleRate=#{sample_rate} Padding=#{padding} FrameLength=#{info[:size]} Milliseconds=#{info[:nbr_ms]}"
  return info
end
|
282
|
+
|
283
|
+
# Decode an APE tag header
|
284
|
+
#
|
285
|
+
# Parameters::
|
286
|
+
# * *cursor* (_Fixnum_): Current cursor
|
287
|
+
# Result::
|
288
|
+
# * <em>map<Symbol,Object></em>: The APE tag info
|
289
|
+
def decode_ape_tag_header(cursor)
  # Bytes 8..11 (the APE version) are not read
  #ape_version = BinData::Uint32le.read(@data[cursor+8..cursor+11])
  tag_size = BinData::Uint32le.read(@data[cursor+12..cursor+15])
  nbr_items = BinData::Uint32le.read(@data[cursor+16..cursor+19])
  flags = BinData::Uint32le.read(@data[cursor+20..cursor+23])
  # Bit 31 is set when the tag has a header, bit 30 when it has no footer, and bit 29 when we are on the header
  has_header = ((flags & 0x80000000) != 0)
  has_footer = ((flags & 0x40000000) == 0)
  on_footer = ((flags & 0x20000000) == 0)
  # Bits 3 to 28 have to be 0
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if ((flags & 0x1FFFFFF8) != 0)
  # The last 8 bytes are reserved
  reserved = BinData::Uint64le.read(@data[cursor+24..cursor+31])
  invalid_data("@#{cursor} - Invalid reserved bytes in APE Tag header: #{reserved} should be 0.") if (reserved != 0)

  {
    :tag_size => tag_size,
    :nbr_items => nbr_items,
    :has_header => has_header,
    :has_footer => has_footer,
    :on_footer => on_footer
  }
end
|
303
|
+
|
304
|
+
# Decode an APE tag item
|
305
|
+
#
|
306
|
+
# Parameters::
|
307
|
+
# * *cursor* (_Fixnum_): The cursor
|
308
|
+
# Result::
|
309
|
+
# * _String_: Item key
|
310
|
+
# * _String_: Item value
|
311
|
+
# * _Fixnum_: New cursor
|
312
|
+
def decode_ape_tag_item(cursor)
  # An item is made of its value size (4 bytes), its flags (4 bytes), its 00 terminated key, then its value
  value_size = BinData::Uint32le.read(@data[cursor..cursor+3])
  flags = BinData::Uint32le.read(@data[cursor+4..cursor+7])
  invalid_data("@#{cursor} - Invalid APE tag flags: #{flags}") if ((flags & 0x1FFFFFF8) != 0)
  key_begin = cursor+8
  key_terminator = @data.index(APE_ITEM_KEY_TERMINATOR, key_begin)
  invalid_data("@#{cursor} - Could not find the end of APE tag item key.") if key_terminator.nil?
  invalid_data("@#{cursor} - Empty APE tag item key.") if (key_terminator == key_begin)
  item_key = @data[key_begin..key_terminator-1]
  # The value begins right after the key terminator
  value_begin = key_terminator + 1
  item_value = @data[value_begin..value_begin+value_size-1]

  [item_key, item_value, value_begin + value_size]
end
|
325
|
+
|
326
|
+
# Read an ASCII value
|
327
|
+
#
|
328
|
+
# Parameters::
|
329
|
+
# * *cursor_begin* (_Fixnum_): The cursor to read from
|
330
|
+
# * *cursor_end* (_Fixnum_): The end of the cursor
|
331
|
+
# Result::
|
332
|
+
# * _String_ or <em>list<String></em>: Resulting string or list of strings if several.
|
333
|
+
def read_ascii(cursor_begin, cursor_end)
  raw_value = @data[cursor_begin..cursor_end]
  # Remove the 00 padding first, then the surrounding white spaces
  unpadded_value = raw_value.gsub(TRAILING_00_REGEXP, '')
  unpadded_value.strip
end
|
336
|
+
|
337
|
+
end
|
338
|
+
|
339
|
+
end
|
340
|
+
|
341
|
+
end
|