fileshunter 0.1.0.20130725

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,387 @@
1
+ require 'fileshunter/Decoders/_FLAC'
2
+
3
+ module FilesHunter
4
+
5
+ module Decoders
6
+
7
+ class FLAC < BeginPatternDecoder
8
+
9
+ BEGIN_PATTERN_FLAC = 'fLaC'.force_encoding(Encoding::ASCII_8BIT)
10
+
11
# Give the pattern marking the beginning of a FLAC stream.
#
# Result::
# * _String_: The begin pattern (BEGIN_PATTERN_FLAC)
# * <em>map<Symbol,Object></em>: Options returned along with the pattern (here :offset_inc set to 4)
def get_begin_pattern
  pattern_options = { :offset_inc => 4 }
  return BEGIN_PATTERN_FLAC, pattern_options
end
14
+
15
# Decode a FLAC stream found at a given offset.
#
# The metadata blocks following the 4-byte marker are walked first, then audio frames
# are read one after the other (frame header, one subframe per channel, frame footer)
# until a frame header does not validate anymore or @end_offset is reached.
# Errors are reported through invalid_data (presumably aborting the decoding - defined outside of this method).
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in @data
# Result::
# * _Fixnum_: Offset following the last frame that was decoded
def decode(offset)
  ending_offset = nil

  # Read all Metadata blocks
  cursor = offset + 4
  metadata_finished = false
  nbr_bits_per_sample_header = nil
  until metadata_finished
    c = @data[cursor].ord
    # Bit 7 flags the last metadata block and the 7 low bits give the block type.
    # The bit has to be tested on its own: a last block of type 0 (STREAMINFO) has a header byte of exactly 128.
    metadata_finished = ((c & 128) != 0)
    metadata_type = (c & 127)
    invalid_data("@#{cursor} - Invalid Metadata type: #{c}") if (metadata_type > 6)
    if (metadata_type == 0)
      # STREAMINFO: bits per sample (minus 1) is a 5-bit value spread on the last bit of one byte and the 4 first bits of the next one
      nbr_bits_per_sample_header = ((@data[cursor+16].ord & 1) << 4) + ((@data[cursor+17].ord & 240) >> 4) + 1
    end
    metadata_size = BinData::Uint24be.read(@data[cursor+1..cursor+3])
    cursor += 4 + metadata_size
    progress(cursor)
  end
  invalid_data("@#{offset} - Missing METADATA_BLOCK_STREAMINFO from headers") if nbr_bits_per_sample_header.nil?
  found_relevant_data(:flac)
  metadata(
    :nbr_bits_per_sample_header => nbr_bits_per_sample_header
  )
  # Read frames
  nbr_frames = 0
  while ending_offset.nil?
    log_debug "@#{cursor} - Reading new frame"
    # Check frame header: sync code, then reserved or invalid values of the block size,
    # sample rate, channel assignment and sample size fields.
    header_bytes = @data[cursor..cursor+4].bytes.to_a
    if ((header_bytes[0] != 255) ||
        ((header_bytes[1] & 254) != 248) ||
        ((header_bytes[2] & 240) == 0) ||
        ((header_bytes[2] & 15) == 15) ||
        (header_bytes[3] >= 176) ||
        ((header_bytes[3] & 14) == 6) ||
        ((header_bytes[3] & 14) == 14) ||
        (header_bytes[3].odd?))
      if (nbr_frames == 0)
        invalid_data("@#{cursor} - Incorrect frame header")
      else
        # At least 1 frame was read: what follows is considered as not being part of the stream
        log_debug "@#{cursor} - Incorrect frame header. Consider the file is finished."
        ending_offset = cursor
      end
    else
      # The frame/sample number is stored using a variable length UTF-8 like coding
      utf8_number_size = get_utf8_size(header_bytes[4])
      invalid_data("@#{cursor} - Incorrect UTF-8 size") if ((header_bytes[1].even?) && (utf8_number_size >= 7))
      cursor += 4 + utf8_number_size
      block_size = 0
      block_size_byte = ((header_bytes[2] & 240) >> 4)
      log_debug "@#{cursor} - block_size_byte=#{block_size_byte}"
      case block_size_byte
      when 1
        block_size = 192
      when 2..5
        block_size = 576 * (2**(block_size_byte-2))
      when 6
        # Blocksize is coded here on 8 bits
        block_size = @data[cursor].ord + 1
        cursor += 1
      when 7
        # Blocksize is coded here on 16 bits
        block_size = BinData::Uint16be.read(@data[cursor..cursor+1]) + 1
        cursor += 2
      else
        block_size = 256 * (2**(block_size_byte-8))
      end
      case (header_bytes[2] & 15)
      when 12
        # Sample rate is coded here on 8 bits
        cursor += 1
      when 13, 14
        # Sample rate is coded here on 16 bits
        cursor += 2
      end
      cursor += 1 # CRC
      # Decode some values needed further
      nbr_channels = ((header_bytes[3] & 240) >> 4) + 1
      # Channels encoding side (differences) always have +1 bit per sample
      bps_inc =
        case nbr_channels
        when 9, 11
          [ 0, 1 ]
        when 10
          [ 1, 0 ]
        else
          [ 0, 0 ]
        end
      # Values above 8 are the stereo decorrelation modes: they always carry 2 channels
      nbr_channels = 2 if (nbr_channels > 8)
      nbr_bits_per_sample_frame_header = 0
      case ((header_bytes[3] & 14) >> 1)
      when 0
        # Take the value from the STREAMINFO metadata block
        nbr_bits_per_sample_frame_header = nbr_bits_per_sample_header
      when 1
        nbr_bits_per_sample_frame_header = 8
      when 2
        nbr_bits_per_sample_frame_header = 12
      when 4
        nbr_bits_per_sample_frame_header = 16
      when 5
        nbr_bits_per_sample_frame_header = 20
      when 6
        nbr_bits_per_sample_frame_header = 24
      end
      log_debug "@#{cursor} - block_size=#{block_size} nbr_channels=#{nbr_channels} nbr_bits_per_sample_frame_header=#{nbr_bits_per_sample_frame_header} bps_inc=#{bps_inc.inspect}"
      progress(cursor)
      # Here cursor is on the next byte after the frame header
      # We have nbr_channels subframes
      # !!! Starting from here, we have to track bits shifting
      cursor_bits = 0
      nbr_channels.times do |idx_channel|
        nbr_bits_per_sample = nbr_bits_per_sample_frame_header + (bps_inc[idx_channel] || 0)
        log_debug "@#{cursor},#{cursor_bits} - Reading Subframe"
        nbr_encoded_partitions = 0
        # Decode the sub-frame header: 1 padding bit (0), 6 bits of subframe type, 1 bit flagging wasted bits.
        # Types 00001x, 0001xx and 01xxxx are reserved.
        sub_header_first_byte, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 8)
        invalid_data("@#{cursor},#{cursor_bits} - Invalid Sub frame header: #{sub_header_first_byte}") if ((sub_header_first_byte > 127) ||
          ((sub_header_first_byte & 124) == 4) ||
          ((sub_header_first_byte & 120) == 8) ||
          ((sub_header_first_byte & 96) == 32))
        wasted_bits = 0
        if (sub_header_first_byte.odd?)
          wasted_bits, cursor, cursor_bits = decode_unary(cursor, cursor_bits)
        end
        log_debug "@#{cursor},#{cursor_bits} - Found #{wasted_bits} wasted bits"
        # NOTE(review): decode_unary has already moved the cursor past the unary code, yet the cursor
        # is advanced by wasted_bits once more here and the per-sample size is not reduced.
        # The FLAC format reduces the subframe sample size by the wasted bits instead: confirm against
        # files using wasted bits before changing this.
        cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, wasted_bits)
        # Now decode the Subframe itself (the type sits in bits 1 to 6, hence masks on even values only)
        if ((sub_header_first_byte & 126) == 0)
          # SUBFRAME_CONSTANT (type 000000): 1 sample
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_CONSTANT"
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample)
        elsif ((sub_header_first_byte & 126) == 2)
          # SUBFRAME_VERBATIM (type 000001): block_size unencoded samples
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_VERBATIM"
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample * block_size)
        elsif ((sub_header_first_byte & 112) == 16)
          # SUBFRAME_FIXED (type 001xxx): order warm-up samples followed by the residual
          order = ((sub_header_first_byte & 14) >> 1)
          invalid_data("@#{cursor},#{cursor_bits} - Invalid SUBFRAME_FIXED") if (order > 4)
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_FIXED of order #{order}"
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample * order)
          cursor, cursor_bits, nbr_encoded_partitions = decode_residual(cursor, cursor_bits, nbr_bits_per_sample, block_size, order, nbr_encoded_partitions)
        else
          # SUBFRAME_LPC (type 1xxxxx): lpc_order warm-up samples, coefficients precision,
          # shift, coefficients and then the residual
          lpc_order = ((sub_header_first_byte & 62) >> 1) + 1
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_LPC of order #{lpc_order}"
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample * lpc_order)
          qlpc_precision, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 4)
          invalid_data("@#{cursor},#{cursor_bits} - Invalid qlpc_precision: #{qlpc_precision}") if (qlpc_precision == 15)
          qlpc_precision += 1
          log_debug "@#{cursor},#{cursor_bits} - qlpc_precision=#{qlpc_precision}"
          # Values are not needed: just skip the 5 bits of the shift and
          # the lpc_order coefficients of qlpc_precision bits each.
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, 5)
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, qlpc_precision * lpc_order)

          cursor, cursor_bits, nbr_encoded_partitions = decode_residual(cursor, cursor_bits, nbr_bits_per_sample, block_size, lpc_order, nbr_encoded_partitions)
        end
        progress(cursor)
      end
      # We align back to byte
      cursor += 1 if (cursor_bits > 0)
      # Frame footer
      cursor += 2
      progress(cursor)
      nbr_frames += 1
      ending_offset = cursor if (cursor == @end_offset)
    end
  end
  metadata(
    :nbr_frames => nbr_frames
  )

  return ending_offset
end
200
+
201
+ private
202
+
203
# Get number of bytes taken by an UTF-8 character that has the given byte as the first one.
# Sizes up to 7 bytes are accepted (leading byte made of 6 bits set to 1 followed by a 0).
#
# Parameters::
# * *first_utf8_byte* (_Fixnum_): The first UTF-8 byte
# Result::
# * _Fixnum_: The total size of the UTF-8 character
# Raises a RuntimeError if the byte can't begin an UTF-8 sequence (continuation byte 10xxxxxx, or 255).
def get_utf8_size(first_utf8_byte)
  # 0xxxxxxx: single byte
  return 1 if (first_utf8_byte < 128)
  # 10xxxxxx can only be found after a leading byte
  raise "Invalid variable UTF-8 byte encoded: #{first_utf8_byte} (is a UTF-8 continuation byte, not a leading byte)" if ((first_utf8_byte & 192) == 128)
  # Here the 2 first bits are set: every following bit set to 1 adds 1 byte to the sequence
  size = 2
  while ((first_utf8_byte & (1 << (7 - size))) != 0)
    size += 1
    raise "Invalid variable UTF-8 byte encoded: #{first_utf8_byte}" if (size > 7)
  end
  size
end
219
+
220
# Get position (in binary terms) of the next bit set to 1 in data.
# Return nil if none found.
# Data is scanned byte per byte, so that data of any size (even not a multiple of 4 bytes,
# or smaller than 4 bytes) is analyzed up to its very last bit.
#
# Parameters::
# * *data* (_String_): The data to analyze
# * *idx_bit_begin_search* (_Fixnum_): Index of the first bit to begin search (bits before it are ignored)
# Result::
# * _Fixnum_: The position of the first 1, counted from the first bit of data. For example: 001 would return 2
def find_bit(data, idx_bit_begin_search)
  idx_first_byte, nbr_ignored_bits = idx_bit_begin_search.divmod(8)
  (idx_first_byte...data.bytesize).each do |idx_byte|
    byte = data.getbyte(idx_byte)
    # Mask the ignored bits with 0 (they can only be in the first byte analyzed)
    byte &= (255 >> nbr_ignored_bits) if (idx_byte == idx_first_byte)
    next if (byte == 0)
    # There is a 1 in this byte: look for it, starting from the most significant bit
    position_in_byte = 0
    mask = 128
    while ((byte & mask) == 0)
      position_in_byte += 1
      mask >>= 1
    end
    return idx_byte * 8 + position_in_byte
  end
  nil
end
243
+
244
# Decode the next value as unary encoded (0 padding, ending with 1).
# The value is the number of bits taken by the code (zeros plus the ending 1),
# and the cursors are moved right after it.
#
# Parameters::
# * *cursor* (_Fixnum_): Current cursor
# * *cursor_bits* (_Fixnum_): Current cursor_bits
# Result::
# * _Fixnum_: Value
# * _Fixnum_: New cursor
# * _Fixnum_: New cursor_bits
def decode_unary(cursor, cursor_bits)
  value = 1
  # Bits located before the cursor: they only exist in the first block read
  nbr_bits_before_cursor = cursor_bits
  @data.each_block(cursor) do |data_block|
    bit_position_in_block = find_bit(data_block, nbr_bits_before_cursor)
    if bit_position_in_block
      # We found it
      value += bit_position_in_block - nbr_bits_before_cursor
      break
    end
    # Only zeros in this block: count them all and look in the next one
    value += (8 * data_block.size) - nbr_bits_before_cursor
    nbr_bits_before_cursor = 0
  end
  new_cursor, new_cursor_bits = inc_cursor_bits(cursor, cursor_bits, value)

  return value, new_cursor, new_cursor_bits
end
274
+
275
# Move a position expressed as (byte cursor, bit offset in this byte) forward by a number of bits
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor in bytes
# * *cursor_bits* (_Fixnum_): The cursor in bits
# * *nbr_bits* (_Fixnum_): The number of bits
# Result::
# * _Fixnum_: The new cursor
# * _Fixnum_: The new cursor_bits
def inc_cursor_bits(cursor, cursor_bits, nbr_bits)
  total_bits = cursor_bits + nbr_bits
  # Complete bytes move the byte cursor, the remainder stays as the bit offset
  [cursor + (total_bits / 8), total_bits % 8]
end
288
+
289
# Increment cursor and cursor_bits by reading a RESIDUAL section.
# The section is made of a 2-bit coding method, a 4-bit partition order,
# and then 2**partition_order partitions of Rice coded (or escaped, fixed-size) samples.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor in bytes
# * *cursor_bits* (_Fixnum_): The cursor in bits
# * *bits_per_sample* (_Fixnum_): Number of bits per sample (not used to decode the residual)
# * *block_size* (_Fixnum_): The block size
# * *predictor_order* (_Fixnum_): The predictor order
# * *nbr_encoded_partitions* (_Fixnum_): The number of encoded partitions
# Result::
# * _Fixnum_: The new cursor
# * _Fixnum_: The new cursor_bits
# * _Fixnum_: The number of encoded partitions
def decode_residual(cursor, cursor_bits, bits_per_sample, block_size, predictor_order, nbr_encoded_partitions)
  method_id, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 2)

  invalid_data("@#{cursor},#{cursor_bits} - Invalid Residual method id: #{method_id}") if (method_id > 1)
  # Rice parameters take 4 bits with method 0 and 5 bits with method 1
  rice_parameter_size = 4 + method_id
  # A Rice parameter with all its bits set to 1 is the escape code: 15 on 4 bits, but 31 on 5 bits
  escape_code = (1 << rice_parameter_size) - 1
  partition_order, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 4)
  log_debug "@#{cursor},#{cursor_bits} - Found residual with method_id=#{method_id} rice_parameter_size=#{rice_parameter_size} partition_order=#{partition_order}"
  nbr_partitions = 2**partition_order
  nbr_partitions.times do
    log_debug "@#{cursor},#{cursor_bits} - Decode partition"
    rice_parameter, cursor, cursor_bits = decode_bits(cursor, cursor_bits, rice_parameter_size)
    # Escaped partitions store their samples unencoded, using a number of bits per sample given on 5 bits
    partition_bits_per_sample = nil
    if (rice_parameter == escape_code)
      partition_bits_per_sample, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 5)
    end
    # The first partition does not contain the predictor_order warm-up samples
    nbr_samples =
      if (partition_order == 0)
        block_size - predictor_order
      elsif (nbr_encoded_partitions > 0)
        block_size / nbr_partitions
      else
        (block_size / nbr_partitions) - predictor_order
      end
    log_debug "@#{cursor},#{cursor_bits} - Begin decoding Rice samples: rice_parameter=#{rice_parameter} partition_bits_per_sample=#{partition_bits_per_sample} nbr_samples=#{nbr_samples}"
    if partition_bits_per_sample.nil?
      # Samples encoded using Unary high values and rice_parameter length low values.
      # See http://www.hydrogenaudio.org/forums//lofiversion/index.php/t81718.html
      # decode_rice is not defined in this file (presumably provided by the required _FLAC library):
      # it stands for nbr_samples times a decode_unary followed by a decode_bits of rice_parameter bits,
      # which is very slow when done in Ruby.
      cursor, cursor_bits = decode_rice(cursor, cursor_bits, nbr_samples, rice_parameter)
    else
      # Fixed-size encoded samples
      cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_samples * partition_bits_per_sample)
    end
    nbr_encoded_partitions += 1
    progress(cursor)
  end

  return cursor, cursor_bits, nbr_encoded_partitions
end
351
+
352
# Read an unsigned big-endian value of n bits at the current position,
# and give the position following it.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor in bytes
# * *cursor_bits* (_Fixnum_): The cursor in bits
# * *nbr_bits* (_Fixnum_): The number of bits to decode (has to be maximum 24)
# Result::
# * _Fixnum_: The decoded value
# * _Fixnum_: The new cursor
# * _Fixnum_: The new cursor_bits
def decode_bits(cursor, cursor_bits, nbr_bits)
  nbr_bits_to_read = cursor_bits + nbr_bits
  # Load the smallest window of bytes containing all the bits, starting at cursor
  window, window_size =
    if (nbr_bits_to_read > 24)
      # The value is split between 4 bytes
      [ BinData::Uint32be.read(@data[cursor..cursor+3]), 32 ]
    elsif (nbr_bits_to_read > 16)
      # The value is split between 3 bytes
      [ BinData::Uint24be.read(@data[cursor..cursor+2]), 24 ]
    elsif (nbr_bits_to_read > 8)
      # The value is split between 2 bytes
      [ BinData::Uint16be.read(@data[cursor..cursor+1]), 16 ]
    else
      # The value is accessible through the same byte (@data[cursor])
      [ @data[cursor].ord, 8 ]
    end
  # Drop the bits following the value, then the ones preceding it
  value = (window >> (window_size - nbr_bits_to_read)) & ((1 << nbr_bits) - 1)
  new_cursor, new_cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits)

  return value, new_cursor, new_cursor_bits
end
382
+
383
+ end
384
+
385
+ end
386
+
387
+ end
@@ -0,0 +1,71 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
+ class ICO < BeginPatternDecoder
6
+
7
# Pattern matching the 10 first bytes of an ICO/CUR file:
# 2 null bytes, the type (1 or 2) on 2 bytes, 5 free bytes (number of images on 2 bytes, then
# width, height and number of colors of the first image), and the null reserved byte of the first image.
# MULTILINE is needed for '.' to also match byte 0x0A: without it, files having 10 images
# or a first image of 10 pixels wide or high are never found.
# Options are given as the 2nd argument, as recent Ruby versions don't accept a 3rd argument anymore.
BEGIN_PATTERN_ICO = Regexp.new("\x00\x00[\x01\x02]\x00.....\x00", Regexp::MULTILINE | Regexp::NOENCODING)

# Bits per pixel values accepted for the images of an ico file
ALLOWED_BPP_VALUES = [ 0, 1, 4, 8, 16, 24, 32 ].freeze
10
+
11
# Give the pattern marking the beginning of an ICO/CUR file.
#
# Result::
# * _Regexp_: The begin pattern (BEGIN_PATTERN_ICO)
# * <em>map<Symbol,Object></em>: Options returned along with the pattern (here :offset_inc set to 3 and :max_regexp_size set to 10)
def get_begin_pattern
  pattern_options = { :offset_inc => 3, :max_regexp_size => 10 }
  return BEGIN_PATTERN_ICO, pattern_options
end
14
+
15
# Decode an ICO or CUR file found at a given offset.
#
# The 6-byte header and the 16-byte entry of each image are read and checked, then image
# data ranges are verified not to overlap. Images data itself is not decoded.
# Errors are reported through invalid_data (presumably aborting the decoding - defined outside of this method).
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in @data
# Result::
# * _Fixnum_: Offset following the data of the last image
def decode(offset)
  # The type is 1 for icons; anything else reaching here is considered a cursor
  extension = ((@data[offset+2] == "\x01") ? :ico : :cur)
  nbr_images = BinData::Uint16le.read(@data[offset+4..offset+5])
  invalid_data("@#{offset} - Number of images is 0") if (nbr_images == 0)
  cursor = offset + 6
  # Read all image headers
  images = []
  nbr_images.times do
    # The 2 first bytes of the entry (cursor and cursor+1) are the image width and height: not needed
    nbr_colors = @data[cursor+2].ord
    invalid_data("@#{cursor} - Invalid ICONDIRENTRY header") if (@data[cursor+3].ord != 0)
    # Following checks are made for ico files only
    color_planes = BinData::Uint16le.read(@data[cursor+4..cursor+5])
    invalid_data("@#{cursor} - Invalid color planes") if ((extension == :ico) && (color_planes > 1))
    bpp = BinData::Uint16le.read(@data[cursor+6..cursor+7])
    invalid_data("@#{cursor} - Invalid bpp value") if ((extension == :ico) && (!ALLOWED_BPP_VALUES.include?(bpp)))
    invalid_data("@#{cursor} - Invalid number of colors") if ((extension == :ico) && (bpp >= 8) && (nbr_colors != 0))
    image_size = BinData::Uint32le.read(@data[cursor+8..cursor+11])
    invalid_data("@#{cursor} - Invalid image size") if (image_size == 0)
    image_offset = BinData::Uint32le.read(@data[cursor+12..cursor+15])
    images << [ image_offset, image_size ]
    cursor += 16
  end
  progress(cursor)
  # Make sure images are not overlapping.
  # Offsets are relative to the beginning of the file: the first image can't begin before the end of the headers.
  next_offset_min = cursor - offset
  images.sort.each do |image_offset, image_size|
    invalid_data("@#{cursor} - Invalid image offset: #{image_offset} could not be before #{next_offset_min} as it belongs to another image") if (image_offset < next_offset_min)
    # The next image can't begin before the end of this one.
    # Computing it from the image offset (and not by summing sizes) keeps it right when there is a gap before an image.
    next_offset_min = image_offset + image_size
  end
  # OK now we consider we might have a valid file
  log_debug "@#{cursor} - #{extension.to_s} file with #{nbr_images} images."
  found_relevant_data(extension)
  metadata(
    :nbr_images => nbr_images
  )
  # The file ends with the image having the biggest offset
  cursor = offset + next_offset_min
  progress(cursor)

  return cursor
end
66
+
67
+ end
68
+
69
+ end
70
+
71
+ end