fileshunter 0.1.0.20130725

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,387 @@
1
+ require 'fileshunter/Decoders/_FLAC'
2
+
3
+ module FilesHunter
4
+
5
+ module Decoders
6
+
7
+ class FLAC < BeginPatternDecoder
8
+
9
+ BEGIN_PATTERN_FLAC = 'fLaC'.force_encoding(Encoding::ASCII_8BIT)
10
+
11
# Give the pattern marking the beginning of the data this decoder handles, along with its scanning options.
#
# Result::
# * _String_: The begin pattern
# * <em>map<Symbol,Object></em>: The options attached to this pattern
def get_begin_pattern
  [ BEGIN_PATTERN_FLAC, { :offset_inc => 4 } ]
end
14
+
15
# Decode a FLAC stream: skip its metadata blocks, then walk the audio frames
# one by one (without decoding samples) to find where the stream ends.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the begin pattern in @data
# Result::
# * _Fixnum_: Offset following the last frame that could be read
def decode(offset)
  ending_offset = nil

  # Read all Metadata blocks
  cursor = offset + 4
  metadata_finished = false
  nbr_bits_per_sample_header = nil
  while (!metadata_finished)
    c = @data[cursor].ord
    # Bit 7 flags the last metadata block, the 7 lower bits give the block type.
    # The bit itself is tested: comparing with "> 128" misses a last block of type 0 (c == 128).
    metadata_finished = ((c & 128) != 0)
    metadata_type = (c & 127)
    invalid_data("@#{cursor} - Invalid Metadata type: #{c}") if (metadata_type > 6)
    if (metadata_type == 0)
      # The value is stored minus 1 on 5 bits, split between the low bit of one byte and the high nibble of the next
      nbr_bits_per_sample_header = ((@data[cursor+16].ord & 1) << 4) + ((@data[cursor+17].ord & 240) >> 4) + 1
    end
    metadata_size = BinData::Uint24be.read(@data[cursor+1..cursor+3])
    cursor += 4 + metadata_size
    progress(cursor)
  end
  invalid_data("@#{offset} - Missing METADATA_BLOCK_STREAMINFO from headers") if (nbr_bits_per_sample_header == nil)
  found_relevant_data(:flac)
  metadata(
    :nbr_bits_per_sample_header => nbr_bits_per_sample_header
  )
  # Read frames
  nbr_frames = 0
  while (ending_offset == nil)
    log_debug "@#{cursor} - Reading new frame"
    # Check frame header: sync code and values that are not allowed in it
    header_bytes = @data[cursor..cursor+4].bytes.to_a
    incorrect_header = (
      (header_bytes[0] != 255) ||
      ((header_bytes[1] & 254) != 248) ||
      ((header_bytes[2] & 240) == 0) ||
      ((header_bytes[2] & 15) == 15) ||
      (header_bytes[3] >= 176) ||
      ((header_bytes[3] & 14) == 6) ||
      ((header_bytes[3] & 14) == 14) ||
      (header_bytes[3].odd?)
    )
    if incorrect_header
      if (nbr_frames == 0)
        invalid_data("@#{cursor} - Incorrect frame header")
      else
        # At least 1 frame was read correctly: what follows is just not part of this stream
        log_debug "@#{cursor} - Incorrect frame header. Consider the file is finished."
        ending_offset = cursor
      end
    else
      utf8_number_size = get_utf8_size(header_bytes[4])
      invalid_data("@#{cursor} - Incorrect UTF-8 size") if ((header_bytes[1].even?) && (utf8_number_size >= 7))
      cursor += 4 + utf8_number_size
      block_size = 0
      block_size_byte = ((header_bytes[2] & 240) >> 4)
      log_debug "@#{cursor} - block_size_byte=#{block_size_byte}"
      case block_size_byte
      when 1
        block_size = 192
      when 2..5
        block_size = 576 * (2**(block_size_byte-2))
      when 6
        # Blocksize is coded here on 8 bits
        block_size = @data[cursor].ord + 1
        cursor += 1
      when 7
        # Blocksize is coded here on 16 bits
        block_size = BinData::Uint16be.read(@data[cursor..cursor+1]) + 1
        cursor += 2
      else
        block_size = 256 * (2**(block_size_byte-8))
      end
      case (header_bytes[2] & 15)
      when 12
        # Sample rate is coded here on 8 bits
        cursor += 1
      when 13, 14
        # Sample rate is coded here on 16 bits
        cursor += 2
      end
      cursor += 1 # CRC
      # Decode some values needed further
      nbr_channels = ((header_bytes[3] & 240) >> 4) + 1
      # Channels encoding side (differences) always have +1 bit per sample
      bps_inc = nil
      case nbr_channels
      when 9, 11
        bps_inc = [ 0, 1 ]
      when 10
        bps_inc = [ 1, 0 ]
      else
        bps_inc = [ 0, 0 ]
      end
      # Values above 8 are stereo decorrelation modes: they always carry 2 channels
      nbr_channels = 2 if (nbr_channels > 8)
      nbr_bits_per_sample_frame_header = 0
      case ((header_bytes[3] & 14) >> 1)
      when 0
        # Take the value from the metadata header
        nbr_bits_per_sample_frame_header = nbr_bits_per_sample_header
      when 1
        nbr_bits_per_sample_frame_header = 8
      when 2
        nbr_bits_per_sample_frame_header = 12
      when 4
        nbr_bits_per_sample_frame_header = 16
      when 5
        nbr_bits_per_sample_frame_header = 20
      when 6
        nbr_bits_per_sample_frame_header = 24
      end
      log_debug "@#{cursor} - block_size=#{block_size} nbr_channels=#{nbr_channels} nbr_bits_per_sample_frame_header=#{nbr_bits_per_sample_frame_header} bps_inc=#{bps_inc.inspect}"
      progress(cursor)
      # Here cursor is on the next byte after the frame header
      # We have nbr_channels subframes
      # !!! Starting from here, we have to track bits shifting
      cursor_bits = 0
      nbr_channels.times do |idx_channel|
        # Channels beyond the second one never get the extra side bit
        nbr_bits_per_sample = nbr_bits_per_sample_frame_header + ((bps_inc[idx_channel] == nil) ? 0 : bps_inc[idx_channel])
        log_debug "@#{cursor},#{cursor_bits} - Reading Subframe"
        nbr_encoded_partitions = 0
        # Decode the sub-frame header: 1 padding bit, 6 bits of type, 1 bit flagging wasted bits
        sub_header_first_byte, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 8)
        # The third test uses mask 120 (bits 6..3): the previous mask 240 could never give 8
        invalid_data("@#{cursor},#{cursor_bits} - Invalid Sub frame header: #{sub_header_first_byte}") if ((sub_header_first_byte > 127) ||
          ((sub_header_first_byte & 124) == 4) ||
          ((sub_header_first_byte & 120) == 8) ||
          ((sub_header_first_byte & 96) == 32))
        wasted_bits = 0
        if (sub_header_first_byte.odd?)
          # decode_unary already moves the cursor past the unary code: no further skipping is needed
          wasted_bits, cursor, cursor_bits = decode_unary(cursor, cursor_bits)
        end
        log_debug "@#{cursor},#{cursor_bits} - Found #{wasted_bits} wasted bits"
        # Wasted bits are not stored: samples of this subframe are coded on that many fewer bits
        nbr_bits_per_sample -= wasted_bits
        # Now decode the Subframe itself
        subframe_type = ((sub_header_first_byte & 126) >> 1)
        if (subframe_type == 0)
          # SUBFRAME_CONSTANT
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_CONSTANT"
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample)
        elsif (subframe_type == 1)
          # SUBFRAME_VERBATIM
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_VERBATIM"
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample * block_size)
        elsif ((sub_header_first_byte & 112) == 16)
          # SUBFRAME_FIXED
          order = ((sub_header_first_byte & 14) >> 1)
          invalid_data("@#{cursor},#{cursor_bits} - Invalid SUBFRAME_FIXED") if (order > 4)
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_FIXED of order #{order}"
          # Skip the warm-up samples
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample * order)
          cursor, cursor_bits, nbr_encoded_partitions = decode_residual(cursor, cursor_bits, nbr_bits_per_sample, block_size, order, nbr_encoded_partitions)
        else
          # SUBFRAME_LPC
          lpc_order = ((sub_header_first_byte & 62) >> 1) + 1
          log_debug "@#{cursor},#{cursor_bits} - Found Subframe header SUBFRAME_LPC of order #{lpc_order}"
          # Skip the warm-up samples
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_per_sample * lpc_order)
          qlpc_precision, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 4)
          invalid_data("@#{cursor},#{cursor_bits} - Invalid qlpc_precision: #{qlpc_precision}") if (qlpc_precision == 15)
          qlpc_precision += 1
          log_debug "@#{cursor},#{cursor_bits} - qlpc_precision=#{qlpc_precision}"
          # The shift (5 bits) and the coefficients (qlpc_precision bits each) are skipped, not decoded
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, 5)
          cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, qlpc_precision * lpc_order)

          cursor, cursor_bits, nbr_encoded_partitions = decode_residual(cursor, cursor_bits, nbr_bits_per_sample, block_size, lpc_order, nbr_encoded_partitions)
        end
        progress(cursor)
      end
      # We align back to byte
      cursor += 1 if (cursor_bits > 0)
      # Frame footer
      cursor += 2
      progress(cursor)
      nbr_frames += 1
      ending_offset = cursor if (cursor == @end_offset)
    end
  end
  metadata(
    :nbr_frames => nbr_frames
  )

  return ending_offset
end
200
+
201
+ private
202
+
203
# Compute how many bytes a UTF-8 style variable-length number occupies, knowing its first byte.
# The size is given by the number of consecutive bits set to 1 at the top of this byte.
#
# Parameters::
# * *first_utf8_byte* (_Fixnum_): The first UTF-8 byte
# Result::
# * _Fixnum_: The total size of the UTF-8 character
def get_utf8_size(first_utf8_byte)
  # A byte with its top bit cleared stands for itself
  return 1 if (first_utf8_byte < 128)
  # A byte beginning with bits 10 can't be the first one of a sequence
  if ((first_utf8_byte & 192) == 128)
    raise "Invalid variable UTF-8 byte encoded: #{first_utf8_byte} (is a UTF-16 character)"
  end
  # Look for the first bit set to 0 below the 2 top bits: its position gives the size
  5.downto(0) do |idx_bit|
    return 7 - idx_bit if (first_utf8_byte[idx_bit] == 0)
  end
  # Bits 5 to 0 are all set: no valid size
  raise "Invalid variable UTF-8 byte encoded: #{first_utf8_byte}"
end
219
+
220
# Get position (in binary terms) of the next bit set to 1 in data.
# Return nil if none found.
# Data of any size is accepted: it is analyzed by 32 bits words, and an incomplete
# trailing word is completed with zeros so that its bytes are not ignored.
#
# Parameters::
# * *data* (_String_): The data to analyze
# * *idx_bit_begin_search* (_Fixnum_): Index of the first bit to begin search (has to be < 32)
# Result::
# * _Fixnum_: The position of the first 1. For example: 001 would return 2
def find_bit(data, idx_bit_begin_search)
  # unpack('N*') drops a trailing group of less than 4 bytes: pad with zeros
  # (which can never be mistaken for the bit we look for) to keep those bytes.
  nbr_missing_bytes = (-data.bytesize) % 4
  padded_data = ((nbr_missing_bytes == 0) ? data : data + ("\x00" * nbr_missing_bytes))
  data_32bits = padded_data.unpack('N*')
  return nil if data_32bits.empty?
  # Mask the ignored bits with 0
  data_32bits[0] = data_32bits[0] & ((1 << (32-idx_bit_begin_search))-1) if (idx_bit_begin_search > 0)
  idx_not_null = data_32bits.find_index { |v| v != 0 }
  return nil if (idx_not_null == nil)
  # Scan the first non null word, beginning with its most significant bit
  not_null = data_32bits[idx_not_null]
  position_in_32bits = 0
  mask = (1 << 31)
  while ((not_null & mask) == 0)
    position_in_32bits += 1
    mask = mask >> 1
  end
  return idx_not_null*32 + position_in_32bits
end
243
+
244
# Read a unary encoded value (a run of 0 bits closed by a 1 bit) at the current position.
# The value returned is the number of bits read, the closing 1 included.
#
# Parameters::
# * *cursor* (_Fixnum_): Current cursor
# * *cursor_bits* (_Fixnum_): Current cursor_bits
# Result::
# * _Fixnum_: Value
# * _Fixnum_: New cursor
# * _Fixnum_: New cursor_bits
def decode_unary(cursor, cursor_bits)
  nbr_bits_read = 1
  # Only the first block has bits located before the current position: they are
  # ignored by the search, and must not be counted.
  nbr_ignored_bits = cursor_bits
  @data.each_block(cursor) do |data_block|
    bit_position = find_bit(data_block, nbr_ignored_bits)
    bit_found = (bit_position != nil)
    # Without any 1 in it, the whole block is part of the run of 0
    nbr_bits_read += (bit_found ? bit_position : 8 * data_block.size) - nbr_ignored_bits
    break if bit_found
    nbr_ignored_bits = 0
  end
  new_cursor, new_cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits_read)

  return nbr_bits_read, new_cursor, new_cursor_bits
end
274
+
275
# Move a position, expressed as a byte cursor and a bit cursor inside this byte, forward by a number of bits.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor in bytes
# * *cursor_bits* (_Fixnum_): The cursor in bits
# * *nbr_bits* (_Fixnum_): The number of bits
# Result::
# * _Fixnum_: The new cursor
# * _Fixnum_: The new cursor_bits
def inc_cursor_bits(cursor, cursor_bits, nbr_bits)
  # Count bits from the beginning of the current byte, then split back into whole bytes and remaining bits
  total_bits = cursor_bits + nbr_bits
  [ cursor + total_bits / 8, total_bits % 8 ]
end
288
+
289
# Increment cursor and cursor_bits by reading a RESIDUAL section
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor in bytes
# * *cursor_bits* (_Fixnum_): The cursor in bits
# * *bits_per_sample* (_Fixnum_): Number of bits per sample (currently not used)
# * *block_size* (_Fixnum_): The block size
# * *predictor_order* (_Fixnum_): The predictor order
# * *nbr_encoded_partitions* (_Fixnum_): The number of encoded partitions
# Result::
# * _Fixnum_: The new cursor
# * _Fixnum_: The new cursor_bits
# * _Fixnum_: The number of encoded partitions
def decode_residual(cursor, cursor_bits, bits_per_sample, block_size, predictor_order, nbr_encoded_partitions)
  method_id, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 2)

  invalid_data("@#{cursor},#{cursor_bits} - Invalid Residual method id: #{method_id}") if (method_id > 1)
  # Rice parameters are coded on 4 bits (method 0) or 5 bits (method 1)
  rice_parameter_size = 4 + method_id
  # A Rice parameter with all its bits set is the escape code: 15 on 4 bits, but 31 on 5 bits
  # (with 5 bits, 15 is a regular parameter).
  escape_code = (1 << rice_parameter_size) - 1
  partition_order, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 4)
  log_debug "@#{cursor},#{cursor_bits} - Found residual with method_id=#{method_id} rice_parameter_size=#{rice_parameter_size} partition_order=#{partition_order}"
  nbr_partitions = 2**partition_order
  nbr_partitions.times do
    log_debug "@#{cursor},#{cursor_bits} - Decode partition"
    rice_parameter, cursor, cursor_bits = decode_bits(cursor, cursor_bits, rice_parameter_size)
    # Escaped partitions store their samples unencoded, on a number of bits given by the next 5 bits
    partition_bits_per_sample = nil
    if (rice_parameter == escape_code)
      partition_bits_per_sample, cursor, cursor_bits = decode_bits(cursor, cursor_bits, 5)
    end
    # The first partition of a subframe has predictor_order samples less than the others
    nbr_samples = nil
    if (partition_order == 0)
      nbr_samples = block_size - predictor_order
    elsif (nbr_encoded_partitions > 0)
      nbr_samples = block_size / nbr_partitions
    else
      nbr_samples = (block_size / nbr_partitions) - predictor_order
    end
    log_debug "@#{cursor},#{cursor_bits} - Begin decoding Rice samples: rice_parameter=#{rice_parameter} partition_bits_per_sample=#{partition_bits_per_sample} nbr_samples=#{nbr_samples}"
    if (partition_bits_per_sample == nil)
      # Samples encoded using Unary high values and rice_parameter length low values.
      # See http://www.hydrogenaudio.org/forums//lofiversion/index.php/t81718.html
      # decode_rice stands for a loop calling, for each sample, decode_unary then
      # decode_bits(cursor, cursor_bits, rice_parameter): such a Ruby loop is very slow.
      cursor, cursor_bits = decode_rice(cursor, cursor_bits, nbr_samples, rice_parameter)
    else
      # Fixed-size encoded samples
      cursor, cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_samples * partition_bits_per_sample)
    end
    nbr_encoded_partitions += 1
    progress(cursor)
  end

  return cursor, cursor_bits, nbr_encoded_partitions
end
351
+
352
# Read an unsigned value coded on a given number of bits at the current position,
# and give the position following it.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor in bytes
# * *cursor_bits* (_Fixnum_): The cursor in bits
# * *nbr_bits* (_Fixnum_): The number of bits to decode (has to be maximum 24)
# Result::
# * _Fixnum_: The decoded value
# * _Fixnum_: The new cursor
# * _Fixnum_: The new cursor_bits
def decode_bits(cursor, cursor_bits, nbr_bits)
  # Position of the end of the value, counted in bits from the beginning of the byte at cursor
  idx_end_bit = cursor_bits + nbr_bits
  # Read the smallest big-endian integer (1 to 4 bytes) containing the whole value
  raw_value, raw_size =
    if (idx_end_bit > 24)
      [ BinData::Uint32be.read(@data[cursor..cursor+3]), 32 ]
    elsif (idx_end_bit > 16)
      [ BinData::Uint24be.read(@data[cursor..cursor+2]), 24 ]
    elsif (idx_end_bit > 8)
      [ BinData::Uint16be.read(@data[cursor..cursor+1]), 16 ]
    else
      [ @data[cursor].ord, 8 ]
    end
  # Drop the bits following the value, then the ones preceding it
  value = (raw_value >> (raw_size-idx_end_bit)) & ((1 << nbr_bits)-1)
  new_cursor, new_cursor_bits = inc_cursor_bits(cursor, cursor_bits, nbr_bits)

  return value, new_cursor, new_cursor_bits
end
382
+
383
+ end
384
+
385
+ end
386
+
387
+ end
@@ -0,0 +1,71 @@
1
+ module FilesHunter
2
+
3
+ module Decoders
4
+
5
# Decoder of ICO and CUR files (icons and cursors).
# Only the headers are read: the images themselves are skipped using the sizes and
# offsets declared in the headers.
class ICO < BeginPatternDecoder

  # 2 null bytes, the type (1 or 2) on 2 bytes, then 5 bytes of any value followed by a null byte.
  # * Options are given as flags: the 3 arguments form Regexp.new(str, nil, 'n') is not accepted anymore by Ruby >= 3.3.
  # * MULTILINE is needed for "." to also match byte 0x0A, which is a value like any other in a binary header.
  BEGIN_PATTERN_ICO = Regexp.new("\x00\x00[\x01\x02]\x00.....\x00", Regexp::NOENCODING | Regexp::MULTILINE)

  ALLOWED_BPP_VALUES = [ 0, 1, 4, 8, 16, 24, 32 ].freeze

  # Give the pattern marking the beginning of the data this decoder handles, along with its scanning options.
  #
  # Result::
  # * _Regexp_: The begin pattern
  # * <em>map<Symbol,Object></em>: The options attached to this pattern
  def get_begin_pattern
    return BEGIN_PATTERN_ICO, { :offset_inc => 3, :max_regexp_size => 10 }
  end

  # Decode an ICO or CUR file: check the header of each image, and deduce the end
  # of the file from the position of the images.
  #
  # Parameters::
  # * *offset* (_Fixnum_): Offset of the begin pattern in @data
  # Result::
  # * _Fixnum_: Offset following the last image
  def decode(offset)
    ending_offset = nil

    extension = ((@data[offset+2] == "\x01") ? :ico : :cur)
    nbr_images = BinData::Uint16le.read(@data[offset+4..offset+5])
    invalid_data("@#{offset} - Number of images is 0") if (nbr_images == 0)
    cursor = offset + 6
    # Read all image headers (16 bytes each)
    images = []
    nbr_images.times do
      # Bytes 0 and 1 of the header (image width and height) are not needed
      nbr_colors = @data[cursor+2].ord
      invalid_data("@#{cursor} - Invalid ICONDIRENTRY header") if (@data[cursor+3].ord != 0)
      # The 2 next fields are only checked for icons
      color_planes = BinData::Uint16le.read(@data[cursor+4..cursor+5])
      invalid_data("@#{cursor} - Invalid color planes") if ((extension == :ico) && (color_planes > 1))
      bpp = BinData::Uint16le.read(@data[cursor+6..cursor+7])
      invalid_data("@#{cursor} - Invalid bpp value") if ((extension == :ico) && (!ALLOWED_BPP_VALUES.include?(bpp)))
      invalid_data("@#{cursor} - Invalid number of colors") if ((extension == :ico) && (bpp >= 8) && (nbr_colors != 0))
      image_size = BinData::Uint32le.read(@data[cursor+8..cursor+11])
      invalid_data("@#{cursor} - Invalid image size") if (image_size == 0)
      image_offset = BinData::Uint32le.read(@data[cursor+12..cursor+15])
      images << [ image_offset, image_size ]
      cursor += 16
    end
    progress(cursor)
    # Make sure images are not overlapping.
    # Offsets are relative to the beginning of the file: the first image can't begin before the end of the headers.
    next_offset_min = cursor-offset
    images.sort.each do |image_offset, image_size|
      invalid_data("@#{cursor} - Invalid image offset: #{image_offset} could not be before #{next_offset_min} as it belongs to another image") if (image_offset < next_offset_min)
      # Restart from the real position of this image: adding sizes only would
      # give a wrong end as soon as there is a gap before or between images.
      next_offset_min = image_offset + image_size
    end
    # OK now we consider we might have a valid file
    log_debug "@#{cursor} - #{extension.to_s} file with #{nbr_images} images."
    found_relevant_data(extension)
    metadata(
      :nbr_images => nbr_images
    )
    # The file ends with its last image
    cursor = offset + next_offset_min
    progress(cursor)
    ending_offset = cursor

    return ending_offset
  end

end
68
+
69
+ end
70
+
71
+ end