fileshunter 0.1.0.20130725
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
data/AUTHORS
ADDED
data/ChangeLog
ADDED
data/Credits
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
= Projects used by FilesHunter
|
2
|
+
|
3
|
+
== bindata
|
4
|
+
* Dion Mendel (https://github.com/dmendel)
|
5
|
+
* https://github.com/dmendel/bindata
|
6
|
+
* Used to parse binary data efficiently.
|
7
|
+
|
8
|
+
== IOBlockReader
|
9
|
+
* Muriel Salvan (http://murielsalvan.users.sourceforge.net)
|
10
|
+
* http://ioblockreader.sourceforge.net
|
11
|
+
* Used to read files efficiently.
|
12
|
+
|
13
|
+
== Ruby
|
14
|
+
* Yukihiro « matz » Matsumoto (http://www.rubyist.net/~matz/)
|
15
|
+
* http://www.ruby-lang.org/
|
16
|
+
* Thanks a lot Matz for this truly wonderful language!
|
17
|
+
|
18
|
+
== rUtilAnts
|
19
|
+
* Muriel Salvan (http://murielsalvan.users.sourceforge.net)
|
20
|
+
* http://rutilants.sourceforge.net
|
21
|
+
* Used for plugins and logging handling.
|
data/LICENSE
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
The license stated herein is a copy of the BSD License (modified on July 1999).
|
3
|
+
The AUTHOR mentioned below refers to the list of people involved in the
|
4
|
+
creation and modification of any file included in the delivered package.
|
5
|
+
This list is found in the file named AUTHORS.
|
6
|
+
The AUTHORS and LICENSE files have to be included in any release of software
|
7
|
+
embedding source code of this package, or using it as a derivative software.
|
8
|
+
|
9
|
+
Copyright (c) 2010 - 2013 Muriel Salvan (muriel@x-aeon.com)
|
10
|
+
|
11
|
+
Redistribution and use in source and binary forms, with or without
|
12
|
+
modification, are permitted provided that the following conditions are met:
|
13
|
+
|
14
|
+
1. Redistributions of source code must retain the above copyright notice,
|
15
|
+
this list of conditions and the following disclaimer.
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
18
|
+
and/or other materials provided with the distribution.
|
19
|
+
3. The name of the author may not be used to endorse or promote products
|
20
|
+
derived from this software without specific prior written permission.
|
21
|
+
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
23
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
24
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
25
|
+
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
26
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
27
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
28
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
29
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
30
|
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
31
|
+
OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
= Files Hunter
|
2
|
+
|
3
|
+
Analyze files to get their real format. Ideal to retrieve hidden and corrupted files.
|
4
|
+
|
5
|
+
== Where is the documentation ?
|
6
|
+
|
7
|
+
Check the website at https://github.com/Muriel-Salvan/files-hunter
|
8
|
+
|
9
|
+
== Who wrote it ?
|
10
|
+
|
11
|
+
Check the AUTHORS[link:AUTHORS.html] file.
|
12
|
+
|
13
|
+
== What is the license ?
|
14
|
+
|
15
|
+
You can find out in the LICENSE[link:LICENSE.html] file.
|
data/README.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
FilesHunter
|
2
|
+
=============
|
3
|
+
|
4
|
+
Analyze files to get their real format. Ideal to retrieve hidden and corrupted files.
|
5
|
+
|
6
|
+
Analyze files and guess their true content's format.
|
7
|
+
Extract hidden files from corrupted ones.
|
8
|
+
Easily extensible by adding new plug-ins for new formats.
|
9
|
+
Handles documents, videos, images, music, executables...
|
10
|
+
|
11
|
+
[See its documentation here.](http://fileshunter.sourceforge.net)
|
data/Rakefile
ADDED
data/ReleaseInfo
ADDED
data/bin/fileshunt
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'fileshunter'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
module FilesHunter

  # Command line front end of FilesHunter.
  # Parses the command line, builds the list of files to inspect, reports the
  # segments found in each of them and optionally logs and extracts them.
  class Extractor

    # Constructor
    def initialize
      # Default options
      @extract = false
      @display_help = false
      @debug = false
      @log = false
      @block_size = 134217728
      @extract_dir = nil
      # The command line parser
      @options = OptionParser.new
      @options.banner = 'fileshunt [--help] [--debug] [--extract] [--extractdir <DirectoryName>] [--log] [--blocksize <BlockSizeInBytes>] <FileName1> <FileName2> ...'
      @options.on( '--extract',
        'Extract found segments as extra files next to the analyzed ones (named from the original file name and __EXTRACT__ suffixes).') do
        @extract = true
      end
      @options.on( '--log',
        'Log found segments in various log files (named fileshunt*.log).') do
        @log = true
      end
      @options.on( '--extractdir <DirectoryName>', String,
        '<DirectoryName>: Directory name to extract to.',
        'Specify the directory where extracted files are written. If none specified, they will be written next to original files.') do |arg|
        @extract_dir = arg
      end
      @options.on( '--blocksize <BlockSizeInBytes>', Integer,
        "<BlockSizeInBytes>: Size of blocks to read at once from disk. Default = #{@block_size}.",
        'Specify the block size when reading from files') do |arg|
        @block_size = arg
      end
      @options.on( '--help',
        'Display help') do
        @display_help = true
      end
      @options.on( '--debug',
        'Activate debug logs') do
        @debug = true
      end
    end

    # Executes the extractor
    #
    # Parameters::
    # * *args* (<em>list<String></em>): Arguments given to the extractor
    def execute(args)
      # Analyze arguments
      remaining_args = @options.parse(args)

      if @display_help
        puts @options
      else
        activate_log_debug(true) if @debug
        if @log
          # Clean log files: reports are appended to them, so a previous run's content has to go
          [EXTRACTOR_LOG, EXTRACTOR_1_LOG, EXTRACTOR_2_20_LOG, EXTRACTOR_20_LOG].each do |log_file|
            File.unlink(log_file) if File.exist?(log_file)
          end
        end

        # Compute the list of files
        files = []
        if (remaining_args.empty?)
          # Put current directory
          log_debug 'Adding current directory to inspect'
          files = Dir.glob("#{Dir.getwd}/**/*")
        else
          remaining_args.each do |file_name|
            # File.exist? and not File.exists?: the latter is deprecated and removed from Ruby 3.2
            if (File.exist?(file_name))
              if (File.directory?(file_name))
                log_debug "Adding directory #{file_name} to inspect"
                files.concat(Dir.glob("#{file_name}/**/*"))
              else
                log_debug "Adding file #{file_name} to inspect"
                files << file_name
              end
            else
              # Might have wildcards in it
              log_debug "Adding filter #{file_name} to inspect"
              lst_files = Dir.glob(file_name)
              if (lst_files.empty?)
                log_warn "Unable to find file: #{file_name}"
              else
                files.concat(lst_files)
              end
            end
          end
        end
        # Remove already extracted files from the list
        files.select! { |file_name| ((file_name =~ /\.__EXTRACT__\./) == nil) }

        # Analyze them
        analyze_files(files)
      end
    end

    private

    # Log file receiving the report line of every analyzed file
    EXTRACTOR_LOG = 'fileshunt.log'
    # Log file receiving the report lines of files having exactly 1 segment
    EXTRACTOR_1_LOG = 'fileshunt_1_segment.log'
    # Log file receiving the report lines of files having up to 20 segments (and not exactly 1)
    EXTRACTOR_2_20_LOG = 'fileshunt_2-20_segments.log'
    # Log file receiving the report lines of files having more than 20 segments
    EXTRACTOR_20_LOG = 'fileshunt_20_segments.log'

    # Analyze a list of files
    #
    # Parameters::
    # * *files* (<em>list<String></em>): List of file names to analyze
    def analyze_files(files)
      nbr_files = files.size
      if (nbr_files == 0)
        log_info 'No file found to be analyzed.'
      else
        # NOTE(review): the option key is spelled :block_Size here whereas IOBlockReader.init below takes :block_size.
        # Confirm against FilesHunter::get_segments_analyzer that this key is really the one being read.
        segments_analyzer = FilesHunter::get_segments_analyzer(:block_Size => @block_size)
        start_time = Time.now
        files.each_with_index do |file_name, idx|
          log_debug "Handle file #{file_name}"
          # List of segments identified in this file
          segments = segments_analyzer.get_segments(file_name)

          report_line = "[#{file_name}] - Found #{segments.size} segments: #{segments.map { |segment| "#{segment.extensions.join(',')}#{segment.truncated ? ' (truncated)' : ''}#{segment.missing_previous_data ? ' (missing previous data)' : ''}[#{segment.begin_offset}-#{segment.end_offset}]" }}"
          log_info report_line
          if debug_activated?
            segments.each_with_index do |segment, idx_segment|
              log_debug "+ ##{idx_segment} [#{segment.begin_offset}-#{segment.end_offset}]: #{segment.extensions.join(', ')}#{segment.truncated ? ' (truncated)' : ''}#{segment.missing_previous_data ? ' (missing previous data)' : ''}"
              segment.metadata.each do |key, value|
                log_debug "  - #{key} => #{value.inspect}"
              end
            end
          end

          if @log
            # The global log always gets the line, plus 1 log chosen from the number of segments
            log_files = [EXTRACTOR_LOG]
            if (segments.size == 1)
              log_files << EXTRACTOR_1_LOG
            elsif (segments.size <= 20)
              log_files << EXTRACTOR_2_20_LOG
            else
              log_files << EXTRACTOR_20_LOG
            end
            log_files.each do |log_file|
              File.open(log_file, 'a') do |file|
                file.puts report_line
              end
            end
          end

          # Write them on disk (a file made of a single segment is not extracted)
          if (@extract and
              (segments.size > 1))
            File.open(file_name, 'rb') do |file|
              content = IOBlockReader.init(file, :block_size => 64*1048576)
              segments.each_with_index do |segment, segment_idx|
                write_segment(content, segment_idx, segment, file_name)
              end
            end
          end

          # Progress: idx+1 files are done at this point, for both the time estimate and the percentage
          nbr_files_done = idx + 1
          elapsed_time = Time.now-start_time
          total_time = (elapsed_time*nbr_files)/nbr_files_done
          $stdout.write "[#{((nbr_files_done*100)/nbr_files).to_i}%] - [ #{print_time(total_time-elapsed_time)} / #{print_time(total_time)} ] - #{file_name} \r"
        end
      end
    end

    # Write a segment to disk
    #
    # Parameters::
    # * *data* (_IOBlockReader_): The data containing the segment to be written
    # * *index* (_Fixnum_): Index of the segment
    # * *segment* (_Segment_): Segment to be written
    # * *file_name* (_String_): File name containing the original data
    def write_segment(data, index, segment, file_name)
      file_name_suffix = ".__EXTRACT__.#{sprintf('%.4d', index)}.#{segment.truncated ? 'truncated.' : ''}#{segment.missing_previous_data ? 'missing_previous_data.' : ''}#{segment.extensions[0].to_s}"
      extracted_file_name = nil
      if (@extract_dir == nil)
        extracted_file_name = "#{file_name}#{file_name_suffix}"
      else
        # The original file name (with its directories) is reproduced under the extraction directory
        extracted_file_name = "#{@extract_dir}/#{file_name}#{file_name_suffix}"
        FileUtils::mkdir_p(File.dirname(extracted_file_name))
      end
      log_info "Write extracted segment in #{extracted_file_name}"
      File.open(extracted_file_name, 'wb') do |file|
        data.each_block(segment.begin_offset..segment.end_offset-1) do |block_data|
          file.write(block_data)
        end
      end
    end

    # Get a number of seconds in a nice way
    #
    # Parameters::
    # * *nbr_secs* (_Fixnum_): Number of seconds
    # Result::
    # * _String_: Formatted way (hours:minutes:seconds)
    def print_time(nbr_secs)
      secs = nbr_secs.to_i
      mins = secs / 60
      hours = mins / 60
      return "#{hours}:#{sprintf('%.2d',mins % 60)}:#{sprintf('%.2d',secs % 60)}"
    end

  end

end
|
215
|
+
|
216
|
+
FilesHunter::Extractor.new.execute(ARGV)
|
@@ -0,0 +1,233 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
|
3
|
+
//#define DEBUG
|
4
|
+
|
5
|
+
/** Load the data block containing a given offset and update variables pointing to the underlying data.
|
6
|
+
*
|
7
|
+
* Parameters::
|
8
|
+
* * *rb_self* (_FLAC_): Self
|
9
|
+
* * *rb_data* (_IOBlockReader_): Data
|
10
|
+
* * *offset* (_int_): Offset indicating the block to be loaded
|
11
|
+
* * *ptr_str_cursor* (uint8_t**_): [Result] C-String that points on the offset
|
12
|
+
* * *ptr_str_cursor_start* (uint8_t**_): [Result] C-String that points on the offset too
|
13
|
+
* * *ptr_str_cursor_end* (uint8_t**_): [Result] C-String that points on the end of the block
|
14
|
+
* * *ptr_last_data_block* (_int*_): [Result] Boolean indicating if the data block is the last one. The bool has to be previsouly set to the value of the previous block.
|
15
|
+
**/
|
16
|
+
void load_next_block(
|
17
|
+
VALUE rb_self,
|
18
|
+
VALUE rb_data,
|
19
|
+
int offset,
|
20
|
+
uint8_t** ptr_str_cursor,
|
21
|
+
uint8_t** ptr_str_cursor_start,
|
22
|
+
uint8_t** ptr_str_cursor_end,
|
23
|
+
int* ptr_last_data_block) {
|
24
|
+
// Check that there is data to read
|
25
|
+
if (*ptr_last_data_block) {
|
26
|
+
rb_funcall(rb_self, rb_intern("truncated_data"), 2, rb_str_new2("Unable to get next block to read"), rb_ivar_get(rb_self, rb_intern("@end_offset")));
|
27
|
+
}
|
28
|
+
// Load the block in memory and get it
|
29
|
+
VALUE rb_result = rb_funcall(rb_data, rb_intern("get_block_containing_offset"), 1, INT2FIX(offset));
|
30
|
+
VALUE rb_str_data_block = rb_ary_entry(rb_result, 0);
|
31
|
+
uint32_t size_data_block = RSTRING_LEN(rb_str_data_block);
|
32
|
+
uint32_t offset_data_block = FIX2INT(rb_ary_entry(rb_result, 1));
|
33
|
+
*ptr_last_data_block = (rb_ary_entry(rb_result, 2) == Qtrue);
|
34
|
+
*ptr_str_cursor_start = RSTRING_PTR(rb_str_data_block) + offset - offset_data_block;
|
35
|
+
*ptr_str_cursor = (*ptr_str_cursor_start);
|
36
|
+
*ptr_str_cursor_end = RSTRING_PTR(rb_str_data_block) + size_data_block;
|
37
|
+
//printf("Next block loaded correctly: @%u (size=%u) &start=%u &end=%u\n", offset, size_data_block, *ptr_str_cursor_start, *ptr_str_cursor_end);
|
38
|
+
}
|
39
|
+
|
40
|
+
/** Decode data at a given cursor and cursor_bits position as a given number of samples encoded in a Rice partition
|
41
|
+
*
|
42
|
+
* Parameters::
|
43
|
+
* * *rb_self* (_FLAC_): Self
|
44
|
+
* * *rb_cursor* (_Fixnum_): Current cursor
|
45
|
+
* * *rb_cursor_bits* (_Fixnum_): Current cursor_bits
|
46
|
+
* * *rb_nbr_samples* (_Fixnum_): Number of samples to decode
|
47
|
+
* * *rb_rice_parameter* (_Fixnum_): Rice parameter
|
48
|
+
* Return::
|
49
|
+
* * _Fixnum_: New cursor
|
50
|
+
* * _Fixnum_: New cursor_bits
|
51
|
+
**/
|
52
|
+
static VALUE flac_decode_rice(
|
53
|
+
VALUE rb_self,
|
54
|
+
VALUE rb_cursor,
|
55
|
+
VALUE rb_cursor_bits,
|
56
|
+
VALUE rb_nbr_samples,
|
57
|
+
VALUE rb_rice_parameter) {
|
58
|
+
// Translate Ruby objects
|
59
|
+
uint32_t cursor = FIX2INT(rb_cursor);
|
60
|
+
uint32_t cursor_bits = FIX2INT(rb_cursor_bits);
|
61
|
+
uint32_t nbr_samples = FIX2INT(rb_nbr_samples);
|
62
|
+
uint32_t rice_parameter = FIX2INT(rb_rice_parameter);
|
63
|
+
VALUE rb_data = rb_ivar_get(rb_self, rb_intern("@data"));
|
64
|
+
// Initialize the data stream
|
65
|
+
int last_data_block = 0;
|
66
|
+
uint8_t* str_cursor;
|
67
|
+
uint8_t* str_cursor_start;
|
68
|
+
uint8_t* str_cursor_end;
|
69
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
70
|
+
// Temporary variables
|
71
|
+
uint8_t current_byte;
|
72
|
+
int found;
|
73
|
+
uint32_t bits_count;
|
74
|
+
uint32_t idx_sample;
|
75
|
+
#ifdef DEBUG
|
76
|
+
uint32_t high_part;
|
77
|
+
uint32_t low_part;
|
78
|
+
uint32_t remaining_bits_to_decode;
|
79
|
+
int32_t value;
|
80
|
+
uint32_t temp;
|
81
|
+
#endif
|
82
|
+
|
83
|
+
// Loop on samples
|
84
|
+
for (idx_sample = 0; idx_sample < nbr_samples; ++idx_sample) {
|
85
|
+
|
86
|
+
// cursor is the offset of str_cursor_start in the data stream.
|
87
|
+
// str_cursor, cursor_bits point to the data being decoded.
|
88
|
+
// str_cursor_start points to the beginning of the current data block
|
89
|
+
// str_cursor_end points to the end of the current data block
|
90
|
+
// last_data_block indicates if this is the last block
|
91
|
+
|
92
|
+
// 1. Decode next bits as a unary encoded number: this will be the high bits of the value
|
93
|
+
//printf("@%u,%u - 0 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
94
|
+
#ifdef DEBUG
|
95
|
+
printf("@%u,%u - Reading %u\n", cursor+str_cursor-str_cursor_start, cursor_bits, *str_cursor);
|
96
|
+
high_part = 0;
|
97
|
+
#endif
|
98
|
+
found = 0;
|
99
|
+
if (cursor_bits > 0) {
|
100
|
+
// Consider ending bits of current byte
|
101
|
+
current_byte = *str_cursor;
|
102
|
+
while ((cursor_bits < 8) &&
|
103
|
+
((current_byte & (1 << (7-cursor_bits))) == 0)) {
|
104
|
+
++cursor_bits;
|
105
|
+
#ifdef DEBUG
|
106
|
+
++high_part;
|
107
|
+
#endif
|
108
|
+
}
|
109
|
+
if (cursor_bits == 8) {
|
110
|
+
// Not found in the current byte
|
111
|
+
++str_cursor;
|
112
|
+
} else {
|
113
|
+
// Found it
|
114
|
+
found = 1;
|
115
|
+
}
|
116
|
+
}
|
117
|
+
//printf("@%u,%u - 0.5 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
118
|
+
if (!found) {
|
119
|
+
// Here we are byte-aligned
|
120
|
+
// str_cursor points on the byte we are starting to search from (can be at the end of our current block)
|
121
|
+
// cursor_bits has no significant value
|
122
|
+
// First check if we need to read an extra block
|
123
|
+
if (str_cursor == str_cursor_end) {
|
124
|
+
cursor += str_cursor - str_cursor_start;
|
125
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
126
|
+
}
|
127
|
+
// Now we can continue our inspection
|
128
|
+
// Loop until we find a non-null byte
|
129
|
+
while (!found) {
|
130
|
+
while ((str_cursor != str_cursor_end) &&
|
131
|
+
((*str_cursor) == 0)) {
|
132
|
+
++str_cursor;
|
133
|
+
#ifdef DEBUG
|
134
|
+
high_part += 8;
|
135
|
+
#endif
|
136
|
+
}
|
137
|
+
if (str_cursor == str_cursor_end) {
|
138
|
+
cursor += str_cursor - str_cursor_start;
|
139
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
140
|
+
} else {
|
141
|
+
found = 1;
|
142
|
+
}
|
143
|
+
}
|
144
|
+
//printf("@%u,%u - 0.8 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
145
|
+
// Here, str_cursor points on the first non-null byte
|
146
|
+
current_byte = *str_cursor;
|
147
|
+
cursor_bits = 0;
|
148
|
+
while ((cursor_bits < 8) &&
|
149
|
+
((current_byte & (1 << (7-cursor_bits))) == 0)) {
|
150
|
+
++cursor_bits;
|
151
|
+
#ifdef DEBUG
|
152
|
+
++high_part;
|
153
|
+
#endif
|
154
|
+
}
|
155
|
+
}
|
156
|
+
// Here, str_cursor and cursor_bits point on the first bit set to 1
|
157
|
+
//printf("@%u,%u - 1 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
158
|
+
|
159
|
+
// 2. Read the next rice_parameter bits: this will be the low bits of the value
|
160
|
+
#ifdef DEBUG
|
161
|
+
printf("@%u,%u - Got high part (%u). Now decode low value (%u bits)\n", cursor+str_cursor-str_cursor_start, cursor_bits, high_part, rice_parameter);
|
162
|
+
++cursor_bits;
|
163
|
+
if (cursor_bits == 8) {
|
164
|
+
cursor_bits = 0;
|
165
|
+
++str_cursor;
|
166
|
+
if (str_cursor == str_cursor_end) {
|
167
|
+
cursor += str_cursor - str_cursor_start;
|
168
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
169
|
+
}
|
170
|
+
}
|
171
|
+
if (cursor_bits + rice_parameter <= 8) {
|
172
|
+
// The value can be decoded using current byte only
|
173
|
+
low_part = ((*str_cursor) & ((1 << (8-cursor_bits)) - 1)) >> (8-cursor_bits-rice_parameter);
|
174
|
+
cursor_bits += rice_parameter;
|
175
|
+
} else {
|
176
|
+
// Decode current byte and go on next ones
|
177
|
+
low_part = (*str_cursor) & ((1 << (8-cursor_bits)) - 1);
|
178
|
+
printf("@%u,%u - A - low_part=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, low_part);
|
179
|
+
++str_cursor;
|
180
|
+
remaining_bits_to_decode = rice_parameter - 8 + cursor_bits;
|
181
|
+
cursor_bits = 0;
|
182
|
+
while (remaining_bits_to_decode > 0) {
|
183
|
+
// Here we are byte aligned
|
184
|
+
if (str_cursor == str_cursor_end) {
|
185
|
+
cursor += str_cursor - str_cursor_start;
|
186
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
187
|
+
}
|
188
|
+
if (remaining_bits_to_decode >= 8) {
|
189
|
+
low_part = (low_part << 8) + (*str_cursor);
|
190
|
+
printf("@%u,%u - B (%u) - low_part=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, remaining_bits_to_decode, low_part);
|
191
|
+
++str_cursor;
|
192
|
+
remaining_bits_to_decode -= 8;
|
193
|
+
} else {
|
194
|
+
// This byte is the last one to decode
|
195
|
+
temp = low_part;
|
196
|
+
low_part = (low_part << remaining_bits_to_decode) + ((*str_cursor) >> (8-remaining_bits_to_decode));
|
197
|
+
printf("@%u,%u - C (%u) - low_part=%u (%u + %u)\n", cursor+str_cursor-str_cursor_start, cursor_bits, remaining_bits_to_decode, low_part, (temp << remaining_bits_to_decode), (current_byte >> (8-remaining_bits_to_decode)));
|
198
|
+
cursor_bits = remaining_bits_to_decode;
|
199
|
+
remaining_bits_to_decode = 0;
|
200
|
+
}
|
201
|
+
}
|
202
|
+
}
|
203
|
+
// Here we have high_part and low_part
|
204
|
+
value = (high_part << (rice_parameter-1)) + (low_part >> 1);
|
205
|
+
if ((low_part & 1) == 1) {
|
206
|
+
value = -value-1;
|
207
|
+
}
|
208
|
+
printf("@%u,%u - Residual[%u]=%d (%u and %u)\n", cursor+str_cursor-str_cursor_start, cursor_bits, idx_sample, value, high_part, low_part);
|
209
|
+
#else
|
210
|
+
bits_count = cursor_bits + 1 + rice_parameter;
|
211
|
+
cursor_bits = (bits_count & 7);
|
212
|
+
str_cursor += (bits_count >> 3);
|
213
|
+
if (str_cursor >= str_cursor_end) {
|
214
|
+
cursor += str_cursor - str_cursor_start;
|
215
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
216
|
+
}
|
217
|
+
//printf("@%u,%u - 2 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
218
|
+
#endif
|
219
|
+
|
220
|
+
}
|
221
|
+
|
222
|
+
return rb_ary_new3(2, INT2FIX(cursor+str_cursor-str_cursor_start), INT2FIX(cursor_bits));
|
223
|
+
}
|
224
|
+
|
225
|
+
// Initialize the module
|
226
|
+
void Init__FLAC() {
|
227
|
+
VALUE rb_mFilesHunter = rb_define_module("FilesHunter");
|
228
|
+
VALUE rb_mDecoders = rb_define_module_under(rb_mFilesHunter, "Decoders");
|
229
|
+
VALUE rb_cDecoder = rb_define_class_under(rb_mFilesHunter, "Decoder", rb_cObject);
|
230
|
+
VALUE rb_cBeginPatternDecoder = rb_define_class_under(rb_mFilesHunter, "BeginPatternDecoder", rb_cDecoder);
|
231
|
+
VALUE rb_cFLAC = rb_define_class_under(rb_mDecoders, "FLAC", rb_cBeginPatternDecoder);
|
232
|
+
rb_define_method(rb_cFLAC, "decode_rice", flac_decode_rice, 4);
|
233
|
+
}
|