fileshunter 0.1.0.20130725
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
data/AUTHORS
ADDED
data/ChangeLog
ADDED
data/Credits
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
= Projects used by FilesHunter
|
2
|
+
|
3
|
+
== bindata
|
4
|
+
* Dion Mendel (https://github.com/dmendel)
|
5
|
+
* https://github.com/dmendel/bindata
|
6
|
+
* Used to parse binary data efficiently.
|
7
|
+
|
8
|
+
== IOBlockReader
|
9
|
+
* Muriel Salvan (http://murielsalvan.users.sourceforge.net)
|
10
|
+
* http://ioblockreader.sourceforge.net
|
11
|
+
* Used to read files efficiently.
|
12
|
+
|
13
|
+
== Ruby
|
14
|
+
* Yukihiro « matz » Matsumoto (http://www.rubyist.net/~matz/)
|
15
|
+
* http://www.ruby-lang.org/
|
16
|
+
* Thanks a lot Matz for this truly wonderful language!
|
17
|
+
|
18
|
+
== rUtilAnts
|
19
|
+
* Muriel Salvan (http://murielsalvan.users.sourceforge.net)
|
20
|
+
* http://rutilants.sourceforge.net
|
21
|
+
* Used for plugins and logging handling.
|
data/LICENSE
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
The license stated herein is a copy of the BSD License (modified on July 1999).
|
3
|
+
The AUTHOR mentioned below refers to the list of people involved in the
|
4
|
+
creation and modification of any file included in the delivered package.
|
5
|
+
This list is found in the file named AUTHORS.
|
6
|
+
The AUTHORS and LICENSE files have to be included in any release of software
|
7
|
+
embedding source code of this package, or using it as a derivative software.
|
8
|
+
|
9
|
+
Copyright (c) 2010 - 2013 Muriel Salvan (muriel@x-aeon.com)
|
10
|
+
|
11
|
+
Redistribution and use in source and binary forms, with or without
|
12
|
+
modification, are permitted provided that the following conditions are met:
|
13
|
+
|
14
|
+
1. Redistributions of source code must retain the above copyright notice,
|
15
|
+
this list of conditions and the following disclaimer.
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
18
|
+
and/or other materials provided with the distribution.
|
19
|
+
3. The name of the author may not be used to endorse or promote products
|
20
|
+
derived from this software without specific prior written permission.
|
21
|
+
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
23
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
24
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
25
|
+
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
26
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
27
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
28
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
29
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
30
|
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
31
|
+
OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
= Files Hunter
|
2
|
+
|
3
|
+
Analyze files to get their real format. Ideal to retrieve hidden and corrupted files.
|
4
|
+
|
5
|
+
== Where is the documentation ?
|
6
|
+
|
7
|
+
Check the website at https://github.com/Muriel-Salvan/files-hunter
|
8
|
+
|
9
|
+
== Who wrote it ?
|
10
|
+
|
11
|
+
Check the AUTHORS[link:AUTHORS.html] file.
|
12
|
+
|
13
|
+
== What is the license ?
|
14
|
+
|
15
|
+
You can find out in the LICENSE[link:LICENSE.html] file.
|
data/README.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
FilesHunter
|
2
|
+
=============
|
3
|
+
|
4
|
+
Analyze files to get their real format. Ideal to retrieve hidden and corrupted files.
|
5
|
+
|
6
|
+
Analyze files and guess their true content's format.
|
7
|
+
Extract hidden files from corrupted ones.
|
8
|
+
Easily extensible by adding new plug-ins for new formats.
|
9
|
+
Handles documents, videos, images, music, executables...
|
10
|
+
|
11
|
+
[See its documentation here.](http://fileshunter.sourceforge.net)
|
data/Rakefile
ADDED
data/ReleaseInfo
ADDED
data/bin/fileshunt
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'fileshunter'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
module FilesHunter
|
8
|
+
|
9
|
+
class Extractor
|
10
|
+
|
11
|
+
# Constructor
#
# Gives every option its default value, then declares the command line parser
# whose handlers overwrite those values when the matching switch is parsed.
def initialize
  # Options' default values
  @extract = false
  @display_help = false
  @debug = false
  @log = false
  @block_size = 134217728
  @extract_dir = nil
  # The command line parser.
  # The --blocksize help text embeds the default block size, so @block_size has to be set before this point.
  @options = OptionParser.new do |parser|
    parser.banner = 'fileshunt [--help] [--debug] [--extract] [--extractdir <DirectoryName>] [--log] [--blocksize <BlockSizeInBytes>] <FileName1> <FileName2> ...'
    parser.on('--extract',
      'Extract found segments as extra files next to the analyzed ones (named from the original file name and __EXTRACT__ suffixes).') { @extract = true }
    parser.on('--log',
      'Log found segments in various log files (named fileshunt*.log).') { @log = true }
    parser.on('--extractdir <DirectoryName>', String,
      '<DirectoryName>: Directory name to extract to.',
      'Specify the directory where extracted files are written. If none specified, they will be written next to original files.') { |directory_name| @extract_dir = directory_name }
    parser.on('--blocksize <BlockSizeInBytes>', Integer,
      "<BlockSizeInBytes>: Size of blocks to read at once from disk. Default = #{@block_size}.",
      'Specify the block size when reading from files') { |size_in_bytes| @block_size = size_in_bytes }
    parser.on('--help',
      'Display help') { @display_help = true }
    parser.on('--debug',
      'Activate debug logs') { @debug = true }
  end
end
|
50
|
+
|
51
|
+
# Executes the extractor
#
# Parses the command line, then either displays the help or builds the list of
# files to inspect and hands it to analyze_files.
#
# Parameters::
# * *args* (<em>list<String></em>): Arguments given to the extractor
def execute(args)
  # Analyze arguments: switches update the attributes, what remains are file, directory or wildcard names
  remaining_args = @options.parse(args)

  if @display_help
    puts @options
  else
    activate_log_debug(true) if @debug
    if @log
      # Clean log files: reports are appended to them, so leftovers from a previous run have to go
      File.unlink(EXTRACTOR_LOG) if (File.exist?(EXTRACTOR_LOG))
      File.unlink(EXTRACTOR_1_LOG) if (File.exist?(EXTRACTOR_1_LOG))
      File.unlink(EXTRACTOR_2_20_LOG) if (File.exist?(EXTRACTOR_2_20_LOG))
      File.unlink(EXTRACTOR_20_LOG) if (File.exist?(EXTRACTOR_20_LOG))
    end

    # Compute the list of files
    files = []
    if (remaining_args.empty?)
      # Put current directory
      log_debug 'Adding current directory to inspect'
      files = Dir.glob("#{Dir.getwd}/**/*")
    else
      remaining_args.each do |file_name|
        # File.exist? is used here, as everywhere else in this file: File.exists? is deprecated and was removed in Ruby 3.2
        if (File.exist?(file_name))
          if (File.directory?(file_name))
            log_debug "Adding directory #{file_name} to inspect"
            files.concat(Dir.glob("#{file_name}/**/*"))
          else
            log_debug "Adding file #{file_name} to inspect"
            files << file_name
          end
        else
          # Might have wildcards in it
          log_debug "Adding filter #{file_name} to inspect"
          lst_files = Dir.glob(file_name)
          if (lst_files.empty?)
            log_warn "Unable to find file: #{file_name}"
          else
            files.concat(lst_files)
          end
        end
      end
    end
    # Remove already extracted files from the list (their name contains the .__EXTRACT__. marker)
    files.select! { |file_name| ((file_name =~ /\.__EXTRACT__\./) == nil) }

    # Analyze them
    analyze_files(files)
  end
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
# Names of the log files used when --log is given.
# Every report line is appended to EXTRACTOR_LOG, and also to exactly one of the others:
# * EXTRACTOR_1_LOG when exactly 1 segment was found,
# * EXTRACTOR_2_20_LOG when at most 20 segments were found and not exactly 1 (so 0 segments lands here too),
# * EXTRACTOR_20_LOG when more than 20 segments were found.
EXTRACTOR_LOG = 'fileshunt.log'
|
110
|
+
EXTRACTOR_1_LOG = 'fileshunt_1_segment.log'
|
111
|
+
EXTRACTOR_2_20_LOG = 'fileshunt_2-20_segments.log'
|
112
|
+
EXTRACTOR_20_LOG = 'fileshunt_20_segments.log'
|
113
|
+
|
114
|
+
# Analyze a list of files
#
# For each file: get its segments, report them (log, and log files if --log),
# extract them if --extract, then refresh the progress line on stdout.
#
# Parameters::
# * *files* (<em>list<String></em>): List of file names to analyze
def analyze_files(files)
  nbr_files = files.size
  if (nbr_files == 0)
    log_info 'No file found to be analyzed.'
  else
    # NOTE(review): this key used to be spelled :block_Size, which presumably made --blocksize a no-op.
    # :block_size matches the spelling used for IOBlockReader below - confirm against SegmentsAnalyzer's options.
    segments_analyzer = FilesHunter::get_segments_analyzer(:block_size => @block_size)
    start_time = Time.now
    files.each_with_index do |file_name, idx|
      log_debug "Handle file #{file_name}"
      # List of segments identified in this file
      segments = segments_analyzer.get_segments(file_name)

      report_line = "[#{file_name}] - Found #{segments.size} segments: #{segments.map { |segment| "#{segment.extensions.join(',')}#{segment.truncated ? ' (truncated)' : ''}#{segment.missing_previous_data ? ' (missing previous data)' : ''}[#{segment.begin_offset}-#{segment.end_offset}]" }}"
      log_info report_line
      if debug_activated?
        # Detail each segment along with its metadata
        segments.each_with_index do |segment, idx_segment|
          log_debug "+ ##{idx_segment} [#{segment.begin_offset}-#{segment.end_offset}]: #{segment.extensions.join(', ')}#{segment.truncated ? ' (truncated)' : ''}#{segment.missing_previous_data ? ' (missing previous data)' : ''}"
          segment.metadata.each do |key, value|
            log_debug "  - #{key} => #{value.inspect}"
          end
        end
      end

      if @log
        # The report always goes to the main log, plus one log chosen from the number of segments
        log_files = [EXTRACTOR_LOG]
        if (segments.size == 1)
          log_files << EXTRACTOR_1_LOG
        elsif (segments.size <= 20)
          log_files << EXTRACTOR_2_20_LOG
        else
          log_files << EXTRACTOR_20_LOG
        end
        log_files.each do |log_file|
          File.open(log_file, 'a') do |file|
            file.puts report_line
          end
        end
      end

      # Write them on disk.
      # Only files split in several segments are extracted: nothing is written for 0 or 1 segment.
      if (@extract and
          (segments.size > 1))
        File.open(file_name, 'rb') do |file|
          content = IOBlockReader.init(file, :block_size => 64*1048576)
          segments.each_with_index do |segment, segment_idx|
            write_segment(content, segment_idx, segment, file_name)
          end
        end
      end

      # Progress line: the total time is extrapolated from the average time spent per file so far
      elapsed_time = Time.now-start_time
      total_time = (elapsed_time*nbr_files)/(idx+1)
      $stdout.write "[#{((idx*100)/nbr_files).to_i}%] - [ #{print_time(total_time-elapsed_time)} / #{print_time(total_time)} ] - #{file_name}                     \r"
    end
  end
end
|
174
|
+
|
175
|
+
# Write a segment to disk
#
# The extracted file is named after the original file, followed by the
# .__EXTRACT__ marker, the 4-digits segment index, optional truncated. and
# missing_previous_data. flags, and the segment's first extension.
# It is written next to the original file, or under the extraction directory when one is set.
#
# Parameters::
# * *data* (_IOBlockReader_): The data containing the segment to be written
# * *index* (_Fixnum_): Index of the segment
# * *segment* (_Segment_): Segment to be written
# * *file_name* (_String_): File name containing the original data
def write_segment(data, index, segment, file_name)
  index_tag = format('%.4d', index)
  truncated_tag = (segment.truncated ? 'truncated.' : '')
  missing_tag = (segment.missing_previous_data ? 'missing_previous_data.' : '')
  suffix = ".__EXTRACT__.#{index_tag}.#{truncated_tag}#{missing_tag}#{segment.extensions[0].to_s}"
  target_file_name = "#{file_name}#{suffix}"
  unless @extract_dir.nil?
    # Mirror the original path under the extraction directory, creating directories as needed
    target_file_name = "#{@extract_dir}/#{target_file_name}"
    FileUtils::mkdir_p(File.dirname(target_file_name))
  end
  log_info "Write extracted segment in #{target_file_name}"
  File.open(target_file_name, 'wb') do |target_file|
    # end_offset is exclusive, hence the -1 in the inclusive range
    data.each_block(segment.begin_offset..segment.end_offset-1) { |block_data| target_file.write(block_data) }
  end
end
|
198
|
+
|
199
|
+
# Get a number of seconds in a nice way
#
# The fractional part of the seconds is dropped. Hours are not capped, minutes
# and seconds are always displayed on 2 digits.
#
# Parameters::
# * *nbr_secs* (_Fixnum_): Number of seconds
# Result::
# * _String_: Formatted way (hours:minutes:seconds)
def print_time(nbr_secs)
  total_mins, secs = nbr_secs.to_i.divmod(60)
  hours, mins = total_mins.divmod(60)
  return format('%d:%.2d:%.2d', hours, mins, secs)
end
|
211
|
+
|
212
|
+
end
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
# Script entry point: run the extractor on the command line arguments
FilesHunter::Extractor.new.execute(ARGV)
|
@@ -0,0 +1,233 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
|
3
|
+
//#define DEBUG
|
4
|
+
|
5
|
+
/** Load the data block containing a given offset and update variables pointing to the underlying data.
|
6
|
+
*
|
7
|
+
* Parameters::
|
8
|
+
* * *rb_self* (_FLAC_): Self
|
9
|
+
* * *rb_data* (_IOBlockReader_): Data
|
10
|
+
* * *offset* (_int_): Offset indicating the block to be loaded
|
11
|
+
* * *ptr_str_cursor* (uint8_t**_): [Result] C-String that points on the offset
|
12
|
+
* * *ptr_str_cursor_start* (uint8_t**_): [Result] C-String that points on the offset too
|
13
|
+
* * *ptr_str_cursor_end* (uint8_t**_): [Result] C-String that points on the end of the block
|
14
|
+
* * *ptr_last_data_block* (_int*_): [Result] Boolean indicating if the data block is the last one. The bool has to be previsouly set to the value of the previous block.
|
15
|
+
**/
|
16
|
+
void load_next_block(
|
17
|
+
VALUE rb_self,
|
18
|
+
VALUE rb_data,
|
19
|
+
int offset,
|
20
|
+
uint8_t** ptr_str_cursor,
|
21
|
+
uint8_t** ptr_str_cursor_start,
|
22
|
+
uint8_t** ptr_str_cursor_end,
|
23
|
+
int* ptr_last_data_block) {
|
24
|
+
// Check that there is data to read
|
25
|
+
if (*ptr_last_data_block) {
|
26
|
+
rb_funcall(rb_self, rb_intern("truncated_data"), 2, rb_str_new2("Unable to get next block to read"), rb_ivar_get(rb_self, rb_intern("@end_offset")));
|
27
|
+
}
|
28
|
+
// Load the block in memory and get it
|
29
|
+
VALUE rb_result = rb_funcall(rb_data, rb_intern("get_block_containing_offset"), 1, INT2FIX(offset));
|
30
|
+
VALUE rb_str_data_block = rb_ary_entry(rb_result, 0);
|
31
|
+
uint32_t size_data_block = RSTRING_LEN(rb_str_data_block);
|
32
|
+
uint32_t offset_data_block = FIX2INT(rb_ary_entry(rb_result, 1));
|
33
|
+
*ptr_last_data_block = (rb_ary_entry(rb_result, 2) == Qtrue);
|
34
|
+
*ptr_str_cursor_start = RSTRING_PTR(rb_str_data_block) + offset - offset_data_block;
|
35
|
+
*ptr_str_cursor = (*ptr_str_cursor_start);
|
36
|
+
*ptr_str_cursor_end = RSTRING_PTR(rb_str_data_block) + size_data_block;
|
37
|
+
//printf("Next block loaded correctly: @%u (size=%u) &start=%u &end=%u\n", offset, size_data_block, *ptr_str_cursor_start, *ptr_str_cursor_end);
|
38
|
+
}
|
39
|
+
|
40
|
+
/** Decode data at a given cursor and cursor_bits position as a given number of samples encoded in a Rice partition
|
41
|
+
*
|
42
|
+
* Parameters::
|
43
|
+
* * *rb_self* (_FLAC_): Self
|
44
|
+
* * *rb_cursor* (_Fixnum_): Current cursor
|
45
|
+
* * *rb_cursor_bits* (_Fixnum_): Current cursor_bits
|
46
|
+
* * *rb_nbr_samples* (_Fixnum_): Number of samples to decode
|
47
|
+
* * *rb_rice_parameter* (_Fixnum_): Rice parameter
|
48
|
+
* Return::
|
49
|
+
* * _Fixnum_: New cursor
|
50
|
+
* * _Fixnum_: New cursor_bits
|
51
|
+
**/
|
52
|
+
static VALUE flac_decode_rice(
|
53
|
+
VALUE rb_self,
|
54
|
+
VALUE rb_cursor,
|
55
|
+
VALUE rb_cursor_bits,
|
56
|
+
VALUE rb_nbr_samples,
|
57
|
+
VALUE rb_rice_parameter) {
|
58
|
+
// Translate Ruby objects
|
59
|
+
uint32_t cursor = FIX2INT(rb_cursor);
|
60
|
+
uint32_t cursor_bits = FIX2INT(rb_cursor_bits);
|
61
|
+
uint32_t nbr_samples = FIX2INT(rb_nbr_samples);
|
62
|
+
uint32_t rice_parameter = FIX2INT(rb_rice_parameter);
|
63
|
+
VALUE rb_data = rb_ivar_get(rb_self, rb_intern("@data"));
|
64
|
+
// Initialize the data stream
|
65
|
+
int last_data_block = 0;
|
66
|
+
uint8_t* str_cursor;
|
67
|
+
uint8_t* str_cursor_start;
|
68
|
+
uint8_t* str_cursor_end;
|
69
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
70
|
+
// Temporary variables
|
71
|
+
uint8_t current_byte;
|
72
|
+
int found;
|
73
|
+
uint32_t bits_count;
|
74
|
+
uint32_t idx_sample;
|
75
|
+
#ifdef DEBUG
|
76
|
+
uint32_t high_part;
|
77
|
+
uint32_t low_part;
|
78
|
+
uint32_t remaining_bits_to_decode;
|
79
|
+
int32_t value;
|
80
|
+
uint32_t temp;
|
81
|
+
#endif
|
82
|
+
|
83
|
+
// Loop on samples
|
84
|
+
for (idx_sample = 0; idx_sample < nbr_samples; ++idx_sample) {
|
85
|
+
|
86
|
+
// cursor is the offset of str_cursor_start in the data stream.
|
87
|
+
// str_cursor, cursor_bits point to the data being decoded.
|
88
|
+
// str_cursor_start points to the beginning of the current data block
|
89
|
+
// str_cursor_end points to the end of the current data block
|
90
|
+
// last_data_block indicates if this is the last block
|
91
|
+
|
92
|
+
// 1. Decode next bits as a unary encoded number: this will be the high bits of the value
|
93
|
+
//printf("@%u,%u - 0 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
94
|
+
#ifdef DEBUG
|
95
|
+
printf("@%u,%u - Reading %u\n", cursor+str_cursor-str_cursor_start, cursor_bits, *str_cursor);
|
96
|
+
high_part = 0;
|
97
|
+
#endif
|
98
|
+
found = 0;
|
99
|
+
if (cursor_bits > 0) {
|
100
|
+
// Consider ending bits of current byte
|
101
|
+
current_byte = *str_cursor;
|
102
|
+
while ((cursor_bits < 8) &&
|
103
|
+
((current_byte & (1 << (7-cursor_bits))) == 0)) {
|
104
|
+
++cursor_bits;
|
105
|
+
#ifdef DEBUG
|
106
|
+
++high_part;
|
107
|
+
#endif
|
108
|
+
}
|
109
|
+
if (cursor_bits == 8) {
|
110
|
+
// Not found in the current byte
|
111
|
+
++str_cursor;
|
112
|
+
} else {
|
113
|
+
// Found it
|
114
|
+
found = 1;
|
115
|
+
}
|
116
|
+
}
|
117
|
+
//printf("@%u,%u - 0.5 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
118
|
+
if (!found) {
|
119
|
+
// Here we are byte-aligned
|
120
|
+
// str_cursor points on the byte we are starting to search from (can be at the end of our current block)
|
121
|
+
// cursor_bits has no significant value
|
122
|
+
// First check if we need to read an extra block
|
123
|
+
if (str_cursor == str_cursor_end) {
|
124
|
+
cursor += str_cursor - str_cursor_start;
|
125
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
126
|
+
}
|
127
|
+
// Now we can continue our inspection
|
128
|
+
// Loop until we find a non-null byte
|
129
|
+
while (!found) {
|
130
|
+
while ((str_cursor != str_cursor_end) &&
|
131
|
+
((*str_cursor) == 0)) {
|
132
|
+
++str_cursor;
|
133
|
+
#ifdef DEBUG
|
134
|
+
high_part += 8;
|
135
|
+
#endif
|
136
|
+
}
|
137
|
+
if (str_cursor == str_cursor_end) {
|
138
|
+
cursor += str_cursor - str_cursor_start;
|
139
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
140
|
+
} else {
|
141
|
+
found = 1;
|
142
|
+
}
|
143
|
+
}
|
144
|
+
//printf("@%u,%u - 0.8 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
145
|
+
// Here, str_cursor points on the first non-null byte
|
146
|
+
current_byte = *str_cursor;
|
147
|
+
cursor_bits = 0;
|
148
|
+
while ((cursor_bits < 8) &&
|
149
|
+
((current_byte & (1 << (7-cursor_bits))) == 0)) {
|
150
|
+
++cursor_bits;
|
151
|
+
#ifdef DEBUG
|
152
|
+
++high_part;
|
153
|
+
#endif
|
154
|
+
}
|
155
|
+
}
|
156
|
+
// Here, str_cursor and cursor_bits point on the first bit set to 1
|
157
|
+
//printf("@%u,%u - 1 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
158
|
+
|
159
|
+
// 2. Read the next rice_parameter bits: this will be the low bits of the value
|
160
|
+
#ifdef DEBUG
|
161
|
+
printf("@%u,%u - Got high part (%u). Now decode low value (%u bits)\n", cursor+str_cursor-str_cursor_start, cursor_bits, high_part, rice_parameter);
|
162
|
+
++cursor_bits;
|
163
|
+
if (cursor_bits == 8) {
|
164
|
+
cursor_bits = 0;
|
165
|
+
++str_cursor;
|
166
|
+
if (str_cursor == str_cursor_end) {
|
167
|
+
cursor += str_cursor - str_cursor_start;
|
168
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
169
|
+
}
|
170
|
+
}
|
171
|
+
if (cursor_bits + rice_parameter <= 8) {
|
172
|
+
// The value can be decoded using current byte only
|
173
|
+
low_part = ((*str_cursor) & ((1 << (8-cursor_bits)) - 1)) >> (8-cursor_bits-rice_parameter);
|
174
|
+
cursor_bits += rice_parameter;
|
175
|
+
} else {
|
176
|
+
// Decode current byte and go on next ones
|
177
|
+
low_part = (*str_cursor) & ((1 << (8-cursor_bits)) - 1);
|
178
|
+
printf("@%u,%u - A - low_part=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, low_part);
|
179
|
+
++str_cursor;
|
180
|
+
remaining_bits_to_decode = rice_parameter - 8 + cursor_bits;
|
181
|
+
cursor_bits = 0;
|
182
|
+
while (remaining_bits_to_decode > 0) {
|
183
|
+
// Here we are byte aligned
|
184
|
+
if (str_cursor == str_cursor_end) {
|
185
|
+
cursor += str_cursor - str_cursor_start;
|
186
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
187
|
+
}
|
188
|
+
if (remaining_bits_to_decode >= 8) {
|
189
|
+
low_part = (low_part << 8) + (*str_cursor);
|
190
|
+
printf("@%u,%u - B (%u) - low_part=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, remaining_bits_to_decode, low_part);
|
191
|
+
++str_cursor;
|
192
|
+
remaining_bits_to_decode -= 8;
|
193
|
+
} else {
|
194
|
+
// This byte is the last one to decode
|
195
|
+
temp = low_part;
|
196
|
+
low_part = (low_part << remaining_bits_to_decode) + ((*str_cursor) >> (8-remaining_bits_to_decode));
|
197
|
+
printf("@%u,%u - C (%u) - low_part=%u (%u + %u)\n", cursor+str_cursor-str_cursor_start, cursor_bits, remaining_bits_to_decode, low_part, (temp << remaining_bits_to_decode), (current_byte >> (8-remaining_bits_to_decode)));
|
198
|
+
cursor_bits = remaining_bits_to_decode;
|
199
|
+
remaining_bits_to_decode = 0;
|
200
|
+
}
|
201
|
+
}
|
202
|
+
}
|
203
|
+
// Here we have high_part and low_part
|
204
|
+
value = (high_part << (rice_parameter-1)) + (low_part >> 1);
|
205
|
+
if ((low_part & 1) == 1) {
|
206
|
+
value = -value-1;
|
207
|
+
}
|
208
|
+
printf("@%u,%u - Residual[%u]=%d (%u and %u)\n", cursor+str_cursor-str_cursor_start, cursor_bits, idx_sample, value, high_part, low_part);
|
209
|
+
#else
|
210
|
+
bits_count = cursor_bits + 1 + rice_parameter;
|
211
|
+
cursor_bits = (bits_count & 7);
|
212
|
+
str_cursor += (bits_count >> 3);
|
213
|
+
if (str_cursor >= str_cursor_end) {
|
214
|
+
cursor += str_cursor - str_cursor_start;
|
215
|
+
load_next_block(rb_self, rb_data, cursor, &str_cursor, &str_cursor_start, &str_cursor_end, &last_data_block);
|
216
|
+
}
|
217
|
+
//printf("@%u,%u - 2 cursor=%u str_cursor=%u str_cursor_end=%u\n", cursor+str_cursor-str_cursor_start, cursor_bits, cursor, str_cursor-str_cursor_start, str_cursor_end-str_cursor_start);
|
218
|
+
#endif
|
219
|
+
|
220
|
+
}
|
221
|
+
|
222
|
+
return rb_ary_new3(2, INT2FIX(cursor+str_cursor-str_cursor_start), INT2FIX(cursor_bits));
|
223
|
+
}
|
224
|
+
|
225
|
+
// Initialize the module
|
226
|
+
void Init__FLAC() {
|
227
|
+
VALUE rb_mFilesHunter = rb_define_module("FilesHunter");
|
228
|
+
VALUE rb_mDecoders = rb_define_module_under(rb_mFilesHunter, "Decoders");
|
229
|
+
VALUE rb_cDecoder = rb_define_class_under(rb_mFilesHunter, "Decoder", rb_cObject);
|
230
|
+
VALUE rb_cBeginPatternDecoder = rb_define_class_under(rb_mFilesHunter, "BeginPatternDecoder", rb_cDecoder);
|
231
|
+
VALUE rb_cFLAC = rb_define_class_under(rb_mDecoders, "FLAC", rb_cBeginPatternDecoder);
|
232
|
+
rb_define_method(rb_cFLAC, "decode_rice", flac_decode_rice, 4);
|
233
|
+
}
|