fileshunter 0.1.0.20130725
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
class OGG < BeginPatternDecoder
|
6
|
+
|
7
|
+
# Bytes expected at the beginning of every page: "OggS" followed by a null byte
BEGIN_PATTERN_OGG = "OggS\x00".force_encoding(Encoding::BINARY)

# Tokens looked for in the payload of beginning-of-stream pages, with the extension each one implies.
# Sorted by the least dominating extension first
KNOWN_EXTENSIONS = Hash[
  [
    [ 'vorbis', :oga ],
    [ 'theora', :ogv ]
  ].map { |token, extension| [ token.force_encoding(Encoding::BINARY), extension ] }
]
|
14
|
+
|
15
|
+
# Give the pattern identifying the beginning of an Ogg page, along with its options
#
# Result::
# * _String_: The begin pattern
# * <em>map<Symbol,Object></em>: Options associated to the pattern (:offset_inc is 5, which is also the size in bytes of the pattern)
def get_begin_pattern
  pattern_options = { :offset_inc => 5 }
  [ BEGIN_PATTERN_OGG, pattern_options ]
end
|
18
|
+
|
19
|
+
# Decode the contiguous Ogg pages found from a given offset
#
# Pages are read one after the other until either @end_offset is reached
# or the bytes following a page are not the Ogg begin pattern.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the first page
# Result::
# * _Fixnum_: Offset following the last page read
def decode(offset)
  position = offset
  candidate_extensions = [ :ogg, :ogx ] # Used unless a known token is found in a beginning page
  pages_count = 0
  started_streams = []
  file_end = nil
  until file_end
    # Page header fields read here, relative to the beginning of the page:
    # * byte 5: header type
    # * bytes 14..17: bitstream serial number
    # * bytes 18..21: page sequence index
    # * byte 26: number of segments, followed by 1 size byte per segment
    header_type = @data[position+5].ord
    invalid_data("@#{position} - Invalid header type: #{header_type}") if header_type > 7
    stream_serial = BinData::Uint32le.read(@data[position+14..position+17])
    page_index = BinData::Uint32le.read(@data[position+18..position+21])
    segments_count = @data[position+26].ord
    # The payload size is the sum of the sizes of all the segments
    payload_size = @data[position+27..position+26+segments_count].bytes.inject(0) { |total, segment_size| total + segment_size }
    log_debug("@#{position} - [ Bitstream ##{stream_serial} / Page ##{page_index} ]: Type #{header_type}, having #{segments_count} (total size of #{payload_size})")
    # Jump over the header and the segments table
    position += 27 + segments_count
    found_relevant_data(candidate_extensions)
    if (header_type & 0x02) != 0
      # Page of type BOS (Beginning of Stream): a given stream can begin only once
      invalid_data("@#{position} - Stream #{stream_serial} was already marked as begun.") if started_streams.include?(stream_serial)
      # The payload tells whether this is a video file or an audio one
      KNOWN_EXTENSIONS.each_pair do |token, token_extension|
        payload = @data[position..position+payload_size-1]
        candidate_extensions.unshift(token_extension) unless payload.index(token).nil?
        # Video takes precedence over audio
        candidate_extensions.delete(:oga) if candidate_extensions.include?(:ogv)
        found_relevant_data(candidate_extensions)
      end
      started_streams << stream_serial
    elsif (header_type & 0x04) == 0
      # Packet in the middle of a stream: its stream should have begun in a previous page
      missing_previous_data unless started_streams.include?(stream_serial)
    end
    position += payload_size
    progress(position)
    pages_count += 1
    # Stop unless another page follows immediately
    file_end = position if (position == @end_offset) || (@data[position..position+4] != BEGIN_PATTERN_OGG)
  end
  metadata( :nbr_pages => pages_count )

  file_end
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
@@ -0,0 +1,437 @@
|
|
1
|
+
# encoding: ASCII-8BIT
|
2
|
+
|
3
|
+
module FilesHunter
|
4
|
+
|
5
|
+
module Decoders
|
6
|
+
|
7
|
+
# WAV files can contain MP3 files
|
8
|
+
|
9
|
+
class RIFF < BeginPatternDecoder
|
10
|
+
|
11
|
+
# Reference: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/RIFF.html
|
12
|
+
# Reference: http://msdn.microsoft.com/en-us/library/windows/desktop/dd318189%28v=vs.85%29.aspx
|
13
|
+
# Reference: http://www.the-labs.com/Video/odmlff2-avidef.pdf
|
14
|
+
|
15
|
+
# Identifiers that can begin a file (RIFX is handled as the big endian variant by decode)
BEGIN_PATTERN_RIFF = 'RIFF'
BEGIN_PATTERN_RIFX = 'RIFX'
BEGIN_PATTERN_JUNK = 'JUNK'
# The binary ("n") flag is given through Regexp::NOENCODING: the 3 arguments form of Regexp.new
# is deprecated since Ruby 3.2 and rejected since Ruby 3.3.
BEGIN_PATTERN_FILE = Regexp.new("RIF(F|X)", Regexp::NOENCODING)

# INFO elements: element identifier => name of the metadata property it sets
INFO_ELEMENTS_ID = {
  'AGES' => :Rated,
  'CMNT' => :Comment,
  'CODE' => :EncodedBy,
  'COMM' => :Comments,
  'DIRC' => :Directory,
  'DISP' => :SoundSchemeTitle,
  'DTIM' => :DateTimeOriginal,
  'GENR' => :Genre,
  'IARL' => :ArchivalLocation,
  'IART' => :Artist,
  'IAS1' => :FirstLanguage,
  'IAS2' => :SecondLanguage,
  'IAS3' => :ThirdLanguage,
  'IAS4' => :FourthLanguage,
  'IAS5' => :FifthLanguage,
  'IAS6' => :SixthLanguage,
  'IAS7' => :SeventhLanguage,
  'IAS8' => :EighthLanguage,
  'IAS9' => :NinthLanguage,
  'IBSU' => :BaseURL,
  'ICAS' => :DefaultAudioStream,
  'ICDS' => :CostumeDesigner,
  'ICMS' => :Commissioned,
  'ICMT' => :Comment,
  'ICNM' => :Cinematographer,
  'ICNT' => :Country,
  'ICOP' => :Copyright,
  'ICRD' => :DateCreated,
  'ICRP' => :Cropped,
  'IDIM' => :Dimensions,
  'IDPI' => :DotsPerInch,
  'IDST' => :DistributedBy,
  'IEDT' => :EditedBy,
  'IENC' => :EncodedBy,
  'IENG' => :Engineer,
  'IGNR' => :Genre,
  'IKEY' => :Keywords,
  'ILGT' => :Lightness,
  'ILGU' => :LogoURL,
  'ILIU' => :LogoIconURL,
  'ILNG' => :Language,
  'IMBI' => :MoreInfoBannerImage,
  'IMBU' => :MoreInfoBannerURL,
  'IMED' => :Medium,
  'IMIT' => :MoreInfoText,
  'IMIU' => :MoreInfoURL,
  'IMUS' => :MusicBy,
  'INAM' => :Title,
  'IPDS' => :ProductionDesigner,
  'IPLT' => :NumColors,
  'IPRD' => :Product,
  'IPRO' => :ProducedBy,
  'IRIP' => :RippedBy,
  'IRTD' => :Rating,
  'ISBJ' => :Subject,
  'ISFT' => :Software,
  'ISGN' => :SecondaryGenre,
  'ISHP' => :Sharpness,
  'ISRC' => :Source,
  'ISRF' => :SourceForm,
  'ISTD' => :ProductionStudio,
  'ISTR' => :Starring,
  'ITCH' => :Technician,
  'IWMU' => :WatermarkURL,
  'IWRI' => :WrittenBy,
  'LANG' => :Language,
  'LOCA' => :Location,
  'PRT1' => :Part,
  'PRT2' => :NumberOfParts,
  'RATE' => :Rate,
  'STAR' => :Starring,
  'STAT' => :Statistics,
  'TAPE' => :TapeName,
  'TCDO' => :EndTimecode,
  'TCOD' => :StartTimecode,
  'TITL' => :Title,
  'TLEN' => :Length,
  'TORG' => :Organization,
  'TRCK' => :TrackNumber,
  'TURL' => :URL,
  'TVER' => :Version,
  'VMAJ' => :VegasVersionMajor,
  'VMIN' => :VegasVersionMinor,
  'YEAR' => :Year,
  # Exif tags
  'ecor' => :Make,
  'emdl' => :Model,
  'emnt' => :MakerNotes,
  'erel' => :RelatedImageFile,
  'etim' => :TimeCreated,
  'eucm' => :UserComment,
  'ever' => :ExifVersion
}

# Wave elements
ELEMENT_ID_WAVE = 'WAVE'
ELEMENT_ID_FORMAT = 'fmt '
ELEMENT_ID_DATA = 'data'
ELEMENT_ID_FACT = 'fact'

# AVI elements
ELEMENT_ID_AVI = 'AVI '
ELEMENT_ID_STRH = 'strh'
ELEMENT_ID_STRF = 'strf'
ELEMENT_ID_MOVI = 'movi'
ELEMENT_ID_IDX1 = 'idx1'
ELEMENT_ID_DMLH = 'dmlh'
ELEMENT_ID_IDIT = 'IDIT'
# In the maps below, each element name is associated to the map of its possible
# sub-elements, or to nil when it has none.
HDLR_ELEMENTS = {
  ELEMENT_ID_IDIT => nil,
  'ISMP' => nil,
  'avih' => nil
}
STREAM_ELEMENTS = {
  'strd' => nil,
  'strf' => nil,
  'strh' => nil,
  'strn' => nil,
  'indx' => nil
}
# Elements named ix00 to ix99
ODML_IDX_ELEMENTS = {}
100.times do |idx|
  ODML_IDX_ELEMENTS[sprintf('ix%.2d', idx)] = nil
end

# ANI elements
ELEMENT_ID_ANI = 'ACON'
ELEMENT_ID_ANIH = 'anih'
ELEMENT_ID_ICON = 'icon'
ELEMENT_ID_SEQ = 'seq '
ELEMENT_ID_RATE = 'rate'

# Every INFO element is a leaf
RIFF_INFO_ELEMENTS = {}
INFO_ELEMENTS_ID.keys.each do |info_element_id|
  RIFF_INFO_ELEMENTS[info_element_id] = nil
end
ELEMENT_ID_LIST = 'LIST'
# Elements accepted in any container (merged with the container's own elements when parsing)
RIFF_GENERIC_ELEMENTS = {
  BEGIN_PATTERN_JUNK => nil,
  ELEMENT_ID_LIST => {
    'INFO' => RIFF_INFO_ELEMENTS,
    # AVI elements
    'hdrl' => HDLR_ELEMENTS,
    'strl' => STREAM_ELEMENTS,
    ELEMENT_ID_MOVI => nil,
    # The :element_info key carries parsing options instead of naming a sub-element
    'ncdt' => { :element_info => { :ignore_unknown_elements => true } },
    'odml' => {
      ELEMENT_ID_DMLH => nil
    },
    # ANI elements
    'fram' => {
      ELEMENT_ID_ICON => nil
    }
  }.merge(ODML_IDX_ELEMENTS)
}

RIFF_ROOT_ELEMENTS = {
  # Wave elements
  ELEMENT_ID_WAVE => {
    ELEMENT_ID_FORMAT => nil,
    ELEMENT_ID_DATA => nil,
    ELEMENT_ID_FACT => nil
  },
  # AVI elements
  ELEMENT_ID_AVI => nil,
  ELEMENT_ID_IDX1 => nil,
  # ANI elements
  ELEMENT_ID_ANI => {
    ELEMENT_ID_ANIH => nil,
    ELEMENT_ID_SEQ => nil,
    ELEMENT_ID_RATE => nil
  }
}
RIFF_ELEMENTS = {
  BEGIN_PATTERN_RIFF => RIFF_ROOT_ELEMENTS,
  BEGIN_PATTERN_RIFX => RIFF_ROOT_ELEMENTS
}

# Names of the elements that are followed by a 32 bits size
RIFF_ELEMENTS_WITH_SIZE = [
  BEGIN_PATTERN_RIFF,
  BEGIN_PATTERN_RIFX,
  BEGIN_PATTERN_JUNK,
  ELEMENT_ID_LIST,
  # WAVE elements
  ELEMENT_ID_FORMAT,
  ELEMENT_ID_DATA,
  ELEMENT_ID_FACT,
  # AVI elements
  ELEMENT_ID_IDX1,
  ELEMENT_ID_DMLH,
  # ANI elements
  ELEMENT_ID_ANIH,
  ELEMENT_ID_ICON,
  ELEMENT_ID_SEQ,
  ELEMENT_ID_RATE
] +
  RIFF_INFO_ELEMENTS.keys +
  HDLR_ELEMENTS.keys +
  STREAM_ELEMENTS.keys +
  ODML_IDX_ELEMENTS.keys

# Types following the 2 digits stream number in the names of the 'movi' chunks
AVI_STREAM_TYPES = [ 'db', 'dc', 'pc', 'wb' ]

# Matches the null bytes ending a value (binary flag given the same way as for BEGIN_PATTERN_FILE)
TRAILING_00_REGEXP = Regexp.new("\x00*$".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)
|
226
|
+
|
227
|
+
# Give the pattern identifying the beginning of a RIFF or RIFX file, along with its options
#
# Result::
# * _Regexp_: The begin pattern
# * <em>map<Symbol,Object></em>: Options associated to the pattern (:offset_inc and :max_regexp_size are both 4)
def get_begin_pattern
  pattern_options = {
    :offset_inc => 4,
    :max_regexp_size => 4
  }
  [ BEGIN_PATTERN_FILE, pattern_options ]
end
|
230
|
+
|
231
|
+
# Decode a RIFF (or RIFX) file starting at a given offset
#
# The elements are walked using parse_riff_element; this method reacts to each of them:
# * a second RIFF/RIFX header stops the parsing (the file ends right before it),
# * INFO elements and the Wave 'fmt ' header are reported as metadata,
# * the form type (WAVE, AVI or ACON) sets the extension reported,
# * the chunks of the AVI 'movi' element are skipped manually.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the RIFF/RIFX header
# Result::
# * _Fixnum_: Offset following the end of the file
def decode(offset)
  ending_offset = nil

  # Check endianness: numbers are read as big endian for RIFX, little endian otherwise
  big_endian = (@data[offset..offset+3] == BEGIN_PATTERN_RIFX)
  @bindata_16 = (big_endian ? BinData::Uint16be : BinData::Uint16le)
  @bindata_32 = (big_endian ? BinData::Uint32be : BinData::Uint32le)

  # Parse RIFF
  found_RIFF = false
  found_WAVE_data = false
  found_AVI_data = false
  extension = nil
  # Beware: inside the block, "cursor" designates the variable being assigned by this very
  # statement (still nil while parsing). The block therefore only uses its own parameters and locals.
  cursor, nbr_elements = parse_riff_element(offset, RIFF_ELEMENTS) do |element_hierarchy, element_cursor, size, container_end_offset|
    element_name = element_hierarchy[-1]
    if (element_name == BEGIN_PATTERN_RIFF) || (element_name == BEGIN_PATTERN_RIFX)
      # Check we are not getting on a second RIFF file
      if found_RIFF
        # The header (name + size) of this second file takes 8 bytes before its data
        ending_offset = element_cursor - 8
        next nil
      end
      found_RIFF = true
    elsif INFO_ELEMENTS_ID[element_name] != nil
      # Standard info
      metadata( INFO_ELEMENTS_ID[element_name] => read_ascii(element_cursor, size) )
    else
      # Special cases
      case element_name

      # Wave elements
      when ELEMENT_ID_WAVE
        extension = :wav
        found_relevant_data(extension)
      when ELEMENT_ID_FORMAT
        invalid_data("@#{element_cursor} - Wave file having an invalid fmt size: #{size}") if size < 16
        # Decode header
        audio_format = @bindata_16.read(@data[element_cursor..element_cursor+1])
        num_channels = @bindata_16.read(@data[element_cursor+2..element_cursor+3])
        sample_rate = @bindata_32.read(@data[element_cursor+4..element_cursor+7])
        byte_rate = @bindata_32.read(@data[element_cursor+8..element_cursor+11])
        block_align = @bindata_16.read(@data[element_cursor+12..element_cursor+13])
        bits_per_sample = @bindata_16.read(@data[element_cursor+14..element_cursor+15])
        metadata(
          :audio_format => audio_format,
          :num_channels => num_channels,
          :sample_rate => sample_rate,
          :byte_rate => byte_rate,
          :block_align => block_align,
          :bits_per_sample => bits_per_sample
        )
      when ELEMENT_ID_DATA
        found_WAVE_data = true

      # AVI elements
      when ELEMENT_ID_AVI
        extension = :avi
        found_relevant_data(:avi)
      when ELEMENT_ID_MOVI
        # Parse the following RIFF tags manually: their names are made of
        # 2 digits (stream number) followed by one of AVI_STREAM_TYPES.
        stream_cursor = element_cursor
        stream_id = @data[stream_cursor..stream_cursor+1]
        stream_type = @data[stream_cursor+2..stream_cursor+3]
        while ((stream_cursor < container_end_offset) &&
               (stream_id.match(/^\d\d$/) != nil) &&
               (AVI_STREAM_TYPES.include?(stream_type)))
          # Read size (data is padded to an even number of bytes)
          stream_size = @bindata_32.read(@data[stream_cursor+4..stream_cursor+7])
          stream_size += 1 if stream_size.odd?
          log_debug "@#{stream_cursor} - Found AVI stream #{stream_id}#{stream_type} of size #{stream_size}"
          stream_cursor += 8 + stream_size
          stream_id = @data[stream_cursor..stream_cursor+1]
          stream_type = @data[stream_cursor+2..stream_cursor+3]
        end
        found_AVI_data = true
        next stream_cursor
      when ELEMENT_ID_IDIT
        metadata( :date_time_original => read_ascii(element_cursor, size) )

      # ANI elements
      when ELEMENT_ID_ANI
        extension = :ani
        found_relevant_data(:ani)

      end

    end

    # By default: no data
    next element_cursor
  end
  metadata( :nbr_elements => nbr_elements )
  # cursor is nil when the parsing was stopped by a second RIFF header: ending_offset is then already set
  ending_offset = cursor if ending_offset.nil?
  invalid_data("@#{ending_offset} - Missing WAVE data.") if (extension == :wav) && !found_WAVE_data
  invalid_data("@#{ending_offset} - Missing AVI data.") if (extension == :avi) && !found_AVI_data

  ending_offset
end
|
333
|
+
|
334
|
+
private
|
335
|
+
|
336
|
+
# Parse a RIFF element, calling a callback for each sub-element read (recursively)
#
# Parameters::
# * *cursor* (_Fixnum_): Current parsing cursor
# * *element_names* (<em>map<String,Object></em>): Possible element names, with their possible sub-elements (or nil if none). The special key :element_info can give parsing options: { :ignore_unknown_elements => true } accepts any element name (such elements are all considered as having a size).
# * *hierarchy* (<em>list<String></em>): The hierarchy of element names leading to this element [default = []]
# * *max_cursor* (_Fixnum_): Maximal cursor for the element. This is set using the size of the element containing the ones being parsed. Can be nil if unknown. [default = nil]
# * *&proc* (_Proc_): Code block called for each box encountered.
#   * Parameters::
#     * *element_hierarchy* (<em>list<String></em>): Complete element names hierarchy leading to this element
#     * *element_cursor* (_Fixnum_): Cursor of the beginning of this element data
#     * *element_size* (_Fixnum_): Size of this element data (nil for elements having no size)
#     * *container_end_offset* (_Fixnum_): End offset of this element's container
#   * Result::
#     * _Fixnum_: The cursor ending parsing this element, or nil to stop the parsing
# Result::
# * _Fixnum_: The new cursor after having parsed this element, or nil if the parsing was stopped
# * _Fixnum_: The number of elements parsed
def parse_riff_element(cursor, element_names, hierarchy = [], max_cursor = nil, &proc)
  nbr_elements = 0
  # Without any container limit, we are parsing root elements: they are bounded by @end_offset only
  is_root = max_cursor.nil?
  container_element_max_cursor = (is_root ? @end_offset : max_cursor)
  # Compute the map of possible element names
  complete_element_names = element_names.merge(RIFF_GENERIC_ELEMENTS)
  # The option has to be compared (and not assigned): element_names comes from constants
  element_info = element_names[:element_info]
  ignore_unknown_elements = (!element_info.nil? && (element_info[:ignore_unknown_elements] == true))
  while cursor < container_element_max_cursor
    name = @data[cursor..cursor+3]
    # Check the validity of the element
    if !ignore_unknown_elements && !complete_element_names.has_key?(name)
      log_debug "@#{cursor} - Invalid element name: #{name.inspect} within #{hierarchy.join('/')}. Known ones are: #{complete_element_names.keys.join(', ')}."
      # We consider the file is finished if the element being parsed is the root one.
      return cursor, nbr_elements if is_root
      truncated_data("@#{cursor} - No valid element found, but container element has not been parsed completely.")
    end
    # If there is a size, read it
    # Consider that if we ignore unknown elements they all HAVE a size
    size = nil
    if ignore_unknown_elements || RIFF_ELEMENTS_WITH_SIZE.include?(name)
      size = @bindata_32.read(@data[cursor+4..cursor+7])
      # Data is padded to an even number of bytes
      size += 1 if size.odd?
    end
    # This element is valid
    nbr_elements += 1
    element_hierarchy = hierarchy + [name]
    # Skip the name, and the size if any
    cursor += (size.nil? ? 4 : 8)
    element_cursor = cursor
    # End of this element's data (nil if the element has no size)
    element_end = (size.nil? ? nil : element_cursor + size)
    # Log at most the first 32 bytes of data
    preview_size = (((size != nil) && (size < 32)) ? size : [@end_offset - element_cursor, 32].min)
    log_debug "@#{cursor} - Found element #{element_hierarchy.join('/')} of size #{size} - Data: #{@data[element_cursor..element_cursor+preview_size-1].inspect}"
    # Parse this element's data
    element_cursor_end = yield(element_hierarchy, element_cursor, size, container_element_max_cursor)
    if element_cursor_end.nil?
      # The callback asked to stop the parsing
      cursor = nil
      break
    end
    invalid_data("@#{cursor} - Element parsing exceeded its element's size (#{element_cursor_end} > #{element_end})") if !element_end.nil? && (element_cursor_end > element_end)
    if element_cursor_end > container_element_max_cursor
      if is_root
        # For root elements, this error is a synonym of truncated data as container_element_max_cursor is set arbitrarily to @end_offset
        truncated_data("@#{cursor} - Element parsing exceeded its container limit (#{element_cursor_end} > #{container_element_max_cursor})", container_element_max_cursor)
      else
        invalid_data("@#{cursor} - Element parsing exceeded its container limit (#{element_cursor_end} > #{container_element_max_cursor})")
      end
    end
    cursor = element_cursor_end
    sub_element_names = complete_element_names[name]
    if !sub_element_names.nil? && (cursor < container_element_max_cursor)
      # Now call sub-elements that should start at current cursor
      new_cursor, nbr_subelements = parse_riff_element(cursor, sub_element_names, element_hierarchy, (element_end.nil? ? container_element_max_cursor : element_end), &proc)
      nbr_elements += nbr_subelements
      cursor = new_cursor
      break if new_cursor.nil?
      # Check cursor is at the correct position
      invalid_data("@#{cursor} - Element parsing should have stopped at #{element_end} but is instead at #{cursor}") if !element_end.nil? && (cursor != element_end)
    end
    unless element_end.nil?
      invalid_data("@#{cursor} - Element #{element_hierarchy.join('/')} with size #{size} finishes at cursor #{element_end}, but container element set maximal cursor to #{container_element_max_cursor}.") if element_end > container_element_max_cursor
      # Whatever was read by the callback, the next element starts after this one's data
      cursor = element_end
    end
    progress(cursor)
  end

  return cursor, nbr_elements
end
|
421
|
+
|
422
|
+
# Read an ASCII value, without its trailing null bytes and surrounding white spaces
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor to read from
# * *size* (_Fixnum_): Size of the string
# Result::
# * _String_: Resulting string
def read_ascii(cursor, size)
  raw_value = @data[cursor..cursor+size-1]
  unpadded_value = raw_value.gsub(TRAILING_00_REGEXP, '')
  unpadded_value.strip
end
|
432
|
+
|
433
|
+
end
|
434
|
+
|
435
|
+
end
|
436
|
+
|
437
|
+
end
|