fileshunter 0.1.0.20130725
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
@@ -0,0 +1,369 @@
|
|
1
|
+
module FilesHunter
|
2
|
+
|
3
|
+
module Decoders
|
4
|
+
|
5
|
+
class EBML < BeginPatternDecoder
|
6
|
+
|
7
|
+
# Magic number found at the very beginning of an EBML header (binary string).
# NOTE(review): left unfrozen on purpose, as it is returned by get_begin_pattern
# to framework code that is not visible from this file.
BEGIN_PATTERN_MKV = "\x1A\x45\xDF\xA3".force_encoding(Encoding::ASCII_8BIT)
# ID of the DocType header element, as returned by decode_vint
# (bytes 0x42 0x82 with the length marker bit removed: 0x0282 == 642).
DOCTYPE_ID_INT = 642
# Raw 4 bytes ID expected right after the EBML header (compared as-is in decode).
SEGMENT_MATROSKA_ID = "\x18\x53\x80\x67".force_encoding(Encoding::ASCII_8BIT).freeze
# Map of accepted DocType values to the extension reported for the file.
ACCEPTABLE_DOCTYPES = {
  'matroska' => :mkv,
  'webm' => :webm
}.freeze
|
14
|
+
|
15
|
+
# List of possible element IDs, grouped by ID size in bytes (1 to 4).
# Taken from http://matroska.svn.sourceforge.net/viewvc/matroska/trunk/foundation_src/spectool/specdata.xml?view=markup
#
# IDs are written below as hexadecimal text and converted once, at load time,
# into frozen binary (ASCII-8BIT) strings so they can be compared directly with
# slices of the decoded data. The whole table is frozen: it is read-only data.
VALID_ELEMENT_IDS = {
  1 => %w[
    80 83 85 86 88 89 8e 8f
    91 92 96 97 98 9a 9b 9c 9f
    a0 a1 a2 a3 a4 a5 a6 a7 aa ab ae af
    b0 b2 b3 b5 b6 b7 b9 ba bb bf
    c0 c1 c4 c6 c7 c8 c9 ca cb cc cd ce cf
    d7 db
    e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee
    f0 f1 f7 fa fb fd
  ],
  2 => %w[
    4254 4255 4282 4285 4286 4287 42f2 42f3 42f7
    437c 437e
    4444 4461 447a 4484 4485 4487 4489
    450d 4598 45a3 45b9 45bc 45bd 45db 45dd
    465c 4660 4661 4662 466e 4675 467e 46ae
    47e1 47e2 47e3 47e4 47e5 47e6
    4d80 4dbb
    5031 5032 5033 5034 5035
    535f 536e 5378 537f 53ab 53ac 53b8 53b9
    54aa 54b0 54b2 54b3 54ba 54bb 54cc 54dd
    55aa 55ee
    5654 5741 5854 58d7
    61a7 6240 6264
    63a2 63c0 63c3 63c4 63c5 63c6 63c9 63ca
    6532
    6624 66a5 66bf 66fc
    67c8 68ca
    6911 6922 6924 6933 6944 6955 69a5 69bf 69fc
    6d80 6de7 6df8
    6e67 6ebc
    6fab
    7373 7384 73a4 73c4 73c5
    7446 75a1 78b5 7ba9 7d7b
    7e5b 7e7b 7e8a 7e9a 7ea5 7eb5
  ],
  3 => %w[
    22b59c 23314f 2383e3 23e383 258688
    26b240 2ad7b1 2eb524 2fb523 3a9697
    3b4040 3c83ab 3cb923 3e83bb 3eb923
  ],
  4 => %w[
    1043a770 114d9b74 1254c367 1549a966
    1654ae6b 18538067 1941a469 1a45dfa3
    1b538667 1c53bb6b 1f43b675
  ]
}.each_with_object({}) do |(id_size, hex_ids), table|
  # pack('H*') turns the hexadecimal text into the raw bytes (ASCII-8BIT string)
  table[id_size] = hex_ids.map { |hex_id| [hex_id].pack('H*').freeze }.freeze
end.freeze
|
246
|
+
|
247
|
+
# Give the pattern marking the beginning of a file handled by this decoder.
#
# Result::
# * _String_: The EBML magic number (BEGIN_PATTERN_MKV)
# * _Hash_: Options given along with the pattern (here :offset_inc => 4;
#   presumably the scan step after a match - to be confirmed in BeginPatternDecoder)
def get_begin_pattern
  pattern_options = { :offset_inc => 4 }
  [BEGIN_PATTERN_MKV, pattern_options]
end
|
250
|
+
|
251
|
+
# Decode an EBML file (Matroska or WebM) starting at a given offset of @data.
#
# The decoding goes through:
# 1. the EBML header (4 bytes ID, then its size as a variable int), in which
#    the DocType element is looked for to choose the file extension,
# 2. the Segment element that has to follow the header. If its size is known
#    the segment is skipped in one go; if its size is unknown, elements are
#    walked one by one until an unknown ID is met or @end_offset is reached.
#
# invalid_data is called when the DocType is missing or not accepted, or when
# the Segment ID is not found right after the header.
#
# Parameters::
# * *offset* (_Fixnum_): Offset of the EBML magic number in @data
# Result::
# * _Fixnum_: Offset following the last decoded byte
def decode(offset)
  # EBML encodes an "unknown" size by setting all the value bits of the
  # variable int to 1, whatever the number of bytes used (7 value bits/byte).
  # Comparing the value alone to 127 would only catch the 1-byte form and
  # would wrongly catch a real size of 127 written on several bytes.
  unknown_size = lambda { |value, width| value == ((1 << (7 * width)) - 1) }

  cursor = offset
  # Read the variable int for the header size (it follows the 4 bytes ID)
  header_size, vint_size = decode_vint(@data[cursor+4..cursor+11])
  cursor += 4 + vint_size
  progress(cursor)

  # Here we have header_size bytes for the header data.
  # Get the DocType
  max_header_cursor = cursor + header_size
  doc_type = nil
  while (cursor < max_header_cursor) && doc_type.nil?
    log_debug "=== @#{cursor} - Inspecting #{@data[cursor..cursor+20].inspect}"
    # Read next header element: its ID is read as a variable int, so it is
    # compared without its length marker bit (see DOCTYPE_ID_INT)
    element_id, vint_size = decode_vint(@data[cursor..cursor+7])
    log_debug "=== @#{cursor} - Found ID #{element_id}"
    cursor += vint_size
    # Read its size
    element_size, vint_size = decode_vint(@data[cursor..cursor+7])
    cursor += vint_size
    if element_id == DOCTYPE_ID_INT
      doc_type = @data[cursor..cursor+element_size-1]
      log_debug "=== @#{cursor} - Found DocType: #{doc_type.inspect}"
    end
    cursor += element_size
    progress(cursor)
  end
  invalid_data("@#{offset} - Unable to get the DocType from the EBML file") if doc_type.nil?
  extension = ACCEPTABLE_DOCTYPES[doc_type]
  invalid_data("@#{offset} - Unknown DocType: #{doc_type}") if extension.nil?

  # Make sure we consumed the header completely
  cursor = max_header_cursor
  # Now read the segment
  invalid_data("@#{cursor} - Invalid Segment ID") if @data[cursor..cursor+3] != SEGMENT_MATROSKA_ID
  found_relevant_data(extension)
  # Read segment size
  segment_size, vint_size = decode_vint(@data[cursor+4..cursor+11])
  log_debug "=== @#{cursor} - Found segment of size #{segment_size}"
  cursor += 4 + vint_size
  if unknown_size.call(segment_size, vint_size)
    # The size is unknown
    # We have to make a deep decoding
    while (ebml_id_size = decode_ebml_id(cursor))
      # Read element size
      element_size, vint_size = decode_vint(@data[cursor+ebml_id_size..cursor+ebml_id_size+7])
      log_debug "=== @#{cursor} - Found segment #{@data[cursor..cursor+ebml_id_size-1].inspect} (size #{ebml_id_size}) of size #{element_size} (size #{vint_size})"
      cursor += ebml_id_size + vint_size
      # An element of unknown size is entered: its children are decoded by
      # the next iterations. Otherwise its content is skipped.
      unless unknown_size.call(element_size, vint_size)
        cursor += element_size
        break if cursor == @end_offset
      end
      progress(cursor)
    end
  else
    cursor += segment_size
  end
  progress(cursor)

  return cursor
end
|
313
|
+
|
314
|
+
private
|
315
|
+
|
316
|
+
# Take the data (as a String) and read it as a variable size integer (return also the size)
#
# The position of the first bit set in the first byte gives the total number
# of bytes of the integer (1 to 8). This marker bit is removed from the value.
# invalid_data is called (it is expected to interrupt the decoding) when no
# integer can be read: nil or empty data, no marker bit in the first byte, or
# fewer bytes available than announced by the first byte.
#
# Parameters::
# * *data* (_String_): The data to decode (only the first bytes needed are used)
# Result::
# * _Fixnum_: The corresponding value
# * _Fixnum_: The size of the vint
def decode_vint(data)
  bytes = data.nil? ? [] : data.bytes.to_a
  # Size of the integer is defined in first byte only
  first_byte = bytes.first
  invalid_data("Invalid variable int encoded: no data to read (#{data.inspect})") if first_byte.nil?
  size = 1
  while (first_byte & (1 << (8 - size))) == 0
    size += 1
    invalid_data("Invalid variable int encoded: #{data}") if size > 8
  end
  # Reading past the available bytes would otherwise fail with a TypeError on nil
  if bytes.size < size
    invalid_data("Invalid variable int encoded: #{size} bytes expected but only #{bytes.size} available (#{data.inspect})")
  end
  # Replace first byte with its true value (marker bit and bits above it removed)
  bytes[0] = first_byte & ((1 << (8 - size)) - 1)
  # Read all the bytes, most significant first
  value = bytes.first(size).inject(0) { |accumulated, byte| (accumulated << 8) + byte }

  return value, size
end
|
344
|
+
|
345
|
+
# Tell if a known EBML element ID is present at a given position of @data.
#
# IDs are tried by increasing size (1 to 4 bytes) against VALID_ELEMENT_IDS;
# the first size for which the bytes at the cursor are a listed ID wins.
#
# Parameters::
# * *cursor* (_Fixnum_): The cursor
# Result::
# * _Fixnum_: Size of the decoded EBML ID, or false if not a valid EBML ID
def decode_ebml_id(cursor)
  return 1 if VALID_ELEMENT_IDS[1].include?(@data[cursor])

  2.upto(4) do |id_size|
    last_byte_index = cursor + id_size - 1
    return id_size if VALID_ELEMENT_IDS[id_size].include?(@data[cursor..last_byte_index])
  end

  false
end
|
364
|
+
|
365
|
+
end
|
366
|
+
|
367
|
+
end
|
368
|
+
|
369
|
+
end
|