format_parser 2.3.0 → 2.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +13 -6
- data/format_parser.gemspec +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +18 -33
- data/lib/parsers/cr3_parser/decoder.rb +2 -2
- data/lib/parsers/cr3_parser.rb +13 -11
- data/lib/parsers/heif_parser.rb +46 -46
- data/lib/parsers/iso_base_media_file_format/box.rb +80 -0
- data/lib/parsers/iso_base_media_file_format/decoder.rb +342 -376
- data/lib/parsers/iso_base_media_file_format/utils.rb +89 -0
- data/lib/parsers/mov_parser/decoder.rb +53 -0
- data/lib/parsers/mov_parser.rb +48 -0
- data/lib/parsers/mp4_parser.rb +80 -0
- data/lib/parsers/pdf_parser.rb +5 -2
- data/lib/parsers/webp_parser.rb +2 -2
- data/spec/format_parser_spec.rb +1 -1
- data/spec/parsers/cr3_parser_spec.rb +3 -3
- data/spec/parsers/iso_base_media_file_format/box_spec.rb +399 -0
- data/spec/parsers/iso_base_media_file_format/decoder_spec.rb +53 -178
- data/spec/parsers/iso_base_media_file_format/utils_spec.rb +632 -0
- data/spec/parsers/mov_parser_spec.rb +90 -0
- data/spec/parsers/mp4_parser_spec.rb +114 -0
- data/spec/parsers/pdf_parser_spec.rb +37 -23
- metadata +25 -5
- data/lib/parsers/moov_parser/decoder.rb +0 -353
- data/lib/parsers/moov_parser.rb +0 -165
- data/spec/parsers/moov_parser_spec.rb +0 -144
@@ -1,353 +0,0 @@
|
|
1
|
-
# Handles decoding of MOV/MPEG4 atoms/boxes in a stream. Will recursively
|
2
|
-
# read atoms and parse their data fields if applicable. Also contains
|
3
|
-
# a few utility functions for finding atoms in a list etc.
|
4
|
-
# To know more about Atoms: https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
|
5
|
-
class FormatParser::MOOVParser::Decoder
|
6
|
-
include FormatParser::IOUtils
|
7
|
-
|
8
|
-
# One parsed atom (box): its byte offset (+at+), declared size, type, the path
# of atom types leading to it, its child atoms and its decoded data fields.
class Atom < Struct.new(:at, :atom_size, :atom_type, :path, :children, :atom_fields)
  # One-line summary: type, dotted path, size in bytes and offset.
  def to_s
    format('%s (%s): %d bytes at offset %d', atom_type, path.join('.'), atom_size, at)
  end

  # Returns the decoded value stored under +data_field+.
  # Raises KeyError when the field is absent, which includes the case
  # where the atom has no decoded fields at all.
  def field_value(data_field)
    fields = atom_fields || {}
    fields.fetch(data_field)
  end

  # Builds a Hash keyed by struct member name, calling +as_json+ (with the
  # same arguments) on every member value.
  def as_json(*a)
    members.map { |member_name| [member_name, public_send(member_name).as_json(*a)] }.to_h
  end
end
|
23
|
-
|
24
|
-
# Atoms (boxes) that are known to only contain children, no data fields.
# Avoid including udta or udta.meta here since we do not have methods
# for dealing with them yet.
# Frozen so that the shared list cannot be modified at runtime.
KNOWN_BRANCH_ATOM_TYPES = %w(moov mdia trak clip edts minf dinf stbl).freeze

# Atom types that may contain both data fields and child atoms.
# Currently empty: udta (with the udta.meta thing used by iTunes) would
# belong here once it can be handled.
KNOWN_BRANCH_AND_LEAF_ATOM_TYPES = [].freeze # %w(udta)

# Limit how many atoms we scan in sequence, to prevent derailments
MAX_ATOMS_AT_LEVEL = 128
|
34
|
-
|
35
|
-
# Walks down the atom tree following +atom_types+ and returns the first atom
# found at the end of that path. At every level only the first atom of the
# wanted type is followed. Returns nil as soon as a level has no match.
def find_first_atom_by_path(atoms, *atom_types)
  wanted_type, *deeper_types = atom_types
  match = atoms.find { |atom| atom.atom_type == wanted_type }

  # Last path element: whatever we found (or nil) is the answer
  return match if deeper_types.empty?
  # Path continues but this level has no such atom
  return nil unless match

  find_first_atom_by_path(match.children || [], *deeper_types)
end
|
50
|
-
|
51
|
-
# Returns every atom reachable by following +atom_types+ (an Array of atom
# type names, outermost first) through +atoms+. Unlike
# find_first_atom_by_path, all matching atoms at each level are followed.
# Returns an empty Array when nothing matches.
#
# The +atom_types+ Array is left untouched, so callers may pass a frozen
# or reused path.
def find_atoms_by_path(atoms, atom_types)
  type_to_find, *remaining_types = atom_types
  matches = atoms.select { |atom| atom.atom_type == type_to_find }

  # Return if we found our match
  return matches if remaining_types.empty?

  # ...otherwise drill further down (atoms without children contribute nothing)
  find_atoms_by_path(matches.flat_map(&:children).compact, remaining_types)
end
|
64
|
-
|
65
|
-
# A file can have multiple tracks. The video one is recognised through the
# hdlr atom below trak.mdia: its `component_type` must be 'mhlr' and its
# `component_subtype` must be 'vide'.
# More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
#
# Returns the first such trak atom, or nil when there is none.
def find_video_trak_atom(atoms)
  find_atoms_by_path(atoms, %w(moov trak)).find do |candidate|
    handler = find_first_atom_by_path([candidate], 'trak', 'mdia', 'hdlr')
    next false unless handler

    handler_fields = handler.atom_fields
    handler_fields[:component_type] == 'mhlr' && handler_fields[:component_subtype] == 'vide'
  end
end
|
79
|
-
|
80
|
-
# Decodes the payload of an "ftyp" atom: the major brand, the minor version
# and the list of compatible brands.
#
# +atom_size+ is the size of the payload only. extract_atom_stream subtracts
# the atom header (size + type) before dispatching here, so the header must
# not be subtracted a second time; doing so dropped the last two brands.
def parse_ftyp_atom(io, atom_size)
  # The payload starts with the major_brand and the minor_version (4 bytes
  # each). The remaining number of bytes is reserved for the compatible
  # brands, 4 bytes per brand. A payload too short for any brand gives an
  # empty (or negative) count, and therefore an empty list.
  num_brands = (atom_size - 4 - 4) / 4
  {
    major_brand: read_bytes(io, 4),
    minor_version: read_binary_coded_decimal(io),
    compatible_brands: (1..num_brands).map { read_bytes(io, 4) },
  }
end
|
92
|
-
|
93
|
-
# Decodes the payload of a "tkhd" atom. Fields are read strictly in the
# order they are listed. The second argument (the payload size) is unused.
def parse_tkhd_atom(io, _)
  version = read_byte_value(io)
  # Version 1 stores the times and the duration as 64-bit values, otherwise 32-bit
  read_time = method(version == 1 ? :read_64bit_uint : :read_32bit_uint)

  fields = { version: version }
  fields[:flags] = read_chars(io, 3)
  fields[:ctime] = read_time.call(io)
  fields[:mtime] = read_time.call(io)
  fields[:trak_id] = read_32bit_uint(io)
  fields[:reserved_1] = read_chars(io, 4)
  fields[:duration] = read_time.call(io)
  fields[:reserved_2] = read_chars(io, 8)
  fields[:layer] = read_16bit_uint(io)
  fields[:alternate_group] = read_16bit_uint(io)
  fields[:volume] = read_16bit_uint(io)
  fields[:reserved_3] = read_chars(io, 2)
  fields[:matrix_structure] = Array.new(9) { read_32bit_fixed_point(io) }
  fields[:track_width] = read_32bit_fixed_point(io)
  fields[:track_height] = read_32bit_fixed_point(io)
  fields
end
|
114
|
-
|
115
|
-
# Decodes the payload of a "stts" atom: version, flags, the
# number of entries and then one {sample_count, sample_duration} pair per
# entry. The second argument (the payload size) is unused.
def parse_stts_atom(io, _)
  version = read_byte_value(io)
  flags = read_bytes(io, 3)
  # For version 1 the entry count is read as a 64-bit value, otherwise 32-bit
  entry_count = version == 1 ? read_64bit_uint(io) : read_32bit_uint(io)

  entries = (1..entry_count).map do
    { sample_count: read_32bit_uint(io), sample_duration: read_32bit_uint(io) }
  end

  { version: version, flags: flags, number_of_entries: entry_count, entries: entries }
end
|
132
|
-
|
133
|
-
# Decodes the payload of a "stsd" atom: version, flags, the
# number of entries and the four-character codec name of every entry. The
# remainder of each entry is skipped. The second argument (the payload size)
# is unused.
def parse_stsd_atom(io, _)
  version = read_byte_value(io)
  is_v1 = version == 1
  stsd = {
    version: version,
    flags: read_bytes(io, 3),
    number_of_entries: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    codecs: []
  }
  stsd[:number_of_entries].times do
    codec_length = read_32bit_uint(io)
    # Every entry starts with an 8 byte header (the codec length and the codec
    # name). A declared length below that is malformed: seeking by it would
    # move backwards and re-read the same entry over and over, so stop here.
    break if codec_length < 8

    stsd[:codecs] << read_bytes(io, 4)
    io.seek(io.pos + codec_length - 8) # skip what follows the header we just did read
  end
  stsd
end
|
149
|
-
|
150
|
-
# Decodes the payload of a "mdhd" atom. Fields are read strictly
# in the order listed. The second argument (the payload size) is unused.
#
# The language code and the quality are 16-bit fields in the QuickTime file
# format; reading them as 32-bit values merged both into :language and made
# :quality consume 4 bytes beyond the end of the atom.
def parse_mdhd_atom(io, _)
  version = read_byte_value(io)
  is_v1 = version == 1 # version 1 widens the times and the duration to 64 bits
  {
    version: version,
    flags: read_bytes(io, 3),
    ctime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    mtime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    tscale: read_32bit_uint(io),
    duration: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    language: read_16bit_uint(io),
    quality: read_16bit_uint(io),
  }
end
|
164
|
-
|
165
|
-
# Decodes the payload of a "vmhd" atom: version,
# flags, the graphics mode (kept as the raw 2 bytes) and the opcolor.
# The second argument (the payload size) is unused.
#
# The opcolor is three 16-bit values (red, green, blue) in the QuickTime file
# format; reading each as 32 bits mixed the components together and read
# 6 bytes beyond the end of the atom.
def parse_vmhd_atom(io, _)
  {
    version: read_byte_value(io),
    flags: read_bytes(io, 3),
    graphics_mode: read_bytes(io, 2),
    opcolor_r: read_16bit_uint(io),
    opcolor_g: read_16bit_uint(io),
    opcolor_b: read_16bit_uint(io),
  }
end
|
175
|
-
|
176
|
-
# Decodes the payload of a "mvhd" atom. Fields are read strictly
# in the order listed. The second argument (the payload size) is unused.
#
# In the QuickTime file format the 32-bit preferred rate is followed by a
# 16-bit preferred volume and then 10 reserved bytes. The volume used to be
# skipped, which shifted the matrix and every later field by 2 bytes; it is
# now read into :preferred_volume.
def parse_mvhd_atom(io, _)
  version = read_byte_value(io)
  is_v1 = version == 1 # version 1 widens the times and the duration to 64 bits
  {
    version: version,
    flags: read_bytes(io, 3),
    ctime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    mtime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    tscale: read_32bit_uint(io),
    duration: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
    preferred_rate: read_32bit_uint(io),
    preferred_volume: read_16bit_uint(io),
    reserved: read_bytes(io, 10),
    matrix_structure: (1..9).map { read_32bit_fixed_point(io) },
    preview_time: read_32bit_uint(io),
    preview_duration: read_32bit_uint(io),
    poster_time: read_32bit_uint(io),
    selection_time: read_32bit_uint(io),
    selection_duration: read_32bit_uint(io),
    current_time: read_32bit_uint(io),
    next_trak_id: read_32bit_uint(io),
  }
end
|
198
|
-
|
199
|
-
# Decodes the payload of a "dref" atom: version, flags, the
# number of entries and then the entries themselves. Each entry carries its
# own size, type, version and flags, followed by (size - 12) bytes of data.
# The second argument (the payload size) is unused.
def parse_dref_atom(io, _)
  header = {
    version: read_byte_value(io),
    flags: read_bytes(io, 3),
    num_entries: read_32bit_uint(io),
  }
  header[:entries] = header[:num_entries].times.map do
    entry_size = read_32bit_uint(io)
    {
      size: entry_size,
      type: read_bytes(io, 4),
      version: read_bytes(io, 1),
      flags: read_bytes(io, 3),
      # 12 bytes of the entry (size, type, version, flags) were consumed above
      data: read_bytes(io, entry_size - 12),
    }
  end
  header
end
|
219
|
-
|
220
|
-
# Decodes the payload of an "elst" atom: version, flags, the number
# of entries and one {track_duration, media_time, media_rate} triple per
# entry. The second argument (the payload size) is unused.
def parse_elst_atom(io, _)
  version = read_byte_value(io)
  flags = read_bytes(io, 3)
  entry_count = read_32bit_uint(io)
  # Usual is 0, version 1 has 64bit durations
  read_time = method(version == 1 ? :read_64bit_uint : :read_32bit_uint)

  entries = entry_count.times.map do
    {
      track_duration: read_time.call(io),
      media_time: read_time.call(io),
      media_rate: read_32bit_uint(io),
    }
  end

  { version: version, flags: flags, num_entries: entry_count, entries: entries }
end
|
238
|
-
|
239
|
-
# Decodes the payload of a "hdlr" atom. The payload is first
# copied into an in-memory buffer of (atom_size - 8) bytes and decoded from
# there. Version, flags, component type, subtype and manufacturer are always
# returned; the component flags, flags mask and name (whatever is left in the
# buffer) are only decoded when the version is 1.
#
# NOTE(review): extract_atom_stream passes the atom size with the header
# already subtracted, so (atom_size - 8) looks 8 bytes shorter than the actual
# payload - confirm whether that is intended.
def parse_hdlr_atom(io, atom_size)
  sub_io = StringIO.new(io.read(atom_size - 8))
  version = read_byte_value(sub_io)
  fields = {
    version: version,
    flags: read_bytes(sub_io, 3),
    component_type: read_bytes(sub_io, 4),
    component_subtype: read_bytes(sub_io, 4),
    component_manufacturer: read_bytes(sub_io, 4),
  }
  return fields unless version == 1

  fields.merge(
    component_flags: read_bytes(sub_io, 4),
    component_flags_mask: read_bytes(sub_io, 4),
    component_name: sub_io.read
  )
end
|
260
|
-
|
261
|
-
# Decodes a "meta" atom by handing its payload to parse_hdlr_atom.
# Returns nil without reading anything when +atom_size+ is 0, since this
# atom can be empty.
def parse_meta_atom(io, atom_size)
  atom_size == 0 ? nil : parse_hdlr_atom(io, atom_size)
end
|
266
|
-
|
267
|
-
# Dispatches to the parse_<atom_type>_atom method (private ones included)
# when one exists, passing it +io+ and +atom_size+, and returns its result.
# Returns nil for atom types without such a method: we can't look inside
# those leaf atoms.
def parse_atom_fields_per_type(io, atom_size, atom_type)
  parser_method = "parse_#{atom_type}_atom"
  return nil unless respond_to?(parser_method, true)

  send(parser_method, io, atom_size)
end
|
274
|
-
|
275
|
-
# Handles an atom that carries both data fields and child atoms: the fields
# are decoded first, then the child atoms are read from where the field
# parser left the IO.
#
# Returns [children, fields], which is the pair extract_atom_stream
# destructures. Previously the decoded fields were thrown away and only the
# children Array was returned, so the caller would have taken its first two
# child atoms for "children" and "fields".
def parse_atom_children_and_data_fields(io, atom_size_sans_header, atom_type, current_branch)
  fields = parse_atom_fields_per_type(io, atom_size_sans_header, atom_type)
  children = extract_atom_stream(io, atom_size_sans_header, current_branch + [atom_type])
  [children, fields]
end
|
279
|
-
|
280
|
-
# Recursive descent parser - will drill down to atoms which
# we know are permitted to have leaf/branch atoms within itself,
# and will attempt to recover the data fields for leaf atoms.
#
# Reads atoms starting at the current position of +io+ until +max_read+ bytes
# have been covered, fewer than 8 header bytes are left, or
# MAX_ATOMS_AT_LEVEL atoms were read. +current_branch+ is the list of parent
# atom types. Returns an Array of Atom structs.
#
# An atom whose declared size is smaller than its own header cannot be
# stepped over. Seeking by such a size used to land on the same bytes again,
# recording the same atom until MAX_ATOMS_AT_LEVEL was hit. Scanning of the
# level now stops there. A size of 0 (the atom extends to the end of the
# file) is recorded once, without children or fields; any other undersized
# value is treated as garbage and not recorded.
def extract_atom_stream(io, max_read, current_branch = [])
  initial_pos = io.pos
  atoms = []
  MAX_ATOMS_AT_LEVEL.times do
    atom_pos = io.pos

    break if atom_pos - initial_pos >= max_read

    size_and_type = io.read(4 + 4)
    break if size_and_type.to_s.bytesize < 8

    atom_size, atom_type = size_and_type.unpack('Na4')

    # If atom_size is specified to be 1, it is larger than what fits into the
    # 4 bytes and we need to read it right after the atom type
    atom_size = read_64bit_uint(io) if atom_size == 1
    atom_header_size = io.pos - atom_pos
    atom_path = current_branch + [atom_type]

    if atom_size < atom_header_size
      atoms << Atom.new(atom_pos, atom_size, atom_type, atom_path, nil, nil) if atom_size == 0
      break
    end

    atom_size_sans_header = atom_size - atom_header_size

    children, fields = if KNOWN_BRANCH_AND_LEAF_ATOM_TYPES.include?(atom_type)
      parse_atom_children_and_data_fields(io, atom_size_sans_header, atom_type, current_branch)
    elsif KNOWN_BRANCH_ATOM_TYPES.include?(atom_type)
      [extract_atom_stream(io, atom_size_sans_header, atom_path), nil]
    else # Assume leaf atom
      [nil, parse_atom_fields_per_type(io, atom_size_sans_header, atom_type)]
    end

    atoms << Atom.new(atom_pos, atom_size, atom_type, atom_path, children, fields)

    # Continue right after this atom, no matter how much of it was consumed
    io.seek(atom_pos + atom_size)
  end
  atoms
end
|
316
|
-
|
317
|
-
# Reads 2 bytes and returns them as two unsigned 8-bit integers.
def read_16bit_fixed_point(io)
  safe_read(io, 2).unpack('CC')
end

# Reads 4 bytes and returns them as two big-endian unsigned 16-bit integers.
def read_32bit_fixed_point(io)
  safe_read(io, 4).unpack('nn')
end

# Reads +n+ bytes and returns them as they are.
def read_chars(io, n)
  safe_read(io, n)
end

# Reads one byte and returns it as an unsigned integer.
def read_byte_value(io)
  safe_read(io, 1).unpack('C')[0]
end

# Reads +n+ bytes and returns them as they are.
def read_bytes(io, n)
  safe_read(io, n)
end

# Reads a big-endian unsigned 16-bit integer.
def read_16bit_uint(io)
  safe_read(io, 2).unpack('n')[0]
end

# Reads a big-endian unsigned 32-bit integer.
def read_32bit_uint(io)
  safe_read(io, 4).unpack('N')[0]
end

# Reads a big-endian unsigned 64-bit integer.
def read_64bit_uint(io)
  safe_read(io, 8).unpack('Q>')[0]
end

# Reads 4 bytes, packs them as a hex string and returns the resulting
# bytes as an Array of integers.
# NOTE(review): pack('H*') treats the bytes read as hexadecimal text, not
# as packed BCD digits - confirm this is the intended decoding.
def read_binary_coded_decimal(io)
  [safe_read(io, 4)].pack('H*').unpack('C*')
end
|
353
|
-
end
|
data/lib/parsers/moov_parser.rb
DELETED
@@ -1,165 +0,0 @@
|
|
1
|
-
class FormatParser::MOOVParser
|
2
|
-
include FormatParser::IOUtils
|
3
|
-
require_relative 'moov_parser/decoder'
|
4
|
-
|
5
|
-
# Maps values of the "ftyp" atom to something
# we can reasonably call "file type" (something
# usable as a filename extension).
# The keys are padded with spaces to 4 bytes, because the major brand is
# read from the file as a 4 byte string. Frozen so that the shared map
# cannot be modified at runtime.
FTYP_MAP = {
  'm4a ' => :m4a,
  'mp4 ' => :mp4,
  'qt  ' => :mov,
}.freeze

# https://tools.ietf.org/html/rfc4337#section-2
# There is also video/quicktime which we should be able to capture
# here, but there is currently no detection for MOVs versus MP4s
MP4_AU_MIME_TYPE = 'audio/mp4'
MP4_MIXED_MIME_TYPE = 'video/mp4'
|
19
|
-
|
20
|
-
# Tells whether +filename+ carries one of the extensions this parser
# handles (m4a, m4v, ma4, mov, mp4), ignoring case.
#
# Always returns true or false. The extension has to be at the very end of
# the name: a name that merely has such an extension before a line break
# does not match. Anything that is not a String (nil included) is
# converted with to_s first.
def likely_match?(filename)
  filename.to_s.downcase.end_with?('.m4a', '.m4v', '.ma4', '.mov', '.mp4')
end
|
23
|
-
|
24
|
-
# Parses +io+ as a MOV/MP4/M4A file.
#
# Returns nil when the stream does not start with a suitable "ftyp" atom.
# Otherwise returns a FormatParser::Audio for the :m4a format and a
# FormatParser::Video for everything else, with the full atom tree in
# +intrinsics+.
#
# The duration is left nil when there is no moov.mvhd atom or when its
# timescale is 0, instead of dividing by zero and reporting an infinite or
# NaN duration. This matches how the frame rate treats a zero timescale.
def call(io)
  return unless matches_moov_definition?(io)

  # Now we know we are in a MOOV, so go back and parse out the atom structure.
  # Parsing out the atoms does not read their contents - at least it doesn't
  # for the atoms we consider opaque (one of which is the "mdat" atom which
  # will be the prevalent part of the file body). We do not parse these huge
  # atoms - we skip over them and note where they are located.
  io.seek(0)

  # We have to tell the parser how far we are willing to go within the stream.
  # Knowing that we will bail out early anyway we will permit a large read. The
  # branch parse calls will know the maximum size to read from the parent atom
  # size that gets parsed just before.
  max_read_offset = 0xFFFFFFFF
  decoder = Decoder.new
  atom_tree = Measurometer.instrument('format_parser.decoder.extract_atom_stream') do
    decoder.extract_atom_stream(io, max_read_offset)
  end

  ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
  file_type = ftyp_atom&.field_value(:major_brand)
  format = format_from_moov_type(file_type)

  # Try to find the width and height in the tkhd
  width, height = parse_dimensions(decoder, atom_tree)

  # Try to find the "topmost" duration (respecting edits)
  mvhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'mvhd')
  if mvhd
    timescale = mvhd.field_value(:tscale).to_f
    duration = mvhd.field_value(:duration)
    media_duration_s = duration / timescale unless timescale.zero?
  end

  # M4A only contains audio, while MP4 and friends can contain video.
  if format == :m4a
    FormatParser::Audio.new(
      format: format,
      media_duration_seconds: media_duration_s,
      content_type: MP4_AU_MIME_TYPE,
      intrinsics: atom_tree,
    )
  else
    FormatParser::Video.new(
      format: format,
      width_px: width,
      height_px: height,
      frame_rate: parse_time_to_sample_atom(decoder, atom_tree)&.truncate(2),
      media_duration_seconds: media_duration_s,
      content_type: MP4_MIXED_MIME_TYPE,
      codecs: parse_sample_description_atom(decoder, atom_tree),
      intrinsics: atom_tree
    )
  end
end
|
79
|
-
|
80
|
-
private
|
81
|
-
|
82
|
-
# Translates the major brand of the "ftyp" atom into a format Symbol using
# FTYP_MAP, ignoring case. Brands that are not in the map give :mov, and so
# does a nil brand (the caller passes nil when no brand could be read),
# which used to raise NoMethodError.
def format_from_moov_type(file_type)
  FTYP_MAP.fetch(file_type.to_s.downcase, :mov)
end
|
85
|
-
|
86
|
-
# The dimensions live in the tkhd atom. When the decoder can point out the
# video track, its tkhd is used, because other tracks (audio for instance)
# carry no dimensions; otherwise the first moov.trak.tkhd is taken.
# More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
#
# Returns [width, height] (the first element of each stored value) if a
# tkhd atom is found, [nil, nil] otherwise.
def parse_dimensions(decoder, atom_tree)
  video_trak = decoder.find_video_trak_atom(atom_tree)
  search_root, tkhd_path = if video_trak
    [[video_trak], %w(trak tkhd)]
  else
    [atom_tree, %w(moov trak tkhd)]
  end

  tkhd = decoder.find_first_atom_by_path(search_root, *tkhd_path)
  return [nil, nil] unless tkhd

  [:track_width, :track_height].map { |dimension| tkhd.field_value(dimension).first }
end
|
108
|
-
|
109
|
-
# An MPEG4/MOV/M4A will start with the "ftyp" atom. The atom must have a length
# of at least 16 (to accommodate the atom size and the atom type itself, plus the
# major brand and minor version fields). If we cannot find it we can be certain
# this is not our file. A major brand of 'crx ' is rejected as well.
def matches_moov_definition?(io)
  declared_size, signature, major_brand = safe_read(io, 12).unpack('N1a4a4')

  # size + type + major brand + minor version, 4 bytes each
  return false if declared_size < 4 * 4
  return false unless signature == 'ftyp'

  major_brand != 'crx '
end
|
117
|
-
|
118
|
-
# Sample information is found in the 'time-to-sample' stts atom.
# The media atom mdhd is needed too in order to get the movie timescale.
# Both are looked up below the video track when the decoder can point it
# out, and below the first moov.trak otherwise.
#
# Returns the mdhd timescale divided by the sample duration of the first
# stts entry, as a Float. Returns nil when either atom is missing, or when
# the timescale or that sample duration is nil or 0.
def parse_time_to_sample_atom(decoder, atom_tree)
  video_trak = decoder.find_video_trak_atom(atom_tree)
  search_root, mdia_path = if video_trak
    [[video_trak], %w(trak mdia)]
  else
    [atom_tree, %w(moov trak mdia)]
  end

  stts = decoder.find_first_atom_by_path(search_root, *mdia_path, 'minf', 'stbl', 'stts')
  mdhd = decoder.find_first_atom_by_path(search_root, *mdia_path, 'mdhd')
  return nil unless stts && mdhd

  timescale = mdhd.atom_fields[:tscale]
  sample_duration = stts.field_value(:entries).dig(0, :sample_duration)
  return nil if [timescale, sample_duration].any? { |value| value.nil? || value == 0 }

  timescale.to_f / sample_duration
end
|
147
|
-
|
148
|
-
# Returns the codecs listed in the stsd atom. The atom is looked
# up below the video track when the decoder can point it out, and below the
# first moov.trak otherwise. Returns nil when there is no stsd atom.
def parse_sample_description_atom(decoder, atom_tree)
  video_trak = decoder.find_video_trak_atom(atom_tree)
  search_root, trak_path = if video_trak
    [[video_trak], %w(trak)]
  else
    [atom_tree, %w(moov trak)]
  end

  stsd = decoder.find_first_atom_by_path(search_root, *trak_path, 'mdia', 'minf', 'stbl', 'stsd')
  return nil unless stsd

  stsd.field_value(:codecs)
end
|
163
|
-
|
164
|
-
FormatParser.register_parser new, natures: [:audio, :video], formats: FTYP_MAP.values, priority: 3
|
165
|
-
end
|
@@ -1,144 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe FormatParser::MOOVParser do
  # Debugging aid: appends a tree drawing of +atoms+ (their to_s plus every
  # decoded field) to +output+. +swimlanes+ holds the vertical connectors of
  # the parent levels.
  def deep_print_atoms(atoms, output, swimlanes = [])
    return unless atoms

    mid = '├'
    last = '└'
    horz = '─'
    vert = '│'
    cdn = '┬'
    n_atoms = atoms.length

    atoms.each_with_index do |atom, i|
      is_last_child = i == (n_atoms - 1)
      has_children = atom.children && atom.children.any?
      connector = is_last_child ? last : mid
      connector_down = has_children ? cdn : horz
      connector_left = is_last_child ? ' ' : vert

      output << swimlanes.join << connector << connector_down << horz << atom.to_s << "\n"
      if af = atom.atom_fields
        af.each do |(field, value)|
          output << swimlanes.join << connector_left << (' %s: %s' % [field, value.inspect]) << "\n"
        end
      end
      deep_print_atoms(atom.children, output, swimlanes + [connector_left])
    end
  end

  # Runs the parser on the file at +path+. The block form of File.open closes
  # the handle once parsing is done, so the examples do not leak descriptors.
  def parse_file(path)
    File.open(path, 'rb') { |file| subject.call(file) }
  end

  Dir.glob(fixtures_dir + '/MOOV/**/*.m4a').sort.each do |m4a_path|
    it "is able to parse #{File.basename(m4a_path)}" do
      result = parse_file(m4a_path)

      expect(result).not_to be_nil
      expect(result.nature).to eq(:audio)
      expect(result.media_duration_seconds).to be_kind_of(Float)
      expect(result.media_duration_seconds).to be > 0
      expect(result.content_type).to be_kind_of(String)
      expect(result.intrinsics).not_to be_nil
    end
  end

  # MOV and MP4 fixtures share the same expectations
  %w(mov mp4).each do |extension|
    Dir.glob(fixtures_dir + "/MOOV/**/*.#{extension}").sort.each do |video_path|
      it "is able to parse #{File.basename(video_path)}" do
        result = parse_file(video_path)

        expect(result).not_to be_nil
        expect(result.nature).to eq(:video)
        expect(result.width_px).to be > 0
        expect(result.height_px).to be > 0
        expect(result.media_duration_seconds).to be_kind_of(Float)
        expect(result.media_duration_seconds).to be > 0
        expect(result.content_type).to eq('video/mp4')

        expect(result.intrinsics).not_to be_nil
      end
    end
  end

  it 'parses an M4A file and provides the necessary metadata' do
    result = parse_file(fixtures_dir + '/MOOV/M4A/fixture.m4a')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:audio)
    expect(result.format).to eq(:m4a)
    expect(result.content_type).to eq('audio/mp4')
  end

  it 'parses a MOV file and provides the necessary metadata' do
    result = parse_file(fixtures_dir + '/MOOV/MOV/Test_Circular_ProRes422.mov')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:video)
    expect(result.format).to eq(:mov)
    expect(result.width_px).to eq(1920)
    expect(result.height_px).to eq(1080)
    expect(result.codecs).to eq(['apcn'])
  end

  it 'parses an MP4 video file and provides the necessary metadata' do
    result = parse_file(fixtures_dir + '/MOOV/MP4/bmff.mp4')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:video)
    expect(result.format).to eq(:mov)
    expect(result.width_px).to eq(160)
    expect(result.height_px).to eq(90)
    expect(result.frame_rate).to eq(14.98)
    expect(result.codecs).to eq(['avc1'])
  end

  it 'provides filename hints' do
    expect(subject).to be_likely_match('file.m4v')
  end

  it 'reads correctly the video dimensions' do
    result = parse_file(fixtures_dir + '/MOOV/MOV/Test_Dimensions.mov')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:video)
    expect(result.format).to eq(:mov)
    expect(result.width_px).to eq(640)
    expect(result.height_px).to eq(360)
    expect(result.frame_rate).to eq(30)
  end

  it 'does not raise error when a meta atom has size 0' do
    result = parse_file(fixtures_dir + '/MOOV/MOV/Test_Meta_Atom_With_Size_Zero.mov')

    expect(result).not_to be_nil
    expect(result.format).to eq(:mov)
  end

  it 'does not parse CR3 files' do
    result = parse_file(fixtures_dir + '/CR3/Canon EOS R10 (RAW).CR3')

    expect(result).to be_nil
  end
end
|