format_parser 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/moov_parser.rb +80 -0
- data/lib/parsers/moov_parser/decoder.rb +285 -0
- data/lib/parsers/mp3_parser/id3_v2.rb +4 -6
- data/spec/care_spec.rb +3 -0
- data/spec/parsers/moov_parser_spec.rb +72 -0
- data/spec/parsers/mp3_parser_spec.rb +16 -0
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 33fd6ee96653abe4059457cf1ba86113fc2c2a88
|
|
4
|
+
data.tar.gz: 7f042ecb080e9d04a78d7d22102dc3905f40b0d3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9326f30de6b5344b9a17bd0832381ef5775094f1578173d5f5735b21aaff9f7358e7397ccf83b0c67ab7f4f71a7093ecf98d32007200f690026f953562211180
|
|
7
|
+
data.tar.gz: 393643805c0a9d995a07d8f7fdbd7afae28b2d617ccd0f05d456799b547a0f76fc8e160ec8e8127f20f39cd8b45ca4cd25dd516d64c8e3852a009f4e7c4ccda6
|
data/README.md
CHANGED
|
@@ -9,7 +9,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
|
9
9
|
|
|
10
10
|
## Currently supported filetypes:
|
|
11
11
|
|
|
12
|
-
`TIFF, PSD, PNG, MP3, JPEG, GIF, DPX, AIFF, WAV, FDX`
|
|
12
|
+
`TIFF, PSD, PNG, MP3, JPEG, GIF, DPX, AIFF, WAV, FDX, MOV, MP4`
|
|
13
13
|
|
|
14
14
|
...with more on the way!
|
|
15
15
|
|
|
@@ -66,5 +66,12 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
|
66
66
|
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
|
67
67
|
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
### MP3
|
|
70
|
+
- Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
|
|
71
|
+
|
|
72
|
+
### FDX
|
|
70
73
|
- fixture.fdx was created by one of the project maintainers and is MIT licensed
|
|
74
|
+
|
|
75
|
+
### MOOV
|
|
76
|
+
- bmff.mp4 is borrowed from the [bmff](https://github.com/zuku/bmff) project
|
|
77
|
+
- Test_Circular MOV files were created by one of the project maintainers and are MIT licensed
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
class FormatParser::MOOVParser
|
|
2
|
+
include FormatParser::IOUtils
|
|
3
|
+
|
|
4
|
+
require_relative 'moov_parser/decoder'
|
|
5
|
+
|
|
6
|
+
# Maps values of the "ftyp" atom to something
|
|
7
|
+
# we can reasonably call "file type" (something
|
|
8
|
+
# usable as a filename extension)
|
|
9
|
+
FTYP_MAP = {
|
|
10
|
+
"qt " => :mov,
|
|
11
|
+
"mp4 " => :mp4,
|
|
12
|
+
"m4a " => :m4a,
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
# It is currently not documented and not particularly well-tested,
|
|
16
|
+
# so not considered a public API for now
|
|
17
|
+
private_constant :Decoder
|
|
18
|
+
|
|
19
|
+
def information_from_io(io)
|
|
20
|
+
return nil unless matches_moov_definition?(io)
|
|
21
|
+
|
|
22
|
+
# Now we know we are in a MOOV, so go back and parse out the atom structure.
|
|
23
|
+
# Parsing out the atoms does not read their contents - at least it doesn't
|
|
24
|
+
# for the atoms we consider opaque (one of which is the "mdat" atom which
|
|
25
|
+
# will be the prevalent part of the file body). We do not parse these huge
|
|
26
|
+
# atoms - we skip over them and note where they are located.
|
|
27
|
+
io.seek(0)
|
|
28
|
+
|
|
29
|
+
# We have to tell the parser how far we are willing to go within the stream.
|
|
30
|
+
# Knowing that we will bail out early anyway we will permit a large read. The
|
|
31
|
+
# branch parse calls will know the maximum size to read from the parent atom
|
|
32
|
+
# size that gets parsed just before.
|
|
33
|
+
max_read_offset = 0xFFFFFFFF
|
|
34
|
+
decoder = Decoder.new
|
|
35
|
+
atom_tree = decoder.extract_atom_stream(io, max_read_offset)
|
|
36
|
+
|
|
37
|
+
ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
|
|
38
|
+
file_type = ftyp_atom.field_value(:major_brand)
|
|
39
|
+
|
|
40
|
+
width, height = nil, nil
|
|
41
|
+
|
|
42
|
+
# Try to find the width and height in the tkhd
|
|
43
|
+
if tkhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
|
|
44
|
+
width = tkhd.field_value(:track_width).first
|
|
45
|
+
height = tkhd.field_value(:track_height).first
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Try to find the "topmost" duration (respecting edits)
|
|
49
|
+
if mdhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'mvhd')
|
|
50
|
+
timescale, duration = mdhd.field_value(:tscale), mdhd.field_value(:duration)
|
|
51
|
+
media_duration_s = duration / timescale.to_f
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
FormatParser::FileInformation.new(
|
|
55
|
+
file_nature: :video,
|
|
56
|
+
file_type: file_type_from_moov_type(file_type),
|
|
57
|
+
width_px: width,
|
|
58
|
+
height_px: height,
|
|
59
|
+
media_duration_seconds: media_duration_s,
|
|
60
|
+
intrinsics: atom_tree,
|
|
61
|
+
)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
def file_type_from_moov_type(file_type)
|
|
67
|
+
FTYP_MAP.fetch(file_type, :mov)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# An MPEG4/MOV/M4A will start with the "ftyp" atom. The atom must have a length
|
|
71
|
+
# of at least 8 (to accommodate the atom size and the atom type itself) plus the major
|
|
72
|
+
# and minor version fields. If we cannot find it we can be certain this is not our file.
|
|
73
|
+
def matches_moov_definition?(io)
|
|
74
|
+
maybe_atom_size, maybe_ftyp_atom_signature = safe_read(io, 8).unpack('N1a4')
|
|
75
|
+
minimum_ftyp_atom_size = 4 + 4 + 4 + 4
|
|
76
|
+
maybe_atom_size >= minimum_ftyp_atom_size && maybe_ftyp_atom_signature == 'ftyp'
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
FormatParser.register_parser_constructor self
|
|
80
|
+
end
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# Handles decoding of MOV/MPEG4 atoms/boxes in a stream. Will recursively
|
|
2
|
+
# read atoms and parse their data fields if applicable. Also contains
|
|
3
|
+
# a few utility functions for finding atoms in a list etc.
|
|
4
|
+
class FormatParser::MOOVParser::Decoder
|
|
5
|
+
|
|
6
|
+
class Atom < Struct.new(:at, :atom_size, :atom_type, :path, :children, :atom_fields)
|
|
7
|
+
def to_s
|
|
8
|
+
"%s (%s): %d bytes at offset %d" % [atom_type, path.join('.'), atom_size, at]
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def field_value(data_field)
|
|
12
|
+
(atom_fields || {}).fetch(data_field)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def as_json(*a)
|
|
16
|
+
members.each_with_object({}) do |member_name, o|
|
|
17
|
+
o[member_name] = public_send(member_name).as_json(*a)
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# Atoms (boxes) that are known to only contain children, no data fields
|
|
23
|
+
KNOWN_BRANCH_ATOM_TYPES = %w( moov mdia trak clip edts minf dinf stbl udta meta)
|
|
24
|
+
|
|
25
|
+
# Atoms (boxes) that are known to contain both leaves and data fields
|
|
26
|
+
KNOWN_BRANCH_AND_LEAF_ATOM_TYPES = %w( meta ) # the udta.meta thing used by iTunes
|
|
27
|
+
|
|
28
|
+
# Limit how many atoms we scan in sequence, to prevent derailments
|
|
29
|
+
MAX_ATOMS_AT_LEVEL = 128
|
|
30
|
+
|
|
31
|
+
# Finds the first atom in the given Array of Atom structs that
|
|
32
|
+
# matches the type, drilling down if a list of atom names is given
|
|
33
|
+
def find_first_atom_by_path(atoms, *atom_types)
|
|
34
|
+
type_to_find = atom_types.shift
|
|
35
|
+
requisite = atoms.find {|e| e.atom_type == type_to_find }
|
|
36
|
+
|
|
37
|
+
# Return if we found our match
|
|
38
|
+
return requisite if atom_types.empty?
|
|
39
|
+
|
|
40
|
+
# Return nil if we didn't find the match at this nesting level
|
|
41
|
+
return nil unless requisite
|
|
42
|
+
|
|
43
|
+
# ...otherwise drill further down
|
|
44
|
+
find_first_atom_by_path(requisite.children || [], *atom_types)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def parse_ftyp_atom(io, atom_size)
|
|
48
|
+
# Subtract 8 for the atom_size+atom_type,
|
|
49
|
+
# and 8 once more for the major_brand and minor_version. The remaining
|
|
50
|
+
# number of bytes is reserved for the compatible brands, 4 bytes per
|
|
51
|
+
# brand.
|
|
52
|
+
num_brands = (atom_size - 8 - 8) / 4
|
|
53
|
+
ret = {
|
|
54
|
+
major_brand: read_bytes(io, 4),
|
|
55
|
+
minor_version: read_binary_coded_decimal(io),
|
|
56
|
+
compatible_brands: (1..num_brands).map { read_bytes(io, 4) },
|
|
57
|
+
}
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def parse_tkhd_atom(io, _)
|
|
61
|
+
version = read_byte_value(io)
|
|
62
|
+
is_v1 = version == 1
|
|
63
|
+
tkhd_info_bites = [
|
|
64
|
+
:version, version,
|
|
65
|
+
:flags, read_chars(io, 3),
|
|
66
|
+
:ctime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
67
|
+
:mtime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
68
|
+
:trak_id, read_32bit_uint(io),
|
|
69
|
+
:reserved_1, read_chars(io, 4),
|
|
70
|
+
:duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
71
|
+
:reserved_2, read_chars(io, 8),
|
|
72
|
+
:layer, read_16bit_uint(io),
|
|
73
|
+
:alternate_group, read_16bit_uint(io),
|
|
74
|
+
:volume, read_16bit_uint(io),
|
|
75
|
+
:reserved_3, read_chars(io, 2),
|
|
76
|
+
:matrix_structure, (1..9).map { read_32bit_fixed_point(io) },
|
|
77
|
+
:track_width, read_32bit_fixed_point(io),
|
|
78
|
+
:track_height, read_32bit_fixed_point(io),
|
|
79
|
+
]
|
|
80
|
+
repack(tkhd_info_bites)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def parse_mdhd_atom(io, _)
|
|
84
|
+
version = read_byte_value(io)
|
|
85
|
+
is_v1 = version == 1
|
|
86
|
+
mdhd_info_bites = [
|
|
87
|
+
:version, version,
|
|
88
|
+
:flags, read_bytes(io, 3),
|
|
89
|
+
:ctime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
90
|
+
:mtime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
91
|
+
:tscale, read_32bit_uint(io),
|
|
92
|
+
:duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
93
|
+
:language, read_32bit_uint(io),
|
|
94
|
+
:quality, read_32bit_uint(io),
|
|
95
|
+
]
|
|
96
|
+
repack(mdhd_info_bites)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def parse_vmhd_atom(io, _)
|
|
100
|
+
vmhd_info_bites = [
|
|
101
|
+
:version, read_byte_value(io),
|
|
102
|
+
:flags, read_bytes(io, 3),
|
|
103
|
+
:graphics_mode, read_bytes(io, 2),
|
|
104
|
+
:opcolor_r, read_32bit_uint(io),
|
|
105
|
+
:opcolor_g, read_32bit_uint(io),
|
|
106
|
+
:opcolor_b, read_32bit_uint(io),
|
|
107
|
+
]
|
|
108
|
+
repack(vmhd_info_bites)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def parse_mvhd_atom(io, _)
|
|
112
|
+
version = read_byte_value(io)
|
|
113
|
+
is_v1 = version == 1
|
|
114
|
+
mvhd_info_bites = [
|
|
115
|
+
:version, version,
|
|
116
|
+
:flags, read_bytes(io, 3),
|
|
117
|
+
:ctime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
118
|
+
:mtime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
119
|
+
:tscale, read_32bit_uint(io),
|
|
120
|
+
:duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
121
|
+
:preferred_rate, read_32bit_uint(io),
|
|
122
|
+
:reserved, read_bytes(io, 10),
|
|
123
|
+
:matrix_structure, (1..9).map { read_32bit_fixed_point(io) },
|
|
124
|
+
:preview_time, read_32bit_uint(io),
|
|
125
|
+
:preview_duration, read_32bit_uint(io),
|
|
126
|
+
:poster_time, read_32bit_uint(io),
|
|
127
|
+
:selection_time, read_32bit_uint(io),
|
|
128
|
+
:selection_duration, read_32bit_uint(io),
|
|
129
|
+
:current_time, read_32bit_uint(io),
|
|
130
|
+
:next_trak_id, read_32bit_uint(io),
|
|
131
|
+
]
|
|
132
|
+
repack(mvhd_info_bites)
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def parse_dref_atom(io, _)
|
|
136
|
+
dref_info_bites = [
|
|
137
|
+
:version, read_byte_value(io),
|
|
138
|
+
:flags, read_bytes(io, 3),
|
|
139
|
+
:num_entries, read_32bit_uint(io),
|
|
140
|
+
]
|
|
141
|
+
dict = repack(dref_info_bites)
|
|
142
|
+
num_entries = dict[:num_entries]
|
|
143
|
+
entries = (1..num_entries).map do
|
|
144
|
+
dref_entry_bites = [
|
|
145
|
+
:size, read_32bit_uint(io),
|
|
146
|
+
:type, read_bytes(io, 4),
|
|
147
|
+
:version, read_bytes(io, 1),
|
|
148
|
+
:flags, read_bytes(io, 3),
|
|
149
|
+
]
|
|
150
|
+
entry = repack(dref_entry_bites)
|
|
151
|
+
entry[:data] = read_bytes(io, entry[:size] - 12)
|
|
152
|
+
entry
|
|
153
|
+
end
|
|
154
|
+
dict[:entries] = entries
|
|
155
|
+
dict
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def parse_elst_atom(io, _)
|
|
159
|
+
elst_info_bites = [
|
|
160
|
+
:version, read_byte_value(io),
|
|
161
|
+
:flags, read_bytes(io, 3),
|
|
162
|
+
:num_entries, read_32bit_uint(io),
|
|
163
|
+
]
|
|
164
|
+
dict = repack(elst_info_bites)
|
|
165
|
+
is_v1 = dict[:version] == 1 # Usual is 0, version 1 has 64bit durations
|
|
166
|
+
num_entries = dict[:num_entries]
|
|
167
|
+
entries = (1..num_entries).map do
|
|
168
|
+
entry_bites = [
|
|
169
|
+
:track_duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
170
|
+
:media_time, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
171
|
+
:media_rate, read_32bit_uint(io),
|
|
172
|
+
]
|
|
173
|
+
repack(entry_bites)
|
|
174
|
+
end
|
|
175
|
+
dict[:entries] = entries
|
|
176
|
+
dict
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def parse_hdlr_atom(io, atom_size)
|
|
180
|
+
sub_io = StringIO.new(io.read(atom_size - 8))
|
|
181
|
+
hdlr_info_bites = [
|
|
182
|
+
:version, read_byte_value(sub_io),
|
|
183
|
+
:flags, read_bytes(sub_io, 3),
|
|
184
|
+
:component_type, read_bytes(sub_io, 4),
|
|
185
|
+
:component_subtype, read_bytes(sub_io, 4),
|
|
186
|
+
:component_manufacturer, read_bytes(sub_io, 4),
|
|
187
|
+
:component_flags, read_bytes(sub_io, 4),
|
|
188
|
+
:component_flags_mask, read_bytes(sub_io, 4),
|
|
189
|
+
:component_name, sub_io.read,
|
|
190
|
+
]
|
|
191
|
+
repack(hdlr_info_bites)
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def parse_atom_fields_per_type(io, atom_size, atom_type)
|
|
195
|
+
if respond_to?("parse_#{atom_type}_atom", including_privates = true)
|
|
196
|
+
send("parse_#{atom_type}_atom", io, atom_size)
|
|
197
|
+
else
|
|
198
|
+
nil # We can't look inside this leaf atom
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
# Recursive descent parser - will drill down to atoms which
|
|
203
|
+
# we know are permitted to have leaf/branch atoms within itself,
|
|
204
|
+
# and will attempt to recover the data fields for leaf atoms
|
|
205
|
+
def extract_atom_stream(io, max_read, current_branch = [])
|
|
206
|
+
initial_pos = io.pos
|
|
207
|
+
atoms = []
|
|
208
|
+
MAX_ATOMS_AT_LEVEL.times do
|
|
209
|
+
atom_pos = io.pos
|
|
210
|
+
|
|
211
|
+
if atom_pos - initial_pos >= max_read
|
|
212
|
+
break
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
size_and_type = io.read(4+4)
|
|
216
|
+
if size_and_type.to_s.bytesize < 8
|
|
217
|
+
break
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
atom_size, atom_type = size_and_type.unpack('Na4')
|
|
221
|
+
|
|
222
|
+
# If atom_size is specified to be 1, it is larger than what fits into the
|
|
223
|
+
# 4 bytes and we need to read it right after the atom type
|
|
224
|
+
if atom_size == 1
|
|
225
|
+
atom_size = read_64bit_uint(io)
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
children, fields = if KNOWN_BRANCH_AND_LEAF_ATOM_TYPES.include?(atom_type)
|
|
229
|
+
parse_atom_children_and_data_fields(io, atom_size, atom_type)
|
|
230
|
+
elsif KNOWN_BRANCH_ATOM_TYPES.include?(atom_type)
|
|
231
|
+
[extract_atom_stream(io, atom_size - 8, current_branch + [atom_type]), nil]
|
|
232
|
+
else # Assume leaf atom
|
|
233
|
+
[nil, parse_atom_fields_per_type(io, atom_size, atom_type)]
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
atoms << Atom.new(atom_pos, atom_size, atom_type, current_branch + [atom_type], children, fields)
|
|
237
|
+
|
|
238
|
+
io.seek(atom_pos + atom_size)
|
|
239
|
+
end
|
|
240
|
+
atoms
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
def read_16bit_fixed_point(io)
|
|
244
|
+
whole, fraction = io.read(2).unpack('CC')
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
def read_32bit_fixed_point(io)
|
|
248
|
+
whole, fraction = io.read(4).unpack('nn')
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def read_chars(io, n)
|
|
252
|
+
io.read(n)
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
def read_byte_value(io)
|
|
256
|
+
io.read(1).unpack('C').first
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
def read_bytes(io, n)
|
|
260
|
+
io.read(n)
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
def read_16bit_uint(io)
|
|
264
|
+
io.read(2).unpack('n').first
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def read_32bit_uint(io)
|
|
268
|
+
io.read(4).unpack('N').first
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def read_64bit_uint(io)
|
|
272
|
+
io.read(8).unpack('Q>').first
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
def read_binary_coded_decimal(io)
|
|
276
|
+
bcd_string = io.read(4)
|
|
277
|
+
bcd_string.insert(0, '0') if bcd_string.length.odd?
|
|
278
|
+
[bcd_string].pack('H*').unpack('C*')
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
def repack(properties_to_packspecs)
|
|
282
|
+
keys, bytes = properties_to_packspecs.partition.with_index { |_, i| i.even? }
|
|
283
|
+
Hash[keys.zip(bytes)]
|
|
284
|
+
end
|
|
285
|
+
end
|
|
@@ -2,7 +2,6 @@ module FormatParser::MP3Parser::ID3V2
|
|
|
2
2
|
def attempt_id3_v2_extraction(io)
|
|
3
3
|
io.seek(0) # Only support header ID3v2
|
|
4
4
|
header_bytes = io.read(10)
|
|
5
|
-
|
|
6
5
|
return nil unless header_bytes
|
|
7
6
|
|
|
8
7
|
header = parse_id3_v2_header(header_bytes)
|
|
@@ -50,12 +49,11 @@ module FormatParser::MP3Parser::ID3V2
|
|
|
50
49
|
end
|
|
51
50
|
|
|
52
51
|
def parse_id3_v2_frame(io)
|
|
53
|
-
id,
|
|
54
|
-
size = decode_syncsafe_int(
|
|
52
|
+
id, syncsafe_size, flags = io.read(10).unpack('a4a4a2')
|
|
53
|
+
size = decode_syncsafe_int(syncsafe_size)
|
|
55
54
|
content = io.read(size)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
end
|
|
55
|
+
# It might so happen in situations of terrible invalidity that we end up
|
|
56
|
+
# with less data than advertised by the syncsafe size. We will just truck on.
|
|
59
57
|
{id: id, size: size, flags: flags, content: content}
|
|
60
58
|
end
|
|
61
59
|
|
data/spec/care_spec.rb
CHANGED
|
@@ -71,9 +71,12 @@ describe Care do
|
|
|
71
71
|
|
|
72
72
|
subject = Care::IOWrapper.new(io_double, cache_double)
|
|
73
73
|
|
|
74
|
+
expect(subject.pos).to eq(0)
|
|
74
75
|
subject.read(2)
|
|
75
76
|
subject.read(3)
|
|
77
|
+
expect(subject.pos).to eq(5)
|
|
76
78
|
subject.seek(11)
|
|
79
|
+
expect(subject.pos).to eq(11)
|
|
77
80
|
subject.read(5)
|
|
78
81
|
|
|
79
82
|
expect(cache_double.recorded_calls).to be_kind_of(Array)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe FormatParser::MOOVParser do
|
|
4
|
+
|
|
5
|
+
def deep_print_atoms(atoms, output, swimlanes = [])
|
|
6
|
+
return unless atoms
|
|
7
|
+
|
|
8
|
+
mid = '├'
|
|
9
|
+
last = '└'
|
|
10
|
+
horz = '─'
|
|
11
|
+
vert = '│'
|
|
12
|
+
cdn = '┬'
|
|
13
|
+
n_atoms = atoms.length
|
|
14
|
+
|
|
15
|
+
atoms.each_with_index do |atom, i|
|
|
16
|
+
is_last_child = i == (n_atoms - 1)
|
|
17
|
+
has_children = atom.children && atom.children.any?
|
|
18
|
+
connector = is_last_child ? last : mid
|
|
19
|
+
connector_down = has_children ? cdn : horz
|
|
20
|
+
connector_left = is_last_child ? ' ' : vert
|
|
21
|
+
|
|
22
|
+
output << swimlanes.join << connector << connector_down << horz << atom.to_s << "\n"
|
|
23
|
+
if af = atom.atom_fields
|
|
24
|
+
af.each do |(field, value)|
|
|
25
|
+
output << swimlanes.join << connector_left << (' %s: %s' % [field, value.inspect]) << "\n"
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
deep_print_atoms(atom.children, output, swimlanes + [connector_left])
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
Dir.glob(fixtures_dir + '/MOOV/**/*.*').sort.each do |moov_path|
|
|
33
|
+
it "is able to parse #{File.basename(moov_path)}" do
|
|
34
|
+
result = subject.information_from_io(File.open(moov_path, 'rb'))
|
|
35
|
+
|
|
36
|
+
expect(result).not_to be_nil
|
|
37
|
+
expect(result.file_nature).to eq(:video)
|
|
38
|
+
expect(result.width_px).to be > 0
|
|
39
|
+
expect(result.height_px).to be > 0
|
|
40
|
+
expect(result.media_duration_seconds).to be_kind_of(Float)
|
|
41
|
+
expect(result.media_duration_seconds).to be > 0
|
|
42
|
+
|
|
43
|
+
expect(result.intrinsics).not_to be_nil
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it 'parses an M4A file and provides the necessary metadata'
|
|
48
|
+
|
|
49
|
+
it 'parses a MOV file and provides the necessary metadata' do
|
|
50
|
+
mov_path = fixtures_dir + '/MOOV/MOV/Test_Circular_ProRes422.mov'
|
|
51
|
+
|
|
52
|
+
result = subject.information_from_io(File.open(mov_path, 'rb'))
|
|
53
|
+
|
|
54
|
+
expect(result).not_to be_nil
|
|
55
|
+
expect(result.file_nature).to eq(:video)
|
|
56
|
+
expect(result.file_type).to eq(:mov)
|
|
57
|
+
expect(result.width_px).to eq(1920)
|
|
58
|
+
expect(result.height_px).to eq(1080)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it 'parses an MP4 video file and provides the necessary metadata' do
|
|
62
|
+
mov_path = fixtures_dir + '/MOOV/MP4/bmff.mp4'
|
|
63
|
+
|
|
64
|
+
result = subject.information_from_io(File.open(mov_path, 'rb'))
|
|
65
|
+
|
|
66
|
+
expect(result).not_to be_nil
|
|
67
|
+
expect(result.file_nature).to eq(:video)
|
|
68
|
+
expect(result.file_type).to eq(:mov)
|
|
69
|
+
expect(result.width_px).to eq(160)
|
|
70
|
+
expect(result.height_px).to eq(90)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -28,4 +28,20 @@ describe FormatParser::MP3Parser do
|
|
|
28
28
|
expect(parsed.intrinsics).not_to be_nil
|
|
29
29
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.81)
|
|
30
30
|
end
|
|
31
|
+
|
|
32
|
+
it 'parses the Cassy MP3' do
|
|
33
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
|
34
|
+
parsed = subject.information_from_io(File.open(fpath, 'rb'))
|
|
35
|
+
|
|
36
|
+
expect(parsed).not_to be_nil
|
|
37
|
+
|
|
38
|
+
expect(parsed.file_nature).to eq(:audio)
|
|
39
|
+
expect(parsed.file_type).to eq(:mp3)
|
|
40
|
+
expect(parsed.num_audio_channels).to eq(2)
|
|
41
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
|
42
|
+
expect(parsed.intrinsics).not_to be_nil
|
|
43
|
+
expect(parsed.media_duration_seconds).to be_within(0.1).of(1102.46)
|
|
44
|
+
|
|
45
|
+
expect(parsed.intrinsics).not_to be_nil
|
|
46
|
+
end
|
|
31
47
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: format_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Noah Berman
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2018-01-
|
|
12
|
+
date: 2018-01-16 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: ks
|
|
@@ -156,6 +156,8 @@ files:
|
|
|
156
156
|
- lib/parsers/fdx_parser.rb
|
|
157
157
|
- lib/parsers/gif_parser.rb
|
|
158
158
|
- lib/parsers/jpeg_parser.rb
|
|
159
|
+
- lib/parsers/moov_parser.rb
|
|
160
|
+
- lib/parsers/moov_parser/decoder.rb
|
|
159
161
|
- lib/parsers/mp3_parser.rb
|
|
160
162
|
- lib/parsers/mp3_parser/id3_v1.rb
|
|
161
163
|
- lib/parsers/mp3_parser/id3_v2.rb
|
|
@@ -175,6 +177,7 @@ files:
|
|
|
175
177
|
- spec/parsers/fdx_parser_spec.rb
|
|
176
178
|
- spec/parsers/gif_parser_spec.rb
|
|
177
179
|
- spec/parsers/jpeg_parser_spec.rb
|
|
180
|
+
- spec/parsers/moov_parser_spec.rb
|
|
178
181
|
- spec/parsers/mp3_parser_spec.rb
|
|
179
182
|
- spec/parsers/png_parser_spec.rb
|
|
180
183
|
- spec/parsers/psd_parser_spec.rb
|
|
@@ -205,7 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
205
208
|
version: '0'
|
|
206
209
|
requirements: []
|
|
207
210
|
rubyforge_project:
|
|
208
|
-
rubygems_version: 2.5.
|
|
211
|
+
rubygems_version: 2.5.2
|
|
209
212
|
signing_key:
|
|
210
213
|
specification_version: 4
|
|
211
214
|
summary: A library for efficient parsing of file metadata
|