format_parser 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/format_parser.gemspec +1 -0
- data/lib/care.rb +9 -2
- data/lib/file_information.rb +4 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mp3_parser.rb +246 -0
- data/lib/parsers/mp3_parser/id3_v1.rb +54 -0
- data/lib/parsers/mp3_parser/id3_v2.rb +86 -0
- data/lib/read_limiter.rb +5 -0
- data/spec/care_spec.rb +8 -1
- data/spec/parsers/mp3_parser_spec.rb +31 -0
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0ef6923a01b8fbe52f4491979a3be1224ea7018
|
4
|
+
data.tar.gz: a19859f36a81154f73d1071857834cd0e7e1ca74
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca6bd5d8324a4dcb41d6f3137a1ab621a016acf8e9fafae5983bb8c325f883ecc72b4046d774e423e5e4ca7f35d048dae20a1e6ca2287d842c111c2714e2d606
|
7
|
+
data.tar.gz: 99d6517341e8b48635c8540e7f92cb48e772ed02047eb6cd56412b21de2411c06d64d43878ffa1a3ce43bf8e82b7e886eef4f00b221bfdfe42bafc19435f3f35
|
data/README.md
CHANGED
@@ -7,6 +7,12 @@ minimum amount of data possible.
|
|
7
7
|
`format_parser` is inspired by [imagesize](https://rubygems.org/gems/imagesize), [fastimage](https://github.com/sdsykes/fastimage)
|
8
8
|
and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them where appropriate.
|
9
9
|
|
10
|
+
## Currently supported filetypes:
|
11
|
+
|
12
|
+
`TIFF, PSD, PNG, MP3, JPEG, GIF, DPX, AIFF`
|
13
|
+
|
14
|
+
...with more on the way!
|
15
|
+
|
10
16
|
## Basic usage
|
11
17
|
|
12
18
|
Pass an IO object that responds to `read` and `seek` to `FormatParser`.
|
data/format_parser.gemspec
CHANGED
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
|
+
spec.add_dependency 'ks', '~> 0.0.1'
|
35
36
|
spec.add_dependency 'exifr', '~> 1.0'
|
36
37
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
38
|
|
data/lib/care.rb
CHANGED
@@ -39,6 +39,10 @@ class Care
|
|
39
39
|
clear
|
40
40
|
@io.close if @io.respond_to?(:close)
|
41
41
|
end
|
42
|
+
|
43
|
+
def size
|
44
|
+
@io.size
|
45
|
+
end
|
42
46
|
end
|
43
47
|
|
44
48
|
# Stores cached pages of data from the given IO as strings.
|
@@ -58,7 +62,10 @@ class Care
|
|
58
62
|
# or fetch pages where necessary
|
59
63
|
def byteslice(io, at, n_bytes)
|
60
64
|
if n_bytes < 1
|
61
|
-
raise ArgumentError, "The number of bytes to fetch must be a positive Integer"
|
65
|
+
raise ArgumentError, "The number of bytes to fetch must be a positive Integer"
|
66
|
+
end
|
67
|
+
if at < 0
|
68
|
+
raise ArgumentError, "Negative offsets are not supported (got #{at})"
|
62
69
|
end
|
63
70
|
|
64
71
|
first_page = at / @page_size
|
@@ -124,7 +131,7 @@ class Care
|
|
124
131
|
# to read following this one, so we can also optimize
|
125
132
|
@lowest_known_empty_page = page_i + 1
|
126
133
|
end
|
127
|
-
|
134
|
+
|
128
135
|
read_result
|
129
136
|
end
|
130
137
|
end
|
data/lib/file_information.rb
CHANGED
@@ -58,6 +58,10 @@ module FormatParser
|
|
58
58
|
# as an Integer
|
59
59
|
attr_accessor :media_duration_frames
|
60
60
|
|
61
|
+
# If a parser wants to provide any extra information to the caller
|
62
|
+
# it can be placed here
|
63
|
+
attr_accessor :intrinsics
|
64
|
+
|
61
65
|
# Only permits assignments via defined accessors
|
62
66
|
def initialize(**attributes)
|
63
67
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
@@ -0,0 +1,246 @@
|
|
1
|
+
require 'ks'
|
2
|
+
|
3
|
+
class FormatParser::MP3Parser
|
4
|
+
require_relative 'mp3_parser/id3_v1'
|
5
|
+
require_relative 'mp3_parser/id3_v2'
|
6
|
+
|
7
|
+
class MPEGFrame < Ks.strict(:offset_in_file, :mpeg_id, :channels, :sample_rate, :frame_length, :frame_bitrate)
|
8
|
+
end
|
9
|
+
|
10
|
+
class VBRHeader < Ks.strict(:frames, :byte_count, :toc_entries, :vbr_scale)
|
11
|
+
end
|
12
|
+
|
13
|
+
class MP3Info < Ks.strict(:duration_seconds, :num_channels, :sampling_rate)
|
14
|
+
end
|
15
|
+
|
16
|
+
class InvalidDeepFetch < KeyError
|
17
|
+
end
|
18
|
+
|
19
|
+
# We limit the number of MPEG frames we scan
|
20
|
+
# to obtain our duration estimation
|
21
|
+
MAX_FRAMES_TO_SCAN = 128
|
22
|
+
|
23
|
+
# Default frame size for mp3
|
24
|
+
SAMPLES_PER_FRAME = 1152
|
25
|
+
|
26
|
+
# Builds a FormatParser::FileInformation describing an MP3 stream.
#
# The duration comes from the Xing header when parse_mpeg_frames returns one.
# Otherwise it is estimated from the averages of the frames that were scanned
# and the number of bytes left over once the ID3 tags are discounted.
#
# @param io an IO-like object; size, pos and seek are called on it here
# @return [FormatParser::FileInformation, nil] nil when no MPEG frames were found
def information_from_io(io)
  # Read the last 128 bytes which might contain ID3v1
  id3_v1 = ID3V1.attempt_id3_v1_extraction(io)
  # Read the header bytes that might contain ID3v2
  id3_v2 = ID3V2.attempt_id3_v2_extraction(io)

  # Compute how many bytes are occupied by the actual MPEG frames.
  # When an ID3v2 tag was found, the current position of the IO is used
  # as the number of leading bytes to skip.
  ignore_bytes_at_tail = id3_v1 ? 128 : 0
  ignore_bytes_at_head = id3_v2 ? io.pos : 0
  bytes_used_by_frames = io.size - ignore_bytes_at_head - ignore_bytes_at_tail

  io.seek(ignore_bytes_at_head)

  maybe_xing_header, initial_frames = parse_mpeg_frames(io)

  return nil if initial_frames.empty?

  first_frame = initial_frames.first

  file_info = FormatParser::FileInformation.new(
    file_nature: :audio,
    file_type: :mp3,
    num_audio_channels: first_frame.channels,
    audio_sample_rate_hz: first_frame.sample_rate,
    # media_duration_frames is omitted because the frames
    # in MPEG are not the same thing as in a movie file - they
    # do not tell anything of substance
    intrinsics: {
      id3_v1: id3_v1 ? id3_v1.to_h : nil,
      id3_v2: id3_v2 ? id3_v2.map(&:to_h) : nil,
      xing_header: maybe_xing_header.to_h,
      initial_frames: initial_frames.map(&:to_h)
    }
  )

  if maybe_xing_header
    # The Xing header carries the total frame count, so no estimation is needed
    file_info.media_duration_seconds =
      maybe_xing_header.frames * SAMPLES_PER_FRAME / first_frame.sample_rate.to_f
    return file_info
  end

  # Estimate duration using the frames we did parse - to have an exact one
  # we would need to have all the frames and thus read most of the file
  avg_frame_size = float_average_over(initial_frames, :frame_length)
  avg_sample_rate = float_average_over(initial_frames, :sample_rate)

  est_frame_count = bytes_used_by_frames / avg_frame_size
  est_samples = est_frame_count * SAMPLES_PER_FRAME

  file_info.media_duration_seconds = est_samples / avg_sample_rate
  file_info
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# The implementation of the MPEG frames parsing is mostly based on tinytag,
|
85
|
+
# a sweet little Python library for parsing audio metadata - do check it out
|
86
|
+
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
87
|
+
# Scans the IO from its current position for MPEG frame headers, performing
# at most MAX_FRAMES_TO_SCAN read attempts.
#
# @param io an IO-like object; read, pos and seek are called on it
# @return [Array] a two-element Array: the Xing header (or nil) and the
#   Array of frames parsed so far. When the first frame found carries a
#   Xing header that is usable for duration, scanning stops right there.
def parse_mpeg_frames(io)
  mpeg_frames = []

  MAX_FRAMES_TO_SCAN.times do
    # Read through until we can latch onto the 11 sync bits. Read in 4-byte
    # increments to save on read() calls
    data = io.read(4)

    # If we are at EOF - stop iterating
    break unless data && data.bytesize == 4

    # Look for the sync pattern. It can be either the last byte being 0xFF,
    # or any of the 2 bytes in sequence being 0xFF and > 0xE0.
    four_bytes = data.unpack('C4')
    seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
    if seek_jmp > 0
      # The read above already advanced the IO by 4 bytes, so the jump has to
      # be applied relative to where those 4 bytes started - otherwise the
      # sync bytes we just found would be skipped over.
      io.seek(io.pos - 4 + seek_jmp)
      next
    end

    # Once we are past that stage we have latched onto a sync frame header
    sync, conf, bitrate_freq, rest = four_bytes
    frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
    mpeg_frames << frame_detail

    # There might be a xing header in the first frame that contains
    # all the info we need, otherwise parse multiple frames to find the
    # accurate average bitrate. "First frame" means the first frame found,
    # which is not necessarily the first loop iteration when junk bytes
    # precede it.
    if mpeg_frames.length == 1
      body_offset = io.pos
      frame_data_str = io.read(frame_detail.frame_length)
      # Restore the saved position instead of subtracting the frame length,
      # since a read near EOF may return fewer bytes (or nil)
      io.seek(body_offset)
      xing_header = frame_data_str ? attempt_xing_header(frame_data_str) : nil
      return [xing_header, mpeg_frames] if xing_header_usable_for_duration?(xing_header)
    end

    if frame_detail.frame_length > 1 # jump over current frame body
      io.seek(io.pos + frame_detail.frame_length - 4)
    end
  end

  [nil, mpeg_frames]
rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover
  [nil, mpeg_frames]
end
|
131
|
+
|
132
|
+
def parse_mpeg_frame_header(offset_in_file, sync, conf, bitrate_freq, rest)
|
133
|
+
# see this page for the magic values used in mp3:
|
134
|
+
# http://www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
|
135
|
+
samplerates = [
|
136
|
+
[11025, 12000, 8000], # MPEG 2.5
|
137
|
+
[], # reserved
|
138
|
+
[22050, 24000, 16000], # MPEG 2
|
139
|
+
[44100, 48000, 32000], # MPEG 1
|
140
|
+
]
|
141
|
+
v1l1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
|
142
|
+
v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
|
143
|
+
v1l3 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0]
|
144
|
+
v2l1 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0]
|
145
|
+
v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
|
146
|
+
v2l3 = v2l2
|
147
|
+
bitrate_by_version_by_layer = [
|
148
|
+
[nil, v2l3, v2l2, v2l1], # MPEG Version 2.5 # note that the layers go
|
149
|
+
nil, # reserved # from 3 to 1 by design.
|
150
|
+
[nil, v2l3, v2l2, v2l1], # MPEG Version 2 # the first layer id is
|
151
|
+
[nil, v1l3, v1l2, v1l1], # MPEG Version 1 # reserved
|
152
|
+
]
|
153
|
+
samples_per_frame = 1152 # the default frame size for mp3
|
154
|
+
channels_per_channel_mode = [
|
155
|
+
2, # 00 Stereo
|
156
|
+
2, # 01 Joint stereo (Stereo)
|
157
|
+
2, # 10 Dual channel (2 mono channels)
|
158
|
+
1, # 11 Single channel (Mono)
|
159
|
+
]
|
160
|
+
|
161
|
+
br_id = (bitrate_freq >> 4) & 0x0F # biterate id
|
162
|
+
sr_id = (bitrate_freq >> 2) & 0x03 # sample rate id
|
163
|
+
padding = bitrate_freq & 0x02 > 0 ? 1 : 0
|
164
|
+
mpeg_id = (conf >> 3) & 0x03
|
165
|
+
layer_id = (conf >> 1) & 0x03
|
166
|
+
channel_mode = (rest >> 6) & 0x03
|
167
|
+
channels = channels_per_channel_mode.fetch(channel_mode)
|
168
|
+
sample_rate = deep_fetch(samplerates, mpeg_id, sr_id)
|
169
|
+
frame_bitrate = deep_fetch(bitrate_by_version_by_layer, mpeg_id, layer_id, br_id)
|
170
|
+
frame_length = (144000 * frame_bitrate) / sample_rate + padding
|
171
|
+
MPEGFrame.new(
|
172
|
+
offset_in_file: offset_in_file,
|
173
|
+
mpeg_id: mpeg_id,
|
174
|
+
channels: channels,
|
175
|
+
sample_rate: sample_rate,
|
176
|
+
frame_length: frame_length,
|
177
|
+
frame_bitrate: frame_bitrate,
|
178
|
+
)
|
179
|
+
end
|
180
|
+
|
181
|
+
# Scan 4 byte values, and check whether there is
|
182
|
+
# a pattern of the 11 set bits anywhere within it
|
183
|
+
# or whether there is the 0xFF byte at the end
|
184
|
+
def sync_bytes_offset_in_4_byte_seq(four_bytes)
|
185
|
+
four_bytes[0...3].each_with_index do |byte, i|
|
186
|
+
next_byte = four_bytes[i+1]
|
187
|
+
if byte == 0xFF && next_byte > 0xE0
|
188
|
+
return i
|
189
|
+
end
|
190
|
+
end
|
191
|
+
four_bytes[-1] == 0xFF ? 3 : 4
|
192
|
+
end
|
193
|
+
|
194
|
+
# Looks for a Xing header inside the given frame body and decodes it.
#
# The 4 bytes following the "Xing" marker are read as one big-endian 32-bit
# flags word. Each flag bit announces an optional field, and only the
# announced fields are read, in this order: frame count, byte count,
# 100-byte table of contents, VBR scale.
#
# @param frame_body [String] the bytes of an MPEG frame
# @return [VBRHeader, nil] nil when there is no "Xing" marker or the flags
#   word is truncated; fields that are absent or truncated are left nil
def attempt_xing_header(frame_body)
  xing_offset = frame_body.index("Xing")
  return nil unless xing_offset # No Xing in this frame

  io = StringIO.new(frame_body)
  io.seek(xing_offset + 4) # Include the length of "Xing" itself

  # Reads a big-endian unsigned 32-bit integer, or nil on a short read
  read_uint32 = lambda do
    bytes = io.read(4)
    bytes.unpack('N1').first if bytes && bytes.bytesize == 4
  end

  # https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
  header_flags = read_uint32.call
  return nil unless header_flags

  frames = byte_count = toc = vbr_scale = nil

  # Note: in Ruby 0 is truthy, so the masked bits must be compared explicitly
  frames = read_uint32.call if (header_flags & 1) != 0 # FRAMES FLAG
  byte_count = read_uint32.call if (header_flags & 2) != 0 # BYTES FLAG

  if (header_flags & 4) != 0 # TOC FLAG
    toc_bytes = io.read(100)
    toc = toc_bytes.unpack('C100') if toc_bytes && toc_bytes.bytesize == 100
  end

  vbr_scale = read_uint32.call if (header_flags & 8) != 0 # VBR SCALE FLAG

  VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
end
|
224
|
+
|
225
|
+
# Averages the frame_length and frame_bitrate values of the given frames.
#
# @param mpeg_frames [Array] objects responding to frame_length and frame_bitrate
# @return [Array<Float>] [average frame_length, average frame_bitrate]
def average_bytes_and_bitrate(mpeg_frames)
  frame_count = mpeg_frames.length.to_f
  avg_bytes_per_frame = mpeg_frames.map(&:frame_length).inject(&:+) / frame_count
  avg_bitrate_per_frame = mpeg_frames.map(&:frame_bitrate).inject(&:+) / frame_count
  [avg_bytes_per_frame, avg_bitrate_per_frame]
end
|
230
|
+
|
231
|
+
def xing_header_usable_for_duration?(xing_header)
|
232
|
+
xing_header && xing_header.frames && xing_header.byte_count && xing_header.vbr_scale
|
233
|
+
end
|
234
|
+
|
235
|
+
def float_average_over(enum, property)
|
236
|
+
enum.map(&property).inject(&:+) / enum.length.to_f
|
237
|
+
end
|
238
|
+
|
239
|
+
def deep_fetch(from, *keys)
|
240
|
+
keys.inject(from) { |receiver, key_or_idx| receiver.fetch(key_or_idx) }
|
241
|
+
rescue KeyError, IndexError, NoMethodError
|
242
|
+
raise InvalidDeepFetch, "Could not retrieve #{keys.inspect} from #{from.inspect}"
|
243
|
+
end
|
244
|
+
|
245
|
+
FormatParser.register_parser_constructor self
|
246
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module FormatParser::MP3Parser::ID3V1
|
2
|
+
PACKSPEC = [
|
3
|
+
:tag, :a3,
|
4
|
+
:song_name, :a30,
|
5
|
+
:artist, :a30,
|
6
|
+
:album, :a30,
|
7
|
+
:year, :N1,
|
8
|
+
:comment, :a30,
|
9
|
+
:genre, :C,
|
10
|
+
]
|
11
|
+
packspec_keys = PACKSPEC.select.with_index{|_, i| i.even? }
|
12
|
+
TAG_SIZE_BYTES = 128
|
13
|
+
|
14
|
+
class TagInformation < Struct.new(*packspec_keys)
|
15
|
+
end
|
16
|
+
|
17
|
+
def attempt_id3_v1_extraction(io)
|
18
|
+
if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
|
19
|
+
return nil
|
20
|
+
end
|
21
|
+
|
22
|
+
io.seek(io.size - 128)
|
23
|
+
trailer_bytes = io.read(128)
|
24
|
+
|
25
|
+
unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
|
26
|
+
return nil
|
27
|
+
end
|
28
|
+
|
29
|
+
id3_v1 = parse_id3_v1(trailer_bytes)
|
30
|
+
|
31
|
+
# If all of the resulting strings are empty this ID3v1 tag is invalid and
|
32
|
+
# we should ignore it.
|
33
|
+
strings_from_id3v1 = id3_v1.values.select{|e| e.is_a?(String) && e != 'TAG' }
|
34
|
+
if strings_from_id3v1.all?(&:empty?)
|
35
|
+
return nil
|
36
|
+
end
|
37
|
+
|
38
|
+
id3_v1
|
39
|
+
end
|
40
|
+
|
41
|
+
# Unpacks an ID3v1 trailer into a TagInformation struct.
#
# PACKSPEC alternates member names and String#unpack directives; only the
# directives are used here. String values are passed through
# trim_id3v1_string, other values are kept as unpacked.
#
# @param byte_str [String] the bytes of the ID3v1 tag
# @return [TagInformation]
def parse_id3_v1(byte_str)
  directives = PACKSPEC.select.with_index { |_, i| i.odd? }
  unpacked_values = byte_str.unpack(directives.join)
  unpacked_values.map! { |e| e.is_a?(String) ? trim_id3v1_string(e) : e }
  # Splat the values so each one lands in its own struct member
  TagInformation.new(*unpacked_values)
end
|
47
|
+
|
48
|
+
# Remove trailing whitespace and trailing nullbytes
|
49
|
+
def trim_id3v1_string(str)
|
50
|
+
str.tr("\x00".b, '').strip
|
51
|
+
end
|
52
|
+
|
53
|
+
extend self
|
54
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module FormatParser::MP3Parser::ID3V2
|
2
|
+
def attempt_id3_v2_extraction(io)
|
3
|
+
io.seek(0) # Only support header ID3v2
|
4
|
+
header_bytes = io.read(10)
|
5
|
+
|
6
|
+
return nil unless header_bytes
|
7
|
+
|
8
|
+
header = parse_id3_v2_header(header_bytes)
|
9
|
+
return nil unless header[:tag] == 'ID3'
|
10
|
+
return nil unless header[:size] > 0
|
11
|
+
|
12
|
+
header_tag_payload = io.read(header[:size])
|
13
|
+
header_tag_payload = StringIO.new(header_tag_payload)
|
14
|
+
|
15
|
+
return nil unless header_tag_payload.size == header[:size]
|
16
|
+
|
17
|
+
frames = []
|
18
|
+
loop do
|
19
|
+
break if header_tag_payload.eof?
|
20
|
+
frame = parse_id3_v2_frame(header_tag_payload)
|
21
|
+
# Some files include padding, which is there so that when you edit ID3v2
|
22
|
+
# you do not have to overwrite the entire file - you can use this padding to
|
23
|
+
# add some more tags or to grow the existing ones. In practice if we hit
|
24
|
+
# something with a type of "0x00000000" we have entered the padding zone and
|
25
|
+
# there is no point in parsing further
|
26
|
+
if frame[:id] == "\x00\x00\x00\x00".b
|
27
|
+
break
|
28
|
+
else
|
29
|
+
frames << frame
|
30
|
+
end
|
31
|
+
end
|
32
|
+
frames
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse_id3_v2_header(byte_str)
|
36
|
+
packspec = [
|
37
|
+
:tag, :a3,
|
38
|
+
:version, :a2,
|
39
|
+
:flags, :C1,
|
40
|
+
:size, :a4,
|
41
|
+
]
|
42
|
+
keys, values = packspec.partition.with_index {|_, i| i.even? }
|
43
|
+
unpacked_values = byte_str.unpack(values.join)
|
44
|
+
header_data = Hash[keys.zip(unpacked_values)]
|
45
|
+
|
46
|
+
header_data[:version] = header_data[:version].unpack('C2')
|
47
|
+
header_data[:size] = decode_syncsafe_int(header_data[:size])
|
48
|
+
|
49
|
+
header_data
|
50
|
+
end
|
51
|
+
|
52
|
+
def parse_id3_v2_frame(io)
|
53
|
+
id, size, flags = io.read(10).unpack('a4a4a2')
|
54
|
+
size = decode_syncsafe_int(size)
|
55
|
+
content = io.read(size)
|
56
|
+
if content.bytesize != size
|
57
|
+
raise "Expected to read #{size} bytes for ID3V2 frame #{id}, but got #{content.bytesize}"
|
58
|
+
end
|
59
|
+
{id: id, size: size, flags: flags, content: content}
|
60
|
+
end
|
61
|
+
|
62
|
+
# ID3v2 uses "synchsafe integers", which are unsigned integers smeared
|
63
|
+
# over multiple bytes in such a manner that the first bit is always 0 (unset).
|
64
|
+
# This is done so that ID3v2 incompatible decoders will not by accident see
|
65
|
+
# the 0xFF0xFF0xFF0xFF sequence anywhere that can be mistaken for the MPEG frame
|
66
|
+
# synchronisation header. Effectively it is a 7 bit big-endian unsigned integer
|
67
|
+
# encoding.
|
68
|
+
#
|
69
|
+
# 8 bit 255 (0xFF) encoded in this manner takes 16 bits instead,
|
70
|
+
# and looks like this: `0b00000001 01111111`. Note how it avoids having
|
71
|
+
# the first bit of the second byte be 1.
|
72
|
+
# This method decodes an unsigned integer packed in this fashion
|
73
|
+
def decode_syncsafe_int(bytes)
|
74
|
+
size = 0
|
75
|
+
j = 0
|
76
|
+
i = bytes.bytesize - 1
|
77
|
+
while i >= 0
|
78
|
+
size += 128**i * (bytes.getbyte(j) & 0x7f)
|
79
|
+
j += 1
|
80
|
+
i -= 1
|
81
|
+
end
|
82
|
+
size
|
83
|
+
end
|
84
|
+
|
85
|
+
extend self
|
86
|
+
end
|
data/lib/read_limiter.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class FormatParser::ReadLimiter
|
2
2
|
NO_LIMIT = nil
|
3
|
+
|
3
4
|
class BudgetExceeded < StandardError
|
4
5
|
end
|
5
6
|
|
@@ -30,6 +31,10 @@ class FormatParser::ReadLimiter
|
|
30
31
|
@io.seek(to_offset)
|
31
32
|
end
|
32
33
|
|
34
|
+
def size
|
35
|
+
@io.size
|
36
|
+
end
|
37
|
+
|
33
38
|
def read(n)
|
34
39
|
@bytes += n
|
35
40
|
@reads += 1
|
data/spec/care_spec.rb
CHANGED
@@ -14,6 +14,13 @@ describe Care do
|
|
14
14
|
expect(cache.byteslice(source, 120, 12)).to be_nil
|
15
15
|
end
|
16
16
|
|
17
|
+
it 'raises on a negative read offset' do
|
18
|
+
cache = Care::Cache.new(3)
|
19
|
+
expect {
|
20
|
+
cache.byteslice(source, -2, 3)
|
21
|
+
}.to raise_error(/negative/i)
|
22
|
+
end
|
23
|
+
|
17
24
|
it 'can be cleared' do
|
18
25
|
cache = Care::Cache.new(3)
|
19
26
|
expect(cache.byteslice(source, 0, 3)).to eq("Hel")
|
@@ -80,7 +87,7 @@ describe Care do
|
|
80
87
|
methods_not_covered = Set.new(FormatParser::IOConstraint.public_instance_methods) - Set.new(Care::IOWrapper.public_instance_methods)
|
81
88
|
expect(methods_not_covered).to be_empty
|
82
89
|
end
|
83
|
-
|
90
|
+
|
84
91
|
it 'forwards calls to size() to the underlying IO' do
|
85
92
|
io_double = double('IO')
|
86
93
|
expect(io_double).to receive(:size).and_return(123)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MP3Parser do
|
4
|
+
it 'decodes and estimates duration for a VBR MP3' do
|
5
|
+
fpath = fixtures_dir + '/MP3/atc_fixture_vbr.mp3'
|
6
|
+
parsed = subject.information_from_io(File.open(fpath, 'rb'))
|
7
|
+
|
8
|
+
expect(parsed).not_to be_nil
|
9
|
+
|
10
|
+
expect(parsed.file_nature).to eq(:audio)
|
11
|
+
expect(parsed.file_type).to eq(:mp3)
|
12
|
+
expect(parsed.num_audio_channels).to eq(2)
|
13
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
14
|
+
expect(parsed.intrinsics).not_to be_nil
|
15
|
+
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'decodes and estimates duration for a CBR MP3' do
|
19
|
+
fpath = fixtures_dir + '/MP3/atc_fixture_cbr.mp3'
|
20
|
+
parsed = subject.information_from_io(File.open(fpath, 'rb'))
|
21
|
+
|
22
|
+
expect(parsed).not_to be_nil
|
23
|
+
|
24
|
+
expect(parsed.file_nature).to eq(:audio)
|
25
|
+
expect(parsed.file_type).to eq(:mp3)
|
26
|
+
expect(parsed.num_audio_channels).to eq(2)
|
27
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
28
|
+
expect(parsed.intrinsics).not_to be_nil
|
29
|
+
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.81)
|
30
|
+
end
|
31
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,8 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-01-
|
12
|
+
date: 2018-01-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: ks
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 0.0.1
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 0.0.1
|
14
28
|
- !ruby/object:Gem::Dependency
|
15
29
|
name: exifr
|
16
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -141,6 +155,9 @@ files:
|
|
141
155
|
- lib/parsers/exif_parser.rb
|
142
156
|
- lib/parsers/gif_parser.rb
|
143
157
|
- lib/parsers/jpeg_parser.rb
|
158
|
+
- lib/parsers/mp3_parser.rb
|
159
|
+
- lib/parsers/mp3_parser/id3_v1.rb
|
160
|
+
- lib/parsers/mp3_parser/id3_v2.rb
|
144
161
|
- lib/parsers/png_parser.rb
|
145
162
|
- lib/parsers/psd_parser.rb
|
146
163
|
- lib/parsers/tiff_parser.rb
|
@@ -155,6 +172,7 @@ files:
|
|
155
172
|
- spec/parsers/exif_parser_spec.rb
|
156
173
|
- spec/parsers/gif_parser_spec.rb
|
157
174
|
- spec/parsers/jpeg_parser_spec.rb
|
175
|
+
- spec/parsers/mp3_parser_spec.rb
|
158
176
|
- spec/parsers/png_parser_spec.rb
|
159
177
|
- spec/parsers/psd_parser_spec.rb
|
160
178
|
- spec/parsers/tiff_parser_spec.rb
|