format_parser 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/format_parser.gemspec +1 -0
- data/lib/care.rb +9 -2
- data/lib/file_information.rb +4 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mp3_parser.rb +246 -0
- data/lib/parsers/mp3_parser/id3_v1.rb +54 -0
- data/lib/parsers/mp3_parser/id3_v2.rb +86 -0
- data/lib/read_limiter.rb +5 -0
- data/spec/care_spec.rb +8 -1
- data/spec/parsers/mp3_parser_spec.rb +31 -0
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0ef6923a01b8fbe52f4491979a3be1224ea7018
|
4
|
+
data.tar.gz: a19859f36a81154f73d1071857834cd0e7e1ca74
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca6bd5d8324a4dcb41d6f3137a1ab621a016acf8e9fafae5983bb8c325f883ecc72b4046d774e423e5e4ca7f35d048dae20a1e6ca2287d842c111c2714e2d606
|
7
|
+
data.tar.gz: 99d6517341e8b48635c8540e7f92cb48e772ed02047eb6cd56412b21de2411c06d64d43878ffa1a3ce43bf8e82b7e886eef4f00b221bfdfe42bafc19435f3f35
|
data/README.md
CHANGED
@@ -7,6 +7,12 @@ minimum amount of data possible.
|
|
7
7
|
`format_parser` is inspired by [imagesize,](https://rubygems.org/gem/imagesize) [fastimage](https://github.com/sdsykes/fastimage)
|
8
8
|
and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them where appropriate.
|
9
9
|
|
10
|
+
## Currently supported filetypes:
|
11
|
+
|
12
|
+
`TIFF, PSD, PNG, MP3, JPEG, GIF, DPX, AIFF`
|
13
|
+
|
14
|
+
...with more on the way!
|
15
|
+
|
10
16
|
## Basic usage
|
11
17
|
|
12
18
|
Pass an IO object that responds to `read` and `seek` to `FormatParser`.
|
data/format_parser.gemspec
CHANGED
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
|
+
spec.add_dependency 'ks', '~> 0.0.1'
|
35
36
|
spec.add_dependency 'exifr', '~> 1.0'
|
36
37
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
38
|
|
data/lib/care.rb
CHANGED
@@ -39,6 +39,10 @@ class Care
|
|
39
39
|
clear
|
40
40
|
@io.close if @io.respond_to?(:close)
|
41
41
|
end
|
42
|
+
|
43
|
+
def size
|
44
|
+
@io.size
|
45
|
+
end
|
42
46
|
end
|
43
47
|
|
44
48
|
# Stores cached pages of data from the given IO as strings.
|
@@ -58,7 +62,10 @@ class Care
|
|
58
62
|
# or fetch pages where necessary
|
59
63
|
def byteslice(io, at, n_bytes)
|
60
64
|
if n_bytes < 1
|
61
|
-
raise ArgumentError, "The number of bytes to fetch must be a positive Integer"
|
65
|
+
raise ArgumentError, "The number of bytes to fetch must be a positive Integer"
|
66
|
+
end
|
67
|
+
if at < 0
|
68
|
+
raise ArgumentError, "Negative offsets are not supported (got #{at})"
|
62
69
|
end
|
63
70
|
|
64
71
|
first_page = at / @page_size
|
@@ -124,7 +131,7 @@ class Care
|
|
124
131
|
# to read following this one, so we can also optimize
|
125
132
|
@lowest_known_empty_page = page_i + 1
|
126
133
|
end
|
127
|
-
|
134
|
+
|
128
135
|
read_result
|
129
136
|
end
|
130
137
|
end
|
data/lib/file_information.rb
CHANGED
@@ -58,6 +58,10 @@ module FormatParser
|
|
58
58
|
# as an Integer
|
59
59
|
attr_accessor :media_duration_frames
|
60
60
|
|
61
|
+
# If a parser wants to provide any extra information to the caller
|
62
|
+
# it can be placed here
|
63
|
+
attr_accessor :intrinsics
|
64
|
+
|
61
65
|
# Only permits assignments via defined accessors
|
62
66
|
def initialize(**attributes)
|
63
67
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
@@ -0,0 +1,246 @@
|
|
1
|
+
require 'ks'
|
2
|
+
|
3
|
+
class FormatParser::MP3Parser
|
4
|
+
require_relative 'mp3_parser/id3_v1'
|
5
|
+
require_relative 'mp3_parser/id3_v2'
|
6
|
+
|
7
|
+
class MPEGFrame < Ks.strict(:offset_in_file, :mpeg_id, :channels, :sample_rate, :frame_length, :frame_bitrate)
|
8
|
+
end
|
9
|
+
|
10
|
+
class VBRHeader < Ks.strict(:frames, :byte_count, :toc_entries, :vbr_scale)
|
11
|
+
end
|
12
|
+
|
13
|
+
class MP3Info < Ks.strict(:duration_seconds, :num_channels, :sampling_rate)
|
14
|
+
end
|
15
|
+
|
16
|
+
class InvalidDeepFetch < KeyError
|
17
|
+
end
|
18
|
+
|
19
|
+
# We limit the number of MPEG frames we scan
|
20
|
+
# to obtain our duration estimation
|
21
|
+
MAX_FRAMES_TO_SCAN = 128
|
22
|
+
|
23
|
+
# Default frame size for mp3
|
24
|
+
SAMPLES_PER_FRAME = 1152
|
25
|
+
|
26
|
+
# Inspects the given IO as an MP3 file and describes it.
#
# io - an object responding to read, seek, pos and size
#
# Returns a FormatParser::FileInformation, or nil when no MPEG frame
# could be found. The duration comes from the Xing header when a usable one
# is present in the first frame, and is otherwise estimated from the frames
# that were scanned.
def information_from_io(io)
  # Read the last 128 bytes which might contain ID3v1
  id3_v1 = ID3V1.attempt_id3_v1_extraction(io)
  # Read the header bytes that might contain ID3v2. When a tag is found
  # the IO is left positioned right after it.
  id3_v2 = ID3V2.attempt_id3_v2_extraction(io)

  # Compute how many bytes are occupied by the actual MPEG frames:
  # everything except the ID3v2 tag at the head and the ID3v1 tag at the tail
  ignore_bytes_at_tail = id3_v1 ? ID3V1::TAG_SIZE_BYTES : 0
  ignore_bytes_at_head = id3_v2 ? io.pos : 0
  bytes_used_by_frames = io.size - ignore_bytes_at_head - ignore_bytes_at_tail

  io.seek(ignore_bytes_at_head)

  maybe_xing_header, initial_frames = parse_mpeg_frames(io)

  return nil if initial_frames.empty?

  first_frame = initial_frames.first

  file_info = FormatParser::FileInformation.new(
    file_nature: :audio,
    file_type: :mp3,
    num_audio_channels: first_frame.channels,
    audio_sample_rate_hz: first_frame.sample_rate,
    # media_duration_frames is omitted because the frames
    # in MPEG are not the same thing as in a movie file - they
    # do not tell anything of substance
    intrinsics: {
      id3_v1: id3_v1 ? id3_v1.to_h : nil,
      id3_v2: id3_v2 ? id3_v2.map(&:to_h) : nil,
      xing_header: maybe_xing_header.to_h,
      initial_frames: initial_frames.map(&:to_h)
    }
  )

  if maybe_xing_header
    # The Xing header carries the total frame count, so no estimation is needed
    total_samples = maybe_xing_header.frames * SAMPLES_PER_FRAME
    file_info.media_duration_seconds = total_samples / first_frame.sample_rate.to_f
    return file_info
  end

  # Estimate duration using the frames we did parse - to have an exact one
  # we would need to have all the frames and thus read most of the file
  avg_frame_size = float_average_over(initial_frames, :frame_length)
  avg_sample_rate = float_average_over(initial_frames, :sample_rate)

  est_frame_count = bytes_used_by_frames / avg_frame_size
  est_samples = est_frame_count * SAMPLES_PER_FRAME

  file_info.media_duration_seconds = est_samples / avg_sample_rate
  file_info
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# The implementation of the MPEG frames parsing is mostly based on tinytag,
|
85
|
+
# a sweet little Python library for parsing audio metadata - do check it out
|
86
|
+
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
87
|
+
# Scans the IO from its current position for MPEG frame headers.
#
# io - an object responding to read, seek and pos
#
# Returns a two-element Array: the Xing header (or nil) and the Array of
# frames that were decoded. The scan stops early when the first frame carries
# a Xing header usable for computing duration, when the IO runs out of data,
# when a frame header turns out to be invalid, or after MAX_FRAMES_TO_SCAN
# iterations (resynchronisation attempts count towards that limit as well).
def parse_mpeg_frames(io)
  mpeg_frames = []

  MAX_FRAMES_TO_SCAN.times do
    # Read through until we can latch onto the 11 sync bits. Read in 4-byte
    # increments to save on read() calls
    data = io.read(4)

    # If we are at EOF - stop iterating
    break unless data && data.bytesize == 4

    # Look for the sync pattern. It can be either the last byte being 0xFF,
    # or any 2 bytes in sequence being 0xFF and > 0xE0.
    four_bytes = data.unpack('C4')
    seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
    if seek_jmp > 0
      # The offset is relative to the start of the 4 bytes we have just
      # consumed, so step back over them before applying it - otherwise
      # bytes which were never examined would get skipped
      io.seek(io.pos - 4 + seek_jmp)
      next
    end

    # Once we are past that stage we have latched onto a sync frame header
    sync, conf, bitrate_freq, rest = four_bytes
    frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
    mpeg_frames << frame_detail

    # There might be a xing header in the first frame that contains
    # all the info we need, otherwise parse multiple frames to find the
    # accurate average bitrate
    if mpeg_frames.length == 1
      body_start = io.pos
      frame_data_str = io.read(frame_detail.frame_length)
      # Return to the start of the body regardless of how much was actually read
      io.seek(body_start)
      # read() gives nil at EOF, in which case there is nothing to search
      xing_header = frame_data_str && attempt_xing_header(frame_data_str)
      return [xing_header, mpeg_frames] if xing_header_usable_for_duration?(xing_header)
    end

    if frame_detail.frame_length > 1 # jump over current frame body
      io.seek(io.pos + frame_detail.frame_length - 4)
    end
  end
  [nil, mpeg_frames]
rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover
  [nil, mpeg_frames]
end
|
131
|
+
|
132
|
+
# Decodes the four bytes of an MPEG audio frame header into an MPEGFrame.
#
# offset_in_file - offset of the first header byte, stored on the result as-is
# sync           - first header byte (not inspected here)
# conf           - second header byte: MPEG version id in bits 4-3, layer id in bits 2-1
# bitrate_freq   - third header byte: bitrate index in bits 7-4, sample rate
#                  index in bits 3-2, padding flag in bit 1
# rest           - fourth header byte: channel mode in bits 7-6
#
# Returns an MPEGFrame. The table lookups go through deep_fetch, which raises
# InvalidDeepFetch when the header selects a reserved or missing entry.
def parse_mpeg_frame_header(offset_in_file, sync, conf, bitrate_freq, rest)
  # see this page for the magic values used in mp3:
  # http://www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
  samplerates = [
    [11025, 12000, 8000],  # MPEG 2.5
    [],                    # reserved
    [22050, 24000, 16000], # MPEG 2
    [44100, 48000, 32000], # MPEG 1
  ]
  # Bitrates in kbit/s, indexed by the 4-bit bitrate id
  v1l1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
  v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
  v1l3 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0]
  v2l1 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0]
  v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
  v2l3 = v2l2
  bitrate_by_version_by_layer = [
    [nil, v2l3, v2l2, v2l1], # MPEG Version 2.5  # note that the layers go
    nil,                     # reserved          # from 3 to 1 by design.
    [nil, v2l3, v2l2, v2l1], # MPEG Version 2    # the first layer id is
    [nil, v1l3, v1l2, v1l1], # MPEG Version 1    # reserved
  ]
  channels_per_channel_mode = [
    2, # 00 Stereo
    2, # 01 Joint stereo (Stereo)
    2, # 10 Dual channel (2 mono channels)
    1, # 11 Single channel (Mono)
  ]

  br_id = (bitrate_freq >> 4) & 0x0F # bitrate id
  sr_id = (bitrate_freq >> 2) & 0x03 # sample rate id
  padding = bitrate_freq & 0x02 > 0 ? 1 : 0
  mpeg_id = (conf >> 3) & 0x03
  layer_id = (conf >> 1) & 0x03
  channel_mode = (rest >> 6) & 0x03
  channels = channels_per_channel_mode.fetch(channel_mode)
  sample_rate = deep_fetch(samplerates, mpeg_id, sr_id)
  frame_bitrate = deep_fetch(bitrate_by_version_by_layer, mpeg_id, layer_id, br_id)
  # NOTE(review): the same 144000 factor is applied to every version and
  # layer - confirm this is intended for MPEG 2 / 2.5 streams
  frame_length = (144000 * frame_bitrate) / sample_rate + padding
  MPEGFrame.new(
    offset_in_file: offset_in_file,
    mpeg_id: mpeg_id,
    channels: channels,
    sample_rate: sample_rate,
    frame_length: frame_length,
    frame_bitrate: frame_bitrate,
  )
end
|
180
|
+
|
181
|
+
# Scan 4 byte values, and check whether there is
|
182
|
+
# a pattern of the 11 set bits anywhere within it
|
183
|
+
# or whether there is the 0xFF byte at the end
|
184
|
+
# four_bytes - Array of 4 Integers (byte values)
#
# Returns the index (0..2) of the first byte equal to 0xFF that is directly
# followed by a byte greater than 0xE0. When there is no such pair, returns 3
# if the last byte is 0xFF (a sync pattern may begin there and continue past
# these bytes) and 4 otherwise.
def sync_bytes_offset_in_4_byte_seq(four_bytes)
  sync_at = four_bytes.each_cons(2).find_index do |byte, next_byte|
    byte == 0xFF && next_byte > 0xE0
  end
  return sync_at if sync_at

  four_bytes[-1] == 0xFF ? 3 : 4
end
|
193
|
+
|
194
|
+
# Looks for a Xing header inside the body of an MPEG frame and decodes it.
#
# frame_body - String with the bytes of the frame
#
# Returns a VBRHeader, or nil when the body has no "Xing" marker or ends
# before the flags field is complete. A field is only read when its flag bit
# is set; fields that are absent or truncated are left nil.
def attempt_xing_header(frame_body)
  xing_offset = frame_body.index('Xing')
  return nil unless xing_offset # No Xing in this frame

  io = StringIO.new(frame_body)
  io.seek(xing_offset + 4) # Include the length of "Xing" itself

  # Reads exactly n bytes and unpacks them, giving nil on a short read
  read_field = lambda do |n_bytes, pack_directive|
    bytes = io.read(n_bytes)
    bytes.unpack(pack_directive) if bytes && bytes.bytesize == n_bytes
  end

  # https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
  # The flags are one 32-bit big-endian integer
  header_flags = read_field.call(4, 'N1')
  return nil unless header_flags
  header_flags = header_flags.first

  frames = byte_count = toc = vbr_scale = nil

  # 0 is truthy in Ruby, so the masked bits must be compared explicitly
  if (header_flags & 1) != 0 # FRAMES FLAG
    frames = (read_field.call(4, 'N1') || []).first
  end

  if (header_flags & 2) != 0 # BYTES FLAG
    byte_count = (read_field.call(4, 'N1') || []).first
  end

  if (header_flags & 4) != 0 # TOC FLAG
    toc = read_field.call(100, 'C100')
  end

  if (header_flags & 8) != 0 # VBR SCALE FLAG
    vbr_scale = (read_field.call(4, 'N1') || []).first
  end

  VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
end
|
224
|
+
|
225
|
+
# Averages the frame length and the frame bitrate over the given frames.
#
# mpeg_frames - non-empty Array of objects responding to frame_length and frame_bitrate
#
# Returns a two-element Array of Floats: average frame length, average bitrate.
def average_bytes_and_bitrate(mpeg_frames)
  frame_count = mpeg_frames.length.to_f
  avg_bytes_per_frame = mpeg_frames.map(&:frame_length).inject(&:+) / frame_count
  avg_bitrate_per_frame = mpeg_frames.map(&:frame_bitrate).inject(&:+) / frame_count
  [avg_bytes_per_frame, avg_bitrate_per_frame]
end
|
230
|
+
|
231
|
+
# Tells whether the given Xing header has every field filled in: frames,
# byte_count and vbr_scale. Accepts nil for "no header".
#
# Returns a truthy value (the vbr_scale) when all are present, and the first
# falsy value encountered otherwise.
def xing_header_usable_for_duration?(xing_header)
  return xing_header unless xing_header

  frame_total = xing_header.frames
  return frame_total unless frame_total

  byte_total = xing_header.byte_count
  return byte_total unless byte_total

  xing_header.vbr_scale
end
|
234
|
+
|
235
|
+
# Computes the arithmetic mean of one property across a collection.
#
# enum     - collection responding to map and length
# property - Symbol naming the reader to call on every element
#
# Returns the mean as a Float.
def float_average_over(enum, property)
  readings = enum.map(&property)
  total = readings.inject(&:+)
  total / enum.length.to_f
end
|
238
|
+
|
239
|
+
# Walks a nested structure of Arrays/Hashes using fetch at every level.
#
# from - the outermost container
# keys - the key or index to use at each successive level
#
# Returns the value found at the end of the path.
# Raises InvalidDeepFetch when a key or index is missing, or when a level
# along the way is not something that can be fetched from (for example nil).
def deep_fetch(from, *keys)
  keys.inject(from) do |receiver, key_or_idx|
    # Check for the capability explicitly instead of rescuing NoMethodError,
    # which would also hide unrelated programming errors
    raise IndexError, "#{receiver.inspect} cannot be fetched from" unless receiver.respond_to?(:fetch)
    receiver.fetch(key_or_idx)
  end
rescue IndexError # KeyError is a subclass of IndexError
  raise InvalidDeepFetch, "Could not retrieve #{keys.inspect} from #{from.inspect}"
end
|
244
|
+
|
245
|
+
FormatParser.register_parser_constructor self
|
246
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module FormatParser::MP3Parser::ID3V1
|
2
|
+
# Layout of the 128-byte ID3v1 tag as alternating field names and
# String#unpack directives. The directive widths add up to TAG_SIZE_BYTES.
PACKSPEC = [
  :tag, :a3,
  :song_name, :a30,
  :artist, :a30,
  :album, :a30,
  :year, :a4, # four ASCII characters, not a binary integer
  :comment, :a30,
  :genre, :C,
].freeze

# Total size of the tag, which sits at the very end of the file
TAG_SIZE_BYTES = 128

# Value object with one member per PACKSPEC field name, in PACKSPEC order
class TagInformation < Struct.new(*PACKSPEC.each_slice(2).map(&:first))
end
|
16
|
+
|
17
|
+
# Tries to read an ID3v1 tag from the last TAG_SIZE_BYTES bytes of the IO.
#
# io - an object responding to size, seek and read
#
# Returns the parsed tag, or nil when the IO is too small, when the trailer
# does not start with the "TAG" marker, or when every text field of the tag
# is empty.
def attempt_id3_v1_extraction(io)
  return nil if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless

  io.seek(io.size - TAG_SIZE_BYTES)
  trailer_bytes = io.read(TAG_SIZE_BYTES)
  return nil unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'

  id3_v1 = parse_id3_v1(trailer_bytes)

  # If all of the resulting strings are empty this ID3v1 tag is invalid and
  # we should ignore it. The marker is left out by field name, so that a
  # text field which happens to contain "TAG" still counts as content.
  text_fields = id3_v1.to_h.reject { |field, _| field == :tag }.values.grep(String)
  return nil if text_fields.all?(&:empty?)

  id3_v1
end
|
40
|
+
|
41
|
+
# Unpacks the bytes of an ID3v1 tag according to PACKSPEC.
#
# byte_str - String with the tag bytes
#
# Returns a TagInformation with one member per PACKSPEC field. String values
# are passed through trim_id3v1_string, other values are stored as unpacked.
def parse_id3_v1(byte_str)
  # PACKSPEC alternates field names and directives, the directives come second
  directives = PACKSPEC.each_slice(2).map(&:last)
  unpacked_values = byte_str.unpack(directives.join)
  unpacked_values.map! { |e| e.is_a?(String) ? trim_id3v1_string(e) : e }
  # Splat, so that every value lands in its own struct member
  TagInformation.new(*unpacked_values)
end
|
47
|
+
|
48
|
+
# Extracts the text from a fixed-width ID3v1 field: everything from the first
# null byte onwards is discarded (the null terminates the text, what follows
# it is padding or unrelated data), then surrounding whitespace is stripped.
def trim_id3v1_string(str)
  terminator_at = str.index("\x00".b)
  text = terminator_at ? str[0, terminator_at] : str
  text.strip
end
|
52
|
+
|
53
|
+
extend self
|
54
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module FormatParser::MP3Parser::ID3V2
|
2
|
+
# Tries to read an ID3v2 tag from the very start of the IO.
#
# io - an object responding to seek and read
#
# Returns an Array of frame Hashes (see parse_id3_v2_frame), or nil when the
# IO does not start with a complete, non-empty ID3v2 tag. On success the IO
# is left positioned right after the tag payload.
def attempt_id3_v2_extraction(io)
  io.seek(0) # Only support header ID3v2
  header_bytes = io.read(10)
  return nil unless header_bytes && header_bytes.bytesize == 10

  header = parse_id3_v2_header(header_bytes)
  return nil unless header[:tag] == 'ID3'
  return nil unless header[:size] > 0

  # read() gives nil at EOF and a shorter string when the data runs out
  payload_bytes = io.read(header[:size])
  return nil unless payload_bytes && payload_bytes.bytesize == header[:size]

  header_tag_payload = StringIO.new(payload_bytes)

  frames = []
  until header_tag_payload.eof?
    frame = parse_id3_v2_frame(header_tag_payload)
    # Some files include padding, which is there so that when you edit ID3v2
    # you do not have to overwrite the entire file - you can use this padding to
    # add some more tags or to grow the existing ones. In practice if we hit
    # something with a type of "0x00000000" we have entered the padding zone and
    # there is no point in parsing further
    break if frame[:id] == "\x00\x00\x00\x00".b
    frames << frame
  end
  frames
end
|
34
|
+
|
35
|
+
# Splits the ID3v2 tag header into its fields.
#
# byte_str - String with the header bytes: a 3-byte tag, 2 version bytes,
#            1 flags byte and 4 size bytes
#
# Returns a Hash with :tag (String), :version (Array of 2 Integers),
# :flags (Integer) and :size (Integer decoded via decode_syncsafe_int).
def parse_id3_v2_header(byte_str)
  tag, version_bytes, flags, size_bytes = byte_str.unpack('a3a2C1a4')
  {
    tag: tag,
    version: version_bytes.unpack('C2'),
    flags: flags,
    size: decode_syncsafe_int(size_bytes),
  }
end
|
51
|
+
|
52
|
+
# Reads one ID3v2 frame from the IO: a 10-byte header (4-byte id, 4-byte
# size, 2 bytes of flags) followed by as many content bytes as the size says.
#
# io - an object responding to read, positioned at the start of a frame
#
# Returns a Hash with :id, :size, :flags and :content.
# Raises RuntimeError when the IO has no data left for the frame header, or
# when fewer content bytes are available than the frame declares.
def parse_id3_v2_frame(io)
  frame_header = io.read(10)
  raise "Expected to read an ID3V2 frame header, but no data was left" unless frame_header

  id, size, flags = frame_header.unpack('a4a4a2')
  # NOTE(review): the size is always decoded as a syncsafe integer - confirm
  # this holds for every ID3v2 version this parser is meant to accept
  size = decode_syncsafe_int(size)
  # read() gives nil at EOF when asked for more than 0 bytes, treat it as empty
  # so that the descriptive error below gets raised
  content = io.read(size) || ''.b
  if content.bytesize != size
    raise "Expected to read #{size} bytes for ID3V2 frame #{id}, but got #{content.bytesize}"
  end
  {id: id, size: size, flags: flags, content: content}
end
|
61
|
+
|
62
|
+
# ID3v2 uses "synchsafe integers", which are unsigned integers smeared
# over multiple bytes in such a manner that the first bit of every byte is
# always 0 (unset). This is done so that ID3v2 incompatible decoders will not
# by accident see a run of set bits that can be mistaken for the MPEG frame
# synchronisation header. Effectively it is a big-endian unsigned integer
# encoding which carries 7 bits per byte.
#
# 8 bit 255 (0xFF) encoded in this manner takes 16 bits instead,
# and looks like this: `0b00000001 01111111`. Note how it avoids having
# the first bit of the second byte be 1.
#
# This method decodes an unsigned integer packed in this fashion.
#
# bytes - String with the encoded bytes, most significant first
#
# Returns the decoded Integer (0 for an empty String).
def decode_syncsafe_int(bytes)
  # Each byte contributes its low 7 bits, the top bit is masked off
  bytes.each_byte.inject(0) { |decoded, byte| (decoded << 7) | (byte & 0x7f) }
end
|
84
|
+
|
85
|
+
extend self
|
86
|
+
end
|
data/lib/read_limiter.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class FormatParser::ReadLimiter
|
2
2
|
NO_LIMIT = nil
|
3
|
+
|
3
4
|
class BudgetExceeded < StandardError
|
4
5
|
end
|
5
6
|
|
@@ -30,6 +31,10 @@ class FormatParser::ReadLimiter
|
|
30
31
|
@io.seek(to_offset)
|
31
32
|
end
|
32
33
|
|
34
|
+
def size
|
35
|
+
@io.size
|
36
|
+
end
|
37
|
+
|
33
38
|
def read(n)
|
34
39
|
@bytes += n
|
35
40
|
@reads += 1
|
data/spec/care_spec.rb
CHANGED
@@ -14,6 +14,13 @@ describe Care do
|
|
14
14
|
expect(cache.byteslice(source, 120, 12)).to be_nil
|
15
15
|
end
|
16
16
|
|
17
|
+
it 'raises on a negative read offset' do
|
18
|
+
cache = Care::Cache.new(3)
|
19
|
+
expect {
|
20
|
+
cache.byteslice(source, -2, 3)
|
21
|
+
}.to raise_error(/negative/i)
|
22
|
+
end
|
23
|
+
|
17
24
|
it 'can be cleared' do
|
18
25
|
cache = Care::Cache.new(3)
|
19
26
|
expect(cache.byteslice(source, 0, 3)).to eq("Hel")
|
@@ -80,7 +87,7 @@ describe Care do
|
|
80
87
|
methods_not_covered = Set.new(FormatParser::IOConstraint.public_instance_methods) - Set.new(Care::IOWrapper.public_instance_methods)
|
81
88
|
expect(methods_not_covered).to be_empty
|
82
89
|
end
|
83
|
-
|
90
|
+
|
84
91
|
it 'forwards calls to size() to the underlying IO' do
|
85
92
|
io_double = double('IO')
|
86
93
|
expect(io_double).to receive(:size).and_return(123)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MP3Parser do
  # The fixtures are opened with the block form of File.open so that the
  # file handle gets closed once parsing is done
  it 'decodes and estimates duration for a VBR MP3' do
    fpath = fixtures_dir + '/MP3/atc_fixture_vbr.mp3'
    parsed = File.open(fpath, 'rb') { |file| subject.information_from_io(file) }

    expect(parsed).not_to be_nil

    expect(parsed.file_nature).to eq(:audio)
    expect(parsed.file_type).to eq(:mp3)
    expect(parsed.num_audio_channels).to eq(2)
    expect(parsed.audio_sample_rate_hz).to eq(44100)
    expect(parsed.intrinsics).not_to be_nil
    expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
  end

  it 'decodes and estimates duration for a CBR MP3' do
    fpath = fixtures_dir + '/MP3/atc_fixture_cbr.mp3'
    parsed = File.open(fpath, 'rb') { |file| subject.information_from_io(file) }

    expect(parsed).not_to be_nil

    expect(parsed.file_nature).to eq(:audio)
    expect(parsed.file_type).to eq(:mp3)
    expect(parsed.num_audio_channels).to eq(2)
    expect(parsed.audio_sample_rate_hz).to eq(44100)
    expect(parsed.intrinsics).not_to be_nil
    expect(parsed.media_duration_seconds).to be_within(0.1).of(0.81)
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,8 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-01-
|
12
|
+
date: 2018-01-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: ks
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 0.0.1
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 0.0.1
|
14
28
|
- !ruby/object:Gem::Dependency
|
15
29
|
name: exifr
|
16
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -141,6 +155,9 @@ files:
|
|
141
155
|
- lib/parsers/exif_parser.rb
|
142
156
|
- lib/parsers/gif_parser.rb
|
143
157
|
- lib/parsers/jpeg_parser.rb
|
158
|
+
- lib/parsers/mp3_parser.rb
|
159
|
+
- lib/parsers/mp3_parser/id3_v1.rb
|
160
|
+
- lib/parsers/mp3_parser/id3_v2.rb
|
144
161
|
- lib/parsers/png_parser.rb
|
145
162
|
- lib/parsers/psd_parser.rb
|
146
163
|
- lib/parsers/tiff_parser.rb
|
@@ -155,6 +172,7 @@ files:
|
|
155
172
|
- spec/parsers/exif_parser_spec.rb
|
156
173
|
- spec/parsers/gif_parser_spec.rb
|
157
174
|
- spec/parsers/jpeg_parser_spec.rb
|
175
|
+
- spec/parsers/mp3_parser_spec.rb
|
158
176
|
- spec/parsers/png_parser_spec.rb
|
159
177
|
- spec/parsers/psd_parser_spec.rb
|
160
178
|
- spec/parsers/tiff_parser_spec.rb
|