RubyGems - format_parser - Versions diffs - 0.2.0 → 0.3.0 - Mend

format_parser 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

checksums.yaml +4 -4
data/.rubocop.yml +2 -0
data/.travis.yml +1 -0
data/README.md +14 -11
data/format_parser.gemspec +11 -10
data/lib/care.rb +9 -17
data/lib/format_parser.rb +11 -13
data/lib/format_parser/version.rb +1 -1
data/lib/io_constraint.rb +3 -3
data/lib/io_utils.rb +4 -10
data/lib/parsers/aiff_parser.rb +9 -10
data/lib/parsers/dpx_parser.rb +42 -42
data/lib/parsers/dsl.rb +2 -2
data/lib/parsers/exif_parser.rb +3 -8
data/lib/parsers/fdx_parser.rb +3 -3
data/lib/parsers/gif_parser.rb +3 -5
data/lib/parsers/jpeg_parser.rb +4 -8
data/lib/parsers/moov_parser.rb +8 -6
data/lib/parsers/moov_parser/decoder.rb +105 -122
data/lib/parsers/mp3_parser.rb +36 -46
data/lib/parsers/mp3_parser/id3_v1.rb +7 -13
data/lib/parsers/mp3_parser/id3_v2.rb +6 -6
data/lib/parsers/png_parser.rb +5 -12
data/lib/parsers/psd_parser.rb +2 -2
data/lib/parsers/tiff_parser.rb +10 -12
data/lib/parsers/wav_parser.rb +3 -3
data/lib/read_limiter.rb +3 -7
data/lib/remote_io.rb +3 -6
data/spec/care_spec.rb +10 -10
data/spec/file_information_spec.rb +1 -3
data/spec/format_parser_spec.rb +6 -6
data/spec/io_utils_spec.rb +7 -7
data/spec/parsers/exif_parser_spec.rb +2 -3
data/spec/parsers/gif_parser_spec.rb +1 -1
data/spec/parsers/jpeg_parser_spec.rb +0 -1
data/spec/parsers/moov_parser_spec.rb +2 -3
data/spec/parsers/png_parser_spec.rb +1 -1
data/spec/parsers/tiff_parser_spec.rb +0 -1
data/spec/parsers/wav_parser_spec.rb +3 -3
data/spec/read_limiter_spec.rb +0 -1
data/spec/remote_fetching_spec.rb +34 -20
data/spec/remote_io_spec.rb +20 -21
data/spec/spec_helper.rb +2 -2
metadata +19 -4

data/lib/parsers/mp3_parser.rb CHANGED

@@ -37,12 +37,12 @@ class FormatParser::MP3Parser
     ignore_bytes_at_tail = id3_v1 ? 128 : 0
     ignore_bytes_at_head = id3_v2 ? io.pos : 0
     bytes_used_by_frames = io.size - ignore_bytes_at_tail - ignore_bytes_at_tail
     io.seek(ignore_bytes_at_head)
     maybe_xing_header, initial_frames = parse_mpeg_frames(io)
-    return nil if initial_frames.empty?
+    return if initial_frames.empty?
     first_frame = initial_frames.first
@@ -63,14 +63,14 @@ class FormatParser::MP3Parser
     if maybe_xing_header
       duration = maybe_xing_header.frames * SAMPLES_PER_FRAME / first_frame.sample_rate.to_f
-      bit_rate = maybe_xing_header.byte_count * 8 / duration / 1000
+      _bit_rate = maybe_xing_header.byte_count * 8 / duration / 1000
       file_info.media_duration_seconds = duration
       return file_info
     end
     # Estimate duration using the frames we did parse - to have an exact one
     # we would need to have all the frames and thus read most of the file
-    avg_bitrate = float_average_over(initial_frames, :frame_bitrate)
+    _avg_bitrate = float_average_over(initial_frames, :frame_bitrate)
     avg_frame_size = float_average_over(initial_frames, :frame_length)
     avg_sample_rate = float_average_over(initial_frames, :sample_rate)
@@ -79,17 +79,17 @@ class FormatParser::MP3Parser
     est_duration_seconds = est_samples / avg_sample_rate
     file_info.media_duration_seconds = est_duration_seconds
-    return file_info
+    file_info
   end
   private
   # The implementation of the MPEG frames parsing is mostly based on tinytag,
   # a sweet little Python library for parsing audio metadata - do check it out
   # if you have a minute. https://pypi.python.org/pypi/tinytag
   def parse_mpeg_frames(io)
     mpeg_frames = []
     MAX_FRAMES_TO_SCAN.times do |frame_i|
       # Read through until we can latch onto the 11 sync bits. Read in 4-byte
       # increments to save on read() calls
@@ -123,7 +123,7 @@ class FormatParser::MP3Parser
           return [xing_header, mpeg_frames]
         end
       end
-      if frame_detail.frame_length > 1  # jump over current frame body
+      if frame_detail.frame_length > 1 # jump over current frame body
         io.seek(io.pos + frame_detail.frame_length - 4)
       end
     end
@@ -132,14 +132,14 @@ class FormatParser::MP3Parser
     [nil, mpeg_frames]
   end
-  def parse_mpeg_frame_header(offset_in_file, sync, conf, bitrate_freq, rest)
+  def parse_mpeg_frame_header(offset_in_file, _sync, conf, bitrate_freq, rest)
     # see this page for the magic values used in mp3:
     # http:/www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
     samplerates = [
-        [11025, 12000,  8000],  # MPEG 2.5
-        [],                     # reserved
-        [22050, 24000, 16000],  # MPEG 2
-        [44100, 48000, 32000],  # MPEG 1
+      [11025, 12000,  8000],  # MPEG 2.5
+      [],                     # reserved
+      [22050, 24000, 16000],  # MPEG 2
+      [44100, 48000, 32000],  # MPEG 1
     ]
     v1l1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
     v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
@@ -148,19 +148,19 @@ class FormatParser::MP3Parser
     v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
     v2l3 = v2l2
     bitrate_by_version_by_layer = [
-        [nil, v2l3, v2l2, v2l1],  # MPEG Version 2.5  # note that the layers go
-        nil,                      # reserved          # from 3 to 1 by design.
-        [nil, v2l3, v2l2, v2l1],  # MPEG Version 2    # the first layer id is
-        [nil, v1l3, v1l2, v1l1],  # MPEG Version 1    # reserved
+      [nil, v2l3, v2l2, v2l1],  # MPEG Version 2.5  # note that the layers go
+      nil,                      # reserved          # from 3 to 1 by design.
+      [nil, v2l3, v2l2, v2l1],  # MPEG Version 2    # the first layer id is
+      [nil, v1l3, v1l2, v1l1],  # MPEG Version 1    # reserved
     ]
-    samples_per_frame = 1152  # the default frame size for mp3
     channels_per_channel_mode = [
-        2,  # 00 Stereo
-        2,  # 01 Joint stereo (Stereo)
-        2,  # 10 Dual channel (2 mono channels)
-        1,  # 11 Single channel (Mono)
+      2,  # 00 Stereo
+      2,  # 01 Joint stereo (Stereo)
+      2,  # 10 Dual channel (2 mono channels)
+      1,  # 11 Single channel (Mono)
     ]
     br_id = (bitrate_freq >> 4) & 0x0F  # biterate id
     sr_id = (bitrate_freq >> 2) & 0x03  # sample rate id
     padding = bitrate_freq & 0x02 > 0 ? 1 : 0
@@ -170,7 +170,7 @@ class FormatParser::MP3Parser
     channels = channels_per_channel_mode.fetch(channel_mode)
     sample_rate = deep_fetch(samplerates, mpeg_id, sr_id)
     frame_bitrate = deep_fetch(bitrate_by_version_by_layer, mpeg_id, layer_id, br_id)
-    frame_length = (144000 * frame_bitrate) / sample_rate + padding
+    frame_length = (144_000 * frame_bitrate) / sample_rate + padding
     MPEGFrame.new(
       offset_in_file: offset_in_file,
       mpeg_id: mpeg_id,
@@ -186,17 +186,15 @@ class FormatParser::MP3Parser
   # or whether there is the 0xFF byte at the end
   def sync_bytes_offset_in_4_byte_seq(four_bytes)
     four_bytes[0...3].each_with_index do |byte, i|
-      next_byte = four_bytes[i+1]
-      if byte == 0xFF && next_byte > 0xE0
-        return i
-      end
+      next_byte = four_bytes[i + 1]
+      return i if byte == 0xFF && next_byte > 0xE0
     end
     four_bytes[-1] == 0xFF ? 3 : 4
   end
   def attempt_xing_header(frame_body)
-    unless xing_offset = frame_body.index("Xing")
-      return nil # No Xing in this frame
+    unless xing_offset = frame_body.index('Xing')
+      return # No Xing in this frame
     end
     io = StringIO.new(frame_body)
@@ -205,32 +203,24 @@ class FormatParser::MP3Parser
     # https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
     header_flags, _ = io.read(4).unpack('s>s>')
     frames = byte_count = toc = vbr_scale = nil
-    if header_flags & 1  # FRAMES FLAG
-      frames = io.read(4).unpack('N1').first
-    end
-    if header_flags & 2  # BYTES FLAG
-      byte_count = io.read(4).unpack('N1').first
-    end
+    frames = io.read(4).unpack('N1').first if header_flags & 1 # FRAMES FLAG
-    if header_flags & 4  # TOC FLAG
-      toc = io.read(100).unpack('C100')
-    end
+    byte_count = io.read(4).unpack('N1').first if header_flags & 2 # BYTES FLAG
-    if header_flags & 8  # VBR SCALE FLAG
-      vbr_scale = io.read(4).unpack('N1').first
-    end
+    toc = io.read(100).unpack('C100') if header_flags & 4 # TOC FLAG
+    vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 # VBR SCALE FLAG
     VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
   end
-  def average_bytes_and_bitrate(mpeg_frames)
+  def average_bytes_and_bitrate(_mpeg_frames)
     avg_bytes_per_frame = initial_frames.map(&:frame_length).inject(&:+) / initial_frames.length.to_f
     avg_bitrate_per_frame = initial_frames.map(&:frame_bitrate).inject(&:+) / initial_frames.length.to_f
     [avg_bytes_per_frame, avg_bitrate_per_frame]
   end
   def xing_header_usable_for_duration?(xing_header)
     xing_header && xing_header.frames && xing_header.byte_count && xing_header.vbr_scale
   end
@@ -241,7 +231,7 @@ class FormatParser::MP3Parser
   def deep_fetch(from, *keys)
     keys.inject(from) { |receiver, key_or_idx| receiver.fetch(key_or_idx) }
-  rescue KeyError, IndexError, NoMethodError
+  rescue IndexError, NoMethodError
     raise InvalidDeepFetch, "Could not retrieve #{keys.inspect} from #{from.inspect}"
   end

data/lib/parsers/mp3_parser/id3_v1.rb CHANGED

@@ -8,40 +8,34 @@ module FormatParser::MP3Parser::ID3V1
     :comment, :a30,
     :genre, :C,
   ]
-  packspec_keys = PACKSPEC.select.with_index{|_, i| i.even? }
+  packspec_keys = PACKSPEC.select.with_index { |_, i| i.even? }
   TAG_SIZE_BYTES = 128
   class TagInformation < Struct.new(*packspec_keys)
   end
   def attempt_id3_v1_extraction(io)
-    if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
-      return nil
-    end
+    return if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
     io.seek(io.size - 128)
     trailer_bytes = io.read(128)
-    unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
-      return nil
-    end
+    return unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
     id3_v1 = parse_id3_v1(trailer_bytes)
     # If all of the resulting strings are empty this ID3v1 tag is invalid and
     # we should ignore it.
-    strings_from_id3v1 = id3_v1.values.select{|e| e.is_a?(String) && e != 'TAG' }
-    if strings_from_id3v1.all?(&:empty?)
-      return nil
-    end
+    strings_from_id3v1 = id3_v1.values.select { |e| e.is_a?(String) && e != 'TAG' }
+    return if strings_from_id3v1.all?(&:empty?)
     id3_v1
   end
   def parse_id3_v1(byte_str)
-    keys, values = PACKSPEC.partition.with_index {|_, i| i.even? }
+    _keys, values = PACKSPEC.partition.with_index { |_, i| i.even? }
     unpacked_values = byte_str.unpack(values.join)
-    unpacked_values.map! {|e| e.is_a?(String) ? trim_id3v1_string(e) : e }
+    unpacked_values.map! { |e| e.is_a?(String) ? trim_id3v1_string(e) : e }
     TagInformation.new(unpacked_values)
   end

data/lib/parsers/mp3_parser/id3_v2.rb CHANGED

@@ -2,16 +2,16 @@ module FormatParser::MP3Parser::ID3V2
   def attempt_id3_v2_extraction(io)
     io.seek(0) # Only support header ID3v2
     header_bytes = io.read(10)
-    return nil unless header_bytes
+    return unless header_bytes
     header = parse_id3_v2_header(header_bytes)
-    return nil unless header[:tag] == 'ID3'
-    return nil unless header[:size] > 0
+    return unless header[:tag] == 'ID3'
+    return unless header[:size] > 0
     header_tag_payload = io.read(header[:size])
     header_tag_payload = StringIO.new(header_tag_payload)
-    return nil unless header_tag_payload.size == header[:size]
+    return unless header_tag_payload.size == header[:size]
     frames = []
     loop do
@@ -38,10 +38,10 @@ module FormatParser::MP3Parser::ID3V2
       :flags, :C1,
       :size, :a4,
     ]
-    keys, values = packspec.partition.with_index {|_, i| i.even? }
+    keys, values = packspec.partition.with_index { |_, i| i.even? }
     unpacked_values = byte_str.unpack(values.join)
     header_data = Hash[keys.zip(unpacked_values)]
     header_data[:version] = header_data[:version].unpack('C2')
     header_data[:size] = decode_syncsafe_int(header_data[:size])

data/lib/parsers/png_parser.rb CHANGED

@@ -19,9 +19,8 @@ class FormatParser::PNGParser
     6 => true,
   }
   def chunk_length_and_type(io)
-    safe_read(io, 8).unpack("Na4")
+    safe_read(io, 8).unpack('Na4')
   end
   def call(io)
@@ -36,7 +35,7 @@ class FormatParser::PNGParser
     # correct length as well.
     # IHDR _must_ come first, no exceptions. If it doesn't
     # we should not consider this a valid PNG.
-    return unless chunk_type == "IHDR" && chunk_length == 13
+    return unless chunk_type == 'IHDR' && chunk_length == 13
     chunk_data = safe_read(io, chunk_length)
     # Width:              4 bytes
@@ -46,8 +45,8 @@ class FormatParser::PNGParser
     # Compression method: 1 byte
     # Filter method:      1 byte
     # Interlace method:   1 byte
-    w, h, bit_depth, color_type,
-      compression_method, filter_method, interlace_method = chunk_data.unpack("N2C5")
+    w, h, _bit_depth, color_type, _compression_method,
+      _filter_method, _interlace_method = chunk_data.unpack('N2C5')
     color_mode = COLOR_TYPES.fetch(color_type)
     has_transparency = TRANSPARENCY_PER_COLOR_TYPE[color_type]
@@ -56,18 +55,12 @@ class FormatParser::PNGParser
     # we are dealing with an APNG.
     safe_skip(io, 4)
-    # dry-validation won't let booleans be filled with nil so we have to set
-    # has_animation to false by default
-    has_animation = nil
-    num_frames = nil
-    loop_n_times = nil
     chunk_length, chunk_type = chunk_length_and_type(io)
     if chunk_length == 8 && chunk_type == 'acTL'
       # https://wiki.mozilla.org/APNG_Specification#.60acTL.60:_The_Animation_Control_Chunk
       # Unlike GIF, we do have the frame count that we can recover
       has_animation = true
-      num_frames, loop_n_times = safe_read(io, 8).unpack('NN')
+      num_frames, _loop_n_times = safe_read(io, 8).unpack('NN')
     end
     FormatParser::Image.new(

data/lib/parsers/psd_parser.rb CHANGED

@@ -8,13 +8,13 @@ class FormatParser::PSDParser
   def call(io)
     io = FormatParser::IOConstraint.new(io)
-    magic_bytes = safe_read(io, 4).unpack("C4")
+    magic_bytes = safe_read(io, 4).unpack('C4')
     return unless magic_bytes == PSD_HEADER
     # We can be reasonably certain this is a PSD so we grab the height
     # and width bytes
-    w,h = safe_read(io, 22).unpack("x10N2")
+    w, h = safe_read(io, 22).unpack('x10N2')
     FormatParser::Image.new(
       format: :psd,
       width_px: w,

data/lib/parsers/tiff_parser.rb CHANGED

@@ -12,30 +12,28 @@ class FormatParser::TIFFParser
   def call(io)
     io = FormatParser::IOConstraint.new(io)
-    magic_bytes = safe_read(io, 4).unpack("C4")
+    magic_bytes = safe_read(io, 4).unpack('C4')
     endianness = scan_tiff_endianness(magic_bytes)
     return unless endianness
     w, h = read_tiff_by_endianness(io, endianness)
     scanner = FormatParser::EXIFParser.new(:tiff, io)
     scanner.scan_image_exif
-    return FormatParser::Image.new(
-        format: :tif,
-        width_px: w,
-        height_px: h,
-        # might be nil if EXIF metadata wasn't found
-        orientation: scanner.orientation
-      )
+    FormatParser::Image.new(
+      format: :tif,
+      width_px: w,
+      height_px: h,
+      # might be nil if EXIF metadata wasn't found
+      orientation: scanner.orientation
+    )
   end
   # TIFFs can be either big or little endian, so we check here
   # and set our unpack method argument to suit.
   def scan_tiff_endianness(magic_bytes)
     if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
-      "v"
+      'v'
     elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
-      "n"
-    else
-      nil
+      'n'
     end
   end

data/lib/parsers/wav_parser.rb CHANGED

@@ -6,10 +6,10 @@ class FormatParser::WAVParser
   formats :wav
   def call(io)
-    # Read the RIFF header. Chunk descriptor should be RIFF, the size should
-    # contain the size of the entire file in bytes minus 8 bytes for the
+    # Read the RIFF header. Chunk descriptor should be RIFF, the size should
+    # contain the size of the entire file in bytes minus 8 bytes for the
     # two fields not included in this count: chunk_id and size.
-    chunk_id, size, riff_type = safe_read(io, 12).unpack('a4la4')
+    chunk_id, _size, riff_type = safe_read(io, 12).unpack('a4la4')
     # The chunk_id and riff_type should be RIFF and WAVE respectively
     return unless chunk_id == 'RIFF' && riff_type == 'WAVE'

data/lib/read_limiter.rb CHANGED

@@ -26,25 +26,21 @@ class FormatParser::ReadLimiter
   def seek(to_offset)
     @seeks += 1
     if @max_seeks && @seeks > @max_seeks
-      raise BudgetExceeded, "Seek budget exceeded (%d seeks performed)" % @max_seeks
+      raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
     end
     @io.seek(to_offset)
   end
-  def size
-    @io.size
-  end
   def read(n)
     @bytes += n
     @reads += 1
     if @max_bytes && @bytes > @max_bytes
-      raise BudgetExceeded, "Read bytes budget (%d) exceeded" % @max_bytes
+      raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
     end
     if @max_reads && @reads > @max_reads
-      raise BudgetExceeded, "Number of read() calls exceeded (%d max)" % @max_reads
+      raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
     end
     @io.read(n)

data/lib/remote_io.rb CHANGED

@@ -1,5 +1,4 @@
 class FormatParser::RemoteIO
   # Represents a failure that might be retried
   # (like a 5xx response or a timeout)
   class IntermittentFailure < StandardError
@@ -33,7 +32,7 @@ class FormatParser::RemoteIO
   #
   # @return [Integer] the size of the remote resource
   def size
-    raise "Remote size not yet obtained, need to perform at least one read() to retrieve it" unless @remote_size
+    raise 'Remote size not yet obtained, need to perform at least one read() to retrieve it' unless @remote_size
     @remote_size
   end
@@ -52,8 +51,6 @@ class FormatParser::RemoteIO
       @remote_size = maybe_size
       @pos += maybe_body.bytesize
       maybe_body.force_encoding(Encoding::ASCII_8BIT)
-    else
-      nil
     end
   end
@@ -67,7 +64,7 @@ class FormatParser::RemoteIO
     # We use a GET and not a HEAD request followed by a GET because
     # S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
     # combine the first GET of a segment and retrieving the size of the resource
-    response = Faraday.get(@uri, nil, range: "bytes=%d-%d" % [range.begin, range.end])
+    response = Faraday.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
     case response.status
     when 200, 206
@@ -90,7 +87,7 @@ class FormatParser::RemoteIO
       # which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
       # S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
       # cannot hint size with this response - at lease not when working with S3
-      return nil
+      return
     when 500..599
       raise IntermittentFailure, "Server at #{@uri} replied with a #{response.status} and we might want to retry"
     else