RubyGems - format_parser - Versions diffs - 0.2.0 → 0.3.0 - Mend

format_parser 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

checksums.yaml +4 -4
data/.rubocop.yml +2 -0
data/.travis.yml +1 -0
data/README.md +14 -11
data/format_parser.gemspec +11 -10
data/lib/care.rb +9 -17
data/lib/format_parser.rb +11 -13
data/lib/format_parser/version.rb +1 -1
data/lib/io_constraint.rb +3 -3
data/lib/io_utils.rb +4 -10
data/lib/parsers/aiff_parser.rb +9 -10
data/lib/parsers/dpx_parser.rb +42 -42
data/lib/parsers/dsl.rb +2 -2
data/lib/parsers/exif_parser.rb +3 -8
data/lib/parsers/fdx_parser.rb +3 -3
data/lib/parsers/gif_parser.rb +3 -5
data/lib/parsers/jpeg_parser.rb +4 -8
data/lib/parsers/moov_parser.rb +8 -6
data/lib/parsers/moov_parser/decoder.rb +105 -122
data/lib/parsers/mp3_parser.rb +36 -46
data/lib/parsers/mp3_parser/id3_v1.rb +7 -13
data/lib/parsers/mp3_parser/id3_v2.rb +6 -6
data/lib/parsers/png_parser.rb +5 -12
data/lib/parsers/psd_parser.rb +2 -2
data/lib/parsers/tiff_parser.rb +10 -12
data/lib/parsers/wav_parser.rb +3 -3
data/lib/read_limiter.rb +3 -7
data/lib/remote_io.rb +3 -6
data/spec/care_spec.rb +10 -10
data/spec/file_information_spec.rb +1 -3
data/spec/format_parser_spec.rb +6 -6
data/spec/io_utils_spec.rb +7 -7
data/spec/parsers/exif_parser_spec.rb +2 -3
data/spec/parsers/gif_parser_spec.rb +1 -1
data/spec/parsers/jpeg_parser_spec.rb +0 -1
data/spec/parsers/moov_parser_spec.rb +2 -3
data/spec/parsers/png_parser_spec.rb +1 -1
data/spec/parsers/tiff_parser_spec.rb +0 -1
data/spec/parsers/wav_parser_spec.rb +3 -3
data/spec/read_limiter_spec.rb +0 -1
data/spec/remote_fetching_spec.rb +34 -20
data/spec/remote_io_spec.rb +20 -21
data/spec/spec_helper.rb +2 -2
metadata +19 -4

data/lib/parsers/dsl.rb CHANGED

@@ -18,8 +18,8 @@ module FormatParser
       private
       def __define(name, value)
-        throw ArgumentError("empty array") if value.empty?
-        throw ArgumentError("requires array of symbols") if value.any? { |s| !s.is_a?(Symbol) }
+        throw ArgumentError('empty array') if value.empty?
+        throw ArgumentError('requires array of symbols') if value.any? { |s| !s.is_a?(Symbol) }
         define_method(name) do
           value
         end

data/lib/parsers/exif_parser.rb CHANGED

@@ -11,8 +11,6 @@ class FormatParser::EXIFParser
     def readbyte
       if byte = read(1)
         byte.unpack('C').first
-      else
-        nil
       end
     end
@@ -62,19 +60,16 @@ class FormatParser::EXIFParser
   def orientation_parser(raw_exif_data)
     value = raw_exif_data.orientation.to_i
-    if valid_orientation?(value)
-      @orientation = ORIENTATIONS[value - 1]
-    end
+    @orientation = ORIENTATIONS[value - 1] if valid_orientation?(value)
   end
   def valid_orientation?(value)
     (1..ORIENTATIONS.length).include?(value)
   end
-  def cr2_check(file_io)
+  def cr2_check(_file_io)
     @file_io.seek(8)
     cr2_check_bytes = @file_io.read(2)
-    cr2_check_bytes == "CR" ? true : false
+    cr2_check_bytes == 'CR'
   end
 end

data/lib/parsers/fdx_parser.rb CHANGED

@@ -6,7 +6,7 @@ class FormatParser::FDXParser
   natures :document
   def call(io)
-    return if !xml_check(io)
+    return unless xml_check(io)
     file_and_document_type = safe_read(io, 100)
     file_type, document_type = check_for_document_type(file_and_document_type)
     return if file_type != :fdx
@@ -18,12 +18,12 @@ class FormatParser::FDXParser
   def xml_check(io)
     xml_check = safe_read(io, 5)
-    xml_check == "<?xml"
+    xml_check == '<?xml'
   end
   def check_for_document_type(file_and_document_type)
     sanitized_data = file_and_document_type.downcase
-    if sanitized_data.include?("finaldraft") && sanitized_data.include?("script")
+    if sanitized_data.include?('finaldraft') && sanitized_data.include?('script')
       return :fdx, :script
     else
       return

data/lib/parsers/gif_parser.rb CHANGED

@@ -14,7 +14,7 @@ class FormatParser::GIFParser
     return unless HEADERS.include?(header)
     w, h = safe_read(io, 4).unpack('vv')
-    gct_byte, bgcolor_index, pixel_aspect_ratio = safe_read(io, 5).unpack('Cvv')
+    gct_byte, _bgcolor_index, _pixel_aspect_ratio = safe_read(io, 5).unpack('Cvv')
     # and actually onwards for this:
     # http://www.matthewflickinger.com/lab/whatsinagif/bits_and_bytes.asp
@@ -24,12 +24,10 @@ class FormatParser::GIFParser
     bytes_per_color = gct_byte >> 6
     unpacked_radix = gct_byte & 0b00000111
     num_colors = 2**(unpacked_radix + 1)
-    gct_table_size = num_colors*bytes_per_color
+    gct_table_size = num_colors * bytes_per_color
     # If we have the global color table - skip over it
-    if has_gct
-      safe_read(io, gct_table_size)
-    end
+    safe_read(io, gct_table_size) if has_gct
     # Now it gets interesting - we are at the place where an
     # application extension for the NETSCAPE2.0 block will occur.

data/lib/parsers/jpeg_parser.rb CHANGED

@@ -24,10 +24,6 @@ class FormatParser::JPEGParser
   private
-  def advance(n)
-    safe_read(@buf, n); nil
-  end
   def read_char
     safe_read(@buf, 1).unpack('C').first
   end
@@ -68,7 +64,6 @@ class FormatParser::JPEGParser
     nil # Due to the way JPEG is structured it is possible that some invalid inputs will get caught
   end
   # Read a byte, if it is 0xFF then skip bytes as long as they are also 0xFF (byte stuffing)
   # and return the first byte scanned that is not 0xFF
   def read_next_marker
@@ -85,7 +80,8 @@ class FormatParser::JPEGParser
     size   = read_char
     if length == (size * 3) + 8
-      @width, @height = width, height
+      @width = width
+      @height = height
     else
       raise InvalidStructure
     end
@@ -93,7 +89,7 @@ class FormatParser::JPEGParser
   def scan_app1_frame
     frame = @buf.read(8)
-    if frame.include?("Exif")
+    if frame.include?('Exif')
       scanner = FormatParser::EXIFParser.new(:jpeg, @buf)
       if scanner.scan_image_exif
         @exif_output = scanner.exif_data
@@ -111,7 +107,7 @@ class FormatParser::JPEGParser
   def skip_frame
     length = read_short - 2
-    advance(length)
+    safe_skip(@buf, length)
   end
   FormatParser.register_parser_constructor self

data/lib/parsers/moov_parser.rb CHANGED

@@ -7,9 +7,9 @@ class FormatParser::MOOVParser
   # we can reasonably call "file type" (something
   # usable as a filename extension)
   FTYP_MAP = {
-    "qt  " => :mov,
-    "mp4 " => :mp4,
-    "m4a " => :m4a,
+    'qt  ' => :mov,
+    'mp4 ' => :mp4,
+    'm4a ' => :m4a,
   }
   natures :video
@@ -20,7 +20,7 @@ class FormatParser::MOOVParser
   private_constant :Decoder
   def call(io)
-    return nil unless matches_moov_definition?(io)
+    return unless matches_moov_definition?(io)
     # Now we know we are in a MOOV, so go back and parse out the atom structure.
     # Parsing out the atoms does not read their contents - at least it doesn't
@@ -40,7 +40,8 @@ class FormatParser::MOOVParser
     ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
     file_type = ftyp_atom.field_value(:major_brand)
-    width, height = nil, nil
+    width = nil
+    height = nil
     # Try to find the width and height in the tkhd
     if tkhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
@@ -50,7 +51,8 @@ class FormatParser::MOOVParser
     # Try to find the "topmost" duration (respecting edits)
     if mdhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'mvhd')
-      timescale, duration = mdhd.field_value(:tscale), mdhd.field_value(:duration)
+      timescale = mdhd.field_value(:tscale)
+      duration = mdhd.field_value(:duration)
       media_duration_s = duration / timescale.to_f
     end

data/lib/parsers/moov_parser/decoder.rb CHANGED

@@ -2,10 +2,9 @@
 # read atoms and parse their data fields if applicable. Also contains
 # a few utility functions for finding atoms in a list etc.
 class FormatParser::MOOVParser::Decoder
   class Atom < Struct.new(:at, :atom_size, :atom_type, :path, :children, :atom_fields)
     def to_s
-      "%s (%s): %d bytes at offset %d" % [atom_type, path.join('.'), atom_size, at]
+      '%s (%s): %d bytes at offset %d' % [atom_type, path.join('.'), atom_size, at]
     end
     def field_value(data_field)
@@ -20,10 +19,10 @@ class FormatParser::MOOVParser::Decoder
   end
   # Atoms (boxes) that are known to only contain children, no data fields
-  KNOWN_BRANCH_ATOM_TYPES = %w( moov mdia trak clip edts minf dinf stbl udta meta)
+  KNOWN_BRANCH_ATOM_TYPES = %w(moov mdia trak clip edts minf dinf stbl udta meta)
   # Atoms (boxes) that are known to contain both leaves and data fields
-  KNOWN_BRANCH_AND_LEAF_ATOM_TYPES = %w( meta ) # the udta.meta thing used by iTunes
+  KNOWN_BRANCH_AND_LEAF_ATOM_TYPES = %w(meta) # the udta.meta thing used by iTunes
   # Limit how many atoms we scan in sequence, to prevent derailments
   MAX_ATOMS_AT_LEVEL = 128
@@ -32,13 +31,13 @@ class FormatParser::MOOVParser::Decoder
   # matches the type, drilling down if a list of atom names is given
   def find_first_atom_by_path(atoms, *atom_types)
     type_to_find = atom_types.shift
-    requisite = atoms.find {|e| e.atom_type == type_to_find }
+    requisite = atoms.find { |e| e.atom_type == type_to_find }
     # Return if we found our match
     return requisite if atom_types.empty?
     # Return nil if we didn't find the match at this nesting level
-    return nil unless requisite
+    return unless requisite
     # ...otherwise drill further down
     find_first_atom_by_path(requisite.children || [], *atom_types)
@@ -50,7 +49,7 @@ class FormatParser::MOOVParser::Decoder
     # numbr of bytes is reserved for the compatible brands, 4 bytes per
     # brand.
     num_brands = (atom_size - 8 - 8) / 4
-    ret = {
+    {
       major_brand: read_bytes(io, 4),
       minor_version: read_binary_coded_decimal(io),
       compatible_brands: (1..num_brands).map { read_bytes(io, 4) },
@@ -60,94 +59,88 @@ class FormatParser::MOOVParser::Decoder
   def parse_tkhd_atom(io, _)
     version = read_byte_value(io)
     is_v1 = version == 1
-    tkhd_info_bites = [
-      :version, version,
-      :flags, read_chars(io, 3),
-      :ctime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :mtime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :trak_id, read_32bit_uint(io),
-      :reserved_1, read_chars(io, 4),
-      :duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :reserved_2, read_chars(io, 8),
-      :layer, read_16bit_uint(io),
-      :alternate_group, read_16bit_uint(io),
-      :volume, read_16bit_uint(io),
-      :reserved_3, read_chars(io, 2),
-      :matrix_structure, (1..9).map { read_32bit_fixed_point(io) },
-      :track_width, read_32bit_fixed_point(io),
-      :track_height, read_32bit_fixed_point(io),
-    ]
-    repack(tkhd_info_bites)
+    {
+      version: version,
+      flags: read_chars(io, 3),
+      ctime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      mtime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      trak_id: read_32bit_uint(io),
+      reserved_1: read_chars(io, 4),
+      duration: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      reserved_2: read_chars(io, 8),
+      layer: read_16bit_uint(io),
+      alternate_group: read_16bit_uint(io),
+      volume: read_16bit_uint(io),
+      reserved_3: read_chars(io, 2),
+      matrix_structure: (1..9).map { read_32bit_fixed_point(io) },
+      track_width: read_32bit_fixed_point(io),
+      track_height: read_32bit_fixed_point(io),
+    }
   end
   def parse_mdhd_atom(io, _)
     version = read_byte_value(io)
     is_v1 = version == 1
-    mdhd_info_bites = [
-      :version, version,
-      :flags, read_bytes(io, 3),
-      :ctime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :mtime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :tscale, read_32bit_uint(io),
-      :duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :language, read_32bit_uint(io),
-      :quality, read_32bit_uint(io),
-    ]
-    repack(mdhd_info_bites)
+    {
+      version: version,
+      flags: read_bytes(io, 3),
+      ctime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      mtime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      tscale: read_32bit_uint(io),
+      duration: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      language: read_32bit_uint(io),
+      quality: read_32bit_uint(io),
+    }
   end
   def parse_vmhd_atom(io, _)
-    vmhd_info_bites = [
-      :version, read_byte_value(io),
-      :flags, read_bytes(io, 3),
-      :graphics_mode, read_bytes(io, 2),
-      :opcolor_r, read_32bit_uint(io),
-      :opcolor_g, read_32bit_uint(io),
-      :opcolor_b, read_32bit_uint(io),
-    ]
-    repack(vmhd_info_bites)
+    {
+      version: read_byte_value(io),
+      flags: read_bytes(io, 3),
+      graphics_mode: read_bytes(io, 2),
+      opcolor_r: read_32bit_uint(io),
+      opcolor_g: read_32bit_uint(io),
+      opcolor_b: read_32bit_uint(io),
+    }
   end
   def parse_mvhd_atom(io, _)
     version = read_byte_value(io)
     is_v1 = version == 1
-    mvhd_info_bites = [
-      :version, version,
-      :flags, read_bytes(io, 3),
-      :ctime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :mtime, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :tscale, read_32bit_uint(io),
-      :duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-      :preferred_rate, read_32bit_uint(io),
-      :reserved, read_bytes(io, 10),
-      :matrix_structure, (1..9).map { read_32bit_fixed_point(io) },
-      :preview_time, read_32bit_uint(io),
-      :preview_duration, read_32bit_uint(io),
-      :poster_time, read_32bit_uint(io),
-      :selection_time, read_32bit_uint(io),
-      :selection_duration, read_32bit_uint(io),
-      :current_time, read_32bit_uint(io),
-      :next_trak_id, read_32bit_uint(io),
-    ]
-    repack(mvhd_info_bites)
+    {
+      version: version,
+      flags: read_bytes(io, 3),
+      ctime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      mtime: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      tscale: read_32bit_uint(io),
+      duration: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+      preferred_rate: read_32bit_uint(io),
+      reserved: read_bytes(io, 10),
+      matrix_structure: (1..9).map { read_32bit_fixed_point(io) },
+      preview_time: read_32bit_uint(io),
+      preview_duration: read_32bit_uint(io),
+      poster_time: read_32bit_uint(io),
+      selection_time: read_32bit_uint(io),
+      selection_duration: read_32bit_uint(io),
+      current_time: read_32bit_uint(io),
+      next_trak_id: read_32bit_uint(io),
+    }
   end
   def parse_dref_atom(io, _)
-    dref_info_bites = [
-      :version, read_byte_value(io),
-      :flags, read_bytes(io, 3),
-      :num_entries, read_32bit_uint(io),
-    ]
-    dict = repack(dref_info_bites)
+    dict = {
+      version: read_byte_value(io),
+      flags: read_bytes(io, 3),
+      num_entries: read_32bit_uint(io),
+    }
     num_entries = dict[:num_entries]
     entries = (1..num_entries).map do
-      dref_entry_bites = [
-        :size, read_32bit_uint(io),
-        :type, read_bytes(io, 4),
-        :version, read_bytes(io, 1),
-        :flags, read_bytes(io, 3),
-      ]
-      entry = repack(dref_entry_bites)
+      entry = {
+        size: read_32bit_uint(io),
+        type: read_bytes(io, 4),
+        version: read_bytes(io, 1),
+        flags: read_bytes(io, 3),
+      }
       entry[:data] = read_bytes(io, entry[:size] - 12)
       entry
     end
@@ -156,21 +149,19 @@ class FormatParser::MOOVParser::Decoder
   end
   def parse_elst_atom(io, _)
-    elst_info_bites = [
-      :version, read_byte_value(io),
-      :flags, read_bytes(io, 3),
-      :num_entries, read_32bit_uint(io),
-    ]
-    dict = repack(elst_info_bites)
+    dict = {
+      version: read_byte_value(io),
+      flags: read_bytes(io, 3),
+      num_entries: read_32bit_uint(io),
+    }
     is_v1 = dict[:version] == 1 # Usual is 0, version 1 has 64bit durations
     num_entries = dict[:num_entries]
     entries = (1..num_entries).map do
-      entry_bites = [
-        :track_duration, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-        :media_time, is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
-        :media_rate, read_32bit_uint(io),
-      ]
-      repack(entry_bites)
+      {
+        track_duration: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+        media_time: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
+        media_rate: read_32bit_uint(io),
+      }
     end
     dict[:entries] = entries
     dict
@@ -178,21 +169,20 @@ class FormatParser::MOOVParser::Decoder
   def parse_hdlr_atom(io, atom_size)
     sub_io = StringIO.new(io.read(atom_size - 8))
-    hdlr_info_bites = [
-      :version, read_byte_value(sub_io),
-      :flags, read_bytes(sub_io, 3),
-      :component_type, read_bytes(sub_io, 4),
-      :component_subtype, read_bytes(sub_io, 4),
-      :component_manufacturer, read_bytes(sub_io, 4),
-      :component_flags, read_bytes(sub_io, 4),
-      :component_flags_mask, read_bytes(sub_io, 4),
-      :component_name, sub_io.read,
-    ]
-    repack(hdlr_info_bites)
+    {
+      version: read_byte_value(sub_io),
+      flags: read_bytes(sub_io, 3),
+      component_type: read_bytes(sub_io, 4),
+      component_subtype: read_bytes(sub_io, 4),
+      component_manufacturer: read_bytes(sub_io, 4),
+      component_flags: read_bytes(sub_io, 4),
+      component_flags_mask: read_bytes(sub_io, 4),
+      component_name: sub_io.read,
+    }
   end
   def parse_atom_fields_per_type(io, atom_size, atom_type)
-    if respond_to?("parse_#{atom_type}_atom", including_privates = true)
+    if respond_to?("parse_#{atom_type}_atom", true)
       send("parse_#{atom_type}_atom", io, atom_size)
     else
       nil # We can't look inside this leaf atom
@@ -208,29 +198,28 @@ class FormatParser::MOOVParser::Decoder
     MAX_ATOMS_AT_LEVEL.times do
       atom_pos = io.pos
-      if atom_pos - initial_pos >= max_read
-        break
-      end
+      break if atom_pos - initial_pos >= max_read
-      size_and_type = io.read(4+4)
-      if size_and_type.to_s.bytesize < 8
-        break
-      end
+      size_and_type = io.read(4 + 4)
+      break if size_and_type.to_s.bytesize < 8
       atom_size, atom_type = size_and_type.unpack('Na4')
       # If atom_size is specified to be 1, it is larger than what fits into the
       # 4 bytes and we need to read it right after the atom type
-      if atom_size == 1
-        atom_size = read_64bit_uint(io)
-      end
+      atom_size = read_64bit_uint(io) if atom_size == 1
+      # We are allowed to read what comes after
+      # the atom size and atom type, but not any more than that
+      size_of_atom_type_and_size = io.pos - atom_pos
+      atom_size_sans_header = atom_size - size_of_atom_type_and_size
       children, fields = if KNOWN_BRANCH_AND_LEAF_ATOM_TYPES.include?(atom_type)
-        parse_atom_children_and_data_fields(io, atom_size, atom_type)
+        parse_atom_children_and_data_fields(io, atom_size_sans_header, atom_type)
       elsif KNOWN_BRANCH_ATOM_TYPES.include?(atom_type)
-        [extract_atom_stream(io, atom_size - 8, current_branch + [atom_type]), nil]
+        [extract_atom_stream(io, atom_size_sans_header, current_branch + [atom_type]), nil]
       else # Assume leaf atom
-        [nil, parse_atom_fields_per_type(io, atom_size, atom_type)]
+        [nil, parse_atom_fields_per_type(io, atom_size_sans_header, atom_type)]
       end
       atoms << Atom.new(atom_pos, atom_size, atom_type, current_branch + [atom_type], children, fields)
@@ -241,11 +230,11 @@ class FormatParser::MOOVParser::Decoder
   end
   def read_16bit_fixed_point(io)
-    whole, fraction = io.read(2).unpack('CC')
+    _whole, _fraction = io.read(2).unpack('CC')
   end
   def read_32bit_fixed_point(io)
-    whole, fraction = io.read(4).unpack('nn')
+    _whole, _fraction = io.read(4).unpack('nn')
   end
   def read_chars(io, n)
@@ -274,12 +263,6 @@ class FormatParser::MOOVParser::Decoder
   def read_binary_coded_decimal(io)
     bcd_string = io.read(4)
-    bcd_string.insert(0, '0') if bcd_string.length.odd?
     [bcd_string].pack('H*').unpack('C*')
   end
-  def repack(properties_to_packspecs)
-    keys, bytes = properties_to_packspecs.partition.with_index { |_, i| i.even? }
-    Hash[keys.zip(bytes)]
-  end
 end