RubyGems - format_parser - Versions diffs - 1.7.0 → 2.0.0.pre - Mend

format_parser 1.7.0 → 2.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/.github/workflows/main.yml +4 -9
data/CHANGELOG.md +6 -0
data/format_parser.gemspec +9 -11
data/lib/care.rb +5 -11
data/lib/format_parser/version.rb +1 -1
data/lib/format_parser.rb +8 -11
data/lib/io_utils.rb +2 -6
data/lib/parsers/aac_parser/adts_header_info.rb +3 -9
data/lib/parsers/dpx_parser/dpx_structs.rb +1 -1
data/lib/parsers/exif_parser.rb +2 -4
data/lib/parsers/fdx_parser.rb +2 -2
data/lib/parsers/flac_parser.rb +2 -6
data/lib/parsers/jpeg_parser.rb +2 -2
data/lib/parsers/moov_parser.rb +5 -7
data/lib/parsers/mp3_parser.rb +2 -6
data/lib/parsers/mpeg_parser.rb +1 -3
data/lib/parsers/wav_parser.rb +9 -12
data/lib/parsers/zip_parser/file_reader.rb +45 -70
data/lib/parsers/zip_parser.rb +1 -1
data/lib/read_limiter.rb +8 -16
data/lib/remote_io.rb +64 -34
data/lib/string.rb +9 -0
data/spec/attributes_json_spec.rb +0 -3
data/spec/remote_fetching_spec.rb +3 -8
data/spec/remote_io_spec.rb +116 -60
metadata +40 -79

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8b3cef665ae16efd68e8da952fd4656e2d9403f3899bd58839da3d8026db91f4
-  data.tar.gz: 7b0ec88efc2ea62f526699a4041cb3f1b3062994d2a6e0b24c2cfdf247aaf532
+  metadata.gz: d7c965b7783ecaea4802f7e585861b4400b2210fee4cb90388757530880fa074
+  data.tar.gz: fc8b7cc3f00825fa054c948a7ae817b1eee6457ffaec9e5a6b5bdd9a0b92d126
 SHA512:
-  metadata.gz: 24c6379ef4fd3b5a9f061c6fc40fd8c0498ad33213684d08dd27a8b8994ba40a98bf1fa18a6d6b3b8189aa71436ec9bb394e3b8d41a8dd3ca90a5b93d0f1718a
-  data.tar.gz: a2d3df2c17d2559aa99f52f04624032a9243915f2a1b28a6f3626bd3b9112eb8c325b0c9a286864d25c2b4e92a44a8939448a85d1004ac5d48c2f81f747749c1
+  metadata.gz: 73f774ebe540dfd54e87f89cedecfc0fabf4a97f4e2ef72afcd94edc5e0fbc344c7c67b365942e3bb915dfe76f94f038072671c259c2d366a69d64a73cbde960
+  data.tar.gz: bc1405329d521487ec4d0738c258fb12c3acdb37b6b8ecebf7451a866d5f1072cfc23774e2ecc3d7d297095ff280320756fb4cd9000de3eac447a105cf87028b

data/.github/workflows/main.yml CHANGED Viewed

@@ -14,8 +14,8 @@ jobs:
       matrix:
         ruby:
           - 2.7
-          - 2.6
-          - 2.5
+          - 3.0
+          - 3.1
           - jruby
     steps:
       - name: Checkout
@@ -60,15 +60,10 @@ jobs:
       matrix:
         ruby:
           - 2.7
-          - 2.6
-          - 2.5
+          - 3.0
+          - 3.1
           - jruby
         experimental: [false]
-        include:
-          - ruby: 3.1
-            experimental: true
-          - ruby: 3.0
-            experimental: true
     steps:
       - name: Checkout
         uses: actions/checkout@v2

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,9 @@
+## 2.0.0.pre (Prerelease)
+* Drop support for Ruby `<2.7`.
+* Drop faraday dependencies.
+* Loosen version constraints on other dependencies.
+* Update measurometer metrics for consistency and clarity.
 ## 1.7.0
 * Add support for `ARW` files.

data/format_parser.gemspec CHANGED Viewed

@@ -30,17 +30,15 @@ Gem::Specification.new do |spec|
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
-  spec.add_dependency 'ks', '~> 0.0'
-  spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
-  spec.add_dependency 'id3tag', '~> 0.14', '>= 0.14.2'
-  spec.add_dependency 'faraday', '~> 0.13'
-  spec.add_dependency 'faraday_middleware', '~> 0.14'
-  spec.add_dependency 'measurometer', '~> 1'
+  spec.add_dependency 'exifr', '>= 1.3.8'
+  spec.add_dependency 'id3tag', '>= 0.14.2'
+  spec.add_dependency 'ks'
+  spec.add_dependency 'measurometer'
-  spec.add_development_dependency 'rspec', '~> 3.0'
-  spec.add_development_dependency 'rake', '~> 12'
-  spec.add_development_dependency 'simplecov', '~> 0.15'
-  spec.add_development_dependency 'yard', '~> 0.9'
-  spec.add_development_dependency 'wetransfer_style', '0.5.0'
   spec.add_development_dependency 'parallel_tests'
+  spec.add_development_dependency 'rake'
+  spec.add_development_dependency 'rspec'
+  spec.add_development_dependency 'simplecov'
+  spec.add_development_dependency 'wetransfer_style', '1.0.0'
+  spec.add_development_dependency 'yard'
 end

data/lib/care.rb CHANGED Viewed

@@ -96,12 +96,8 @@ class Care
     # @return [String, nil] the content read from the IO or `nil` if no data was available
     # @raise ArgumentError
     def byteslice(io, at, n_bytes)
-      if n_bytes < 1
-        raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}"
-      end
-      if at < 0
-        raise ArgumentError, "Negative offsets are not supported (got #{at})"
-      end
+      raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}" if n_bytes < 1
+      raise ArgumentError, "Negative offsets are not supported (got #{at})" if at < 0
       first_page = at / @page_size
       last_page = (at + n_bytes) / @page_size
@@ -174,16 +170,14 @@ class Care
     # @param io[IO] the IO to read from
     # @param page_i[Integer] which page (zero-based) to read
     def read_page(io, page_i)
-      Measurometer.increment_counter('format_parser.parser.Care.page_reads_from_upsteam', 1)
+      Measurometer.increment_counter('format_parser.parser.care.page_reads_from_upsteam', 1)
       io.seek(page_i * @page_size)
-      read_result = Measurometer.instrument('format_parser.Care.read_page') { io.read(@page_size) }
+      read_result = Measurometer.instrument('format_parser.care.read_page') { io.read(@page_size) }
       if read_result.nil?
         # If the read went past the end of the IO the read result will be nil,
         # so we know our IO is exhausted here
-        if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
-          @lowest_known_empty_page = page_i
-        end
+        @lowest_known_empty_page = page_i if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
       elsif read_result.bytesize < @page_size
         # If we read less than we initially wanted we know there are no pages
         # to read following this one, so we can also optimize

data/lib/format_parser/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module FormatParser
-  VERSION = '1.7.0'
+  VERSION = '2.0.0.pre'
 end

data/lib/format_parser.rb CHANGED Viewed

@@ -20,6 +20,7 @@ module FormatParser
   require_relative 'care'
   require_relative 'active_storage/blob_analyzer'
   require_relative 'text'
+  require_relative 'string'
   # Define Measurometer in the internal namespace as well
   # so that we stay compatible for the applications that use it
@@ -87,8 +88,8 @@ module FormatParser
   # Parses the resource at the given `url` and returns the results as if it were any IO
   # given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
   #
-  # @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
-  # @param headers[Hash] (optional) the HTTP headers to request the object from using Faraday
+  # @param url[String, URI] the HTTP(S) URL to request the object from using `Range:` requests
+  # @param headers[Hash] (optional) the HTTP headers to request the object from
   # @param kwargs the keyword arguments to be delegated to `.parse`
   # @see {.parse}
   def self.parse_http(url, headers: {}, **kwargs)
@@ -177,9 +178,7 @@ module FormatParser
     # Convert the results from a lazy enumerator to an Array.
     results = results.to_a
-    if results.empty?
-      Measurometer.increment_counter('format_parser.unknown_files', 1)
-    end
+    Measurometer.increment_counter('format_parser.unknown_files', 1) if results.empty?
     amount == 1 ? results.first : results
   ensure
@@ -202,12 +201,12 @@ module FormatParser
   end
   def self.execute_parser_and_capture_expected_exceptions(parser, limited_io)
-    parser_name_for_instrumentation = parser.class.to_s.split('::').last
+    parser_name_for_instrumentation = parser.class.to_s.split('::').last.underscore
     Measurometer.instrument('format_parser.parser.%s' % parser_name_for_instrumentation) do
       parser.call(limited_io).tap do |result|
         if result
-          Measurometer.increment_counter('format_parser.detected_natures.%s' % result.nature, 1)
-          Measurometer.increment_counter('format_parser.detected_formats.%s' % result.format, 1)
+          Measurometer.increment_counter('format_parser.detected_natures', 1, nature: result.nature)
+          Measurometer.increment_counter('format_parser.detected_formats', 1, format: result.format)
         end
       end
     end
@@ -252,9 +251,7 @@ module FormatParser
     fitting_by_formats = assemble_parser_set[@parsers_per_format, desired_formats]
     parsers = fitting_by_natures & fitting_by_formats
-    if parsers.empty?
-      raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}"
-    end
+    raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}" if parsers.empty?
     # Order the parsers according to their priority value. The ones having a lower
     # value will sort higher and will be applied sooner

data/lib/io_utils.rb CHANGED Viewed

@@ -9,12 +9,8 @@ module FormatParser::IOUtils
     raise ArgumentError, 'Unbounded reads are not supported' if n.nil?
     buf = io.read(n)
-    unless buf
-      raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF"
-    end
-    if buf.bytesize != n
-      raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead"
-    end
+    raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF" unless buf
+    raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead" if buf.bytesize != n
     buf
   end

data/lib/parsers/aac_parser/adts_header_info.rb CHANGED Viewed

@@ -33,23 +33,17 @@ class FormatParser::AdtsHeaderInfo
   MPEG_VERSION_HASH = { 0 => 'MPEG-4', 1 => 'MPEG-2'}
   def mpeg4_sampling_frequency
-    if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
-      return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index]
-    end
+    return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index] if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
     nil
   end
   def profile_description
-    if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
-      return AAC_PROFILE_DESCRIPTION_HASH[@profile]
-    end
+    return AAC_PROFILE_DESCRIPTION_HASH[@profile] if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
     nil
   end
   def mpeg_version_description
-    if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
-      return MPEG_VERSION_HASH[@mpeg_version]
-    end
+    return MPEG_VERSION_HASH[@mpeg_version] if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
     nil
   end

data/lib/parsers/dpx_parser/dpx_structs.rb CHANGED Viewed

@@ -201,7 +201,7 @@ class FormatParser::DPXParser
     blanking :reserve, 52
     # Only expose the elements present
-    def image_elements #:nodoc:
+    def image_elements # :nodoc:
       @image_elements[0...number_elements]
     end
   end

data/lib/parsers/exif_parser.rb CHANGED Viewed

@@ -125,9 +125,7 @@ module FormatParser::EXIFParser
       # those and return the _last_ non-0 orientation, or 0 otherwise
       @multiple_exif_results.reverse_each do |exif_tag_frame|
         orientation_value = exif_tag_frame.orientation
-        if !orientation_value.nil? && orientation_value != 0
-          return orientation_value
-        end
+        return orientation_value if !orientation_value.nil? && orientation_value != 0
       end
       0 # If none were found - the orientation is unknown
     end
@@ -175,7 +173,7 @@ module FormatParser::EXIFParser
   EXIFR.logger = Logger.new(nil)
   def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
-    Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
+    Measurometer.instrument('format_parser.exif_parser.exif_from_tiff_io') do
       extended_io = IOExt.new(constrained_io)
       exif_raw_data = EXIFR::TIFF.new(extended_io)

data/lib/parsers/fdx_parser.rb CHANGED Viewed

@@ -24,9 +24,9 @@ class FormatParser::FDXParser
   def check_for_document_type(file_and_document_type)
     sanitized_data = file_and_document_type.downcase
     if sanitized_data.include?('finaldraft') && sanitized_data.include?('script')
-      return :fdx, :script
+      [:fdx, :script]
     else
-      return
+      nil
     end
   end

data/lib/parsers/flac_parser.rb CHANGED Viewed

@@ -20,15 +20,11 @@ class FormatParser::FLACParser
     minimum_block_size = bytestring_to_int(safe_read(io, 2))
-    if minimum_block_size < 16
-      raise MalformedFile, 'FLAC file minimum block size must be larger than 16'
-    end
+    raise MalformedFile, 'FLAC file minimum block size must be larger than 16' if minimum_block_size < 16
     maximum_block_size = bytestring_to_int(safe_read(io, 2))
-    if maximum_block_size < minimum_block_size
-      raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size'
-    end
+    raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size' if maximum_block_size < minimum_block_size
     minimum_frame_size = bytestring_to_int(safe_read(io, 3))
     maximum_frame_size = bytestring_to_int(safe_read(io, 3))

data/lib/parsers/jpeg_parser.rb CHANGED Viewed

@@ -69,7 +69,7 @@ class FormatParser::JPEGParser
       end
     end
-    Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
+    Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_read_until_capture', @buf.pos)
     # A single file might contain multiple EXIF data frames. In a JPEG this would
     # manifest as multiple APP1 markers. The way different programs handle these
@@ -156,7 +156,7 @@ class FormatParser::JPEGParser
     # Use StringIO.new instead of #write - https://github.com/aws/aws-sdk-ruby/issues/785#issuecomment-95456838
     exif_buf = StringIO.new(safe_read(@buf, app1_frame_content_length - EXIF_MAGIC_STRING.bytesize))
-    Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
+    Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_sent_to_exif_parser', exif_buf.size)
     @exif_data_frames << exif_from_tiff_io(exif_buf)
   rescue EXIFR::MalformedTIFF

data/lib/parsers/moov_parser.rb CHANGED Viewed

@@ -37,7 +37,7 @@ class FormatParser::MOOVParser
     # size that gets parsed just before.
     max_read_offset = 0xFFFFFFFF
     decoder = Decoder.new
-    atom_tree = Measurometer.instrument('format_parser.Decoder.extract_atom_stream') do
+    atom_tree = Measurometer.instrument('format_parser.decoder.extract_atom_stream') do
       decoder.extract_atom_stream(io, max_read_offset)
     end
@@ -93,12 +93,10 @@ class FormatParser::MOOVParser
   def parse_dimensions(decoder, atom_tree)
     video_trak_atom = decoder.find_video_trak_atom(atom_tree)
-    tkhd = begin
-      if video_trak_atom
-        decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
-      else
-        decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
-      end
+    tkhd = if video_trak_atom
+      decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
+    else
+      decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
     end
     if tkhd

data/lib/parsers/mp3_parser.rb CHANGED Viewed

@@ -179,13 +179,9 @@ class FormatParser::MP3Parser
         frame_data_str = io.read(frame_detail.frame_length)
         io.seek(io.pos - frame_detail.frame_length)
         xing_header = attempt_xing_header(frame_data_str)
-        if xing_header_usable_for_duration?(xing_header)
-          return [xing_header, mpeg_frames]
-        end
-      end
-      if frame_detail.frame_length > 1 # jump over current frame body
-        io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
+        return [xing_header, mpeg_frames] if xing_header_usable_for_duration?(xing_header)
       end
+      io.seek(io.pos + frame_detail.frame_length - bytes_to_read) if frame_detail.frame_length > 1 # jump over current frame body
     end
     [nil, mpeg_frames]
   rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover

data/lib/parsers/mpeg_parser.rb CHANGED Viewed

@@ -44,9 +44,7 @@ class FormatParser::MPEGParser
       io.seek(pos + 1)
       horizontal_size, vertical_size = parse_image_size(io)
       ratio_code, rate_code = parse_rate_information(io)
-      if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
-        return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
-      end
+      return file_info(horizontal_size, vertical_size, ratio_code, rate_code) if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
     end
     nil # otherwise the return value of Integer#times will be returned
   rescue FormatParser::IOUtils::InvalidRead

data/lib/parsers/wav_parser.rb CHANGED Viewed

@@ -34,9 +34,7 @@ class FormatParser::WAVParser
       case chunk_type
       when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
         fmt_data = unpack_fmt_chunk(io, chunk_size)
-        if fmt_data[:audio_format] != 1 and fact_processed
-          return process_non_pcm(fmt_data, total_sample_frames)
-        end
+        return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
         fmt_processed = true
       when 'data'
         return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
@@ -45,11 +43,10 @@ class FormatParser::WAVParser
       when 'fact'
         total_sample_frames = safe_read(io, 4).unpack('l').first
         safe_skip(io, chunk_size - 4)
-        if fmt_processed and fmt_data[:audio_format] != 1
-          return process_non_pcm(fmt_data, total_sample_frames)
-        end
+        return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
         fact_processed = true
-      else # Skip this chunk until a known chunk is encountered
+      else
+        # Skip this chunk until a known chunk is encountered
         safe_skip(io, chunk_size)
       end
     end
@@ -70,11 +67,11 @@ class FormatParser::WAVParser
     safe_skip(io, chunk_size - 16) # skip the extra fields
     {
-      audio_format:    fmt_info[0],
-      channels:        fmt_info[1],
-      sample_rate:     fmt_info[2],
-      byte_rate:       fmt_info[3],
-      block_align:     fmt_info[4],
+      audio_format: fmt_info[0],
+      channels: fmt_info[1],
+      sample_rate: fmt_info[2],
+      byte_rate: fmt_info[3],
+      block_align: fmt_info[4],
       bits_per_sample: fmt_info[5],
     }
   end

data/lib/parsers/zip_parser/file_reader.rb CHANGED Viewed

@@ -27,52 +27,43 @@ class FormatParser::ZIPParser::FileReader
   # To prevent too many tiny reads, read the maximum possible size of end of
   # central directory record upfront (all the fixed fields + at most 0xFFFF
   # bytes of the archive comment)
-  MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
-    begin
-      4 + # Offset of the start of central directory
-        4 + # Size of the central directory
-        2 + # Number of files in the cdir
-        4 + # End-of-central-directory signature
-        2 + # Number of this disk
-        2 + # Number of disk with the start of cdir
-        2 + # Number of files in the cdir of this disk
-        2 + # The comment size
-        0xFFFF # Maximum comment size
-    end
+  MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
+                                             4 + # Size of the central directory
+                                             2 + # Number of files in the cdir
+                                             4 + # End-of-central-directory signature
+                                             2 + # Number of this disk
+                                             2 + # Number of disk with the start of cdir
+                                             2 + # Number of files in the cdir of this disk
+                                             2 + # The comment size
+                                             0xFFFF # Maximum comment size
   # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
   # The maximum size is all the usual items, plus the maximum size
   # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
-  MAX_LOCAL_HEADER_SIZE =
-    begin
-      4 + # signature
-        2 + # Version needed to extract
-        2 + # gp flags
-        2 + # storage mode
-        2 + # dos time
-        2 + # dos date
-        4 + # CRC32
-        4 + # Comp size
-        4 + # Uncomp size
-        2 + # Filename size
-        2 + # Extra fields size
-        0xFFFF + # Maximum filename size
-        0xFFFF   # Maximum extra fields size
-    end
-  SIZE_OF_USABLE_EOCD_RECORD =
-    begin
-      4 + # Signature
-        2 + # Number of this disk
-        2 + # Number of the disk with the EOCD record
-        2 + # Number of entries in the central directory of this disk
-        2 + # Number of entries in the central directory total
-        4 + # Size of the central directory
-        4   # Start of the central directory offset
-    end
+  MAX_LOCAL_HEADER_SIZE = 4 + # signature
+                          2 + # Version needed to extract
+                          2 + # gp flags
+                          2 + # storage mode
+                          2 + # dos time
+                          2 + # dos date
+                          4 + # CRC32
+                          4 + # Comp size
+                          4 + # Uncomp size
+                          2 + # Filename size
+                          2 + # Extra fields size
+                          0xFFFF + # Maximum filename size
+                          0xFFFF # Maximum extra fields size
+  SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
+                               2 + # Number of this disk
+                               2 + # Number of the disk with the EOCD record
+                               2 + # Number of entries in the central directory of this disk
+                               2 + # Number of entries in the central directory total
+                               4 + # Size of the central directory
+                               4 # Start of the central directory offset
   private_constant :C_UINT32LE, :C_UINT16LE, :C_UINT64LE, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
-                   :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
+    :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
   # Represents a file within the ZIP archive being read
   class ZipEntry
@@ -216,7 +207,7 @@ class FormatParser::ZIPParser::FileReader
     io.seek(absolute_pos)
     unless absolute_pos == io.pos
       raise ReadError,
-            "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
+        "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
     end
     nil
   end
@@ -235,18 +226,14 @@ class FormatParser::ZIPParser::FileReader
     io.seek(io.pos + n)
     pos_after = io.pos
     delta = pos_after - pos_before
-    unless delta == n
-      raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead"
-    end
+    raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead" unless delta == n
     nil
   end
   def read_n(io, n_bytes)
     io.read(n_bytes).tap do |d|
       raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
-      unless d.bytesize == n_bytes
-        raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}"
-      end
+      raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}" unless d.bytesize == n_bytes
     end
   end
@@ -310,15 +297,9 @@ class FormatParser::ZIPParser::FileReader
         #
         # It means that before we read this stuff we need to check if the previously-read
         # values are at overflow, and only _then_ proceed to read them. Bah.
-        if e.uncompressed_size == 0xFFFFFFFF
-          e.uncompressed_size = read_8b(zip64_extra)
-        end
-        if e.compressed_size == 0xFFFFFFFF
-          e.compressed_size = read_8b(zip64_extra)
-        end
-        if e.local_file_header_offset == 0xFFFFFFFF
-          e.local_file_header_offset = read_8b(zip64_extra)
-        end
+        e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
+        e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
+        e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
         # Disk number comes last and we can skip it anyway, since we do
         # not support multi-disk archives
       end
@@ -370,9 +351,7 @@ class FormatParser::ZIPParser::FileReader
       signature, *_rest, comment_size = maybe_record.unpack(unpack_pattern)
       # Check the only condition for the match
-      if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
-        return check_at # Found the EOCD marker location
-      end
+      return check_at if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
     end
     # If we haven't caught anything, return nil deliberately instead of returning the last statement
     nil
@@ -422,16 +401,12 @@ class FormatParser::ZIPParser::FileReader
     disk_n = read_4b(zip64_eocdr) # number of this disk
     disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
-    if disk_n != disk_n_with_eocdr
-      raise UnsupportedFeature, 'The archive spans multiple disks'
-    end
+    raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr
     num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
-    num_files_total     = read_8b(zip64_eocdr) # files total in the central directory
+    num_files_total = read_8b(zip64_eocdr) # files total in the central directory
-    if num_files_this_disk != num_files_total
-      raise UnsupportedFeature, 'The archive spans multiple disks'
-    end
+    raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total
     log do
       format(
@@ -439,8 +414,8 @@ class FormatParser::ZIPParser::FileReader
         num_files_total)
     end
-    central_dir_size    = read_8b(zip64_eocdr) # Size of the central directory
-    central_dir_offset  = read_8b(zip64_eocdr) # Where the central directory starts
+    central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
+    central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
     [num_files_total, central_dir_offset, central_dir_size]
   end
@@ -456,8 +431,8 @@ class FormatParser::ZIPParser::FileReader
     skip_ahead_2(io) # number_of_this_disk
     skip_ahead_2(io) # number of the disk with the EOCD record
     skip_ahead_2(io) # number of entries in the central directory of this disk
-    num_files = read_2b(io)   # number of entries in the central directory total
-    cdir_size = read_4b(io)   # size of the central directory
+    num_files = read_2b(io) # number of entries in the central directory total
+    cdir_size = read_4b(io) # size of the central directory
     cdir_offset = read_4b(io) # start of central directorty offset
     [num_files, cdir_offset, cdir_size]
   end

data/lib/parsers/zip_parser.rb CHANGED Viewed

@@ -34,7 +34,7 @@ class FormatParser::ZIPParser
     end
   rescue FileReader::Error
     # This is not a ZIP, or a broken ZIP.
-    return
+    nil
   end
   def directory?(zip_entry)

data/lib/read_limiter.rb CHANGED Viewed

@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
   # @return Integer
   def seek(to)
     @seeks += 1
-    if @max_seeks && @seeks > @max_seeks
-      raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
-    end
+    raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
     @io.seek(to)
   end
@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
     @bytes += n_bytes
     @reads += 1
-    if @max_bytes && @bytes > @max_bytes
-      raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
-    end
-    if @max_reads && @reads > @max_reads
-      raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
-    end
+    raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
+    raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads
     @io.read(n_bytes)
   end
   # Sends the metrics about the state of this ReadLimiter to a Measurometer
   #
-  # @param prefix[String] the prefix to set. For example, with prefix "TIFF" the metrics will be called
-  #   `format_parser.TIFF.read_limiter.num_seeks` and so forth
+  # @param parser[String] the parser to add as a tag.
   # @return void
-  def send_metrics(prefix)
-    Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
-    Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
-    Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
+  def send_metrics(parser)
+    Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
+    Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
+    Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
   end
   # Resets all the recorded call counters so that the object can be reused for the next parser,