format_parser 1.7.0 → 2.0.0.pre

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8b3cef665ae16efd68e8da952fd4656e2d9403f3899bd58839da3d8026db91f4
- data.tar.gz: 7b0ec88efc2ea62f526699a4041cb3f1b3062994d2a6e0b24c2cfdf247aaf532
+ metadata.gz: d7c965b7783ecaea4802f7e585861b4400b2210fee4cb90388757530880fa074
+ data.tar.gz: fc8b7cc3f00825fa054c948a7ae817b1eee6457ffaec9e5a6b5bdd9a0b92d126
  SHA512:
- metadata.gz: 24c6379ef4fd3b5a9f061c6fc40fd8c0498ad33213684d08dd27a8b8994ba40a98bf1fa18a6d6b3b8189aa71436ec9bb394e3b8d41a8dd3ca90a5b93d0f1718a
- data.tar.gz: a2d3df2c17d2559aa99f52f04624032a9243915f2a1b28a6f3626bd3b9112eb8c325b0c9a286864d25c2b4e92a44a8939448a85d1004ac5d48c2f81f747749c1
+ metadata.gz: 73f774ebe540dfd54e87f89cedecfc0fabf4a97f4e2ef72afcd94edc5e0fbc344c7c67b365942e3bb915dfe76f94f038072671c259c2d366a69d64a73cbde960
+ data.tar.gz: bc1405329d521487ec4d0738c258fb12c3acdb37b6b8ecebf7451a866d5f1072cfc23774e2ecc3d7d297095ff280320756fb4cd9000de3eac447a105cf87028b
@@ -14,8 +14,8 @@ jobs:
  matrix:
  ruby:
  - 2.7
- - 2.6
- - 2.5
+ - 3.0
+ - 3.1
  - jruby
  steps:
  - name: Checkout
@@ -60,15 +60,10 @@ jobs:
  matrix:
  ruby:
  - 2.7
- - 2.6
- - 2.5
+ - 3.0
+ - 3.1
  - jruby
  experimental: [false]
- include:
- - ruby: 3.1
- experimental: true
- - ruby: 3.0
- experimental: true
  steps:
  - name: Checkout
  uses: actions/checkout@v2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## 2.0.0.pre (Prerelease)
+ * Drop support for Ruby `<2.7`.
+ * Drop faraday dependencies.
+ * Loosen version constraints on other dependencies.
+ * Update measurometer metrics for consistency and clarity.
+
  ## 1.7.0
  * Add support for `ARW` files.
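
The measurometer entry above refers to the naming scheme visible in the hunks further down: stable snake_case metric names, with the variable part moved into a tag. A minimal illustration of the two call shapes from this diff (the `:image` value is hypothetical, standing in for whatever nature a parser detects):

    # format_parser 1.7.0 - the detected nature was baked into the metric name
    Measurometer.increment_counter('format_parser.detected_natures.image', 1)

    # format_parser 2.0.0.pre - stable metric name, nature passed as a tag
    Measurometer.increment_counter('format_parser.detected_natures', 1, nature: :image)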
@@ -30,17 +30,15 @@ Gem::Specification.new do |spec|
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

- spec.add_dependency 'ks', '~> 0.0'
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
- spec.add_dependency 'id3tag', '~> 0.14', '>= 0.14.2'
- spec.add_dependency 'faraday', '~> 0.13'
- spec.add_dependency 'faraday_middleware', '~> 0.14'
- spec.add_dependency 'measurometer', '~> 1'
+ spec.add_dependency 'exifr', '>= 1.3.8'
+ spec.add_dependency 'id3tag', '>= 0.14.2'
+ spec.add_dependency 'ks'
+ spec.add_dependency 'measurometer'

- spec.add_development_dependency 'rspec', '~> 3.0'
- spec.add_development_dependency 'rake', '~> 12'
- spec.add_development_dependency 'simplecov', '~> 0.15'
- spec.add_development_dependency 'yard', '~> 0.9'
- spec.add_development_dependency 'wetransfer_style', '0.5.0'
  spec.add_development_dependency 'parallel_tests'
+ spec.add_development_dependency 'rake'
+ spec.add_development_dependency 'rspec'
+ spec.add_development_dependency 'simplecov'
+ spec.add_development_dependency 'wetransfer_style', '1.0.0'
+ spec.add_development_dependency 'yard'
  end
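
Since the runtime dependencies above no longer carry `~>` caps, an application that wants tighter control can pin versions in its own Gemfile. A hypothetical example, with the constraints chosen purely for illustration:

    source 'https://rubygems.org'

    gem 'format_parser', '2.0.0.pre'
    # The gem itself now only requires minimum versions, so the app decides the ceilings:
    gem 'exifr', '~> 1.3', '>= 1.3.8'
    gem 'id3tag', '~> 0.14', '>= 0.14.2'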
data/lib/care.rb CHANGED
@@ -96,12 +96,8 @@ class Care
  # @return [String, nil] the content read from the IO or `nil` if no data was available
  # @raise ArgumentError
  def byteslice(io, at, n_bytes)
- if n_bytes < 1
- raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}"
- end
- if at < 0
- raise ArgumentError, "Negative offsets are not supported (got #{at})"
- end
+ raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}" if n_bytes < 1
+ raise ArgumentError, "Negative offsets are not supported (got #{at})" if at < 0

  first_page = at / @page_size
  last_page = (at + n_bytes) / @page_size
@@ -174,16 +170,14 @@ class Care
  # @param io[IO] the IO to read from
  # @param page_i[Integer] which page (zero-based) to read
  def read_page(io, page_i)
- Measurometer.increment_counter('format_parser.parser.Care.page_reads_from_upsteam', 1)
+ Measurometer.increment_counter('format_parser.parser.care.page_reads_from_upsteam', 1)

  io.seek(page_i * @page_size)
- read_result = Measurometer.instrument('format_parser.Care.read_page') { io.read(@page_size) }
+ read_result = Measurometer.instrument('format_parser.care.read_page') { io.read(@page_size) }
  if read_result.nil?
  # If the read went past the end of the IO the read result will be nil,
  # so we know our IO is exhausted here
- if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
- @lowest_known_empty_page = page_i
- end
+ @lowest_known_empty_page = page_i if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
  elsif read_result.bytesize < @page_size
  # If we read less than we initially wanted we know there are no pages
  # to read following this one, so we can also optimize
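
The `byteslice` edit above only collapses the guard clauses; the paging arithmetic that follows them is unchanged. A quick worked example with a hypothetical page size shows how an offset/length pair maps onto cached pages:

    page_size = 128 * 1024          # hypothetical value; Care's real default lives elsewhere in care.rb
    at, n_bytes = 300_000, 4_096

    first_page = at / page_size               # => 2
    last_page  = (at + n_bytes) / page_size   # => 2, so the slice is served from a single cached page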
@@ -1,3 +1,3 @@
  module FormatParser
- VERSION = '1.7.0'
+ VERSION = '2.0.0.pre'
  end
data/lib/format_parser.rb CHANGED
@@ -20,6 +20,7 @@ module FormatParser
  require_relative 'care'
  require_relative 'active_storage/blob_analyzer'
  require_relative 'text'
+ require_relative 'string'

  # Define Measurometer in the internal namespace as well
  # so that we stay compatible for the applications that use it
@@ -87,8 +88,8 @@ module FormatParser
  # Parses the resource at the given `url` and returns the results as if it were any IO
  # given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
  #
- # @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
- # @param headers[Hash] (optional) the HTTP headers to request the object from using Faraday
+ # @param url[String, URI] the HTTP(S) URL to request the object from using `Range:` requests
+ # @param headers[Hash] (optional) the HTTP headers to request the object from
  # @param kwargs the keyword arguments to be delegated to `.parse`
  # @see {.parse}
  def self.parse_http(url, headers: {}, **kwargs)
@@ -177,9 +178,7 @@ module FormatParser
  # Convert the results from a lazy enumerator to an Array.
  results = results.to_a

- if results.empty?
- Measurometer.increment_counter('format_parser.unknown_files', 1)
- end
+ Measurometer.increment_counter('format_parser.unknown_files', 1) if results.empty?

  amount == 1 ? results.first : results
  ensure
@@ -202,12 +201,12 @@ module FormatParser
  end

  def self.execute_parser_and_capture_expected_exceptions(parser, limited_io)
- parser_name_for_instrumentation = parser.class.to_s.split('::').last
+ parser_name_for_instrumentation = parser.class.to_s.split('::').last.underscore
  Measurometer.instrument('format_parser.parser.%s' % parser_name_for_instrumentation) do
  parser.call(limited_io).tap do |result|
  if result
- Measurometer.increment_counter('format_parser.detected_natures.%s' % result.nature, 1)
- Measurometer.increment_counter('format_parser.detected_formats.%s' % result.format, 1)
+ Measurometer.increment_counter('format_parser.detected_natures', 1, nature: result.nature)
+ Measurometer.increment_counter('format_parser.detected_formats', 1, format: result.format)
  end
  end
  end
@@ -252,9 +251,7 @@ module FormatParser
  fitting_by_formats = assemble_parser_set[@parsers_per_format, desired_formats]
  parsers = fitting_by_natures & fitting_by_formats

- if parsers.empty?
- raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}"
- end
+ raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}" if parsers.empty?

  # Order the parsers according to their priority value. The ones having a lower
  # value will sort higher and will be applied sooner
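
For orientation, the entry points touched in this file are used roughly as follows. This is a sketch based on the signatures documented in the diff (`parse` accepting an IO, `parse_http(url, headers: {})`, results exposing `nature`/`format`); the URL, file name and return values are illustrative:

    require 'format_parser'

    # Any readable, seekable IO works for local parsing
    result = File.open('photo.jpg', 'rb') { |f| FormatParser.parse(f) }
    result&.nature  # => :image (for a JPEG)
    result&.format  # => :jpg

    # Remote objects are fetched with HTTP `Range:` requests; Faraday is no longer involved
    remote = FormatParser.parse_http(
      'https://example.com/uploads/photo.jpg',
      headers: {'Authorization' => 'Bearer …'}
    )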
data/lib/io_utils.rb CHANGED
@@ -9,12 +9,8 @@ module FormatParser::IOUtils
  raise ArgumentError, 'Unbounded reads are not supported' if n.nil?
  buf = io.read(n)

- unless buf
- raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF"
- end
- if buf.bytesize != n
- raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead"
- end
+ raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF" unless buf
+ raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead" if buf.bytesize != n

  buf
  end
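
The guard-clause rewrite above does not change behaviour: a short read still raises instead of returning partial data. A sketch of the helper in use, assuming the method shown is `safe_read` (the name the FLAC parser below calls) and using a hypothetical including class:

    require 'stringio'

    class TinyMagicReader
      include FormatParser::IOUtils

      def magic(io)
        safe_read(io, 4)  # raises FormatParser::IOUtils::InvalidRead on EOF or short reads
      end
    end

    TinyMagicReader.new.magic(StringIO.new('RIFF....'))  # => "RIFF"
    TinyMagicReader.new.magic(StringIO.new('ab'))        # raises InvalidRead (only 2 bytes available)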
@@ -33,23 +33,17 @@ class FormatParser::AdtsHeaderInfo
  MPEG_VERSION_HASH = { 0 => 'MPEG-4', 1 => 'MPEG-2'}

  def mpeg4_sampling_frequency
- if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
- return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index]
- end
+ return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index] if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
  nil
  end

  def profile_description
- if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
- return AAC_PROFILE_DESCRIPTION_HASH[@profile]
- end
+ return AAC_PROFILE_DESCRIPTION_HASH[@profile] if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
  nil
  end

  def mpeg_version_description
- if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
- return MPEG_VERSION_HASH[@mpeg_version]
- end
+ return MPEG_VERSION_HASH[@mpeg_version] if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
  nil
  end

@@ -201,7 +201,7 @@ class FormatParser::DPXParser
  blanking :reserve, 52

  # Only expose the elements present
- def image_elements #:nodoc:
+ def image_elements # :nodoc:
  @image_elements[0...number_elements]
  end
  end
@@ -125,9 +125,7 @@ module FormatParser::EXIFParser
  # those and return the _last_ non-0 orientation, or 0 otherwise
  @multiple_exif_results.reverse_each do |exif_tag_frame|
  orientation_value = exif_tag_frame.orientation
- if !orientation_value.nil? && orientation_value != 0
- return orientation_value
- end
+ return orientation_value if !orientation_value.nil? && orientation_value != 0
  end
  0 # If none were found - the orientation is unknown
  end
@@ -175,7 +173,7 @@ module FormatParser::EXIFParser
  EXIFR.logger = Logger.new(nil)

  def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
- Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
+ Measurometer.instrument('format_parser.exif_parser.exif_from_tiff_io') do
  extended_io = IOExt.new(constrained_io)
  exif_raw_data = EXIFR::TIFF.new(extended_io)

@@ -24,9 +24,9 @@ class FormatParser::FDXParser
  def check_for_document_type(file_and_document_type)
  sanitized_data = file_and_document_type.downcase
  if sanitized_data.include?('finaldraft') && sanitized_data.include?('script')
- return :fdx, :script
+ [:fdx, :script]
  else
- return
+ nil
  end
  end

@@ -20,15 +20,11 @@ class FormatParser::FLACParser

  minimum_block_size = bytestring_to_int(safe_read(io, 2))

- if minimum_block_size < 16
- raise MalformedFile, 'FLAC file minimum block size must be larger than 16'
- end
+ raise MalformedFile, 'FLAC file minimum block size must be larger than 16' if minimum_block_size < 16

  maximum_block_size = bytestring_to_int(safe_read(io, 2))

- if maximum_block_size < minimum_block_size
- raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size'
- end
+ raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size' if maximum_block_size < minimum_block_size

  minimum_frame_size = bytestring_to_int(safe_read(io, 3))
  maximum_frame_size = bytestring_to_int(safe_read(io, 3))
@@ -69,7 +69,7 @@ class FormatParser::JPEGParser
  end
  end

- Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
+ Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_read_until_capture', @buf.pos)

  # A single file might contain multiple EXIF data frames. In a JPEG this would
  # manifest as multiple APP1 markers. The way different programs handle these
@@ -156,7 +156,7 @@ class FormatParser::JPEGParser
  # Use StringIO.new instead of #write - https://github.com/aws/aws-sdk-ruby/issues/785#issuecomment-95456838
  exif_buf = StringIO.new(safe_read(@buf, app1_frame_content_length - EXIF_MAGIC_STRING.bytesize))

- Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
+ Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_sent_to_exif_parser', exif_buf.size)

  @exif_data_frames << exif_from_tiff_io(exif_buf)
  rescue EXIFR::MalformedTIFF
@@ -37,7 +37,7 @@ class FormatParser::MOOVParser
  # size that gets parsed just before.
  max_read_offset = 0xFFFFFFFF
  decoder = Decoder.new
- atom_tree = Measurometer.instrument('format_parser.Decoder.extract_atom_stream') do
+ atom_tree = Measurometer.instrument('format_parser.decoder.extract_atom_stream') do
  decoder.extract_atom_stream(io, max_read_offset)
  end

@@ -93,12 +93,10 @@ class FormatParser::MOOVParser
  def parse_dimensions(decoder, atom_tree)
  video_trak_atom = decoder.find_video_trak_atom(atom_tree)

- tkhd = begin
- if video_trak_atom
- decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
- else
- decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
- end
+ tkhd = if video_trak_atom
+ decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
+ else
+ decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
  end

  if tkhd
@@ -179,13 +179,9 @@ class FormatParser::MP3Parser
  frame_data_str = io.read(frame_detail.frame_length)
  io.seek(io.pos - frame_detail.frame_length)
  xing_header = attempt_xing_header(frame_data_str)
- if xing_header_usable_for_duration?(xing_header)
- return [xing_header, mpeg_frames]
- end
- end
- if frame_detail.frame_length > 1 # jump over current frame body
- io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
+ return [xing_header, mpeg_frames] if xing_header_usable_for_duration?(xing_header)
  end
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read) if frame_detail.frame_length > 1 # jump over current frame body
  end
  [nil, mpeg_frames]
  rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover
@@ -44,9 +44,7 @@ class FormatParser::MPEGParser
  io.seek(pos + 1)
  horizontal_size, vertical_size = parse_image_size(io)
  ratio_code, rate_code = parse_rate_information(io)
- if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
- return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
- end
+ return file_info(horizontal_size, vertical_size, ratio_code, rate_code) if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
  end
  nil # otherwise the return value of Integer#times will be returned
  rescue FormatParser::IOUtils::InvalidRead
@@ -34,9 +34,7 @@ class FormatParser::WAVParser
  case chunk_type
  when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
  fmt_data = unpack_fmt_chunk(io, chunk_size)
- if fmt_data[:audio_format] != 1 and fact_processed
- return process_non_pcm(fmt_data, total_sample_frames)
- end
+ return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
  fmt_processed = true
  when 'data'
  return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
@@ -45,11 +43,10 @@ class FormatParser::WAVParser
  when 'fact'
  total_sample_frames = safe_read(io, 4).unpack('l').first
  safe_skip(io, chunk_size - 4)
- if fmt_processed and fmt_data[:audio_format] != 1
- return process_non_pcm(fmt_data, total_sample_frames)
- end
+ return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
  fact_processed = true
- else # Skip this chunk until a known chunk is encountered
+ else
+ # Skip this chunk until a known chunk is encountered
  safe_skip(io, chunk_size)
  end
  end
@@ -70,11 +67,11 @@ class FormatParser::WAVParser
  safe_skip(io, chunk_size - 16) # skip the extra fields

  {
- audio_format: fmt_info[0],
- channels: fmt_info[1],
- sample_rate: fmt_info[2],
- byte_rate: fmt_info[3],
- block_align: fmt_info[4],
+ audio_format: fmt_info[0],
+ channels: fmt_info[1],
+ sample_rate: fmt_info[2],
+ byte_rate: fmt_info[3],
+ block_align: fmt_info[4],
  bits_per_sample: fmt_info[5],
  }
  end
@@ -27,52 +27,43 @@ class FormatParser::ZIPParser::FileReader
  # To prevent too many tiny reads, read the maximum possible size of end of
  # central directory record upfront (all the fixed fields + at most 0xFFFF
  # bytes of the archive comment)
- MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
- begin
- 4 + # Offset of the start of central directory
- 4 + # Size of the central directory
- 2 + # Number of files in the cdir
- 4 + # End-of-central-directory signature
- 2 + # Number of this disk
- 2 + # Number of disk with the start of cdir
- 2 + # Number of files in the cdir of this disk
- 2 + # The comment size
- 0xFFFF # Maximum comment size
- end
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
+ 4 + # Size of the central directory
+ 2 + # Number of files in the cdir
+ 4 + # End-of-central-directory signature
+ 2 + # Number of this disk
+ 2 + # Number of disk with the start of cdir
+ 2 + # Number of files in the cdir of this disk
+ 2 + # The comment size
+ 0xFFFF # Maximum comment size

  # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
  # The maximum size is all the usual items, plus the maximum size
  # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
- MAX_LOCAL_HEADER_SIZE =
- begin
- 4 + # signature
- 2 + # Version needed to extract
- 2 + # gp flags
- 2 + # storage mode
- 2 + # dos time
- 2 + # dos date
- 4 + # CRC32
- 4 + # Comp size
- 4 + # Uncomp size
- 2 + # Filename size
- 2 + # Extra fields size
- 0xFFFF + # Maximum filename size
- 0xFFFF # Maximum extra fields size
- end
-
- SIZE_OF_USABLE_EOCD_RECORD =
- begin
- 4 + # Signature
- 2 + # Number of this disk
- 2 + # Number of the disk with the EOCD record
- 2 + # Number of entries in the central directory of this disk
- 2 + # Number of entries in the central directory total
- 4 + # Size of the central directory
- 4 # Start of the central directory offset
- end
+ MAX_LOCAL_HEADER_SIZE = 4 + # signature
+ 2 + # Version needed to extract
+ 2 + # gp flags
+ 2 + # storage mode
+ 2 + # dos time
+ 2 + # dos date
+ 4 + # CRC32
+ 4 + # Comp size
+ 4 + # Uncomp size
+ 2 + # Filename size
+ 2 + # Extra fields size
+ 0xFFFF + # Maximum filename size
+ 0xFFFF # Maximum extra fields size
+
+ SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
+ 2 + # Number of this disk
+ 2 + # Number of the disk with the EOCD record
+ 2 + # Number of entries in the central directory of this disk
+ 2 + # Number of entries in the central directory total
+ 4 + # Size of the central directory
+ 4 # Start of the central directory offset

  private_constant :C_UINT32LE, :C_UINT16LE, :C_UINT64LE, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
- :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
+ :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD

  # Represents a file within the ZIP archive being read
  class ZipEntry
@@ -216,7 +207,7 @@ class FormatParser::ZIPParser::FileReader
  io.seek(absolute_pos)
  unless absolute_pos == io.pos
  raise ReadError,
- "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
+ "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
  end
  nil
  end
@@ -235,18 +226,14 @@ class FormatParser::ZIPParser::FileReader
  io.seek(io.pos + n)
  pos_after = io.pos
  delta = pos_after - pos_before
- unless delta == n
- raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead"
- end
+ raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead" unless delta == n
  nil
  end

  def read_n(io, n_bytes)
  io.read(n_bytes).tap do |d|
  raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
- unless d.bytesize == n_bytes
- raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}"
- end
+ raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}" unless d.bytesize == n_bytes
  end
  end

@@ -310,15 +297,9 @@ class FormatParser::ZIPParser::FileReader
  #
  # It means that before we read this stuff we need to check if the previously-read
  # values are at overflow, and only _then_ proceed to read them. Bah.
- if e.uncompressed_size == 0xFFFFFFFF
- e.uncompressed_size = read_8b(zip64_extra)
- end
- if e.compressed_size == 0xFFFFFFFF
- e.compressed_size = read_8b(zip64_extra)
- end
- if e.local_file_header_offset == 0xFFFFFFFF
- e.local_file_header_offset = read_8b(zip64_extra)
- end
+ e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
+ e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
+ e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
  # Disk number comes last and we can skip it anyway, since we do
  # not support multi-disk archives
  end
@@ -370,9 +351,7 @@ class FormatParser::ZIPParser::FileReader
  signature, *_rest, comment_size = maybe_record.unpack(unpack_pattern)

  # Check the only condition for the match
- if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
- return check_at # Found the EOCD marker location
- end
+ return check_at if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
  end
  # If we haven't caught anything, return nil deliberately instead of returning the last statement
  nil
@@ -422,16 +401,12 @@ class FormatParser::ZIPParser::FileReader

  disk_n = read_4b(zip64_eocdr) # number of this disk
  disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
- if disk_n != disk_n_with_eocdr
- raise UnsupportedFeature, 'The archive spans multiple disks'
- end
+ raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr

  num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
- num_files_total = read_8b(zip64_eocdr) # files total in the central directory
+ num_files_total = read_8b(zip64_eocdr) # files total in the central directory

- if num_files_this_disk != num_files_total
- raise UnsupportedFeature, 'The archive spans multiple disks'
- end
+ raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total

  log do
  format(
@@ -439,8 +414,8 @@ class FormatParser::ZIPParser::FileReader
  num_files_total)
  end

- central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
- central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
+ central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
+ central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts

  [num_files_total, central_dir_offset, central_dir_size]
  end
@@ -456,8 +431,8 @@ class FormatParser::ZIPParser::FileReader
  skip_ahead_2(io) # number_of_this_disk
  skip_ahead_2(io) # number of the disk with the EOCD record
  skip_ahead_2(io) # number of entries in the central directory of this disk
- num_files = read_2b(io) # number of entries in the central directory total
- cdir_size = read_4b(io) # size of the central directory
+ num_files = read_2b(io) # number of entries in the central directory total
+ cdir_size = read_4b(io) # size of the central directory
  cdir_offset = read_4b(io) # start of central directorty offset
  [num_files, cdir_offset, cdir_size]
  end
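
The constant rewrites at the top of this file are layout-only; the computed values are identical. For reference, the fixed end-of-central-directory fields in the sum above add up to 22 bytes, so the maximum EOCD record read upfront is:

    fixed_eocd_fields = 4 + 4 + 2 + 4 + 2 + 2 + 2 + 2   # => 22 bytes of fixed fields
    fixed_eocd_fields + 0xFFFF                           # => 65_557 bytes read at most, comment included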
@@ -34,7 +34,7 @@ class FormatParser::ZIPParser
  end
  rescue FileReader::Error
  # This is not a ZIP, or a broken ZIP.
- return
+ nil
  end

  def directory?(zip_entry)
data/lib/read_limiter.rb CHANGED
@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
  # @return Integer
  def seek(to)
  @seeks += 1
- if @max_seeks && @seeks > @max_seeks
- raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
- end
+ raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
  @io.seek(to)
  end

@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
  @bytes += n_bytes
  @reads += 1

- if @max_bytes && @bytes > @max_bytes
- raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
- end
-
- if @max_reads && @reads > @max_reads
- raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
- end
+ raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
+ raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads

  @io.read(n_bytes)
  end

  # Sends the metrics about the state of this ReadLimiter to a Measurometer
  #
- # @param prefix[String] the prefix to set. For example, with prefix "TIFF" the metrics will be called
- # `format_parser.TIFF.read_limiter.num_seeks` and so forth
+ # @param parser[String] the parser to add as a tag.
  # @return void
- def send_metrics(prefix)
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
+ def send_metrics(parser)
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
+ Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
  end

  # Resets all the recorded call counters so that the object can be reused for the next parser,
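
The `send_metrics` change mirrors the rest of the metrics cleanup: the parser name becomes a tag instead of a segment of the metric name. A sketch of the call shape, with the constructor keywords assumed rather than taken from this diff:

    require 'stringio'

    limiter = FormatParser::ReadLimiter.new(StringIO.new('x' * 2048), max_bytes: 1024)
    limiter.read(512)
    limiter.send_metrics('TIFF')
    # 1.7.0:      format_parser.TIFF.read_limiter.num_reads (prefix baked into the metric name)
    # 2.0.0.pre:  format_parser.read_limiter.num_reads, tagged with parser: 'TIFF'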