format_parser 1.7.0 → 2.0.0.pre

This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8b3cef665ae16efd68e8da952fd4656e2d9403f3899bd58839da3d8026db91f4
- data.tar.gz: 7b0ec88efc2ea62f526699a4041cb3f1b3062994d2a6e0b24c2cfdf247aaf532
+ metadata.gz: d7c965b7783ecaea4802f7e585861b4400b2210fee4cb90388757530880fa074
+ data.tar.gz: fc8b7cc3f00825fa054c948a7ae817b1eee6457ffaec9e5a6b5bdd9a0b92d126
  SHA512:
- metadata.gz: 24c6379ef4fd3b5a9f061c6fc40fd8c0498ad33213684d08dd27a8b8994ba40a98bf1fa18a6d6b3b8189aa71436ec9bb394e3b8d41a8dd3ca90a5b93d0f1718a
- data.tar.gz: a2d3df2c17d2559aa99f52f04624032a9243915f2a1b28a6f3626bd3b9112eb8c325b0c9a286864d25c2b4e92a44a8939448a85d1004ac5d48c2f81f747749c1
+ metadata.gz: 73f774ebe540dfd54e87f89cedecfc0fabf4a97f4e2ef72afcd94edc5e0fbc344c7c67b365942e3bb915dfe76f94f038072671c259c2d366a69d64a73cbde960
+ data.tar.gz: bc1405329d521487ec4d0738c258fb12c3acdb37b6b8ecebf7451a866d5f1072cfc23774e2ecc3d7d297095ff280320756fb4cd9000de3eac447a105cf87028b
@@ -14,8 +14,8 @@ jobs:
  matrix:
  ruby:
  - 2.7
- - 2.6
- - 2.5
+ - 3.0
+ - 3.1
  - jruby
  steps:
  - name: Checkout
@@ -60,15 +60,10 @@ jobs:
  matrix:
  ruby:
  - 2.7
- - 2.6
- - 2.5
+ - 3.0
+ - 3.1
  - jruby
  experimental: [false]
- include:
- - ruby: 3.1
- experimental: true
- - ruby: 3.0
- experimental: true
  steps:
  - name: Checkout
  uses: actions/checkout@v2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## 2.0.0.pre (Prerelease)
+ * Drop support for Ruby `<2.7`.
+ * Drop faraday dependencies.
+ * Loosen version constraints on other dependencies.
+ * Update measurometer metrics for consistency and clarity.
+
  ## 1.7.0
  * Add support for `ARW` files.
 
@@ -30,17 +30,15 @@ Gem::Specification.new do |spec|
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']
 
- spec.add_dependency 'ks', '~> 0.0'
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
- spec.add_dependency 'id3tag', '~> 0.14', '>= 0.14.2'
- spec.add_dependency 'faraday', '~> 0.13'
- spec.add_dependency 'faraday_middleware', '~> 0.14'
- spec.add_dependency 'measurometer', '~> 1'
+ spec.add_dependency 'exifr', '>= 1.3.8'
+ spec.add_dependency 'id3tag', '>= 0.14.2'
+ spec.add_dependency 'ks'
+ spec.add_dependency 'measurometer'
 
- spec.add_development_dependency 'rspec', '~> 3.0'
- spec.add_development_dependency 'rake', '~> 12'
- spec.add_development_dependency 'simplecov', '~> 0.15'
- spec.add_development_dependency 'yard', '~> 0.9'
- spec.add_development_dependency 'wetransfer_style', '0.5.0'
  spec.add_development_dependency 'parallel_tests'
+ spec.add_development_dependency 'rake'
+ spec.add_development_dependency 'rspec'
+ spec.add_development_dependency 'simplecov'
+ spec.add_development_dependency 'wetransfer_style', '1.0.0'
+ spec.add_development_dependency 'yard'
  end
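The gemspec change above drops Faraday, loosens the remaining constraints, and accompanies the bump to a prerelease version. A minimal Gemfile sketch for trying the prerelease in an application; only the gem name and version are taken from this diff, the registry source is an assumption:

```ruby
# Gemfile — minimal sketch for trying the 2.0.0.pre prerelease described in the changelog above.
source 'https://rubygems.org'

# Prerelease versions are not picked up by a bare `gem 'format_parser'`, so pin it explicitly.
gem 'format_parser', '2.0.0.pre'
```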
data/lib/care.rb CHANGED
@@ -96,12 +96,8 @@ class Care
  # @return [String, nil] the content read from the IO or `nil` if no data was available
  # @raise ArgumentError
  def byteslice(io, at, n_bytes)
- if n_bytes < 1
- raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}"
- end
- if at < 0
- raise ArgumentError, "Negative offsets are not supported (got #{at})"
- end
+ raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}" if n_bytes < 1
+ raise ArgumentError, "Negative offsets are not supported (got #{at})" if at < 0
 
  first_page = at / @page_size
  last_page = (at + n_bytes) / @page_size
@@ -174,16 +170,14 @@ class Care
  # @param io[IO] the IO to read from
  # @param page_i[Integer] which page (zero-based) to read
  def read_page(io, page_i)
- Measurometer.increment_counter('format_parser.parser.Care.page_reads_from_upsteam', 1)
+ Measurometer.increment_counter('format_parser.parser.care.page_reads_from_upsteam', 1)
 
  io.seek(page_i * @page_size)
- read_result = Measurometer.instrument('format_parser.Care.read_page') { io.read(@page_size) }
+ read_result = Measurometer.instrument('format_parser.care.read_page') { io.read(@page_size) }
  if read_result.nil?
  # If the read went past the end of the IO the read result will be nil,
  # so we know our IO is exhausted here
- if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
- @lowest_known_empty_page = page_i
- end
+ @lowest_known_empty_page = page_i if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
  elsif read_result.bytesize < @page_size
  # If we read less than we initially wanted we know there are no pages
  # to read following this one, so we can also optimize
@@ -1,3 +1,3 @@
  module FormatParser
- VERSION = '1.7.0'
+ VERSION = '2.0.0.pre'
  end
data/lib/format_parser.rb CHANGED
@@ -20,6 +20,7 @@ module FormatParser
  require_relative 'care'
  require_relative 'active_storage/blob_analyzer'
  require_relative 'text'
+ require_relative 'string'
 
  # Define Measurometer in the internal namespace as well
  # so that we stay compatible for the applications that use it
@@ -87,8 +88,8 @@ module FormatParser
  # Parses the resource at the given `url` and returns the results as if it were any IO
  # given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
  #
- # @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
- # @param headers[Hash] (optional) the HTTP headers to request the object from using Faraday
+ # @param url[String, URI] the HTTP(S) URL to request the object from using `Range:` requests
+ # @param headers[Hash] (optional) the HTTP headers to request the object from
  # @param kwargs the keyword arguments to be delegated to `.parse`
  # @see {.parse}
  def self.parse_http(url, headers: {}, **kwargs)
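The documentation change above removes the Faraday wording from remote fetching. A minimal usage sketch of the method whose signature is shown above; the URL and the header value are illustrative, not taken from the diff:

```ruby
require 'format_parser'

# Hypothetical URL and auth header, shown only to illustrate the keyword arguments.
result = FormatParser.parse_http(
  'https://example.com/uploads/photo.jpg',
  headers: { 'Authorization' => 'Bearer <token>' }
)

# The result responds to the readers referenced elsewhere in this diff,
# e.g. `nature` and `format`; it is nil when nothing could be detected.
puts [result.nature, result.format].inspect if result
```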
@@ -177,9 +178,7 @@ module FormatParser
  # Convert the results from a lazy enumerator to an Array.
  results = results.to_a
 
- if results.empty?
- Measurometer.increment_counter('format_parser.unknown_files', 1)
- end
+ Measurometer.increment_counter('format_parser.unknown_files', 1) if results.empty?
 
  amount == 1 ? results.first : results
  ensure
@@ -202,12 +201,12 @@ module FormatParser
  end
 
  def self.execute_parser_and_capture_expected_exceptions(parser, limited_io)
- parser_name_for_instrumentation = parser.class.to_s.split('::').last
+ parser_name_for_instrumentation = parser.class.to_s.split('::').last.underscore
  Measurometer.instrument('format_parser.parser.%s' % parser_name_for_instrumentation) do
  parser.call(limited_io).tap do |result|
  if result
- Measurometer.increment_counter('format_parser.detected_natures.%s' % result.nature, 1)
- Measurometer.increment_counter('format_parser.detected_formats.%s' % result.format, 1)
+ Measurometer.increment_counter('format_parser.detected_natures', 1, nature: result.nature)
+ Measurometer.increment_counter('format_parser.detected_formats', 1, format: result.format)
  end
  end
  end
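This hunk is the heart of the metrics rework mentioned in the changelog: the variable part of a metric moves out of the metric name and into a tag. A condensed before/after sketch; the metric names and call shapes are copied from this diff, the sample value is illustrative, and it assumes a Measurometer version that accepts the trailing tag keywords the new calls use:

```ruby
require 'measurometer'

detected_nature = :image # illustrative value

# 1.7.0 style: the detected nature was interpolated into the metric name,
# producing one counter per nature (format_parser.detected_natures.image, ...).
Measurometer.increment_counter('format_parser.detected_natures.%s' % detected_nature, 1)

# 2.0.0.pre style: one stable counter name, with the nature attached as a tag
# (assumes the installed Measurometer accepts the trailing keyword arguments).
Measurometer.increment_counter('format_parser.detected_natures', 1, nature: detected_nature)
```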
@@ -252,9 +251,7 @@ module FormatParser
  fitting_by_formats = assemble_parser_set[@parsers_per_format, desired_formats]
  parsers = fitting_by_natures & fitting_by_formats
 
- if parsers.empty?
- raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}"
- end
+ raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}" if parsers.empty?
 
  # Order the parsers according to their priority value. The ones having a lower
  # value will sort higher and will be applied sooner
data/lib/io_utils.rb CHANGED
@@ -9,12 +9,8 @@ module FormatParser::IOUtils
  raise ArgumentError, 'Unbounded reads are not supported' if n.nil?
  buf = io.read(n)
 
- unless buf
- raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF"
- end
- if buf.bytesize != n
- raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead"
- end
+ raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF" unless buf
+ raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead" if buf.bytesize != n
 
  buf
  end
@@ -33,23 +33,17 @@ class FormatParser::AdtsHeaderInfo
  MPEG_VERSION_HASH = { 0 => 'MPEG-4', 1 => 'MPEG-2'}
 
  def mpeg4_sampling_frequency
- if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
- return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index]
- end
+ return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index] if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
  nil
  end
 
  def profile_description
- if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
- return AAC_PROFILE_DESCRIPTION_HASH[@profile]
- end
+ return AAC_PROFILE_DESCRIPTION_HASH[@profile] if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
  nil
  end
 
  def mpeg_version_description
- if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
- return MPEG_VERSION_HASH[@mpeg_version]
- end
+ return MPEG_VERSION_HASH[@mpeg_version] if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
  nil
  end
 
@@ -201,7 +201,7 @@ class FormatParser::DPXParser
  blanking :reserve, 52
 
  # Only expose the elements present
- def image_elements #:nodoc:
+ def image_elements # :nodoc:
  @image_elements[0...number_elements]
  end
  end
@@ -125,9 +125,7 @@ module FormatParser::EXIFParser
  # those and return the _last_ non-0 orientation, or 0 otherwise
  @multiple_exif_results.reverse_each do |exif_tag_frame|
  orientation_value = exif_tag_frame.orientation
- if !orientation_value.nil? && orientation_value != 0
- return orientation_value
- end
+ return orientation_value if !orientation_value.nil? && orientation_value != 0
  end
  0 # If none were found - the orientation is unknown
  end
@@ -175,7 +173,7 @@ module FormatParser::EXIFParser
  EXIFR.logger = Logger.new(nil)
 
  def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
- Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
+ Measurometer.instrument('format_parser.exif_parser.exif_from_tiff_io') do
  extended_io = IOExt.new(constrained_io)
  exif_raw_data = EXIFR::TIFF.new(extended_io)
 
@@ -24,9 +24,9 @@ class FormatParser::FDXParser
  def check_for_document_type(file_and_document_type)
  sanitized_data = file_and_document_type.downcase
  if sanitized_data.include?('finaldraft') && sanitized_data.include?('script')
- return :fdx, :script
+ [:fdx, :script]
  else
- return
+ nil
  end
  end
 
@@ -20,15 +20,11 @@ class FormatParser::FLACParser
 
  minimum_block_size = bytestring_to_int(safe_read(io, 2))
 
- if minimum_block_size < 16
- raise MalformedFile, 'FLAC file minimum block size must be larger than 16'
- end
+ raise MalformedFile, 'FLAC file minimum block size must be larger than 16' if minimum_block_size < 16
 
  maximum_block_size = bytestring_to_int(safe_read(io, 2))
 
- if maximum_block_size < minimum_block_size
- raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size'
- end
+ raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size' if maximum_block_size < minimum_block_size
 
  minimum_frame_size = bytestring_to_int(safe_read(io, 3))
  maximum_frame_size = bytestring_to_int(safe_read(io, 3))
@@ -69,7 +69,7 @@ class FormatParser::JPEGParser
  end
  end
 
- Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
+ Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_read_until_capture', @buf.pos)
 
  # A single file might contain multiple EXIF data frames. In a JPEG this would
  # manifest as multiple APP1 markers. The way different programs handle these
@@ -156,7 +156,7 @@ class FormatParser::JPEGParser
  # Use StringIO.new instead of #write - https://github.com/aws/aws-sdk-ruby/issues/785#issuecomment-95456838
  exif_buf = StringIO.new(safe_read(@buf, app1_frame_content_length - EXIF_MAGIC_STRING.bytesize))
 
- Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
+ Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_sent_to_exif_parser', exif_buf.size)
 
  @exif_data_frames << exif_from_tiff_io(exif_buf)
  rescue EXIFR::MalformedTIFF
@@ -37,7 +37,7 @@ class FormatParser::MOOVParser
  # size that gets parsed just before.
  max_read_offset = 0xFFFFFFFF
  decoder = Decoder.new
- atom_tree = Measurometer.instrument('format_parser.Decoder.extract_atom_stream') do
+ atom_tree = Measurometer.instrument('format_parser.decoder.extract_atom_stream') do
  decoder.extract_atom_stream(io, max_read_offset)
  end
 
@@ -93,12 +93,10 @@ class FormatParser::MOOVParser
  def parse_dimensions(decoder, atom_tree)
  video_trak_atom = decoder.find_video_trak_atom(atom_tree)
 
- tkhd = begin
- if video_trak_atom
- decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
- else
- decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
- end
+ tkhd = if video_trak_atom
+ decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
+ else
+ decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
  end
 
  if tkhd
@@ -179,13 +179,9 @@ class FormatParser::MP3Parser
  frame_data_str = io.read(frame_detail.frame_length)
  io.seek(io.pos - frame_detail.frame_length)
  xing_header = attempt_xing_header(frame_data_str)
- if xing_header_usable_for_duration?(xing_header)
- return [xing_header, mpeg_frames]
- end
- end
- if frame_detail.frame_length > 1 # jump over current frame body
- io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
+ return [xing_header, mpeg_frames] if xing_header_usable_for_duration?(xing_header)
  end
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read) if frame_detail.frame_length > 1 # jump over current frame body
  end
  [nil, mpeg_frames]
  rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover
@@ -44,9 +44,7 @@ class FormatParser::MPEGParser
  io.seek(pos + 1)
  horizontal_size, vertical_size = parse_image_size(io)
  ratio_code, rate_code = parse_rate_information(io)
- if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
- return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
- end
+ return file_info(horizontal_size, vertical_size, ratio_code, rate_code) if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
  end
  nil # otherwise the return value of Integer#times will be returned
  rescue FormatParser::IOUtils::InvalidRead
@@ -34,9 +34,7 @@ class FormatParser::WAVParser
  case chunk_type
  when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
  fmt_data = unpack_fmt_chunk(io, chunk_size)
- if fmt_data[:audio_format] != 1 and fact_processed
- return process_non_pcm(fmt_data, total_sample_frames)
- end
+ return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
  fmt_processed = true
  when 'data'
  return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
@@ -45,11 +43,10 @@ class FormatParser::WAVParser
  when 'fact'
  total_sample_frames = safe_read(io, 4).unpack('l').first
  safe_skip(io, chunk_size - 4)
- if fmt_processed and fmt_data[:audio_format] != 1
- return process_non_pcm(fmt_data, total_sample_frames)
- end
+ return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
  fact_processed = true
- else # Skip this chunk until a known chunk is encountered
+ else
+ # Skip this chunk until a known chunk is encountered
  safe_skip(io, chunk_size)
  end
  end
@@ -70,11 +67,11 @@ class FormatParser::WAVParser
  safe_skip(io, chunk_size - 16) # skip the extra fields
 
  {
- audio_format: fmt_info[0],
- channels: fmt_info[1],
- sample_rate: fmt_info[2],
- byte_rate: fmt_info[3],
- block_align: fmt_info[4],
+ audio_format: fmt_info[0],
+ channels: fmt_info[1],
+ sample_rate: fmt_info[2],
+ byte_rate: fmt_info[3],
+ block_align: fmt_info[4],
  bits_per_sample: fmt_info[5],
  }
  end
@@ -27,52 +27,43 @@ class FormatParser::ZIPParser::FileReader
  # To prevent too many tiny reads, read the maximum possible size of end of
  # central directory record upfront (all the fixed fields + at most 0xFFFF
  # bytes of the archive comment)
- MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
- begin
- 4 + # Offset of the start of central directory
- 4 + # Size of the central directory
- 2 + # Number of files in the cdir
- 4 + # End-of-central-directory signature
- 2 + # Number of this disk
- 2 + # Number of disk with the start of cdir
- 2 + # Number of files in the cdir of this disk
- 2 + # The comment size
- 0xFFFF # Maximum comment size
- end
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
+ 4 + # Size of the central directory
+ 2 + # Number of files in the cdir
+ 4 + # End-of-central-directory signature
+ 2 + # Number of this disk
+ 2 + # Number of disk with the start of cdir
+ 2 + # Number of files in the cdir of this disk
+ 2 + # The comment size
+ 0xFFFF # Maximum comment size
 
  # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
  # The maximum size is all the usual items, plus the maximum size
  # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
- MAX_LOCAL_HEADER_SIZE =
- begin
- 4 + # signature
- 2 + # Version needed to extract
- 2 + # gp flags
- 2 + # storage mode
- 2 + # dos time
- 2 + # dos date
- 4 + # CRC32
- 4 + # Comp size
- 4 + # Uncomp size
- 2 + # Filename size
- 2 + # Extra fields size
- 0xFFFF + # Maximum filename size
- 0xFFFF # Maximum extra fields size
- end
-
- SIZE_OF_USABLE_EOCD_RECORD =
- begin
- 4 + # Signature
- 2 + # Number of this disk
- 2 + # Number of the disk with the EOCD record
- 2 + # Number of entries in the central directory of this disk
- 2 + # Number of entries in the central directory total
- 4 + # Size of the central directory
- 4 # Start of the central directory offset
- end
+ MAX_LOCAL_HEADER_SIZE = 4 + # signature
+ 2 + # Version needed to extract
+ 2 + # gp flags
+ 2 + # storage mode
+ 2 + # dos time
+ 2 + # dos date
+ 4 + # CRC32
+ 4 + # Comp size
+ 4 + # Uncomp size
+ 2 + # Filename size
+ 2 + # Extra fields size
+ 0xFFFF + # Maximum filename size
+ 0xFFFF # Maximum extra fields size
+
+ SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
+ 2 + # Number of this disk
+ 2 + # Number of the disk with the EOCD record
+ 2 + # Number of entries in the central directory of this disk
+ 2 + # Number of entries in the central directory total
+ 4 + # Size of the central directory
+ 4 # Start of the central directory offset
 
  private_constant :C_UINT32LE, :C_UINT16LE, :C_UINT64LE, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
- :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
+ :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
 
  # Represents a file within the ZIP archive being read
  class ZipEntry
@@ -216,7 +207,7 @@ class FormatParser::ZIPParser::FileReader
  io.seek(absolute_pos)
  unless absolute_pos == io.pos
  raise ReadError,
- "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
+ "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
  end
  nil
  end
@@ -235,18 +226,14 @@ class FormatParser::ZIPParser::FileReader
  io.seek(io.pos + n)
  pos_after = io.pos
  delta = pos_after - pos_before
- unless delta == n
- raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead"
- end
+ raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead" unless delta == n
  nil
  end
 
  def read_n(io, n_bytes)
  io.read(n_bytes).tap do |d|
  raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
- unless d.bytesize == n_bytes
- raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}"
- end
+ raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}" unless d.bytesize == n_bytes
  end
  end
 
@@ -310,15 +297,9 @@ class FormatParser::ZIPParser::FileReader
  #
  # It means that before we read this stuff we need to check if the previously-read
  # values are at overflow, and only _then_ proceed to read them. Bah.
- if e.uncompressed_size == 0xFFFFFFFF
- e.uncompressed_size = read_8b(zip64_extra)
- end
- if e.compressed_size == 0xFFFFFFFF
- e.compressed_size = read_8b(zip64_extra)
- end
- if e.local_file_header_offset == 0xFFFFFFFF
- e.local_file_header_offset = read_8b(zip64_extra)
- end
+ e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
+ e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
+ e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
  # Disk number comes last and we can skip it anyway, since we do
  # not support multi-disk archives
  end
@@ -370,9 +351,7 @@ class FormatParser::ZIPParser::FileReader
  signature, *_rest, comment_size = maybe_record.unpack(unpack_pattern)
 
  # Check the only condition for the match
- if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
- return check_at # Found the EOCD marker location
- end
+ return check_at if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
  end
  # If we haven't caught anything, return nil deliberately instead of returning the last statement
  nil
@@ -422,16 +401,12 @@ class FormatParser::ZIPParser::FileReader
 
  disk_n = read_4b(zip64_eocdr) # number of this disk
  disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
- if disk_n != disk_n_with_eocdr
- raise UnsupportedFeature, 'The archive spans multiple disks'
- end
+ raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr
 
  num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
- num_files_total = read_8b(zip64_eocdr) # files total in the central directory
+ num_files_total = read_8b(zip64_eocdr) # files total in the central directory
 
- if num_files_this_disk != num_files_total
- raise UnsupportedFeature, 'The archive spans multiple disks'
- end
+ raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total
 
  log do
  format(
@@ -439,8 +414,8 @@ class FormatParser::ZIPParser::FileReader
  num_files_total)
  end
 
- central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
- central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
+ central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
+ central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
 
  [num_files_total, central_dir_offset, central_dir_size]
  end
@@ -456,8 +431,8 @@ class FormatParser::ZIPParser::FileReader
  skip_ahead_2(io) # number_of_this_disk
  skip_ahead_2(io) # number of the disk with the EOCD record
  skip_ahead_2(io) # number of entries in the central directory of this disk
- num_files = read_2b(io) # number of entries in the central directory total
- cdir_size = read_4b(io) # size of the central directory
+ num_files = read_2b(io) # number of entries in the central directory total
+ cdir_size = read_4b(io) # size of the central directory
  cdir_offset = read_4b(io) # start of central directorty offset
  [num_files, cdir_offset, cdir_size]
  end
@@ -34,7 +34,7 @@ class FormatParser::ZIPParser
  end
  rescue FileReader::Error
  # This is not a ZIP, or a broken ZIP.
- return
+ nil
  end
 
  def directory?(zip_entry)
data/lib/read_limiter.rb CHANGED
@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
  # @return Integer
  def seek(to)
  @seeks += 1
- if @max_seeks && @seeks > @max_seeks
- raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
- end
+ raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
  @io.seek(to)
  end
 
@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
  @bytes += n_bytes
  @reads += 1
 
- if @max_bytes && @bytes > @max_bytes
- raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
- end
-
- if @max_reads && @reads > @max_reads
- raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
- end
+ raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
+ raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads
 
  @io.read(n_bytes)
  end
 
  # Sends the metrics about the state of this ReadLimiter to a Measurometer
  #
- # @param prefix[String] the prefix to set. For example, with prefix "TIFF" the metrics will be called
- # `format_parser.TIFF.read_limiter.num_seeks` and so forth
+ # @param parser[String] the parser to add as a tag.
  # @return void
- def send_metrics(prefix)
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
+ def send_metrics(parser)
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
+ Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
  end
 
  # Resets all the recorded call counters so that the object can be reused for the next parser,