format_parser 1.6.0 → 2.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +4 -9
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/format_parser.gemspec +9 -11
- data/lib/care.rb +5 -11
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +8 -11
- data/lib/io_utils.rb +2 -6
- data/lib/parsers/aac_parser/adts_header_info.rb +3 -9
- data/lib/parsers/arw_parser.rb +50 -0
- data/lib/parsers/dpx_parser/dpx_structs.rb +1 -1
- data/lib/parsers/exif_parser.rb +2 -4
- data/lib/parsers/fdx_parser.rb +2 -2
- data/lib/parsers/flac_parser.rb +2 -6
- data/lib/parsers/jpeg_parser.rb +2 -2
- data/lib/parsers/moov_parser.rb +5 -7
- data/lib/parsers/mp3_parser.rb +2 -6
- data/lib/parsers/mpeg_parser.rb +1 -3
- data/lib/parsers/tiff_parser.rb +5 -6
- data/lib/parsers/wav_parser.rb +9 -12
- data/lib/parsers/zip_parser/file_reader.rb +45 -70
- data/lib/parsers/zip_parser.rb +1 -1
- data/lib/read_limiter.rb +8 -16
- data/lib/remote_io.rb +64 -34
- data/lib/string.rb +9 -0
- data/spec/attributes_json_spec.rb +0 -3
- data/spec/parsers/arw_parser_spec.rb +119 -0
- data/spec/parsers/tiff_parser_spec.rb +9 -15
- data/spec/remote_fetching_spec.rb +3 -8
- data/spec/remote_io_spec.rb +116 -60
- metadata +38 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7c965b7783ecaea4802f7e585861b4400b2210fee4cb90388757530880fa074
|
4
|
+
data.tar.gz: fc8b7cc3f00825fa054c948a7ae817b1eee6457ffaec9e5a6b5bdd9a0b92d126
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73f774ebe540dfd54e87f89cedecfc0fabf4a97f4e2ef72afcd94edc5e0fbc344c7c67b365942e3bb915dfe76f94f038072671c259c2d366a69d64a73cbde960
|
7
|
+
data.tar.gz: bc1405329d521487ec4d0738c258fb12c3acdb37b6b8ecebf7451a866d5f1072cfc23774e2ecc3d7d297095ff280320756fb4cd9000de3eac447a105cf87028b
|
data/.github/workflows/main.yml
CHANGED
@@ -14,8 +14,8 @@ jobs:
|
|
14
14
|
matrix:
|
15
15
|
ruby:
|
16
16
|
- 2.7
|
17
|
-
-
|
18
|
-
-
|
17
|
+
- 3.0
|
18
|
+
- 3.1
|
19
19
|
- jruby
|
20
20
|
steps:
|
21
21
|
- name: Checkout
|
@@ -60,15 +60,10 @@ jobs:
|
|
60
60
|
matrix:
|
61
61
|
ruby:
|
62
62
|
- 2.7
|
63
|
-
-
|
64
|
-
-
|
63
|
+
- 3.0
|
64
|
+
- 3.1
|
65
65
|
- jruby
|
66
66
|
experimental: [false]
|
67
|
-
include:
|
68
|
-
- ruby: 3.1
|
69
|
-
experimental: true
|
70
|
-
- ruby: 3.0
|
71
|
-
experimental: true
|
72
67
|
steps:
|
73
68
|
- name: Checkout
|
74
69
|
uses: actions/checkout@v2
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 2.0.0.pre (Prerelease)
|
2
|
+
* Drop support for Ruby `<2.7`.
|
3
|
+
* Drop faraday dependencies.
|
4
|
+
* Loosen version constraints on other dependencies.
|
5
|
+
* Update measurometer metrics for consistency and clarity.
|
6
|
+
|
7
|
+
## 1.7.0
|
8
|
+
* Add support for `ARW` files.
|
9
|
+
|
10
|
+
## 1.6.0
|
11
|
+
* Add support for `AAC` files.
|
12
|
+
|
1
13
|
## 1.5.0
|
2
14
|
* Add support for `NEF` files.
|
3
15
|
|
data/README.md
CHANGED
@@ -194,7 +194,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
194
194
|
- `IMG_9266_*.tif` and all its variations were created by the project maintainers
|
195
195
|
|
196
196
|
### ARW
|
197
|
-
- ARW
|
197
|
+
- ARW examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
198
198
|
|
199
199
|
### ZIP
|
200
200
|
- The .zip fixture files have been created by the project maintainers
|
data/format_parser.gemspec
CHANGED
@@ -30,17 +30,15 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency '
|
34
|
-
spec.add_dependency '
|
35
|
-
spec.add_dependency '
|
36
|
-
spec.add_dependency '
|
37
|
-
spec.add_dependency 'faraday_middleware', '~> 0.14'
|
38
|
-
spec.add_dependency 'measurometer', '~> 1'
|
33
|
+
spec.add_dependency 'exifr', '>= 1.3.8'
|
34
|
+
spec.add_dependency 'id3tag', '>= 0.14.2'
|
35
|
+
spec.add_dependency 'ks'
|
36
|
+
spec.add_dependency 'measurometer'
|
39
37
|
|
40
|
-
spec.add_development_dependency 'rspec', '~> 3.0'
|
41
|
-
spec.add_development_dependency 'rake', '~> 12'
|
42
|
-
spec.add_development_dependency 'simplecov', '~> 0.15'
|
43
|
-
spec.add_development_dependency 'yard', '~> 0.9'
|
44
|
-
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
38
|
spec.add_development_dependency 'parallel_tests'
|
39
|
+
spec.add_development_dependency 'rake'
|
40
|
+
spec.add_development_dependency 'rspec'
|
41
|
+
spec.add_development_dependency 'simplecov'
|
42
|
+
spec.add_development_dependency 'wetransfer_style', '1.0.0'
|
43
|
+
spec.add_development_dependency 'yard'
|
46
44
|
end
|
data/lib/care.rb
CHANGED
@@ -96,12 +96,8 @@ class Care
|
|
96
96
|
# @return [String, nil] the content read from the IO or `nil` if no data was available
|
97
97
|
# @raise ArgumentError
|
98
98
|
def byteslice(io, at, n_bytes)
|
99
|
-
if n_bytes < 1
|
100
|
-
|
101
|
-
end
|
102
|
-
if at < 0
|
103
|
-
raise ArgumentError, "Negative offsets are not supported (got #{at})"
|
104
|
-
end
|
99
|
+
raise ArgumentError, "The number of bytes to fetch must be a positive Integer, but was #{n_bytes}" if n_bytes < 1
|
100
|
+
raise ArgumentError, "Negative offsets are not supported (got #{at})" if at < 0
|
105
101
|
|
106
102
|
first_page = at / @page_size
|
107
103
|
last_page = (at + n_bytes) / @page_size
|
@@ -174,16 +170,14 @@ class Care
|
|
174
170
|
# @param io[IO] the IO to read from
|
175
171
|
# @param page_i[Integer] which page (zero-based) to read
|
176
172
|
def read_page(io, page_i)
|
177
|
-
Measurometer.increment_counter('format_parser.parser.
|
173
|
+
Measurometer.increment_counter('format_parser.parser.care.page_reads_from_upsteam', 1)
|
178
174
|
|
179
175
|
io.seek(page_i * @page_size)
|
180
|
-
read_result = Measurometer.instrument('format_parser.
|
176
|
+
read_result = Measurometer.instrument('format_parser.care.read_page') { io.read(@page_size) }
|
181
177
|
if read_result.nil?
|
182
178
|
# If the read went past the end of the IO the read result will be nil,
|
183
179
|
# so we know our IO is exhausted here
|
184
|
-
if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
|
185
|
-
@lowest_known_empty_page = page_i
|
186
|
-
end
|
180
|
+
@lowest_known_empty_page = page_i if @lowest_known_empty_page.nil? || @lowest_known_empty_page > page_i
|
187
181
|
elsif read_result.bytesize < @page_size
|
188
182
|
# If we read less than we initially wanted we know there are no pages
|
189
183
|
# to read following this one, so we can also optimize
|
data/lib/format_parser.rb
CHANGED
@@ -20,6 +20,7 @@ module FormatParser
|
|
20
20
|
require_relative 'care'
|
21
21
|
require_relative 'active_storage/blob_analyzer'
|
22
22
|
require_relative 'text'
|
23
|
+
require_relative 'string'
|
23
24
|
|
24
25
|
# Define Measurometer in the internal namespace as well
|
25
26
|
# so that we stay compatible for the applications that use it
|
@@ -87,8 +88,8 @@ module FormatParser
|
|
87
88
|
# Parses the resource at the given `url` and returns the results as if it were any IO
|
88
89
|
# given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
|
89
90
|
#
|
90
|
-
# @param url[String, URI] the HTTP(S) URL to request the object from using
|
91
|
-
# @param headers[Hash] (optional) the HTTP headers to request the object from
|
91
|
+
# @param url[String, URI] the HTTP(S) URL to request the object from using `Range:` requests
|
92
|
+
# @param headers[Hash] (optional) the HTTP headers to send with the request
|
92
93
|
# @param kwargs the keyword arguments to be delegated to `.parse`
|
93
94
|
# @see {.parse}
|
94
95
|
def self.parse_http(url, headers: {}, **kwargs)
|
@@ -177,9 +178,7 @@ module FormatParser
|
|
177
178
|
# Convert the results from a lazy enumerator to an Array.
|
178
179
|
results = results.to_a
|
179
180
|
|
180
|
-
if results.empty?
|
181
|
-
Measurometer.increment_counter('format_parser.unknown_files', 1)
|
182
|
-
end
|
181
|
+
Measurometer.increment_counter('format_parser.unknown_files', 1) if results.empty?
|
183
182
|
|
184
183
|
amount == 1 ? results.first : results
|
185
184
|
ensure
|
@@ -202,12 +201,12 @@ module FormatParser
|
|
202
201
|
end
|
203
202
|
|
204
203
|
def self.execute_parser_and_capture_expected_exceptions(parser, limited_io)
|
205
|
-
parser_name_for_instrumentation = parser.class.to_s.split('::').last
|
204
|
+
parser_name_for_instrumentation = parser.class.to_s.split('::').last.underscore
|
206
205
|
Measurometer.instrument('format_parser.parser.%s' % parser_name_for_instrumentation) do
|
207
206
|
parser.call(limited_io).tap do |result|
|
208
207
|
if result
|
209
|
-
Measurometer.increment_counter('format_parser.detected_natures
|
210
|
-
Measurometer.increment_counter('format_parser.detected_formats
|
208
|
+
Measurometer.increment_counter('format_parser.detected_natures', 1, nature: result.nature)
|
209
|
+
Measurometer.increment_counter('format_parser.detected_formats', 1, format: result.format)
|
211
210
|
end
|
212
211
|
end
|
213
212
|
end
|
@@ -252,9 +251,7 @@ module FormatParser
|
|
252
251
|
fitting_by_formats = assemble_parser_set[@parsers_per_format, desired_formats]
|
253
252
|
parsers = fitting_by_natures & fitting_by_formats
|
254
253
|
|
255
|
-
if parsers.empty?
|
256
|
-
raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}"
|
257
|
-
end
|
254
|
+
raise ArgumentError, "No parsers provide both natures #{desired_natures.inspect} and formats #{desired_formats.inspect}" if parsers.empty?
|
258
255
|
|
259
256
|
# Order the parsers according to their priority value. The ones having a lower
|
260
257
|
# value will sort higher and will be applied sooner
|
data/lib/io_utils.rb
CHANGED
@@ -9,12 +9,8 @@ module FormatParser::IOUtils
|
|
9
9
|
raise ArgumentError, 'Unbounded reads are not supported' if n.nil?
|
10
10
|
buf = io.read(n)
|
11
11
|
|
12
|
-
unless buf
|
13
|
-
|
14
|
-
end
|
15
|
-
if buf.bytesize != n
|
16
|
-
raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead"
|
17
|
-
end
|
12
|
+
raise InvalidRead, "We wanted to read #{n} bytes from the IO, but the IO is at EOF" unless buf
|
13
|
+
raise InvalidRead, "We wanted to read #{n} bytes from the IO, but we got #{buf.bytesize} instead" if buf.bytesize != n
|
18
14
|
|
19
15
|
buf
|
20
16
|
end
|
@@ -33,23 +33,17 @@ class FormatParser::AdtsHeaderInfo
|
|
33
33
|
MPEG_VERSION_HASH = { 0 => 'MPEG-4', 1 => 'MPEG-2'}
|
34
34
|
|
35
35
|
def mpeg4_sampling_frequency
|
36
|
-
if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
|
37
|
-
return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index]
|
38
|
-
end
|
36
|
+
return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index] if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
|
39
37
|
nil
|
40
38
|
end
|
41
39
|
|
42
40
|
def profile_description
|
43
|
-
if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
|
44
|
-
return AAC_PROFILE_DESCRIPTION_HASH[@profile]
|
45
|
-
end
|
41
|
+
return AAC_PROFILE_DESCRIPTION_HASH[@profile] if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
|
46
42
|
nil
|
47
43
|
end
|
48
44
|
|
49
45
|
def mpeg_version_description
|
50
|
-
if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
|
51
|
-
return MPEG_VERSION_HASH[@mpeg_version]
|
52
|
-
end
|
46
|
+
return MPEG_VERSION_HASH[@mpeg_version] if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
|
53
47
|
nil
|
54
48
|
end
|
55
49
|
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require_relative 'exif_parser'
|
2
|
+
|
3
|
+
class FormatParser::ARWParser
|
4
|
+
include FormatParser::IOUtils
|
5
|
+
include FormatParser::EXIFParser
|
6
|
+
|
7
|
+
# Standard TIFF headers
|
8
|
+
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
9
|
+
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
10
|
+
HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
11
|
+
ARW_MIME_TYPE = 'image/x-sony-arw'
|
12
|
+
|
13
|
+
def likely_match?(filename)
|
14
|
+
filename =~ /\.arw$/i
|
15
|
+
end
|
16
|
+
|
17
|
+
def call(io)
|
18
|
+
io = FormatParser::IOConstraint.new(io)
|
19
|
+
|
20
|
+
return unless HEADER_BYTES.include?(safe_read(io, 4))
|
21
|
+
exif_data = exif_from_tiff_io(io)
|
22
|
+
|
23
|
+
return unless valid?(exif_data)
|
24
|
+
|
25
|
+
w = exif_data.width || exif_data.pixel_x_dimension
|
26
|
+
h = exif_data.height || exif_data.pixel_y_dimension
|
27
|
+
|
28
|
+
FormatParser::Image.new(
|
29
|
+
format: :arw,
|
30
|
+
width_px: w,
|
31
|
+
height_px: h,
|
32
|
+
display_width_px: exif_data.rotated? ? h : w,
|
33
|
+
display_height_px: exif_data.rotated? ? w : h,
|
34
|
+
orientation: exif_data.orientation_sym,
|
35
|
+
intrinsics: { exif: exif_data },
|
36
|
+
content_type: ARW_MIME_TYPE,
|
37
|
+
)
|
38
|
+
rescue EXIFR::MalformedTIFF
|
39
|
+
nil
|
40
|
+
end
|
41
|
+
|
42
|
+
def valid?(exif_data)
|
43
|
+
# taken directly from tiff_parser.rb
|
44
|
+
# Similar to how exiftool determines the image type as ARW, we are implementing a check here
|
45
|
+
# https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
|
46
|
+
exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make&.start_with?('SONY')
|
47
|
+
end
|
48
|
+
|
49
|
+
FormatParser.register_parser new, natures: :image, formats: :arw
|
50
|
+
end
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -125,9 +125,7 @@ module FormatParser::EXIFParser
|
|
125
125
|
# those and return the _last_ non-0 orientation, or 0 otherwise
|
126
126
|
@multiple_exif_results.reverse_each do |exif_tag_frame|
|
127
127
|
orientation_value = exif_tag_frame.orientation
|
128
|
-
if !orientation_value.nil? && orientation_value != 0
|
129
|
-
return orientation_value
|
130
|
-
end
|
128
|
+
return orientation_value if !orientation_value.nil? && orientation_value != 0
|
131
129
|
end
|
132
130
|
0 # If none were found - the orientation is unknown
|
133
131
|
end
|
@@ -175,7 +173,7 @@ module FormatParser::EXIFParser
|
|
175
173
|
EXIFR.logger = Logger.new(nil)
|
176
174
|
|
177
175
|
def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
|
178
|
-
Measurometer.instrument('format_parser.
|
176
|
+
Measurometer.instrument('format_parser.exif_parser.exif_from_tiff_io') do
|
179
177
|
extended_io = IOExt.new(constrained_io)
|
180
178
|
exif_raw_data = EXIFR::TIFF.new(extended_io)
|
181
179
|
|
data/lib/parsers/fdx_parser.rb
CHANGED
@@ -24,9 +24,9 @@ class FormatParser::FDXParser
|
|
24
24
|
def check_for_document_type(file_and_document_type)
|
25
25
|
sanitized_data = file_and_document_type.downcase
|
26
26
|
if sanitized_data.include?('finaldraft') && sanitized_data.include?('script')
|
27
|
-
|
27
|
+
[:fdx, :script]
|
28
28
|
else
|
29
|
-
|
29
|
+
nil
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
data/lib/parsers/flac_parser.rb
CHANGED
@@ -20,15 +20,11 @@ class FormatParser::FLACParser
|
|
20
20
|
|
21
21
|
minimum_block_size = bytestring_to_int(safe_read(io, 2))
|
22
22
|
|
23
|
-
if minimum_block_size < 16
|
24
|
-
raise MalformedFile, 'FLAC file minimum block size must be larger than 16'
|
25
|
-
end
|
23
|
+
raise MalformedFile, 'FLAC file minimum block size must be larger than 16' if minimum_block_size < 16
|
26
24
|
|
27
25
|
maximum_block_size = bytestring_to_int(safe_read(io, 2))
|
28
26
|
|
29
|
-
if maximum_block_size < minimum_block_size
|
30
|
-
raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size'
|
31
|
-
end
|
27
|
+
raise MalformedFile, 'FLAC file maximum block size must be equal to or larger than minimum block size' if maximum_block_size < minimum_block_size
|
32
28
|
|
33
29
|
minimum_frame_size = bytestring_to_int(safe_read(io, 3))
|
34
30
|
maximum_frame_size = bytestring_to_int(safe_read(io, 3))
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -69,7 +69,7 @@ class FormatParser::JPEGParser
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
Measurometer.add_distribution_value('format_parser.
|
72
|
+
Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_read_until_capture', @buf.pos)
|
73
73
|
|
74
74
|
# A single file might contain multiple EXIF data frames. In a JPEG this would
|
75
75
|
# manifest as multiple APP1 markers. The way different programs handle these
|
@@ -156,7 +156,7 @@ class FormatParser::JPEGParser
|
|
156
156
|
# Use StringIO.new instead of #write - https://github.com/aws/aws-sdk-ruby/issues/785#issuecomment-95456838
|
157
157
|
exif_buf = StringIO.new(safe_read(@buf, app1_frame_content_length - EXIF_MAGIC_STRING.bytesize))
|
158
158
|
|
159
|
-
Measurometer.add_distribution_value('format_parser.
|
159
|
+
Measurometer.add_distribution_value('format_parser.jpeg_parser.bytes_sent_to_exif_parser', exif_buf.size)
|
160
160
|
|
161
161
|
@exif_data_frames << exif_from_tiff_io(exif_buf)
|
162
162
|
rescue EXIFR::MalformedTIFF
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -37,7 +37,7 @@ class FormatParser::MOOVParser
|
|
37
37
|
# size that gets parsed just before.
|
38
38
|
max_read_offset = 0xFFFFFFFF
|
39
39
|
decoder = Decoder.new
|
40
|
-
atom_tree = Measurometer.instrument('format_parser.
|
40
|
+
atom_tree = Measurometer.instrument('format_parser.decoder.extract_atom_stream') do
|
41
41
|
decoder.extract_atom_stream(io, max_read_offset)
|
42
42
|
end
|
43
43
|
|
@@ -93,12 +93,10 @@ class FormatParser::MOOVParser
|
|
93
93
|
def parse_dimensions(decoder, atom_tree)
|
94
94
|
video_trak_atom = decoder.find_video_trak_atom(atom_tree)
|
95
95
|
|
96
|
-
tkhd =
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
|
101
|
-
end
|
96
|
+
tkhd = if video_trak_atom
|
97
|
+
decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
|
98
|
+
else
|
99
|
+
decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
|
102
100
|
end
|
103
101
|
|
104
102
|
if tkhd
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -179,13 +179,9 @@ class FormatParser::MP3Parser
|
|
179
179
|
frame_data_str = io.read(frame_detail.frame_length)
|
180
180
|
io.seek(io.pos - frame_detail.frame_length)
|
181
181
|
xing_header = attempt_xing_header(frame_data_str)
|
182
|
-
if xing_header_usable_for_duration?(xing_header)
|
183
|
-
return [xing_header, mpeg_frames]
|
184
|
-
end
|
185
|
-
end
|
186
|
-
if frame_detail.frame_length > 1 # jump over current frame body
|
187
|
-
io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
|
182
|
+
return [xing_header, mpeg_frames] if xing_header_usable_for_duration?(xing_header)
|
188
183
|
end
|
184
|
+
io.seek(io.pos + frame_detail.frame_length - bytes_to_read) if frame_detail.frame_length > 1 # jump over current frame body
|
189
185
|
end
|
190
186
|
[nil, mpeg_frames]
|
191
187
|
rescue InvalidDeepFetch # A frame was invalid - bail out since it's unlikely we can recover
|
data/lib/parsers/mpeg_parser.rb
CHANGED
@@ -44,9 +44,7 @@ class FormatParser::MPEGParser
|
|
44
44
|
io.seek(pos + 1)
|
45
45
|
horizontal_size, vertical_size = parse_image_size(io)
|
46
46
|
ratio_code, rate_code = parse_rate_information(io)
|
47
|
-
if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
|
48
|
-
return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
|
49
|
-
end
|
47
|
+
return file_info(horizontal_size, vertical_size, ratio_code, rate_code) if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
|
50
48
|
end
|
51
49
|
nil # otherwise the return value of Integer#times will be returned
|
52
50
|
rescue FormatParser::IOUtils::InvalidRead
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -6,7 +6,6 @@ class FormatParser::TIFFParser
|
|
6
6
|
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
7
7
|
HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
8
8
|
TIFF_MIME_TYPE = 'image/tiff'
|
9
|
-
ARW_MIME_TYPE = 'image/x-sony-arw'
|
10
9
|
|
11
10
|
def likely_match?(filename)
|
12
11
|
filename =~ /\.tiff?$/i
|
@@ -28,20 +27,20 @@ class FormatParser::TIFFParser
|
|
28
27
|
exif_data = exif_from_tiff_io(io)
|
29
28
|
return unless exif_data
|
30
29
|
|
30
|
+
return if arw?(exif_data)
|
31
|
+
|
31
32
|
w = exif_data.width || exif_data.pixel_x_dimension
|
32
33
|
h = exif_data.height || exif_data.pixel_y_dimension
|
33
34
|
|
34
|
-
format = arw?(exif_data) ? :arw : :tif
|
35
|
-
mime_type = arw?(exif_data) ? ARW_MIME_TYPE : TIFF_MIME_TYPE
|
36
35
|
FormatParser::Image.new(
|
37
|
-
format:
|
36
|
+
format: :tif,
|
38
37
|
width_px: w,
|
39
38
|
height_px: h,
|
40
39
|
display_width_px: exif_data.rotated? ? h : w,
|
41
40
|
display_height_px: exif_data.rotated? ? w : h,
|
42
41
|
orientation: exif_data.orientation_sym,
|
43
42
|
intrinsics: {exif: exif_data},
|
44
|
-
content_type:
|
43
|
+
content_type: TIFF_MIME_TYPE,
|
45
44
|
)
|
46
45
|
rescue EXIFR::MalformedTIFF
|
47
46
|
nil
|
@@ -55,7 +54,7 @@ class FormatParser::TIFFParser
|
|
55
54
|
# Similar to how exiftool determines the image type as ARW, we are implementing a check here
|
56
55
|
# https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
|
57
56
|
def arw?(exif_data)
|
58
|
-
exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make
|
57
|
+
exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make&.start_with?('SONY')
|
59
58
|
end
|
60
59
|
|
61
60
|
FormatParser.register_parser new, natures: :image, formats: :tif
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -34,9 +34,7 @@ class FormatParser::WAVParser
|
|
34
34
|
case chunk_type
|
35
35
|
when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
|
36
36
|
fmt_data = unpack_fmt_chunk(io, chunk_size)
|
37
|
-
if fmt_data[:audio_format] != 1 and fact_processed
|
38
|
-
return process_non_pcm(fmt_data, total_sample_frames)
|
39
|
-
end
|
37
|
+
return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
|
40
38
|
fmt_processed = true
|
41
39
|
when 'data'
|
42
40
|
return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
|
@@ -45,11 +43,10 @@ class FormatParser::WAVParser
|
|
45
43
|
when 'fact'
|
46
44
|
total_sample_frames = safe_read(io, 4).unpack('l').first
|
47
45
|
safe_skip(io, chunk_size - 4)
|
48
|
-
if fmt_processed and fmt_data[:audio_format] != 1
|
49
|
-
return process_non_pcm(fmt_data, total_sample_frames)
|
50
|
-
end
|
46
|
+
return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
|
51
47
|
fact_processed = true
|
52
|
-
else
|
48
|
+
else
|
49
|
+
# Skip this chunk until a known chunk is encountered
|
53
50
|
safe_skip(io, chunk_size)
|
54
51
|
end
|
55
52
|
end
|
@@ -70,11 +67,11 @@ class FormatParser::WAVParser
|
|
70
67
|
safe_skip(io, chunk_size - 16) # skip the extra fields
|
71
68
|
|
72
69
|
{
|
73
|
-
audio_format:
|
74
|
-
channels:
|
75
|
-
sample_rate:
|
76
|
-
byte_rate:
|
77
|
-
block_align:
|
70
|
+
audio_format: fmt_info[0],
|
71
|
+
channels: fmt_info[1],
|
72
|
+
sample_rate: fmt_info[2],
|
73
|
+
byte_rate: fmt_info[3],
|
74
|
+
block_align: fmt_info[4],
|
78
75
|
bits_per_sample: fmt_info[5],
|
79
76
|
}
|
80
77
|
end
|