format_parser 1.4.2 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a9a94233949cc72d18b433cf1ddcba0e479e8b93aa1ff2e48bda6f6d86f667b
4
- data.tar.gz: 4e357bc46207e95cad52d21b2aaa1781e9c231bab02e235be29663db9722f5d9
3
+ metadata.gz: 54b56b24c97b2532bc5d7f8521aa38714111a05f2bedd5e15b7391e1005d9795
4
+ data.tar.gz: e33a026ab2c611a86d6ba7e35fd413455f7b99c423651fa09dcebf08ad543e0a
5
5
  SHA512:
6
- metadata.gz: dcf8c8aeefc6166f3645dae461aadbcc2b36e96cb7a75162586fc009d562f6f978767ff877b27d0c192b5ca3107011a1bfdda842e730e486ced02e4191b53f59
7
- data.tar.gz: fbc2caafb269f5e9c249e6ffe62ea8141477589256fab1bea5d058877d571725b1e619619ef9fcd33005d42759697ae7c7575d7a79217bdedc7e96ab02ce3c1b
6
+ metadata.gz: e27ec936c4b43cc6c82f896846ef3e8044a0639b18c1c2accf7bbdeffd0fe73d9c03fb2f2e9b4a477ef1f98a1043047042b8521f186e3406ca1c482be9a66abd
7
+ data.tar.gz: a7927cfb5fbf0a41980465186809fb830b322e83db56bab15b5a2ad897031f7269eeae85d7e61f489a5cc2e6737348309e4f93c5dee27238d06a3ed0979f0d43
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 1.5.0
2
+ * Add support for `NEF` files.
3
+
1
4
  ## 1.4.2
2
5
  * Fix `MP3Parser` taking precedence when parsing `WEBP` files.
3
6
 
data/README.md CHANGED
@@ -14,6 +14,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
14
14
 
15
15
  * TIFF
16
16
  * CR2
17
+ * NEF
17
18
  * PSD
18
19
  * PNG
19
20
  * MP3
@@ -148,6 +149,14 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
148
149
  ### MP3
149
150
  - Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
150
151
 
152
+ ### AAC
153
+ - Original music files: “Furious Freak” and “Galway”, Kevin MacLeod (incompetech.com), Licensed under Creative Commons: By Attribution 3.0, http://creativecommons.org/licenses/by/3.0/
154
+ - The AAC samples were converted from 'wav' format and made available [here](https://espressif-docs.readthedocs-hosted.com/projects/esp-adf/en/latest/design-guide/audio-samples.html) by Espressif Systems, as part of their audio development framework (under the ESPRESSIF MIT License).
155
+ - Files:
156
+ - ff-16b-2c-44100hz.aac
157
+ - ff-16b-1c-44100hz.aac
158
+ - gs-16b-2c-44100hz.aac
159
+ - gs-16b-1c-44100hz.aac
151
160
  ### FDX
152
161
  - fixture.fdx was created by one of the project maintainers and is MIT licensed
153
162
 
@@ -161,6 +170,12 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
161
170
  ### CR2
162
171
  - CR2 examples are downloaded from http://www.rawsamples.ch/ and are Creative Common Licensed.
163
172
 
173
+ ### NEF
174
+ - NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
175
+
176
+ ### ERF
177
+ - ERF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
178
+
164
179
  ### FLAC
165
180
  - atc_fixture_vbr.flac is a converted version of the MP3 with the same name
166
181
  - c_11k16btipcm.flac is a converted version of the WAV with the same name
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '1.4.2'
2
+ VERSION = '1.6.0'
3
3
  end
@@ -0,0 +1,138 @@
1
+ # This is a representation of the relevant information found in an Audio Data Transport Stream (ADTS) file header.
2
+ class FormatParser::AdtsHeaderInfo
3
+ attr_accessor :mpeg_version, :layer, :protection_absence, :profile, :mpeg4_sampling_frequency_index,
4
+ :mpeg4_channel_config, :originality, :home_usage, :frame_length, :buffer_fullness,
5
+ :aac_frames_per_adts_frame
6
+
7
+ # An ADTS header has the following format, when represented in bits:
8
+ # AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
9
+ # The chunks represented by these letters have specific meanings, as described here:
10
+ # https://wiki.multimedia.cx/index.php/ADTS
11
+
12
+ AAC_ADTS_HEADER_BITS_CHUNK_SIZES = [
13
+ ['A', 12], ['B', 1], ['C', 2], ['D', 1],
14
+ ['E', 2], ['F', 4], ['G', 1], ['H', 3],
15
+ ['I', 1], ['J', 1], ['K', 1], ['L', 1],
16
+ ['M', 13], ['O', 11], ['P', 2], ['Q', 16]
17
+ ]
18
+ MPEG4_AUDIO_OBJECT_TYPE_RANGE = 0..45
19
+ MPEG4_AUDIO_SAMPLING_FREQUENCY_RANGE = 0..14
20
+ MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH = {
21
+ 0 => 96000, 1 => 88200, 2 => 64000,
22
+ 3 => 48000, 4 => 44100, 5 => 32000,
23
+ 6 => 24000, 7 => 22050, 8 => 16000,
24
+ 9 => 12000, 10 => 11025, 11 => 8000,
25
+ 12 => 7350, 13 => 'Reserved', 14 => 'Reserved'
26
+ }
27
+ AAC_PROFILE_DESCRIPTION_HASH = {
28
+ 0 => 'AAC_MAIN',
29
+ 1 => 'AAC_LC (Low Complexity)',
30
+ 2 => 'AAC_SSR (Scaleable Sampling Rate)',
31
+ 3 => 'AAC_LTP (Long Term Prediction)'
32
+ }
33
+ MPEG_VERSION_HASH = { 0 => 'MPEG-4', 1 => 'MPEG-2'}
34
+
35
+ def mpeg4_sampling_frequency
36
+ if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
37
+ return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index]
38
+ end
39
+ nil
40
+ end
41
+
42
+ def profile_description
43
+ if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
44
+ return AAC_PROFILE_DESCRIPTION_HASH[@profile]
45
+ end
46
+ nil
47
+ end
48
+
49
+ def mpeg_version_description
50
+ if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
51
+ return MPEG_VERSION_HASH[@mpeg_version]
52
+ end
53
+ nil
54
+ end
55
+
56
+ def number_of_audio_channels
57
+ case @mpeg4_channel_config
58
+ when 1..6
59
+ @mpeg4_channel_config
60
+ when 7
61
+ 8
62
+ else
63
+ nil
64
+ end
65
+ end
66
+
67
+ def fixed_bitrate?
68
+ # A buffer fullness value of 0x7FF (decimal: 2047) denotes a variable bitrate, for which buffer fullness isn't applicable
69
+ @buffer_fullness != 2047
70
+ end
71
+
72
+ # The frame rate - i.e. frames per second
73
+ def frame_rate
74
+ # An AAC frame decompresses to 1024 PCM samples
75
+ mpeg4_sampling_frequency.to_f / 1024
76
+ end
77
+
78
+ # If the given bit array is a valid ADTS header, this method will parse it and return an instance of AdtsHeaderInfo.
79
+ # Will return nil if the header does not match the ADTS specifications.
80
+ def self.parse_adts_header(header_bits)
81
+ result = FormatParser::AdtsHeaderInfo.new
82
+
83
+ AAC_ADTS_HEADER_BITS_CHUNK_SIZES.each do |letter_size|
84
+ letter = letter_size[0]
85
+ chunk_size = letter_size[1]
86
+ chunk = header_bits.shift(chunk_size)
87
+ decimal_number = chunk.join.to_i(2)
88
+
89
+ # Skipping data represented by the letters G, K, L, Q, as we are not interested in those values.
90
+ case letter
91
+ when 'A'
92
+ # Syncword, all bits must be set to 1
93
+ return nil unless chunk.all? { |bit| bit == '1' }
94
+ when 'B'
95
+ # MPEG Version, set to 0 for MPEG-4 and 1 for MPEG-2
96
+ result.mpeg_version = decimal_number
97
+ when 'C'
98
+ # Layer, always set to 0
99
+ return nil unless decimal_number == 0
100
+ when 'D'
101
+ # Protection absence, set to 1 if there is no CRC and 0 if there is CRC
102
+ result.protection_absence = decimal_number == 1
103
+ when 'E'
104
+ # AAC Profile
105
+ return nil unless MPEG4_AUDIO_OBJECT_TYPE_RANGE.include?(decimal_number + 1)
106
+ result.profile = decimal_number
107
+ when 'F'
108
+ # MPEG-4 Sampling Frequency Index (15 is forbidden)
109
+ return nil unless MPEG4_AUDIO_SAMPLING_FREQUENCY_RANGE.include?(decimal_number)
110
+ result.mpeg4_sampling_frequency_index = decimal_number
111
+ when 'H'
112
+ # MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an in-band PCE (Program Config Element))
113
+ result.mpeg4_channel_config = decimal_number
114
+ when 'I'
115
+ # Originality, set to 1 to signal originality of the audio and 0 otherwise
116
+ result.originality = decimal_number == 1
117
+ when 'J'
118
+ # Home, set to 1 to signal home usage of the audio and 0 otherwise
119
+ result.home_usage = decimal_number == 1
120
+ when 'M'
121
+ # Frame length, length of the ADTS frame including headers and CRC check (protectionabsent == 1? 7: 9)
122
+ # We expect this to be higher than the header length, but we won't impose any other restrictions
123
+ header_length = result.protection_absence ? 7 : 9
124
+ return nil unless decimal_number > header_length
125
+ result.frame_length = decimal_number
126
+ when 'O'
127
+ # Buffer fullness, states the bit-reservoir per frame.
128
+ # It is merely an informative field with no clear use case defined in the specification.
129
+ result.buffer_fullness = decimal_number
130
+ when 'P'
131
+ # Number of AAC frames (RDBs (Raw Data Blocks)) in ADTS frame minus 1. For maximum compatibility always use one AAC frame per ADTS frame.
132
+ result.aac_frames_per_adts_frame = decimal_number + 1
133
+ end
134
+ end
135
+
136
+ result
137
+ end
138
+ end
@@ -0,0 +1,35 @@
1
+ require_relative 'aac_parser/adts_header_info'
2
+
3
+ class FormatParser::AACParser
4
+ include FormatParser::IOUtils
5
+
6
+ AAC_MIME_TYPE = 'audio/aac'
7
+
8
+ def likely_match?(filename)
9
+ filename =~ /\.aac$/i
10
+ end
11
+
12
+ def call(raw_io)
13
+ io = FormatParser::IOConstraint.new(raw_io)
14
+ header = safe_read(io, 9)
15
+ header_bits = header.unpack('B*').first.split('')
16
+
17
+ header_info = FormatParser::AdtsHeaderInfo.parse_adts_header(header_bits)
18
+ return if header_info.nil?
19
+
20
+ FormatParser::Audio.new(
21
+ title: nil,
22
+ album: nil,
23
+ artist: nil,
24
+ format: :aac,
25
+ num_audio_channels: header_info.number_of_audio_channels,
26
+ audio_sample_rate_hz: header_info.mpeg4_sampling_frequency,
27
+ media_duration_seconds: nil,
28
+ media_duration_frames: nil,
29
+ intrinsics: nil,
30
+ content_type: AAC_MIME_TYPE
31
+ )
32
+ end
33
+
34
+ FormatParser.register_parser new, natures: :audio, formats: :aac
35
+ end
@@ -41,6 +41,13 @@ module FormatParser::EXIFParser
41
41
  end
42
42
 
43
43
  class EXIFResult < SimpleDelegator
44
+ attr_reader :sub_ifds_data
45
+
46
+ def initialize(exif_raw_data, sub_ifds_data = {})
47
+ super(exif_raw_data)
48
+ @sub_ifds_data = sub_ifds_data
49
+ end
50
+
44
51
  def rotated?
45
52
  orientation.to_i > 4
46
53
  end
@@ -167,10 +174,38 @@ module FormatParser::EXIFParser
167
174
  # Squash exifr's invalid date warning since we do not use that data.
168
175
  EXIFR.logger = Logger.new(nil)
169
176
 
170
- def exif_from_tiff_io(constrained_io)
177
+ def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
171
178
  Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
172
- raw_exif_data = EXIFR::TIFF.new(IOExt.new(constrained_io))
173
- raw_exif_data ? EXIFResult.new(raw_exif_data) : nil
179
+ extended_io = IOExt.new(constrained_io)
180
+ exif_raw_data = EXIFR::TIFF.new(extended_io)
181
+
182
+ return unless exif_raw_data
183
+
184
+ sub_ifds_data = {}
185
+ if should_include_sub_ifds
186
+ sub_ifds_offsets = exif_raw_data.flat_map(&:sub_ifds).compact
187
+ sub_ifds_data = load_sub_ifds(extended_io, sub_ifds_offsets)
188
+ end
189
+
190
+ EXIFResult.new(exif_raw_data, sub_ifds_data)
191
+ end
192
+ end
193
+
194
+ private
195
+
196
+ # Reads exif data from subIFDs. This is important for NEF files.
197
+ def load_sub_ifds(extended_io, sub_ifds_offsets)
198
+ # Returning a hash of subIFDs using offsets as keys
199
+ # {
200
+ # 123 => { subIFD data...}
201
+ # 456 => { another subIFD data...}
202
+ # }
203
+ return {} if sub_ifds_offsets.empty?
204
+
205
+ EXIFR::TIFF::Data.open(extended_io) do |data|
206
+ sub_ifds_offsets.map do |sub_ifd_offset|
207
+ [sub_ifd_offset, EXIFR::TIFF::IFD.new(data, sub_ifd_offset)]
208
+ end.to_h
174
209
  end
175
210
  end
176
211
 
@@ -0,0 +1,69 @@
1
+ class FormatParser::NEFParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::EXIFParser
4
+
5
+ MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
6
+ MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
7
+ HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
8
+ NEF_MIME_TYPE = 'image/x-nikon-nef'
9
+
10
+ SUBFILE_TYPE_FULL_RESOLUTION = 0
11
+ SUBFILE_TYPE_REDUCED_RESOLUTION = 1
12
+
13
+ SHOULD_PARSE_SUB_IFDS = true
14
+
15
+ def likely_match?(filename)
16
+ filename =~ /\.nef$/i
17
+ end
18
+
19
+ def call(io)
20
+ io = FormatParser::IOConstraint.new(io)
21
+
22
+ return unless HEADER_BYTES.include?(safe_read(io, 4))
23
+
24
+ # Because of how NEF files organize their IFDs and subIFDs, we need to dive into the subIFDs
25
+ # to get the actual image dimensions instead of the preview's
26
+ exif_data = exif_from_tiff_io(io, SHOULD_PARSE_SUB_IFDS)
27
+
28
+ return unless valid?(exif_data)
29
+
30
+ full_resolution_data = get_full_resolution_ifd(exif_data)
31
+
32
+ w = full_resolution_data.image_width || exif_data.width || exif_data.pixel_x_dimension
33
+ h = full_resolution_data.image_length || exif_data.height || exif_data.pixel_y_dimension
34
+
35
+ FormatParser::Image.new(
36
+ format: :nef,
37
+ width_px: w,
38
+ height_px: h,
39
+ display_width_px: exif_data.rotated? ? h : w,
40
+ display_height_px: exif_data.rotated? ? w : h,
41
+ orientation: exif_data.orientation_sym,
42
+ intrinsics: { exif: exif_data },
43
+ content_type: NEF_MIME_TYPE,
44
+ )
45
+ rescue EXIFR::MalformedTIFF
46
+ nil
47
+ end
48
+
49
+ def valid?(exif_data)
50
+ # NEF files should hold subIFDs and have "NIKON" or "NIKON CORPORATION" as maker
51
+ has_sub_ifds_data = !exif_data&.sub_ifds_data.keys.empty?
52
+ has_sub_ifds_data && exif_data.make&.start_with?('NIKON')
53
+ end
54
+
55
+ # Investigates data from all subIFDs and finds the one holding the full-resolution image
56
+ def get_full_resolution_ifd(exif_data)
57
+ # Most of the time, NEF files have 2 subIFDs:
58
+ # First one: Thumbnail (Reduced resolution)
59
+ # Second one: Full resolution
60
+ # While this is true in most situations, there are exceptions,
61
+ # so we can't rely on this order alone.
62
+
63
+ exif_data.sub_ifds_data.each do |_ifd_offset, ifd_data|
64
+ return ifd_data if ifd_data.new_subfile_type == SUBFILE_TYPE_FULL_RESOLUTION
65
+ end
66
+ end
67
+
68
+ FormatParser.register_parser new, natures: :image, formats: :nef, priority: 4
69
+ end
@@ -0,0 +1,87 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::AACParser do
4
+ it 'should match filenames with valid AAC extensions' do
5
+ filenames = ['audiofile', 'audio_file', 'audio-file', 'audio file', 'audio.file']
6
+ extensions = ['.aac', '.AAC', '.Aac', '.AAc', '.aAc', '.aAC', '.aaC']
7
+ filenames.each do |filename|
8
+ extensions.each do |extension|
9
+ expect(subject.likely_match?(filename + extension)).to be_truthy
10
+ end
11
+ end
12
+ end
13
+
14
+ it 'should not match filenames with invalid AAC extensions' do
15
+ extensions = ['.aa', '.ac', '.acc', '.mp3', '.ogg', '.wav', '.flac', '.m4a', '.m4b', '.m4p', '.m4r', '.3gp']
16
+ extensions.each do |extension|
17
+ expect(subject.likely_match?('audiofile' + extension)).to be_falsey
18
+ end
19
+ end
20
+
21
+ it 'should parse a short sample, single channel audio, 16 kb/s, 44100 HZ' do
22
+ file_path = fixtures_dir + '/AAC/gs-16b-1c-44100hz.aac'
23
+ parsed = subject.call(File.open(file_path, 'rb'))
24
+
25
+ expect(parsed).not_to be_nil
26
+
27
+ expect(parsed.nature).to eq(:audio)
28
+ expect(parsed.format).to eq(:aac)
29
+ expect(parsed.num_audio_channels).to eq(1)
30
+ expect(parsed.audio_sample_rate_hz).to eq(44100)
31
+ expect(parsed.content_type).to eq('audio/aac')
32
+ end
33
+
34
+ it 'should parse a short sample, two channel audio, 16 kb/s, 44100 HZ' do
35
+ file_path = fixtures_dir + '/AAC/gs-16b-2c-44100hz.aac'
36
+ parsed = subject.call(File.open(file_path, 'rb'))
37
+
38
+ expect(parsed).not_to be_nil
39
+
40
+ expect(parsed.nature).to eq(:audio)
41
+ expect(parsed.format).to eq(:aac)
42
+ expect(parsed.num_audio_channels).to eq(2)
43
+ expect(parsed.audio_sample_rate_hz).to eq(44100)
44
+ expect(parsed.content_type).to eq('audio/aac')
45
+ end
46
+
47
+ it 'should parse a long sample, single channel audio, 16 kb/s, 44100 HZ' do
48
+ file_path = fixtures_dir + '/AAC/ff-16b-1c-44100hz.aac'
49
+ parsed = subject.call(File.open(file_path, 'rb'))
50
+
51
+ expect(parsed).not_to be_nil
52
+
53
+ expect(parsed.nature).to eq(:audio)
54
+ expect(parsed.format).to eq(:aac)
55
+ expect(parsed.num_audio_channels).to eq(1)
56
+ expect(parsed.audio_sample_rate_hz).to eq(44100)
57
+ expect(parsed.content_type).to eq('audio/aac')
58
+ end
59
+
60
+ it 'should parse a long sample, two channel audio, 16 kb/s, 44100 HZ' do
61
+ file_path = fixtures_dir + '/AAC/ff-16b-2c-44100hz.aac'
62
+ parsed = subject.call(File.open(file_path, 'rb'))
63
+
64
+ expect(parsed).not_to be_nil
65
+
66
+ expect(parsed.nature).to eq(:audio)
67
+ expect(parsed.format).to eq(:aac)
68
+ expect(parsed.num_audio_channels).to eq(2)
69
+ expect(parsed.audio_sample_rate_hz).to eq(44100)
70
+ expect(parsed.content_type).to eq('audio/aac')
71
+ end
72
+
73
+ shared_examples 'invalid filetype' do |filetype, fixture_path|
74
+ it "should fail to parse #{filetype}" do
75
+ file_path = fixtures_dir + fixture_path
76
+ parsed = subject.call(File.open(file_path, 'rb'))
77
+ expect(parsed).to be_nil
78
+ end
79
+ end
80
+
81
+ include_examples 'invalid filetype', 'AIFF', '/AIFF/fixture.aiff'
82
+ include_examples 'invalid filetype', 'FLAC', '/FLAC/atc_fixture_vbr.flac'
83
+ include_examples 'invalid filetype', 'MP3', '/MP3/Cassy.mp3'
84
+ include_examples 'invalid filetype', 'MPG', '/MPG/video1.mpg'
85
+ include_examples 'invalid filetype', 'OGG', '/Ogg/hi.ogg'
86
+ include_examples 'invalid filetype', 'WAV', '/WAV/c_8kmp316.wav'
87
+ end
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::AdtsHeaderInfo do
4
+ shared_examples 'parsed header' do |header_bits, expected_mpeg_version_description, expected_protection_absence, expected_profile_description, expected_mpeg4_sampling_frequency, expected_mpeg4_channel_config, expected_number_of_audio_channels, expected_originality, expected_home_usage, expected_frame_length, expected_aac_frames_per_adts_frame, expected_has_fixed_bitrate|
5
+ it "extracts correct values for header #{header_bits}" do
6
+ result = FormatParser::AdtsHeaderInfo.parse_adts_header(header_bits.split(''))
7
+ expect(result).not_to be_nil
8
+ expect(result.mpeg_version_description).to eq(expected_mpeg_version_description)
9
+ expect(result.protection_absence).to eq(expected_protection_absence)
10
+ expect(result.profile_description).to eq(expected_profile_description)
11
+ expect(result.mpeg4_sampling_frequency).to eq(expected_mpeg4_sampling_frequency)
12
+ expect(result.mpeg4_channel_config).to eq(expected_mpeg4_channel_config)
13
+ expect(result.number_of_audio_channels).to eq(expected_number_of_audio_channels)
14
+ expect(result.originality).to eq(expected_originality)
15
+ expect(result.home_usage).to eq(expected_home_usage)
16
+ expect(result.frame_length).to eq(expected_frame_length)
17
+ expect(result.aac_frames_per_adts_frame).to eq(expected_aac_frames_per_adts_frame)
18
+ expect(result.fixed_bitrate?).to eq(expected_has_fixed_bitrate)
19
+ end
20
+ end
21
+
22
+ shared_examples 'invalid header' do |failure_reason, header_bits|
23
+ it "fails on #{failure_reason} for header #{header_bits}" do
24
+ result = FormatParser::AdtsHeaderInfo.parse_adts_header(header_bits.split(''))
25
+ expect(result).to be_nil
26
+ end
27
+ end
28
+
29
+ # These headers have been validated here: https://www.p23.nl/projects/aac-header/
30
+ include_examples 'parsed header', '1111111111110001010111001000000000101110011111111111110000100001', 'MPEG-4', true, 'AAC_LC (Low Complexity)', 22050, 2, 2, false, false, 371, 1, false
31
+ include_examples 'parsed header', '111111111111000101010000010000000000011110011111111111001101111000000010', 'MPEG-4', true, 'AAC_LC (Low Complexity)', 44100, 1, 1, false, false, 60, 1, false
32
+
33
+ include_examples 'invalid header', 'invalid syncword', '1111110111110001010111001000000000101110011111111111110000100001'
34
+ include_examples 'invalid header', 'invalid layer value', '1111111111110011010111001000000000101110011111111111110000100001'
35
+ include_examples 'invalid header', 'invalid sampling frequency index 15', '1111111111110001011111001000000000101110011111111111110000100001'
36
+ include_examples 'invalid header', 'zero frame length', '1111111111110001010111001000000000000000011111111111110000100001'
37
+ include_examples 'invalid header', 'random header', '101000101011010101010101111010101010101011001010101010101111000000011101'
38
+ end
@@ -1,15 +1,88 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe FormatParser::EXIFParser do
4
- describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
5
- Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
6
- filename = File.basename(tiff_path)
7
- it "is able to parse #{filename}" do
8
- result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
9
- expect(result).not_to be_nil
10
- expect(result.orientation_sym).to be_kind_of(Symbol)
11
- # Filenames in this dir correspond with the orientation of the file
12
- expect(filename).to include(result.orientation_sym.to_s)
4
+ describe 'EXIFParser#exif_from_tiff_io' do
5
+ describe 'Orientation' do
6
+ describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
7
+ Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
8
+ filename = File.basename(tiff_path)
9
+ it "is able to parse #{filename}" do
10
+ result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
11
+ expect(result).not_to be_nil
12
+ expect(result.orientation_sym).to be_kind_of(Symbol)
13
+ # Filenames in this dir correspond with the orientation of the file
14
+ expect(filename).to include(result.orientation_sym.to_s)
15
+ end
16
+ end
17
+ end
18
+
19
+ it 'is able to deal with an orientation tag with a tuple value for orientation' do
20
+ path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
21
+ exif_data = File.open(path, 'rb') do |f|
22
+ described_class.exif_from_tiff_io(f)
23
+ end
24
+ expect(exif_data.orientation).to eq(1)
25
+ end
26
+ end
27
+
28
+ describe 'SubIFDs' do
29
+ it 'should not retrieve subIFDs data by default' do
30
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
31
+
32
+ exif_data = File.open(path, 'rb') do |f|
33
+ described_class.exif_from_tiff_io(f)
34
+ end
35
+
36
+ expect(exif_data.sub_ifds_data).not_to be_nil
37
+ expect(exif_data.sub_ifds_data).to eq({})
38
+ end
39
+
40
+ it 'is able retrieve data from all subIFDs optionally' do
41
+ # Verifying:
42
+ # {
43
+ # offset_1 => { subIFD_1 data...}
44
+ # offset_2 => { subIFD_2 data...}
45
+ # }
46
+
47
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
48
+ should_include_sub_ifds = true
49
+
50
+ exif_data = File.open(path, 'rb') do |f|
51
+ described_class.exif_from_tiff_io(f, should_include_sub_ifds)
52
+ end
53
+
54
+ offset_1, offset_2 = exif_data.sub_ifds
55
+ sub_ifds_data = exif_data.sub_ifds_data
56
+
57
+ expect(sub_ifds_data).not_to be_nil
58
+ expect(sub_ifds_data).to have_key(offset_1)
59
+ expect(sub_ifds_data).to have_key(offset_2)
60
+ end
61
+
62
+ it 'returns EXIFR IFD instances as subIFD data' do
63
+ # Verifying:
64
+ # {
65
+ # offset_1 => { new_subfile_type => 1, ...}
66
+ # offset_2 => { new_subfile_type => 0, ...}
67
+ # }
68
+ # we shouldn't verify everything, since we trust EXIFR for that.
69
+ # making sure we are getting each subfile type should be good enough.
70
+
71
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
72
+ should_include_sub_ifds = true
73
+
74
+ exif_data = File.open(path, 'rb') do |f|
75
+ described_class.exif_from_tiff_io(f, should_include_sub_ifds)
76
+ end
77
+
78
+ offset_1, offset_2 = exif_data.sub_ifds.sort
79
+ first_sub_ifd = exif_data.sub_ifds_data&.[](offset_1)
80
+ second_sub_ifd = exif_data.sub_ifds_data&.[](offset_2)
81
+
82
+ expect(first_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
83
+ expect(second_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
84
+ expect(first_sub_ifd.new_subfile_type).to eq(1)
85
+ expect(second_sub_ifd.new_subfile_type).to eq(0)
13
86
  end
14
87
  end
15
88
  end
@@ -29,8 +102,8 @@ describe FormatParser::EXIFParser do
29
102
  end
30
103
 
31
104
  it 'returns a Hash from #to_hash' do
32
- first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
33
- second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
105
+ first_fake_exif = double(orientation: 1, to_hash: { foo: 123, bar: 675 })
106
+ second_fake_exif = double(orientation: 4, to_hash: { foo: 245 })
34
107
 
35
108
  stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
36
109
  stack_as_hash = stack.to_hash
@@ -42,14 +115,6 @@ describe FormatParser::EXIFParser do
42
115
  end
43
116
  end
44
117
 
45
- it 'is able to deal with an orientation tag which a tuple value for orientation' do
46
- path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
47
- exif_data = File.open(path, 'rb') do |f|
48
- described_class.exif_from_tiff_io(f)
49
- end
50
- expect(exif_data.orientation).to eq(1)
51
- end
52
-
53
118
  describe 'IOExt' do
54
119
  it 'supports readbyte' do
55
120
  io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
@@ -0,0 +1,131 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::NEFParser do
4
+ describe 'Sample files from rawsamples' do
5
+ Dir.glob(fixtures_dir + '/NEF/*.NEF').each do |file_path|
6
+ it "is able to parse #{File.basename(file_path)}" do
7
+ parsed = subject.call(File.open(file_path, 'rb'))
8
+
9
+ expect(parsed).not_to be_nil
10
+ expect(parsed.nature).to eq(:image)
11
+ expect(parsed.format).to eq(:nef)
12
+
13
+ expect(parsed.width_px).to be_kind_of(Integer)
14
+ expect(parsed.height_px).to be_kind_of(Integer)
15
+
16
+ expect(parsed.display_width_px).to be_kind_of(Integer)
17
+ expect(parsed.display_height_px).to be_kind_of(Integer)
18
+
19
+ expect(parsed.orientation).to be_kind_of(Symbol)
20
+
21
+ expect(parsed.intrinsics[:exif]).not_to be_nil
22
+ end
23
+ end
24
+ end
25
+
26
+ describe 'Image Dimensions' do
27
+ it 'parses dimensions properly for a given fixture' do
28
+ # The default parser from EXIFr returns the dimensions from the embedded
29
+ # thumbnails as being the image's actual dimensions.
30
+ # We make sure we properly deal with this.
31
+
32
+ parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))
33
+
34
+ # Raw Image dimensions
35
+ expect(parsed.width_px).to eq(4_608)
36
+ expect(parsed.height_px).to eq(3_080)
37
+
38
+ expect(parsed.orientation).to eq(:right_top)
39
+ end
40
+
41
+ it 'correctly adjusts display dimensions for rotated images' do
42
+ # This image is rotated, meaning display_width and display_height
43
+ # should hold swapped values from width and height
44
+ parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))
45
+
46
+ # Raw Image dimensions
47
+ expect(parsed.width_px).to eq(4_608)
48
+ expect(parsed.height_px).to eq(3_080)
49
+
50
+ # Raw Dimensions considering orientation
51
+ expect(parsed.display_width_px).to eq(3_080)
52
+ expect(parsed.display_height_px).to eq(4_608)
53
+
54
+ expect(parsed.orientation).to eq(:right_top)
55
+ end
56
+
57
+ it 'does not return dimensions from embedded previews' do
58
+ Dir.glob(fixtures_dir + '/NEF/*.nef').each do |file_path|
59
+ # By default, NEF files include 160x120 sub_ifds.
60
+ # These dimensions cannot be considered by the parser.
61
+
62
+ parsed = subject.call(File.open(file_path, 'rb'))
63
+
64
+ expect(parsed.width_px).not_to eq(160)
65
+ expect(parsed.height_px).not_to eq(120)
66
+ end
67
+ end
68
+
69
+ it 'properly extracts dimensions when there are more than 2 subIFDs in the image' do
70
+ # this file has 3 subIFDs, and the RAW image information is actually in the one in the middle.
71
+ nef_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"
72
+
73
+ parsed = subject.call(File.open(nef_path, 'rb'))
74
+
75
+ expect(parsed).not_to be_nil
76
+ expect(parsed.width_px).to eq(7424)
77
+ expect(parsed.height_px).to eq(4924)
78
+ expect(parsed.orientation).to eq(:top_left)
79
+ end
80
+
81
+ describe 'correctly extracts dimensions from various NEF flavors of the same file' do
82
+ Dir.glob(fixtures_dir + '/NEF/RAW_NIKON_D800*.NEF').each do |file_path|
83
+ it "is able to parse #{File.basename(file_path)}" do
84
+ parsed = subject.call(File.open(file_path, 'rb'))
85
+
86
+ expect(parsed).not_to be_nil
87
+ expect(parsed.width_px).to eq(7424)
88
+ expect(parsed.height_px).to eq(4924)
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ describe 'False-positive avoidance' do
95
+ it 'should return nil for regular TIFF images' do
96
+ parsed = subject.call(File.open("#{fixtures_dir}/TIFF/Shinbutsureijoushuincho.tiff", 'rb'))
97
+ expect(parsed).to be_nil
98
+ end
99
+
100
+ it 'should return nil for regular CR2 images' do
101
+ parsed = subject.call(File.open("#{fixtures_dir}/CR2/RAW_CANON_40D_SRAW_V103.CR2", 'rb'))
102
+ expect(parsed).to be_nil
103
+ end
104
+
105
+ it 'should return nil for regular ERF images' do
106
+ # ERF files are also TIFFs with subIFDs but they don't have a matching "maker" tag
107
+ parsed = subject.call(File.open("#{fixtures_dir}/ERF/RAW_EPSON_RD1.ERF", 'rb'))
108
+ expect(parsed).to be_nil
109
+ end
110
+ end
111
+
112
+ describe 'Parser Performance' do
113
+ it 'extracts dimensions from a very large NEF economically' do
114
+ # this file has 77.3mb
115
+ file_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"
116
+
117
+ io = File.open(file_path, 'rb')
118
+ io_with_stats = FormatParser::ReadLimiter.new(io)
119
+
120
+ parsed = subject.call(io_with_stats)
121
+
122
+ expect(parsed).not_to be_nil
123
+ expect(parsed.width_px).to eq(7424)
124
+ expect(parsed.height_px).to eq(4924)
125
+
126
+ expect(io_with_stats.reads).to be_within(4).of(12)
127
+ expect(io_with_stats.seeks).to be_within(4).of(12)
128
+ expect(io_with_stats.bytes).to be_within(1024).of(59000)
129
+ end
130
+ end
131
+ end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
8
8
  - Julik Tarkhanov
9
- autorequire:
9
+ autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-07-13 00:00:00.000000000 Z
12
+ date: 2022-09-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -229,6 +229,8 @@ files:
229
229
  - lib/image.rb
230
230
  - lib/io_constraint.rb
231
231
  - lib/io_utils.rb
232
+ - lib/parsers/aac_parser.rb
233
+ - lib/parsers/aac_parser/adts_header_info.rb
232
234
  - lib/parsers/aiff_parser.rb
233
235
  - lib/parsers/bmp_parser.rb
234
236
  - lib/parsers/cr2_parser.rb
@@ -246,6 +248,7 @@ files:
246
248
  - lib/parsers/mp3_parser.rb
247
249
  - lib/parsers/mp3_parser/id3_extraction.rb
248
250
  - lib/parsers/mpeg_parser.rb
251
+ - lib/parsers/nef_parser.rb
249
252
  - lib/parsers/ogg_parser.rb
250
253
  - lib/parsers/pdf_parser.rb
251
254
  - lib/parsers/png_parser.rb
@@ -272,6 +275,8 @@ files:
272
275
  - spec/hash_utils_spec.rb
273
276
  - spec/integration/active_storage/rails_app.rb
274
277
  - spec/io_utils_spec.rb
278
+ - spec/parsers/aac_parser_spec.rb
279
+ - spec/parsers/adts_header_info_spec.rb
275
280
  - spec/parsers/aiff_parser_spec.rb
276
281
  - spec/parsers/bmp_parser_spec.rb
277
282
  - spec/parsers/cr2_parser_spec.rb
@@ -286,6 +291,7 @@ files:
286
291
  - spec/parsers/moov_parser_spec.rb
287
292
  - spec/parsers/mp3_parser_spec.rb
288
293
  - spec/parsers/mpeg_parser_spec.rb
294
+ - spec/parsers/nef_parser_spec.rb
289
295
  - spec/parsers/ogg_parser_spec.rb
290
296
  - spec/parsers/pdf_parser_spec.rb
291
297
  - spec/parsers/png_parser_spec.rb
@@ -304,7 +310,7 @@ licenses:
304
310
  - MIT (Hippocratic)
305
311
  metadata:
306
312
  allowed_push_host: https://rubygems.org
307
- post_install_message:
313
+ post_install_message:
308
314
  rdoc_options: []
309
315
  require_paths:
310
316
  - lib
@@ -319,8 +325,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
325
  - !ruby/object:Gem::Version
320
326
  version: '0'
321
327
  requirements: []
322
- rubygems_version: 3.3.7
323
- signing_key:
328
+ rubygems_version: 3.1.6
329
+ signing_key:
324
330
  specification_version: 4
325
331
  summary: A library for efficient parsing of file metadata
326
332
  test_files: []