format_parser 1.4.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +15 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/aac_parser/adts_header_info.rb +138 -0
- data/lib/parsers/aac_parser.rb +35 -0
- data/lib/parsers/exif_parser.rb +38 -3
- data/lib/parsers/nef_parser.rb +69 -0
- data/spec/parsers/aac_parser_spec.rb +87 -0
- data/spec/parsers/adts_header_info_spec.rb +38 -0
- data/spec/parsers/exif_parser_spec.rb +84 -19
- data/spec/parsers/nef_parser_spec.rb +131 -0
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54b56b24c97b2532bc5d7f8521aa38714111a05f2bedd5e15b7391e1005d9795
|
4
|
+
data.tar.gz: e33a026ab2c611a86d6ba7e35fd413455f7b99c423651fa09dcebf08ad543e0a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e27ec936c4b43cc6c82f896846ef3e8044a0639b18c1c2accf7bbdeffd0fe73d9c03fb2f2e9b4a477ef1f98a1043047042b8521f186e3406ca1c482be9a66abd
|
7
|
+
data.tar.gz: a7927cfb5fbf0a41980465186809fb830b322e83db56bab15b5a2ad897031f7269eeae85d7e61f489a5cc2e6737348309e4f93c5dee27238d06a3ed0979f0d43
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -14,6 +14,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
14
14
|
|
15
15
|
* TIFF
|
16
16
|
* CR2
|
17
|
+
* NEF
|
17
18
|
* PSD
|
18
19
|
* PNG
|
19
20
|
* MP3
|
@@ -148,6 +149,14 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
148
149
|
### MP3
|
149
150
|
- Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
|
150
151
|
|
152
|
+
### AAC
|
153
|
+
- Original music files: “Furious Freak” and “Galway”, Kevin MacLeod (incompetech.com), Licensed under Creative Commons: By Attribution 3.0, http://creativecommons.org/licenses/by/3.0/
|
154
|
+
- The AAC samples were converted from 'wav' format and made available [here](https://espressif-docs.readthedocs-hosted.com/projects/esp-adf/en/latest/design-guide/audio-samples.html) by Espressif Systems, as part of their audio development framework (under the ESPRESSIF MIT License).
|
155
|
+
- Files:
|
156
|
+
- ff-16b-2c-44100hz.aac
|
157
|
+
- ff-16b-1c-44100hz.aac
|
158
|
+
- gs-16b-2c-44100hz.aac
|
159
|
+
- gs-16b-1c-44100hz.aac
|
151
160
|
### FDX
|
152
161
|
- fixture.fdx was created by one of the project maintainers and is MIT licensed
|
153
162
|
|
@@ -161,6 +170,12 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
161
170
|
### CR2
|
162
171
|
- CR2 examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
163
172
|
|
173
|
+
### NEF
|
174
|
+
- NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
175
|
+
|
176
|
+
### ERF
|
177
|
+
- ERF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
178
|
+
|
164
179
|
### FLAC
|
165
180
|
- atc_fixture_vbr.flac is a converted version of the MP3 with the same name
|
166
181
|
- c_11k16btipcm.flac is a converted version of the WAV with the same name
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# This is a representation of the relevant information found in an Audio Data Transport Stream (ADTS) file header.
|
2
|
+
class FormatParser::AdtsHeaderInfo
|
3
|
+
attr_accessor :mpeg_version, :layer, :protection_absence, :profile, :mpeg4_sampling_frequency_index,
|
4
|
+
:mpeg4_channel_config, :originality, :home_usage, :frame_length, :buffer_fullness,
|
5
|
+
:aac_frames_per_adts_frame
|
6
|
+
|
7
|
+
# An ADTS header has the following format, when represented in bits:
|
8
|
+
# AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
|
9
|
+
# The chunks represented by these letters have specific meanings, as described here:
|
10
|
+
# https://wiki.multimedia.cx/index.php/ADTS
|
11
|
+
|
12
|
+
AAC_ADTS_HEADER_BITS_CHUNK_SIZES = [
|
13
|
+
['A', 12], ['B', 1], ['C', 2], ['D', 1],
|
14
|
+
['E', 2], ['F', 4], ['G', 1], ['H', 3],
|
15
|
+
['I', 1], ['J', 1], ['K', 1], ['L', 1],
|
16
|
+
['M', 13], ['O', 11], ['P', 2], ['Q', 16]
|
17
|
+
]
|
18
|
+
MPEG4_AUDIO_OBJECT_TYPE_RANGE = 0..45
|
19
|
+
MPEG4_AUDIO_SAMPLING_FREQUENCY_RANGE = 0..14
|
20
|
+
MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH = {
|
21
|
+
0 => 96000, 1 => 88200, 2 => 64000,
|
22
|
+
3 => 48000, 4 => 44100, 5 => 32000,
|
23
|
+
6 => 24000, 7 => 22050, 8 => 16000,
|
24
|
+
9 => 12000, 10 => 11025, 11 => 8000,
|
25
|
+
12 => 7350, 13 => 'Reserved', 14 => 'Reserved'
|
26
|
+
}
|
27
|
+
AAC_PROFILE_DESCRIPTION_HASH = {
|
28
|
+
0 => 'AAC_MAIN',
|
29
|
+
1 => 'AAC_LC (Low Complexity)',
|
30
|
+
2 => 'AAC_SSR (Scaleable Sampling Rate)',
|
31
|
+
3 => 'AAC_LTP (Long Term Prediction)'
|
32
|
+
}
|
33
|
+
MPEG_VERSION_HASH = { 0 => 'MPEG-4', 1 => 'MPEG-2'}
|
34
|
+
|
35
|
+
def mpeg4_sampling_frequency
|
36
|
+
if !@mpeg4_sampling_frequency_index.nil? && MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH.key?(@mpeg4_sampling_frequency_index)
|
37
|
+
return MPEG4_AUDIO_SAMPLING_FREQUENCY_HASH[@mpeg4_sampling_frequency_index]
|
38
|
+
end
|
39
|
+
nil
|
40
|
+
end
|
41
|
+
|
42
|
+
def profile_description
|
43
|
+
if !@profile.nil? && AAC_PROFILE_DESCRIPTION_HASH.key?(@profile)
|
44
|
+
return AAC_PROFILE_DESCRIPTION_HASH[@profile]
|
45
|
+
end
|
46
|
+
nil
|
47
|
+
end
|
48
|
+
|
49
|
+
def mpeg_version_description
|
50
|
+
if !@mpeg_version.nil? && MPEG_VERSION_HASH.key?(@mpeg_version)
|
51
|
+
return MPEG_VERSION_HASH[@mpeg_version]
|
52
|
+
end
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
|
56
|
+
def number_of_audio_channels
|
57
|
+
case @mpeg4_channel_config
|
58
|
+
when 1..6
|
59
|
+
@mpeg4_channel_config
|
60
|
+
when 7
|
61
|
+
8
|
62
|
+
else
|
63
|
+
nil
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def fixed_bitrate?
|
68
|
+
# A buffer fullness value of 0x7FF (decimal: 2047) denotes a variable bitrate, for which buffer fullness isn't applicable
|
69
|
+
@buffer_fullness != 2047
|
70
|
+
end
|
71
|
+
|
72
|
+
# The frame rate - i.e. frames per second
|
73
|
+
def frame_rate
|
74
|
+
# An AAC sample uncompresses to 1024 PCM samples
|
75
|
+
mpeg4_sampling_frequency.to_f / 1024
|
76
|
+
end
|
77
|
+
|
78
|
+
# If the given bit array is a valid ADTS header, this method will parse it and return an instance of AdtsHeaderInfo.
|
79
|
+
# Will return nil if the header does not match the ADTS specifications.
|
80
|
+
def self.parse_adts_header(header_bits)
|
81
|
+
result = FormatParser::AdtsHeaderInfo.new
|
82
|
+
|
83
|
+
AAC_ADTS_HEADER_BITS_CHUNK_SIZES.each do |letter_size|
|
84
|
+
letter = letter_size[0]
|
85
|
+
chunk_size = letter_size[1]
|
86
|
+
chunk = header_bits.shift(chunk_size)
|
87
|
+
decimal_number = chunk.join.to_i(2)
|
88
|
+
|
89
|
+
# Skipping data represented by the letters G, K, L, Q, as we are not interested in those values.
|
90
|
+
case letter
|
91
|
+
when 'A'
|
92
|
+
# Syncword, all bits must be set to 1
|
93
|
+
return nil unless chunk.all? { |bit| bit == '1' }
|
94
|
+
when 'B'
|
95
|
+
# MPEG Version, set to 0 for MPEG-4 and 1 for MPEG-2
|
96
|
+
result.mpeg_version = decimal_number
|
97
|
+
when 'C'
|
98
|
+
# Layer, always set to 0
|
99
|
+
return nil unless decimal_number == 0
|
100
|
+
when 'D'
|
101
|
+
# Protection absence, set to 1 if there is no CRC and 0 if there is CRC
|
102
|
+
result.protection_absence = decimal_number == 1
|
103
|
+
when 'E'
|
104
|
+
# AAC Profile
|
105
|
+
return nil unless MPEG4_AUDIO_OBJECT_TYPE_RANGE.include?(decimal_number + 1)
|
106
|
+
result.profile = decimal_number
|
107
|
+
when 'F'
|
108
|
+
# MPEG-4 Sampling Frequency Index (15 is forbidden)
|
109
|
+
return nil unless MPEG4_AUDIO_SAMPLING_FREQUENCY_RANGE.include?(decimal_number)
|
110
|
+
result.mpeg4_sampling_frequency_index = decimal_number
|
111
|
+
when 'H'
|
112
|
+
# MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an in-band PCE (Program Config Element))
|
113
|
+
result.mpeg4_channel_config = decimal_number
|
114
|
+
when 'I'
|
115
|
+
# Originality, set to 1 to signal originality of the audio and 0 otherwise
|
116
|
+
result.originality = decimal_number == 1
|
117
|
+
when 'J'
|
118
|
+
# Home, set to 1 to signal home usage of the audio and 0 otherwise
|
119
|
+
result.home_usage = decimal_number == 1
|
120
|
+
when 'M'
|
121
|
+
# Frame length, length of the ADTS frame including headers and CRC check (protectionabsent == 1? 7: 9)
|
122
|
+
# We expect this to be higher than the header length, but we won't impose any other restrictions
|
123
|
+
header_length = result.protection_absence ? 7 : 9
|
124
|
+
return nil unless decimal_number > header_length
|
125
|
+
result.frame_length = decimal_number
|
126
|
+
when 'O'
|
127
|
+
# Buffer fullness, states the bit-reservoir per frame.
|
128
|
+
# It is merely an informative field with no clear use case defined in the specification.
|
129
|
+
result.buffer_fullness = decimal_number
|
130
|
+
when 'P'
|
131
|
+
# Number of AAC frames (RDBs (Raw Data Blocks)) in ADTS frame minus 1. For maximum compatibility always use one AAC frame per ADTS frame.
|
132
|
+
result.aac_frames_per_adts_frame = decimal_number + 1
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
result
|
137
|
+
end
|
138
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'aac_parser/adts_header_info'
|
2
|
+
|
3
|
+
class FormatParser::AACParser
|
4
|
+
include FormatParser::IOUtils
|
5
|
+
|
6
|
+
AAC_MIME_TYPE = 'audio/aac'
|
7
|
+
|
8
|
+
def likely_match?(filename)
|
9
|
+
filename =~ /\.aac$/i
|
10
|
+
end
|
11
|
+
|
12
|
+
def call(raw_io)
|
13
|
+
io = FormatParser::IOConstraint.new(raw_io)
|
14
|
+
header = safe_read(io, 9)
|
15
|
+
header_bits = header.unpack('B*').first.split('')
|
16
|
+
|
17
|
+
header_info = FormatParser::AdtsHeaderInfo.parse_adts_header(header_bits)
|
18
|
+
return if header_info.nil?
|
19
|
+
|
20
|
+
FormatParser::Audio.new(
|
21
|
+
title: nil,
|
22
|
+
album: nil,
|
23
|
+
artist: nil,
|
24
|
+
format: :aac,
|
25
|
+
num_audio_channels: header_info.number_of_audio_channels,
|
26
|
+
audio_sample_rate_hz: header_info.mpeg4_sampling_frequency,
|
27
|
+
media_duration_seconds: nil,
|
28
|
+
media_duration_frames: nil,
|
29
|
+
intrinsics: nil,
|
30
|
+
content_type: AAC_MIME_TYPE
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
34
|
+
FormatParser.register_parser new, natures: :audio, formats: :aac
|
35
|
+
end
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -41,6 +41,13 @@ module FormatParser::EXIFParser
|
|
41
41
|
end
|
42
42
|
|
43
43
|
class EXIFResult < SimpleDelegator
|
44
|
+
attr_reader :sub_ifds_data
|
45
|
+
|
46
|
+
def initialize(exif_raw_data, sub_ifds_data = {})
|
47
|
+
super(exif_raw_data)
|
48
|
+
@sub_ifds_data = sub_ifds_data
|
49
|
+
end
|
50
|
+
|
44
51
|
def rotated?
|
45
52
|
orientation.to_i > 4
|
46
53
|
end
|
@@ -167,10 +174,38 @@ module FormatParser::EXIFParser
|
|
167
174
|
# Squash exifr's invalid date warning since we do not use that data.
|
168
175
|
EXIFR.logger = Logger.new(nil)
|
169
176
|
|
170
|
-
def exif_from_tiff_io(constrained_io)
|
177
|
+
def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
|
171
178
|
Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
|
172
|
-
|
173
|
-
|
179
|
+
extended_io = IOExt.new(constrained_io)
|
180
|
+
exif_raw_data = EXIFR::TIFF.new(extended_io)
|
181
|
+
|
182
|
+
return unless exif_raw_data
|
183
|
+
|
184
|
+
sub_ifds_data = {}
|
185
|
+
if should_include_sub_ifds
|
186
|
+
sub_ifds_offsets = exif_raw_data.flat_map(&:sub_ifds).compact
|
187
|
+
sub_ifds_data = load_sub_ifds(extended_io, sub_ifds_offsets)
|
188
|
+
end
|
189
|
+
|
190
|
+
EXIFResult.new(exif_raw_data, sub_ifds_data)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
private
|
195
|
+
|
196
|
+
# Reads exif data from subIFDs. This is important for NEF files.
|
197
|
+
def load_sub_ifds(extended_io, sub_ifds_offsets)
|
198
|
+
# Returns a hash of subIFDs using offsets as keys
|
199
|
+
# {
|
200
|
+
# 123 => { subIFD data...}
|
201
|
+
# 456 => { another subIFD data...}
|
202
|
+
# }
|
203
|
+
return {} if sub_ifds_offsets.empty?
|
204
|
+
|
205
|
+
EXIFR::TIFF::Data.open(extended_io) do |data|
|
206
|
+
sub_ifds_offsets.map do |sub_ifd_offset|
|
207
|
+
[sub_ifd_offset, EXIFR::TIFF::IFD.new(data, sub_ifd_offset)]
|
208
|
+
end.to_h
|
174
209
|
end
|
175
210
|
end
|
176
211
|
|
@@ -0,0 +1,69 @@
|
|
1
|
+
class FormatParser::NEFParser
|
2
|
+
include FormatParser::IOUtils
|
3
|
+
include FormatParser::EXIFParser
|
4
|
+
|
5
|
+
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
6
|
+
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
7
|
+
HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
8
|
+
NEF_MIME_TYPE = 'image/x-nikon-nef'
|
9
|
+
|
10
|
+
SUBFILE_TYPE_FULL_RESOLUTION = 0
|
11
|
+
SUBFILE_TYPE_REDUCED_RESOLUTION = 1
|
12
|
+
|
13
|
+
SHOULD_PARSE_SUB_IFDS = true
|
14
|
+
|
15
|
+
def likely_match?(filename)
|
16
|
+
filename =~ /\.nef$/i
|
17
|
+
end
|
18
|
+
|
19
|
+
def call(io)
|
20
|
+
io = FormatParser::IOConstraint.new(io)
|
21
|
+
|
22
|
+
return unless HEADER_BYTES.include?(safe_read(io, 4))
|
23
|
+
|
24
|
+
# Because of how NEF files organize their IFDs and subIFDs, we need to dive into the subIFDs
|
25
|
+
# to get the actual image dimensions instead of the preview's
|
26
|
+
exif_data = exif_from_tiff_io(io, SHOULD_PARSE_SUB_IFDS)
|
27
|
+
|
28
|
+
return unless valid?(exif_data)
|
29
|
+
|
30
|
+
full_resolution_data = get_full_resolution_ifd(exif_data)
|
31
|
+
|
32
|
+
w = full_resolution_data.image_width || exif_data.width || exif_data.pixel_x_dimension
|
33
|
+
h = full_resolution_data.image_length || exif_data.height || exif_data.pixel_y_dimension
|
34
|
+
|
35
|
+
FormatParser::Image.new(
|
36
|
+
format: :nef,
|
37
|
+
width_px: w,
|
38
|
+
height_px: h,
|
39
|
+
display_width_px: exif_data.rotated? ? h : w,
|
40
|
+
display_height_px: exif_data.rotated? ? w : h,
|
41
|
+
orientation: exif_data.orientation_sym,
|
42
|
+
intrinsics: { exif: exif_data },
|
43
|
+
content_type: NEF_MIME_TYPE,
|
44
|
+
)
|
45
|
+
rescue EXIFR::MalformedTIFF
|
46
|
+
nil
|
47
|
+
end
|
48
|
+
|
49
|
+
def valid?(exif_data)
|
50
|
+
# NEF files should hold subIFDs and have "NIKON" or "NIKON CORPORATION" as maker
|
51
|
+
has_sub_ifds_data = !exif_data&.sub_ifds_data.keys.empty?
|
52
|
+
has_sub_ifds_data && exif_data.make&.start_with?('NIKON')
|
53
|
+
end
|
54
|
+
|
55
|
+
# Investigates data from all subIFDs and find the one holding the full-resolution image
|
56
|
+
def get_full_resolution_ifd(exif_data)
|
57
|
+
# Most of the time, NEF files have 2 subIFDs:
|
58
|
+
# First one: Thumbnail (Reduced resolution)
|
59
|
+
# Second one: Full resolution
|
60
|
+
# While this is true in most situations, there are exceptions,
|
61
|
+
# so we can't rely on this order alone.
|
62
|
+
|
63
|
+
exif_data.sub_ifds_data.each do |_ifd_offset, ifd_data|
|
64
|
+
return ifd_data if ifd_data.new_subfile_type == SUBFILE_TYPE_FULL_RESOLUTION
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
FormatParser.register_parser new, natures: :image, formats: :nef, priority: 4
|
69
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::AACParser do
|
4
|
+
it 'should match filenames with valid AAC extensions' do
|
5
|
+
filenames = ['audiofile', 'audio_file', 'audio-file', 'audio file', 'audio.file']
|
6
|
+
extensions = ['.aac', '.AAC', '.Aac', '.AAc', '.aAc', '.aAC', '.aaC']
|
7
|
+
filenames.each do |filename|
|
8
|
+
extensions.each do |extension|
|
9
|
+
expect(subject.likely_match?(filename + extension)).to be_truthy
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should not match filenames with invalid AAC extensions' do
|
15
|
+
extensions = ['.aa', '.ac', '.acc', '.mp3', '.ogg', '.wav', '.flac', '.m4a', '.m4b', '.m4p', '.m4r', '.3gp']
|
16
|
+
extensions.each do |extension|
|
17
|
+
expect(subject.likely_match?('audiofile' + extension)).to be_falsey
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should parse a short sample, single channel audio, 16 kb/s, 44100 HZ' do
|
22
|
+
file_path = fixtures_dir + '/AAC/gs-16b-1c-44100hz.aac'
|
23
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
24
|
+
|
25
|
+
expect(parsed).not_to be_nil
|
26
|
+
|
27
|
+
expect(parsed.nature).to eq(:audio)
|
28
|
+
expect(parsed.format).to eq(:aac)
|
29
|
+
expect(parsed.num_audio_channels).to eq(1)
|
30
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
31
|
+
expect(parsed.content_type).to eq('audio/aac')
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should parse a short sample, two channel audio, 16 kb/s, 44100 HZ' do
|
35
|
+
file_path = fixtures_dir + '/AAC/gs-16b-2c-44100hz.aac'
|
36
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
37
|
+
|
38
|
+
expect(parsed).not_to be_nil
|
39
|
+
|
40
|
+
expect(parsed.nature).to eq(:audio)
|
41
|
+
expect(parsed.format).to eq(:aac)
|
42
|
+
expect(parsed.num_audio_channels).to eq(2)
|
43
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
44
|
+
expect(parsed.content_type).to eq('audio/aac')
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should parse a long sample, single channel audio, 16 kb/s, 44100 HZ' do
|
48
|
+
file_path = fixtures_dir + '/AAC/ff-16b-1c-44100hz.aac'
|
49
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
50
|
+
|
51
|
+
expect(parsed).not_to be_nil
|
52
|
+
|
53
|
+
expect(parsed.nature).to eq(:audio)
|
54
|
+
expect(parsed.format).to eq(:aac)
|
55
|
+
expect(parsed.num_audio_channels).to eq(1)
|
56
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
57
|
+
expect(parsed.content_type).to eq('audio/aac')
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'should parse a long sample, two channel audio, 16 kb/s, 44100 HZ' do
|
61
|
+
file_path = fixtures_dir + '/AAC/ff-16b-2c-44100hz.aac'
|
62
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
63
|
+
|
64
|
+
expect(parsed).not_to be_nil
|
65
|
+
|
66
|
+
expect(parsed.nature).to eq(:audio)
|
67
|
+
expect(parsed.format).to eq(:aac)
|
68
|
+
expect(parsed.num_audio_channels).to eq(2)
|
69
|
+
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
70
|
+
expect(parsed.content_type).to eq('audio/aac')
|
71
|
+
end
|
72
|
+
|
73
|
+
shared_examples 'invalid filetype' do |filetype, fixture_path|
|
74
|
+
it "should fail to parse #{filetype}" do
|
75
|
+
file_path = fixtures_dir + fixture_path
|
76
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
77
|
+
expect(parsed).to be_nil
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
include_examples 'invalid filetype', 'AIFF', '/AIFF/fixture.aiff'
|
82
|
+
include_examples 'invalid filetype', 'FLAC', '/FLAC/atc_fixture_vbr.flac'
|
83
|
+
include_examples 'invalid filetype', 'MP3', '/MP3/Cassy.mp3'
|
84
|
+
include_examples 'invalid filetype', 'MPG', '/MPG/video1.mpg'
|
85
|
+
include_examples 'invalid filetype', 'OGG', '/Ogg/hi.ogg'
|
86
|
+
include_examples 'invalid filetype', 'WAV', '/WAV/c_8kmp316.wav'
|
87
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::AdtsHeaderInfo do
|
4
|
+
shared_examples 'parsed header' do |header_bits, expected_mpeg_version_description, expected_protection_absence, expected_profile_description, expected_mpeg4_sampling_frequency, expected_mpeg4_channel_config, expected_number_of_audio_channels, expected_originality, expected_home_usage, expected_frame_length, expected_aac_frames_per_adts_frame, expected_has_fixed_bitrate|
|
5
|
+
it "extracts correct values for header #{header_bits}" do
|
6
|
+
result = FormatParser::AdtsHeaderInfo.parse_adts_header(header_bits.split(''))
|
7
|
+
expect(result).not_to be_nil
|
8
|
+
expect(result.mpeg_version_description).to eq(expected_mpeg_version_description)
|
9
|
+
expect(result.protection_absence).to eq(expected_protection_absence)
|
10
|
+
expect(result.profile_description).to eq(expected_profile_description)
|
11
|
+
expect(result.mpeg4_sampling_frequency).to eq(expected_mpeg4_sampling_frequency)
|
12
|
+
expect(result.mpeg4_channel_config).to eq(expected_mpeg4_channel_config)
|
13
|
+
expect(result.number_of_audio_channels).to eq(expected_number_of_audio_channels)
|
14
|
+
expect(result.originality).to eq(expected_originality)
|
15
|
+
expect(result.home_usage).to eq(expected_home_usage)
|
16
|
+
expect(result.frame_length).to eq(expected_frame_length)
|
17
|
+
expect(result.aac_frames_per_adts_frame).to eq(expected_aac_frames_per_adts_frame)
|
18
|
+
expect(result.fixed_bitrate?).to eq(expected_has_fixed_bitrate)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
shared_examples 'invalid header' do |failure_reason, header_bits|
|
23
|
+
it "fails on #{failure_reason} for header #{header_bits}" do
|
24
|
+
result = FormatParser::AdtsHeaderInfo.parse_adts_header(header_bits.split(''))
|
25
|
+
expect(result).to be_nil
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# These headers have been validated here: https://www.p23.nl/projects/aac-header/
|
30
|
+
include_examples 'parsed header', '1111111111110001010111001000000000101110011111111111110000100001', 'MPEG-4', true, 'AAC_LC (Low Complexity)', 22050, 2, 2, false, false, 371, 1, false
|
31
|
+
include_examples 'parsed header', '111111111111000101010000010000000000011110011111111111001101111000000010', 'MPEG-4', true, 'AAC_LC (Low Complexity)', 44100, 1, 1, false, false, 60, 1, false
|
32
|
+
|
33
|
+
include_examples 'invalid header', 'invalid syncword', '1111110111110001010111001000000000101110011111111111110000100001'
|
34
|
+
include_examples 'invalid header', 'invalid layer value', '1111111111110011010111001000000000101110011111111111110000100001'
|
35
|
+
include_examples 'invalid header', 'invalid sampling frequency index 15', '1111111111110001011111001000000000101110011111111111110000100001'
|
36
|
+
include_examples 'invalid header', 'zero frame length', '1111111111110001010111001000000000000000011111111111110000100001'
|
37
|
+
include_examples 'invalid header', 'random header', '101000101011010101010101111010101010101011001010101010101111000000011101'
|
38
|
+
end
|
@@ -1,15 +1,88 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe FormatParser::EXIFParser do
|
4
|
-
describe '
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
4
|
+
describe 'EXIFParser#exif_from_tiff_io' do
|
5
|
+
describe 'Orientation' do
|
6
|
+
describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
|
7
|
+
Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
|
8
|
+
filename = File.basename(tiff_path)
|
9
|
+
it "is able to parse #{filename}" do
|
10
|
+
result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
|
11
|
+
expect(result).not_to be_nil
|
12
|
+
expect(result.orientation_sym).to be_kind_of(Symbol)
|
13
|
+
# Filenames in this dir correspond with the orientation of the file
|
14
|
+
expect(filename).to include(result.orientation_sym.to_s)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'is able to deal with an orientation tag with a tuple value for orientation' do
|
20
|
+
path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
|
21
|
+
exif_data = File.open(path, 'rb') do |f|
|
22
|
+
described_class.exif_from_tiff_io(f)
|
23
|
+
end
|
24
|
+
expect(exif_data.orientation).to eq(1)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'SubIFDs' do
|
29
|
+
it 'should not retrieve subIFDs data by default' do
|
30
|
+
path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
|
31
|
+
|
32
|
+
exif_data = File.open(path, 'rb') do |f|
|
33
|
+
described_class.exif_from_tiff_io(f)
|
34
|
+
end
|
35
|
+
|
36
|
+
expect(exif_data.sub_ifds_data).not_to be_nil
|
37
|
+
expect(exif_data.sub_ifds_data).to eq({})
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'is able retrieve data from all subIFDs optionally' do
|
41
|
+
# Verifying:
|
42
|
+
# {
|
43
|
+
# offset_1 => { subIFD_1 data...}
|
44
|
+
# offset_2 => { subIFD_2 data...}
|
45
|
+
# }
|
46
|
+
|
47
|
+
path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
|
48
|
+
should_include_sub_ifds = true
|
49
|
+
|
50
|
+
exif_data = File.open(path, 'rb') do |f|
|
51
|
+
described_class.exif_from_tiff_io(f, should_include_sub_ifds)
|
52
|
+
end
|
53
|
+
|
54
|
+
offset_1, offset_2 = exif_data.sub_ifds
|
55
|
+
sub_ifds_data = exif_data.sub_ifds_data
|
56
|
+
|
57
|
+
expect(sub_ifds_data).not_to be_nil
|
58
|
+
expect(sub_ifds_data).to have_key(offset_1)
|
59
|
+
expect(sub_ifds_data).to have_key(offset_2)
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'returns EXIFR IFD instances as subIFD data' do
|
63
|
+
# Verifying:
|
64
|
+
# {
|
65
|
+
# offset_1 => { new_subfile_type => 1, ...}
|
66
|
+
# offset_2 => { new_subfile_type => 0, ...}
|
67
|
+
# }
|
68
|
+
# we shouldn't verify everything, since we trust EXIFR for that.
|
69
|
+
# making sure we are getting each subfile type should be good enough.
|
70
|
+
|
71
|
+
path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
|
72
|
+
should_include_sub_ifds = true
|
73
|
+
|
74
|
+
exif_data = File.open(path, 'rb') do |f|
|
75
|
+
described_class.exif_from_tiff_io(f, should_include_sub_ifds)
|
76
|
+
end
|
77
|
+
|
78
|
+
offset_1, offset_2 = exif_data.sub_ifds.sort
|
79
|
+
first_sub_ifd = exif_data.sub_ifds_data&.[](offset_1)
|
80
|
+
second_sub_ifd = exif_data.sub_ifds_data&.[](offset_2)
|
81
|
+
|
82
|
+
expect(first_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
|
83
|
+
expect(second_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
|
84
|
+
expect(first_sub_ifd.new_subfile_type).to eq(1)
|
85
|
+
expect(second_sub_ifd.new_subfile_type).to eq(0)
|
13
86
|
end
|
14
87
|
end
|
15
88
|
end
|
@@ -29,8 +102,8 @@ describe FormatParser::EXIFParser do
|
|
29
102
|
end
|
30
103
|
|
31
104
|
it 'returns a Hash from #to_hash' do
|
32
|
-
first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
|
33
|
-
second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
|
105
|
+
first_fake_exif = double(orientation: 1, to_hash: { foo: 123, bar: 675 })
|
106
|
+
second_fake_exif = double(orientation: 4, to_hash: { foo: 245 })
|
34
107
|
|
35
108
|
stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
|
36
109
|
stack_as_hash = stack.to_hash
|
@@ -42,14 +115,6 @@ describe FormatParser::EXIFParser do
|
|
42
115
|
end
|
43
116
|
end
|
44
117
|
|
45
|
-
it 'is able to deal with an orientation tag which a tuple value for orientation' do
|
46
|
-
path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
|
47
|
-
exif_data = File.open(path, 'rb') do |f|
|
48
|
-
described_class.exif_from_tiff_io(f)
|
49
|
-
end
|
50
|
-
expect(exif_data.orientation).to eq(1)
|
51
|
-
end
|
52
|
-
|
53
118
|
describe 'IOExt' do
|
54
119
|
it 'supports readbyte' do
|
55
120
|
io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::NEFParser do
|
4
|
+
describe 'Sample files from rawsamples' do
|
5
|
+
Dir.glob(fixtures_dir + '/NEF/*.NEF').each do |file_path|
|
6
|
+
it "is able to parse #{File.basename(file_path)}" do
|
7
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
8
|
+
|
9
|
+
expect(parsed).not_to be_nil
|
10
|
+
expect(parsed.nature).to eq(:image)
|
11
|
+
expect(parsed.format).to eq(:nef)
|
12
|
+
|
13
|
+
expect(parsed.width_px).to be_kind_of(Integer)
|
14
|
+
expect(parsed.height_px).to be_kind_of(Integer)
|
15
|
+
|
16
|
+
expect(parsed.display_width_px).to be_kind_of(Integer)
|
17
|
+
expect(parsed.display_height_px).to be_kind_of(Integer)
|
18
|
+
|
19
|
+
expect(parsed.orientation).to be_kind_of(Symbol)
|
20
|
+
|
21
|
+
expect(parsed.intrinsics[:exif]).not_to be_nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe 'Image Dimensions' do
|
27
|
+
it 'parses dimensions properly for a given fixture' do
|
28
|
+
# The default parser from EXIFr returns the dimensions from the embedded
|
29
|
+
# thumbnails as being the image's actual dimensions.
|
30
|
+
# We make sure we properly deal with this.
|
31
|
+
|
32
|
+
parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))
|
33
|
+
|
34
|
+
# Raw Image dimensions
|
35
|
+
expect(parsed.width_px).to eq(4_608)
|
36
|
+
expect(parsed.height_px).to eq(3_080)
|
37
|
+
|
38
|
+
expect(parsed.orientation).to eq(:right_top)
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'correctly adjusts display dimensions for rotated images' do
|
42
|
+
# This image is rotated, meaning display_width and display_height
|
43
|
+
# should hold swapped values from width and height
|
44
|
+
parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))
|
45
|
+
|
46
|
+
# Raw Image dimensions
|
47
|
+
expect(parsed.width_px).to eq(4_608)
|
48
|
+
expect(parsed.height_px).to eq(3_080)
|
49
|
+
|
50
|
+
# Raw Dimensions considering orientation
|
51
|
+
expect(parsed.display_width_px).to eq(3_080)
|
52
|
+
expect(parsed.display_height_px).to eq(4_608)
|
53
|
+
|
54
|
+
expect(parsed.orientation).to eq(:right_top)
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'does not return dimensions from embedded previews' do
|
58
|
+
Dir.glob(fixtures_dir + '/NEF/*.nef').each do |file_path|
|
59
|
+
# By default, NEF files include 160x120 sub_ifds.
|
60
|
+
# These dimensions cannot be considered by the parser.
|
61
|
+
|
62
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
63
|
+
|
64
|
+
expect(parsed.width_px).not_to eq(160)
|
65
|
+
expect(parsed.height_px).not_to eq(120)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'properly extracts dimensions when there are more than 2 subIFDs in the image' do
|
70
|
+
# this file has 3 subIFDs, and the RAW image information is actually in the one in the middle.
|
71
|
+
nef_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"
|
72
|
+
|
73
|
+
parsed = subject.call(File.open(nef_path, 'rb'))
|
74
|
+
|
75
|
+
expect(parsed).not_to be_nil
|
76
|
+
expect(parsed.width_px).to eq(7424)
|
77
|
+
expect(parsed.height_px).to eq(4924)
|
78
|
+
expect(parsed.orientation).to eq(:top_left)
|
79
|
+
end
|
80
|
+
|
81
|
+
describe 'correctly extracts dimensions from various NEF flavors of the same file' do
|
82
|
+
Dir.glob(fixtures_dir + '/NEF/RAW_NIKON_D800*.NEF').each do |file_path|
|
83
|
+
it "is able to parse #{File.basename(file_path)}" do
|
84
|
+
parsed = subject.call(File.open(file_path, 'rb'))
|
85
|
+
|
86
|
+
expect(parsed).not_to be_nil
|
87
|
+
expect(parsed.width_px).to eq(7424)
|
88
|
+
expect(parsed.height_px).to eq(4924)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe 'False-positive avoidance' do
|
95
|
+
it 'should return nil for regular TIFF images' do
|
96
|
+
parsed = subject.call(File.open("#{fixtures_dir}/TIFF/Shinbutsureijoushuincho.tiff", 'rb'))
|
97
|
+
expect(parsed).to be_nil
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'should return nil for regular CR2 images' do
|
101
|
+
parsed = subject.call(File.open("#{fixtures_dir}/CR2/RAW_CANON_40D_SRAW_V103.CR2", 'rb'))
|
102
|
+
expect(parsed).to be_nil
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should return nil for regular ERF images' do
|
106
|
+
# ERF files are also TIFFs with subIFDs but they don't have a matching "maker" tag
|
107
|
+
parsed = subject.call(File.open("#{fixtures_dir}/ERF/RAW_EPSON_RD1.ERF", 'rb'))
|
108
|
+
expect(parsed).to be_nil
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe 'Parser Performance' do
|
113
|
+
it 'extracts dimensions from a very large NEF economically' do
|
114
|
+
# this file has 77.3mb
|
115
|
+
file_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"
|
116
|
+
|
117
|
+
io = File.open(file_path, 'rb')
|
118
|
+
io_with_stats = FormatParser::ReadLimiter.new(io)
|
119
|
+
|
120
|
+
parsed = subject.call(io_with_stats)
|
121
|
+
|
122
|
+
expect(parsed).not_to be_nil
|
123
|
+
expect(parsed.width_px).to eq(7424)
|
124
|
+
expect(parsed.height_px).to eq(4924)
|
125
|
+
|
126
|
+
expect(io_with_stats.reads).to be_within(4).of(12)
|
127
|
+
expect(io_with_stats.seeks).to be_within(4).of(12)
|
128
|
+
expect(io_with_stats.bytes).to be_within(1024).of(59000)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.2
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
8
8
|
- Julik Tarkhanov
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-09-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -229,6 +229,8 @@ files:
|
|
229
229
|
- lib/image.rb
|
230
230
|
- lib/io_constraint.rb
|
231
231
|
- lib/io_utils.rb
|
232
|
+
- lib/parsers/aac_parser.rb
|
233
|
+
- lib/parsers/aac_parser/adts_header_info.rb
|
232
234
|
- lib/parsers/aiff_parser.rb
|
233
235
|
- lib/parsers/bmp_parser.rb
|
234
236
|
- lib/parsers/cr2_parser.rb
|
@@ -246,6 +248,7 @@ files:
|
|
246
248
|
- lib/parsers/mp3_parser.rb
|
247
249
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
248
250
|
- lib/parsers/mpeg_parser.rb
|
251
|
+
- lib/parsers/nef_parser.rb
|
249
252
|
- lib/parsers/ogg_parser.rb
|
250
253
|
- lib/parsers/pdf_parser.rb
|
251
254
|
- lib/parsers/png_parser.rb
|
@@ -272,6 +275,8 @@ files:
|
|
272
275
|
- spec/hash_utils_spec.rb
|
273
276
|
- spec/integration/active_storage/rails_app.rb
|
274
277
|
- spec/io_utils_spec.rb
|
278
|
+
- spec/parsers/aac_parser_spec.rb
|
279
|
+
- spec/parsers/adts_header_info_spec.rb
|
275
280
|
- spec/parsers/aiff_parser_spec.rb
|
276
281
|
- spec/parsers/bmp_parser_spec.rb
|
277
282
|
- spec/parsers/cr2_parser_spec.rb
|
@@ -286,6 +291,7 @@ files:
|
|
286
291
|
- spec/parsers/moov_parser_spec.rb
|
287
292
|
- spec/parsers/mp3_parser_spec.rb
|
288
293
|
- spec/parsers/mpeg_parser_spec.rb
|
294
|
+
- spec/parsers/nef_parser_spec.rb
|
289
295
|
- spec/parsers/ogg_parser_spec.rb
|
290
296
|
- spec/parsers/pdf_parser_spec.rb
|
291
297
|
- spec/parsers/png_parser_spec.rb
|
@@ -304,7 +310,7 @@ licenses:
|
|
304
310
|
- MIT (Hippocratic)
|
305
311
|
metadata:
|
306
312
|
allowed_push_host: https://rubygems.org
|
307
|
-
post_install_message:
|
313
|
+
post_install_message:
|
308
314
|
rdoc_options: []
|
309
315
|
require_paths:
|
310
316
|
- lib
|
@@ -319,8 +325,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
319
325
|
- !ruby/object:Gem::Version
|
320
326
|
version: '0'
|
321
327
|
requirements: []
|
322
|
-
rubygems_version: 3.
|
323
|
-
signing_key:
|
328
|
+
rubygems_version: 3.1.6
|
329
|
+
signing_key:
|
324
330
|
specification_version: 4
|
325
331
|
summary: A library for efficient parsing of file metadata
|
326
332
|
test_files: []
|