format_parser 1.4.2 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a9a94233949cc72d18b433cf1ddcba0e479e8b93aa1ff2e48bda6f6d86f667b
4
- data.tar.gz: 4e357bc46207e95cad52d21b2aaa1781e9c231bab02e235be29663db9722f5d9
3
+ metadata.gz: f3f9d1c51523af6efea2ed6f8cf63f0f573e2b0b140c1af0435e6bb26961953c
4
+ data.tar.gz: 84b73895b8924a0cffa286a4adaa3c270951e4074ec09db718f1a8d482b8e14b
5
5
  SHA512:
6
- metadata.gz: dcf8c8aeefc6166f3645dae461aadbcc2b36e96cb7a75162586fc009d562f6f978767ff877b27d0c192b5ca3107011a1bfdda842e730e486ced02e4191b53f59
7
- data.tar.gz: fbc2caafb269f5e9c249e6ffe62ea8141477589256fab1bea5d058877d571725b1e619619ef9fcd33005d42759697ae7c7575d7a79217bdedc7e96ab02ce3c1b
6
+ metadata.gz: cfa0a69fd35d8d3c05fff79cca1b550af8b8c6876fece5443fc41f7967fed15fe20d494ce5678e16cb2a1cdd1b0918b8ce61ef4b731f3e47b18c736ca21cdf0c
7
+ data.tar.gz: 0f7747775606681981367432322bb7b63e2e3c862fcd506d0b843bfc2280ae3d8dcdf334f49a9150f27a181329cf993f9cf79036d8124e75d41ca01f26edebd6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 1.5.0
2
+ * Add support for `NEF` files.
3
+
1
4
  ## 1.4.2
2
5
  * Fix `MP3Parser` taking precedence when parsing `WEBP` files.
3
6
 
data/README.md CHANGED
@@ -14,6 +14,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
14
14
 
15
15
  * TIFF
16
16
  * CR2
17
+ * NEF
17
18
  * PSD
18
19
  * PNG
19
20
  * MP3
@@ -161,6 +162,12 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
161
162
  ### CR2
162
163
  - CR2 examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
163
164
 
165
+ ### NEF
166
+ - NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
167
+
168
+ ### ERF
169
+ - ERF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
170
+
164
171
  ### FLAC
165
172
  - atc_fixture_vbr.flac is a converted version of the MP3 with the same name
166
173
  - c_11k16btipcm.flac is a converted version of the WAV with the same name
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '1.4.2'
2
+ VERSION = '1.5.0'
3
3
  end
@@ -41,6 +41,13 @@ module FormatParser::EXIFParser
41
41
  end
42
42
 
43
43
  class EXIFResult < SimpleDelegator
44
+ attr_reader :sub_ifds_data
45
+
46
+ def initialize(exif_raw_data, sub_ifds_data = {})
47
+ super(exif_raw_data) # SimpleDelegator: exif_raw_data becomes the object calls are delegated to
48
+ @sub_ifds_data = sub_ifds_data # exposed via attr_reader :sub_ifds_data; an empty Hash when not supplied
49
+ end
50
+
44
51
  def rotated?
45
52
  orientation.to_i > 4
46
53
  end
@@ -167,10 +174,38 @@ module FormatParser::EXIFParser
167
174
  # Squash exifr's invalid date warning since we do not use that data.
168
175
  EXIFR.logger = Logger.new(nil)
169
176
 
170
- def exif_from_tiff_io(constrained_io)
177
+ def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false) # pass true to also load subIFD data into the result
171
178
  Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
172
- raw_exif_data = EXIFR::TIFF.new(IOExt.new(constrained_io))
173
- raw_exif_data ? EXIFResult.new(raw_exif_data) : nil
179
+ extended_io = IOExt.new(constrained_io) # the same wrapper is reused below when loading subIFDs
180
+ exif_raw_data = EXIFR::TIFF.new(extended_io)
181
+
182
+ return unless exif_raw_data # no EXIF data: the result is nil
183
+
184
+ sub_ifds_data = {} # stays empty unless subIFDs were requested
185
+ if should_include_sub_ifds
186
+ sub_ifds_offsets = exif_raw_data.flat_map(&:sub_ifds).compact # gather sub_ifds values from every entry, dropping nils
187
+ sub_ifds_data = load_sub_ifds(extended_io, sub_ifds_offsets)
188
+ end
189
+
190
+ EXIFResult.new(exif_raw_data, sub_ifds_data)
191
+ end
192
+ end
193
+
194
+ private
195
+
196
+ # Reads exif data from subIFDs. This is important for NEF files.
197
+ def load_sub_ifds(extended_io, sub_ifds_offsets)
198
+ # Returns a Hash of subIFDs keyed by their offset, for example:
199
+ # {
200
+ # 123 => <EXIFR::TIFF::IFD built at offset 123>,
201
+ # 456 => <EXIFR::TIFF::IFD built at offset 456>
202
+ # }
203
+ return {} if sub_ifds_offsets.empty? # nothing to load, so the TIFF data is never opened
204
+
205
+ EXIFR::TIFF::Data.open(extended_io) do |data|
206
+ sub_ifds_offsets.map do |sub_ifd_offset|
207
+ [sub_ifd_offset, EXIFR::TIFF::IFD.new(data, sub_ifd_offset)] # [offset, IFD] pair, turned into a Hash by to_h
208
+ end.to_h
174
209
  end
175
210
  end
176
211
 
@@ -0,0 +1,69 @@
1
# Parser for Nikon NEF raw images.
#
# NEF files are TIFF containers. Because of how they organize their IFDs, the
# image dimensions must be read from the subIFD flagged as full resolution
# instead of from the embedded preview.
class FormatParser::NEFParser
  include FormatParser::IOUtils
  include FormatParser::EXIFParser

  # TIFF byte-order markers (little-endian "II*\0" and big-endian "MM\0*").
  MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
  MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
  HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
  NEF_MIME_TYPE = 'image/x-nikon-nef'

  # Values of the new_subfile_type tag compared against each subIFD.
  SUBFILE_TYPE_FULL_RESOLUTION = 0
  SUBFILE_TYPE_REDUCED_RESOLUTION = 1

  SHOULD_PARSE_SUB_IFDS = true

  # Tells whether the filename looks like a NEF file.
  #
  # @param filename [String]
  # @return [Integer, nil] truthy when the name ends in ".nef" (case-insensitive)
  def likely_match?(filename)
    # \z anchors at the very end of the string; `$` would also match before a newline
    filename =~ /\.nef\z/i
  end

  # Parses the given IO and returns the image description, or nil when the
  # input is not a NEF file.
  #
  # @param io [#read, #seek] the source to parse
  # @return [FormatParser::Image, nil]
  def call(io)
    io = FormatParser::IOConstraint.new(io)

    return unless HEADER_BYTES.include?(safe_read(io, 4))

    # Because of how NEF files organize their IFDs and subIFDs, we need to dive into the subIFDs
    # to get the actual image dimensions instead of the preview's
    exif_data = exif_from_tiff_io(io, SHOULD_PARSE_SUB_IFDS)

    return unless valid?(exif_data)

    # May be nil when no subIFD is flagged as full resolution; in that case
    # we fall back to the dimensions reported by the top-level EXIF data.
    full_resolution_data = get_full_resolution_ifd(exif_data)

    w = full_resolution_data&.image_width || exif_data.width || exif_data.pixel_x_dimension
    h = full_resolution_data&.image_length || exif_data.height || exif_data.pixel_y_dimension

    FormatParser::Image.new(
      format: :nef,
      width_px: w,
      height_px: h,
      display_width_px: exif_data.rotated? ? h : w,
      display_height_px: exif_data.rotated? ? w : h,
      orientation: exif_data.orientation_sym,
      intrinsics: { exif: exif_data },
      content_type: NEF_MIME_TYPE,
    )
  rescue EXIFR::MalformedTIFF
    nil
  end

  # NEF files should hold subIFDs and have "NIKON" or "NIKON CORPORATION" as maker.
  #
  # @param exif_data [FormatParser::EXIFParser::EXIFResult, nil]
  # @return [Boolean] false for nil input, missing subIFDs or a non-Nikon maker
  def valid?(exif_data)
    return false unless exif_data

    sub_ifds_data = exif_data.sub_ifds_data
    return false if sub_ifds_data.nil? || sub_ifds_data.empty?

    # to_s turns a missing maker tag (nil) into "" so the check simply fails
    exif_data.make.to_s.start_with?('NIKON')
  end

  # Investigates data from all subIFDs and finds the one holding the full-resolution image.
  #
  # Most of the time, NEF files have 2 subIFDs:
  #   First one: Thumbnail (Reduced resolution)
  #   Second one: Full resolution
  # While this is true in most situations, there are exceptions,
  # so we can't rely on this order alone.
  #
  # @param exif_data [FormatParser::EXIFParser::EXIFResult]
  # @return [Object, nil] the first subIFD whose new_subfile_type is
  #   SUBFILE_TYPE_FULL_RESOLUTION, or nil when none matches
  def get_full_resolution_ifd(exif_data)
    exif_data.sub_ifds_data.values.find do |ifd_data|
      ifd_data.new_subfile_type == SUBFILE_TYPE_FULL_RESOLUTION
    end
  end

  FormatParser.register_parser new, natures: :image, formats: :nef, priority: 4
end
@@ -1,15 +1,88 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe FormatParser::EXIFParser do
4
- describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
5
- Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
6
- filename = File.basename(tiff_path)
7
- it "is able to parse #{filename}" do
8
- result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
9
- expect(result).not_to be_nil
10
- expect(result.orientation_sym).to be_kind_of(Symbol)
11
- # Filenames in this dir correspond with the orientation of the file
12
- expect(filename).to include(result.orientation_sym.to_s)
4
+ describe 'EXIFParser#exif_from_tiff_io' do
5
+ describe 'Orientation' do
6
+ describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
7
+ Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
8
+ filename = File.basename(tiff_path)
9
+ it "is able to parse #{filename}" do
10
+ result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
11
+ expect(result).not_to be_nil
12
+ expect(result.orientation_sym).to be_kind_of(Symbol)
13
+ # Filenames in this dir correspond with the orientation of the file
14
+ expect(filename).to include(result.orientation_sym.to_s)
15
+ end
16
+ end
17
+ end
18
+
19
+ it 'is able to deal with an orientation tag with a tuple value for orientation' do
20
+ path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
21
+ exif_data = File.open(path, 'rb') do |f|
22
+ described_class.exif_from_tiff_io(f)
23
+ end
24
+ expect(exif_data.orientation).to eq(1)
25
+ end
26
+ end
27
+
28
+ describe 'SubIFDs' do
29
+ it 'should not retrieve subIFDs data by default' do
30
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
31
+
32
+ exif_data = File.open(path, 'rb') do |f|
33
+ described_class.exif_from_tiff_io(f)
34
+ end
35
+
36
+ expect(exif_data.sub_ifds_data).not_to be_nil
37
+ expect(exif_data.sub_ifds_data).to eq({})
38
+ end
39
+
40
+ it 'is able retrieve data from all subIFDs optionally' do
41
+ # Verifying:
42
+ # {
43
+ # offset_1 => { subIFD_1 data...}
44
+ # offset_2 => { subIFD_2 data...}
45
+ # }
46
+
47
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
48
+ should_include_sub_ifds = true
49
+
50
+ exif_data = File.open(path, 'rb') do |f|
51
+ described_class.exif_from_tiff_io(f, should_include_sub_ifds)
52
+ end
53
+
54
+ offset_1, offset_2 = exif_data.sub_ifds
55
+ sub_ifds_data = exif_data.sub_ifds_data
56
+
57
+ expect(sub_ifds_data).not_to be_nil
58
+ expect(sub_ifds_data).to have_key(offset_1)
59
+ expect(sub_ifds_data).to have_key(offset_2)
60
+ end
61
+
62
+ it 'returns EXIFR IFD instances as subIFD data' do
63
+ # Verifying:
64
+ # {
65
+ # offset_1 => { new_subfile_type => 1, ...}
66
+ # offset_2 => { new_subfile_type => 0, ...}
67
+ # }
68
+ # we shouldn't verify everything, since we trust to EXIFR for that.
69
+ # making sure we are getting each subfile type should be good enough.
70
+
71
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
72
+ should_include_sub_ifds = true
73
+
74
+ exif_data = File.open(path, 'rb') do |f|
75
+ described_class.exif_from_tiff_io(f, should_include_sub_ifds)
76
+ end
77
+
78
+ offset_1, offset_2 = exif_data.sub_ifds.sort
79
+ first_sub_ifd = exif_data.sub_ifds_data&.[](offset_1)
80
+ second_sub_ifd = exif_data.sub_ifds_data&.[](offset_2)
81
+
82
+ expect(first_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
83
+ expect(second_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
84
+ expect(first_sub_ifd.new_subfile_type).to eq(1)
85
+ expect(second_sub_ifd.new_subfile_type).to eq(0)
13
86
  end
14
87
  end
15
88
  end
@@ -29,8 +102,8 @@ describe FormatParser::EXIFParser do
29
102
  end
30
103
 
31
104
  it 'returns a Hash from #to_hash' do
32
- first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
33
- second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
105
+ first_fake_exif = double(orientation: 1, to_hash: { foo: 123, bar: 675 })
106
+ second_fake_exif = double(orientation: 4, to_hash: { foo: 245 })
34
107
 
35
108
  stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
36
109
  stack_as_hash = stack.to_hash
@@ -42,14 +115,6 @@ describe FormatParser::EXIFParser do
42
115
  end
43
116
  end
44
117
 
45
- it 'is able to deal with an orientation tag which a tuple value for orientation' do
46
- path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
47
- exif_data = File.open(path, 'rb') do |f|
48
- described_class.exif_from_tiff_io(f)
49
- end
50
- expect(exif_data.orientation).to eq(1)
51
- end
52
-
53
118
  describe 'IOExt' do
54
119
  it 'supports readbyte' do
55
120
  io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
@@ -0,0 +1,131 @@
1
+ require 'spec_helper'
2
+
3
describe FormatParser::NEFParser do
  describe 'Sample files from rawsamples' do
    Dir.glob(fixtures_dir + '/NEF/*.NEF').each do |file_path|
      it "is able to parse #{File.basename(file_path)}" do
        parsed = subject.call(File.open(file_path, 'rb'))

        expect(parsed).not_to be_nil
        expect(parsed.nature).to eq(:image)
        expect(parsed.format).to eq(:nef)

        expect(parsed.width_px).to be_kind_of(Integer)
        expect(parsed.height_px).to be_kind_of(Integer)

        expect(parsed.display_width_px).to be_kind_of(Integer)
        expect(parsed.display_height_px).to be_kind_of(Integer)

        expect(parsed.orientation).to be_kind_of(Symbol)

        expect(parsed.intrinsics[:exif]).not_to be_nil
      end
    end
  end

  describe 'Image Dimensions' do
    it 'parses dimensions properly for a given fixture' do
      # The default parser from EXIFR returns the dimensions of the embedded
      # thumbnails as being the image's actual dimensions.
      # We make sure we properly deal with this.

      parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))

      # Raw image dimensions
      expect(parsed.width_px).to eq(4_608)
      expect(parsed.height_px).to eq(3_080)

      expect(parsed.orientation).to eq(:right_top)
    end

    it 'correctly adjusts display dimensions for rotated images' do
      # This image is rotated, meaning display_width and display_height
      # should hold swapped values from width and height
      parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))

      # Raw image dimensions
      expect(parsed.width_px).to eq(4_608)
      expect(parsed.height_px).to eq(3_080)

      # Raw dimensions considering orientation
      expect(parsed.display_width_px).to eq(3_080)
      expect(parsed.display_height_px).to eq(4_608)

      expect(parsed.orientation).to eq(:right_top)
    end

    it 'does not return dimensions from embedded previews' do
      # The fixtures use an upper-case ".NEF" extension, so the glob must match
      # that casing: on a case-sensitive filesystem "*.nef" finds nothing and
      # the example would pass without asserting anything.
      nef_paths = Dir.glob(fixtures_dir + '/NEF/*.NEF')
      expect(nef_paths).not_to be_empty

      nef_paths.each do |file_path|
        # By default, NEF files include 160x120 sub_ifds.
        # These dimensions must not be reported by the parser.

        parsed = subject.call(File.open(file_path, 'rb'))

        expect(parsed.width_px).not_to eq(160)
        expect(parsed.height_px).not_to eq(120)
      end
    end

    it 'properly extracts dimensions when there are more than 2 subIFDs in the image' do
      # This file has 3 subIFDs, and the RAW image information is actually in the one in the middle.
      nef_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"

      parsed = subject.call(File.open(nef_path, 'rb'))

      expect(parsed).not_to be_nil
      expect(parsed.width_px).to eq(7424)
      expect(parsed.height_px).to eq(4924)
      expect(parsed.orientation).to eq(:top_left)
    end

    describe 'correctly extracts dimensions from various NEF flavors of the same file' do
      Dir.glob(fixtures_dir + '/NEF/RAW_NIKON_D800*.NEF').each do |file_path|
        it "is able to parse #{File.basename(file_path)}" do
          parsed = subject.call(File.open(file_path, 'rb'))

          expect(parsed).not_to be_nil
          expect(parsed.width_px).to eq(7424)
          expect(parsed.height_px).to eq(4924)
        end
      end
    end
  end

  describe 'False-positive avoidance' do
    it 'should return nil for regular TIFF images' do
      parsed = subject.call(File.open("#{fixtures_dir}/TIFF/Shinbutsureijoushuincho.tiff", 'rb'))
      expect(parsed).to be_nil
    end

    it 'should return nil for regular CR2 images' do
      parsed = subject.call(File.open("#{fixtures_dir}/CR2/RAW_CANON_40D_SRAW_V103.CR2", 'rb'))
      expect(parsed).to be_nil
    end

    it 'should return nil for regular ERF images' do
      # ERF files are also TIFFs with subIFDs but they don't have a matching "maker" tag
      parsed = subject.call(File.open("#{fixtures_dir}/ERF/RAW_EPSON_RD1.ERF", 'rb'))
      expect(parsed).to be_nil
    end
  end

  describe 'Parser Performance' do
    it 'extracts dimensions from a very large NEF economically' do
      # This file is 77.3 MB
      file_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"

      io = File.open(file_path, 'rb')
      io_with_stats = FormatParser::ReadLimiter.new(io)

      parsed = subject.call(io_with_stats)

      expect(parsed).not_to be_nil
      expect(parsed.width_px).to eq(7424)
      expect(parsed.height_px).to eq(4924)

      expect(io_with_stats.reads).to be_within(4).of(12)
      expect(io_with_stats.seeks).to be_within(4).of(12)
      expect(io_with_stats.bytes).to be_within(1024).of(59000)
    end
  end
end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
8
8
  - Julik Tarkhanov
9
- autorequire:
9
+ autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-07-13 00:00:00.000000000 Z
12
+ date: 2022-08-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -246,6 +246,7 @@ files:
246
246
  - lib/parsers/mp3_parser.rb
247
247
  - lib/parsers/mp3_parser/id3_extraction.rb
248
248
  - lib/parsers/mpeg_parser.rb
249
+ - lib/parsers/nef_parser.rb
249
250
  - lib/parsers/ogg_parser.rb
250
251
  - lib/parsers/pdf_parser.rb
251
252
  - lib/parsers/png_parser.rb
@@ -286,6 +287,7 @@ files:
286
287
  - spec/parsers/moov_parser_spec.rb
287
288
  - spec/parsers/mp3_parser_spec.rb
288
289
  - spec/parsers/mpeg_parser_spec.rb
290
+ - spec/parsers/nef_parser_spec.rb
289
291
  - spec/parsers/ogg_parser_spec.rb
290
292
  - spec/parsers/pdf_parser_spec.rb
291
293
  - spec/parsers/png_parser_spec.rb
@@ -304,7 +306,7 @@ licenses:
304
306
  - MIT (Hippocratic)
305
307
  metadata:
306
308
  allowed_push_host: https://rubygems.org
307
- post_install_message:
309
+ post_install_message:
308
310
  rdoc_options: []
309
311
  require_paths:
310
312
  - lib
@@ -319,8 +321,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
321
  - !ruby/object:Gem::Version
320
322
  version: '0'
321
323
  requirements: []
322
- rubygems_version: 3.3.7
323
- signing_key:
324
+ rubygems_version: 3.1.6
325
+ signing_key:
324
326
  specification_version: 4
325
327
  summary: A library for efficient parsing of file metadata
326
328
  test_files: []