format_parser 1.4.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a9a94233949cc72d18b433cf1ddcba0e479e8b93aa1ff2e48bda6f6d86f667b
4
- data.tar.gz: 4e357bc46207e95cad52d21b2aaa1781e9c231bab02e235be29663db9722f5d9
3
+ metadata.gz: f3f9d1c51523af6efea2ed6f8cf63f0f573e2b0b140c1af0435e6bb26961953c
4
+ data.tar.gz: 84b73895b8924a0cffa286a4adaa3c270951e4074ec09db718f1a8d482b8e14b
5
5
  SHA512:
6
- metadata.gz: dcf8c8aeefc6166f3645dae461aadbcc2b36e96cb7a75162586fc009d562f6f978767ff877b27d0c192b5ca3107011a1bfdda842e730e486ced02e4191b53f59
7
- data.tar.gz: fbc2caafb269f5e9c249e6ffe62ea8141477589256fab1bea5d058877d571725b1e619619ef9fcd33005d42759697ae7c7575d7a79217bdedc7e96ab02ce3c1b
6
+ metadata.gz: cfa0a69fd35d8d3c05fff79cca1b550af8b8c6876fece5443fc41f7967fed15fe20d494ce5678e16cb2a1cdd1b0918b8ce61ef4b731f3e47b18c736ca21cdf0c
7
+ data.tar.gz: 0f7747775606681981367432322bb7b63e2e3c862fcd506d0b843bfc2280ae3d8dcdf334f49a9150f27a181329cf993f9cf79036d8124e75d41ca01f26edebd6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 1.5.0
2
+ * Add support for `NEF` files.
3
+
1
4
  ## 1.4.2
2
5
  * Fix `MP3Parser` taking precedence when parsing `WEBP` files.
3
6
 
data/README.md CHANGED
@@ -14,6 +14,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
14
14
 
15
15
  * TIFF
16
16
  * CR2
17
+ * NEF
17
18
  * PSD
18
19
  * PNG
19
20
  * MP3
@@ -161,6 +162,12 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
161
162
  ### CR2
162
163
  - CR2 examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
163
164
 
165
+ ### NEF
166
+ - NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
167
+
168
+ ### ERF
169
+ - ERF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
170
+
164
171
  ### FLAC
165
172
  - atc_fixture_vbr.flac is a converted version of the MP3 with the same name
166
173
  - c_11k16btipcm.flac is a converted version of the WAV with the same name
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '1.4.2'
2
+ VERSION = '1.5.0'
3
3
  end
@@ -41,6 +41,13 @@ module FormatParser::EXIFParser
41
41
  end
42
42
 
43
43
  class EXIFResult < SimpleDelegator
44
+ attr_reader :sub_ifds_data
45
+
46
+ def initialize(exif_raw_data, sub_ifds_data = {})
47
+ super(exif_raw_data)
48
+ @sub_ifds_data = sub_ifds_data
49
+ end
50
+
44
51
  def rotated?
45
52
  orientation.to_i > 4
46
53
  end
@@ -167,10 +174,38 @@ module FormatParser::EXIFParser
167
174
  # Squash exifr's invalid date warning since we do not use that data.
168
175
  EXIFR.logger = Logger.new(nil)
169
176
 
170
- def exif_from_tiff_io(constrained_io)
177
+ def exif_from_tiff_io(constrained_io, should_include_sub_ifds = false)
171
178
  Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
172
- raw_exif_data = EXIFR::TIFF.new(IOExt.new(constrained_io))
173
- raw_exif_data ? EXIFResult.new(raw_exif_data) : nil
179
+ extended_io = IOExt.new(constrained_io)
180
+ exif_raw_data = EXIFR::TIFF.new(extended_io)
181
+
182
+ return unless exif_raw_data
183
+
184
+ sub_ifds_data = {}
185
+ if should_include_sub_ifds
186
+ sub_ifds_offsets = exif_raw_data.flat_map(&:sub_ifds).compact
187
+ sub_ifds_data = load_sub_ifds(extended_io, sub_ifds_offsets)
188
+ end
189
+
190
+ EXIFResult.new(exif_raw_data, sub_ifds_data)
191
+ end
192
+ end
193
+
194
+ private
195
+
196
+ # Reads exif data from subIFDs. This is important for NEF files.
197
+ def load_sub_ifds(extended_io, sub_ifds_offsets)
198
+ # Returns a hash of subIFDs, using their offsets as keys:
199
+ # {
200
+ # 123 => { subIFD data...}
201
+ # 456 => { another subIFD data...}
202
+ # }
203
+ return {} if sub_ifds_offsets.empty?
204
+
205
+ EXIFR::TIFF::Data.open(extended_io) do |data|
206
+ sub_ifds_offsets.map do |sub_ifd_offset|
207
+ [sub_ifd_offset, EXIFR::TIFF::IFD.new(data, sub_ifd_offset)]
208
+ end.to_h
174
209
  end
175
210
  end
176
211
 
@@ -0,0 +1,69 @@
1
+ class FormatParser::NEFParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::EXIFParser
4
+
5
+ MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
6
+ MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
7
+ HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
8
+ NEF_MIME_TYPE = 'image/x-nikon-nef'
9
+
10
+ SUBFILE_TYPE_FULL_RESOLUTION = 0
11
+ SUBFILE_TYPE_REDUCED_RESOLUTION = 1
12
+
13
+ SHOULD_PARSE_SUB_IFDS = true
14
+
15
+ def likely_match?(filename)
16
+ filename =~ /\.nef$/i
17
+ end
18
+
19
+ def call(io)
20
+ io = FormatParser::IOConstraint.new(io)
21
+
22
+ return unless HEADER_BYTES.include?(safe_read(io, 4))
23
+
24
+ # Because of how NEF files organize their IFDs and subIFDs, we need to dive into the subIFDs
25
+ # to get the actual image dimensions instead of the preview's
26
+ exif_data = exif_from_tiff_io(io, SHOULD_PARSE_SUB_IFDS)
27
+
28
+ return unless valid?(exif_data)
29
+
30
+ full_resolution_data = get_full_resolution_ifd(exif_data)
31
+
32
+ w = full_resolution_data.image_width || exif_data.width || exif_data.pixel_x_dimension
33
+ h = full_resolution_data.image_length || exif_data.height || exif_data.pixel_y_dimension
34
+
35
+ FormatParser::Image.new(
36
+ format: :nef,
37
+ width_px: w,
38
+ height_px: h,
39
+ display_width_px: exif_data.rotated? ? h : w,
40
+ display_height_px: exif_data.rotated? ? w : h,
41
+ orientation: exif_data.orientation_sym,
42
+ intrinsics: { exif: exif_data },
43
+ content_type: NEF_MIME_TYPE,
44
+ )
45
+ rescue EXIFR::MalformedTIFF
46
+ nil
47
+ end
48
+
49
+ def valid?(exif_data)
50
+ # NEF files should hold subIFDs and have "NIKON" or "NIKON CORPORATION" as maker
51
+ has_sub_ifds_data = !exif_data&.sub_ifds_data.keys.empty?
52
+ has_sub_ifds_data && exif_data.make&.start_with?('NIKON')
53
+ end
54
+
55
+ # Investigates data from all subIFDs and finds the one holding the full-resolution image
56
+ def get_full_resolution_ifd(exif_data)
57
+ # Most of the time, NEF files have 2 subIFDs:
58
+ # First one: Thumbnail (Reduced resolution)
59
+ # Second one: Full resolution
60
+ # While this is true in most situations, there are exceptions,
61
+ # so we can't rely on this order alone.
62
+
63
+ exif_data.sub_ifds_data.each do |_ifd_offset, ifd_data|
64
+ return ifd_data if ifd_data.new_subfile_type == SUBFILE_TYPE_FULL_RESOLUTION
65
+ end
66
+ end
67
+
68
+ FormatParser.register_parser new, natures: :image, formats: :nef, priority: 4
69
+ end
@@ -1,15 +1,88 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe FormatParser::EXIFParser do
4
- describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
5
- Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
6
- filename = File.basename(tiff_path)
7
- it "is able to parse #{filename}" do
8
- result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
9
- expect(result).not_to be_nil
10
- expect(result.orientation_sym).to be_kind_of(Symbol)
11
- # Filenames in this dir correspond with the orientation of the file
12
- expect(filename).to include(result.orientation_sym.to_s)
4
+ describe 'EXIFParser#exif_from_tiff_io' do
5
+ describe 'Orientation' do
6
+ describe 'is able to correctly parse orientation for all the TIFF EXIF examples from FastImage' do
7
+ Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
8
+ filename = File.basename(tiff_path)
9
+ it "is able to parse #{filename}" do
10
+ result = described_class.exif_from_tiff_io(File.open(tiff_path, 'rb'))
11
+ expect(result).not_to be_nil
12
+ expect(result.orientation_sym).to be_kind_of(Symbol)
13
+ # Filenames in this dir correspond with the orientation of the file
14
+ expect(filename).to include(result.orientation_sym.to_s)
15
+ end
16
+ end
17
+ end
18
+
19
+ it 'is able to deal with an orientation tag with a tuple value for orientation' do
20
+ path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
21
+ exif_data = File.open(path, 'rb') do |f|
22
+ described_class.exif_from_tiff_io(f)
23
+ end
24
+ expect(exif_data.orientation).to eq(1)
25
+ end
26
+ end
27
+
28
+ describe 'SubIFDs' do
29
+ it 'should not retrieve subIFDs data by default' do
30
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
31
+
32
+ exif_data = File.open(path, 'rb') do |f|
33
+ described_class.exif_from_tiff_io(f)
34
+ end
35
+
36
+ expect(exif_data.sub_ifds_data).not_to be_nil
37
+ expect(exif_data.sub_ifds_data).to eq({})
38
+ end
39
+
40
+ it 'is able retrieve data from all subIFDs optionally' do
41
+ # Verifying:
42
+ # {
43
+ # offset_1 => { subIFD_1 data...}
44
+ # offset_2 => { subIFD_2 data...}
45
+ # }
46
+
47
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
48
+ should_include_sub_ifds = true
49
+
50
+ exif_data = File.open(path, 'rb') do |f|
51
+ described_class.exif_from_tiff_io(f, should_include_sub_ifds)
52
+ end
53
+
54
+ offset_1, offset_2 = exif_data.sub_ifds
55
+ sub_ifds_data = exif_data.sub_ifds_data
56
+
57
+ expect(sub_ifds_data).not_to be_nil
58
+ expect(sub_ifds_data).to have_key(offset_1)
59
+ expect(sub_ifds_data).to have_key(offset_2)
60
+ end
61
+
62
+ it 'returns EXIFR IFD instances as subIFD data' do
63
+ # Verifying:
64
+ # {
65
+ # offset_1 => { new_subfile_type => 1, ...}
66
+ # offset_2 => { new_subfile_type => 0, ...}
67
+ # }
68
+ # we shouldn't verify everything, since we trust EXIFR for that.
69
+ # making sure we are getting each subfile type should be good enough.
70
+
71
+ path = fixtures_dir + 'NEF/RAW_NIKON_D40_SRGB.NEF'
72
+ should_include_sub_ifds = true
73
+
74
+ exif_data = File.open(path, 'rb') do |f|
75
+ described_class.exif_from_tiff_io(f, should_include_sub_ifds)
76
+ end
77
+
78
+ offset_1, offset_2 = exif_data.sub_ifds.sort
79
+ first_sub_ifd = exif_data.sub_ifds_data&.[](offset_1)
80
+ second_sub_ifd = exif_data.sub_ifds_data&.[](offset_2)
81
+
82
+ expect(first_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
83
+ expect(second_sub_ifd).to be_kind_of(EXIFR::TIFF::IFD)
84
+ expect(first_sub_ifd.new_subfile_type).to eq(1)
85
+ expect(second_sub_ifd.new_subfile_type).to eq(0)
13
86
  end
14
87
  end
15
88
  end
@@ -29,8 +102,8 @@ describe FormatParser::EXIFParser do
29
102
  end
30
103
 
31
104
  it 'returns a Hash from #to_hash' do
32
- first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
33
- second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
105
+ first_fake_exif = double(orientation: 1, to_hash: { foo: 123, bar: 675 })
106
+ second_fake_exif = double(orientation: 4, to_hash: { foo: 245 })
34
107
 
35
108
  stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
36
109
  stack_as_hash = stack.to_hash
@@ -42,14 +115,6 @@ describe FormatParser::EXIFParser do
42
115
  end
43
116
  end
44
117
 
45
- it 'is able to deal with an orientation tag which a tuple value for orientation' do
46
- path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
47
- exif_data = File.open(path, 'rb') do |f|
48
- described_class.exif_from_tiff_io(f)
49
- end
50
- expect(exif_data.orientation).to eq(1)
51
- end
52
-
53
118
  describe 'IOExt' do
54
119
  it 'supports readbyte' do
55
120
  io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
@@ -0,0 +1,131 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::NEFParser do
4
+ describe 'Sample files from rawsamples' do
5
+ Dir.glob(fixtures_dir + '/NEF/*.NEF').each do |file_path|
6
+ it "is able to parse #{File.basename(file_path)}" do
7
+ parsed = subject.call(File.open(file_path, 'rb'))
8
+
9
+ expect(parsed).not_to be_nil
10
+ expect(parsed.nature).to eq(:image)
11
+ expect(parsed.format).to eq(:nef)
12
+
13
+ expect(parsed.width_px).to be_kind_of(Integer)
14
+ expect(parsed.height_px).to be_kind_of(Integer)
15
+
16
+ expect(parsed.display_width_px).to be_kind_of(Integer)
17
+ expect(parsed.display_height_px).to be_kind_of(Integer)
18
+
19
+ expect(parsed.orientation).to be_kind_of(Symbol)
20
+
21
+ expect(parsed.intrinsics[:exif]).not_to be_nil
22
+ end
23
+ end
24
+ end
25
+
26
+ describe 'Image Dimensions' do
27
+ it 'parses dimensions properly for a given fixture' do
28
+ # The default parser from EXIFr returns the dimensions from the embedded
29
+ # thumbnails as being the image's actual dimensions.
30
+ # We make sure we properly deal with this.
31
+
32
+ parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))
33
+
34
+ # Raw Image dimensions
35
+ expect(parsed.width_px).to eq(4_608)
36
+ expect(parsed.height_px).to eq(3_080)
37
+
38
+ expect(parsed.orientation).to eq(:right_top)
39
+ end
40
+
41
+ it 'correctly adjusts display dimensions for rotated images' do
42
+ # This image is rotated, meaning display_width and display_height
43
+ # should hold swapped values from width and height
44
+ parsed = subject.call(File.open("#{fixtures_dir}/NEF/RAW_NIKON_1S2.NEF", 'rb'))
45
+
46
+ # Raw Image dimensions
47
+ expect(parsed.width_px).to eq(4_608)
48
+ expect(parsed.height_px).to eq(3_080)
49
+
50
+ # Raw Dimensions considering orientation
51
+ expect(parsed.display_width_px).to eq(3_080)
52
+ expect(parsed.display_height_px).to eq(4_608)
53
+
54
+ expect(parsed.orientation).to eq(:right_top)
55
+ end
56
+
57
+ it 'does not return dimensions from embedded previews' do
58
+ Dir.glob(fixtures_dir + '/NEF/*.nef').each do |file_path|
59
+ # By default, NEF files include 160x120 sub_ifds.
60
+ # These dimensions must not be reported by the parser.
61
+
62
+ parsed = subject.call(File.open(file_path, 'rb'))
63
+
64
+ expect(parsed.width_px).not_to eq(160)
65
+ expect(parsed.height_px).not_to eq(120)
66
+ end
67
+ end
68
+
69
+ it 'properly extracts dimensions when there are more than 2 subIFDs in the image' do
70
+ # this file has 3 subIFDs, and the RAW image information is actually in the one in the middle.
71
+ nef_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"
72
+
73
+ parsed = subject.call(File.open(nef_path, 'rb'))
74
+
75
+ expect(parsed).not_to be_nil
76
+ expect(parsed.width_px).to eq(7424)
77
+ expect(parsed.height_px).to eq(4924)
78
+ expect(parsed.orientation).to eq(:top_left)
79
+ end
80
+
81
+ describe 'correctly extracts dimensions from various NEF flavors of the same file' do
82
+ Dir.glob(fixtures_dir + '/NEF/RAW_NIKON_D800*.NEF').each do |file_path|
83
+ it "is able to parse #{File.basename(file_path)}" do
84
+ parsed = subject.call(File.open(file_path, 'rb'))
85
+
86
+ expect(parsed).not_to be_nil
87
+ expect(parsed.width_px).to eq(7424)
88
+ expect(parsed.height_px).to eq(4924)
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ describe 'False-positive avoidance' do
95
+ it 'should return nil for regular TIFF images' do
96
+ parsed = subject.call(File.open("#{fixtures_dir}/TIFF/Shinbutsureijoushuincho.tiff", 'rb'))
97
+ expect(parsed).to be_nil
98
+ end
99
+
100
+ it 'should return nil for regular CR2 images' do
101
+ parsed = subject.call(File.open("#{fixtures_dir}/CR2/RAW_CANON_40D_SRAW_V103.CR2", 'rb'))
102
+ expect(parsed).to be_nil
103
+ end
104
+
105
+ it 'should return nil for regular ERF images' do
106
+ # ERF files are also TIFFs with subIFDs but they don't have a matching "maker" tag
107
+ parsed = subject.call(File.open("#{fixtures_dir}/ERF/RAW_EPSON_RD1.ERF", 'rb'))
108
+ expect(parsed).to be_nil
109
+ end
110
+ end
111
+
112
+ describe 'Parser Performance' do
113
+ it 'extracts dimensions from a very large NEF economically' do
114
+ # this file is 77.3 MB
115
+ file_path = "#{fixtures_dir}/NEF/RAW_NIKON_D800_14bit_FX_UNCOMPRESSED.NEF"
116
+
117
+ io = File.open(file_path, 'rb')
118
+ io_with_stats = FormatParser::ReadLimiter.new(io)
119
+
120
+ parsed = subject.call(io_with_stats)
121
+
122
+ expect(parsed).not_to be_nil
123
+ expect(parsed.width_px).to eq(7424)
124
+ expect(parsed.height_px).to eq(4924)
125
+
126
+ expect(io_with_stats.reads).to be_within(4).of(12)
127
+ expect(io_with_stats.seeks).to be_within(4).of(12)
128
+ expect(io_with_stats.bytes).to be_within(1024).of(59000)
129
+ end
130
+ end
131
+ end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
8
8
  - Julik Tarkhanov
9
- autorequire:
9
+ autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-07-13 00:00:00.000000000 Z
12
+ date: 2022-08-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -246,6 +246,7 @@ files:
246
246
  - lib/parsers/mp3_parser.rb
247
247
  - lib/parsers/mp3_parser/id3_extraction.rb
248
248
  - lib/parsers/mpeg_parser.rb
249
+ - lib/parsers/nef_parser.rb
249
250
  - lib/parsers/ogg_parser.rb
250
251
  - lib/parsers/pdf_parser.rb
251
252
  - lib/parsers/png_parser.rb
@@ -286,6 +287,7 @@ files:
286
287
  - spec/parsers/moov_parser_spec.rb
287
288
  - spec/parsers/mp3_parser_spec.rb
288
289
  - spec/parsers/mpeg_parser_spec.rb
290
+ - spec/parsers/nef_parser_spec.rb
289
291
  - spec/parsers/ogg_parser_spec.rb
290
292
  - spec/parsers/pdf_parser_spec.rb
291
293
  - spec/parsers/png_parser_spec.rb
@@ -304,7 +306,7 @@ licenses:
304
306
  - MIT (Hippocratic)
305
307
  metadata:
306
308
  allowed_push_host: https://rubygems.org
307
- post_install_message:
309
+ post_install_message:
308
310
  rdoc_options: []
309
311
  require_paths:
310
312
  - lib
@@ -319,8 +321,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
321
  - !ruby/object:Gem::Version
320
322
  version: '0'
321
323
  requirements: []
322
- rubygems_version: 3.3.7
323
- signing_key:
324
+ rubygems_version: 3.1.6
325
+ signing_key:
324
326
  specification_version: 4
325
327
  summary: A library for efficient parsing of file metadata
326
328
  test_files: []