format_parser 0.9.3 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: cd4a3b56391cebff09efc933b5fd48188c67f913adeec46c77a832ff067fd870
4
- data.tar.gz: 159c14df0b5740f627a99915f05750bf753c89620017733eab067c6e865e972d
2
+ SHA1:
3
+ metadata.gz: 1ea761c606eb23bb10fd39f158afa2a628930018
4
+ data.tar.gz: eda078a306d8ded4dbdbdb4570608fe99b80b3e8
5
5
  SHA512:
6
- metadata.gz: 559b0f5709bd8fc23cb20468eed5c3840a4087a444a581591b530490278532dff8700a54652e7e9cc8ab82b89ac2db910bf8e79620c7de5cf5a172cd7285ade3
7
- data.tar.gz: d5baa5e7b5aea3cce2acf2513509b4dac8ad40dceb29a69875a10e4322dd136214ae650e82254416d1d39907a33cb18ed0d40634af0fa974a68ce2b0dc99b7dd
6
+ metadata.gz: 5596e607f371f5b05503d25fc749964af5dd3b945a3ec5011efcb4262a03c3325ea344a00e103cea13cf76e85bea41f10d4633d7f6f3a67dfeb17a7cbe04f2ea
7
+ data.tar.gz: d25fff95fbb51bd7b414e3e29104b798a8b5d6f11e1a4711266c16cadd532a436463be6ad0414acdda7904562a196b262738483a05e7cfa73d2138ebb4022200
@@ -1,3 +1,6 @@
1
+ ## 0.9.4
2
+ * Fix a TIFF parsing regression introduced in 0.3.1 that led to all TIFFs being incorrectly parsed
3
+
1
4
  ## 0.9.3
2
5
  * Fix a JPEG parsing regression introduced in 0.9.1
3
6
 
data/README.md CHANGED
@@ -136,6 +136,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
136
136
  ### M4A
137
137
  - fixture.m4a was created by one of the project maintainers and is MIT licensed
138
138
 
139
+ ### TIFF
140
+ - `Shinbutsureijoushuincho.tiff` is obtained from Wikimedia Commons and is Creative Commons licensed
141
+ - `IMG_9266_*.tif` and all its variations were created by the project maintainers
142
+
139
143
  ### ZIP
140
144
  - The .zip fixture files have been created by the project maintainers
141
145
 
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.9.3'
2
+ VERSION = '0.9.4'
3
3
  end
@@ -118,6 +118,12 @@ class FormatParser::JPEGParser
118
118
  # does under the hood.
119
119
  app1_frame_content_length = read_short - 2
120
120
 
121
+ # If there is certainly not enough data in this APP1 to begin with, bail out.
122
+ # For the sake of the argument assume that a usable EXIF marker would contain
123
+ # at least 2 bytes of data - not an exact science, but it can help us
124
+ # avoid reading _anything_ from the APP1 marker body if it's too small anyway
125
+ return if app1_frame_content_length < (EXIF_MAGIC_STRING.bytesize + 2)
126
+
121
127
  # Peek whether the contents of the marker starts with Exif\0
122
128
  maybe_exif_magic_str = safe_read(@buf, EXIF_MAGIC_STRING.bytesize)
123
129
 
@@ -1,70 +1,37 @@
1
1
  class FormatParser::TIFFParser
2
2
  include FormatParser::IOUtils
3
3
 
4
- LITTLE_ENDIAN_TIFF_HEADER_BYTES = [0x49, 0x49, 0x2A, 0x0]
5
- BIG_ENDIAN_TIFF_HEADER_BYTES = [0x4D, 0x4D, 0x0, 0x2A]
6
- WIDTH_TAG = 0x100
7
- HEIGHT_TAG = 0x101
4
+ MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
5
+ MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
8
6
 
9
7
  def call(io)
10
8
  io = FormatParser::IOConstraint.new(io)
11
- magic_bytes = safe_read(io, 4).unpack('C4')
12
- endianness = scan_tiff_endianness(magic_bytes)
13
- return if !endianness || cr2_check(io)
14
9
 
15
- w, h = read_tiff_by_endianness(io, endianness)
10
+ return unless [MAGIC_LE, MAGIC_BE].include?(safe_read(io, 4))
11
+ io.seek(io.pos + 2) # Skip over the offset of the IFD, EXIFR will re-read it anyway
12
+ return if cr2?(io)
13
+
14
+ # The TIFF scanner in EXIFR is plenty good enough,
15
+ # so why don't we use it? It does all the right skips
16
+ # in all the right places.
16
17
  scanner = FormatParser::EXIFParser.new(io)
17
18
  scanner.scan_image_tiff
19
+ return unless scanner.exif_data
20
+
18
21
  FormatParser::Image.new(
19
22
  format: :tif,
20
- width_px: w,
21
- height_px: h,
23
+ width_px: scanner.exif_data.image_width,
24
+ height_px: scanner.exif_data.image_length,
22
25
  # might be nil if EXIF metadata wasn't found
23
26
  orientation: scanner.orientation
24
27
  )
28
+ rescue EXIFR::MalformedTIFF
29
+ nil
25
30
  end
26
31
 
27
- # TIFFs can be either big or little endian, so we check here
28
- # and set our unpack method argument to suit.
29
- def scan_tiff_endianness(magic_bytes)
30
- if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
31
- 'v'
32
- elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
33
- 'n'
34
- end
35
- end
36
-
37
- # The TIFF format stores metadata in a flexible set of information fields
38
- # called tags, which are stored in a header referred to as the IFD or
39
- # Image File Directory. It is not necessarily in the same place in every image,
40
- # so we need to do some work to scan through it and find the tags we need.
41
- # For more information the TIFF wikipedia page is a reasonable place to start:
42
- # https://en.wikipedia.org/wiki/TIFF
43
- def scan_ifd(cache, offset, endianness)
44
- entry_count = safe_read(cache, 4).unpack(endianness)[0]
45
- entry_count.times do |i|
46
- cache.seek(offset + 2 + (12 * i))
47
- tag = safe_read(cache, 4).unpack(endianness)[0]
48
- if tag == WIDTH_TAG
49
- @width = safe_read(cache, 4).unpack(endianness.upcase)[0]
50
- elsif tag == HEIGHT_TAG
51
- @height = safe_read(cache, 4).unpack(endianness.upcase)[0]
52
- end
53
- end
54
- end
55
-
56
- def read_tiff_by_endianness(io, endianness)
57
- io.seek(4)
58
- offset = safe_read(io, 4).unpack(endianness.upcase)[0]
59
- io.seek(offset)
60
- scan_ifd(io, offset, endianness)
61
- [@width, @height]
62
- end
63
-
64
- def cr2_check(io)
32
+ def cr2?(io)
65
33
  io.seek(8)
66
- cr2_check_bytes = safe_read(io, 2)
67
- cr2_check_bytes == 'CR'
34
+ safe_read(io, 2) == 'CR'
68
35
  end
69
36
 
70
37
  FormatParser.register_parser self, natures: :image, formats: :tif
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe FormatParser::TIFFParser do
4
- describe 'is able to parse all the examples from FastImage' do
4
+ describe 'with FastImage TIFF examples' do
5
5
  Dir.glob(fixtures_dir + '/TIFF/*.tif').each do |tiff_path|
6
6
  it "is able to parse #{File.basename(tiff_path)}" do
7
7
  parsed = subject.call(File.open(tiff_path, 'rb'))
@@ -19,6 +19,48 @@ describe FormatParser::TIFFParser do
19
19
  end
20
20
  end
21
21
 
22
+ it 'extracts dimensions from a very large TIFF economically' do
23
+ tiff_path = fixtures_dir + '/TIFF/Shinbutsureijoushuincho.tiff'
24
+
25
+ io = File.open(tiff_path, 'rb')
26
+ io_with_stats = FormatParser::ReadLimiter.new(io)
27
+
28
+ parsed = subject.call(io_with_stats)
29
+
30
+ expect(parsed).not_to be_nil
31
+ expect(parsed.width_px).to eq(1120)
32
+ expect(parsed.height_px).to eq(1559)
33
+
34
+ expect(io_with_stats.reads).to be_within(4).of(4)
35
+ expect(io_with_stats.seeks).to be_within(4).of(4)
36
+ expect(io_with_stats.bytes).to be_within(1024).of(8198)
37
+ end
38
+
39
+ it 'correctly extracts dimensions for one fixture' do
40
+ tiff_path = fixtures_dir + '/TIFF/IMG_9266_8b_rgb_le_interleaved.tif'
41
+
42
+ parsed = subject.call(File.open(tiff_path, 'rb'))
43
+
44
+ expect(parsed).not_to be_nil
45
+ expect(parsed.width_px).to eq(320)
46
+ expect(parsed.height_px).to eq(240)
47
+ end
48
+
49
+ describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
50
+ Dir.glob(fixtures_dir + '/TIFF/IMG_9266*.tif').each do |tiff_path|
51
+ it "is able to parse #{File.basename(tiff_path)}" do
52
+ parsed = subject.call(File.open(tiff_path, 'rb'))
53
+
54
+ expect(parsed).not_to be_nil
55
+ expect(parsed.nature).to eq(:image)
56
+ expect(parsed.format).to eq(:tif)
57
+
58
+ expect(parsed.width_px).to eq(320)
59
+ expect(parsed.height_px).to eq(240)
60
+ end
61
+ end
62
+ end
63
+
22
64
  describe 'is able to parse all the TIFF exif examples from FastImage' do
23
65
  Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
24
66
  it "is able to parse #{File.basename(tiff_path)}" do
@@ -34,9 +76,9 @@ describe FormatParser::TIFFParser do
34
76
  end
35
77
  end
36
78
 
37
- describe 'is able to return nil when parsing CR2 examples' do
79
+ describe 'bails out on CR2 files, such as' do
38
80
  Dir.glob(fixtures_dir + '/CR2/*.CR2').each do |cr2_path|
39
- it "is able to return nil when parsing #{File.basename(cr2_path)}" do
81
+ it "skips #{File.basename(cr2_path)}" do
40
82
  parsed = subject.call(File.open(cr2_path, 'rb'))
41
83
  expect(parsed).to be_nil
42
84
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 0.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2018-04-21 00:00:00.000000000 Z
12
+ date: 2018-04-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
247
247
  version: '0'
248
248
  requirements: []
249
249
  rubyforge_project:
250
- rubygems_version: 2.7.3
250
+ rubygems_version: 2.5.2
251
251
  signing_key:
252
252
  specification_version: 4
253
253
  summary: A library for efficient parsing of file metadata