format_parser 0.9.3 → 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: cd4a3b56391cebff09efc933b5fd48188c67f913adeec46c77a832ff067fd870
4
- data.tar.gz: 159c14df0b5740f627a99915f05750bf753c89620017733eab067c6e865e972d
2
+ SHA1:
3
+ metadata.gz: 1ea761c606eb23bb10fd39f158afa2a628930018
4
+ data.tar.gz: eda078a306d8ded4dbdbdb4570608fe99b80b3e8
5
5
  SHA512:
6
- metadata.gz: 559b0f5709bd8fc23cb20468eed5c3840a4087a444a581591b530490278532dff8700a54652e7e9cc8ab82b89ac2db910bf8e79620c7de5cf5a172cd7285ade3
7
- data.tar.gz: d5baa5e7b5aea3cce2acf2513509b4dac8ad40dceb29a69875a10e4322dd136214ae650e82254416d1d39907a33cb18ed0d40634af0fa974a68ce2b0dc99b7dd
6
+ metadata.gz: 5596e607f371f5b05503d25fc749964af5dd3b945a3ec5011efcb4262a03c3325ea344a00e103cea13cf76e85bea41f10d4633d7f6f3a67dfeb17a7cbe04f2ea
7
+ data.tar.gz: d25fff95fbb51bd7b414e3e29104b798a8b5d6f11e1a4711266c16cadd532a436463be6ad0414acdda7904562a196b262738483a05e7cfa73d2138ebb4022200
@@ -1,3 +1,6 @@
1
+ ## 0.9.4
2
+ * Fix a TIFF parsing regression introduced in 0.3.1 that led to all TIFFs being incorrectly parsed
3
+
1
4
  ## 0.9.3
2
5
  * Fix a JPEG parsing regression introduced in 0.9.1
3
6
 
data/README.md CHANGED
@@ -136,6 +136,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
136
136
  ### M4A
137
137
  - fixture.m4a was created by one of the project maintainers and is MIT licensed
138
138
 
139
+ ### TIFF
140
+ - `Shinbutsureijoushuincho.tiff` is obtained from Wikimedia Commons and is Creative Commons licensed
141
+ - `IMG_9266_*.tif` and all its variations were created by the project maintainers
142
+
139
143
  ### ZIP
140
144
  - The .zip fixture files have been created by the project maintainers
141
145
 
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.9.3'
2
+ VERSION = '0.9.4'
3
3
  end
@@ -118,6 +118,12 @@ class FormatParser::JPEGParser
118
118
  # does under the hood.
119
119
  app1_frame_content_length = read_short - 2
120
120
 
121
+ # If there is certainly not enough data in this APP1 to begin with, bail out.
122
+ # For the sake of the argument assume that a usable EXIF marker would contain
123
+ # at least 2 bytes of data - not exact science, but it can help us
124
+ # avoid reading _anything_ from the APP1 marker body if it's too small anyway
125
+ return if app1_frame_content_length < (EXIF_MAGIC_STRING.bytesize + 2)
126
+
121
127
  # Peek whether the contents of the marker starts with Exif\0
122
128
  maybe_exif_magic_str = safe_read(@buf, EXIF_MAGIC_STRING.bytesize)
123
129
 
@@ -1,70 +1,37 @@
1
1
  class FormatParser::TIFFParser
2
2
  include FormatParser::IOUtils
3
3
 
4
- LITTLE_ENDIAN_TIFF_HEADER_BYTES = [0x49, 0x49, 0x2A, 0x0]
5
- BIG_ENDIAN_TIFF_HEADER_BYTES = [0x4D, 0x4D, 0x0, 0x2A]
6
- WIDTH_TAG = 0x100
7
- HEIGHT_TAG = 0x101
4
+ MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
5
+ MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
8
6
 
9
7
  def call(io)
10
8
  io = FormatParser::IOConstraint.new(io)
11
- magic_bytes = safe_read(io, 4).unpack('C4')
12
- endianness = scan_tiff_endianness(magic_bytes)
13
- return if !endianness || cr2_check(io)
14
9
 
15
- w, h = read_tiff_by_endianness(io, endianness)
10
+ return unless [MAGIC_LE, MAGIC_BE].include?(safe_read(io, 4))
11
+ io.seek(io.pos + 2) # Skip over the offset of the IFD, EXIFR will re-read it anyway
12
+ return if cr2?(io)
13
+
14
+ # The TIFF scanner in EXIFR is plenty good enough,
15
+ # so why don't we use it? It does all the right skips
16
+ # in all the right places.
16
17
  scanner = FormatParser::EXIFParser.new(io)
17
18
  scanner.scan_image_tiff
19
+ return unless scanner.exif_data
20
+
18
21
  FormatParser::Image.new(
19
22
  format: :tif,
20
- width_px: w,
21
- height_px: h,
23
+ width_px: scanner.exif_data.image_width,
24
+ height_px: scanner.exif_data.image_length,
22
25
  # might be nil if EXIF metadata wasn't found
23
26
  orientation: scanner.orientation
24
27
  )
28
+ rescue EXIFR::MalformedTIFF
29
+ nil
25
30
  end
26
31
 
27
- # TIFFs can be either big or little endian, so we check here
28
- # and set our unpack method argument to suit.
29
- def scan_tiff_endianness(magic_bytes)
30
- if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
31
- 'v'
32
- elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
33
- 'n'
34
- end
35
- end
36
-
37
- # The TIFF format stores metadata in a flexible set of information fields
38
- # called tags, which are stored in a header referred to as the IFD or
39
- # Image File Directory. It is not necessarily in the same place in every image,
40
- # so we need to do some work to scan through it and find the tags we need.
41
- # For more information the TIFF wikipedia page is a reasonable place to start:
42
- # https://en.wikipedia.org/wiki/TIFF
43
- def scan_ifd(cache, offset, endianness)
44
- entry_count = safe_read(cache, 4).unpack(endianness)[0]
45
- entry_count.times do |i|
46
- cache.seek(offset + 2 + (12 * i))
47
- tag = safe_read(cache, 4).unpack(endianness)[0]
48
- if tag == WIDTH_TAG
49
- @width = safe_read(cache, 4).unpack(endianness.upcase)[0]
50
- elsif tag == HEIGHT_TAG
51
- @height = safe_read(cache, 4).unpack(endianness.upcase)[0]
52
- end
53
- end
54
- end
55
-
56
- def read_tiff_by_endianness(io, endianness)
57
- io.seek(4)
58
- offset = safe_read(io, 4).unpack(endianness.upcase)[0]
59
- io.seek(offset)
60
- scan_ifd(io, offset, endianness)
61
- [@width, @height]
62
- end
63
-
64
- def cr2_check(io)
32
+ def cr2?(io)
65
33
  io.seek(8)
66
- cr2_check_bytes = safe_read(io, 2)
67
- cr2_check_bytes == 'CR'
34
+ safe_read(io, 2) == 'CR'
68
35
  end
69
36
 
70
37
  FormatParser.register_parser self, natures: :image, formats: :tif
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe FormatParser::TIFFParser do
4
- describe 'is able to parse all the examples from FastImage' do
4
+ describe 'with FastImage TIFF examples' do
5
5
  Dir.glob(fixtures_dir + '/TIFF/*.tif').each do |tiff_path|
6
6
  it "is able to parse #{File.basename(tiff_path)}" do
7
7
  parsed = subject.call(File.open(tiff_path, 'rb'))
@@ -19,6 +19,48 @@ describe FormatParser::TIFFParser do
19
19
  end
20
20
  end
21
21
 
22
+ it 'extracts dimensions from a very large TIFF economically' do
23
+ tiff_path = fixtures_dir + '/TIFF/Shinbutsureijoushuincho.tiff'
24
+
25
+ io = File.open(tiff_path, 'rb')
26
+ io_with_stats = FormatParser::ReadLimiter.new(io)
27
+
28
+ parsed = subject.call(io_with_stats)
29
+
30
+ expect(parsed).not_to be_nil
31
+ expect(parsed.width_px).to eq(1120)
32
+ expect(parsed.height_px).to eq(1559)
33
+
34
+ expect(io_with_stats.reads).to be_within(4).of(4)
35
+ expect(io_with_stats.seeks).to be_within(4).of(4)
36
+ expect(io_with_stats.bytes).to be_within(1024).of(8198)
37
+ end
38
+
39
+ it 'correctly extracts dimensions for one fixture' do
40
+ tiff_path = fixtures_dir + '/TIFF/IMG_9266_8b_rgb_le_interleaved.tif'
41
+
42
+ parsed = subject.call(File.open(tiff_path, 'rb'))
43
+
44
+ expect(parsed).not_to be_nil
45
+ expect(parsed.width_px).to eq(320)
46
+ expect(parsed.height_px).to eq(240)
47
+ end
48
+
49
+ describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
50
+ Dir.glob(fixtures_dir + '/TIFF/IMG_9266*.tif').each do |tiff_path|
51
+ it "is able to parse #{File.basename(tiff_path)}" do
52
+ parsed = subject.call(File.open(tiff_path, 'rb'))
53
+
54
+ expect(parsed).not_to be_nil
55
+ expect(parsed.nature).to eq(:image)
56
+ expect(parsed.format).to eq(:tif)
57
+
58
+ expect(parsed.width_px).to eq(320)
59
+ expect(parsed.height_px).to eq(240)
60
+ end
61
+ end
62
+ end
63
+
22
64
  describe 'is able to parse all the TIFF exif examples from FastImage' do
23
65
  Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
24
66
  it "is able to parse #{File.basename(tiff_path)}" do
@@ -34,9 +76,9 @@ describe FormatParser::TIFFParser do
34
76
  end
35
77
  end
36
78
 
37
- describe 'is able to return nil when parsing CR2 examples' do
79
+ describe 'bails out on CR2 files, such as' do
38
80
  Dir.glob(fixtures_dir + '/CR2/*.CR2').each do |cr2_path|
39
- it "is able to return nil when parsing #{File.basename(cr2_path)}" do
81
+ it "skips #{File.basename(cr2_path)}" do
40
82
  parsed = subject.call(File.open(cr2_path, 'rb'))
41
83
  expect(parsed).to be_nil
42
84
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 0.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2018-04-21 00:00:00.000000000 Z
12
+ date: 2018-04-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
247
247
  version: '0'
248
248
  requirements: []
249
249
  rubyforge_project:
250
- rubygems_version: 2.7.3
250
+ rubygems_version: 2.5.2
251
251
  signing_key:
252
252
  specification_version: 4
253
253
  summary: A library for efficient parsing of file metadata