format_parser 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +3 -0
- data/README.md +4 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/jpeg_parser.rb +6 -0
- data/lib/parsers/tiff_parser.rb +17 -50
- data/spec/parsers/tiff_parser_spec.rb +45 -3
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1ea761c606eb23bb10fd39f158afa2a628930018
|
4
|
+
data.tar.gz: eda078a306d8ded4dbdbdb4570608fe99b80b3e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5596e607f371f5b05503d25fc749964af5dd3b945a3ec5011efcb4262a03c3325ea344a00e103cea13cf76e85bea41f10d4633d7f6f3a67dfeb17a7cbe04f2ea
|
7
|
+
data.tar.gz: d25fff95fbb51bd7b414e3e29104b798a8b5d6f11e1a4711266c16cadd532a436463be6ad0414acdda7904562a196b262738483a05e7cfa73d2138ebb4022200
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -136,6 +136,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
136
136
|
### M4A
|
137
137
|
- fixture.m4a was created by one of the project maintainers and is MIT licensed
|
138
138
|
|
139
|
+
### TIFF
|
140
|
+
- `Shinbutsureijoushuincho.tiff` is obtained from Wikimedia Commons and is Creative Commons licensed
|
141
|
+
- `IMG_9266_*.tif` and all its variations were created by the project maintainers
|
142
|
+
|
139
143
|
### ZIP
|
140
144
|
- The .zip fixture files have been created by the project maintainers
|
141
145
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -118,6 +118,12 @@ class FormatParser::JPEGParser
|
|
118
118
|
# does under the hood.
|
119
119
|
app1_frame_content_length = read_short - 2
|
120
120
|
|
121
|
+
# If there is certainly not enough data in this APP1 to begin with, bail out.
|
122
|
+
# For the sake of the argument assume that a usable EXIF marker would contain
|
123
|
+
# at least 2 bytes of data - not exact science, but it can help us
|
124
|
+
# avoid reading _anything_ from the APP1 marker body if it's too small anyway
|
125
|
+
return if app1_frame_content_length < (EXIF_MAGIC_STRING.bytesize + 2)
|
126
|
+
|
121
127
|
# Peek whether the contents of the marker starts with Exif\0
|
122
128
|
maybe_exif_magic_str = safe_read(@buf, EXIF_MAGIC_STRING.bytesize)
|
123
129
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -1,70 +1,37 @@
|
|
1
1
|
class FormatParser::TIFFParser
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
WIDTH_TAG = 0x100
|
7
|
-
HEIGHT_TAG = 0x101
|
4
|
+
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
5
|
+
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
8
6
|
|
9
7
|
def call(io)
|
10
8
|
io = FormatParser::IOConstraint.new(io)
|
11
|
-
magic_bytes = safe_read(io, 4).unpack('C4')
|
12
|
-
endianness = scan_tiff_endianness(magic_bytes)
|
13
|
-
return if !endianness || cr2_check(io)
|
14
9
|
|
15
|
-
|
10
|
+
return unless [MAGIC_LE, MAGIC_BE].include?(safe_read(io, 4))
|
11
|
+
io.seek(io.pos + 2) # Skip over the offset of the IFD, EXIFR will re-read it anyway
|
12
|
+
return if cr2?(io)
|
13
|
+
|
14
|
+
# The TIFF scanner in EXIFR is plenty good enough,
|
15
|
+
# so why don't we use it? It does all the right skips
|
16
|
+
# in all the right places.
|
16
17
|
scanner = FormatParser::EXIFParser.new(io)
|
17
18
|
scanner.scan_image_tiff
|
19
|
+
return unless scanner.exif_data
|
20
|
+
|
18
21
|
FormatParser::Image.new(
|
19
22
|
format: :tif,
|
20
|
-
width_px:
|
21
|
-
height_px:
|
23
|
+
width_px: scanner.exif_data.image_width,
|
24
|
+
height_px: scanner.exif_data.image_length,
|
22
25
|
# might be nil if EXIF metadata wasn't found
|
23
26
|
orientation: scanner.orientation
|
24
27
|
)
|
28
|
+
rescue EXIFR::MalformedTIFF
|
29
|
+
nil
|
25
30
|
end
|
26
31
|
|
27
|
-
|
28
|
-
# and set our unpack method argument to suit.
|
29
|
-
def scan_tiff_endianness(magic_bytes)
|
30
|
-
if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
|
31
|
-
'v'
|
32
|
-
elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
|
33
|
-
'n'
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
# The TIFF format stores metadata in a flexible set of information fields
|
38
|
-
# called tags, which are stored in a header referred to as the IFD or
|
39
|
-
# Image File Directory. It is not necessarily in the same place in every image,
|
40
|
-
# so we need to do some work to scan through it and find the tags we need.
|
41
|
-
# For more information the TIFF wikipedia page is a reasonable place to start:
|
42
|
-
# https://en.wikipedia.org/wiki/TIFF
|
43
|
-
def scan_ifd(cache, offset, endianness)
|
44
|
-
entry_count = safe_read(cache, 4).unpack(endianness)[0]
|
45
|
-
entry_count.times do |i|
|
46
|
-
cache.seek(offset + 2 + (12 * i))
|
47
|
-
tag = safe_read(cache, 4).unpack(endianness)[0]
|
48
|
-
if tag == WIDTH_TAG
|
49
|
-
@width = safe_read(cache, 4).unpack(endianness.upcase)[0]
|
50
|
-
elsif tag == HEIGHT_TAG
|
51
|
-
@height = safe_read(cache, 4).unpack(endianness.upcase)[0]
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def read_tiff_by_endianness(io, endianness)
|
57
|
-
io.seek(4)
|
58
|
-
offset = safe_read(io, 4).unpack(endianness.upcase)[0]
|
59
|
-
io.seek(offset)
|
60
|
-
scan_ifd(io, offset, endianness)
|
61
|
-
[@width, @height]
|
62
|
-
end
|
63
|
-
|
64
|
-
def cr2_check(io)
|
32
|
+
def cr2?(io)
|
65
33
|
io.seek(8)
|
66
|
-
|
67
|
-
cr2_check_bytes == 'CR'
|
34
|
+
safe_read(io, 2) == 'CR'
|
68
35
|
end
|
69
36
|
|
70
37
|
FormatParser.register_parser self, natures: :image, formats: :tif
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe FormatParser::TIFFParser do
|
4
|
-
describe '
|
4
|
+
describe 'with FastImage TIFF examples' do
|
5
5
|
Dir.glob(fixtures_dir + '/TIFF/*.tif').each do |tiff_path|
|
6
6
|
it "is able to parse #{File.basename(tiff_path)}" do
|
7
7
|
parsed = subject.call(File.open(tiff_path, 'rb'))
|
@@ -19,6 +19,48 @@ describe FormatParser::TIFFParser do
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
it 'extracts dimensions from a very large TIFF economically' do
|
23
|
+
tiff_path = fixtures_dir + '/TIFF/Shinbutsureijoushuincho.tiff'
|
24
|
+
|
25
|
+
io = File.open(tiff_path, 'rb')
|
26
|
+
io_with_stats = FormatParser::ReadLimiter.new(io)
|
27
|
+
|
28
|
+
parsed = subject.call(io_with_stats)
|
29
|
+
|
30
|
+
expect(parsed).not_to be_nil
|
31
|
+
expect(parsed.width_px).to eq(1120)
|
32
|
+
expect(parsed.height_px).to eq(1559)
|
33
|
+
|
34
|
+
expect(io_with_stats.reads).to be_within(4).of(4)
|
35
|
+
expect(io_with_stats.seeks).to be_within(4).of(4)
|
36
|
+
expect(io_with_stats.bytes).to be_within(1024).of(8198)
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'correctly extracts dimensions for one fixture' do
|
40
|
+
tiff_path = fixtures_dir + '/TIFF/IMG_9266_8b_rgb_le_interleaved.tif'
|
41
|
+
|
42
|
+
parsed = subject.call(File.open(tiff_path, 'rb'))
|
43
|
+
|
44
|
+
expect(parsed).not_to be_nil
|
45
|
+
expect(parsed.width_px).to eq(320)
|
46
|
+
expect(parsed.height_px).to eq(240)
|
47
|
+
end
|
48
|
+
|
49
|
+
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
50
|
+
Dir.glob(fixtures_dir + '/TIFF/IMG_9266*.tif').each do |tiff_path|
|
51
|
+
it "is able to parse #{File.basename(tiff_path)}" do
|
52
|
+
parsed = subject.call(File.open(tiff_path, 'rb'))
|
53
|
+
|
54
|
+
expect(parsed).not_to be_nil
|
55
|
+
expect(parsed.nature).to eq(:image)
|
56
|
+
expect(parsed.format).to eq(:tif)
|
57
|
+
|
58
|
+
expect(parsed.width_px).to eq(320)
|
59
|
+
expect(parsed.height_px).to eq(240)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
22
64
|
describe 'is able to parse all the TIFF exif examples from FastImage' do
|
23
65
|
Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
|
24
66
|
it "is able to parse #{File.basename(tiff_path)}" do
|
@@ -34,9 +76,9 @@ describe FormatParser::TIFFParser do
|
|
34
76
|
end
|
35
77
|
end
|
36
78
|
|
37
|
-
describe '
|
79
|
+
describe 'bails out on CR2 files, such as' do
|
38
80
|
Dir.glob(fixtures_dir + '/CR2/*.CR2').each do |cr2_path|
|
39
|
-
it "
|
81
|
+
it "skips #{File.basename(cr2_path)}" do
|
40
82
|
parsed = subject.call(File.open(cr2_path, 'rb'))
|
41
83
|
expect(parsed).to be_nil
|
42
84
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-04-
|
12
|
+
date: 2018-04-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
247
247
|
version: '0'
|
248
248
|
requirements: []
|
249
249
|
rubyforge_project:
|
250
|
-
rubygems_version: 2.
|
250
|
+
rubygems_version: 2.5.2
|
251
251
|
signing_key:
|
252
252
|
specification_version: 4
|
253
253
|
summary: A library for efficient parsing of file metadata
|