format_parser 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +3 -0
- data/README.md +4 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/jpeg_parser.rb +6 -0
- data/lib/parsers/tiff_parser.rb +17 -50
- data/spec/parsers/tiff_parser_spec.rb +45 -3
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1ea761c606eb23bb10fd39f158afa2a628930018
|
4
|
+
data.tar.gz: eda078a306d8ded4dbdbdb4570608fe99b80b3e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5596e607f371f5b05503d25fc749964af5dd3b945a3ec5011efcb4262a03c3325ea344a00e103cea13cf76e85bea41f10d4633d7f6f3a67dfeb17a7cbe04f2ea
|
7
|
+
data.tar.gz: d25fff95fbb51bd7b414e3e29104b798a8b5d6f11e1a4711266c16cadd532a436463be6ad0414acdda7904562a196b262738483a05e7cfa73d2138ebb4022200
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -136,6 +136,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
136
136
|
### M4A
|
137
137
|
- fixture.m4a was created by one of the project maintainers and is MIT licensed
|
138
138
|
|
139
|
+
### TIFF
|
140
|
+
- `Shinbutsureijoushuincho.tiff` is obtained from Wikimedia Commons and is Creative Commons licensed
|
141
|
+
- `IMG_9266_*.tif` and all its variations were created by the project maintainers
|
142
|
+
|
139
143
|
### ZIP
|
140
144
|
- The .zip fixture files have been created by the project maintainers
|
141
145
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -118,6 +118,12 @@ class FormatParser::JPEGParser
|
|
118
118
|
# does under the hood.
|
119
119
|
app1_frame_content_length = read_short - 2
|
120
120
|
|
121
|
+
# If there is certainly not enough data in this APP1 to begin with, bail out.
|
122
|
+
# For the sake of the argument assume that a usable EXIF marker would contain
|
123
|
+
# at least 2 bytes of data - not exact science, but it can help us
|
124
|
+
# avoid reading _anything_ from the APP1 marker body if it's too small anyway
|
125
|
+
return if app1_frame_content_length < (EXIF_MAGIC_STRING.bytesize + 2)
|
126
|
+
|
121
127
|
# Peek whether the contents of the marker starts with Exif\0
|
122
128
|
maybe_exif_magic_str = safe_read(@buf, EXIF_MAGIC_STRING.bytesize)
|
123
129
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -1,70 +1,37 @@
|
|
1
1
|
class FormatParser::TIFFParser
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
WIDTH_TAG = 0x100
|
7
|
-
HEIGHT_TAG = 0x101
|
4
|
+
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
5
|
+
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
8
6
|
|
9
7
|
def call(io)
|
10
8
|
io = FormatParser::IOConstraint.new(io)
|
11
|
-
magic_bytes = safe_read(io, 4).unpack('C4')
|
12
|
-
endianness = scan_tiff_endianness(magic_bytes)
|
13
|
-
return if !endianness || cr2_check(io)
|
14
9
|
|
15
|
-
|
10
|
+
return unless [MAGIC_LE, MAGIC_BE].include?(safe_read(io, 4))
|
11
|
+
io.seek(io.pos + 2) # Skip over the offset of the IFD, EXIFR will re-read it anyway
|
12
|
+
return if cr2?(io)
|
13
|
+
|
14
|
+
# The TIFF scanner in EXIFR is plenty good enough,
|
15
|
+
# so why don't we use it? It does all the right skips
|
16
|
+
# in all the right places.
|
16
17
|
scanner = FormatParser::EXIFParser.new(io)
|
17
18
|
scanner.scan_image_tiff
|
19
|
+
return unless scanner.exif_data
|
20
|
+
|
18
21
|
FormatParser::Image.new(
|
19
22
|
format: :tif,
|
20
|
-
width_px:
|
21
|
-
height_px:
|
23
|
+
width_px: scanner.exif_data.image_width,
|
24
|
+
height_px: scanner.exif_data.image_length,
|
22
25
|
# might be nil if EXIF metadata wasn't found
|
23
26
|
orientation: scanner.orientation
|
24
27
|
)
|
28
|
+
rescue EXIFR::MalformedTIFF
|
29
|
+
nil
|
25
30
|
end
|
26
31
|
|
27
|
-
|
28
|
-
# and set our unpack method argument to suit.
|
29
|
-
def scan_tiff_endianness(magic_bytes)
|
30
|
-
if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
|
31
|
-
'v'
|
32
|
-
elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
|
33
|
-
'n'
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
# The TIFF format stores metadata in a flexible set of information fields
|
38
|
-
# called tags, which are stored in a header referred to as the IFD or
|
39
|
-
# Image File Directory. It is not necessarily in the same place in every image,
|
40
|
-
# so we need to do some work to scan through it and find the tags we need.
|
41
|
-
# For more information the TIFF wikipedia page is a reasonable place to start:
|
42
|
-
# https://en.wikipedia.org/wiki/TIFF
|
43
|
-
def scan_ifd(cache, offset, endianness)
|
44
|
-
entry_count = safe_read(cache, 4).unpack(endianness)[0]
|
45
|
-
entry_count.times do |i|
|
46
|
-
cache.seek(offset + 2 + (12 * i))
|
47
|
-
tag = safe_read(cache, 4).unpack(endianness)[0]
|
48
|
-
if tag == WIDTH_TAG
|
49
|
-
@width = safe_read(cache, 4).unpack(endianness.upcase)[0]
|
50
|
-
elsif tag == HEIGHT_TAG
|
51
|
-
@height = safe_read(cache, 4).unpack(endianness.upcase)[0]
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def read_tiff_by_endianness(io, endianness)
|
57
|
-
io.seek(4)
|
58
|
-
offset = safe_read(io, 4).unpack(endianness.upcase)[0]
|
59
|
-
io.seek(offset)
|
60
|
-
scan_ifd(io, offset, endianness)
|
61
|
-
[@width, @height]
|
62
|
-
end
|
63
|
-
|
64
|
-
def cr2_check(io)
|
32
|
+
def cr2?(io)
|
65
33
|
io.seek(8)
|
66
|
-
|
67
|
-
cr2_check_bytes == 'CR'
|
34
|
+
safe_read(io, 2) == 'CR'
|
68
35
|
end
|
69
36
|
|
70
37
|
FormatParser.register_parser self, natures: :image, formats: :tif
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe FormatParser::TIFFParser do
|
4
|
-
describe '
|
4
|
+
describe 'with FastImage TIFF examples' do
|
5
5
|
Dir.glob(fixtures_dir + '/TIFF/*.tif').each do |tiff_path|
|
6
6
|
it "is able to parse #{File.basename(tiff_path)}" do
|
7
7
|
parsed = subject.call(File.open(tiff_path, 'rb'))
|
@@ -19,6 +19,48 @@ describe FormatParser::TIFFParser do
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
it 'extracts dimensions from a very large TIFF economically' do
|
23
|
+
tiff_path = fixtures_dir + '/TIFF/Shinbutsureijoushuincho.tiff'
|
24
|
+
|
25
|
+
io = File.open(tiff_path, 'rb')
|
26
|
+
io_with_stats = FormatParser::ReadLimiter.new(io)
|
27
|
+
|
28
|
+
parsed = subject.call(io_with_stats)
|
29
|
+
|
30
|
+
expect(parsed).not_to be_nil
|
31
|
+
expect(parsed.width_px).to eq(1120)
|
32
|
+
expect(parsed.height_px).to eq(1559)
|
33
|
+
|
34
|
+
expect(io_with_stats.reads).to be_within(4).of(4)
|
35
|
+
expect(io_with_stats.seeks).to be_within(4).of(4)
|
36
|
+
expect(io_with_stats.bytes).to be_within(1024).of(8198)
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'correctly extracts dimensions for one fixture' do
|
40
|
+
tiff_path = fixtures_dir + '/TIFF/IMG_9266_8b_rgb_le_interleaved.tif'
|
41
|
+
|
42
|
+
parsed = subject.call(File.open(tiff_path, 'rb'))
|
43
|
+
|
44
|
+
expect(parsed).not_to be_nil
|
45
|
+
expect(parsed.width_px).to eq(320)
|
46
|
+
expect(parsed.height_px).to eq(240)
|
47
|
+
end
|
48
|
+
|
49
|
+
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
50
|
+
Dir.glob(fixtures_dir + '/TIFF/IMG_9266*.tif').each do |tiff_path|
|
51
|
+
it "is able to parse #{File.basename(tiff_path)}" do
|
52
|
+
parsed = subject.call(File.open(tiff_path, 'rb'))
|
53
|
+
|
54
|
+
expect(parsed).not_to be_nil
|
55
|
+
expect(parsed.nature).to eq(:image)
|
56
|
+
expect(parsed.format).to eq(:tif)
|
57
|
+
|
58
|
+
expect(parsed.width_px).to eq(320)
|
59
|
+
expect(parsed.height_px).to eq(240)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
22
64
|
describe 'is able to parse all the TIFF exif examples from FastImage' do
|
23
65
|
Dir.glob(fixtures_dir + '/exif-orientation-testimages/tiff-*/*.tif').each do |tiff_path|
|
24
66
|
it "is able to parse #{File.basename(tiff_path)}" do
|
@@ -34,9 +76,9 @@ describe FormatParser::TIFFParser do
|
|
34
76
|
end
|
35
77
|
end
|
36
78
|
|
37
|
-
describe '
|
79
|
+
describe 'bails out on CR2 files, such as' do
|
38
80
|
Dir.glob(fixtures_dir + '/CR2/*.CR2').each do |cr2_path|
|
39
|
-
it "
|
81
|
+
it "skips #{File.basename(cr2_path)}" do
|
40
82
|
parsed = subject.call(File.open(cr2_path, 'rb'))
|
41
83
|
expect(parsed).to be_nil
|
42
84
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-04-
|
12
|
+
date: 2018-04-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
247
247
|
version: '0'
|
248
248
|
requirements: []
|
249
249
|
rubyforge_project:
|
250
|
-
rubygems_version: 2.
|
250
|
+
rubygems_version: 2.5.2
|
251
251
|
signing_key:
|
252
252
|
specification_version: 4
|
253
253
|
summary: A library for efficient parsing of file metadata
|