format_parser 0.16.0 → 0.16.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/README.md +5 -0
- data/Rakefile +3 -1
- data/format_parser.gemspec +2 -1
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/cr2_parser.rb +1 -1
- data/lib/parsers/exif_parser.rb +110 -27
- data/lib/parsers/jpeg_parser.rb +14 -6
- data/lib/parsers/tiff_parser.rb +1 -1
- data/spec/parsers/exif_parser_spec.rb +2 -2
- data/spec/parsers/jpeg_parser_spec.rb +27 -0
- metadata +25 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 80362b6ee92ad34550681e8c0447dcb93b11071beac25d743356a9c0b7a548f9
|
4
|
+
data.tar.gz: c40fbcae9bd5417420ff5ea573c24f65462f3b0cf5269fc65024e6a237c98f45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f20f7206574ea204393662e29251dc079ce615c593b21c0c4c35f3ba74a08fc9582e15443f23e45771ee2cb73b5b691ee6a31d37f5fb99e847d09d4c2e003cd
|
7
|
+
data.tar.gz: 6a23e2bb7797a5834e0b2867096240a29ea899d2301f1496c94b2e9d35d29698dc8599bef7185785f7a26fc929bff1e9dda1f5d7a96bbfc37227b1a2d42b3c4b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.16.1
|
2
|
+
* All EXIF: Make sure the 0 orientation does not get silently treated as orientation 8, mislabeling
|
3
|
+
images which are not rotated as being rotated (orientation changed)
|
4
|
+
* All EXIF: Make sure the 0 orientation (`unknown`) is correctly passed and represented
|
5
|
+
* JPEG: Make sure multiple EXIF tags in APP1 markers get handled correctly (via overlays)
|
6
|
+
|
1
7
|
## 0.16.0
|
2
8
|
* Add `filename_hint` keyword argument to `FormatParser.parse`. This can hint the library to apply
|
3
9
|
the parser that will likely match for this filename first, and the other parsers later. This helps
|
data/README.md
CHANGED
@@ -170,5 +170,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
170
170
|
### .docx
|
171
171
|
- The .docx files were generated by the project maintainers
|
172
172
|
|
173
|
+
### JPEG examples of EXIF orientation
|
174
|
+
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
175
|
+
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
176
|
+
script.
|
177
|
+
|
173
178
|
### .key
|
174
179
|
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
data/Rakefile
CHANGED
@@ -2,6 +2,7 @@ require 'bundler/gem_tasks'
|
|
2
2
|
require 'rspec/core/rake_task'
|
3
3
|
require 'yard'
|
4
4
|
require 'rubocop/rake_task'
|
5
|
+
require 'parallel_tests/tasks'
|
5
6
|
|
6
7
|
YARD::Rake::YardocTask.new(:doc) do |t|
|
7
8
|
# The dash has to be between the two to "divide" the source files and
|
@@ -11,4 +12,5 @@ end
|
|
11
12
|
|
12
13
|
RuboCop::RakeTask.new
|
13
14
|
RSpec::Core::RakeTask.new(:spec)
|
14
|
-
|
15
|
+
|
16
|
+
task default: ['parallel:spec', :rubocop]
|
data/format_parser.gemspec
CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
|
35
35
|
spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
@@ -42,4 +42,5 @@ Gem::Specification.new do |spec|
|
|
42
42
|
spec.add_development_dependency 'pry', '~> 0.11'
|
43
43
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
44
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
|
+
spec.add_development_dependency 'parallel_tests'
|
45
46
|
end
|
data/lib/parsers/cr2_parser.rb
CHANGED
@@ -37,7 +37,7 @@ class FormatParser::CR2Parser
|
|
37
37
|
height_px: h,
|
38
38
|
display_width_px: exif_data.rotated? ? h : w,
|
39
39
|
display_height_px: exif_data.rotated? ? w : h,
|
40
|
-
orientation: exif_data.
|
40
|
+
orientation: exif_data.orientation_sym,
|
41
41
|
intrinsics: {exif: exif_data},
|
42
42
|
)
|
43
43
|
rescue EXIFR::MalformedTIFF
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -2,30 +2,17 @@ require 'exifr/tiff'
|
|
2
2
|
require 'delegate'
|
3
3
|
|
4
4
|
module FormatParser::EXIFParser
|
5
|
-
ORIENTATIONS =
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
|
15
|
-
|
16
|
-
:bottom_left,
|
17
|
-
:bottom_right,
|
18
|
-
:top_left,
|
19
|
-
:top_right
|
20
|
-
]
|
21
|
-
module MethodsMethodFix
|
22
|
-
# Fix a little bug in EXIFR which breaks delegators
|
23
|
-
# https://github.com/remvee/exifr/pull/55
|
24
|
-
def methods(*)
|
25
|
-
super() # no args
|
26
|
-
end
|
27
|
-
end
|
28
|
-
EXIFR::TIFF.prepend(MethodsMethodFix)
|
5
|
+
ORIENTATIONS = {
|
6
|
+
0 => :unknown, # Non-rotated
|
7
|
+
1 => :top_left, # Non-rotated
|
8
|
+
2 => :top_right, # Non-rotated
|
9
|
+
3 => :bottom_right, # Non-rotated
|
10
|
+
4 => :bottom_left, # Non-rotated
|
11
|
+
5 => :left_top,
|
12
|
+
6 => :right_top,
|
13
|
+
7 => :right_bottom,
|
14
|
+
8 => :left_bottom
|
15
|
+
}
|
29
16
|
|
30
17
|
# EXIFR kindly requests the presence of a few more methods than what our IOConstraint
|
31
18
|
# is willing to provide, but they can be derived from the available ones
|
@@ -55,7 +42,7 @@ module FormatParser::EXIFParser
|
|
55
42
|
|
56
43
|
class EXIFResult < SimpleDelegator
|
57
44
|
def rotated?
|
58
|
-
|
45
|
+
__getobj__.orientation.to_i > 4
|
59
46
|
end
|
60
47
|
|
61
48
|
def to_json(*maybe_coder)
|
@@ -65,8 +52,104 @@ module FormatParser::EXIFParser
|
|
65
52
|
end
|
66
53
|
|
67
54
|
def orientation
|
68
|
-
|
69
|
-
|
55
|
+
__getobj__.orientation.to_i
|
56
|
+
end
|
57
|
+
|
58
|
+
def orientation_sym
|
59
|
+
ORIENTATIONS.fetch(orientation)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# With some formats, multiple EXIF tag frames can be included in a single file.
|
64
|
+
# For example, JPEGs might have multiple APP1 markers which each contain EXIF
|
65
|
+
# data. The EXIF data in them, however, is not necessarily "complete" - it seems
|
66
|
+
# most applications assume that these blocks "overwrite" each other with the properties
|
67
|
+
# they specify. Probably this is done for more efficient saving - instead of overwriting
|
68
|
+
# the EXIF data with a modified version - which would also potentially disturb any digital
|
69
|
+
# signing that this data might include - the applications are supposed to follow the order
|
70
|
+
# in which these tags appear in the file:
|
71
|
+
#
|
72
|
+
# Take a resized image for example:
|
73
|
+
#
|
74
|
+
# APP1 {author: 'John', pixel_width: 1024}
|
75
|
+
# APP1 {pixel_width: 10}
|
76
|
+
#
|
77
|
+
# That image would get a combined EXIF of:
|
78
|
+
#
|
79
|
+
# APP1 {author: 'John', pixel_width: 10}
|
80
|
+
#
|
81
|
+
# since the frame that comes later in the file overwrites a property. From what I see
|
82
|
+
# exiftools do, this is the way it works.
|
83
|
+
#
|
84
|
+
# This class acts as a wrapper for this "layering" of chunks of EXIF properties, and will
|
85
|
+
# follow the following conventions:
|
86
|
+
#
|
87
|
+
# * When merging data for JSON conversion, it will merge it top-down. It will overwrite
|
88
|
+
# any specified properties. An exception is made for orientation (see below)
|
89
|
+
# * When retrieving a property, it will look "from the end to the beginning" of the EXIF
|
90
|
+
# dataframe stack, looking for the first dataframe which has this property with a non-nil value
|
91
|
+
# * When retrieving orientation, it will pick the first orientation value which is not nil
|
92
|
+
# but also not 0 ("unknown orientation"). Even files in our test suite contain these.
|
93
|
+
class EXIFStack
|
94
|
+
def initialize(multiple_exif_results)
|
95
|
+
@multiple_exif_results = Array(multiple_exif_results)
|
96
|
+
end
|
97
|
+
|
98
|
+
def to_json(*maybe_coder)
|
99
|
+
# Let EXIF tags that come later overwrite the properties from the tags
|
100
|
+
# that come earlier
|
101
|
+
overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
|
102
|
+
h.merge!(one_exif_frame.to_hash)
|
103
|
+
end
|
104
|
+
# Overwrite the orientation with our custom method implementation, because
|
105
|
+
# it does reject 0-values.
|
106
|
+
overlay[:orientation] = orientation
|
107
|
+
|
108
|
+
sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
|
109
|
+
sanitized.to_json(*maybe_coder)
|
110
|
+
end
|
111
|
+
|
112
|
+
def orientation_sym
|
113
|
+
ORIENTATIONS.fetch(orientation)
|
114
|
+
end
|
115
|
+
|
116
|
+
def rotated?
|
117
|
+
orientation > 4
|
118
|
+
end
|
119
|
+
|
120
|
+
def orientation
|
121
|
+
# Retrieving an orientation "through" the sequence of EXIF tags
|
122
|
+
# is trickier than the method_missing case, because the value
|
123
|
+
# of the orientation can be 0, meaning "unknown". We need to skip through
|
124
|
+
# those and return the _last_ non-0 orientation, or 0 otherwise
|
125
|
+
@multiple_exif_results.reverse_each do |exif_tag_frame|
|
126
|
+
orientation_value = exif_tag_frame.orientation
|
127
|
+
if !orientation_value.nil? && orientation_value != 0
|
128
|
+
return orientation_value
|
129
|
+
end
|
130
|
+
end
|
131
|
+
0 # If none were found - the orientation is unknown
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
def respond_to_missing?(method_name)
|
137
|
+
@multiple_exif_results.last.respond_to?(method_name)
|
138
|
+
end
|
139
|
+
|
140
|
+
def method_missing(*a)
|
141
|
+
return super unless @multiple_exif_results.any?
|
142
|
+
|
143
|
+
# The EXIF tags get appended to the file, so the ones coming _later_
|
144
|
+
# are more specific and potentially overwrite the earlier ones. Walk
|
145
|
+
# through the frames in reverse (starting with one that comes last)
|
146
|
+
# and if it contains the requisite EXIF property, return the value
|
147
|
+
# from that tag.
|
148
|
+
@multiple_exif_results.reverse_each do |exif_tag_frame|
|
149
|
+
value_of = exif_tag_frame.public_send(*a)
|
150
|
+
return value_of if value_of
|
151
|
+
end
|
152
|
+
nil
|
70
153
|
end
|
71
154
|
end
|
72
155
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -21,7 +21,7 @@ class FormatParser::JPEGParser
|
|
21
21
|
@buf = FormatParser::IOConstraint.new(io)
|
22
22
|
@width = nil
|
23
23
|
@height = nil
|
24
|
-
@
|
24
|
+
@exif_data_frames = []
|
25
25
|
scan
|
26
26
|
end
|
27
27
|
|
@@ -66,16 +66,24 @@ class FormatParser::JPEGParser
|
|
66
66
|
|
67
67
|
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
|
68
68
|
|
69
|
+
# A single file might contain multiple EXIF data frames. In a JPEG this would
|
70
|
+
# manifest as multiple APP1 markers. The way different programs handle these
|
71
|
+
# differs, for us it makes the most sense to simply "flatten" them top-down.
|
72
|
+
# So we start with the first EXIF frame, and we then allow the APP1 markers
|
73
|
+
# that come later in the file to override the properties they _do_ specify.
|
74
|
+
flat_exif = FormatParser::EXIFParser::EXIFStack.new(@exif_data_frames)
|
75
|
+
|
69
76
|
# Return at the earliest possible opportunity
|
70
77
|
if @width && @height
|
78
|
+
dw, dh = flat_exif.rotated? ? [@height, @width] : [@width, @height]
|
71
79
|
result = FormatParser::Image.new(
|
72
80
|
format: :jpg,
|
73
81
|
width_px: @width,
|
74
82
|
height_px: @height,
|
75
|
-
display_width_px:
|
76
|
-
display_height_px:
|
77
|
-
orientation:
|
78
|
-
intrinsics: {exif:
|
83
|
+
display_width_px: dw,
|
84
|
+
display_height_px: dh,
|
85
|
+
orientation: flat_exif.orientation_sym,
|
86
|
+
intrinsics: {exif: flat_exif},
|
79
87
|
)
|
80
88
|
|
81
89
|
return result
|
@@ -144,7 +152,7 @@ class FormatParser::JPEGParser
|
|
144
152
|
|
145
153
|
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
|
146
154
|
|
147
|
-
@
|
155
|
+
@exif_data_frames << exif_from_tiff_io(exif_buf)
|
148
156
|
rescue EXIFR::MalformedTIFF
|
149
157
|
# Not a JPEG or the Exif headers contain invalid data, or
|
150
158
|
# an APP1 marker was detected in a file that is not a JPEG
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -31,7 +31,7 @@ class FormatParser::TIFFParser
|
|
31
31
|
height_px: h,
|
32
32
|
display_width_px: exif_data.rotated? ? h : w,
|
33
33
|
display_height_px: exif_data.rotated? ? w : h,
|
34
|
-
orientation: exif_data.
|
34
|
+
orientation: exif_data.orientation_sym,
|
35
35
|
intrinsics: {exif: exif_data},
|
36
36
|
)
|
37
37
|
rescue EXIFR::MalformedTIFF
|
@@ -11,9 +11,9 @@ describe FormatParser::EXIFParser do
|
|
11
11
|
it "is able to parse #{filename}" do
|
12
12
|
result = subject.exif_from_tiff_io(File.open(tiff_path, 'rb'))
|
13
13
|
expect(result).not_to be_nil
|
14
|
-
expect(result.
|
14
|
+
expect(result.orientation_sym).to be_kind_of(Symbol)
|
15
15
|
# Filenames in this dir correspond with the orientation of the file
|
16
|
-
expect(filename).to include(result.
|
16
|
+
expect(filename).to include(result.orientation_sym.to_s)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -130,4 +130,31 @@ describe FormatParser::JPEGParser do
|
|
130
130
|
serialized = JSON.pretty_generate(result)
|
131
131
|
expect(serialized).to be_kind_of(String)
|
132
132
|
end
|
133
|
+
|
134
|
+
it 'correctly recognizes various EXIF orientations' do
|
135
|
+
(0..4).each do |n|
|
136
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
|
137
|
+
result = subject.call(File.open(path, 'rb'))
|
138
|
+
expect(result.display_width_px).to eq(1600)
|
139
|
+
expect(result.display_height_px).to eq(1200)
|
140
|
+
end
|
141
|
+
(5..8).each do |n|
|
142
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
|
143
|
+
result = subject.call(File.open(path, 'rb'))
|
144
|
+
expect(result.display_width_px).to eq(1600)
|
145
|
+
expect(result.display_height_px).to eq(1200)
|
146
|
+
end
|
147
|
+
(0..4).each do |n|
|
148
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
|
149
|
+
result = subject.call(File.open(path, 'rb'))
|
150
|
+
expect(result.display_width_px).to eq(1200)
|
151
|
+
expect(result.display_height_px).to eq(1600)
|
152
|
+
end
|
153
|
+
(5..8).each do |n|
|
154
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
|
155
|
+
result = subject.call(File.open(path, 'rb'))
|
156
|
+
expect(result.display_width_px).to eq(1200)
|
157
|
+
expect(result.display_height_px).to eq(1600)
|
158
|
+
end
|
159
|
+
end
|
133
160
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.
|
4
|
+
version: 0.16.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-
|
12
|
+
date: 2019-08-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -31,14 +31,20 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '1
|
34
|
+
version: '1'
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 1.3.4
|
35
38
|
type: :runtime
|
36
39
|
prerelease: false
|
37
40
|
version_requirements: !ruby/object:Gem::Requirement
|
38
41
|
requirements:
|
39
42
|
- - "~>"
|
40
43
|
- !ruby/object:Gem::Version
|
41
|
-
version: '1
|
44
|
+
version: '1'
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.3.4
|
42
48
|
- !ruby/object:Gem::Dependency
|
43
49
|
name: id3tag
|
44
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -171,6 +177,20 @@ dependencies:
|
|
171
177
|
- - '='
|
172
178
|
- !ruby/object:Gem::Version
|
173
179
|
version: 0.5.0
|
180
|
+
- !ruby/object:Gem::Dependency
|
181
|
+
name: parallel_tests
|
182
|
+
requirement: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
187
|
+
type: :development
|
188
|
+
prerelease: false
|
189
|
+
version_requirements: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
174
194
|
description: |-
|
175
195
|
A Ruby library for prying open files you can convert to a previewable format, such as video, image and audio files. It includes
|
176
196
|
a number of parser modules that try to recover metadata useful for post-processing and layout while reading the absolute
|
@@ -284,7 +304,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
284
304
|
version: '0'
|
285
305
|
requirements: []
|
286
306
|
rubyforge_project:
|
287
|
-
rubygems_version: 2.6
|
307
|
+
rubygems_version: 2.7.6
|
288
308
|
signing_key:
|
289
309
|
specification_version: 4
|
290
310
|
summary: A library for efficient parsing of file metadata
|