format_parser 0.16.0 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/README.md +5 -0
- data/Rakefile +3 -1
- data/format_parser.gemspec +2 -1
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/cr2_parser.rb +1 -1
- data/lib/parsers/exif_parser.rb +110 -27
- data/lib/parsers/jpeg_parser.rb +14 -6
- data/lib/parsers/tiff_parser.rb +1 -1
- data/spec/parsers/exif_parser_spec.rb +2 -2
- data/spec/parsers/jpeg_parser_spec.rb +27 -0
- metadata +25 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 80362b6ee92ad34550681e8c0447dcb93b11071beac25d743356a9c0b7a548f9
|
4
|
+
data.tar.gz: c40fbcae9bd5417420ff5ea573c24f65462f3b0cf5269fc65024e6a237c98f45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f20f7206574ea204393662e29251dc079ce615c593b21c0c4c35f3ba74a08fc9582e15443f23e45771ee2cb73b5b691ee6a31d37f5fb99e847d09d4c2e003cd
|
7
|
+
data.tar.gz: 6a23e2bb7797a5834e0b2867096240a29ea899d2301f1496c94b2e9d35d29698dc8599bef7185785f7a26fc929bff1e9dda1f5d7a96bbfc37227b1a2d42b3c4b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.16.1
|
2
|
+
* All EXIF: Make sure the 0 orientation does not get silently treated as orientation 8, mislabeling
|
3
|
+
images which are not rotated as being rotated (orientation changed)
|
4
|
+
* All EXIF: Make sure the 0 orientation (`unknown`) is correctly passed and represented
|
5
|
+
* JPEG: Make sure multiple EXIF tags in APP1 markers get handled correctly (via overlays)
|
6
|
+
|
1
7
|
## 0.16.0
|
2
8
|
* Add `filename_hint` keyword argument to `FormatParser.parse`. This can hint the library to apply
|
3
9
|
the parser that will likely match for this filename first, and the other parsers later. This helps
|
data/README.md
CHANGED
@@ -170,5 +170,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
170
170
|
### .docx
|
171
171
|
- The .docx files were generated by the project maintainers
|
172
172
|
|
173
|
+
### JPEG examples of EXIF orientation
|
174
|
+
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
175
|
+
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
176
|
+
script.
|
177
|
+
|
173
178
|
### .key
|
174
179
|
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
data/Rakefile
CHANGED
@@ -2,6 +2,7 @@ require 'bundler/gem_tasks'
|
|
2
2
|
require 'rspec/core/rake_task'
|
3
3
|
require 'yard'
|
4
4
|
require 'rubocop/rake_task'
|
5
|
+
require 'parallel_tests/tasks'
|
5
6
|
|
6
7
|
YARD::Rake::YardocTask.new(:doc) do |t|
|
7
8
|
# The dash has to be between the two to "divide" the source files and
|
@@ -11,4 +12,5 @@ end
|
|
11
12
|
|
12
13
|
RuboCop::RakeTask.new
|
13
14
|
RSpec::Core::RakeTask.new(:spec)
|
14
|
-
|
15
|
+
|
16
|
+
task default: ['parallel:spec', :rubocop]
|
data/format_parser.gemspec
CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
|
35
35
|
spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
@@ -42,4 +42,5 @@ Gem::Specification.new do |spec|
|
|
42
42
|
spec.add_development_dependency 'pry', '~> 0.11'
|
43
43
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
44
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
|
+
spec.add_development_dependency 'parallel_tests'
|
45
46
|
end
|
data/lib/parsers/cr2_parser.rb
CHANGED
@@ -37,7 +37,7 @@ class FormatParser::CR2Parser
|
|
37
37
|
height_px: h,
|
38
38
|
display_width_px: exif_data.rotated? ? h : w,
|
39
39
|
display_height_px: exif_data.rotated? ? w : h,
|
40
|
-
orientation: exif_data.
|
40
|
+
orientation: exif_data.orientation_sym,
|
41
41
|
intrinsics: {exif: exif_data},
|
42
42
|
)
|
43
43
|
rescue EXIFR::MalformedTIFF
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -2,30 +2,17 @@ require 'exifr/tiff'
|
|
2
2
|
require 'delegate'
|
3
3
|
|
4
4
|
module FormatParser::EXIFParser
|
5
|
-
ORIENTATIONS =
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
|
15
|
-
|
16
|
-
:bottom_left,
|
17
|
-
:bottom_right,
|
18
|
-
:top_left,
|
19
|
-
:top_right
|
20
|
-
]
|
21
|
-
module MethodsMethodFix
|
22
|
-
# Fix a little bug in EXIFR which breaks delegators
|
23
|
-
# https://github.com/remvee/exifr/pull/55
|
24
|
-
def methods(*)
|
25
|
-
super() # no args
|
26
|
-
end
|
27
|
-
end
|
28
|
-
EXIFR::TIFF.prepend(MethodsMethodFix)
|
5
|
+
ORIENTATIONS = {
|
6
|
+
0 => :unknown, # Non-rotated
|
7
|
+
1 => :top_left, # Non-rotated
|
8
|
+
2 => :top_right, # Non-rotated
|
9
|
+
3 => :bottom_right, # Non-rotated
|
10
|
+
4 => :bottom_left, # Non-rotated
|
11
|
+
5 => :left_top,
|
12
|
+
6 => :right_top,
|
13
|
+
7 => :right_bottom,
|
14
|
+
8 => :left_bottom
|
15
|
+
}
|
29
16
|
|
30
17
|
# EXIFR kindly requests the presence of a few more methods than what our IOConstraint
|
31
18
|
# is willing to provide, but they can be derived from the available ones
|
@@ -55,7 +42,7 @@ module FormatParser::EXIFParser
|
|
55
42
|
|
56
43
|
class EXIFResult < SimpleDelegator
|
57
44
|
def rotated?
|
58
|
-
|
45
|
+
__getobj__.orientation.to_i > 4
|
59
46
|
end
|
60
47
|
|
61
48
|
def to_json(*maybe_coder)
|
@@ -65,8 +52,104 @@ module FormatParser::EXIFParser
|
|
65
52
|
end
|
66
53
|
|
67
54
|
def orientation
|
68
|
-
|
69
|
-
|
55
|
+
__getobj__.orientation.to_i
|
56
|
+
end
|
57
|
+
|
58
|
+
def orientation_sym
|
59
|
+
ORIENTATIONS.fetch(orientation)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# With some formats, multiple EXIF tag frames can be included in a single file.
|
64
|
+
# For example, JPEGs might have multiple APP1 markers which each contain EXIF
|
65
|
+
# data. The EXIF data in them, however, is not necessarily "complete" - it seems
|
66
|
+
# most applications assume that these blocks "overwrite" each other with the properties
|
67
|
+
# they specify. Probably this is done for more efficient saving - instead of overwriting
|
68
|
+
# the EXIF data with a modified version - which would also potentially disturb any digital
|
69
|
+
# signing that this data might include - the applications are supposed to follow the order
|
70
|
+
# in which these tags appear in the file:
|
71
|
+
#
|
72
|
+
# Take a resized image for example:
|
73
|
+
#
|
74
|
+
# APP1 {author: 'John', pixel_width: 1024}
|
75
|
+
# APP1 {pixel_width: 10}
|
76
|
+
#
|
77
|
+
# That image would get a combined EXIF of:
|
78
|
+
#
|
79
|
+
# APP1 {author: 'John', pixel_width: 10}
|
80
|
+
#
|
81
|
+
# since the frame that comes later in the file overwrites a property. From what I see
|
82
|
+
# EXIF tools do, this is the way it works.
|
83
|
+
#
|
84
|
+
# This class acts as a wrapper for this "layering" of chunks of EXIF properties, and will
|
85
|
+
# follow the following conventions:
|
86
|
+
#
|
87
|
+
# * When merging data for JSON conversion, it will merge it top-down. It will overwrite
|
88
|
+
# any specified properties. An exception is made for orientation (see below)
|
89
|
+
# * When retrieving a property, it will look "from the end to the beginning" of the EXIF
|
90
|
+
# dataframe stack, looking for the first dataframe which has this property with a non-nil value
|
91
|
+
# * When retrieving orientation, it will also look from the end and pick the first value which is not nil
|
92
|
+
# but also not 0 ("unknown orientation"). Even files in our test suite contain these.
|
93
|
+
class EXIFStack
|
94
|
+
def initialize(multiple_exif_results)
|
95
|
+
@multiple_exif_results = Array(multiple_exif_results)
|
96
|
+
end
|
97
|
+
|
98
|
+
def to_json(*maybe_coder)
|
99
|
+
# Let EXIF tags that come later overwrite the properties from the tags
|
100
|
+
# that come earlier
|
101
|
+
overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
|
102
|
+
h.merge!(one_exif_frame.to_hash)
|
103
|
+
end
|
104
|
+
# Overwrite the orientation with our custom method implementation, because
|
105
|
+
# it does reject 0-values.
|
106
|
+
overlay[:orientation] = orientation
|
107
|
+
|
108
|
+
sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
|
109
|
+
sanitized.to_json(*maybe_coder)
|
110
|
+
end
|
111
|
+
|
112
|
+
def orientation_sym
|
113
|
+
ORIENTATIONS.fetch(orientation)
|
114
|
+
end
|
115
|
+
|
116
|
+
def rotated?
|
117
|
+
orientation > 4
|
118
|
+
end
|
119
|
+
|
120
|
+
def orientation
|
121
|
+
# Retrieving an orientation "through" the sequence of EXIF tags
|
122
|
+
# is trickier than the method_missing case, because the value
|
123
|
+
# of the orientation can be 0, meaning "unknown". We need to skip through
|
124
|
+
# those and return the _last_ non-0 orientation, or 0 otherwise
|
125
|
+
@multiple_exif_results.reverse_each do |exif_tag_frame|
|
126
|
+
orientation_value = exif_tag_frame.orientation
|
127
|
+
if !orientation_value.nil? && orientation_value != 0
|
128
|
+
return orientation_value
|
129
|
+
end
|
130
|
+
end
|
131
|
+
0 # If none were found - the orientation is unknown
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
def respond_to_missing?(method_name)
|
137
|
+
@multiple_exif_results.last.respond_to?(method_name)
|
138
|
+
end
|
139
|
+
|
140
|
+
def method_missing(*a)
|
141
|
+
return super unless @multiple_exif_results.any?
|
142
|
+
|
143
|
+
# The EXIF tags get appended to the file, so the ones coming _later_
|
144
|
+
# are more specific and potentially overwrite the earlier ones. Walk
|
145
|
+
# through the frames in reverse (starting with one that comes last)
|
146
|
+
# and if it contains the requisite EXIF property, return the value
|
147
|
+
# from that tag.
|
148
|
+
@multiple_exif_results.reverse_each do |exif_tag_frame|
|
149
|
+
value_of = exif_tag_frame.public_send(*a)
|
150
|
+
return value_of if value_of
|
151
|
+
end
|
152
|
+
nil
|
70
153
|
end
|
71
154
|
end
|
72
155
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -21,7 +21,7 @@ class FormatParser::JPEGParser
|
|
21
21
|
@buf = FormatParser::IOConstraint.new(io)
|
22
22
|
@width = nil
|
23
23
|
@height = nil
|
24
|
-
@
|
24
|
+
@exif_data_frames = []
|
25
25
|
scan
|
26
26
|
end
|
27
27
|
|
@@ -66,16 +66,24 @@ class FormatParser::JPEGParser
|
|
66
66
|
|
67
67
|
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
|
68
68
|
|
69
|
+
# A single file might contain multiple EXIF data frames. In a JPEG this would
|
70
|
+
# manifest as multiple APP1 markers. The way different programs handle these
|
71
|
+
# differs, for us it makes the most sense to simply "flatten" them top-down.
|
72
|
+
# So we start with the first EXIF frame, and we then allow the APP1 markers
|
73
|
+
# that come later in the file to override the properties they _do_ specify.
|
74
|
+
flat_exif = FormatParser::EXIFParser::EXIFStack.new(@exif_data_frames)
|
75
|
+
|
69
76
|
# Return at the earliest possible opportunity
|
70
77
|
if @width && @height
|
78
|
+
dw, dh = flat_exif.rotated? ? [@height, @width] : [@width, @height]
|
71
79
|
result = FormatParser::Image.new(
|
72
80
|
format: :jpg,
|
73
81
|
width_px: @width,
|
74
82
|
height_px: @height,
|
75
|
-
display_width_px:
|
76
|
-
display_height_px:
|
77
|
-
orientation:
|
78
|
-
intrinsics: {exif:
|
83
|
+
display_width_px: dw,
|
84
|
+
display_height_px: dh,
|
85
|
+
orientation: flat_exif.orientation_sym,
|
86
|
+
intrinsics: {exif: flat_exif},
|
79
87
|
)
|
80
88
|
|
81
89
|
return result
|
@@ -144,7 +152,7 @@ class FormatParser::JPEGParser
|
|
144
152
|
|
145
153
|
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
|
146
154
|
|
147
|
-
@
|
155
|
+
@exif_data_frames << exif_from_tiff_io(exif_buf)
|
148
156
|
rescue EXIFR::MalformedTIFF
|
149
157
|
# Not a JPEG or the Exif headers contain invalid data, or
|
150
158
|
# an APP1 marker was detected in a file that is not a JPEG
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -31,7 +31,7 @@ class FormatParser::TIFFParser
|
|
31
31
|
height_px: h,
|
32
32
|
display_width_px: exif_data.rotated? ? h : w,
|
33
33
|
display_height_px: exif_data.rotated? ? w : h,
|
34
|
-
orientation: exif_data.
|
34
|
+
orientation: exif_data.orientation_sym,
|
35
35
|
intrinsics: {exif: exif_data},
|
36
36
|
)
|
37
37
|
rescue EXIFR::MalformedTIFF
|
@@ -11,9 +11,9 @@ describe FormatParser::EXIFParser do
|
|
11
11
|
it "is able to parse #{filename}" do
|
12
12
|
result = subject.exif_from_tiff_io(File.open(tiff_path, 'rb'))
|
13
13
|
expect(result).not_to be_nil
|
14
|
-
expect(result.
|
14
|
+
expect(result.orientation_sym).to be_kind_of(Symbol)
|
15
15
|
# Filenames in this dir correspond with the orientation of the file
|
16
|
-
expect(filename).to include(result.
|
16
|
+
expect(filename).to include(result.orientation_sym.to_s)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -130,4 +130,31 @@ describe FormatParser::JPEGParser do
|
|
130
130
|
serialized = JSON.pretty_generate(result)
|
131
131
|
expect(serialized).to be_kind_of(String)
|
132
132
|
end
|
133
|
+
|
134
|
+
it 'correctly recognizes various EXIF orientations' do
|
135
|
+
(0..4).each do |n|
|
136
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
|
137
|
+
result = subject.call(File.open(path, 'rb'))
|
138
|
+
expect(result.display_width_px).to eq(1600)
|
139
|
+
expect(result.display_height_px).to eq(1200)
|
140
|
+
end
|
141
|
+
(5..8).each do |n|
|
142
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
|
143
|
+
result = subject.call(File.open(path, 'rb'))
|
144
|
+
expect(result.display_width_px).to eq(1600)
|
145
|
+
expect(result.display_height_px).to eq(1200)
|
146
|
+
end
|
147
|
+
(0..4).each do |n|
|
148
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
|
149
|
+
result = subject.call(File.open(path, 'rb'))
|
150
|
+
expect(result.display_width_px).to eq(1200)
|
151
|
+
expect(result.display_height_px).to eq(1600)
|
152
|
+
end
|
153
|
+
(5..8).each do |n|
|
154
|
+
path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
|
155
|
+
result = subject.call(File.open(path, 'rb'))
|
156
|
+
expect(result.display_width_px).to eq(1200)
|
157
|
+
expect(result.display_height_px).to eq(1600)
|
158
|
+
end
|
159
|
+
end
|
133
160
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.
|
4
|
+
version: 0.16.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-
|
12
|
+
date: 2019-08-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -31,14 +31,20 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '1
|
34
|
+
version: '1'
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 1.3.4
|
35
38
|
type: :runtime
|
36
39
|
prerelease: false
|
37
40
|
version_requirements: !ruby/object:Gem::Requirement
|
38
41
|
requirements:
|
39
42
|
- - "~>"
|
40
43
|
- !ruby/object:Gem::Version
|
41
|
-
version: '1
|
44
|
+
version: '1'
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.3.4
|
42
48
|
- !ruby/object:Gem::Dependency
|
43
49
|
name: id3tag
|
44
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -171,6 +177,20 @@ dependencies:
|
|
171
177
|
- - '='
|
172
178
|
- !ruby/object:Gem::Version
|
173
179
|
version: 0.5.0
|
180
|
+
- !ruby/object:Gem::Dependency
|
181
|
+
name: parallel_tests
|
182
|
+
requirement: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
187
|
+
type: :development
|
188
|
+
prerelease: false
|
189
|
+
version_requirements: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
174
194
|
description: |-
|
175
195
|
A Ruby library for prying open files you can convert to a previewable format, such as video, image and audio files. It includes
|
176
196
|
a number of parser modules that try to recover metadata useful for post-processing and layout while reading the absolute
|
@@ -284,7 +304,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
284
304
|
version: '0'
|
285
305
|
requirements: []
|
286
306
|
rubyforge_project:
|
287
|
-
rubygems_version: 2.6
|
307
|
+
rubygems_version: 2.7.6
|
288
308
|
signing_key:
|
289
309
|
specification_version: 4
|
290
310
|
summary: A library for efficient parsing of file metadata
|