format_parser 0.16.0 → 0.16.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e49c10ea63c475b9bc61781ea8e76f42ef9e4307
4
- data.tar.gz: c625105933c8654f5aac93bae09476e32917655b
2
+ SHA256:
3
+ metadata.gz: 80362b6ee92ad34550681e8c0447dcb93b11071beac25d743356a9c0b7a548f9
4
+ data.tar.gz: c40fbcae9bd5417420ff5ea573c24f65462f3b0cf5269fc65024e6a237c98f45
5
5
  SHA512:
6
- metadata.gz: cf72d2b021f6fc407b29cbc5b336fd71e022f6a40b67a79646ea58e605c33d34b44404339e7f15cf1c3f63729712a784ac75296fde54525f51a9c6276069619e
7
- data.tar.gz: c68eadacfae8062e6fb1532d3e6976c349f1d3f7a950cafb7ea0bab714a125e81d3d32bccb154ed96c2118c142f984637ed17c6a53d1ee755778836ad008a7a0
6
+ metadata.gz: 7f20f7206574ea204393662e29251dc079ce615c593b21c0c4c35f3ba74a08fc9582e15443f23e45771ee2cb73b5b691ee6a31d37f5fb99e847d09d4c2e003cd
7
+ data.tar.gz: 6a23e2bb7797a5834e0b2867096240a29ea899d2301f1496c94b2e9d35d29698dc8599bef7185785f7a26fc929bff1e9dda1f5d7a96bbfc37227b1a2d42b3c4b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.16.1
2
+ * All EXIF: Make sure the 0 orientation does not get silently treated as orientation 8, mislabeling
3
+ images which are not rotated as being rotated (orientation changed)
4
+ * All EXIF: Make sure the 0 orientation (`unknown`) is correctly passed and represented
5
+ * JPEG: Make sure multiple EXIF tags in APP1 markers get handled correctly (via overlays)
6
+
1
7
  ## 0.16.0
2
8
  * Add `filename_hint` keyword argument to `FormatParser.parse`. This can hint the library to apply
3
9
  the parser that will likely match for this filename first, and the other parsers later. This helps
data/README.md CHANGED
@@ -170,5 +170,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
170
170
  ### .docx
171
171
  - The .docx files were generated by the project maintainers
172
172
 
173
+ ### JPEG examples of EXIF orientation
174
+ - Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
175
+ manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
176
+ script.
177
+
173
178
  ### .key
174
179
  - The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
data/Rakefile CHANGED
@@ -2,6 +2,7 @@ require 'bundler/gem_tasks'
2
2
  require 'rspec/core/rake_task'
3
3
  require 'yard'
4
4
  require 'rubocop/rake_task'
5
+ require 'parallel_tests/tasks'
5
6
 
6
7
  YARD::Rake::YardocTask.new(:doc) do |t|
7
8
  # The dash has to be between the two to "divide" the source files and
@@ -11,4 +12,5 @@ end
11
12
 
12
13
  RuboCop::RakeTask.new
13
14
  RSpec::Core::RakeTask.new(:spec)
14
- task default: [:spec, :rubocop]
15
+
16
+ task default: ['parallel:spec', :rubocop]
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1.0'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
35
35
  spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
@@ -42,4 +42,5 @@ Gem::Specification.new do |spec|
42
42
  spec.add_development_dependency 'pry', '~> 0.11'
43
43
  spec.add_development_dependency 'yard', '~> 0.9'
44
44
  spec.add_development_dependency 'wetransfer_style', '0.5.0'
45
+ spec.add_development_dependency 'parallel_tests'
45
46
  end
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.16.0'
2
+ VERSION = '0.16.1'
3
3
  end
@@ -37,7 +37,7 @@ class FormatParser::CR2Parser
37
37
  height_px: h,
38
38
  display_width_px: exif_data.rotated? ? h : w,
39
39
  display_height_px: exif_data.rotated? ? w : h,
40
- orientation: exif_data.orientation,
40
+ orientation: exif_data.orientation_sym,
41
41
  intrinsics: {exif: exif_data},
42
42
  )
43
43
  rescue EXIFR::MalformedTIFF
@@ -2,30 +2,17 @@ require 'exifr/tiff'
2
2
  require 'delegate'
3
3
 
4
4
  module FormatParser::EXIFParser
5
- ORIENTATIONS = [
6
- :top_left,
7
- :top_right,
8
- :bottom_right,
9
- :bottom_left,
10
- :left_top,
11
- :right_top,
12
- :right_bottom,
13
- :left_bottom
14
- ]
15
- ROTATED_ORIENTATIONS = ORIENTATIONS - [
16
- :bottom_left,
17
- :bottom_right,
18
- :top_left,
19
- :top_right
20
- ]
21
- module MethodsMethodFix
22
- # Fix a little bug in EXIFR which breaks delegators
23
- # https://github.com/remvee/exifr/pull/55
24
- def methods(*)
25
- super() # no args
26
- end
27
- end
28
- EXIFR::TIFF.prepend(MethodsMethodFix)
5
+ ORIENTATIONS = {
6
+ 0 => :unknown, # Non-rotated
7
+ 1 => :top_left, # Non-rotated
8
+ 2 => :top_right, # Non-rotated
9
+ 3 => :bottom_right, # Non-rotated
10
+ 4 => :bottom_left, # Non-rotated
11
+ 5 => :left_top,
12
+ 6 => :right_top,
13
+ 7 => :right_bottom,
14
+ 8 => :left_bottom
15
+ }
29
16
 
30
17
  # EXIFR kindly requests the presence of a few more methods than what our IOConstraint
31
18
  # is willing to provide, but they can be derived from the available ones
@@ -55,7 +42,7 @@ module FormatParser::EXIFParser
55
42
 
56
43
  class EXIFResult < SimpleDelegator
57
44
  def rotated?
58
- ROTATED_ORIENTATIONS.include?(orientation)
45
+ __getobj__.orientation.to_i > 4
59
46
  end
60
47
 
61
48
  def to_json(*maybe_coder)
@@ -65,8 +52,104 @@ module FormatParser::EXIFParser
65
52
  end
66
53
 
67
54
  def orientation
68
- value = __getobj__.orientation.to_i
69
- ORIENTATIONS.fetch(value - 1)
55
+ __getobj__.orientation.to_i
56
+ end
57
+
58
+ def orientation_sym
59
+ ORIENTATIONS.fetch(orientation)
60
+ end
61
+ end
62
+
63
+ # With some formats, multiple EXIF tag frames can be included in a single file.
64
+ # For example, JPEGs might have multiple APP1 markers which each contain EXIF
65
+ # data. The EXIF data in them, however, is not necessarily "complete" - it seems
66
+ # most applications assume that these blocks "overwrite" each other with the properties
67
+ # they specify. Probably this is done for more efficient saving - instead of overwriting
68
+ # the EXIF data with a modified version - which would also potentially disturb any digital
69
+ # signing that this data might include - the applications are supposed to follow the order
70
+ # in which these tags appear in the file:
71
+ #
72
+ # Take a resized image for example:
73
+ #
74
+ # APP1 {author: 'John', pixel_width: 1024}
75
+ # APP1 {pixel_width: 10}
76
+ #
77
+ # That image would get a combined EXIF of:
78
+ #
79
+ # APP1 {author: 'John', pixel_width: 10}
80
+ #
81
+ # since the frame that comes later in the file overwrites a property. From what I see
82
+ # exiftools do, this is the way it works.
83
+ #
84
+ # This class acts as a wrapper for this "layering" of chunks of EXIF properties, and will
85
+ # follow the following conventions:
86
+ #
87
+ # * When merging data for JSON conversion, it will merge it top-down. It will overwrite
88
+ # any specified properties. An exception is made for orientation (see below)
89
+ # * When retrieving a property, it will look "from the end to the beginning" of the EXIF
90
+ # dataframe stack, looking for the first dataframe which has this property with a non-nil value
91
+ # * When retrieving orientation, it will pick the last orientation value (in file order) which is not nil
92
+ # but also not 0 ("unknown orientation"). Even files in our test suite contain these.
93
+ class EXIFStack
94
+ def initialize(multiple_exif_results)
95
+ @multiple_exif_results = Array(multiple_exif_results)
96
+ end
97
+
98
+ def to_json(*maybe_coder)
99
+ # Let EXIF tags that come later overwrite the properties from the tags
100
+ # that come earlier
101
+ overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
102
+ h.merge!(one_exif_frame.to_hash)
103
+ end
104
+ # Overwrite the orientation with our custom method implementation, because
105
+ # it does reject 0-values.
106
+ overlay[:orientation] = orientation
107
+
108
+ sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
109
+ sanitized.to_json(*maybe_coder)
110
+ end
111
+
112
+ def orientation_sym
113
+ ORIENTATIONS.fetch(orientation)
114
+ end
115
+
116
+ def rotated?
117
+ orientation > 4
118
+ end
119
+
120
+ def orientation
121
+ # Retrieving an orientation "through" the sequence of EXIF tags
122
+ # is trickier than the method_missing case, because the value
123
+ # of the orientation can be 0, meaning "unknown". We need to skip through
124
+ # those and return the _last_ non-0 orientation, or 0 otherwise
125
+ @multiple_exif_results.reverse_each do |exif_tag_frame|
126
+ orientation_value = exif_tag_frame.orientation
127
+ if !orientation_value.nil? && orientation_value != 0
128
+ return orientation_value
129
+ end
130
+ end
131
+ 0 # If none were found - the orientation is unknown
132
+ end
133
+
134
+ private
135
+
136
+ def respond_to_missing?(method_name)
137
+ @multiple_exif_results.last.respond_to?(method_name)
138
+ end
139
+
140
+ def method_missing(*a)
141
+ return super unless @multiple_exif_results.any?
142
+
143
+ # The EXIF tags get appended to the file, so the ones coming _later_
144
+ # are more specific and potentially overwrite the earlier ones. Walk
145
+ # through the frames in reverse (starting with one that comes last)
146
+ # and if it contains the requisite EXIF property, return the value
147
+ # from that tag.
148
+ @multiple_exif_results.reverse_each do |exif_tag_frame|
149
+ value_of = exif_tag_frame.public_send(*a)
150
+ return value_of if value_of
151
+ end
152
+ nil
70
153
  end
71
154
  end
72
155
 
@@ -21,7 +21,7 @@ class FormatParser::JPEGParser
21
21
  @buf = FormatParser::IOConstraint.new(io)
22
22
  @width = nil
23
23
  @height = nil
24
- @exif_data = nil
24
+ @exif_data_frames = []
25
25
  scan
26
26
  end
27
27
 
@@ -66,16 +66,24 @@ class FormatParser::JPEGParser
66
66
 
67
67
  Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
68
68
 
69
+ # A single file might contain multiple EXIF data frames. In a JPEG this would
70
+ # manifest as multiple APP1 markers. The way different programs handle these
71
+ # differs, for us it makes the most sense to simply "flatten" them top-down.
72
+ # So we start with the first EXIF frame, and we then allow the APP1 markers
73
+ # that come later in the file to override the properties they _do_ specify.
74
+ flat_exif = FormatParser::EXIFParser::EXIFStack.new(@exif_data_frames)
75
+
69
76
  # Return at the earliest possible opportunity
70
77
  if @width && @height
78
+ dw, dh = flat_exif.rotated? ? [@height, @width] : [@width, @height]
71
79
  result = FormatParser::Image.new(
72
80
  format: :jpg,
73
81
  width_px: @width,
74
82
  height_px: @height,
75
- display_width_px: @exif_data && @exif_data.rotated? ? @height : @width,
76
- display_height_px: @exif_data && @exif_data.rotated? ? @width : @height,
77
- orientation: @exif_data && @exif_data.orientation,
78
- intrinsics: {exif: @exif_data},
83
+ display_width_px: dw,
84
+ display_height_px: dh,
85
+ orientation: flat_exif.orientation_sym,
86
+ intrinsics: {exif: flat_exif},
79
87
  )
80
88
 
81
89
  return result
@@ -144,7 +152,7 @@ class FormatParser::JPEGParser
144
152
 
145
153
  Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
146
154
 
147
- @exif_data = exif_from_tiff_io(exif_buf)
155
+ @exif_data_frames << exif_from_tiff_io(exif_buf)
148
156
  rescue EXIFR::MalformedTIFF
149
157
  # Not a JPEG or the Exif headers contain invalid data, or
150
158
  # an APP1 marker was detected in a file that is not a JPEG
@@ -31,7 +31,7 @@ class FormatParser::TIFFParser
31
31
  height_px: h,
32
32
  display_width_px: exif_data.rotated? ? h : w,
33
33
  display_height_px: exif_data.rotated? ? w : h,
34
- orientation: exif_data.orientation,
34
+ orientation: exif_data.orientation_sym,
35
35
  intrinsics: {exif: exif_data},
36
36
  )
37
37
  rescue EXIFR::MalformedTIFF
@@ -11,9 +11,9 @@ describe FormatParser::EXIFParser do
11
11
  it "is able to parse #{filename}" do
12
12
  result = subject.exif_from_tiff_io(File.open(tiff_path, 'rb'))
13
13
  expect(result).not_to be_nil
14
- expect(result.orientation).to be_kind_of(Symbol)
14
+ expect(result.orientation_sym).to be_kind_of(Symbol)
15
15
  # Filenames in this dir correspond with the orientation of the file
16
- expect(filename).to include(result.orientation.to_s)
16
+ expect(filename).to include(result.orientation_sym.to_s)
17
17
  end
18
18
  end
19
19
  end
@@ -130,4 +130,31 @@ describe FormatParser::JPEGParser do
130
130
  serialized = JSON.pretty_generate(result)
131
131
  expect(serialized).to be_kind_of(String)
132
132
  end
133
+
134
+ it 'correctly recognizes various EXIF orientations' do
135
+ (0..4).each do |n|
136
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
137
+ result = subject.call(File.open(path, 'rb'))
138
+ expect(result.display_width_px).to eq(1600)
139
+ expect(result.display_height_px).to eq(1200)
140
+ end
141
+ (5..8).each do |n|
142
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
143
+ result = subject.call(File.open(path, 'rb'))
144
+ expect(result.display_width_px).to eq(1600)
145
+ expect(result.display_height_px).to eq(1200)
146
+ end
147
+ (0..4).each do |n|
148
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
149
+ result = subject.call(File.open(path, 'rb'))
150
+ expect(result.display_width_px).to eq(1200)
151
+ expect(result.display_height_px).to eq(1600)
152
+ end
153
+ (5..8).each do |n|
154
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
155
+ result = subject.call(File.open(path, 'rb'))
156
+ expect(result.display_width_px).to eq(1200)
157
+ expect(result.display_height_px).to eq(1600)
158
+ end
159
+ end
133
160
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2019-07-08 00:00:00.000000000 Z
12
+ date: 2019-08-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -31,14 +31,20 @@ dependencies:
31
31
  requirements:
32
32
  - - "~>"
33
33
  - !ruby/object:Gem::Version
34
- version: '1.0'
34
+ version: '1'
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: 1.3.4
35
38
  type: :runtime
36
39
  prerelease: false
37
40
  version_requirements: !ruby/object:Gem::Requirement
38
41
  requirements:
39
42
  - - "~>"
40
43
  - !ruby/object:Gem::Version
41
- version: '1.0'
44
+ version: '1'
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 1.3.4
42
48
  - !ruby/object:Gem::Dependency
43
49
  name: id3tag
44
50
  requirement: !ruby/object:Gem::Requirement
@@ -171,6 +177,20 @@ dependencies:
171
177
  - - '='
172
178
  - !ruby/object:Gem::Version
173
179
  version: 0.5.0
180
+ - !ruby/object:Gem::Dependency
181
+ name: parallel_tests
182
+ requirement: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ type: :development
188
+ prerelease: false
189
+ version_requirements: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - ">="
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
174
194
  description: |-
175
195
  A Ruby library for prying open files you can convert to a previewable format, such as video, image and audio files. It includes
176
196
  a number of parser modules that try to recover metadata useful for post-processing and layout while reading the absolute
@@ -284,7 +304,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
284
304
  version: '0'
285
305
  requirements: []
286
306
  rubyforge_project:
287
- rubygems_version: 2.6.11
307
+ rubygems_version: 2.7.6
288
308
  signing_key:
289
309
  specification_version: 4
290
310
  summary: A library for efficient parsing of file metadata