format_parser 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e49c10ea63c475b9bc61781ea8e76f42ef9e4307
4
- data.tar.gz: c625105933c8654f5aac93bae09476e32917655b
2
+ SHA256:
3
+ metadata.gz: 80362b6ee92ad34550681e8c0447dcb93b11071beac25d743356a9c0b7a548f9
4
+ data.tar.gz: c40fbcae9bd5417420ff5ea573c24f65462f3b0cf5269fc65024e6a237c98f45
5
5
  SHA512:
6
- metadata.gz: cf72d2b021f6fc407b29cbc5b336fd71e022f6a40b67a79646ea58e605c33d34b44404339e7f15cf1c3f63729712a784ac75296fde54525f51a9c6276069619e
7
- data.tar.gz: c68eadacfae8062e6fb1532d3e6976c349f1d3f7a950cafb7ea0bab714a125e81d3d32bccb154ed96c2118c142f984637ed17c6a53d1ee755778836ad008a7a0
6
+ metadata.gz: 7f20f7206574ea204393662e29251dc079ce615c593b21c0c4c35f3ba74a08fc9582e15443f23e45771ee2cb73b5b691ee6a31d37f5fb99e847d09d4c2e003cd
7
+ data.tar.gz: 6a23e2bb7797a5834e0b2867096240a29ea899d2301f1496c94b2e9d35d29698dc8599bef7185785f7a26fc929bff1e9dda1f5d7a96bbfc37227b1a2d42b3c4b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.16.1
2
+ * All EXIF: Make sure the 0 orientation does not get silently treated as orientation 8, mislabeling
3
+ images which are not rotated as being rotated (orientation changed)
4
+ * All EXIF: Make sure the 0 orientation (`unknown`) is correctly passed and represented
5
+ * JPEG: Make sure multiple EXIF tags in APP1 markers get handled correctly (via overlays)
6
+
1
7
  ## 0.16.0
2
8
  * Add `filename_hint` keyword argument to `FormatParser.parse`. This can hint the library to apply
3
9
  the parser that will likely match for this filename first, and the other parsers later. This helps
data/README.md CHANGED
@@ -170,5 +170,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
170
170
  ### .docx
171
171
  - The .docx files were generated by the project maintainers
172
172
 
173
+ ### JPEG examples of EXIF orientation
174
+ - Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
175
+ manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
176
+ script.
177
+
173
178
  ### .key
174
179
  - The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
data/Rakefile CHANGED
@@ -2,6 +2,7 @@ require 'bundler/gem_tasks'
2
2
  require 'rspec/core/rake_task'
3
3
  require 'yard'
4
4
  require 'rubocop/rake_task'
5
+ require 'parallel_tests/tasks'
5
6
 
6
7
  YARD::Rake::YardocTask.new(:doc) do |t|
7
8
  # The dash has to be between the two to "divide" the source files and
@@ -11,4 +12,5 @@ end
11
12
 
12
13
  RuboCop::RakeTask.new
13
14
  RSpec::Core::RakeTask.new(:spec)
14
- task default: [:spec, :rubocop]
15
+
16
+ task default: ['parallel:spec', :rubocop]
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1.0'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
35
35
  spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
@@ -42,4 +42,5 @@ Gem::Specification.new do |spec|
42
42
  spec.add_development_dependency 'pry', '~> 0.11'
43
43
  spec.add_development_dependency 'yard', '~> 0.9'
44
44
  spec.add_development_dependency 'wetransfer_style', '0.5.0'
45
+ spec.add_development_dependency 'parallel_tests'
45
46
  end
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.16.0'
2
+ VERSION = '0.16.1'
3
3
  end
@@ -37,7 +37,7 @@ class FormatParser::CR2Parser
37
37
  height_px: h,
38
38
  display_width_px: exif_data.rotated? ? h : w,
39
39
  display_height_px: exif_data.rotated? ? w : h,
40
- orientation: exif_data.orientation,
40
+ orientation: exif_data.orientation_sym,
41
41
  intrinsics: {exif: exif_data},
42
42
  )
43
43
  rescue EXIFR::MalformedTIFF
@@ -2,30 +2,17 @@ require 'exifr/tiff'
2
2
  require 'delegate'
3
3
 
4
4
  module FormatParser::EXIFParser
5
- ORIENTATIONS = [
6
- :top_left,
7
- :top_right,
8
- :bottom_right,
9
- :bottom_left,
10
- :left_top,
11
- :right_top,
12
- :right_bottom,
13
- :left_bottom
14
- ]
15
- ROTATED_ORIENTATIONS = ORIENTATIONS - [
16
- :bottom_left,
17
- :bottom_right,
18
- :top_left,
19
- :top_right
20
- ]
21
- module MethodsMethodFix
22
- # Fix a little bug in EXIFR which breaks delegators
23
- # https://github.com/remvee/exifr/pull/55
24
- def methods(*)
25
- super() # no args
26
- end
27
- end
28
- EXIFR::TIFF.prepend(MethodsMethodFix)
5
+ ORIENTATIONS = {
6
+ 0 => :unknown, # Non-rotated
7
+ 1 => :top_left, # Non-rotated
8
+ 2 => :top_right, # Non-rotated
9
+ 3 => :bottom_right, # Non-rotated
10
+ 4 => :bottom_left, # Non-rotated
11
+ 5 => :left_top,
12
+ 6 => :right_top,
13
+ 7 => :right_bottom,
14
+ 8 => :left_bottom
15
+ }
29
16
 
30
17
  # EXIFR kindly requests the presence of a few more methods than what our IOConstraint
31
18
  # is willing to provide, but they can be derived from the available ones
@@ -55,7 +42,7 @@ module FormatParser::EXIFParser
55
42
 
56
43
  class EXIFResult < SimpleDelegator
57
44
  def rotated?
58
- ROTATED_ORIENTATIONS.include?(orientation)
45
+ __getobj__.orientation.to_i > 4
59
46
  end
60
47
 
61
48
  def to_json(*maybe_coder)
@@ -65,8 +52,104 @@ module FormatParser::EXIFParser
65
52
  end
66
53
 
67
54
  def orientation
68
- value = __getobj__.orientation.to_i
69
- ORIENTATIONS.fetch(value - 1)
55
+ __getobj__.orientation.to_i
56
+ end
57
+
58
+ def orientation_sym
59
+ ORIENTATIONS.fetch(orientation)
60
+ end
61
+ end
62
+
63
+ # With some formats, multiple EXIF tag frames can be included in a single file.
64
+ # For example, JPEGs might have multiple APP1 markers which each contain EXIF
65
+ # data. The EXIF data in them, however, is not necessarily "complete" - it seems
66
+ # most applications assume that these blocks "overwrite" each other with the properties
67
+ # they specify. Probably this is done for more efficient saving - instead of overwriting
68
+ # the EXIF data with a modified version - which would also potentially disturb any digital
69
+ # signing that this data might include - the applications are supposed to follow the order
70
+ # in which these tags appear in the file:
71
+ #
72
+ # Take a resized image for example:
73
+ #
74
+ # APP1 {author: 'John', pixel_width: 1024}
75
+ # APP1 {pixel_width: 10}
76
+ #
77
+ # That image would get a combined EXIF of:
78
+ #
79
+ # APP1 {author: 'John', pixel_width: 10}
80
+ #
81
+ # since the frame that comes later in the file overwrites a property. From what I see
82
+ # exiftools do, this is the way it works.
83
+ #
84
+ # This class acts as a wrapper for this "layering" of chunks of EXIF properties, and will
85
+ # follow the following conventions:
86
+ #
87
+ # * When merging data for JSON conversion, it will merge it top-down. It will overwrite
88
+ # any specified properties. An exception is made for orientation (see below)
89
+ # * When retrieving a property, it will look "from the end to the beginning" of the EXIF
90
+ # dataframe stack, looking for the first dataframe which has this property with a non-nil value
91
+ # * When retrieving orientation, it will pick the first orientation value which is not nil
92
+ # but also not 0 ("unknown orientation"). Even files in our test suite contain these.
93
+ class EXIFStack
94
+ def initialize(multiple_exif_results)
95
+ @multiple_exif_results = Array(multiple_exif_results)
96
+ end
97
+
98
+ def to_json(*maybe_coder)
99
+ # Let EXIF tags that come later overwrite the properties from the tags
100
+ # that come earlier
101
+ overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
102
+ h.merge!(one_exif_frame.to_hash)
103
+ end
104
+ # Overwrite the orientation with our custom method implementation, because
105
+ # it does reject 0-values.
106
+ overlay[:orientation] = orientation
107
+
108
+ sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
109
+ sanitized.to_json(*maybe_coder)
110
+ end
111
+
112
+ def orientation_sym
113
+ ORIENTATIONS.fetch(orientation)
114
+ end
115
+
116
+ def rotated?
117
+ orientation > 4
118
+ end
119
+
120
+ def orientation
121
+ # Retrieving an orientation "through" the sequence of EXIF tags
122
+ # is trickier than the method_missing case, because the value
123
+ # of the orientation can be 0, meaning "unknown". We need to skip through
124
+ # those and return the _last_ non-0 orientation, or 0 otherwise
125
+ @multiple_exif_results.reverse_each do |exif_tag_frame|
126
+ orientation_value = exif_tag_frame.orientation
127
+ if !orientation_value.nil? && orientation_value != 0
128
+ return orientation_value
129
+ end
130
+ end
131
+ 0 # If none were found - the orientation is unknown
132
+ end
133
+
134
+ private
135
+
136
+ def respond_to_missing?(method_name)
137
+ @multiple_exif_results.last.respond_to?(method_name)
138
+ end
139
+
140
+ def method_missing(*a)
141
+ return super unless @multiple_exif_results.any?
142
+
143
+ # The EXIF tags get appended to the file, so the ones coming _later_
144
+ # are more specific and potentially overwrite the earlier ones. Walk
145
+ # through the frames in reverse (starting with one that comes last)
146
+ # and if it contans the requisite EXIF property, return the value
147
+ # from that tag.
148
+ @multiple_exif_results.reverse_each do |exif_tag_frame|
149
+ value_of = exif_tag_frame.public_send(*a)
150
+ return value_of if value_of
151
+ end
152
+ nil
70
153
  end
71
154
  end
72
155
 
@@ -21,7 +21,7 @@ class FormatParser::JPEGParser
21
21
  @buf = FormatParser::IOConstraint.new(io)
22
22
  @width = nil
23
23
  @height = nil
24
- @exif_data = nil
24
+ @exif_data_frames = []
25
25
  scan
26
26
  end
27
27
 
@@ -66,16 +66,24 @@ class FormatParser::JPEGParser
66
66
 
67
67
  Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
68
68
 
69
+ # A single file might contain multiple EXIF data frames. In a JPEG this would
70
+ # manifest as multiple APP1 markers. The way different programs handle these
71
+ # differs, for us it makes the most sense to simply "flatten" them top-down.
72
+ # So we start with the first EXIF frame, and we then allow the APP1 markers
73
+ # that come later in the file to override the properties they _do_ specify.
74
+ flat_exif = FormatParser::EXIFParser::EXIFStack.new(@exif_data_frames)
75
+
69
76
  # Return at the earliest possible opportunity
70
77
  if @width && @height
78
+ dw, dh = flat_exif.rotated? ? [@height, @width] : [@width, @height]
71
79
  result = FormatParser::Image.new(
72
80
  format: :jpg,
73
81
  width_px: @width,
74
82
  height_px: @height,
75
- display_width_px: @exif_data && @exif_data.rotated? ? @height : @width,
76
- display_height_px: @exif_data && @exif_data.rotated? ? @width : @height,
77
- orientation: @exif_data && @exif_data.orientation,
78
- intrinsics: {exif: @exif_data},
83
+ display_width_px: dw,
84
+ display_height_px: dh,
85
+ orientation: flat_exif.orientation_sym,
86
+ intrinsics: {exif: flat_exif},
79
87
  )
80
88
 
81
89
  return result
@@ -144,7 +152,7 @@ class FormatParser::JPEGParser
144
152
 
145
153
  Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_buf.size)
146
154
 
147
- @exif_data = exif_from_tiff_io(exif_buf)
155
+ @exif_data_frames << exif_from_tiff_io(exif_buf)
148
156
  rescue EXIFR::MalformedTIFF
149
157
  # Not a JPEG or the Exif headers contain invalid data, or
150
158
  # an APP1 marker was detected in a file that is not a JPEG
@@ -31,7 +31,7 @@ class FormatParser::TIFFParser
31
31
  height_px: h,
32
32
  display_width_px: exif_data.rotated? ? h : w,
33
33
  display_height_px: exif_data.rotated? ? w : h,
34
- orientation: exif_data.orientation,
34
+ orientation: exif_data.orientation_sym,
35
35
  intrinsics: {exif: exif_data},
36
36
  )
37
37
  rescue EXIFR::MalformedTIFF
@@ -11,9 +11,9 @@ describe FormatParser::EXIFParser do
11
11
  it "is able to parse #{filename}" do
12
12
  result = subject.exif_from_tiff_io(File.open(tiff_path, 'rb'))
13
13
  expect(result).not_to be_nil
14
- expect(result.orientation).to be_kind_of(Symbol)
14
+ expect(result.orientation_sym).to be_kind_of(Symbol)
15
15
  # Filenames in this dir correspond with the orientation of the file
16
- expect(filename).to include(result.orientation.to_s)
16
+ expect(filename).to include(result.orientation_sym.to_s)
17
17
  end
18
18
  end
19
19
  end
@@ -130,4 +130,31 @@ describe FormatParser::JPEGParser do
130
130
  serialized = JSON.pretty_generate(result)
131
131
  expect(serialized).to be_kind_of(String)
132
132
  end
133
+
134
+ it 'correctly recognizes various EXIF orientations' do
135
+ (0..4).each do |n|
136
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
137
+ result = subject.call(File.open(path, 'rb'))
138
+ expect(result.display_width_px).to eq(1600)
139
+ expect(result.display_height_px).to eq(1200)
140
+ end
141
+ (5..8).each do |n|
142
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Landscape_#{n}.jpg"
143
+ result = subject.call(File.open(path, 'rb'))
144
+ expect(result.display_width_px).to eq(1600)
145
+ expect(result.display_height_px).to eq(1200)
146
+ end
147
+ (0..4).each do |n|
148
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
149
+ result = subject.call(File.open(path, 'rb'))
150
+ expect(result.display_width_px).to eq(1200)
151
+ expect(result.display_height_px).to eq(1600)
152
+ end
153
+ (5..8).each do |n|
154
+ path = fixtures_dir + "/exif-orientation-testimages/manipulated/Portrait_#{n}.jpg"
155
+ result = subject.call(File.open(path, 'rb'))
156
+ expect(result.display_width_px).to eq(1200)
157
+ expect(result.display_height_px).to eq(1600)
158
+ end
159
+ end
133
160
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2019-07-08 00:00:00.000000000 Z
12
+ date: 2019-08-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -31,14 +31,20 @@ dependencies:
31
31
  requirements:
32
32
  - - "~>"
33
33
  - !ruby/object:Gem::Version
34
- version: '1.0'
34
+ version: '1'
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: 1.3.4
35
38
  type: :runtime
36
39
  prerelease: false
37
40
  version_requirements: !ruby/object:Gem::Requirement
38
41
  requirements:
39
42
  - - "~>"
40
43
  - !ruby/object:Gem::Version
41
- version: '1.0'
44
+ version: '1'
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 1.3.4
42
48
  - !ruby/object:Gem::Dependency
43
49
  name: id3tag
44
50
  requirement: !ruby/object:Gem::Requirement
@@ -171,6 +177,20 @@ dependencies:
171
177
  - - '='
172
178
  - !ruby/object:Gem::Version
173
179
  version: 0.5.0
180
+ - !ruby/object:Gem::Dependency
181
+ name: parallel_tests
182
+ requirement: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ type: :development
188
+ prerelease: false
189
+ version_requirements: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - ">="
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
174
194
  description: |-
175
195
  A Ruby library for prying open files you can convert to a previewable format, such as video, image and audio files. It includes
176
196
  a number of parser modules that try to recover metadata useful for post-processing and layout while reading the absolute
@@ -284,7 +304,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
284
304
  version: '0'
285
305
  requirements: []
286
306
  rubyforge_project:
287
- rubygems_version: 2.6.11
307
+ rubygems_version: 2.7.6
288
308
  signing_key:
289
309
  specification_version: 4
290
310
  summary: A library for efficient parsing of file metadata