format_parser 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +4 -1
- data/README.md +5 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +8 -0
- data/lib/parsers/webp_parser.rb +162 -0
- data/spec/parsers/webp_parser_spec.rb +121 -0
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf7fbbf842a1ae6fcde3986b360877223ac699a87950848b508da15f8a8280ad
|
4
|
+
data.tar.gz: 29882db7afe75a1d3b6554f18dbc837cefb1dbe9e8927adafe959ac8d37ade84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0cf33f73ac298f565020e9819c9c7d2e2af340490b869b97a008b4466ac2b0825fed70d5d9e255ef1192520cef92fdeeff0c7ade18d5e38910d6dc2fd0de89f3
|
7
|
+
data.tar.gz: c20cdc92df0d29d1e0c4b9f8c05644e17216f239a8d90e9c7af38f5566b4abaaf6f6289d5cc69d6a25b0ed644236403820b5c3402080f0b7ba40ca112b671d3a
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
33
33
|
* OGG
|
34
34
|
* MPEG, MPG
|
35
35
|
* M3U
|
36
|
+
* WEBP
|
36
37
|
|
37
38
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
38
39
|
|
@@ -198,6 +199,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
198
199
|
### M3U
|
199
200
|
- The M3U fixture files were created by one of the project maintainers
|
200
201
|
|
202
|
+
### WEBP
|
203
|
+
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
204
|
+
licensed, all of the WebP fixture files have been created by one of the project maintainers.
|
205
|
+
|
201
206
|
### .key
|
202
207
|
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
203
208
|
|
data/lib/io_utils.rb
CHANGED
@@ -42,6 +42,14 @@ module FormatParser::IOUtils
|
|
42
42
|
safe_read(@buf, 4).unpack('N').first
|
43
43
|
end
|
44
44
|
|
45
|
+
# Reads the next 2 bytes from @buf and decodes them as an unsigned 16-bit little-endian integer.
def read_little_endian_int_16
  bytes = safe_read(@buf, 2)
  bytes.unpack('v')[0]
end
|
48
|
+
|
49
|
+
# Reads the next 4 bytes from @buf and decodes them as an unsigned 32-bit little-endian integer.
def read_little_endian_int_32
  bytes = safe_read(@buf, 4)
  bytes.unpack('V')[0]
end
|
52
|
+
|
45
53
|
# 'n' is the number of bytes to read
|
46
54
|
def read_string(n)
|
47
55
|
safe_read(@buf, n)
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# WebP is an image format that provides superior lossless and lossy compression for images on the web, with support for
|
2
|
+
# transparency. It uses predictive coding to encode an image, predicting the values in a block of pixels based on the
|
3
|
+
# values of neighbouring blocks. A WebP file consists of VP8 or VP8L data, and a container based on RIFF. There is also
|
4
|
+
# an extended file format, VP8X, that optionally encodes various information such as the color profile, animation
|
5
|
+
# control data, transparency, and EXIF and/or XMP metadata.
|
6
|
+
#
|
7
|
+
# For more information, visit https://developers.google.com/speed/webp.
|
8
|
+
#
|
9
|
+
# TODO: Decide how to determine color mode (depends on variant, transformations, flags, etc.; maybe not worth it).
|
10
|
+
|
11
|
+
class FormatParser::WebpParser
  include FormatParser::EXIFParser
  include FormatParser::IOUtils

  WEBP_MIME_TYPE = 'image/webp'

  # Filename-based hint: returns a truthy value when the name carries a .webp extension (case-insensitive).
  def likely_match?(filename)
    filename =~ /\.webp$/i
  end

  # Attempts to parse the given IO as a WebP file.
  #
  # Returns a FormatParser::Image describing the file, or nil when the RIFF/WEBP signature is missing or the first
  # chunk is not one of the recognised variants.
  def call(io)
    @buf = FormatParser::IOConstraint.new(io)

    # All WebP files start with the following 20 bytes:
    #
    # Offset  | Description
    # -------------------------------------------------------------------------------------
    # 0...3   | "RIFF" (Since WebP is based on the RIFF file container format).
    # 4...7   | The size of the file in bytes - 8 bytes.
    # 8...11  | "WEBP" (To signify that this is a WebP file).
    # 12...15 | The VP8 variant in use ("VP8 ", "VP8L" or "VP8X").
    # 16...19 | The length in bytes of the payload of that first chunk.
    #
    # The 'A4' directive strips trailing spaces, so the lossy variant "VP8 " arrives here as "VP8".
    riff, webp, variant = safe_read(@buf, 20).unpack('A4x4A4A4')
    return unless riff == 'RIFF' && webp == 'WEBP'
    read_data(variant)
  end

  private

  # Dispatches to the reader matching the variant FourCC. Returns nil for an unrecognised variant.
  def read_data(variant)
    case variant
    when 'VP8' # Lossy
      read_lossy_data
    when 'VP8L' # Lossless
      read_lossless_data
    when 'VP8X' # Extended
      read_extended_data
    end
  end

  # Reads the dimensions of a lossy (VP8) image and returns the resulting image.
  def read_lossy_data
    # Encoded as a single VP8 key frame - a 10-byte uncompressed chunk followed by 2+ partitions of compressed data.
    # The first 6 bytes of this chunk contains information that is mostly relevant when using VP8 as a video
    # compression format, and can be ignored.
    safe_skip(@buf, 6)

    # The subsequent 4 bytes contain two 16-bit little endian fields for the width and height, respectively. Only
    # the lower 14 bits of each field hold the dimension; the upper 2 bits are a scaling hint for the decoder and
    # must be masked off, otherwise a non-zero scale would inflate the reported size.
    width, height = safe_read(@buf, 4).unpack('vv').map { |field| field & 0x3fff }
    create_image(width, height)
  end

  # Reads the dimensions and alpha flag of a lossless (VP8L) image and returns the resulting image.
  def read_lossless_data
    # There is a single byte signature, 0x2F, that we can disregard.
    safe_skip(@buf, 1)

    # The subsequent 4 bytes contain the image width and height, respectively, as 14-bit unsigned little endian
    # integers (minus one). The 4 remaining bits consist of a 1-bit flag indicating whether alpha is used, and a 3-bit
    # version that is always zero.
    packed = read_little_endian_int_32
    width = (packed & 0x3fff) + 1
    height = ((packed >> 14) & 0x3fff) + 1
    has_transparency = ((packed >> 28) & 0x1) == 1

    create_image(width, height, has_transparency: has_transparency)
  end

  # Reads the VP8X header of an extended-format file, then scans the remaining chunks when the header flags say that
  # Exif, XMP or animation data is present. Returns the resulting image.
  def read_extended_data
    # After the common RIFF header bytes, the extended file format has a series of 1-bit flags to signify the presence
    # of optional information. These flags are as follows:
    #
    # |0|1|2|3|4|5|6|7|
    # +-+-+-+-+-+-+-+-+
    # |Rsv|I|L|E|X|A|R|
    #
    # Where:
    #   - Rsv & R = Reserved - Should be 0.
    #   - I = Set if file contains an ICC profile.
    #   - L = Set if file contains transparency information.
    #   - E = Set if file contains Exif metadata.
    #   - X = Set if file contains XMP metadata.
    #   - A = Set if file is an animated image.
    flags = read_int_8
    has_transparency = (flags & 0x10) != 0
    has_exif_metadata = (flags & 0x08) != 0
    has_xmp_metadata = (flags & 0x04) != 0
    has_multiple_frames = (flags & 0x02) != 0

    # The flags are followed by three reserved bytes of zeros, and then by the width and height, respectively - each
    # occupying three bytes and each one less than the actual canvas measurements.
    safe_skip(@buf, 3)

    # The 6 bytes are read as a 32-bit word followed by a 16-bit word, both explicitly little endian ('v' rather than
    # the native-endian 'S'), so that the result does not depend on the byte order of the host.
    low, high = safe_read(@buf, 6).unpack('Vv')
    width = (low & 0xffffff) + 1
    height = (((low >> 24) | (high << 8)) & 0xffffff) + 1

    image = create_image(width, height, has_multiple_frames: has_multiple_frames, has_transparency: has_transparency)
    augment_image(image) if has_exif_metadata || has_xmp_metadata || has_multiple_frames
    image
  end

  # Builds the FormatParser::Image common to all three variants.
  def create_image(width, height, has_multiple_frames: false, has_transparency: false)
    FormatParser::Image.new(
      content_type: WEBP_MIME_TYPE,
      format: :webp,
      has_multiple_frames: has_multiple_frames,
      has_transparency: has_transparency,
      height_px: height,
      width_px: width
    )
  end

  # Scans the chunks that follow the VP8X header, looking for the EXIF, XMP and/or ANMF chunks, and records what it
  # finds on the given image (intrinsics, orientation, rotated dimensions and the animation frame count).
  def augment_image(image)
    intrinsics = {}
    num_frames = 0
    loop do
      # Try to read the next chunk header, and break the loop if we've reached EOF.
      begin
        fourcc, chunk_size = safe_read(@buf, 8).unpack('A4V')
      rescue InvalidRead
        break
      end

      case fourcc
      when 'EXIF'
        record_exif(image, intrinsics, safe_read(@buf, chunk_size))
      when 'XMP'
        # The payload is always consumed, even when an earlier XMP chunk has already been kept, so that the read
        # position stays on the chunk boundary. One XMP chunk at most should be present, so only the first is kept.
        xmp = safe_read(@buf, chunk_size)
        intrinsics[:xmp] ||= xmp
      when 'ANMF'
        num_frames += 1 if image.has_multiple_frames
        safe_skip(@buf, chunk_size)
      else
        safe_skip(@buf, chunk_size)
      end

      # A chunk with an odd size is followed by a single padding byte of 0. The padding comes after the payload, so
      # it has to be skipped here rather than before the payload is read.
      safe_skip(@buf, 1) if chunk_size.odd?
    end

    image.intrinsics = intrinsics unless intrinsics.empty?
    image.num_animation_or_video_frames = num_frames if num_frames > 0
  end

  # Parses an EXIF chunk payload and applies it to the image. One Exif chunk at most should be present, even though
  # it is possible for there to be more, so once Exif data has been recorded any further chunk leaves the image
  # untouched - otherwise a repeated chunk would swap the dimensions a second time.
  def record_exif(image, intrinsics, payload)
    exif = exif_from_tiff_io(StringIO.new(payload))
    already_recorded = !intrinsics[:exif].nil?
    intrinsics[:exif] ||= exif
    return if already_recorded

    image.height_px, image.width_px = image.width_px, image.height_px if exif&.rotated?
    image.orientation = exif&.orientation_sym
  end

  FormatParser.register_parser new, natures: [:image], formats: [:webp]
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::WebpParser do
  # Runs the parser against the named file in the WEBP fixtures directory and returns its result. The block form of
  # File.open closes the file handle as soon as parsing has finished instead of leaving it open.
  def parse_fixture(name)
    File.open(fixtures_dir + 'WEBP/' + name, 'rb') { |file| subject.call(file) }
  end

  # Expectations shared by every successfully parsed WebP file.
  def expect_webp_image(result)
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
  end

  it 'does not parse files with an invalid RIFF header' do
    expect(parse_fixture('invalid-header.webp')).to be_nil
  end

  it 'does not parse files with an unrecognised variant' do
    expect(parse_fixture('unrecognised-variant.webp')).to be_nil
  end

  it 'successfully parses lossy (VP8) WebP files' do
    result = parse_fixture('lossy.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses lossless WebP files' do
    result = parse_fixture('lossless.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses lossless WebP files with an alpha channel' do
    result = parse_fixture('lossless-alpha.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(true)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files' do
    result = parse_fixture('extended.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with an alpha channel' do
    result = parse_fixture('extended-alpha.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(true)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with Exif metadata' do
    result = parse_fixture('extended-exif.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).not_to be_nil
    expect(result.intrinsics[:exif]).not_to be_nil
    expect(result.intrinsics[:exif].image_length).to eq(result.height_px)
    expect(result.intrinsics[:exif].image_width).to eq(result.width_px)
    expect(result.orientation).to eq(:top_left)
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with XMP metadata' do
    result = parse_fixture('extended-xmp.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).not_to be_nil
    expect(result.intrinsics[:xmp]).not_to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with animation' do
    result = parse_fixture('extended-animation.webp')
    expect_webp_image(result)
    expect(result.has_multiple_frames).to eq(true)
    expect(result.has_transparency).to eq(true)
    expect(result.height_px).to eq(211)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(211)
  end
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
8
8
|
- Julik Tarkhanov
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -252,6 +252,7 @@ files:
|
|
252
252
|
- lib/parsers/psd_parser.rb
|
253
253
|
- lib/parsers/tiff_parser.rb
|
254
254
|
- lib/parsers/wav_parser.rb
|
255
|
+
- lib/parsers/webp_parser.rb
|
255
256
|
- lib/parsers/zip_parser.rb
|
256
257
|
- lib/parsers/zip_parser/file_reader.rb
|
257
258
|
- lib/parsers/zip_parser/office_formats.rb
|
@@ -291,6 +292,7 @@ files:
|
|
291
292
|
- spec/parsers/psd_parser_spec.rb
|
292
293
|
- spec/parsers/tiff_parser_spec.rb
|
293
294
|
- spec/parsers/wav_parser_spec.rb
|
295
|
+
- spec/parsers/webp_parser_spec.rb
|
294
296
|
- spec/parsers/zip_parser_spec.rb
|
295
297
|
- spec/read_limiter_spec.rb
|
296
298
|
- spec/read_limits_config_spec.rb
|
@@ -302,7 +304,7 @@ licenses:
|
|
302
304
|
- MIT (Hippocratic)
|
303
305
|
metadata:
|
304
306
|
allowed_push_host: https://rubygems.org
|
305
|
-
post_install_message:
|
307
|
+
post_install_message:
|
306
308
|
rdoc_options: []
|
307
309
|
require_paths:
|
308
310
|
- lib
|
@@ -317,8 +319,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
317
319
|
- !ruby/object:Gem::Version
|
318
320
|
version: '0'
|
319
321
|
requirements: []
|
320
|
-
rubygems_version: 3.
|
321
|
-
signing_key:
|
322
|
+
rubygems_version: 3.2.33
|
323
|
+
signing_key:
|
322
324
|
specification_version: 4
|
323
325
|
summary: A library for efficient parsing of file metadata
|
324
326
|
test_files: []
|