format_parser 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile +0 -5
- data/README.md +5 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +25 -0
- data/lib/parsers/heif_parser.rb +431 -0
- data/lib/parsers/moov_parser.rb +2 -2
- data/lib/parsers/pdf_parser.rb +1 -1
- data/lib/parsers/webp_parser.rb +162 -0
- data/lib/parsers/zip_parser.rb +1 -1
- data/spec/parsers/heif_parser_spec.rb +75 -0
- data/spec/parsers/webp_parser_spec.rb +121 -0
- metadata +10 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf7fbbf842a1ae6fcde3986b360877223ac699a87950848b508da15f8a8280ad
|
4
|
+
data.tar.gz: 29882db7afe75a1d3b6554f18dbc837cefb1dbe9e8927adafe959ac8d37ade84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0cf33f73ac298f565020e9819c9c7d2e2af340490b869b97a008b4466ac2b0825fed70d5d9e255ef1192520cef92fdeeff0c7ade18d5e38910d6dc2fd0de89f3
|
7
|
+
data.tar.gz: c20cdc92df0d29d1e0c4b9f8c05644e17216f239a8d90e9c7af38f5566b4abaaf6f6289d5cc69d6a25b0ed644236403820b5c3402080f0b7ba40ca112b671d3a
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 1.4.0
|
2
|
+
* Add support for `WEBP` lossy, lossless and extended file formats.
|
3
|
+
|
4
|
+
## 1.3.0
|
5
|
+
* Add `heif_parser` and support for `HEIF` and `HEIC` formats. Exif parsing is still missing.
|
6
|
+
|
7
|
+
## 1.2.1
|
8
|
+
* Resolve bug when `stts` atom is `nil`
|
9
|
+
|
1
10
|
## 1.2.0
|
2
11
|
* Add support for `codecs` in moov_parser for video metadata
|
3
12
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
33
33
|
* OGG
|
34
34
|
* MPEG, MPG
|
35
35
|
* M3U
|
36
|
+
* WEBP
|
36
37
|
|
37
38
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
38
39
|
|
@@ -198,6 +199,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
198
199
|
### M3U
|
199
200
|
- The M3U fixture files were created by one of the project maintainers
|
200
201
|
|
202
|
+
### WEBP
|
203
|
+
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
204
|
+
licensed, all of the WebP fixture files have been created by one of the project maintainers.
|
205
|
+
|
201
206
|
### .key
|
202
207
|
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
203
208
|
|
data/lib/io_utils.rb
CHANGED
@@ -30,5 +30,30 @@ module FormatParser::IOUtils
|
|
30
30
|
nil
|
31
31
|
end
|
32
32
|
|
33
|
+
# Reads a single byte from @buf and returns it as an unsigned 8-bit integer.
def read_int_8
  raw = safe_read(@buf, 1)
  raw.unpack('C')[0]
end
|
36
|
+
|
37
|
+
# Reads two bytes from @buf and returns them as an unsigned 16-bit
# big-endian (network byte order) integer.
def read_int_16
  raw = safe_read(@buf, 2)
  raw.unpack('n')[0]
end
|
40
|
+
|
41
|
+
# Reads four bytes from @buf and returns them as an unsigned 32-bit
# big-endian (network byte order) integer.
def read_int_32
  raw = safe_read(@buf, 4)
  raw.unpack('N')[0]
end
|
44
|
+
|
45
|
+
# Reads two bytes from @buf and returns them as an unsigned 16-bit
# little-endian integer.
def read_little_endian_int_16
  raw = safe_read(@buf, 2)
  raw.unpack('v')[0]
end
|
48
|
+
|
49
|
+
# Reads four bytes from @buf and returns them as an unsigned 32-bit
# little-endian integer.
def read_little_endian_int_32
  raw = safe_read(@buf, 4)
  raw.unpack('V')[0]
end
|
52
|
+
|
53
|
+
# 'n' is the number of bytes to read
|
54
|
+
# Returns the next `n` bytes of @buf, exactly as delivered by safe_read.
def read_string(n)
  byte_count = n
  safe_read(@buf, byte_count)
end
|
57
|
+
|
33
58
|
### TODO: Some kind of built-in offset for the read
|
34
59
|
end
|
@@ -0,0 +1,431 @@
|
|
1
|
+
# HEIF stands for High-Efficiency Image File format, which is basically a container that is capable of storing an image, or a sequence of images in a single file.
|
2
|
+
# There are a number of variants of HEIF, which can be used to store images, sequences of images, or videos using different codecs.
|
3
|
+
# The variant that Apple uses to store images and sequences of images in its iOS and macOS operating systems is High Efficiency Image Coding (HEIC), which uses HEVC / H.265 for content compression.
|
4
|
+
class FormatParser::HEIFParser
|
5
|
+
include FormatParser::IOUtils
|
6
|
+
|
7
|
+
HEIF_MARKER = [0x68, 0x65, 0x69, 0x63].pack('C4') # heif marker
|
8
|
+
FILE_TYPE_BOX_MARKER = [0x66, 0x74, 0x79, 0x70].pack('C4') # ftyp marker
|
9
|
+
META_BOX_MARKER = [0x6D, 0x65, 0x74, 0x61].pack('C4') # meta marker
|
10
|
+
MIF1_MARKER = [0x6D, 0x69, 0x66, 0x31].pack('C4') # mif1 marker
|
11
|
+
MSF1_MARKER = [0x6D, 0x73, 0x66, 0x31].pack('C4') # msf1 marker
|
12
|
+
MEANINGLESS_BYTE = [0x00, 0x00, 0x00, 0x00].pack('C4')
|
13
|
+
HANDLER_MARKER = [0x68, 0x64, 0x6C, 0x72].pack('C4') # hdlr marker
|
14
|
+
ITEM_PROPERTIES_BOX = [0x69, 0x70, 0x72, 0x70].pack('C4') # iprp marker
|
15
|
+
ITEM_PROPERTIES_CONTAINER_BOX = [0x69, 0x70, 0x63, 0x6F].pack('C4') # ipco marker
|
16
|
+
IMAGE_SPATIAL_EXTENTS_BOX = [0x69, 0x73, 0x70, 0x65].pack('C4') # ispe marker
|
17
|
+
PIXEL_ASPECT_RATIO_BOX = [0x70, 0x61, 0x73, 0x70].pack('C4') # pasp marker
|
18
|
+
ITEM_INFO_BOX = [0x69, 0x69, 0x6E, 0x66].pack('C4') # iinf marker
|
19
|
+
ITEM_INFO_ENTRY = [0x69, 0x6E, 0x66, 0x65].pack('C4') # infe marker
|
20
|
+
MIME_MARKER = [0x6D, 0x69, 0x6D, 0x65].pack('C4') # mime marker
|
21
|
+
COLOUR_INFO_BOX = [0x63, 0x6F, 0x6C, 0x72].pack('C4') # colr marker
|
22
|
+
PIXEL_INFO_BOX = [0x70, 0x69, 0x78, 0x69].pack('C4') # pixi marker
|
23
|
+
RELATIVE_LOCATION_BOX = [0x72, 0x6C, 0x6F, 0x63].pack('C4') # rloc marker
|
24
|
+
CLEAN_APERTURE_BOX = [0x63, 0x6C, 0x61, 0x70].pack('C4') # clap marker
|
25
|
+
PRIMARY_ITEM_BOX = [0x70, 0x69, 0x74, 0x6D].pack('C4') # pitm marker
|
26
|
+
ITEM_PROPERTIES_ASSOCIATION_BOX = [0x69, 0x70, 0x6D, 0x61].pack('C4') # ipma marker
|
27
|
+
IMAGE_ROTATION_BOX = [0x69, 0x72, 0x6F, 0x74].pack('C4') # irot marker
|
28
|
+
HEADER_LENGTH = 8 # every box header has a length of 8 bytes
|
29
|
+
HEIC_MIME_POSSIBLE_TYPES = {
|
30
|
+
'heic' => :heic,
|
31
|
+
'heix' => :heix,
|
32
|
+
'heim' => :heim,
|
33
|
+
'heis' => :heis
|
34
|
+
}
|
35
|
+
HEIC_MIME_TYPE = 'image/heic'
|
36
|
+
HEIF_MIME_TYPE = 'image/heif'
|
37
|
+
# TODO: use the following when adding image-sequence parsing
|
38
|
+
# HEIC_SEQUENCE_MIME_TYPE = 'image/heic-sequence'
|
39
|
+
# HEIF_SEQUENCE_MIME_TYPE = 'image/heif-sequence'
|
40
|
+
|
41
|
+
# Class-level convenience entry point: builds a fresh parser and hands the IO to it.
def self.call(io)
  parser = new
  parser.call(io)
end
|
44
|
+
|
45
|
+
# Parses `io` as a HEIF/HEIC file.
#
# Wraps the IO in a FormatParser::IOConstraint, resets every piece of
# per-file state held in instance variables, and then delegates to #scan.
#
# @param io the IO-like object to read from
# @return whatever #scan returns
def call(io)
  @buf = FormatParser::IOConstraint.new(io)

  # Reset all per-file state so nothing leaks between calls on the same instance.
  @format = nil
  @major_brand = nil # instance variable, matching how scan_file_type_box and scan use it
  @width = nil
  @height = nil
  @exif_data_frames = []
  @compatible_brands = nil
  @metadata_start_pos = 0
  @metadata_end_pos = 0
  @handler_type = nil
  @sub_items = nil
  @pixel_aspect_ratio = nil
  @colour_info = nil
  @pixel_info = nil
  @horizontal_offset = nil
  @vertical_offset = nil
  @clean_aperture = nil
  @primary_item_id = 0
  @item_props = {}
  @rotation = 0
  @item_props_idxs = []
  @content_type = nil

  scan
end
|
70
|
+
|
71
|
+
# Scans the file and builds the parse result.
#
# @return [FormatParser::Image, nil] the image description, or nil when the
#   file has no usable `ftyp` box or none of its brands is recognised
def scan
  # All HEIC files must conform to ISO/IEC 23008-12:2017.
  # Moreover, all HEIC files conform to ISO/IEC 14496-12:2015 and should conform to Clause 4 of that spec.
  # Files are formed as a series of objects, called boxes. All data is contained in such boxes.
  # All boxes start with a header which defines both size and type.
  # The size is the entire size of the box, including the size and type header, fields, and all contained boxes.
  # The fields in the objects are stored with the most significant byte first (network byte order / big-endian).
  # A HEIC file must contain a File Type Box (ftyp).
  # A file conforms to all the requirements of the brands listed in the compatible_brands.
  scan_file_type_box

  # The file may be identified by the MIME type of Annex C of ISO/IEC 23008-12 if 'mif1' is the major brand,
  # or Annex D if 'msf1' is the major brand.
  # Note particularly that the brand 'mif1' doesn't mandate a MovieBox ("moov").
  # One or more brands must be included in the list of compatible brands.
  return if @compatible_brands.nil?

  if @compatible_brands.include?(MIF1_MARKER)
    scan_meta_level_box
    if @major_brand == MIF1_MARKER
      @content_type = HEIF_MIME_TYPE
      @format = :heif
    elsif (@compatible_brands & HEIC_MIME_POSSIBLE_TYPES.keys).any?
      @format = :heic
      @content_type = HEIC_MIME_TYPE
    end
  end

  # TODO: image sequences ('msf1' compatible brand) are not parsed yet.

  # No recognised brand combination: report "not ours" instead of an Image without a format.
  return if @format.nil?

  FormatParser::Image.new(
    format: @format,
    width_px: @width,
    height_px: @height,
    intrinsics: {
      compatible_brands: @compatible_brands,
      handler_type: @handler_type,
      # 'sub_items': @sub_items, # enable this if you want to output all the sub-items in the image
      pixel_aspect_ratio: @pixel_aspect_ratio,
      colour_info: @colour_info,
      pixel_info: @pixel_info,
      horizontal_offset: @horizontal_offset,
      vertical_offset: @vertical_offset,
      clean_aperture: @clean_aperture,
      rotation: @rotation
    },
    content_type: @content_type
  )
end
|
122
|
+
|
123
|
+
# Reads the leading `ftyp` box: stores the major brand in @major_brand and,
# when that brand is 'heic' or 'mif1', the list of compatible brands in
# @compatible_brands. Leaves @compatible_brands untouched otherwise.
def scan_file_type_box
  box_size = read_int_32
  return unless read_string(4) == FILE_TYPE_BOX_MARKER

  @major_brand = read_string(4)
  return unless [HEIF_MARKER, MIF1_MARKER].include?(@major_brand)

  read_string(4) # minor brand, not used

  # What remains after the 8-byte box header, the major brand and the 4-byte
  # minor brand is a packed list of 4-byte compatible brands.
  already_consumed = HEADER_LENGTH + HEIF_MARKER.length + 4
  brand_count = (box_size - already_consumed) / 4

  @compatible_brands = []
  brand_count.times { @compatible_brands << read_string(4) }
end
|
139
|
+
|
140
|
+
def scan_meta_level_box
|
141
|
+
metadata_length = read_int_32
|
142
|
+
return unless read_string(4) == META_BOX_MARKER
|
143
|
+
@metadata_start_pos = @buf.pos
|
144
|
+
@metadata_end_pos = @buf.pos + metadata_length - HEADER_LENGTH # the real data is always without the 8 initial bytes of the handler
|
145
|
+
read_nil_version_and_flag
|
146
|
+
|
147
|
+
# we are looking for box/containers right beneath the Meta box
|
148
|
+
# we start with the HDLR (Handler) box..
|
149
|
+
handler_length = read_int_32
|
150
|
+
return unless read_string(4) == HANDLER_MARKER
|
151
|
+
handler_length -= HEADER_LENGTH # subtract the header as usual (will not be mentioned anymore from now on)
|
152
|
+
handler_start = @buf.pos
|
153
|
+
# the handler type declares the type of metadata and thus the process by which the media-data in the track is presented
|
154
|
+
# it also indicates the structure or format of the ‘meta’ box contents
|
155
|
+
read_nil_version_and_flag
|
156
|
+
read_string(4) # pre_defined bytes, always 4 null bytes in the hdlr box
|
157
|
+
@handler_type = read_string(4)
|
158
|
+
@buf.seek(handler_start + handler_length) # the remaining part is reserved
|
159
|
+
|
160
|
+
# ..continue looking for the IINF box and especially for the IPRP box, containing info about the image itself
|
161
|
+
next_box_length = read_int_32
|
162
|
+
next_box = read_string(4)
|
163
|
+
next_box_start_pos = @buf.pos
|
164
|
+
while @buf.pos < @metadata_end_pos # we iterate over all next incoming boxed but without going outside the meta-box
|
165
|
+
case next_box
|
166
|
+
when PRIMARY_ITEM_BOX
|
167
|
+
read_primary_item_box
|
168
|
+
when ITEM_INFO_BOX
|
169
|
+
read_item_info_box
|
170
|
+
when ITEM_PROPERTIES_BOX
|
171
|
+
read_item_properties_box
|
172
|
+
fill_primary_values
|
173
|
+
when next_box == ''
|
174
|
+
break
|
175
|
+
end
|
176
|
+
next_box_length, next_box, next_box_start_pos = get_next_box(next_box_start_pos, next_box_length, @metadata_end_pos)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Reads the `iinf` (item info) box and collects one hash per `infe` entry in
# @sub_items, each with :item_id, :item_type and :content_encoding.
#
# Stops early (returning nil) when an entry does not carry the `infe` marker
# or uses an entry version other than 2 or 3.
def read_item_info_box
  version = read_int_8
  safe_skip(@buf, 3) # 0 flags
  entry_count = version == 0 ? read_int_16 : read_int_32
  @sub_items = []
  entry_count.times do
    item_info_entry_length = read_int_32
    return unless read_string(4) == ITEM_INFO_ENTRY
    item_info_end_pos = @buf.pos + item_info_entry_length - HEADER_LENGTH
    entry_version = read_int_8
    safe_skip(@buf, 3) # 0 flags
    case entry_version
    when 2 then item_id = read_int_16
    when 3 then item_id = read_int_32
    else return # version not handled here, hence return
    end
    safe_skip(@buf, 2) # not interested in the item_protection_index
    item_type = read_string(4)
    content_encoding = ''
    if item_type == MIME_MARKER
      # Strip the null terminators for readability. String#delete (not delete!)
      # is used because delete! returns nil when there is nothing to remove.
      content_encoding = read_string(item_info_end_pos - @buf.pos).delete("\0")
    end
    @sub_items << {item_id: item_id, item_type: item_type, content_encoding: content_encoding}
    @buf.seek(item_info_end_pos) # nothing else of interest; jump to the end of this 'infe' box
  end
end
|
213
|
+
|
214
|
+
# Skips the 1-byte version and the 3-byte flags that open a box; both are
# expected to be zero in the boxes this is called for, so they are ignored.
def read_nil_version_and_flag
  version_width = 1
  flags_width = 3
  safe_skip(@buf, version_width)
  safe_skip(@buf, flags_width)
end
|
218
|
+
|
219
|
+
# Reads the `pitm` box and stores the primary item id in @primary_item_id.
# The id is 16 bits wide for box version 0 and 32 bits wide otherwise.
def read_primary_item_box
  box_version = read_int_8
  safe_read(@buf, 3) # flags, always 0 in this box
  @primary_item_id = box_version == 0 ? read_int_16 : read_int_32
end
|
228
|
+
|
229
|
+
# The ITEM_PROPERTIES_CONTAINER_BOX contains an implicitly 1-based indexed list of item properties.
|
230
|
+
# While parsing such box we are storing the properties with its own index.
|
231
|
+
# The reason is that the primary_item will be associated with some of these properties through the same index
|
232
|
+
# and in order to output relevant data from the format_parser we need all the properties associated to the primary_item.
|
233
|
+
# Hence the need of the association between an item and its properties, found in the ITEM_PROPERTIES_ASSOCIATION_BOX
|
234
|
+
# Reads the `iprp` box: first the `ipco` property container, then the `ipma`
# association box. Returns nil as soon as one of the two expected markers is
# missing.
def read_item_properties_box
  container_length = read_int_32
  container_marker = read_string(4)
  return if container_marker != ITEM_PROPERTIES_CONTAINER_BOX
  read_item_properties_container_box(container_length)

  read_int_32 # length of the ipma box, not needed
  association_marker = read_string(4)
  return if association_marker != ITEM_PROPERTIES_ASSOCIATION_BOX
  read_item_properties_association_box
end
|
242
|
+
|
243
|
+
# Walks the property boxes inside `ipco` and stores every recognised one in
# @item_props under its 1-based position in the container. Unrecognised
# properties are skipped but still consume an index.
#
# @param box_length [Integer] the length of the `ipco` box including its 8-byte header
def read_item_properties_container_box(box_length)
  end_of_ipco_box = @buf.pos + box_length - HEADER_LENGTH
  item_prop_length = read_int_32
  item_prop_name = read_string(4)
  item_prop_start_pos = @buf.pos
  item_prop_index = 1
  while @buf.pos < end_of_ipco_box
    case item_prop_name
    when IMAGE_SPATIAL_EXTENTS_BOX
      read_nil_version_and_flag
      width = read_int_32
      height = read_int_32
      @item_props[item_prop_index] = {
        type: IMAGE_SPATIAL_EXTENTS_BOX,
        width: width,
        height: height
      }
    when PIXEL_ASPECT_RATIO_BOX
      h_spacing = read_int_32
      v_spacing = read_int_32
      @item_props[item_prop_index] = {
        type: PIXEL_ASPECT_RATIO_BOX,
        pixel_aspect_ratio: "#{h_spacing}/#{v_spacing}"
      }
    when COLOUR_INFO_BOX
      # NOTE(review): ISO/IEC 14496-12 appears to place a 4-byte colour_type before
      # these fields - confirm whether it needs to be consumed first.
      colour_info = {
        colour_primaries: read_int_16,
        transfer_characteristics: read_int_16,
        matrix_coefficients: read_int_16
      }
      @item_props[item_prop_index] = {
        type: COLOUR_INFO_BOX,
        colour_info: colour_info
      }
    when PIXEL_INFO_BOX
      read_nil_version_and_flag
      num_channels = read_int_8
      # One single-entry hash per channel, numbered from 1 to num_channels.
      pixel_info = (1..num_channels).map do |channel|
        {"bits_in_channel_#{channel}": read_int_8}
      end
      @item_props[item_prop_index] = {
        type: PIXEL_INFO_BOX,
        pixel_info: pixel_info
      }
    when RELATIVE_LOCATION_BOX
      read_nil_version_and_flag
      horizontal_offset = read_int_32
      vertical_offset = read_int_32
      @item_props[item_prop_index] = {
        type: RELATIVE_LOCATION_BOX,
        horizontal_offset: horizontal_offset,
        vertical_offset: vertical_offset
      }
    when CLEAN_APERTURE_BOX
      clean_aperture = [{
        clean_aperture_width_n: read_int_32,
        clean_aperture_width_d: read_int_32,
        clean_aperture_height_n: read_int_32,
        clean_aperture_height_d: read_int_32,
        horiz_off_n: read_int_32,
        horiz_off_d: read_int_32,
        vert_off_n: read_int_32,
        vert_off_d: read_int_32
      }]
      @item_props[item_prop_index] = {
        type: CLEAN_APERTURE_BOX,
        clean_aperture: clean_aperture
      }
    when IMAGE_ROTATION_BOX
      read_nil_version_and_flag
      # Only the two least significant bits matter: they hold the angle multiplier (x 90 degrees).
      rotation = (read_int_8 & 0b11) * 90
      @item_props[item_prop_index] = {
        type: IMAGE_ROTATION_BOX,
        rotation: rotation
      }
    end

    # A property shorter than its own header cannot be skipped forward; stop
    # instead of re-reading the same header forever on malformed input.
    break if item_prop_length < HEADER_LENGTH

    item_prop_length, item_prop_name, item_prop_start_pos = get_next_box(item_prop_start_pos, item_prop_length, end_of_ipco_box)
    item_prop_index += 1
  end
end
|
332
|
+
|
333
|
+
# Reads the `ipma` box and appends to @item_props_idxs the property indexes
# associated with the primary item. Entries for other items are read and
# discarded; reading stops right after the primary item's entry.
def read_item_properties_association_box
  version = read_int_8
  safe_read(@buf, 2) # upper two bytes of the 3-byte flags field, unused
  flags = read_int_8
  wide_indexes = (flags & 1) == 1 # 15-bit property indexes instead of 7-bit ones

  read_int_32.times do
    item_id = version == 0 ? read_int_16 : read_int_32
    for_primary_item = item_id == @primary_item_id

    read_int_8.times do
      # The top bit of the first byte is the "essential" flag, not part of the index.
      property_index = read_int_8 & 0x7F
      property_index = (property_index << 8) | read_int_8 if wide_indexes
      @item_props_idxs << property_index if for_primary_item
    end

    # Only the primary item is of interest, so stop once it has been read.
    return if for_primary_item
  end
end
|
367
|
+
|
368
|
+
# Copies the values of every property associated with the primary item
# (indexes in @item_props_idxs) from @item_props into the result fields.
# Indexes without a stored property are ignored.
def fill_primary_values
  @item_props_idxs.each do |index|
    property = @item_props[index]
    next if property.nil?

    kind = property[:type]
    if kind == IMAGE_SPATIAL_EXTENTS_BOX
      @width = property[:width]
      @height = property[:height]
    elsif kind == PIXEL_ASPECT_RATIO_BOX
      @pixel_aspect_ratio = property[:pixel_aspect_ratio]
    elsif kind == COLOUR_INFO_BOX
      @colour_info = property[:colour_info]
    elsif kind == PIXEL_INFO_BOX
      @pixel_info = property[:pixel_info]
    elsif kind == RELATIVE_LOCATION_BOX
      @horizontal_offset = property[:horizontal_offset]
      @vertical_offset = property[:vertical_offset]
    elsif kind == CLEAN_APERTURE_BOX
      @clean_aperture = property[:clean_aperture]
    elsif kind == IMAGE_ROTATION_BOX
      @rotation = property[:rotation]
    end
  end
end
|
392
|
+
|
393
|
+
# Reads 4-byte words until one passes meaningful? or the end of the meta box
# (@metadata_end_pos) is reached. Returns that word, or nil if none is found.
def next_meaningful_meta_byte
  until @buf.pos >= @metadata_end_pos
    candidate = read_string(4)
    return candidate if meaningful?(candidate)
  end
end
|
399
|
+
|
400
|
+
# Seeks past the current box and reads the header of the next one.
#
# `box_start_pos` is the position right after the current box's 8-byte header
# and `box_length` its full length including that header. Returns
# [length, name, position after the header] for the next box, or nil when the
# next header would start at or beyond `end_pos_upper_box`.
def get_next_box(box_start_pos, box_length, end_pos_upper_box)
  next_header_pos = box_start_pos + box_length - HEADER_LENGTH
  @buf.seek(next_header_pos)
  return nil unless next_header_pos < end_pos_upper_box

  length = read_int_32
  name = read_string(4)
  [length, name, @buf.pos]
end
|
408
|
+
|
409
|
+
# True unless the given 4-byte word equals MEANINGLESS_BYTE.
def meaningful?(byte)
  !(byte == MEANINGLESS_BYTE)
end
|
412
|
+
|
413
|
+
# Converts a non-negative integer into its bits, most significant first,
# left-padded with zeros to at least 8 entries.
#
# Every element is an Integer (0 or 1), so the result can be indexed,
# concatenated and joined consistently.
#
# @param integer [Integer] the value to convert (a byte, 0..255, in practice)
# @return [Array<Integer>] the bits, e.g. 5 => [0, 0, 0, 0, 0, 1, 0, 1]
def convert_byte_to_binary(integer)
  # Values <= 0 yield eight zero bits.
  value = integer > 0 ? integer : 0
  value.to_s(2).rjust(8, '0').chars.map(&:to_i)
end
|
425
|
+
|
426
|
+
# Tells whether the filename ends in `.heic` or `.heif` (case-insensitive).
#
# The pattern is anchored with \z so that only the true end of the string
# counts; `$` would also match before a newline inside the name.
#
# @param filename [String]
# @return [Integer, nil] the match position (truthy) or nil when there is no match
def likely_match?(filename)
  filename =~ /\.hei[cf]\z/i
end
|
429
|
+
|
430
|
+
FormatParser.register_parser(new, natures: :image, formats: [:heif, :heic], priority: 2)
|
431
|
+
end
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -136,7 +136,7 @@ class FormatParser::MOOVParser
|
|
136
136
|
|
137
137
|
if stts && mdhd
|
138
138
|
timescale = mdhd.atom_fields[:tscale]
|
139
|
-
sample_duration = stts.field_value(:entries).
|
139
|
+
sample_duration = stts.field_value(:entries).dig(0, :sample_duration)
|
140
140
|
if timescale.nil? || timescale == 0 || sample_duration.nil? || sample_duration == 0
|
141
141
|
nil
|
142
142
|
else
|
@@ -163,5 +163,5 @@ class FormatParser::MOOVParser
|
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
FormatParser.register_parser new, natures: :video, formats: FTYP_MAP.values, priority:
|
166
|
+
FormatParser.register_parser new, natures: :video, formats: FTYP_MAP.values, priority: 3
|
167
167
|
end
|
data/lib/parsers/pdf_parser.rb
CHANGED
@@ -21,5 +21,5 @@ class FormatParser::PDFParser
|
|
21
21
|
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
22
22
|
end
|
23
23
|
|
24
|
-
FormatParser.register_parser new, natures: :document, formats: :pdf, priority:
|
24
|
+
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 3
|
25
25
|
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# WebP is an image format that provides superior lossless and lossy compression for images on the web, with support for
|
2
|
+
# transparency. It uses predictive coding to encode an image, predicting the values in a block of pixels based on the
|
3
|
+
# values of neighbouring blocks. A WebP file consists of VP8 or VP8L data, and a container based on RIFF. There is also
|
4
|
+
# an extended file format, VP8X, that optionally encodes various information such as the color profile, animation
|
5
|
+
# control data, transparency, and EXIF and/or XMP metadata.
|
6
|
+
#
|
7
|
+
# For more information, visit https://developers.google.com/speed/webp.
|
8
|
+
#
|
9
|
+
# TODO: Decide how to determine color mode (depends on variant, transformations, flags, etc.; maybe not worth it).
|
10
|
+
|
11
|
+
class FormatParser::WebpParser
|
12
|
+
include FormatParser::EXIFParser
|
13
|
+
include FormatParser::IOUtils
|
14
|
+
|
15
|
+
WEBP_MIME_TYPE = 'image/webp'
|
16
|
+
|
17
|
+
# Tells whether the filename ends in `.webp` (case-insensitive).
#
# The pattern is anchored with \z so that only the true end of the string
# counts; `$` would also match before a newline inside the name.
#
# @param filename [String]
# @return [Integer, nil] the match position (truthy) or nil when there is no match
def likely_match?(filename)
  filename =~ /\.webp\z/i
end
|
20
|
+
|
21
|
+
# Parses `io` as a WebP file. Returns nil unless the RIFF/WEBP signature is
# present; otherwise returns whatever read_data produces for the variant.
def call(io)
  @buf = FormatParser::IOConstraint.new(io)

  # All WebP files start with the following 20 bytes:
  #
  # Offset  | Description
  # -------------------------------------------------------------------------------------
  # 0...3   | "RIFF" (Since WebP is based on the RIFF file container format).
  # 4...7   | The size of the file in bytes - 8 bytes.
  # 8...11  | "WEBP" (To signify that this is a WebP file).
  # 12...15 | The VP8 variant in use ("VP8 ", "VP8L" or "VP8X")
  # 16...19 | The length of the VP8 data in bytes (i.e. The size of the file - 20 bytes).
  header = safe_read(@buf, 20)
  riff, webp, variant = header.unpack('A4x4A4A4')
  return if riff != 'RIFF' || webp != 'WEBP'

  read_data(variant)
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Dispatches to the reader for the given chunk variant; the variant arrives
# with trailing spaces already stripped. Returns nil for anything else.
def read_data(variant)
  if variant == 'VP8' # Lossy
    read_lossy_data
  elsif variant == 'VP8L' # Lossless
    read_lossless_data
  elsif variant == 'VP8X' # Extended
    read_extended_data
  end
end
|
52
|
+
|
53
|
+
# Reads the dimensions of a lossy (VP8) image and builds the result via create_image.
def read_lossy_data
  # Encoded as a single VP8 key frame - a 10-byte uncompressed chunk followed by 2+ partitions of compressed data.
  # The first 6 bytes of this chunk contain information that is mostly relevant when using VP8 as a video
  # compression format, and can be ignored.
  safe_skip(@buf, 6)

  # The subsequent 4 bytes hold two 16-bit little-endian words. In each word the low 14 bits are the
  # dimension (width first, then height) and the top 2 bits are an upscaling hint, which must be masked off.
  width_word, height_word = safe_read(@buf, 4).unpack('vv')
  create_image(width_word & 0x3fff, height_word & 0x3fff)
end
|
64
|
+
|
65
|
+
# Reads the dimensions and alpha flag of a lossless (VP8L) image and builds
# the result via create_image.
def read_lossless_data
  # There is a single byte signature, 0x2F, that we can disregard.
  safe_skip(@buf, 1)

  # The next 32-bit little-endian word packs, from the least significant bit up:
  # 14 bits of width - 1, 14 bits of height - 1, a 1-bit alpha flag and a 3-bit
  # version that is always zero.
  packed = read_little_endian_int_32
  fourteen_bits = 0x3fff
  width = (packed & fourteen_bits) + 1
  height = ((packed >> 14) & fourteen_bits) + 1
  alpha_used = ((packed >> 28) & 0x1) == 1

  create_image(width, height, has_transparency: alpha_used)
end
|
79
|
+
|
80
|
+
# Reads the VP8X (extended format) header, builds the image via create_image
# and, when the flags announce Exif, XMP or animation data, lets
# augment_image scan the remaining chunks for it.
#
# Returns the image built by create_image.
def read_extended_data
  # After the common RIFF header bytes, the extended file format has a series of 1-bit flags to signify the presence
  # of optional information. These flags are as follows:
  #
  # |0|1|2|3|4|5|6|7|
  # +-+-+-+-+-+-+-+-+
  # |Rsv|I|L|E|X|A|R|
  #
  # Where:
  #   - Rsv & R = Reserved - Should be 0.
  #   - I = Set if file contains an ICC profile.
  #   - L = Set if file contains transparency information.
  #   - E = Set if file contains Exif metadata.
  #   - X = Set if file contains XMP metadata.
  #   - A = Set if file is an animated image.
  flags = read_int_8
  has_transparency = flags & 0x10 != 0
  has_exif_metadata = flags & 0x08 != 0
  has_xmp_metadata = flags & 0x04 != 0
  has_multiple_frames = flags & 0x02 != 0

  # The flags are followed by three reserved bytes of zeros, and then by the width and height, respectively - each
  # occupying three bytes and each one less than the actual canvas measurements.
  safe_skip(@buf, 3)

  # Read the 6 bytes as a 32-bit plus a 16-bit word, both explicitly little-endian ('V', 'v') so the result
  # does not depend on the byte order of the host.
  low_word, high_word = safe_read(@buf, 6).unpack('Vv')
  width = (low_word & 0xffffff) + 1
  height = ((low_word >> 24) | ((high_word << 8) & 0xffffff)) + 1

  image = create_image(width, height, has_multiple_frames: has_multiple_frames, has_transparency: has_transparency)
  augment_image(image) if has_exif_metadata || has_xmp_metadata || has_multiple_frames
  image
end
|
112
|
+
|
113
|
+
# Builds the FormatParser::Image describing a WebP file of the given size.
# Both flags default to false.
def create_image(width, height, has_multiple_frames: false, has_transparency: false)
  attributes = {
    width_px: width,
    height_px: height,
    format: :webp,
    content_type: WEBP_MIME_TYPE,
    has_transparency: has_transparency,
    has_multiple_frames: has_multiple_frames
  }
  FormatParser::Image.new(**attributes)
end
|
123
|
+
|
124
|
+
# Scans the remaining RIFF chunks for EXIF, XMP and ANMF data and stores what
# it finds on `image`: Exif/XMP payloads go into image.intrinsics, Exif
# orientation is applied (swapping width and height when rotated), and ANMF
# chunks are counted as animation frames.
#
# Scanning ends when no further 8-byte chunk header can be read.
def augment_image(image)
  intrinsics = {}
  num_frames = 0
  loop do
    # Try to read the next chunk header, and stop once we've reached EOF.
    begin
      fourcc, chunk_size = safe_read(@buf, 8).unpack('A4V')
    rescue InvalidRead
      break
    end

    case fourcc
    when 'EXIF'
      exif = exif_from_tiff_io(StringIO.new(safe_read(@buf, chunk_size)))
      # We use ||= here as one Exif chunk at most should be present, even though it is possible for there to be more.
      intrinsics[:exif] ||= exif
      image.height_px, image.width_px = image.width_px, image.height_px if exif&.rotated?
      image.orientation = exif&.orientation_sym
    when 'XMP'
      # We use ||= here as one XMP chunk at most should be present, even though it is possible for there to be more.
      intrinsics[:xmp] ||= safe_read(@buf, chunk_size)
    when 'ANMF'
      num_frames += 1 if image.has_multiple_frames
      safe_skip(@buf, chunk_size)
    else
      safe_skip(@buf, chunk_size)
    end

    # A padding byte follows the payload when the chunk size is odd, so it is
    # skipped after the payload has been consumed, keeping the next header aligned.
    safe_skip(@buf, 1) if chunk_size.odd?
  end

  image.intrinsics = intrinsics unless intrinsics.empty?
  image.num_animation_or_video_frames = num_frames if num_frames > 0
end
|
160
|
+
|
161
|
+
FormatParser.register_parser new, natures: [:image], formats: [:webp]
|
162
|
+
end
|
data/lib/parsers/zip_parser.rb
CHANGED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for FormatParser::HEIFParser, run against the fixtures under HEIF/.
describe FormatParser::HEIFParser do
  # Parses the fixture at +relative_path+ (relative to fixtures_dir) and
  # returns the parser result. The block form of File.open guarantees the
  # file handle is closed once parsing has finished.
  def parse_fixture(relative_path)
    File.open(fixtures_dir + relative_path, 'rb') { |file| subject.call(file) }
  end

  it 'is able to parse single heif image with heic major brand' do
    result = parse_fixture('HEIF/SingleImage.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(4000)
    expect(result.height_px).to eq(3000)
    expect(result.content_type).to eq('image/heic')
    # match_array is order-insensitive, like the legacy `should =~` it replaces.
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse single heif image with mif1 major brand' do
    result = parse_fixture('HEIF/SingleImage_Autumn.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heif)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    expect(result.content_type).to eq('image/heif')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse image collection with mif1 major brand' do
    result = parse_fixture('HEIF/ImageCollection.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heif)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    expect(result.content_type).to eq('image/heif')
  end

  it 'is able to parse image collection with colour info' do
    result = parse_fixture('HEIF/SingleImage_Autumn_WithColourInfo.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    colour_info = result.intrinsics[:colour_info]
    expect(colour_info[:colour_primaries]).to eq(28259)
    expect(colour_info[:transfer_characteristics]).to eq(27768)
    expect(colour_info[:matrix_coefficients]).to eq(2)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse image collection with pixel info' do
    result = parse_fixture('HEIF/SingleImage_Autumn_WithColourInfo.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    pixel_info = result.intrinsics[:pixel_info]
    expect(pixel_info[0][:bits_in_channel_2]).to eq(8)
    expect(pixel_info[1][:bits_in_channel_3]).to eq(8)
    expect(pixel_info[2][:bits_in_channel_4]).to eq(8)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for FormatParser::WebpParser, run against the fixtures under WEBP/.
describe FormatParser::WebpParser do
  # Parses the fixture at +relative_path+ (relative to fixtures_dir) and
  # returns the parser result. The block form of File.open guarantees the
  # file handle is closed once parsing has finished.
  def parse_fixture(relative_path)
    File.open(fixtures_dir + relative_path, 'rb') { |file| subject.call(file) }
  end

  it 'does not parse files with an invalid RIFF header' do
    result = parse_fixture('WEBP/invalid-header.webp')
    expect(result).to be_nil
  end

  it 'does not parse files with an unrecognised variant' do
    result = parse_fixture('WEBP/unrecognised-variant.webp')
    expect(result).to be_nil
  end

  it 'successfully parses lossy (VP8) WebP files' do
    result = parse_fixture('WEBP/lossy.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses lossless WebP files' do
    result = parse_fixture('WEBP/lossless.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses lossless WebP files with an alpha channel' do
    result = parse_fixture('WEBP/lossless-alpha.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(true)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files' do
    result = parse_fixture('WEBP/extended.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with an alpha channel' do
    result = parse_fixture('WEBP/extended-alpha.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(true)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with Exif metadata' do
    result = parse_fixture('WEBP/extended-exif.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).not_to be_nil
    expect(result.intrinsics[:exif]).not_to be_nil
    expect(result.intrinsics[:exif].image_length).to eq(result.height_px)
    expect(result.intrinsics[:exif].image_width).to eq(result.width_px)
    expect(result.orientation).to eq(:top_left)
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with XMP metadata' do
    result = parse_fixture('WEBP/extended-xmp.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(false)
    expect(result.has_transparency).to eq(false)
    expect(result.height_px).to eq(181)
    expect(result.intrinsics).not_to be_nil
    expect(result.intrinsics[:xmp]).not_to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(65)
  end

  it 'successfully parses extended WebP files with animation' do
    result = parse_fixture('WEBP/extended-animation.webp')
    expect(result).not_to be_nil
    expect(result.content_type).to eq('image/webp')
    expect(result.format).to eq(:webp)
    expect(result.has_multiple_frames).to eq(true)
    expect(result.has_transparency).to eq(true)
    expect(result.height_px).to eq(211)
    expect(result.intrinsics).to be_nil
    expect(result.orientation).to be_nil
    expect(result.width_px).to eq(211)
  end
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
8
8
|
- Julik Tarkhanov
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -238,6 +238,7 @@ files:
|
|
238
238
|
- lib/parsers/fdx_parser.rb
|
239
239
|
- lib/parsers/flac_parser.rb
|
240
240
|
- lib/parsers/gif_parser.rb
|
241
|
+
- lib/parsers/heif_parser.rb
|
241
242
|
- lib/parsers/jpeg_parser.rb
|
242
243
|
- lib/parsers/m3u_parser.rb
|
243
244
|
- lib/parsers/moov_parser.rb
|
@@ -251,6 +252,7 @@ files:
|
|
251
252
|
- lib/parsers/psd_parser.rb
|
252
253
|
- lib/parsers/tiff_parser.rb
|
253
254
|
- lib/parsers/wav_parser.rb
|
255
|
+
- lib/parsers/webp_parser.rb
|
254
256
|
- lib/parsers/zip_parser.rb
|
255
257
|
- lib/parsers/zip_parser/file_reader.rb
|
256
258
|
- lib/parsers/zip_parser/office_formats.rb
|
@@ -278,6 +280,7 @@ files:
|
|
278
280
|
- spec/parsers/fdx_parser_spec.rb
|
279
281
|
- spec/parsers/flac_parser_spec.rb
|
280
282
|
- spec/parsers/gif_parser_spec.rb
|
283
|
+
- spec/parsers/heif_parser_spec.rb
|
281
284
|
- spec/parsers/jpeg_parser_spec.rb
|
282
285
|
- spec/parsers/m3u_parser_spec.rb
|
283
286
|
- spec/parsers/moov_parser_spec.rb
|
@@ -289,6 +292,7 @@ files:
|
|
289
292
|
- spec/parsers/psd_parser_spec.rb
|
290
293
|
- spec/parsers/tiff_parser_spec.rb
|
291
294
|
- spec/parsers/wav_parser_spec.rb
|
295
|
+
- spec/parsers/webp_parser_spec.rb
|
292
296
|
- spec/parsers/zip_parser_spec.rb
|
293
297
|
- spec/read_limiter_spec.rb
|
294
298
|
- spec/read_limits_config_spec.rb
|
@@ -300,7 +304,7 @@ licenses:
|
|
300
304
|
- MIT (Hippocratic)
|
301
305
|
metadata:
|
302
306
|
allowed_push_host: https://rubygems.org
|
303
|
-
post_install_message:
|
307
|
+
post_install_message:
|
304
308
|
rdoc_options: []
|
305
309
|
require_paths:
|
306
310
|
- lib
|
@@ -315,8 +319,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
315
319
|
- !ruby/object:Gem::Version
|
316
320
|
version: '0'
|
317
321
|
requirements: []
|
318
|
-
rubygems_version: 3.
|
319
|
-
signing_key:
|
322
|
+
rubygems_version: 3.2.33
|
323
|
+
signing_key:
|
320
324
|
specification_version: 4
|
321
325
|
summary: A library for efficient parsing of file metadata
|
322
326
|
test_files: []
|