format_parser 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +17 -0
- data/lib/parsers/heif_parser.rb +431 -0
- data/lib/parsers/moov_parser.rb +1 -1
- data/lib/parsers/pdf_parser.rb +1 -1
- data/lib/parsers/zip_parser.rb +1 -1
- data/spec/parsers/heif_parser_spec.rb +75 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d90a6eee951feb5017bdedc0fb6dd4be49fc0a7f218972c0fb423b1985bf9a97
|
4
|
+
data.tar.gz: ee6caab359b0e01450897d00abd1f190b131e40d842d487d1f29107ac3b374f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72aca621c20dfb24443e32c52a3d27f64a4887c1254f6d2295f77b39ce57e1a5ef0aa52f365630badcb2e3aa90d32c3546c8fcea3b83e025e2c4fc3606dd2dd7
|
7
|
+
data.tar.gz: 90da48352579c4044035732fd1f837d86db1a2d3c8f325b60ee688630f6cb4ea8efdb2ffd7887ac4e49cf9f9df2a22fe2189187c32d606b8f027a0a3d6a3ec5f
|
data/CHANGELOG.md
CHANGED
data/lib/io_utils.rb
CHANGED
@@ -30,5 +30,22 @@ module FormatParser::IOUtils
|
|
30
30
|
nil
|
31
31
|
end
|
32
32
|
|
33
|
+
# Consumes one byte from @buf and returns it as an unsigned 8-bit integer.
def read_int_8
  byte, = safe_read(@buf, 1).unpack('C')
  byte
end
|
36
|
+
|
37
|
+
# Consumes two bytes from @buf and returns them as an unsigned 16-bit
# big-endian (network byte order) integer.
def read_int_16
  value, = safe_read(@buf, 2).unpack('n')
  value
end
|
40
|
+
|
41
|
+
# Consumes four bytes from @buf and returns them as an unsigned 32-bit
# big-endian (network byte order) integer.
def read_int_32
  value, = safe_read(@buf, 4).unpack('N')
  value
end
|
44
|
+
|
45
|
+
# 'n' is the number of bytes to read
|
46
|
+
# Returns whatever safe_read yields for the next n bytes of @buf, unmodified.
def read_string(n)
  bytes = safe_read(@buf, n)
  bytes
end
|
49
|
+
|
33
50
|
### TODO: Some kind of built-in offset for the read
|
34
51
|
end
|
@@ -0,0 +1,431 @@
|
|
1
|
+
# HEIF stands for High Efficiency Image File Format: a container capable of storing a single image, or a sequence of images, in one file.
|
2
|
+
# There are a number of variants of HEIF, which can be used to store images, sequences of images, or videos using different codecs.
|
3
|
+
# The variant that Apple uses to store images and sequences of images in its iOS and macOS operating systems is High Efficiency Image Coding (HEIC), which uses HEVC / H.265 for content compression.
|
4
|
+
class FormatParser::HEIFParser
|
5
|
+
include FormatParser::IOUtils
|
6
|
+
|
7
|
+
# Four-character codes of the brands and boxes this parser recognises.
# Each one is packed from its ASCII byte values and frozen, so the shared
# constants cannot be mutated by accident.
HEIF_MARKER = [0x68, 0x65, 0x69, 0x63].pack('C4').freeze # 'heic' brand (despite the constant's name, the bytes spell 'heic')
FILE_TYPE_BOX_MARKER = [0x66, 0x74, 0x79, 0x70].pack('C4').freeze # ftyp marker
META_BOX_MARKER = [0x6D, 0x65, 0x74, 0x61].pack('C4').freeze # meta marker
MIF1_MARKER = [0x6D, 0x69, 0x66, 0x31].pack('C4').freeze # mif1 marker
MSF1_MARKER = [0x6D, 0x73, 0x66, 0x31].pack('C4').freeze # msf1 marker
MEANINGLESS_BYTE = [0x00, 0x00, 0x00, 0x00].pack('C4').freeze # four null bytes
HANDLER_MARKER = [0x68, 0x64, 0x6C, 0x72].pack('C4').freeze # hdlr marker
ITEM_PROPERTIES_BOX = [0x69, 0x70, 0x72, 0x70].pack('C4').freeze # iprp marker
ITEM_PROPERTIES_CONTAINER_BOX = [0x69, 0x70, 0x63, 0x6F].pack('C4').freeze # ipco marker
IMAGE_SPATIAL_EXTENTS_BOX = [0x69, 0x73, 0x70, 0x65].pack('C4').freeze # ispe marker
PIXEL_ASPECT_RATIO_BOX = [0x70, 0x61, 0x73, 0x70].pack('C4').freeze # pasp marker
ITEM_INFO_BOX = [0x69, 0x69, 0x6E, 0x66].pack('C4').freeze # iinf marker
ITEM_INFO_ENTRY = [0x69, 0x6E, 0x66, 0x65].pack('C4').freeze # infe marker
MIME_MARKER = [0x6D, 0x69, 0x6D, 0x65].pack('C4').freeze # mime marker
COLOUR_INFO_BOX = [0x63, 0x6F, 0x6C, 0x72].pack('C4').freeze # colr marker
PIXEL_INFO_BOX = [0x70, 0x69, 0x78, 0x69].pack('C4').freeze # pixi marker
RELATIVE_LOCATION_BOX = [0x72, 0x6C, 0x6F, 0x63].pack('C4').freeze # rloc marker
CLEAN_APERTURE_BOX = [0x63, 0x6C, 0x61, 0x70].pack('C4').freeze # clap marker
PRIMARY_ITEM_BOX = [0x70, 0x69, 0x74, 0x6D].pack('C4').freeze # pitm marker
ITEM_PROPERTIES_ASSOCIATION_BOX = [0x69, 0x70, 0x6D, 0x61].pack('C4').freeze # ipma marker
IMAGE_ROTATION_BOX = [0x69, 0x72, 0x6F, 0x74].pack('C4').freeze # irot marker
HEADER_LENGTH = 8 # every box header has a length of 8 bytes
# Brands that make the parser report the :heic format.
HEIC_MIME_POSSIBLE_TYPES = {
  'heic' => :heic,
  'heix' => :heix,
  'heim' => :heim,
  'heis' => :heis
}.freeze
HEIC_MIME_TYPE = 'image/heic'.freeze
HEIF_MIME_TYPE = 'image/heif'.freeze
|
37
|
+
# TODO: use the following when adding image-sequence parsing
|
38
|
+
# HEIC_SEQUENCE_MIME_TYPE = 'image/heic-sequence'
|
39
|
+
# HEIF_SEQUENCE_MIME_TYPE = 'image/heif-sequence'
|
40
|
+
|
41
|
+
# Class-level entry point: builds a fresh parser and lets it parse +io+.
def self.call(io)
  parser = new
  parser.call(io)
end
|
44
|
+
|
45
|
+
# Parses +io+ and returns whatever #scan produces.
#
# All parsing state lives in instance variables and is reset here, so one
# parser instance can be reused for several calls.
#
# @param io the IO-like object to parse; it is wrapped in a FormatParser::IOConstraint
# @return the result of #scan
def call(io)
  @buf = FormatParser::IOConstraint.new(io)
  @format = nil
  # Instance variable, not a class variable: scan_file_type_box and scan
  # read and write @major_brand.
  @major_brand = nil
  @width = nil
  @height = nil
  @exif_data_frames = []
  @compatible_brands = nil
  @metadata_start_pos = 0
  @metadata_end_pos = 0
  @handler_type = nil
  @sub_items = nil
  @pixel_aspect_ratio = nil
  @colour_info = nil
  @pixel_info = nil
  @horizontal_offset = nil
  @vertical_offset = nil
  @clean_aperture = nil
  @primary_item_id = 0
  @item_props = {}
  @rotation = 0
  @item_props_idxs = []
  @content_type = nil
  scan
end
|
70
|
+
|
71
|
+
# Reads the File Type Box and, for 'mif1'-compatible files, the Meta box,
# then builds the parse result.
#
# @return [FormatParser::Image, nil] nil when the file has no usable 'ftyp'
#   box or none of its brands maps to a supported format
def scan
  # All HEIC files must conform to ISO/IEC 23008-12:2017.
  # Moreover, all HEIC files conform to ISO/IEC 14496-12:2015 and should conform to Clause 4 of that spec.
  # Files are formed as a series of objects, called boxes. All data is contained in such boxes.
  # All boxes start with a header which defines both size and type.
  # The size is the entire size of the box, including the size and type header, fields, and all contained boxes.
  # The fields in the objects are stored with the most significant byte first, commonly known as network byte order or big-endian format.
  # A HEIC file must contain a File Type Box (ftyp).
  # A file conforms to all the requirements of the brands listed in the compatible_brands.
  scan_file_type_box

  # The file may be identified by the MIME type of Annex C of ISO/IEC 23008-12 if 'mif1' is the major brand,
  # or Annex D if 'msf1' is the major brand.
  # Note particularly that the brand 'mif1' doesn't mandate a MovieBox ("moov").
  # One or more brands must be included in the list of compatible brands.
  return if @compatible_brands.nil?

  if @compatible_brands.include?(MIF1_MARKER)
    scan_meta_level_box
    if @major_brand == MIF1_MARKER
      @content_type = HEIF_MIME_TYPE
      @format = :heif
    elsif !(@compatible_brands & HEIC_MIME_POSSIBLE_TYPES.keys).empty?
      @format = :heic
      @content_type = HEIC_MIME_TYPE
    end
  end
  # TODO: image sequences ('msf1' among the compatible brands) are not parsed yet.

  # Without a recognised format there is nothing meaningful to report:
  # do not hand back a result whose format is nil.
  return if @format.nil?

  FormatParser::Image.new(
    format: @format,
    width_px: @width,
    height_px: @height,
    intrinsics: {
      compatible_brands: @compatible_brands,
      handler_type: @handler_type,
      # 'sub_items': @sub_items, # enable this if you want to output all the sub-items in the image
      pixel_aspect_ratio: @pixel_aspect_ratio,
      colour_info: @colour_info,
      pixel_info: @pixel_info,
      horizontal_offset: @horizontal_offset,
      vertical_offset: @vertical_offset,
      clean_aperture: @clean_aperture,
      rotation: @rotation
    },
    content_type: @content_type
  )
end
|
122
|
+
|
123
|
+
# Reads the File Type Box ('ftyp') at the current position.
# Stores the major brand in @major_brand and, when that brand is 'heic' or
# 'mif1', the list of compatible brands in @compatible_brands.
# Bails out early (leaving @compatible_brands untouched) otherwise.
def scan_file_type_box
  box_size = read_int_32
  box_type = read_string(4)
  return unless box_type == FILE_TYPE_BOX_MARKER

  @major_brand = read_string(4)
  return unless [HEIF_MARKER, MIF1_MARKER].include?(@major_brand)

  read_string(4) # minor_brand

  # What is left of the box after its 8-byte header (size + 'ftyp'), the
  # major brand and the 4-byte minor brand is the list of compatible brands.
  remaining_bytes = box_size - HEADER_LENGTH - HEIF_MARKER.length - 4
  @compatible_brands = []
  (remaining_bytes / 4).times { @compatible_brands << read_string(4) }
end
|
139
|
+
|
140
|
+
# Parses the Meta box ('meta') at the current position: reads the handler
# type from the 'hdlr' box, then walks the boxes that follow inside the Meta
# box, dispatching 'pitm', 'iinf' and 'iprp' to their readers.
# Returns early when the expected 'meta' or 'hdlr' marker is not found.
def scan_meta_level_box
  metadata_length = read_int_32
  return unless read_string(4) == META_BOX_MARKER

  @metadata_start_pos = @buf.pos
  @metadata_end_pos = @buf.pos + metadata_length - HEADER_LENGTH # the payload excludes the 8-byte box header
  read_nil_version_and_flag

  # We are looking for boxes/containers right beneath the Meta box.
  # We start with the HDLR (Handler) box..
  handler_length = read_int_32
  return unless read_string(4) == HANDLER_MARKER

  handler_length -= HEADER_LENGTH # subtract the header as usual (will not be mentioned anymore from now on)
  handler_start = @buf.pos
  # The handler type declares the type of metadata and thus the process by which the media-data in the track is presented.
  # It also indicates the structure or format of the 'meta' box contents.
  read_nil_version_and_flag
  read_string(4) # pre_defined bytes, always 4 null bytes in the hdlr box
  @handler_type = read_string(4)
  @buf.seek(handler_start + handler_length) # the remaining part is reserved

  # ..continue looking for the IINF box and especially for the IPRP box, containing info about the image itself
  next_box_length = read_int_32
  next_box = read_string(4)
  next_box_start_pos = @buf.pos
  while @buf.pos < @metadata_end_pos # iterate over the following boxes without leaving the meta box
    case next_box
    when PRIMARY_ITEM_BOX
      read_primary_item_box
    when ITEM_INFO_BOX
      read_item_info_box
    when ITEM_PROPERTIES_BOX
      read_item_properties_box
      fill_primary_values
    when ''
      # An empty box name means there is nothing left to read.
      break
    end
    next_box_length, next_box, next_box_start_pos = get_next_box(next_box_start_pos, next_box_length, @metadata_end_pos)
  end
end
|
179
|
+
|
180
|
+
# Parses the payload of an Item Info box ('iinf') and collects one hash per
# 'infe' entry into @sub_items (item_id, item_type, content_encoding).
# Stops early, keeping the entries read so far, when an entry does not start
# with the 'infe' marker or has a version other than 2 or 3.
def read_item_info_box
  version = read_int_8
  safe_skip(@buf, 3) # 0 flags
  entry_count = version == 0 ? read_int_16 : read_int_32
  @sub_items = []
  entry_count.times do
    item_info_entry_length = read_int_32
    return unless read_string(4) == ITEM_INFO_ENTRY

    item_info_end_pos = @buf.pos + item_info_entry_length - HEADER_LENGTH
    entry_version = read_int_8
    safe_skip(@buf, 3) # 0 flags
    case entry_version
    when 2
      item_id = read_int_16
    when 3
      item_id = read_int_32
    else
      return # wrong version according to standards, hence return
    end
    safe_skip(@buf, 2) # not interested in the item_protection_index
    item_type = read_string(4)
    content_encoding = ''
    if item_type == MIME_MARKER
      # Strip the null terminators for readable output. The non-bang #delete
      # is required: #delete! returns nil when there was nothing to remove.
      content_encoding = read_string(item_info_end_pos - @buf.pos).delete("\0")
    end
    @sub_items << {item_id: item_id, item_type: item_type, content_encoding: content_encoding}
    @buf.seek(item_info_end_pos) # we are not interested in anything else, go directly to the end of this 'infe' box
  end
end
|
213
|
+
|
214
|
+
# Skips the 1-byte version and the 3-byte flags field at the current position
# of @buf without inspecting either of them.
def read_nil_version_and_flag
  version_size = 1 # version, always 0 in this current box
  flags_size = 3   # flags, always 0 in this current box
  safe_skip(@buf, version_size)
  safe_skip(@buf, flags_size)
end
|
218
|
+
|
219
|
+
# Parses the payload of the Primary Item box ('pitm') and stores the primary
# item id in @primary_item_id: 16 bits wide for version 0, 32 bits otherwise.
def read_primary_item_box
  box_version = read_int_8
  safe_read(@buf, 3) # flags, always 0 in this current box
  @primary_item_id = box_version == 0 ? read_int_16 : read_int_32
end
|
228
|
+
|
229
|
+
# the ITEM_PROPERTIES_CONTAINER_BOX contains an implicitly 1-based index list of item properties.
|
230
|
+
# While parsing such box we are storing the properties with its own index.
|
231
|
+
# Reason behind is that the primary_item will be associated to some of these properties through the same index
|
232
|
+
# and in order to output relevant data from the format_parser we need all the properties associated to the primary_item.
|
233
|
+
# Hence the need of the association between an item and its properties, found in the ITEM_PROPERTIES_ASSOCIATION_BOX
|
234
|
+
# Parses the payload of the Item Properties box ('iprp'): first the property
# container ('ipco'), then the property association box ('ipma').
# Gives up as soon as one of the two expected markers is missing.
def read_item_properties_box
  container_length = read_int_32
  container_type = read_string(4)
  return unless container_type == ITEM_PROPERTIES_CONTAINER_BOX

  read_item_properties_container_box(container_length)

  read_int_32 # ipma_length
  association_type = read_string(4)
  read_item_properties_association_box if association_type == ITEM_PROPERTIES_ASSOCIATION_BOX
end
|
242
|
+
|
243
|
+
# Walks the Item Property Container box ('ipco') and stores every property
# this parser understands in @item_props, keyed by the property's 1-based
# position inside the container. Positions of unknown properties are counted
# but not stored.
#
# @param box_length [Integer] size of the 'ipco' box including its 8-byte header
def read_item_properties_container_box(box_length)
  end_of_ipco_box = @buf.pos + box_length - HEADER_LENGTH
  item_prop_length = read_int_32
  item_prop_name = read_string(4)
  item_prop_start_pos = @buf.pos
  item_prop_index = 1
  while @buf.pos < end_of_ipco_box
    item_prop = read_item_property(item_prop_name)
    @item_props[item_prop_index] = item_prop if item_prop
    item_prop_length, item_prop_name, item_prop_start_pos = get_next_box(item_prop_start_pos, item_prop_length, end_of_ipco_box)
    item_prop_index += 1
  end
end

# Reads the payload of one property box of the given type.
# Returns a hash describing the property, or nil for box types that are not handled.
def read_item_property(box_type)
  case box_type
  when IMAGE_SPATIAL_EXTENTS_BOX
    read_nil_version_and_flag
    # Hash values are evaluated left to right: width is read before height.
    { type: IMAGE_SPATIAL_EXTENTS_BOX, width: read_int_32, height: read_int_32 }
  when PIXEL_ASPECT_RATIO_BOX
    h_spacing = read_int_32
    v_spacing = read_int_32
    { type: PIXEL_ASPECT_RATIO_BOX, pixel_aspect_ratio: "#{h_spacing}/#{v_spacing}" }
  when COLOUR_INFO_BOX
    # NOTE(review): ISO/IEC 14496-12 appears to place a 4-byte colour_type
    # before these fields, in which case the first two values read here are
    # that type code rather than the real fields. Confirm before changing;
    # existing expectations depend on the current layout.
    colour_info = {
      colour_primaries: read_int_16,
      transfer_characteristics: read_int_16,
      matrix_coefficients: read_int_16
    }
    { type: COLOUR_INFO_BOX, colour_info: colour_info }
  when PIXEL_INFO_BOX
    read_nil_version_and_flag
    num_channels = read_int_8
    # NOTE(review): the keys start at bits_in_channel_2, not _1. This is kept
    # as-is because consumers rely on these key names.
    pixel_info = (2..num_channels + 1).map do |channel|
      { "bits_in_channel_#{channel}": read_int_8 }
    end
    { type: PIXEL_INFO_BOX, pixel_info: pixel_info }
  when RELATIVE_LOCATION_BOX
    read_nil_version_and_flag
    { type: RELATIVE_LOCATION_BOX, horizontal_offset: read_int_32, vertical_offset: read_int_32 }
  when CLEAN_APERTURE_BOX
    clean_aperture = [
      {
        clean_aperture_width_n: read_int_32,
        clean_aperture_width_d: read_int_32,
        clean_aperture_height_n: read_int_32,
        clean_aperture_height_d: read_int_32,
        horiz_off_n: read_int_32,
        horiz_off_d: read_int_32,
        vert_off_n: read_int_32,
        vert_off_d: read_int_32
      }
    ]
    { type: CLEAN_APERTURE_BOX, clean_aperture: clean_aperture }
  when IMAGE_ROTATION_BOX
    read_nil_version_and_flag
    # Only the two least significant bits carry the angle multiplier;
    # multiplier * 90 is the rotation angle.
    { type: IMAGE_ROTATION_BOX, rotation: (read_int_8 & 0b11) * 90 }
  end
end
|
332
|
+
|
333
|
+
# Parses the payload of the Item Property Association box ('ipma') and
# appends to @item_props_idxs the property indexes associated with the
# primary item (@primary_item_id). Parsing stops once the primary item's
# entry has been read; entries of other items are read and discarded.
def read_item_properties_association_box
  version = read_int_8
  safe_read(@buf, 2) # skip the first 2 of the 3 flag bytes: only the least significant bit matters
  flags = read_int_8
  # With the lowest flag bit set each association is 16 bits wide (1 essential
  # bit + 15 index bits); otherwise it is 8 bits wide (1 + 7).
  wide_index = (flags & 1) == 1
  index_mask = wide_index ? 0x7FFF : 0x7F # drops the leading "essential" bit
  entry_count = read_int_32
  entry_count.times do
    item_id = version == 0 ? read_int_16 : read_int_32
    primary = item_id == @primary_item_id

    association_count = read_int_8
    association_count.times do
      association = wide_index ? read_int_16 : read_int_8
      # essential = association > index_mask # uncomment if needed
      item_property_index = association & index_mask
      # we are interested only in the primary item properties
      @item_props_idxs << item_property_index if primary
    end

    # we are interested only in the primary item
    return if primary
  end
end
|
367
|
+
|
368
|
+
# Copies the values of the properties associated with the primary item
# (indexes in @item_props_idxs, looked up in @item_props) into the instance
# variables used to build the result. Indexes without a stored property are ignored.
def fill_primary_values
  @item_props_idxs.each do |index|
    property = @item_props[index]
    next if property.nil?

    case property[:type]
    when IMAGE_SPATIAL_EXTENTS_BOX
      @width, @height = property.values_at(:width, :height)
    when PIXEL_ASPECT_RATIO_BOX
      @pixel_aspect_ratio = property[:pixel_aspect_ratio]
    when COLOUR_INFO_BOX
      @colour_info = property[:colour_info]
    when PIXEL_INFO_BOX
      @pixel_info = property[:pixel_info]
    when RELATIVE_LOCATION_BOX
      @horizontal_offset, @vertical_offset = property.values_at(:horizontal_offset, :vertical_offset)
    when CLEAN_APERTURE_BOX
      @clean_aperture = property[:clean_aperture]
    when IMAGE_ROTATION_BOX
      @rotation = property[:rotation]
    end
  end
end
|
392
|
+
|
393
|
+
# Reads 4-byte chunks until the end of the Meta box and returns the first
# chunk that is not four null bytes; returns nil when there is none.
def next_meaningful_meta_byte
  until @buf.pos >= @metadata_end_pos
    chunk = read_string(4)
    return chunk unless chunk == MEANINGLESS_BYTE
  end
end
|
399
|
+
|
400
|
+
# Moves past the current box and reads the header of the box that follows it.
#
# @param box_start_pos [Integer] position right after the current box's 8-byte header
# @param box_length [Integer] size of the current box as declared in its header (header included)
# @param end_pos_upper_box [Integer] end position of the enclosing box
# @return [Array(Integer, String, Integer), nil] length, name and payload start
#   position of the next box, or nil when the enclosing box is exhausted
def get_next_box(box_start_pos, box_length, end_pos_upper_box)
  # A declared length below the header size cannot be skipped over: the
  # computed position would land on or before the current header and callers
  # would keep re-reading the same box. ISOBMFF uses 0 for "extends to the end
  # of the file" and 1 for a 64-bit size, neither of which is supported here;
  # any other such value is malformed. Treat all of them as the end of the
  # enclosing box.
  if box_length < HEADER_LENGTH
    @buf.seek(end_pos_upper_box)
    return
  end

  skip_pos = box_start_pos + box_length - HEADER_LENGTH
  @buf.seek(skip_pos)
  return if skip_pos >= end_pos_upper_box

  next_box_length = read_int_32
  next_box_name = read_string(4)
  [next_box_length, next_box_name, @buf.pos]
end
|
408
|
+
|
409
|
+
# True unless +byte+ is the four-null-byte filler (MEANINGLESS_BYTE).
def meaningful?(byte)
  filler = byte == MEANINGLESS_BYTE
  !filler
end
|
412
|
+
|
413
|
+
# Converts a non-negative integer into its binary digits, most significant
# bit first, left-padded with zeros to at least 8 entries.
#
# Every element is an Integer (0 or 1), so callers can join, slice or
# overwrite individual bits without mixing Strings and Integers.
#
# @param integer [Integer] value to convert (a byte, 0..255, in practice)
# @return [Array<Integer>] the bits; 8 entries for values up to 255
def convert_byte_to_binary(integer)
  return Array.new(8, 0) unless integer > 0

  integer.to_s(2).rjust(8, '0').each_char.map(&:to_i)
end
|
425
|
+
|
426
|
+
# Tells whether +filename+ carries a .heic or .heif extension (case-insensitive).
# The pattern is anchored with \z so that only the very end of the string
# counts; `$` would also match right before a newline.
#
# @param filename [String]
# @return [Integer, nil] index of the extension when it matches, nil otherwise
def likely_match?(filename)
  filename =~ /\.hei[cf]\z/i
end
|
429
|
+
|
430
|
+
FormatParser.register_parser(new, natures: :image, formats: [:heif, :heic], priority: 2)
|
431
|
+
end
|
data/lib/parsers/moov_parser.rb
CHANGED
data/lib/parsers/pdf_parser.rb
CHANGED
@@ -21,5 +21,5 @@ class FormatParser::PDFParser
|
|
21
21
|
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
22
22
|
end
|
23
23
|
|
24
|
-
FormatParser.register_parser new, natures: :document, formats: :pdf, priority:
|
24
|
+
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 3
|
25
25
|
end
|
data/lib/parsers/zip_parser.rb
CHANGED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::HEIFParser do
  # Parses a fixture from the HEIF fixtures directory and closes the file
  # handle once parsing is done.
  def parse_fixture(name)
    File.open(fixtures_dir + "HEIF/#{name}", 'rb') { |file| subject.call(file) }
  end

  it 'is able to parse single heif image with heic major brand' do
    result = parse_fixture('SingleImage.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(4000)
    expect(result.height_px).to eq(3000)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse single heif image with mif1 major brand' do
    result = parse_fixture('SingleImage_Autumn.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heif)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    expect(result.content_type).to eq('image/heif')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse image collection with mif1 major brand' do
    result = parse_fixture('ImageCollection.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heif)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    expect(result.content_type).to eq('image/heif')
  end

  it 'is able to parse single image with colour info' do
    result = parse_fixture('SingleImage_Autumn_WithColourInfo.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    colour_info = result.intrinsics[:colour_info]
    expect(colour_info[:colour_primaries]).to eq(28259)
    expect(colour_info[:transfer_characteristics]).to eq(27768)
    expect(colour_info[:matrix_coefficients]).to eq(2)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse single image with pixel info' do
    result = parse_fixture('SingleImage_Autumn_WithColourInfo.heic')

    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    pixel_info = result.intrinsics[:pixel_info]
    expect(pixel_info[0][:bits_in_channel_2]).to eq(8)
    expect(pixel_info[1][:bits_in_channel_3]).to eq(8)
    expect(pixel_info[2][:bits_in_channel_4]).to eq(8)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-05-31 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -238,6 +238,7 @@ files:
|
|
238
238
|
- lib/parsers/fdx_parser.rb
|
239
239
|
- lib/parsers/flac_parser.rb
|
240
240
|
- lib/parsers/gif_parser.rb
|
241
|
+
- lib/parsers/heif_parser.rb
|
241
242
|
- lib/parsers/jpeg_parser.rb
|
242
243
|
- lib/parsers/m3u_parser.rb
|
243
244
|
- lib/parsers/moov_parser.rb
|
@@ -278,6 +279,7 @@ files:
|
|
278
279
|
- spec/parsers/fdx_parser_spec.rb
|
279
280
|
- spec/parsers/flac_parser_spec.rb
|
280
281
|
- spec/parsers/gif_parser_spec.rb
|
282
|
+
- spec/parsers/heif_parser_spec.rb
|
281
283
|
- spec/parsers/jpeg_parser_spec.rb
|
282
284
|
- spec/parsers/m3u_parser_spec.rb
|
283
285
|
- spec/parsers/moov_parser_spec.rb
|
@@ -315,7 +317,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
315
317
|
- !ruby/object:Gem::Version
|
316
318
|
version: '0'
|
317
319
|
requirements: []
|
318
|
-
rubygems_version: 3.
|
320
|
+
rubygems_version: 3.1.6
|
319
321
|
signing_key:
|
320
322
|
specification_version: 4
|
321
323
|
summary: A library for efficient parsing of file metadata
|