format_parser 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +17 -0
- data/lib/parsers/heif_parser.rb +431 -0
- data/lib/parsers/moov_parser.rb +1 -1
- data/lib/parsers/pdf_parser.rb +1 -1
- data/lib/parsers/zip_parser.rb +1 -1
- data/spec/parsers/heif_parser_spec.rb +75 -0
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d90a6eee951feb5017bdedc0fb6dd4be49fc0a7f218972c0fb423b1985bf9a97
|
|
4
|
+
data.tar.gz: ee6caab359b0e01450897d00abd1f190b131e40d842d487d1f29107ac3b374f0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 72aca621c20dfb24443e32c52a3d27f64a4887c1254f6d2295f77b39ce57e1a5ef0aa52f365630badcb2e3aa90d32c3546c8fcea3b83e025e2c4fc3606dd2dd7
|
|
7
|
+
data.tar.gz: 90da48352579c4044035732fd1f837d86db1a2d3c8f325b60ee688630f6cb4ea8efdb2ffd7887ac4e49cf9f9df2a22fe2189187c32d606b8f027a0a3d6a3ec5f
|
data/CHANGELOG.md
CHANGED
data/lib/io_utils.rb
CHANGED
|
@@ -30,5 +30,22 @@ module FormatParser::IOUtils
|
|
|
30
30
|
nil
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
# Reads one byte from @buf via safe_read and decodes it with the 'C'
# directive (8-bit unsigned integer).
def read_int_8
  value, = safe_read(@buf, 1).unpack('C')
  value
end
|
|
36
|
+
|
|
37
|
+
# Reads two bytes from @buf via safe_read and decodes them with the 'n'
# directive (16-bit unsigned integer, big-endian / network byte order).
def read_int_16
  value, = safe_read(@buf, 2).unpack('n')
  value
end
|
|
40
|
+
|
|
41
|
+
# Reads four bytes from @buf via safe_read and decodes them with the 'N'
# directive (32-bit unsigned integer, big-endian / network byte order).
def read_int_32
  value, = safe_read(@buf, 4).unpack('N')
  value
end
|
|
44
|
+
|
|
45
|
+
# Returns whatever safe_read yields for the next +n+ bytes of @buf.
# +n+ is the number of bytes to read.
def read_string(n)
  byte_count = n
  safe_read(@buf, byte_count)
end
|
|
49
|
+
|
|
33
50
|
### TODO: Some kind of built-in offset for the read
|
|
34
51
|
end
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
# HEIF stands for High-Efficiency Image File format, which is basically a container that is capable of storing an image, or a sequence of images in a single file.
|
|
2
|
+
# There are a number of variants of HEIF, which can be used to store images, sequences of images, or videos using different codecs.
|
|
3
|
+
# The variant that Apple uses to store images and sequences of images in its iOS and macOS operating systems is High Efficiency Image Coding (HEIC), which uses HEVC / H.265 for content compression.
|
|
4
|
+
class FormatParser::HEIFParser
|
|
5
|
+
include FormatParser::IOUtils
|
|
6
|
+
|
|
7
|
+
# Four-character codes (brands and box types) used while walking the file.
# All of them are frozen so a caller cannot mutate the shared constants by accident.
HEIF_MARKER = [0x68, 0x65, 0x69, 0x63].pack('C4').freeze # 'heic' brand (the bytes spell "heic"; constant name kept for compatibility)
FILE_TYPE_BOX_MARKER = [0x66, 0x74, 0x79, 0x70].pack('C4').freeze # ftyp marker
META_BOX_MARKER = [0x6D, 0x65, 0x74, 0x61].pack('C4').freeze # meta marker
MIF1_MARKER = [0x6D, 0x69, 0x66, 0x31].pack('C4').freeze # mif1 marker
MSF1_MARKER = [0x6D, 0x73, 0x66, 0x31].pack('C4').freeze # msf1 marker
MEANINGLESS_BYTE = [0x00, 0x00, 0x00, 0x00].pack('C4').freeze # four NUL bytes
HANDLER_MARKER = [0x68, 0x64, 0x6C, 0x72].pack('C4').freeze # hdlr marker
ITEM_PROPERTIES_BOX = [0x69, 0x70, 0x72, 0x70].pack('C4').freeze # iprp marker
ITEM_PROPERTIES_CONTAINER_BOX = [0x69, 0x70, 0x63, 0x6F].pack('C4').freeze # ipco marker
IMAGE_SPATIAL_EXTENTS_BOX = [0x69, 0x73, 0x70, 0x65].pack('C4').freeze # ispe marker
PIXEL_ASPECT_RATIO_BOX = [0x70, 0x61, 0x73, 0x70].pack('C4').freeze # pasp marker
ITEM_INFO_BOX = [0x69, 0x69, 0x6E, 0x66].pack('C4').freeze # iinf marker
ITEM_INFO_ENTRY = [0x69, 0x6E, 0x66, 0x65].pack('C4').freeze # infe marker
MIME_MARKER = [0x6D, 0x69, 0x6D, 0x65].pack('C4').freeze # mime marker
COLOUR_INFO_BOX = [0x63, 0x6F, 0x6C, 0x72].pack('C4').freeze # colr marker
PIXEL_INFO_BOX = [0x70, 0x69, 0x78, 0x69].pack('C4').freeze # pixi marker
RELATIVE_LOCATION_BOX = [0x72, 0x6C, 0x6F, 0x63].pack('C4').freeze # rloc marker
CLEAN_APERTURE_BOX = [0x63, 0x6C, 0x61, 0x70].pack('C4').freeze # clap marker
PRIMARY_ITEM_BOX = [0x70, 0x69, 0x74, 0x6D].pack('C4').freeze # pitm marker
ITEM_PROPERTIES_ASSOCIATION_BOX = [0x69, 0x70, 0x6D, 0x61].pack('C4').freeze # ipma marker
IMAGE_ROTATION_BOX = [0x69, 0x72, 0x6F, 0x74].pack('C4').freeze # irot marker
HEADER_LENGTH = 8 # every box header has a length of 8 bytes (4 bytes size + 4 bytes type)
# Brands that cause a file to be reported as HEIC when found among the compatible brands.
HEIC_MIME_POSSIBLE_TYPES = {
  'heic' => :heic,
  'heix' => :heix,
  'heim' => :heim,
  'heis' => :heis
}.freeze
HEIC_MIME_TYPE = 'image/heic'.freeze
HEIF_MIME_TYPE = 'image/heif'.freeze
# TODO: use the following when adding image-sequence parsing
# HEIC_SEQUENCE_MIME_TYPE = 'image/heic-sequence'
# HEIF_SEQUENCE_MIME_TYPE = 'image/heif-sequence'
|
|
40
|
+
|
|
41
|
+
# Class-level entry point: builds a fresh parser instance and runs it on +io+.
def self.call(io)
  parser = new
  parser.call(io)
end
|
|
44
|
+
|
|
45
|
+
# Parses +io+ and returns the result of #scan.
#
# All parser state is reset here before scanning, so nothing from an earlier
# invocation on the same instance leaks into this one.
# The major brand is kept in an instance variable (@major_brand), the same
# variable the rest of the parser reads and writes; it must not be a class
# variable, which would be shared by every parser instance.
def call(io)
  @buf = FormatParser::IOConstraint.new(io)
  @format = nil
  @major_brand = nil
  @width = nil
  @height = nil
  @exif_data_frames = []
  @compatible_brands = nil
  @metadata_start_pos = 0
  @metadata_end_pos = 0
  @handler_type = nil
  @sub_items = nil
  @pixel_aspect_ratio = nil
  @colour_info = nil
  @pixel_info = nil
  @horizontal_offset = nil
  @vertical_offset = nil
  @clean_aperture = nil
  @primary_item_id = 0
  @item_props = {}
  @rotation = 0
  @item_props_idxs = []
  @content_type = nil
  scan
end
|
|
70
|
+
|
|
71
|
+
# Walks the file and builds the FormatParser::Image result.
#
# Returns nil when the File Type Box is missing or carries an unsupported major
# brand, and also when none of the brand rules below identified a format - an
# image result without a format would be of no use to the caller.
def scan
  # All HEIC files must conform to ISO/IEC 23008-12:2017.
  # Moreover, all HEIC files conform to ISO/IEC 14496-12:2015 and should conform to Clause 4 of that spec.
  # Files are formed as a series of objects, called boxes. All data is contained in such boxes.
  # All boxes start with a header which defines both size and type.
  # The size is the entire size of the box, including the size and type header, fields, and all contained boxes.
  # The fields in the objects are stored with the most significant byte first, commonly known as network byte order or big-endian format.
  # A HEIC file must contain a File Type Box (ftyp).
  # A file conforms to all the requirements of the brands listed in the compatible_brands.
  scan_file_type_box

  # The file may be identified by the MIME type of Annex C of ISO/IEC 23008-12 if 'mif1' is the major brand,
  # or Annex D if 'msf1' is the major brand.
  # The MIME type indicates the nature and format of our assortment of bytes.
  # Note particularly that the brand 'mif1' doesn't mandate a MovieBox ("moov").
  # One or more brands must be included in the list of compatible brands.
  return if @compatible_brands.nil?

  if @compatible_brands.include?(MIF1_MARKER)
    scan_meta_level_box
    if @major_brand == MIF1_MARKER
      @content_type = HEIF_MIME_TYPE
      @format = :heif
    elsif (@compatible_brands & HEIC_MIME_POSSIBLE_TYPES.keys).any?
      @format = :heic
      @content_type = HEIC_MIME_TYPE
    end
  end

  # TODO: image sequences ('msf1' among the compatible brands) are not parsed yet.

  # No brand rule matched: report "not recognised" instead of a format-less image.
  return if @format.nil?

  FormatParser::Image.new(
    format: @format,
    width_px: @width,
    height_px: @height,
    intrinsics: {
      compatible_brands: @compatible_brands,
      handler_type: @handler_type,
      # 'sub_items': @sub_items, # enable this if you want to output all the sub-items in the image
      pixel_aspect_ratio: @pixel_aspect_ratio,
      colour_info: @colour_info,
      pixel_info: @pixel_info,
      horizontal_offset: @horizontal_offset,
      vertical_offset: @vertical_offset,
      clean_aperture: @clean_aperture,
      rotation: @rotation
    },
    content_type: @content_type
  )
end
|
|
122
|
+
|
|
123
|
+
# Reads the File Type Box: box length, 'ftyp' marker, major brand, minor brand
# and the list of compatible brands.
# Leaves @compatible_brands untouched (nil after #call) when the marker is not
# 'ftyp' or the major brand is neither HEIF_MARKER nor MIF1_MARKER.
def scan_file_type_box
  box_size = read_int_32
  box_type = read_string(4)
  return if box_type != FILE_TYPE_BOX_MARKER

  @major_brand = read_string(4)
  return unless [HEIF_MARKER, MIF1_MARKER].include?(@major_brand)

  read_string(4) # minor_brand, not used

  # What remains after the 8-byte header, the major brand and the 4-byte minor
  # brand is the list of compatible brands, 4 bytes each.
  remaining_bytes = box_size - HEADER_LENGTH - HEIF_MARKER.length - 4
  brand_count = remaining_bytes / 4

  @compatible_brands = []
  brand_count.times { @compatible_brands << read_string(4) }
end
|
|
139
|
+
|
|
140
|
+
# Reads the Meta box: its header, the Handler (hdlr) box, and then every box
# directly beneath the Meta box, dispatching on the box type.
# Returns early (nil) when the expected 'meta' or 'hdlr' marker is not found.
def scan_meta_level_box
  metadata_length = read_int_32
  return unless read_string(4) == META_BOX_MARKER
  @metadata_start_pos = @buf.pos
  # the declared length includes the 8 header bytes that were just consumed
  @metadata_end_pos = @buf.pos + metadata_length - HEADER_LENGTH
  read_nil_version_and_flag

  # we are looking for boxes/containers right beneath the Meta box
  # we start with the HDLR (Handler) box..
  handler_length = read_int_32
  return unless read_string(4) == HANDLER_MARKER
  handler_length -= HEADER_LENGTH # subtract the header as usual
  handler_start = @buf.pos
  # the handler type declares the type of metadata and thus the process by which the media-data in the track is presented
  # it also indicates the structure or format of the 'meta' box contents
  read_nil_version_and_flag
  read_string(4) # pre_defined bytes, always 4 null bytes in the hdlr box
  @handler_type = read_string(4)
  @buf.seek(handler_start + handler_length) # the remaining part is reserved

  # ..continue looking for the IINF box and especially for the IPRP box, containing info about the image itself
  next_box_length = read_int_32
  next_box = read_string(4)
  next_box_start_pos = @buf.pos
  while @buf.pos < @metadata_end_pos # iterate over all following boxes without leaving the meta box
    case next_box
    when PRIMARY_ITEM_BOX
      read_primary_item_box
    when ITEM_INFO_BOX
      read_item_info_box
    when ITEM_PROPERTIES_BOX
      read_item_properties_box
      fill_primary_values
    when '', nil
      # No box name could be read: stop scanning.
      # (This used to be `when next_box == ''`, which tests a boolean against
      # the box name and therefore never matched.)
      break
    end
    next_box_length, next_box, next_box_start_pos = get_next_box(next_box_start_pos, next_box_length, @metadata_end_pos)
  end
end
|
|
179
|
+
|
|
180
|
+
# Reads the Item Info (iinf) box and collects one hash per 'infe' entry in
# @sub_items, with the keys :item_id, :item_type and :content_encoding.
# Stops (returning nil) at the first entry that is not an 'infe' box or whose
# version is neither 2 nor 3.
def read_item_info_box
  version = read_int_8
  safe_skip(@buf, 3) # 0 flags
  entry_count = version == 0 ? read_int_16 : read_int_32
  @sub_items = []
  entry_count.times do
    item_info_entry_length = read_int_32
    return unless read_string(4) == ITEM_INFO_ENTRY
    item_info_end_pos = @buf.pos + item_info_entry_length - HEADER_LENGTH
    # kept in its own local so the version of the enclosing box is not overwritten
    entry_version = read_int_8
    safe_skip(@buf, 3) # 0 flags
    case entry_version
    when 2
      item_id = read_int_16
    when 3
      item_id = read_int_32
    else
      return # wrong version according to standards, hence return
    end
    safe_skip(@buf, 2) # not interested in the item_protection_index
    item_type = read_string(4)
    content_encoding = ''
    if item_type == MIME_MARKER
      # Strip the NUL terminators for nicer output. The non-bang String#delete is
      # required here: delete! returns nil when there is nothing to remove.
      content_encoding = read_string(item_info_end_pos - @buf.pos).delete("\0")
    end
    @sub_items << {item_id: item_id, item_type: item_type, content_encoding: content_encoding}
    @buf.seek(item_info_end_pos) # we are not interested in anything else, go directly to the end of this 'infe' box
  end
end
|
|
213
|
+
|
|
214
|
+
# Skips the 1-byte version and the 3-byte flags field that open a box body
# (both are expected to be zero in the boxes this is used for).
def read_nil_version_and_flag
  version_width = 1
  flags_width = 3
  safe_skip(@buf, version_width)
  safe_skip(@buf, flags_width)
end
|
|
218
|
+
|
|
219
|
+
# Reads the Primary Item (pitm) box and stores the item id in @primary_item_id.
# The id is 16 bits wide for version 0 and 32 bits wide otherwise.
def read_primary_item_box
  box_version = read_int_8
  safe_read(@buf, 3) # flags, always 0 in this current box
  @primary_item_id = box_version == 0 ? read_int_16 : read_int_32
end
|
|
228
|
+
|
|
229
|
+
# The ITEM_PROPERTIES_CONTAINER_BOX (ipco) holds an implicitly 1-based list of item properties.
# While parsing it, every property is stored under its own index.
# The primary item is later tied to some of these properties through that same index, and all
# properties associated with the primary item are needed to output relevant data.
# That association between an item and its properties lives in the ITEM_PROPERTIES_ASSOCIATION_BOX (ipma),
# which is read right after the container.
def read_item_properties_box
  container_length = read_int_32
  container_type = read_string(4)
  return if container_type != ITEM_PROPERTIES_CONTAINER_BOX

  read_item_properties_container_box(container_length)

  read_int_32 # ipma_length, not used
  association_type = read_string(4)
  read_item_properties_association_box if association_type == ITEM_PROPERTIES_ASSOCIATION_BOX
end
|
|
242
|
+
|
|
243
|
+
# Walks the property boxes inside the ipco container and stores every
# recognised one in @item_props under its 1-based position in the container.
# Unrecognised property types are skipped but still consume an index.
#
# box_length - declared length of the ipco box, including its 8-byte header.
def read_item_properties_container_box(box_length)
  container_end = @buf.pos + box_length - HEADER_LENGTH
  prop_length = read_int_32
  prop_type = read_string(4)
  prop_start = @buf.pos
  prop_index = 1
  while @buf.pos < container_end
    parsed =
      case prop_type
      when IMAGE_SPATIAL_EXTENTS_BOX
        read_nil_version_and_flag
        # hash values are evaluated left to right: width is read before height
        {type: IMAGE_SPATIAL_EXTENTS_BOX, width: read_int_32, height: read_int_32}
      when PIXEL_ASPECT_RATIO_BOX
        h_spacing = read_int_32
        v_spacing = read_int_32
        {type: PIXEL_ASPECT_RATIO_BOX, pixel_aspect_ratio: "#{h_spacing}/#{v_spacing}"}
      when COLOUR_INFO_BOX
        # NOTE(review): the package spec expects colour_primaries == 28259 (0x6E63, "nc") and
        # transfer_characteristics == 27768 (0x6C78, "lx"), which looks like the box's leading
        # four-character colour type being read as the first two values - confirm against ISO/IEC 14496-12.
        colour_values = {
          colour_primaries: read_int_16,
          transfer_characteristics: read_int_16,
          matrix_coefficients: read_int_16
        }
        {type: COLOUR_INFO_BOX, colour_info: colour_values}
      when PIXEL_INFO_BOX
        read_nil_version_and_flag
        channel_count = read_int_8
        # NOTE(review): key numbering starts at 2 (bits_in_channel_2 for the first channel);
        # kept as is because the package spec asserts on exactly these key names.
        channels = (2..channel_count + 1).map do |channel_number|
          {:"bits_in_channel_#{channel_number}" => read_int_8}
        end
        {type: PIXEL_INFO_BOX, pixel_info: channels}
      when RELATIVE_LOCATION_BOX
        read_nil_version_and_flag
        {type: RELATIVE_LOCATION_BOX, horizontal_offset: read_int_32, vertical_offset: read_int_32}
      when CLEAN_APERTURE_BOX
        aperture = {
          clean_aperture_width_n: read_int_32,
          clean_aperture_width_d: read_int_32,
          clean_aperture_height_n: read_int_32,
          clean_aperture_height_d: read_int_32,
          horiz_off_n: read_int_32,
          horiz_off_d: read_int_32,
          vert_off_n: read_int_32,
          vert_off_d: read_int_32
        }
        {type: CLEAN_APERTURE_BOX, clean_aperture: [aperture]}
      when IMAGE_ROTATION_BOX
        read_nil_version_and_flag
        bits = convert_byte_to_binary(read_int_8)
        # only the last 2 bits carry the angle multiplier; multiplier * 90 is the angle
        {type: IMAGE_ROTATION_BOX, rotation: bits[6, 2].join.to_i(2) * 90}
      end
    @item_props[prop_index] = parsed if parsed

    prop_length, prop_type, prop_start = get_next_box(prop_start, prop_length, container_end)
    prop_index += 1
  end
end
|
|
332
|
+
|
|
333
|
+
# Reads the Item Properties Association (ipma) box and appends to
# @item_props_idxs the property indexes associated with the primary item.
# Entries are read in order; reading stops as soon as the primary item's
# entry has been consumed.
def read_item_properties_association_box
  box_version = read_int_8
  safe_read(@buf, 2) # first 2 of the 3 flag bytes; only the least significant bit of the last byte matters
  flags = read_int_8
  wide_index = (flags & 1) == 1 # flag set: 15-bit property index, otherwise 7-bit
  entry_count = read_int_32

  entry_count.times do
    item_id = box_version == 0 ? read_int_16 : read_int_32
    primary = item_id == @primary_item_id

    read_int_8.times do # association_count
      # the top bit of the first byte is the "essential" flag and is not part of the index
      property_index = read_int_8 & 0x7F
      property_index = (property_index << 8) | read_int_8 if wide_index
      # we are interested only in the primary item properties
      @item_props_idxs << property_index if primary
    end

    # nothing after the primary item's entry is of interest
    return if primary
  end
end
|
|
367
|
+
|
|
368
|
+
# Copies the values of every property associated with the primary item
# (indexes in @item_props_idxs) from @item_props into the result variables.
# Indexes without a stored property are ignored.
def fill_primary_values
  @item_props_idxs.each do |prop_index|
    property = @item_props[prop_index]
    next if property.nil?

    case property[:type]
    when IMAGE_SPATIAL_EXTENTS_BOX
      @width = property[:width]
      @height = property[:height]
    when PIXEL_ASPECT_RATIO_BOX
      @pixel_aspect_ratio = property[:pixel_aspect_ratio]
    when COLOUR_INFO_BOX
      @colour_info = property[:colour_info]
    when PIXEL_INFO_BOX
      @pixel_info = property[:pixel_info]
    when RELATIVE_LOCATION_BOX
      @horizontal_offset = property[:horizontal_offset]
      @vertical_offset = property[:vertical_offset]
    when CLEAN_APERTURE_BOX
      @clean_aperture = property[:clean_aperture]
    when IMAGE_ROTATION_BOX
      @rotation = property[:rotation]
    end
  end
end
|
|
392
|
+
|
|
393
|
+
# Reads 4-byte chunks until one passes meaningful? or the end of the meta box
# is reached. Returns that chunk, or nil when the meta box is exhausted first.
def next_meaningful_meta_byte
  until @buf.pos >= @metadata_end_pos
    chunk = read_string(4)
    return chunk if meaningful?(chunk)
  end
end
|
|
399
|
+
|
|
400
|
+
# Skips the rest of the current box and reads the header of the box after it.
#
# box_start_pos     - position right after the current box's 8-byte header
# box_length        - declared length of the current box, header included
# end_pos_upper_box - end position of the enclosing box
#
# Returns [next_box_length, next_box_name, position_after_its_header], or nil
# when the current box is the last one inside the enclosing box.
def get_next_box(box_start_pos, box_length, end_pos_upper_box)
  # A declared length smaller than the header itself (0 = "extends to the end
  # of the file", 1 = 64-bit size follows, 2..7 = invalid) would make the seek
  # below land on or before the current box, and callers would loop over the
  # same bytes again. Treat such a box as the last one in its container and
  # park the buffer at the container's end so the callers' loops terminate.
  if box_length < HEADER_LENGTH
    @buf.seek(end_pos_upper_box)
    return
  end

  skip_pos = box_start_pos + box_length - HEADER_LENGTH
  @buf.seek(skip_pos)
  return if skip_pos >= end_pos_upper_box
  next_box_length = read_int_32
  next_box_name = read_string(4)
  [next_box_length, next_box_name, @buf.pos]
end
|
|
408
|
+
|
|
409
|
+
# True unless +byte+ equals MEANINGLESS_BYTE (four NUL bytes).
def meaningful?(byte)
  !(byte == MEANINGLESS_BYTE)
end
|
|
412
|
+
|
|
413
|
+
# Returns the bits of +integer+ as an array of Integers (0 or 1), most
# significant bit first, left-padded with zeros to at least 8 entries.
#
# Every element is an Integer. Padding used to be added as '0' strings, which
# produced a mixed Integer/String array; joining the array gives the same
# string as before, so existing callers are unaffected.
# Values that are zero or negative yield eight zeros.
def convert_byte_to_binary(integer)
  return Array.new(8, 0) unless integer > 0

  width = [integer.bit_length, 8].max
  # Integer#[] returns the bit at the given position (0 = least significant)
  Array.new(width) { |position| integer[width - 1 - position] }
end
|
|
425
|
+
|
|
426
|
+
# Tells whether +filename+ ends in a .heic or .heif extension (case-insensitive).
# Returns the match position (truthy) or nil.
# The pattern is anchored with \z (end of string): `$` also matches right
# before a newline, so a name such as "a.heic\nb.txt" would have matched.
def likely_match?(filename)
  filename =~ /\.hei[cf]\z/i
end
|
|
429
|
+
|
|
430
|
+
FormatParser.register_parser(new, natures: :image, formats: [:heif, :heic], priority: 2)
|
|
431
|
+
end
|
data/lib/parsers/moov_parser.rb
CHANGED
data/lib/parsers/pdf_parser.rb
CHANGED
|
@@ -21,5 +21,5 @@ class FormatParser::PDFParser
|
|
|
21
21
|
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
FormatParser.register_parser new, natures: :document, formats: :pdf, priority:
|
|
24
|
+
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 3
|
|
25
25
|
end
|
data/lib/parsers/zip_parser.rb
CHANGED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for the HEIF/HEIC parser, run against the fixtures under HEIF/.
# Fixture files are opened in block form so the handles are closed after parsing.
# The compatible-brands checks use `expect(...).to match_array(...)`; they used to
# wrap a legacy `should =~` call inside an `expect(...)` that had no matcher.
describe FormatParser::HEIFParser do
  it 'is able to parse single heif image with heic major brand' do
    heif_path = fixtures_dir + 'HEIF/SingleImage.heic'

    result = File.open(heif_path, 'rb') { |file| subject.call(file) }
    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(4000)
    expect(result.height_px).to eq(3000)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse single heif image with mif1 major brand' do
    heif_path = fixtures_dir + 'HEIF/SingleImage_Autumn.heic'

    result = File.open(heif_path, 'rb') { |file| subject.call(file) }
    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heif)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    expect(result.content_type).to eq('image/heif')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse image collection with mif1 major brand' do
    heif_path = fixtures_dir + 'HEIF/ImageCollection.heic'

    result = File.open(heif_path, 'rb') { |file| subject.call(file) }
    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heif)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    expect(result.content_type).to eq('image/heif')
  end

  it 'is able to parse image collection with colour info' do
    heif_path = fixtures_dir + 'HEIF/SingleImage_Autumn_WithColourInfo.heic'

    result = File.open(heif_path, 'rb') { |file| subject.call(file) }
    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    colour_info = result.intrinsics[:colour_info]
    expect(colour_info[:colour_primaries]).to eq(28259)
    expect(colour_info[:transfer_characteristics]).to eq(27768)
    expect(colour_info[:matrix_coefficients]).to eq(2)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end

  it 'is able to parse image collection with pixel info' do
    heif_path = fixtures_dir + 'HEIF/SingleImage_Autumn_WithColourInfo.heic'

    result = File.open(heif_path, 'rb') { |file| subject.call(file) }
    expect(result).not_to be_nil
    expect(result.nature).to eq(:image)
    expect(result.format).to eq(:heic)
    expect(result.width_px).to eq(1440)
    expect(result.height_px).to eq(960)
    pixel_info = result.intrinsics[:pixel_info]
    expect(pixel_info[0][:bits_in_channel_2]).to eq(8)
    expect(pixel_info[1][:bits_in_channel_3]).to eq(8)
    expect(pixel_info[2][:bits_in_channel_4]).to eq(8)
    expect(result.content_type).to eq('image/heic')
    expect(result.intrinsics[:compatible_brands]).to match_array(['mif1', 'heic'])
  end
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: format_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Noah Berman
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2022-
|
|
12
|
+
date: 2022-05-31 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: ks
|
|
@@ -238,6 +238,7 @@ files:
|
|
|
238
238
|
- lib/parsers/fdx_parser.rb
|
|
239
239
|
- lib/parsers/flac_parser.rb
|
|
240
240
|
- lib/parsers/gif_parser.rb
|
|
241
|
+
- lib/parsers/heif_parser.rb
|
|
241
242
|
- lib/parsers/jpeg_parser.rb
|
|
242
243
|
- lib/parsers/m3u_parser.rb
|
|
243
244
|
- lib/parsers/moov_parser.rb
|
|
@@ -278,6 +279,7 @@ files:
|
|
|
278
279
|
- spec/parsers/fdx_parser_spec.rb
|
|
279
280
|
- spec/parsers/flac_parser_spec.rb
|
|
280
281
|
- spec/parsers/gif_parser_spec.rb
|
|
282
|
+
- spec/parsers/heif_parser_spec.rb
|
|
281
283
|
- spec/parsers/jpeg_parser_spec.rb
|
|
282
284
|
- spec/parsers/m3u_parser_spec.rb
|
|
283
285
|
- spec/parsers/moov_parser_spec.rb
|
|
@@ -315,7 +317,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
315
317
|
- !ruby/object:Gem::Version
|
|
316
318
|
version: '0'
|
|
317
319
|
requirements: []
|
|
318
|
-
rubygems_version: 3.
|
|
320
|
+
rubygems_version: 3.1.6
|
|
319
321
|
signing_key:
|
|
320
322
|
specification_version: 4
|
|
321
323
|
summary: A library for efficient parsing of file metadata
|