format_parser 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile +0 -4
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +17 -0
- data/lib/parsers/heif_parser.rb +431 -0
- data/lib/parsers/moov_parser/decoder.rb +17 -0
- data/lib/parsers/moov_parser.rb +21 -5
- data/lib/parsers/pdf_parser.rb +1 -1
- data/lib/parsers/zip_parser.rb +1 -1
- data/lib/video.rb +2 -0
- data/spec/parsers/heif_parser_spec.rb +75 -0
- data/spec/parsers/moov_parser_spec.rb +2 -0
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d90a6eee951feb5017bdedc0fb6dd4be49fc0a7f218972c0fb423b1985bf9a97
|
|
4
|
+
data.tar.gz: ee6caab359b0e01450897d00abd1f190b131e40d842d487d1f29107ac3b374f0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 72aca621c20dfb24443e32c52a3d27f64a4887c1254f6d2295f77b39ce57e1a5ef0aa52f365630badcb2e3aa90d32c3546c8fcea3b83e025e2c4fc3606dd2dd7
|
|
7
|
+
data.tar.gz: 90da48352579c4044035732fd1f837d86db1a2d3c8f325b60ee688630f6cb4ea8efdb2ffd7887ac4e49cf9f9df2a22fe2189187c32d606b8f027a0a3d6a3ec5f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
## 1.3.0
|
|
2
|
+
* Add `heif_parser` and support for `HEIF` and `HEIC` formats. Exif parsing is still missing.
|
|
3
|
+
*
|
|
4
|
+
## 1.2.1
|
|
5
|
+
* Resolve bug when `stts` atom is `nil`
|
|
6
|
+
|
|
7
|
+
## 1.2.0
|
|
8
|
+
* Add support for `codecs` in moov_parser for video metadata
|
|
9
|
+
|
|
1
10
|
## 1.1.0
|
|
2
11
|
* Add support for `frame_rate` in moov_parser
|
|
3
12
|
|
data/Gemfile
CHANGED
data/lib/io_utils.rb
CHANGED
|
@@ -30,5 +30,22 @@ module FormatParser::IOUtils
|
|
|
30
30
|
nil
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
def read_int_8
|
|
34
|
+
safe_read(@buf, 1).unpack('C').first
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def read_int_16
|
|
38
|
+
safe_read(@buf, 2).unpack('n').first
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def read_int_32
|
|
42
|
+
safe_read(@buf, 4).unpack('N').first
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# 'n' is the number of bytes to read
|
|
46
|
+
def read_string(n)
|
|
47
|
+
safe_read(@buf, n)
|
|
48
|
+
end
|
|
49
|
+
|
|
33
50
|
### TODO: Some kind of built-in offset for the read
|
|
34
51
|
end
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
# HEIF stands for High-Efficiency Image File format, which is basically a container that is capable of storing an image, or a sequence of images in a single file.
|
|
2
|
+
# There are a number of variants of HEIF, which can be used to store images, sequences of images, or videos using different codecs.
|
|
3
|
+
# The variant that Apple uses to store images and sequences of images in its iOS and macOS operating systems is High Efficiency Image Coding (HEIC), which uses HEVC / H.265 for content compression.
|
|
4
|
+
class FormatParser::HEIFParser
|
|
5
|
+
include FormatParser::IOUtils
|
|
6
|
+
|
|
7
|
+
HEIF_MARKER = [0x68, 0x65, 0x69, 0x63].pack('C4') # heif marker
|
|
8
|
+
FILE_TYPE_BOX_MARKER = [0x66, 0x74, 0x79, 0x70].pack('C4') # ftyp marker
|
|
9
|
+
META_BOX_MARKER = [0x6D, 0x65, 0x74, 0x61].pack('C4') # meta marker
|
|
10
|
+
MIF1_MARKER = [0x6D, 0x69, 0x66, 0x31].pack('C4') # mif1 marker
|
|
11
|
+
MSF1_MARKER = [0x6D, 0x73, 0x66, 0x31].pack('C4') # msf1 marker
|
|
12
|
+
MEANINGLESS_BYTE = [0x00, 0x00, 0x00, 0x00].pack('C4')
|
|
13
|
+
HANDLER_MARKER = [0x68, 0x64, 0x6C, 0x72].pack('C4') # hdlr marker
|
|
14
|
+
ITEM_PROPERTIES_BOX = [0x69, 0x70, 0x72, 0x70].pack('C4') # iprp marker
|
|
15
|
+
ITEM_PROPERTIES_CONTAINER_BOX = [0x69, 0x70, 0x63, 0x6F].pack('C4') # ipco marker
|
|
16
|
+
IMAGE_SPATIAL_EXTENTS_BOX = [0x69, 0x73, 0x70, 0x65].pack('C4') # ispe marker
|
|
17
|
+
PIXEL_ASPECT_RATIO_BOX = [0x70, 0x61, 0x73, 0x70].pack('C4') # pasp marker
|
|
18
|
+
ITEM_INFO_BOX = [0x69, 0x69, 0x6E, 0x66].pack('C4') # iinf marker
|
|
19
|
+
ITEM_INFO_ENTRY = [0x69, 0x6E, 0x66, 0x65].pack('C4') # infe marker
|
|
20
|
+
MIME_MARKER = [0x6D, 0x69, 0x6D, 0x65].pack('C4') # mime marker
|
|
21
|
+
COLOUR_INFO_BOX = [0x63, 0x6F, 0x6C, 0x72].pack('C4') # colr marker
|
|
22
|
+
PIXEL_INFO_BOX = [0x70, 0x69, 0x78, 0x69].pack('C4') # pixi marker
|
|
23
|
+
RELATIVE_LOCATION_BOX = [0x72, 0x6C, 0x6F, 0x63].pack('C4') # rloc marker
|
|
24
|
+
CLEAN_APERTURE_BOX = [0x63, 0x6C, 0x61, 0x70].pack('C4') # clap marker
|
|
25
|
+
PRIMARY_ITEM_BOX = [0x70, 0x69, 0x74, 0x6D].pack('C4') # pitm marker
|
|
26
|
+
ITEM_PROPERTIES_ASSOCIATION_BOX = [0x69, 0x70, 0x6D, 0x61].pack('C4') # ipma marker
|
|
27
|
+
IMAGE_ROTATION_BOX = [0x69, 0x72, 0x6F, 0x74].pack('C4') # irot marker
|
|
28
|
+
HEADER_LENGTH = 8 # every box header has a length of 8 bytes
|
|
29
|
+
HEIC_MIME_POSSIBLE_TYPES = {
|
|
30
|
+
'heic' => :heic,
|
|
31
|
+
'heix' => :heix,
|
|
32
|
+
'heim' => :heim,
|
|
33
|
+
'heis' => :heis
|
|
34
|
+
}
|
|
35
|
+
HEIC_MIME_TYPE = 'image/heic'
|
|
36
|
+
HEIF_MIME_TYPE = 'image/heif'
|
|
37
|
+
# TODO: use the following when adding image-sequence parsing
|
|
38
|
+
# HEIC_SEQUENCE_MIME_TYPE = 'image/heic-sequence'
|
|
39
|
+
# HEIF_SEQUENCE_MIME_TYPE = 'image/heif-sequence'
|
|
40
|
+
|
|
41
|
+
def self.call(io)
|
|
42
|
+
new.call(io)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def call(io)
|
|
46
|
+
@buf = FormatParser::IOConstraint.new(io)
|
|
47
|
+
@format = nil
|
|
48
|
+
@@major_brand = nil
|
|
49
|
+
@width = nil
|
|
50
|
+
@height = nil
|
|
51
|
+
@exif_data_frames = []
|
|
52
|
+
@compatible_brands = nil
|
|
53
|
+
@metadata_start_pos = 0
|
|
54
|
+
@metadata_end_pos = 0
|
|
55
|
+
@handler_type = nil
|
|
56
|
+
@sub_items = nil
|
|
57
|
+
@pixel_aspect_ratio = nil
|
|
58
|
+
@colour_info = nil
|
|
59
|
+
@pixel_info = nil
|
|
60
|
+
@horizontal_offset = nil
|
|
61
|
+
@vertical_offset = nil
|
|
62
|
+
@clean_aperture = nil
|
|
63
|
+
@primary_item_id = 0
|
|
64
|
+
@item_props = {}
|
|
65
|
+
@rotation = 0
|
|
66
|
+
@item_props_idxs = []
|
|
67
|
+
@content_type = nil
|
|
68
|
+
scan
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def scan
|
|
72
|
+
# All HEIC files must conform to ISO/IEC 23008-12:2017
|
|
73
|
+
# Moreover, all HEIC files conform to ISO/IEC 14496-12:2015 and should conform to Clause 4 of that spec.
|
|
74
|
+
# Files are formed as a series of objects, called boxes. All data is contained in such boxes.
|
|
75
|
+
# All boxes start with a header which defines both size and type.
|
|
76
|
+
# The size is the entire size of the box, including the size and type header, fields, and all contained boxes.
|
|
77
|
+
# The fields in the objects are stored with the most significant byte first, commonly known as network byte order or big-endian format.
|
|
78
|
+
# A HEIC file must contain a File Type Box (ftyp).
|
|
79
|
+
# A file conforms to all the requirements of the brands listed in the compatible_brands.
|
|
80
|
+
scan_file_type_box
|
|
81
|
+
|
|
82
|
+
# file may be identified by MIME type of Annex C of ISO/IEC 23008-12 if 'mif1' is the major brand or Annex D if 'msf1' is the major brand.
|
|
83
|
+
# the MIME indicates the nature and format of our assortment of bytes
|
|
84
|
+
# note particularly that the brand 'mif1' doesn't mandate a MovieBox ("moov").
|
|
85
|
+
# One or more brands must be included in the list of compatible brands
|
|
86
|
+
return if @compatible_brands.nil?
|
|
87
|
+
if @compatible_brands&.include?(MIF1_MARKER)
|
|
88
|
+
scan_meta_level_box
|
|
89
|
+
if @major_brand == MIF1_MARKER
|
|
90
|
+
@content_type = HEIF_MIME_TYPE
|
|
91
|
+
@format = :heif
|
|
92
|
+
elsif (@compatible_brands & HEIC_MIME_POSSIBLE_TYPES.keys).length > 0
|
|
93
|
+
@format = :heic
|
|
94
|
+
@content_type = HEIC_MIME_TYPE
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
if @compatible_brands&.include?(MSF1_MARKER)
|
|
98
|
+
# TODO
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
result = FormatParser::Image.new(
|
|
102
|
+
format: @format,
|
|
103
|
+
width_px: @width,
|
|
104
|
+
height_px: @height,
|
|
105
|
+
intrinsics: {
|
|
106
|
+
compatible_brands: @compatible_brands,
|
|
107
|
+
handler_type: @handler_type,
|
|
108
|
+
# 'sub_items': @sub_items, # enable this if you want to output all the sub-items in the image
|
|
109
|
+
pixel_aspect_ratio: @pixel_aspect_ratio,
|
|
110
|
+
colour_info: @colour_info,
|
|
111
|
+
pixel_info: @pixel_info,
|
|
112
|
+
horizontal_offset: @horizontal_offset,
|
|
113
|
+
vertical_offset: @vertical_offset,
|
|
114
|
+
clean_aperture: @clean_aperture,
|
|
115
|
+
rotation: @rotation
|
|
116
|
+
},
|
|
117
|
+
content_type: @content_type
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
result
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def scan_file_type_box
|
|
124
|
+
file_type_box_length = read_int_32
|
|
125
|
+
return unless read_string(4) == FILE_TYPE_BOX_MARKER
|
|
126
|
+
@major_brand = read_string(4)
|
|
127
|
+
return unless @major_brand == HEIF_MARKER || @major_brand == MIF1_MARKER
|
|
128
|
+
read_string(4) # minor_brand
|
|
129
|
+
|
|
130
|
+
# Subtracting from the total length of the box specified in the header the size header itself (8 bytes = header length and length of ftyp)
|
|
131
|
+
# and the length of the major and minor brand, we obtain the compatible brands
|
|
132
|
+
data_left_length = file_type_box_length - HEADER_LENGTH - HEIF_MARKER.length - 4
|
|
133
|
+
|
|
134
|
+
@compatible_brands = []
|
|
135
|
+
(data_left_length / 4).times do
|
|
136
|
+
@compatible_brands << read_string(4)
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def scan_meta_level_box
|
|
141
|
+
metadata_length = read_int_32
|
|
142
|
+
return unless read_string(4) == META_BOX_MARKER
|
|
143
|
+
@metadata_start_pos = @buf.pos
|
|
144
|
+
@metadata_end_pos = @buf.pos + metadata_length - HEADER_LENGTH # the real data is always without the 8 initial bytes of the handler
|
|
145
|
+
read_nil_version_and_flag
|
|
146
|
+
|
|
147
|
+
# we are looking for box/containers right beneath the Meta box
|
|
148
|
+
# we start with the HDLR (Handler) box..
|
|
149
|
+
handler_length = read_int_32
|
|
150
|
+
return unless read_string(4) == HANDLER_MARKER
|
|
151
|
+
handler_length -= HEADER_LENGTH # subtract the header as usual (will not be mentioned anymore from now on)
|
|
152
|
+
handler_start = @buf.pos
|
|
153
|
+
# the handler type declares the type of metadata and thus the process by which the media-data in the track is presented
|
|
154
|
+
# it also indicates the structure or format of the ‘meta’ box contents
|
|
155
|
+
read_nil_version_and_flag
|
|
156
|
+
read_string(4) # pre_defined bytes, always 4 null bytes in the hdlr box
|
|
157
|
+
@handler_type = read_string(4)
|
|
158
|
+
@buf.seek(handler_start + handler_length) # the remaining part is reserved
|
|
159
|
+
|
|
160
|
+
# ..continue looking for the IINF box and especially for the IPRP box, containing info about the image itself
|
|
161
|
+
next_box_length = read_int_32
|
|
162
|
+
next_box = read_string(4)
|
|
163
|
+
next_box_start_pos = @buf.pos
|
|
164
|
+
while @buf.pos < @metadata_end_pos # we iterate over all next incoming boxed but without going outside the meta-box
|
|
165
|
+
case next_box
|
|
166
|
+
when PRIMARY_ITEM_BOX
|
|
167
|
+
read_primary_item_box
|
|
168
|
+
when ITEM_INFO_BOX
|
|
169
|
+
read_item_info_box
|
|
170
|
+
when ITEM_PROPERTIES_BOX
|
|
171
|
+
read_item_properties_box
|
|
172
|
+
fill_primary_values
|
|
173
|
+
when next_box == ''
|
|
174
|
+
break
|
|
175
|
+
end
|
|
176
|
+
next_box_length, next_box, next_box_start_pos = get_next_box(next_box_start_pos, next_box_length, @metadata_end_pos)
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def read_item_info_box
|
|
181
|
+
version = read_int_8
|
|
182
|
+
safe_skip(@buf, 3) # 0 flags
|
|
183
|
+
entry_count = if version == 0
|
|
184
|
+
read_int_16
|
|
185
|
+
else
|
|
186
|
+
read_int_32
|
|
187
|
+
end
|
|
188
|
+
@sub_items = []
|
|
189
|
+
entry_count.times {
|
|
190
|
+
item_info_entry_length = read_int_32
|
|
191
|
+
return unless read_string(4) == ITEM_INFO_ENTRY
|
|
192
|
+
item_info_end_pos = @buf.pos + item_info_entry_length - HEADER_LENGTH
|
|
193
|
+
version = read_int_8
|
|
194
|
+
safe_skip(@buf, 3) # 0 flags
|
|
195
|
+
case version
|
|
196
|
+
when 2
|
|
197
|
+
item_id = read_int_16
|
|
198
|
+
when 3
|
|
199
|
+
item_id = read_int_32
|
|
200
|
+
else
|
|
201
|
+
return # wrong version according to standards, hence return
|
|
202
|
+
end
|
|
203
|
+
safe_skip(@buf, 2) # not interested in the item_protection_index
|
|
204
|
+
item_type = read_string(4)
|
|
205
|
+
content_encoding = ''
|
|
206
|
+
if item_type == MIME_MARKER
|
|
207
|
+
content_encoding = read_string(item_info_end_pos - @buf.pos).delete!("\0") # remove the null-termination part for output visualization reason
|
|
208
|
+
end
|
|
209
|
+
@sub_items << {item_id: item_id, item_type: item_type, content_encoding: content_encoding}
|
|
210
|
+
@buf.seek(item_info_end_pos) # we are not interested in anything else, go directly to the end of this 'infe' box
|
|
211
|
+
}
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def read_nil_version_and_flag
|
|
215
|
+
safe_skip(@buf, 1) # version, always 0 in this current box
|
|
216
|
+
safe_skip(@buf, 3) # flags, always 0 in this current box
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def read_primary_item_box
|
|
220
|
+
version = read_int_8
|
|
221
|
+
safe_read(@buf, 3) # flags, always 0 in this current box
|
|
222
|
+
@primary_item_id = if version == 0
|
|
223
|
+
read_int_16
|
|
224
|
+
else
|
|
225
|
+
read_int_32
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
# the ITEM_PROPERTIES_CONTAINER_BOX contains an implicitly 1-based indexed list of item properties.
|
|
230
|
+
# While parsing this box we store each property under its own index.
|
|
231
|
+
# The reason is that the primary_item is associated with some of these properties through the same index
|
|
232
|
+
# and in order to output relevant data from the format_parser we need all the properties associated to the primary_item.
|
|
233
|
+
# Hence the need for the association between an item and its properties, found in the ITEM_PROPERTIES_ASSOCIATION_BOX
|
|
234
|
+
def read_item_properties_box
|
|
235
|
+
ipco_length = read_int_32
|
|
236
|
+
return unless read_string(4) == ITEM_PROPERTIES_CONTAINER_BOX
|
|
237
|
+
read_item_properties_container_box(ipco_length)
|
|
238
|
+
read_int_32 # ipma_length
|
|
239
|
+
return unless read_string(4) == ITEM_PROPERTIES_ASSOCIATION_BOX
|
|
240
|
+
read_item_properties_association_box
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
def read_item_properties_container_box(box_length)
|
|
244
|
+
end_of_ipco_box = @buf.pos + box_length - HEADER_LENGTH
|
|
245
|
+
item_prop_length = read_int_32
|
|
246
|
+
item_prop_name = read_string(4)
|
|
247
|
+
item_prop_start_pos = @buf.pos
|
|
248
|
+
item_prop_index = 1
|
|
249
|
+
while @buf.pos < end_of_ipco_box
|
|
250
|
+
case item_prop_name
|
|
251
|
+
when IMAGE_SPATIAL_EXTENTS_BOX
|
|
252
|
+
read_nil_version_and_flag
|
|
253
|
+
width = read_int_32
|
|
254
|
+
height = read_int_32
|
|
255
|
+
@item_props[item_prop_index] = {
|
|
256
|
+
type: IMAGE_SPATIAL_EXTENTS_BOX,
|
|
257
|
+
width: width,
|
|
258
|
+
height: height
|
|
259
|
+
}
|
|
260
|
+
when PIXEL_ASPECT_RATIO_BOX
|
|
261
|
+
h_spacing = read_int_32
|
|
262
|
+
v_spacing = read_int_32
|
|
263
|
+
pixel_aspect_ratio = "#{h_spacing}/#{v_spacing}"
|
|
264
|
+
@item_props[item_prop_index] = {
|
|
265
|
+
type: PIXEL_ASPECT_RATIO_BOX,
|
|
266
|
+
pixel_aspect_ratio: pixel_aspect_ratio
|
|
267
|
+
}
|
|
268
|
+
when COLOUR_INFO_BOX
|
|
269
|
+
colour_info = {
|
|
270
|
+
colour_primaries: read_int_16,
|
|
271
|
+
transfer_characteristics: read_int_16,
|
|
272
|
+
matrix_coefficients: read_int_16
|
|
273
|
+
}
|
|
274
|
+
@item_props[item_prop_index] = {
|
|
275
|
+
type: COLOUR_INFO_BOX,
|
|
276
|
+
colour_info: colour_info
|
|
277
|
+
}
|
|
278
|
+
when PIXEL_INFO_BOX
|
|
279
|
+
pixel_info = []
|
|
280
|
+
read_nil_version_and_flag
|
|
281
|
+
num_channels = read_int_8
|
|
282
|
+
channel = 1
|
|
283
|
+
while channel <= num_channels
|
|
284
|
+
channel += 1
|
|
285
|
+
pixel_info << {
|
|
286
|
+
"bits_in_channel_#{channel}": read_int_8
|
|
287
|
+
}
|
|
288
|
+
end
|
|
289
|
+
@item_props[item_prop_index] = {
|
|
290
|
+
type: PIXEL_INFO_BOX,
|
|
291
|
+
pixel_info: pixel_info
|
|
292
|
+
}
|
|
293
|
+
when RELATIVE_LOCATION_BOX
|
|
294
|
+
read_nil_version_and_flag
|
|
295
|
+
horizontal_offset = read_int_32
|
|
296
|
+
vertical_offset = read_int_32
|
|
297
|
+
@item_props[item_prop_index] = {
|
|
298
|
+
type: RELATIVE_LOCATION_BOX,
|
|
299
|
+
horizontal_offset: horizontal_offset,
|
|
300
|
+
vertical_offset: vertical_offset
|
|
301
|
+
}
|
|
302
|
+
when CLEAN_APERTURE_BOX
|
|
303
|
+
clean_aperture = []
|
|
304
|
+
clean_aperture << {
|
|
305
|
+
clean_aperture_width_n: read_int_32,
|
|
306
|
+
clean_aperture_width_d: read_int_32,
|
|
307
|
+
clean_aperture_height_n: read_int_32,
|
|
308
|
+
clean_aperture_height_d: read_int_32,
|
|
309
|
+
horiz_off_n: read_int_32,
|
|
310
|
+
horiz_off_d: read_int_32,
|
|
311
|
+
vert_off_n: read_int_32,
|
|
312
|
+
vert_off_d: read_int_32
|
|
313
|
+
}
|
|
314
|
+
@item_props[item_prop_index] = {
|
|
315
|
+
type: CLEAN_APERTURE_BOX,
|
|
316
|
+
clean_aperture: clean_aperture
|
|
317
|
+
}
|
|
318
|
+
when IMAGE_ROTATION_BOX
|
|
319
|
+
read_nil_version_and_flag
|
|
320
|
+
binary = convert_byte_to_binary(read_int_8)
|
|
321
|
+
# we need only the last 2 bits to retrieve the angle multiplier. angle multiplier * 90 specifies the angle
|
|
322
|
+
rotation = binary.slice(6, 2).join.to_i(2) * 90
|
|
323
|
+
@item_props[item_prop_index] = {
|
|
324
|
+
type: IMAGE_ROTATION_BOX,
|
|
325
|
+
rotation: rotation
|
|
326
|
+
}
|
|
327
|
+
end
|
|
328
|
+
item_prop_length, item_prop_name, item_prop_start_pos = get_next_box(item_prop_start_pos, item_prop_length, end_of_ipco_box)
|
|
329
|
+
item_prop_index += 1
|
|
330
|
+
end
|
|
331
|
+
end
|
|
332
|
+
|
|
333
|
+
def read_item_properties_association_box
|
|
334
|
+
version = read_int_8
|
|
335
|
+
safe_read(@buf, 2) # we skip the first 2 bytes of the flags (total of 3 bytes) cause we care only about the least significant bit
|
|
336
|
+
flags = read_int_8
|
|
337
|
+
entry_count = read_int_32
|
|
338
|
+
item_id = 0
|
|
339
|
+
entry_count.times do
|
|
340
|
+
item_id = if version == 0
|
|
341
|
+
read_int_16
|
|
342
|
+
else
|
|
343
|
+
read_int_32
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
association_count = read_int_8
|
|
347
|
+
association_count.times do
|
|
348
|
+
# we need to retrieve the "essential" bit, which is just the first bit in the next byte
|
|
349
|
+
binary = convert_byte_to_binary(read_int_8)
|
|
350
|
+
# essential_bit = binary[0] # uncomment if needed
|
|
351
|
+
binary.concat(convert_byte_to_binary(read_int_8)) if (flags & 1) == 1 # if flag is 1 we need the next 15 bits instead of only the next 7 bits
|
|
352
|
+
# we need to nullify the 1st bit since that one was the essential bit and doesn't count now to calculate the property index
|
|
353
|
+
binary[0] = 0
|
|
354
|
+
item_property_index = binary.join.to_i(2)
|
|
355
|
+
# we are interested only in the primary item properties
|
|
356
|
+
@item_props_idxs << item_property_index if item_id == @primary_item_id
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
# we are interested only in the primary item
|
|
360
|
+
if item_id != @primary_item_id
|
|
361
|
+
next
|
|
362
|
+
else
|
|
363
|
+
return
|
|
364
|
+
end
|
|
365
|
+
end
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
def fill_primary_values
|
|
369
|
+
@item_props_idxs.each { |x|
|
|
370
|
+
next if @item_props[x].nil?
|
|
371
|
+
prop = @item_props[x]
|
|
372
|
+
case prop[:type]
|
|
373
|
+
when IMAGE_SPATIAL_EXTENTS_BOX
|
|
374
|
+
@width = prop[:width]
|
|
375
|
+
@height = prop[:height]
|
|
376
|
+
when PIXEL_ASPECT_RATIO_BOX
|
|
377
|
+
@pixel_aspect_ratio = prop[:pixel_aspect_ratio]
|
|
378
|
+
when COLOUR_INFO_BOX
|
|
379
|
+
@colour_info = prop[:colour_info]
|
|
380
|
+
when PIXEL_INFO_BOX
|
|
381
|
+
@pixel_info = prop[:pixel_info]
|
|
382
|
+
when RELATIVE_LOCATION_BOX
|
|
383
|
+
@horizontal_offset = prop[:horizontal_offset]
|
|
384
|
+
@vertical_offset = prop[:vertical_offset]
|
|
385
|
+
when CLEAN_APERTURE_BOX
|
|
386
|
+
@clean_aperture = prop[:clean_aperture]
|
|
387
|
+
when IMAGE_ROTATION_BOX
|
|
388
|
+
@rotation = prop[:rotation]
|
|
389
|
+
end
|
|
390
|
+
}
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
def next_meaningful_meta_byte
|
|
394
|
+
while @buf.pos < @metadata_end_pos
|
|
395
|
+
next_byte = read_string(4)
|
|
396
|
+
return next_byte if meaningful?(next_byte)
|
|
397
|
+
end
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
def get_next_box(box_start_pos, box_length, end_pos_upper_box)
|
|
401
|
+
skip_pos = box_start_pos + box_length - HEADER_LENGTH
|
|
402
|
+
@buf.seek(skip_pos)
|
|
403
|
+
return if skip_pos >= end_pos_upper_box
|
|
404
|
+
next_box_length = read_int_32
|
|
405
|
+
next_box_name = read_string(4)
|
|
406
|
+
[next_box_length, next_box_name, @buf.pos]
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
def meaningful?(byte)
|
|
410
|
+
byte != MEANINGLESS_BYTE
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
def convert_byte_to_binary(integer)
|
|
414
|
+
binary = []
|
|
415
|
+
while integer > 0
|
|
416
|
+
binary << integer % 2
|
|
417
|
+
integer /= 2
|
|
418
|
+
end
|
|
419
|
+
binary_value = binary.reverse
|
|
420
|
+
(8 - binary_value.length).times do
|
|
421
|
+
binary_value.prepend('0')
|
|
422
|
+
end
|
|
423
|
+
binary_value
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
def likely_match?(filename)
|
|
427
|
+
filename =~ /\.hei[cf]$/i
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
FormatParser.register_parser(new, natures: :image, formats: [:heif, :heic], priority: 2)
|
|
431
|
+
end
|
|
@@ -130,6 +130,23 @@ class FormatParser::MOOVParser::Decoder
|
|
|
130
130
|
stts
|
|
131
131
|
end
|
|
132
132
|
|
|
133
|
+
def parse_stsd_atom(io, _)
|
|
134
|
+
version = read_byte_value(io)
|
|
135
|
+
is_v1 = version == 1
|
|
136
|
+
stsd = {
|
|
137
|
+
version: version,
|
|
138
|
+
flags: read_bytes(io, 3),
|
|
139
|
+
number_of_entries: is_v1 ? read_64bit_uint(io) : read_32bit_uint(io),
|
|
140
|
+
codecs: []
|
|
141
|
+
}
|
|
142
|
+
stsd[:number_of_entries].times {
|
|
143
|
+
codec_length = read_32bit_uint(io)
|
|
144
|
+
stsd[:codecs] << read_bytes(io, 4)
|
|
145
|
+
io.seek(io.pos + codec_length - 8) # 8 bytes is the header length containing the codec length and the codec name that we just did read
|
|
146
|
+
}
|
|
147
|
+
stsd
|
|
148
|
+
end
|
|
149
|
+
|
|
133
150
|
def parse_mdhd_atom(io, _)
|
|
134
151
|
version = read_byte_value(io)
|
|
135
152
|
is_v1 = version == 1
|
data/lib/parsers/moov_parser.rb
CHANGED
|
@@ -64,14 +64,14 @@ class FormatParser::MOOVParser
|
|
|
64
64
|
intrinsics: atom_tree,
|
|
65
65
|
)
|
|
66
66
|
else
|
|
67
|
-
frame_rate = parse_sample_atom(decoder, atom_tree)&.truncate(2)
|
|
68
67
|
FormatParser::Video.new(
|
|
69
68
|
format: format_from_moov_type(file_type),
|
|
70
69
|
width_px: width,
|
|
71
70
|
height_px: height,
|
|
72
|
-
frame_rate:
|
|
71
|
+
frame_rate: parse_time_to_sample_atom(decoder, atom_tree)&.truncate(2),
|
|
73
72
|
media_duration_seconds: media_duration_s,
|
|
74
73
|
content_type: MP4_MIXED_MIME_TYPE,
|
|
74
|
+
codecs: parse_sample_description_atom(decoder, atom_tree),
|
|
75
75
|
intrinsics: atom_tree
|
|
76
76
|
)
|
|
77
77
|
end
|
|
@@ -119,7 +119,7 @@ class FormatParser::MOOVParser
|
|
|
119
119
|
|
|
120
120
|
# Sample information is found in the 'time-to-sample' stts atom.
|
|
121
121
|
# The media atom mdhd is needed too in order to get the movie timescale
|
|
122
|
-
def
|
|
122
|
+
def parse_time_to_sample_atom(decoder, atom_tree)
|
|
123
123
|
video_trak_atom = decoder.find_video_trak_atom(atom_tree)
|
|
124
124
|
|
|
125
125
|
stts = if video_trak_atom
|
|
@@ -136,7 +136,7 @@ class FormatParser::MOOVParser
|
|
|
136
136
|
|
|
137
137
|
if stts && mdhd
|
|
138
138
|
timescale = mdhd.atom_fields[:tscale]
|
|
139
|
-
sample_duration = stts.field_value(:entries).
|
|
139
|
+
sample_duration = stts.field_value(:entries).dig(0, :sample_duration)
|
|
140
140
|
if timescale.nil? || timescale == 0 || sample_duration.nil? || sample_duration == 0
|
|
141
141
|
nil
|
|
142
142
|
else
|
|
@@ -147,5 +147,21 @@ class FormatParser::MOOVParser
|
|
|
147
147
|
end
|
|
148
148
|
end
|
|
149
149
|
|
|
150
|
-
|
|
150
|
+
def parse_sample_description_atom(decoder, atom_tree)
|
|
151
|
+
video_trak_atom = decoder.find_video_trak_atom(atom_tree)
|
|
152
|
+
|
|
153
|
+
stsd = if video_trak_atom
|
|
154
|
+
decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'mdia', 'minf', 'stbl', 'stsd')
|
|
155
|
+
else
|
|
156
|
+
decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'mdia', 'minf', 'stbl', 'stsd')
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
if stsd
|
|
160
|
+
stsd.field_value(:codecs)
|
|
161
|
+
else
|
|
162
|
+
nil
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
FormatParser.register_parser new, natures: :video, formats: FTYP_MAP.values, priority: 3
|
|
151
167
|
end
|
data/lib/parsers/pdf_parser.rb
CHANGED
|
@@ -21,5 +21,5 @@ class FormatParser::PDFParser
|
|
|
21
21
|
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
FormatParser.register_parser new, natures: :document, formats: :pdf, priority:
|
|
24
|
+
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 3
|
|
25
25
|
end
|
data/lib/parsers/zip_parser.rb
CHANGED
data/lib/video.rb
CHANGED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe FormatParser::HEIFParser do
|
|
4
|
+
it 'is able to parse single heif image with heic major brand' do
|
|
5
|
+
heif_path = fixtures_dir + 'HEIF/SingleImage.heic'
|
|
6
|
+
|
|
7
|
+
result = subject.call(File.open(heif_path, 'rb'))
|
|
8
|
+
expect(result).not_to be_nil
|
|
9
|
+
expect(result.nature).to eq(:image)
|
|
10
|
+
expect(result.format).to eq(:heic)
|
|
11
|
+
expect(result.width_px).to eq(4000)
|
|
12
|
+
expect(result.height_px).to eq(3000)
|
|
13
|
+
expect(result.content_type).to eq('image/heic')
|
|
14
|
+
expect(result.intrinsics[:compatible_brands].should =~ ['mif1', 'heic'])
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it 'is able to parse single heif image with mif1 major brand' do
|
|
18
|
+
heif_path = fixtures_dir + 'HEIF/SingleImage_Autumn.heic'
|
|
19
|
+
|
|
20
|
+
result = subject.call(File.open(heif_path, 'rb'))
|
|
21
|
+
expect(result).not_to be_nil
|
|
22
|
+
expect(result.nature).to eq(:image)
|
|
23
|
+
expect(result.format).to eq(:heif)
|
|
24
|
+
expect(result.width_px).to eq(1440)
|
|
25
|
+
expect(result.height_px).to eq(960)
|
|
26
|
+
expect(result.content_type).to eq('image/heif')
|
|
27
|
+
expect(result.intrinsics[:compatible_brands].should =~ ['mif1', 'heic'])
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it 'is able to parse image collection with mif1 major brand' do
|
|
31
|
+
heif_path = fixtures_dir + 'HEIF/ImageCollection.heic'
|
|
32
|
+
|
|
33
|
+
result = subject.call(File.open(heif_path, 'rb'))
|
|
34
|
+
expect(result).not_to be_nil
|
|
35
|
+
expect(result.nature).to eq(:image)
|
|
36
|
+
expect(result.format).to eq(:heif)
|
|
37
|
+
expect(result.width_px).to eq(1440)
|
|
38
|
+
expect(result.height_px).to eq(960)
|
|
39
|
+
expect(result.content_type).to eq('image/heif')
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
it 'is able to parse image collection with colour info' do
|
|
43
|
+
heif_path = fixtures_dir + 'HEIF/SingleImage_Autumn_WithColourInfo.heic'
|
|
44
|
+
|
|
45
|
+
result = subject.call(File.open(heif_path, 'rb'))
|
|
46
|
+
expect(result).not_to be_nil
|
|
47
|
+
expect(result.nature).to eq(:image)
|
|
48
|
+
expect(result.format).to eq(:heic)
|
|
49
|
+
expect(result.width_px).to eq(1440)
|
|
50
|
+
expect(result.height_px).to eq(960)
|
|
51
|
+
colour_info = result.intrinsics[:colour_info]
|
|
52
|
+
expect(colour_info[:colour_primaries]).to eq(28259)
|
|
53
|
+
expect(colour_info[:transfer_characteristics]).to eq(27768)
|
|
54
|
+
expect(colour_info[:matrix_coefficients]).to eq(2)
|
|
55
|
+
expect(result.content_type).to eq('image/heic')
|
|
56
|
+
expect(result.intrinsics[:compatible_brands].should =~ ['mif1', 'heic'])
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it 'is able to parse image collection with pixel info' do
|
|
60
|
+
heif_path = fixtures_dir + 'HEIF/SingleImage_Autumn_WithColourInfo.heic'
|
|
61
|
+
|
|
62
|
+
result = subject.call(File.open(heif_path, 'rb'))
|
|
63
|
+
expect(result).not_to be_nil
|
|
64
|
+
expect(result.nature).to eq(:image)
|
|
65
|
+
expect(result.format).to eq(:heic)
|
|
66
|
+
expect(result.width_px).to eq(1440)
|
|
67
|
+
expect(result.height_px).to eq(960)
|
|
68
|
+
pixel_info = result.intrinsics[:pixel_info]
|
|
69
|
+
expect(pixel_info[0][:bits_in_channel_2]).to eq(8)
|
|
70
|
+
expect(pixel_info[1][:bits_in_channel_3]).to eq(8)
|
|
71
|
+
expect(pixel_info[2][:bits_in_channel_4]).to eq(8)
|
|
72
|
+
expect(result.content_type).to eq('image/heic')
|
|
73
|
+
expect(result.intrinsics[:compatible_brands].should =~ ['mif1', 'heic'])
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -94,6 +94,7 @@ describe FormatParser::MOOVParser do
|
|
|
94
94
|
expect(result.format).to eq(:mov)
|
|
95
95
|
expect(result.width_px).to eq(1920)
|
|
96
96
|
expect(result.height_px).to eq(1080)
|
|
97
|
+
expect(result.codecs).to eq(['apcn'])
|
|
97
98
|
end
|
|
98
99
|
|
|
99
100
|
it 'parses an MP4 video file and provides the necessary metadata' do
|
|
@@ -107,6 +108,7 @@ describe FormatParser::MOOVParser do
|
|
|
107
108
|
expect(result.width_px).to eq(160)
|
|
108
109
|
expect(result.height_px).to eq(90)
|
|
109
110
|
expect(result.frame_rate).to eq(14.98)
|
|
111
|
+
expect(result.codecs).to eq(['avc1'])
|
|
110
112
|
end
|
|
111
113
|
|
|
112
114
|
it 'provides filename hints' do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: format_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Noah Berman
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2022-
|
|
12
|
+
date: 2022-05-31 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: ks
|
|
@@ -238,6 +238,7 @@ files:
|
|
|
238
238
|
- lib/parsers/fdx_parser.rb
|
|
239
239
|
- lib/parsers/flac_parser.rb
|
|
240
240
|
- lib/parsers/gif_parser.rb
|
|
241
|
+
- lib/parsers/heif_parser.rb
|
|
241
242
|
- lib/parsers/jpeg_parser.rb
|
|
242
243
|
- lib/parsers/m3u_parser.rb
|
|
243
244
|
- lib/parsers/moov_parser.rb
|
|
@@ -278,6 +279,7 @@ files:
|
|
|
278
279
|
- spec/parsers/fdx_parser_spec.rb
|
|
279
280
|
- spec/parsers/flac_parser_spec.rb
|
|
280
281
|
- spec/parsers/gif_parser_spec.rb
|
|
282
|
+
- spec/parsers/heif_parser_spec.rb
|
|
281
283
|
- spec/parsers/jpeg_parser_spec.rb
|
|
282
284
|
- spec/parsers/m3u_parser_spec.rb
|
|
283
285
|
- spec/parsers/moov_parser_spec.rb
|
|
@@ -315,7 +317,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
315
317
|
- !ruby/object:Gem::Version
|
|
316
318
|
version: '0'
|
|
317
319
|
requirements: []
|
|
318
|
-
rubygems_version: 3.
|
|
320
|
+
rubygems_version: 3.1.6
|
|
319
321
|
signing_key:
|
|
320
322
|
specification_version: 4
|
|
321
323
|
summary: A library for efficient parsing of file metadata
|