format_parser 2.1.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +80 -68
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +30 -6
- data/lib/parsers/cr3_parser/decoder.rb +35 -0
- data/lib/parsers/cr3_parser.rb +51 -0
- data/lib/parsers/heif_parser.rb +7 -7
- data/lib/parsers/iso_base_media_file_format/decoder.rb +1041 -0
- data/lib/parsers/jpeg_parser.rb +1 -1
- data/lib/parsers/moov_parser/decoder.rb +1 -1
- data/lib/parsers/moov_parser.rb +9 -9
- data/lib/parsers/webp_parser.rb +6 -6
- data/spec/format_parser_spec.rb +3 -1
- data/spec/parsers/cr3_parser_spec.rb +58 -0
- data/spec/parsers/iso_base_media_file_format/decoder_spec.rb +242 -0
- metadata +11 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00a7541231e6ab1309b8fc318595725c4d0ec6de6a5732973b8ed0ac5e3b6393
|
4
|
+
data.tar.gz: fbd9d67e3cec87e474519bdade15a700694118f75f3ede8757fcb0b0657423f5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05a601a35d26db59f1df83f21fbe96fceff0e96c96ec9f958aa94326a34e95c4c4c585a36182b7c392eb205f5a433758eb1f45df2d726bb2402a89015ac26279
|
7
|
+
data.tar.gz: e9c935e201680e6740ef764b4836ecb3adddfb7559aab5e78f8a728c1e52bce0e784862a2cf5b73b05c8a3e23c1efcac8d402304dada21d4a404011d403fb604
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
|
+
## 2.2.0
|
2
|
+
* Add support for `CR3` files.
|
3
|
+
* Add ISO base media file format decoding functionality.
|
4
|
+
|
1
5
|
## 2.1.0
|
2
|
-
* Require
|
6
|
+
* Require minimum 2.6 ruby version.
|
3
7
|
* Bring back 2.6 to the test matrix; we have JRuby there, which is still compatible with 2.6.
|
4
8
|
* Drop `ks` dependency.
|
5
9
|
|
data/README.md
CHANGED
@@ -12,29 +12,37 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
12
12
|
|
13
13
|
## Currently supported filetypes:
|
14
14
|
|
15
|
-
*
|
15
|
+
* AAC
|
16
|
+
* AIFF
|
17
|
+
* ARW
|
18
|
+
* BMP
|
16
19
|
* CR2
|
17
|
-
*
|
18
|
-
*
|
19
|
-
* PNG
|
20
|
-
* MP3
|
21
|
-
* JPEG
|
22
|
-
* GIF
|
23
|
-
* PDF
|
20
|
+
* CR3
|
21
|
+
* DOCX
|
24
22
|
* DPX
|
25
|
-
* AIFF
|
26
|
-
* WAV
|
27
|
-
* FLAC
|
28
23
|
* FDX
|
24
|
+
* FLAC
|
25
|
+
* GIF
|
26
|
+
* HEIC
|
27
|
+
* HEIF
|
28
|
+
* JPEG
|
29
|
+
* M3U
|
30
|
+
* M4A
|
29
31
|
* MOV
|
32
|
+
* MP3
|
30
33
|
* MP4
|
31
|
-
*
|
32
|
-
*
|
33
|
-
* DOCX, PPTX, XLSX
|
34
|
+
* MPEG
|
35
|
+
* NEF
|
34
36
|
* OGG
|
35
|
-
*
|
36
|
-
*
|
37
|
+
* PDF
|
38
|
+
* PNG
|
39
|
+
* PPTX
|
40
|
+
* PSD
|
41
|
+
* TIFF
|
42
|
+
* WAV
|
37
43
|
* WEBP
|
44
|
+
* XLSX
|
45
|
+
* ZIP
|
38
46
|
|
39
47
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
40
48
|
|
@@ -132,23 +140,6 @@ Krists Ozols for [id3tag](https://github.com/krists/id3tag) that we are using fo
|
|
132
140
|
|
133
141
|
Unless specified otherwise in this section the fixture files are MIT licensed and from the FastImage and Dimensions projects.
|
134
142
|
|
135
|
-
### JPEG
|
136
|
-
- `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
|
137
|
-
- `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
|
138
|
-
- `too_many_APP1_markers_surrogate.jpg` was created by the project maintainers
|
139
|
-
* `orient_6.jpg` is used with permission from [Renaud Chaput](https://github.com/renchap)
|
140
|
-
|
141
|
-
### AIFF
|
142
|
-
- fixture.aiff was created by one of the project maintainers and is MIT licensed
|
143
|
-
|
144
|
-
### WAV
|
145
|
-
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
|
146
|
-
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
147
|
-
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
148
|
-
|
149
|
-
### MP3
|
150
|
-
- Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
|
151
|
-
|
152
143
|
### AAC
|
153
144
|
- Original music files: “Furious Freak” and “Galway”, Kevin MacLeod (incompetech.com), Licensed under Creative Commons: By Attribution 3.0, http://creativecommons.org/licenses/by/3.0/
|
154
145
|
- The AAC samples were converted from 'wav' format and made available [here](https://espressif-docs.readthedocs-hosted.com/projects/esp-adf/en/latest/design-guide/audio-samples.html) by Espressif Systems, as part of their audio development framework (under the ESPRESSIF MIT License).
|
@@ -157,69 +148,90 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
157
148
|
- ff-16b-1c-44100hz.aac
|
158
149
|
- gs-16b-2c-44100hz.aac
|
159
150
|
- gs-16b-1c-44100hz.aac
|
160
|
-
### FDX
|
161
|
-
- fixture.fdx was created by one of the project maintainers and is MIT licensed
|
162
151
|
|
163
|
-
###
|
164
|
-
-
|
152
|
+
### AIFF
|
153
|
+
- fixture.aiff was created by one of the project maintainers and is MIT licensed
|
165
154
|
|
166
|
-
###
|
167
|
-
-
|
168
|
-
- Test_Circular MOV files were created by one of the project maintainers and are MIT licensed
|
155
|
+
### ARW
|
156
|
+
- ARW examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
169
157
|
|
170
158
|
### CR2
|
171
159
|
- CR2 examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
172
160
|
|
173
|
-
###
|
174
|
-
-
|
161
|
+
### CR3
|
162
|
+
- CR3 examples are downloaded from https://raw.pixls.us/ and are in the public domain (CC0).
|
163
|
+
|
164
|
+
### DOCX
|
165
|
+
- The .docx files were generated by the project maintainers
|
166
|
+
|
167
|
+
### DPX
|
168
|
+
- DPX files were created by one of the project maintainers and may be used with the library for the purposes of testing
|
175
169
|
|
176
170
|
### ERF
|
177
171
|
- ERF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
178
172
|
|
173
|
+
### FDX
|
174
|
+
- fixture.fdx was created by one of the project maintainers and is MIT licensed
|
175
|
+
|
179
176
|
### FLAC
|
180
177
|
- atc_fixture_vbr.flac is a converted version of the MP3 with the same name
|
181
178
|
- c_11k16btipcm.flac is a converted version of the WAV with the same name
|
182
179
|
|
183
|
-
###
|
184
|
-
- `
|
180
|
+
### JPEG
|
181
|
+
- `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
|
182
|
+
- `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
|
183
|
+
- `too_many_APP1_markers_surrogate.jpg` was created by the project maintainers
|
184
|
+
* `orient_6.jpg` is used with permission from [Renaud Chaput](https://github.com/renchap)
|
185
185
|
|
186
|
-
###
|
187
|
-
-
|
186
|
+
### JPEG (EXIF orientation)
|
187
|
+
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
188
|
+
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
189
|
+
script.
|
188
190
|
|
189
|
-
###
|
190
|
-
- `
|
191
|
+
### KEY
|
192
|
+
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
191
193
|
|
192
|
-
###
|
193
|
-
-
|
194
|
-
- `IMG_9266_*.tif` and all its variations were created by the project maintainers
|
194
|
+
### M3U
|
195
|
+
- The M3U fixture files were created by one of the project maintainers
|
195
196
|
|
196
|
-
###
|
197
|
-
-
|
197
|
+
### M4A
|
198
|
+
- fixture.m4a was created by one of the project maintainers and is MIT licensed
|
198
199
|
|
199
|
-
###
|
200
|
-
-
|
200
|
+
### MOOV
|
201
|
+
- bmff.mp4 is borrowed from the [bmff](https://github.com/zuku/bmff) project
|
202
|
+
- Test_Circular MOV files were created by one of the project maintainers and are MIT licensed
|
201
203
|
|
202
|
-
###
|
203
|
-
-
|
204
|
+
### MP3
|
205
|
+
- Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
|
204
206
|
|
205
|
-
###
|
207
|
+
### MPEG
|
206
208
|
- The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
|
207
209
|
- Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
|
208
210
|
|
209
|
-
###
|
210
|
-
-
|
211
|
-
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
212
|
-
script.
|
211
|
+
### NEF
|
212
|
+
- NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
213
213
|
|
214
|
-
###
|
215
|
-
-
|
214
|
+
### OGG
|
215
|
+
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `with_garbage_at_the_end.ogg` have been generated by the project contributors
|
216
|
+
|
217
|
+
### PNG
|
218
|
+
- `simulator_screenie.png` provided by [Rens Verhoeven](https://github.com/renssies)
|
219
|
+
|
220
|
+
### TIFF
|
221
|
+
- `Shinbutsureijoushuincho.tiff` is obtained from Wikimedia Commons and is Creative Commons licensed
|
222
|
+
- `IMG_9266_*.tif` and all its variations were created by the project maintainers
|
223
|
+
|
224
|
+
### WAV
|
225
|
+
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
|
226
|
+
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
227
|
+
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
216
228
|
|
217
229
|
### WEBP
|
218
|
-
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
219
|
-
licensed, all of the WebP fixture files have been created by one of the project maintainers.
|
230
|
+
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
231
|
+
licensed, all of the WebP fixture files have been created by one of the project maintainers.
|
220
232
|
|
221
|
-
###
|
222
|
-
- The
|
233
|
+
### ZIP
|
234
|
+
- The .zip fixture files have been created by the project maintainers
|
223
235
|
|
224
236
|
## Copyright
|
225
237
|
|
data/lib/io_utils.rb
CHANGED
@@ -27,29 +27,53 @@ module FormatParser::IOUtils
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def read_int_8
|
30
|
-
|
30
|
+
read_bytes(1).unpack('C').first
|
31
31
|
end
|
32
32
|
|
33
33
|
def read_int_16
|
34
|
-
|
34
|
+
read_bytes(2).unpack('n').first
|
35
35
|
end
|
36
36
|
|
37
37
|
def read_int_32
|
38
|
-
|
38
|
+
read_bytes(4).unpack('N').first
|
39
|
+
end
|
40
|
+
|
41
|
+
def read_int_64
|
42
|
+
read_bytes(8).unpack('Q>').first
|
39
43
|
end
|
40
44
|
|
41
45
|
def read_little_endian_int_16
|
42
|
-
|
46
|
+
read_bytes(2).unpack('v').first
|
43
47
|
end
|
44
48
|
|
45
49
|
def read_little_endian_int_32
|
46
|
-
|
50
|
+
read_bytes(4).unpack('V').first
|
51
|
+
end
|
52
|
+
|
53
|
+
def read_fixed_point_16
|
54
|
+
read_bytes(2).unpack('C2')
|
55
|
+
end
|
56
|
+
|
57
|
+
def read_fixed_point_32
|
58
|
+
read_bytes(4).unpack('n2')
|
59
|
+
end
|
60
|
+
|
61
|
+
def read_fixed_point_32_2_30
|
62
|
+
n = read_int_32
|
63
|
+
[n >> 30, n & 0x3fffffff]
|
47
64
|
end
|
48
65
|
|
49
66
|
# 'n' is the number of bytes to read
|
50
|
-
def
|
67
|
+
def read_bytes(n)
|
51
68
|
safe_read(@buf, n)
|
52
69
|
end
|
53
70
|
|
71
|
+
alias_method :read_string, :read_bytes
|
72
|
+
|
73
|
+
def skip_bytes(n)
|
74
|
+
safe_skip(@buf, n)
|
75
|
+
yield if block_given?
|
76
|
+
end
|
77
|
+
|
54
78
|
### TODO: Some kind of built-in offset for the read
|
55
79
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'parsers/iso_base_media_file_format/decoder'
|
2
|
+
|
3
|
+
class FormatParser::CR3Parser::Decoder < FormatParser::ISOBaseMediaFileFormat::Decoder
|
4
|
+
include FormatParser::EXIFParser
|
5
|
+
|
6
|
+
protected
|
7
|
+
|
8
|
+
ATOM_PARSERS = ATOM_PARSERS.merge({
|
9
|
+
'CMT1' => :cmt1
|
10
|
+
})
|
11
|
+
CANON_METADATA_CONTAINER_UUID = '85c0b687820f11e08111f4ce462b6a48'
|
12
|
+
|
13
|
+
def cmt1(size)
|
14
|
+
exif = exif_from_tiff_io(StringIO.new(read_bytes(size)))
|
15
|
+
if exif
|
16
|
+
fields = exif.to_hash
|
17
|
+
fields[:rotated] = exif.rotated?
|
18
|
+
fields[:orientation_sym] = exif.orientation_sym
|
19
|
+
[fields, nil]
|
20
|
+
else
|
21
|
+
[nil, nil]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def uuid(size)
|
26
|
+
usertype = read_bytes(16).unpack('H*').first
|
27
|
+
fields = { usertype: usertype }
|
28
|
+
children = if usertype == CANON_METADATA_CONTAINER_UUID
|
29
|
+
build_atom_tree(size - 16)
|
30
|
+
else
|
31
|
+
skip_bytes(size - 16)
|
32
|
+
end
|
33
|
+
[fields, children]
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
class FormatParser::CR3Parser
|
2
|
+
include FormatParser::IOUtils
|
3
|
+
require_relative 'cr3_parser/decoder'
|
4
|
+
|
5
|
+
CR3_MIME_TYPE = 'image/x-canon-cr3'
|
6
|
+
MAGIC_BYTES = 'ftypcrx '
|
7
|
+
|
8
|
+
def likely_match?(filename)
|
9
|
+
filename =~ /\.cr3$/i
|
10
|
+
end
|
11
|
+
|
12
|
+
def call(io)
|
13
|
+
@buf = FormatParser::IOConstraint.new(io)
|
14
|
+
|
15
|
+
return unless matches_cr3_definition?
|
16
|
+
|
17
|
+
atom_tree = Decoder.new.build_atom_tree(0xffffffff, @buf)
|
18
|
+
moov_atom = atom_tree.find { |atom| atom.type == 'moov' }
|
19
|
+
cmt1_atom = moov_atom&.find_first_descendent(['CMT1'])
|
20
|
+
return unless cmt1_atom
|
21
|
+
|
22
|
+
width = cmt1_atom.fields[:image_width]
|
23
|
+
height = cmt1_atom.fields[:image_length]
|
24
|
+
rotated = cmt1_atom.fields[:rotated]
|
25
|
+
orientation = cmt1_atom.fields[:orientation_sym]
|
26
|
+
FormatParser::Image.new(
|
27
|
+
format: :cr3,
|
28
|
+
content_type: CR3_MIME_TYPE,
|
29
|
+
width_px: width,
|
30
|
+
height_px: height,
|
31
|
+
orientation: orientation,
|
32
|
+
display_width_px: rotated ? height : width,
|
33
|
+
display_height_px: rotated ? width : height,
|
34
|
+
intrinsics: {
|
35
|
+
atom_tree: atom_tree,
|
36
|
+
exif: cmt1_atom.fields,
|
37
|
+
},
|
38
|
+
)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def matches_cr3_definition?
|
44
|
+
skip_bytes(4)
|
45
|
+
matches = read_string(8) == MAGIC_BYTES
|
46
|
+
@buf.seek(0)
|
47
|
+
matches
|
48
|
+
end
|
49
|
+
|
50
|
+
FormatParser.register_parser new, natures: [:image], formats: [:cr3]
|
51
|
+
end
|
data/lib/parsers/heif_parser.rb
CHANGED
@@ -179,7 +179,7 @@ class FormatParser::HEIFParser
|
|
179
179
|
|
180
180
|
def read_item_info_box
|
181
181
|
version = read_int_8
|
182
|
-
|
182
|
+
skip_bytes(3) # 0 flags
|
183
183
|
entry_count = if version == 0
|
184
184
|
read_int_16
|
185
185
|
else
|
@@ -191,7 +191,7 @@ class FormatParser::HEIFParser
|
|
191
191
|
return unless read_string(4) == ITEM_INFO_ENTRY
|
192
192
|
item_info_end_pos = @buf.pos + item_info_entry_length - HEADER_LENGTH
|
193
193
|
version = read_int_8
|
194
|
-
|
194
|
+
skip_bytes(3) # 0 flags
|
195
195
|
case version
|
196
196
|
when 2
|
197
197
|
item_id = read_int_16
|
@@ -200,7 +200,7 @@ class FormatParser::HEIFParser
|
|
200
200
|
else
|
201
201
|
return # wrong version according to standards, hence return
|
202
202
|
end
|
203
|
-
|
203
|
+
skip_bytes(2) # not interested in the item_protection_index
|
204
204
|
item_type = read_string(4)
|
205
205
|
content_encoding = ''
|
206
206
|
if item_type == MIME_MARKER
|
@@ -212,13 +212,13 @@ class FormatParser::HEIFParser
|
|
212
212
|
end
|
213
213
|
|
214
214
|
def read_nil_version_and_flag
|
215
|
-
|
216
|
-
|
215
|
+
skip_bytes(1) # version, always 0 in this current box
|
216
|
+
skip_bytes(3) # flags, always 0 in this current box
|
217
217
|
end
|
218
218
|
|
219
219
|
def read_primary_item_box
|
220
220
|
version = read_int_8
|
221
|
-
|
221
|
+
skip_bytes(3) # flags, always 0 in this current box
|
222
222
|
@primary_item_id = if version == 0
|
223
223
|
read_int_16
|
224
224
|
else
|
@@ -332,7 +332,7 @@ class FormatParser::HEIFParser
|
|
332
332
|
|
333
333
|
def read_item_properties_association_box
|
334
334
|
version = read_int_8
|
335
|
-
|
335
|
+
skip_bytes(2) # we skip the first 2 bytes of the flags (total of 3 bytes) cause we care only about the least significant bit
|
336
336
|
flags = read_int_8
|
337
337
|
entry_count = read_int_32
|
338
338
|
item_id = 0
|