format_parser 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +80 -68
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +30 -6
- data/lib/parsers/cr3_parser/decoder.rb +35 -0
- data/lib/parsers/cr3_parser.rb +51 -0
- data/lib/parsers/heif_parser.rb +7 -7
- data/lib/parsers/iso_base_media_file_format/decoder.rb +1041 -0
- data/lib/parsers/jpeg_parser.rb +1 -1
- data/lib/parsers/moov_parser/decoder.rb +1 -1
- data/lib/parsers/moov_parser.rb +9 -9
- data/lib/parsers/webp_parser.rb +6 -6
- data/spec/format_parser_spec.rb +3 -1
- data/spec/parsers/cr3_parser_spec.rb +58 -0
- data/spec/parsers/iso_base_media_file_format/decoder_spec.rb +242 -0
- metadata +11 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00a7541231e6ab1309b8fc318595725c4d0ec6de6a5732973b8ed0ac5e3b6393
|
4
|
+
data.tar.gz: fbd9d67e3cec87e474519bdade15a700694118f75f3ede8757fcb0b0657423f5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05a601a35d26db59f1df83f21fbe96fceff0e96c96ec9f958aa94326a34e95c4c4c585a36182b7c392eb205f5a433758eb1f45df2d726bb2402a89015ac26279
|
7
|
+
data.tar.gz: e9c935e201680e6740ef764b4836ecb3adddfb7559aab5e78f8a728c1e52bce0e784862a2cf5b73b05c8a3e23c1efcac8d402304dada21d4a404011d403fb604
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
|
+
## 2.2.0
|
2
|
+
* Add support for `CR3` files.
|
3
|
+
* Add ISO base media file format decoding functionality.
|
4
|
+
|
1
5
|
## 2.1.0
|
2
|
-
* Require
|
6
|
+
* Require minimum 2.6 ruby version.
|
3
7
|
* Bring back 2.6 to test matrix, we have jruby there which is still compatible with 2.6
|
4
8
|
* Drop `ks` dependency.
|
5
9
|
|
data/README.md
CHANGED
@@ -12,29 +12,37 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
12
12
|
|
13
13
|
## Currently supported filetypes:
|
14
14
|
|
15
|
-
*
|
15
|
+
* AAC
|
16
|
+
* AIFF
|
17
|
+
* ARW
|
18
|
+
* BMP
|
16
19
|
* CR2
|
17
|
-
*
|
18
|
-
*
|
19
|
-
* PNG
|
20
|
-
* MP3
|
21
|
-
* JPEG
|
22
|
-
* GIF
|
23
|
-
* PDF
|
20
|
+
* CR3
|
21
|
+
* DOCX
|
24
22
|
* DPX
|
25
|
-
* AIFF
|
26
|
-
* WAV
|
27
|
-
* FLAC
|
28
23
|
* FDX
|
24
|
+
* FLAC
|
25
|
+
* GIF
|
26
|
+
* HEIC
|
27
|
+
* HEIF
|
28
|
+
* JPEG
|
29
|
+
* M3U
|
30
|
+
* M4A
|
29
31
|
* MOV
|
32
|
+
* MP3
|
30
33
|
* MP4
|
31
|
-
*
|
32
|
-
*
|
33
|
-
* DOCX, PPTX, XLSX
|
34
|
+
* MPEG
|
35
|
+
* NEF
|
34
36
|
* OGG
|
35
|
-
*
|
36
|
-
*
|
37
|
+
* PDF
|
38
|
+
* PNG
|
39
|
+
* PPTX
|
40
|
+
* PSD
|
41
|
+
* TIFF
|
42
|
+
* WAV
|
37
43
|
* WEBP
|
44
|
+
* XLSX
|
45
|
+
* ZIP
|
38
46
|
|
39
47
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
40
48
|
|
@@ -132,23 +140,6 @@ Krists Ozols for [id3tag](https://github.com/krists/id3tag) that we are using fo
|
|
132
140
|
|
133
141
|
Unless specified otherwise in this section the fixture files are MIT licensed and from the FastImage and Dimensions projects.
|
134
142
|
|
135
|
-
### JPEG
|
136
|
-
- `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
|
137
|
-
- `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
|
138
|
-
- `too_many_APP1_markers_surrogate.jpg` was created by the project maintainers
|
139
|
-
* `orient_6.jpg` is used with permission from [Renaud Chaput](https://github.com/renchap)
|
140
|
-
|
141
|
-
### AIFF
|
142
|
-
- fixture.aiff was created by one of the project maintainers and is MIT licensed
|
143
|
-
|
144
|
-
### WAV
|
145
|
-
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
|
146
|
-
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
147
|
-
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
148
|
-
|
149
|
-
### MP3
|
150
|
-
- Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
|
151
|
-
|
152
143
|
### AAC
|
153
144
|
- Original music files: “Furious Freak” and “Galway”, Kevin MacLeod (incompetech.com), Licensed under Creative Commons: By Attribution 3.0, http://creativecommons.org/licenses/by/3.0/
|
154
145
|
- The AAC samples were converted from 'wav' format and made available [here](https://espressif-docs.readthedocs-hosted.com/projects/esp-adf/en/latest/design-guide/audio-samples.html) by Espressif Systems, as part of their audio development framework (under the ESPRESSIF MIT License).
|
@@ -157,69 +148,90 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
157
148
|
- ff-16b-1c-44100hz.aac
|
158
149
|
- gs-16b-2c-44100hz.aac
|
159
150
|
- gs-16b-1c-44100hz.aac
|
160
|
-
### FDX
|
161
|
-
- fixture.fdx was created by one of the project maintainers and is MIT licensed
|
162
151
|
|
163
|
-
###
|
164
|
-
-
|
152
|
+
### AIFF
|
153
|
+
- fixture.aiff was created by one of the project maintainers and is MIT licensed
|
165
154
|
|
166
|
-
###
|
167
|
-
-
|
168
|
-
- Test_Circular MOV files were created by one of the project maintainers and are MIT licensed
|
155
|
+
### ARW
|
156
|
+
- ARW examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
169
157
|
|
170
158
|
### CR2
|
171
159
|
- CR2 examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
172
160
|
|
173
|
-
###
|
174
|
-
-
|
161
|
+
### CR3
|
162
|
+
- CR3 examples are downloaded from https://raw.pixls.us/ and are in the public domain (CC0).
|
163
|
+
|
164
|
+
### DOCX
|
165
|
+
- The .docx files were generated by the project maintainers
|
166
|
+
|
167
|
+
### DPX
|
168
|
+
- DPX files were created by one of the project maintainers and may be used with the library for the purposes of testing
|
175
169
|
|
176
170
|
### ERF
|
177
171
|
- ERF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
178
172
|
|
173
|
+
### FDX
|
174
|
+
- fixture.fdx was created by one of the project maintainers and is MIT licensed
|
175
|
+
|
179
176
|
### FLAC
|
180
177
|
- atc_fixture_vbr.flac is a converted version of the MP3 with the same name
|
181
178
|
- c_11k16btipcm.flac is a converted version of the WAV with the same name
|
182
179
|
|
183
|
-
###
|
184
|
-
- `
|
180
|
+
### JPEG
|
181
|
+
- `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
|
182
|
+
- `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
|
183
|
+
- `too_many_APP1_markers_surrogate.jpg` was created by the project maintainers
|
184
|
+
* `orient_6.jpg` is used with permission from [Renaud Chaput](https://github.com/renchap)
|
185
185
|
|
186
|
-
###
|
187
|
-
-
|
186
|
+
### JPEG (EXIF orientation)
|
187
|
+
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
188
|
+
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
189
|
+
script.
|
188
190
|
|
189
|
-
###
|
190
|
-
- `
|
191
|
+
### KEY
|
192
|
+
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
191
193
|
|
192
|
-
###
|
193
|
-
-
|
194
|
-
- `IMG_9266_*.tif` and all it's variations were created by the project maintainers
|
194
|
+
### M3U
|
195
|
+
- The M3U fixture files were created by one of the project maintainers
|
195
196
|
|
196
|
-
###
|
197
|
-
-
|
197
|
+
### M4A
|
198
|
+
- fixture.m4a was created by one of the project maintainers and is MIT licensed
|
198
199
|
|
199
|
-
###
|
200
|
-
-
|
200
|
+
### MOOV
|
201
|
+
- bmff.mp4 is borrowed from the [bmff](https://github.com/zuku/bmff) project
|
202
|
+
- Test_Circular MOV files were created by one of the project maintainers and are MIT licensed
|
201
203
|
|
202
|
-
###
|
203
|
-
-
|
204
|
+
### MP3
|
205
|
+
- Cassy.mp3 has been produced by WeTransfer and may be used with the library for the purposes of testing
|
204
206
|
|
205
|
-
###
|
207
|
+
### MPEG
|
206
208
|
- The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
|
207
209
|
- Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
|
208
210
|
|
209
|
-
###
|
210
|
-
-
|
211
|
-
manipulated using the [https://github.com/recurser/exif-orientation-examples](exif-orientation-examples)
|
212
|
-
script.
|
211
|
+
### NEF
|
212
|
+
- NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
213
213
|
|
214
|
-
###
|
215
|
-
-
|
214
|
+
### OGG
|
215
|
+
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `with_garbage_at_the_end.ogg` have been generated by the project contributors
|
216
|
+
|
217
|
+
### PNG
|
218
|
+
- `simulator_screenie.png` provided by [Rens Verhoeven](https://github.com/renssies)
|
219
|
+
|
220
|
+
### TIFF
|
221
|
+
- `Shinbutsureijoushuincho.tiff` is obtained from Wikimedia Commons and is Creative Commons licensed
|
222
|
+
- `IMG_9266_*.tif` and all its variations were created by the project maintainers
|
223
|
+
|
224
|
+
### WAV
|
225
|
+
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
|
226
|
+
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
227
|
+
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
216
228
|
|
217
229
|
### WEBP
|
218
|
-
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
219
|
-
licensed, all of the WebP fixture files have been created by one of the project maintainers.
|
230
|
+
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
231
|
+
licensed, all of the WebP fixture files have been created by one of the project maintainers.
|
220
232
|
|
221
|
-
###
|
222
|
-
- The
|
233
|
+
### ZIP
|
234
|
+
- The .zip fixture files have been created by the project maintainers
|
223
235
|
|
224
236
|
## Copyright
|
225
237
|
|
data/lib/io_utils.rb
CHANGED
@@ -27,29 +27,53 @@ module FormatParser::IOUtils
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def read_int_8
|
30
|
-
|
30
|
+
read_bytes(1).unpack('C').first
|
31
31
|
end
|
32
32
|
|
33
33
|
def read_int_16
|
34
|
-
|
34
|
+
read_bytes(2).unpack('n').first
|
35
35
|
end
|
36
36
|
|
37
37
|
def read_int_32
|
38
|
-
|
38
|
+
read_bytes(4).unpack('N').first
|
39
|
+
end
|
40
|
+
|
41
|
+
def read_int_64
|
42
|
+
read_bytes(8).unpack('Q>').first
|
39
43
|
end
|
40
44
|
|
41
45
|
def read_little_endian_int_16
|
42
|
-
|
46
|
+
read_bytes(2).unpack('v').first
|
43
47
|
end
|
44
48
|
|
45
49
|
def read_little_endian_int_32
|
46
|
-
|
50
|
+
read_bytes(4).unpack('V').first
|
51
|
+
end
|
52
|
+
|
53
|
+
def read_fixed_point_16
|
54
|
+
read_bytes(2).unpack('C2')
|
55
|
+
end
|
56
|
+
|
57
|
+
def read_fixed_point_32
|
58
|
+
read_bytes(4).unpack('n2')
|
59
|
+
end
|
60
|
+
|
61
|
+
def read_fixed_point_32_2_30
|
62
|
+
n = read_int_32
|
63
|
+
[n >> 30, n & 0x3fffffff]
|
47
64
|
end
|
48
65
|
|
49
66
|
# 'n' is the number of bytes to read
|
50
|
-
def
|
67
|
+
def read_bytes(n)
|
51
68
|
safe_read(@buf, n)
|
52
69
|
end
|
53
70
|
|
71
|
+
alias_method :read_string, :read_bytes
|
72
|
+
|
73
|
+
def skip_bytes(n)
|
74
|
+
safe_skip(@buf, n)
|
75
|
+
yield if block_given?
|
76
|
+
end
|
77
|
+
|
54
78
|
### TODO: Some kind of built-in offset for the read
|
55
79
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'parsers/iso_base_media_file_format/decoder'
|
2
|
+
|
3
|
+
class FormatParser::CR3Parser::Decoder < FormatParser::ISOBaseMediaFileFormat::Decoder
|
4
|
+
include FormatParser::EXIFParser
|
5
|
+
|
6
|
+
protected
|
7
|
+
|
8
|
+
ATOM_PARSERS = ATOM_PARSERS.merge({
|
9
|
+
'CMT1' => :cmt1
|
10
|
+
})
|
11
|
+
CANON_METADATA_CONTAINER_UUID = '85c0b687820f11e08111f4ce462b6a48'
|
12
|
+
|
13
|
+
def cmt1(size)
|
14
|
+
exif = exif_from_tiff_io(StringIO.new(read_bytes(size)))
|
15
|
+
if exif
|
16
|
+
fields = exif.to_hash
|
17
|
+
fields[:rotated] = exif.rotated?
|
18
|
+
fields[:orientation_sym] = exif.orientation_sym
|
19
|
+
[fields, nil]
|
20
|
+
else
|
21
|
+
[nil, nil]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def uuid(size)
|
26
|
+
usertype = read_bytes(16).unpack('H*').first
|
27
|
+
fields = { usertype: usertype }
|
28
|
+
children = if usertype == CANON_METADATA_CONTAINER_UUID
|
29
|
+
build_atom_tree(size - 16)
|
30
|
+
else
|
31
|
+
skip_bytes(size - 16)
|
32
|
+
end
|
33
|
+
[fields, children]
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
class FormatParser::CR3Parser
|
2
|
+
include FormatParser::IOUtils
|
3
|
+
require_relative 'cr3_parser/decoder'
|
4
|
+
|
5
|
+
CR3_MIME_TYPE = 'image/x-canon-cr3'
|
6
|
+
MAGIC_BYTES = 'ftypcrx '
|
7
|
+
|
8
|
+
def likely_match?(filename)
|
9
|
+
filename =~ /\.cr3$/i
|
10
|
+
end
|
11
|
+
|
12
|
+
def call(io)
|
13
|
+
@buf = FormatParser::IOConstraint.new(io)
|
14
|
+
|
15
|
+
return unless matches_cr3_definition?
|
16
|
+
|
17
|
+
atom_tree = Decoder.new.build_atom_tree(0xffffffff, @buf)
|
18
|
+
moov_atom = atom_tree.find { |atom| atom.type == 'moov' }
|
19
|
+
cmt1_atom = moov_atom&.find_first_descendent(['CMT1'])
|
20
|
+
return unless cmt1_atom
|
21
|
+
|
22
|
+
width = cmt1_atom.fields[:image_width]
|
23
|
+
height = cmt1_atom.fields[:image_length]
|
24
|
+
rotated = cmt1_atom.fields[:rotated]
|
25
|
+
orientation = cmt1_atom.fields[:orientation_sym]
|
26
|
+
FormatParser::Image.new(
|
27
|
+
format: :cr3,
|
28
|
+
content_type: CR3_MIME_TYPE,
|
29
|
+
width_px: width,
|
30
|
+
height_px: height,
|
31
|
+
orientation: orientation,
|
32
|
+
display_width_px: rotated ? height : width,
|
33
|
+
display_height_px: rotated ? width : height,
|
34
|
+
intrinsics: {
|
35
|
+
atom_tree: atom_tree,
|
36
|
+
exif: cmt1_atom.fields,
|
37
|
+
},
|
38
|
+
)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def matches_cr3_definition?
|
44
|
+
skip_bytes(4)
|
45
|
+
matches = read_string(8) == MAGIC_BYTES
|
46
|
+
@buf.seek(0)
|
47
|
+
matches
|
48
|
+
end
|
49
|
+
|
50
|
+
FormatParser.register_parser new, natures: [:image], formats: [:cr3]
|
51
|
+
end
|
data/lib/parsers/heif_parser.rb
CHANGED
@@ -179,7 +179,7 @@ class FormatParser::HEIFParser
|
|
179
179
|
|
180
180
|
def read_item_info_box
|
181
181
|
version = read_int_8
|
182
|
-
|
182
|
+
skip_bytes(3) # 0 flags
|
183
183
|
entry_count = if version == 0
|
184
184
|
read_int_16
|
185
185
|
else
|
@@ -191,7 +191,7 @@ class FormatParser::HEIFParser
|
|
191
191
|
return unless read_string(4) == ITEM_INFO_ENTRY
|
192
192
|
item_info_end_pos = @buf.pos + item_info_entry_length - HEADER_LENGTH
|
193
193
|
version = read_int_8
|
194
|
-
|
194
|
+
skip_bytes(3) # 0 flags
|
195
195
|
case version
|
196
196
|
when 2
|
197
197
|
item_id = read_int_16
|
@@ -200,7 +200,7 @@ class FormatParser::HEIFParser
|
|
200
200
|
else
|
201
201
|
return # wrong version according to standards, hence return
|
202
202
|
end
|
203
|
-
|
203
|
+
skip_bytes(2) # not interested in the item_protection_index
|
204
204
|
item_type = read_string(4)
|
205
205
|
content_encoding = ''
|
206
206
|
if item_type == MIME_MARKER
|
@@ -212,13 +212,13 @@ class FormatParser::HEIFParser
|
|
212
212
|
end
|
213
213
|
|
214
214
|
def read_nil_version_and_flag
|
215
|
-
|
216
|
-
|
215
|
+
skip_bytes(1) # version, always 0 in this current box
|
216
|
+
skip_bytes(3) # flags, always 0 in this current box
|
217
217
|
end
|
218
218
|
|
219
219
|
def read_primary_item_box
|
220
220
|
version = read_int_8
|
221
|
-
|
221
|
+
skip_bytes(3) # flags, always 0 in this current box
|
222
222
|
@primary_item_id = if version == 0
|
223
223
|
read_int_16
|
224
224
|
else
|
@@ -332,7 +332,7 @@ class FormatParser::HEIFParser
|
|
332
332
|
|
333
333
|
def read_item_properties_association_box
|
334
334
|
version = read_int_8
|
335
|
-
|
335
|
+
skip_bytes(2) # we skip the first 2 bytes of the flags (total of 3 bytes) cause we care only about the least significant bit
|
336
336
|
flags = read_int_8
|
337
337
|
entry_count = read_int_32
|
338
338
|
item_id = 0
|