format_parser 0.25.3 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/CHANGELOG.md +16 -0
- data/README.md +4 -0
- data/lib/archive.rb +3 -0
- data/lib/audio.rb +3 -0
- data/lib/document.rb +1 -0
- data/lib/format_parser.rb +18 -3
- data/lib/format_parser/version.rb +1 -1
- data/lib/image.rb +3 -0
- data/lib/parsers/aiff_parser.rb +4 -1
- data/lib/parsers/bmp_parser.rb +3 -0
- data/lib/parsers/cr2_parser.rb +2 -0
- data/lib/parsers/dpx_parser.rb +19 -8
- data/lib/parsers/flac_parser.rb +2 -0
- data/lib/parsers/gif_parser.rb +2 -0
- data/lib/parsers/jpeg_parser.rb +2 -0
- data/lib/parsers/m3u_parser.rb +23 -0
- data/lib/parsers/moov_parser.rb +10 -1
- data/lib/parsers/mp3_parser.rb +9 -1
- data/lib/parsers/ogg_parser.rb +3 -2
- data/lib/parsers/pdf_parser.rb +2 -2
- data/lib/parsers/png_parser.rb +2 -0
- data/lib/parsers/psd_parser.rb +2 -0
- data/lib/parsers/tiff_parser.rb +12 -3
- data/lib/parsers/wav_parser.rb +3 -0
- data/lib/parsers/zip_parser.rb +5 -3
- data/lib/parsers/zip_parser/office_formats.rb +5 -5
- data/lib/text.rb +19 -0
- data/lib/video.rb +3 -0
- data/spec/format_parser_spec.rb +20 -0
- data/spec/parsers/aiff_parser_spec.rb +1 -0
- data/spec/parsers/bmp_parser_spec.rb +8 -0
- data/spec/parsers/cr2_parser_spec.rb +1 -0
- data/spec/parsers/dpx_parser_spec.rb +1 -0
- data/spec/parsers/flac_parser_spec.rb +1 -0
- data/spec/parsers/gif_parser_spec.rb +1 -0
- data/spec/parsers/jpeg_parser_spec.rb +1 -0
- data/spec/parsers/m3u_parser_spec.rb +41 -0
- data/spec/parsers/moov_parser_spec.rb +4 -1
- data/spec/parsers/mp3_parser_spec.rb +9 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -0
- data/spec/parsers/png_parser_spec.rb +1 -0
- data/spec/parsers/psd_parser_spec.rb +1 -0
- data/spec/parsers/tiff_parser_spec.rb +1 -0
- data/spec/parsers/wav_parser_spec.rb +1 -0
- data/spec/parsers/zip_parser_spec.rb +2 -0
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1a10ceeaca4d0d6d2336b94f9fc397781ae2ffabdb588cee7ebc59fdcb968082
|
|
4
|
+
data.tar.gz: c28b8b7a0eb1d83e9f93406a4bbbad0699e50248d49d413393d6c3a6d82f7acf
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b3780e0f615f8a42aa097b652b675ceba5fb2325c3f2ad7472e178204cef3838967ea56c25e0daa1f5c666df42c5f5d4252a4dd42dae26389c41288d5b56d30
|
|
7
|
+
data.tar.gz: f6f22e664f8603e691795b44902cc5d677f63a02b4950233ea09719b9b2e8ae7d9eea57e406bd4f5b9e252aff8e9c5a684cdb1155846f0eedeceeba7fe7131c0
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
## 0.27.0
|
|
2
|
+
* Add `#content_type` on `Result` return values which makes sense for the detected filetype
|
|
3
|
+
|
|
4
|
+
## 0.26.0
|
|
5
|
+
* Add support for M3U format files
|
|
6
|
+
|
|
7
|
+
## 0.25.6
|
|
8
|
+
* Fix FormatParser.parse (with `results: :first`) to be deterministic
|
|
9
|
+
|
|
10
|
+
## 0.25.5
|
|
11
|
+
* DPX: Fix DPXParser to support images without aspect ratio
|
|
12
|
+
|
|
13
|
+
## 0.25.4
|
|
14
|
+
* MP3: Fix MP3Parser to return nil for TIFF files
|
|
15
|
+
* Add support for Ruby 2.7
|
|
16
|
+
|
|
1
17
|
## 0.25.3
|
|
2
18
|
* MP3: Fix parser to not skip the first bytes if it's not an ID3 header
|
|
3
19
|
|
data/README.md
CHANGED
|
@@ -32,6 +32,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
|
32
32
|
* DOCX, PPTX, XLSX
|
|
33
33
|
* OGG
|
|
34
34
|
* MPEG, MPG
|
|
35
|
+
* M3U
|
|
35
36
|
|
|
36
37
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
|
37
38
|
|
|
@@ -194,6 +195,9 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
|
194
195
|
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
|
195
196
|
script.
|
|
196
197
|
|
|
198
|
+
### M3U
|
|
199
|
+
- The M3U fixture files were created by one of the project maintainers
|
|
200
|
+
|
|
197
201
|
### .key
|
|
198
202
|
- The `keynote_recognized_as_jpeg.key` file was created by the project maintainers
|
|
199
203
|
|
data/lib/archive.rb
CHANGED
|
@@ -26,6 +26,9 @@ module FormatParser
|
|
|
26
26
|
# it can be placed here
|
|
27
27
|
attr_accessor :intrinsics
|
|
28
28
|
|
|
29
|
+
# The MIME type of the archive
|
|
30
|
+
attr_accessor :content_type
|
|
31
|
+
|
|
29
32
|
# Only permits assignments via defined accessors
|
|
30
33
|
def initialize(**attributes)
|
|
31
34
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/audio.rb
CHANGED
|
@@ -35,6 +35,9 @@ module FormatParser
|
|
|
35
35
|
# it can be placed here
|
|
36
36
|
attr_accessor :intrinsics
|
|
37
37
|
|
|
38
|
+
# The MIME type of the sound file
|
|
39
|
+
attr_accessor :content_type
|
|
40
|
+
|
|
38
41
|
# Only permits assignments via defined accessors
|
|
39
42
|
def initialize(**attributes)
|
|
40
43
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/document.rb
CHANGED
data/lib/format_parser.rb
CHANGED
|
@@ -19,6 +19,7 @@ module FormatParser
|
|
|
19
19
|
require_relative 'io_constraint'
|
|
20
20
|
require_relative 'care'
|
|
21
21
|
require_relative 'active_storage/blob_analyzer'
|
|
22
|
+
require_relative 'text'
|
|
22
23
|
|
|
23
24
|
# Define Measurometer in the internal namespace as well
|
|
24
25
|
# so that we stay compatible for the applications that use it
|
|
@@ -49,8 +50,10 @@ module FormatParser
|
|
|
49
50
|
parser_provided_formats = Array(formats)
|
|
50
51
|
parser_provided_natures = Array(natures)
|
|
51
52
|
PARSER_MUX.synchronize do
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
# It can't be a Set because the method `parsers_for` depends on the order
|
|
54
|
+
# that the parsers were added.
|
|
55
|
+
@parsers ||= []
|
|
56
|
+
@parsers << callable_parser unless @parsers.include?(callable_parser)
|
|
54
57
|
@parsers_per_nature ||= {}
|
|
55
58
|
parser_provided_natures.each do |provided_nature|
|
|
56
59
|
@parsers_per_nature[provided_nature] ||= Set.new
|
|
@@ -255,7 +258,19 @@ module FormatParser
|
|
|
255
258
|
# Order the parsers according to their priority value. The ones having a lower
|
|
256
259
|
# value will sort higher and will be applied sooner
|
|
257
260
|
parsers_in_order_of_priority = parsers.to_a.sort do |parser_a, parser_b|
|
|
258
|
-
@parser_priorities[parser_a]
|
|
261
|
+
if @parser_priorities[parser_a] != @parser_priorities[parser_b]
|
|
262
|
+
@parser_priorities[parser_a] <=> @parser_priorities[parser_b]
|
|
263
|
+
else
|
|
264
|
+
# Some parsers have the same priority and we want them to be always sorted
|
|
265
|
+
# in the same way, to not change the result of FormatParser.parse(results: :first).
|
|
266
|
+
# When this changes, it can generate flaky tests or even different
|
|
267
|
+
# results in different environments, which can be hard to understand why.
|
|
268
|
+
# There is also no guarantee in the order that the elements are added in
|
|
269
|
+
# @parser_priorities
|
|
270
|
+
# So, to have always the same order, we sort by the order that the parsers
|
|
271
|
+
# were registered if the priorities are the same.
|
|
272
|
+
@parsers.index(parser_a) <=> @parsers.index(parser_b)
|
|
273
|
+
end
|
|
259
274
|
end
|
|
260
275
|
|
|
261
276
|
# If there is one parser that is more likely to match, place it first
|
data/lib/image.rb
CHANGED
|
@@ -64,6 +64,9 @@ module FormatParser
|
|
|
64
64
|
# it can be placed here
|
|
65
65
|
attr_accessor :intrinsics
|
|
66
66
|
|
|
67
|
+
# The MIME type of the image file
|
|
68
|
+
attr_accessor :content_type
|
|
69
|
+
|
|
67
70
|
# Only permits assignments via defined accessors
|
|
68
71
|
def initialize(**attributes)
|
|
69
72
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/parsers/aiff_parser.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
class FormatParser::AIFFParser
|
|
2
2
|
include FormatParser::IOUtils
|
|
3
3
|
|
|
4
|
+
AIFF_MIME_TYPE = 'audio/x-aiff'
|
|
5
|
+
|
|
4
6
|
# Known chunk types we can omit when parsing,
|
|
5
7
|
# grossly lifted from http://www.muratnkonar.com/aiff/
|
|
6
8
|
KNOWN_CHUNKS = [
|
|
@@ -70,7 +72,8 @@ class FormatParser::AIFFParser
|
|
|
70
72
|
num_audio_channels: channels,
|
|
71
73
|
audio_sample_rate_hz: sample_rate.to_i,
|
|
72
74
|
media_duration_frames: sample_frames,
|
|
73
|
-
media_duration_seconds: duration_in_seconds
|
|
75
|
+
media_duration_seconds: duration_in_seconds,
|
|
76
|
+
content_type: AIFF_MIME_TYPE,
|
|
74
77
|
)
|
|
75
78
|
end
|
|
76
79
|
|
data/lib/parsers/bmp_parser.rb
CHANGED
|
@@ -5,6 +5,7 @@ class FormatParser::BMPParser
|
|
|
5
5
|
|
|
6
6
|
VALID_BMP = 'BM'
|
|
7
7
|
PERMISSIBLE_PIXEL_ARRAY_LOCATIONS = 26..512
|
|
8
|
+
BMP_MIME_TYPE = 'image/bmp'
|
|
8
9
|
|
|
9
10
|
def likely_match?(filename)
|
|
10
11
|
filename =~ /\.bmp$/i
|
|
@@ -42,6 +43,7 @@ class FormatParser::BMPParser
|
|
|
42
43
|
width_px: width,
|
|
43
44
|
height_px: height,
|
|
44
45
|
color_mode: :rgb,
|
|
46
|
+
content_type: BMP_MIME_TYPE,
|
|
45
47
|
intrinsics: {
|
|
46
48
|
data_order: data_order,
|
|
47
49
|
bits_per_pixel: bit_depth
|
|
@@ -63,6 +65,7 @@ class FormatParser::BMPParser
|
|
|
63
65
|
width_px: width,
|
|
64
66
|
height_px: height.abs,
|
|
65
67
|
color_mode: :rgb,
|
|
68
|
+
content_type: BMP_MIME_TYPE,
|
|
66
69
|
intrinsics: {
|
|
67
70
|
vertical_resolution: vertical_res,
|
|
68
71
|
horizontal_resolution: horizontal_res,
|
data/lib/parsers/cr2_parser.rb
CHANGED
|
@@ -6,6 +6,7 @@ class FormatParser::CR2Parser
|
|
|
6
6
|
|
|
7
7
|
TIFF_HEADER = [0x49, 0x49, 0x2a, 0x00]
|
|
8
8
|
CR2_HEADER = [0x43, 0x52, 0x02, 0x00]
|
|
9
|
+
CR2_MIME_TYPE = 'image/x-canon-cr2'
|
|
9
10
|
|
|
10
11
|
def likely_match?(filename)
|
|
11
12
|
filename =~ /\.cr2$/i
|
|
@@ -39,6 +40,7 @@ class FormatParser::CR2Parser
|
|
|
39
40
|
display_height_px: exif_data.rotated? ? w : h,
|
|
40
41
|
orientation: exif_data.orientation_sym,
|
|
41
42
|
intrinsics: {exif: exif_data},
|
|
43
|
+
content_type: CR2_MIME_TYPE,
|
|
42
44
|
)
|
|
43
45
|
rescue EXIFR::MalformedTIFF
|
|
44
46
|
nil
|
data/lib/parsers/dpx_parser.rb
CHANGED
|
@@ -6,6 +6,11 @@ class FormatParser::DPXParser
|
|
|
6
6
|
BE_MAGIC = 'SDPX'
|
|
7
7
|
LE_MAGIC = BE_MAGIC.reverse
|
|
8
8
|
|
|
9
|
+
# There is no official MIME type for DPX, so we have
|
|
10
|
+
# to invent something useful. We will prefix it with x-
|
|
11
|
+
# to indicate that it is a vendor subtype
|
|
12
|
+
DPX_MIME_TYPE = 'image/x-dpx'
|
|
13
|
+
|
|
9
14
|
class ByteOrderHintIO < SimpleDelegator
|
|
10
15
|
def initialize(io, is_little_endian)
|
|
11
16
|
super(io)
|
|
@@ -35,18 +40,23 @@ class FormatParser::DPXParser
|
|
|
35
40
|
w = dpx_structure.fetch(:image).fetch(:pixels_per_line)
|
|
36
41
|
h = dpx_structure.fetch(:image).fetch(:lines_per_element)
|
|
37
42
|
|
|
43
|
+
display_w = w
|
|
44
|
+
display_h = h
|
|
45
|
+
|
|
38
46
|
pixel_aspect_w = dpx_structure.fetch(:orientation).fetch(:horizontal_pixel_aspect)
|
|
39
47
|
pixel_aspect_h = dpx_structure.fetch(:orientation).fetch(:vertical_pixel_aspect)
|
|
40
|
-
pixel_aspect = pixel_aspect_w / pixel_aspect_h.to_f
|
|
41
48
|
|
|
42
|
-
|
|
49
|
+
# Find display height and width based on aspect only if the file structure has pixel aspects
|
|
50
|
+
if pixel_aspect_h != 0 && pixel_aspect_w != 0
|
|
51
|
+
pixel_aspect = pixel_aspect_w / pixel_aspect_h.to_f
|
|
43
52
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
53
|
+
image_aspect = w / h.to_f * pixel_aspect
|
|
54
|
+
|
|
55
|
+
if image_aspect > 1
|
|
56
|
+
display_h = (display_w / image_aspect).round
|
|
57
|
+
else
|
|
58
|
+
display_w = (display_h * image_aspect).round
|
|
59
|
+
end
|
|
50
60
|
end
|
|
51
61
|
|
|
52
62
|
FormatParser::Image.new(
|
|
@@ -56,6 +66,7 @@ class FormatParser::DPXParser
|
|
|
56
66
|
display_width_px: display_w,
|
|
57
67
|
display_height_px: display_h,
|
|
58
68
|
intrinsics: dpx_structure,
|
|
69
|
+
content_type: DPX_MIME_TYPE,
|
|
59
70
|
)
|
|
60
71
|
end
|
|
61
72
|
|
data/lib/parsers/flac_parser.rb
CHANGED
|
@@ -4,6 +4,7 @@ class FormatParser::FLACParser
|
|
|
4
4
|
MAGIC_BYTES = 4
|
|
5
5
|
MAGIC_BYTE_STRING = 'fLaC'
|
|
6
6
|
BLOCK_HEADER_BYTES = 4
|
|
7
|
+
FLAC_MIME_TYPE = 'audio/x-flac'
|
|
7
8
|
|
|
8
9
|
def likely_match?(filename)
|
|
9
10
|
filename =~ /\.flac$/i
|
|
@@ -61,6 +62,7 @@ class FormatParser::FLACParser
|
|
|
61
62
|
audio_sample_rate_hz: sample_rate,
|
|
62
63
|
media_duration_seconds: duration,
|
|
63
64
|
media_duration_frames: total_samples,
|
|
65
|
+
content_type: FLAC_MIME_TYPE,
|
|
64
66
|
intrinsics: {
|
|
65
67
|
bits_per_sample: bits_per_sample,
|
|
66
68
|
minimum_frame_size: minimum_frame_size,
|
data/lib/parsers/gif_parser.rb
CHANGED
|
@@ -3,6 +3,7 @@ class FormatParser::GIFParser
|
|
|
3
3
|
|
|
4
4
|
HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
|
|
5
5
|
NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
|
|
6
|
+
GIF_MIME_TYPE = 'image/gif'
|
|
6
7
|
|
|
7
8
|
def likely_match?(filename)
|
|
8
9
|
filename =~ /\.gif$/i
|
|
@@ -45,6 +46,7 @@ class FormatParser::GIFParser
|
|
|
45
46
|
height_px: h,
|
|
46
47
|
has_multiple_frames: is_animated,
|
|
47
48
|
color_mode: :indexed,
|
|
49
|
+
content_type: GIF_MIME_TYPE
|
|
48
50
|
)
|
|
49
51
|
end
|
|
50
52
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
|
@@ -12,6 +12,7 @@ class FormatParser::JPEGParser
|
|
|
12
12
|
APP1_MARKER = 0xE1 # maybe EXIF
|
|
13
13
|
EXIF_MAGIC_STRING = "Exif\0\0".b
|
|
14
14
|
MUST_FIND_NEXT_MARKER_WITHIN_BYTES = 1024
|
|
15
|
+
JPEG_MIME_TYPE = 'image/jpeg'
|
|
15
16
|
|
|
16
17
|
def self.likely_match?(filename)
|
|
17
18
|
filename =~ /\.jpe?g$/i
|
|
@@ -88,6 +89,7 @@ class FormatParser::JPEGParser
|
|
|
88
89
|
display_height_px: dh,
|
|
89
90
|
orientation: flat_exif.orientation_sym,
|
|
90
91
|
intrinsics: {exif: flat_exif},
|
|
92
|
+
content_type: JPEG_MIME_TYPE
|
|
91
93
|
)
|
|
92
94
|
|
|
93
95
|
return result
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
class FormatParser::M3UParser
|
|
2
|
+
include FormatParser::IOUtils
|
|
3
|
+
|
|
4
|
+
HEADER = '#EXTM3U'
|
|
5
|
+
M3U8_MIME_TYPE = 'application/vnd.apple.mpegurl' # https://en.wikipedia.org/wiki/M3U#Internet_media_types
|
|
6
|
+
|
|
7
|
+
def likely_match?(filename)
|
|
8
|
+
filename =~ /\.m3u8?$/i
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def call(io)
|
|
12
|
+
io = FormatParser::IOConstraint.new(io)
|
|
13
|
+
|
|
14
|
+
header = safe_read(io, 7)
|
|
15
|
+
return unless HEADER.eql?(header)
|
|
16
|
+
|
|
17
|
+
FormatParser::Text.new(
|
|
18
|
+
format: :m3u,
|
|
19
|
+
content_type: M3U8_MIME_TYPE,
|
|
20
|
+
)
|
|
21
|
+
end
|
|
22
|
+
FormatParser.register_parser new, natures: :text, formats: :m3u
|
|
23
|
+
end
|
data/lib/parsers/moov_parser.rb
CHANGED
|
@@ -11,6 +11,12 @@ class FormatParser::MOOVParser
|
|
|
11
11
|
'm4a ' => :m4a,
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
+
# https://tools.ietf.org/html/rfc4337#section-2
|
|
15
|
+
# There is also video/quicktime which we should be able to capture
|
|
16
|
+
# here, but there is currently no detection for MOVs versus MP4s
|
|
17
|
+
MP4_AU_MIME_TYPE = 'audio/mp4'
|
|
18
|
+
MP4_MIXED_MIME_TYPE = 'video/mp4'
|
|
19
|
+
|
|
14
20
|
def likely_match?(filename)
|
|
15
21
|
filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
|
|
16
22
|
end
|
|
@@ -49,10 +55,12 @@ class FormatParser::MOOVParser
|
|
|
49
55
|
end
|
|
50
56
|
|
|
51
57
|
# M4A only contains audio, while MP4 and friends can contain video.
|
|
52
|
-
|
|
58
|
+
fmt = format_from_moov_type(file_type)
|
|
59
|
+
if fmt == :m4a
|
|
53
60
|
FormatParser::Audio.new(
|
|
54
61
|
format: format_from_moov_type(file_type),
|
|
55
62
|
media_duration_seconds: media_duration_s,
|
|
63
|
+
content_type: MP4_AU_MIME_TYPE,
|
|
56
64
|
intrinsics: atom_tree,
|
|
57
65
|
)
|
|
58
66
|
else
|
|
@@ -61,6 +69,7 @@ class FormatParser::MOOVParser
|
|
|
61
69
|
width_px: width,
|
|
62
70
|
height_px: height,
|
|
63
71
|
media_duration_seconds: media_duration_s,
|
|
72
|
+
content_type: MP4_MIXED_MIME_TYPE,
|
|
64
73
|
intrinsics: atom_tree,
|
|
65
74
|
)
|
|
66
75
|
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
|
@@ -29,6 +29,10 @@ class FormatParser::MP3Parser
|
|
|
29
29
|
ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
|
|
30
30
|
PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
|
|
31
31
|
|
|
32
|
+
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
|
33
|
+
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
|
34
|
+
TIFF_HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
|
35
|
+
MP3_MIME_TYPE = 'audio/mpeg'
|
|
32
36
|
# Wraps the Tag object returned by ID3Tag in such
|
|
33
37
|
# a way that a usable JSON representation gets
|
|
34
38
|
# returned
|
|
@@ -68,6 +72,9 @@ class FormatParser::MP3Parser
|
|
|
68
72
|
return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
|
|
69
73
|
return if header.start_with?(PNG_HEADER_BYTES)
|
|
70
74
|
|
|
75
|
+
io.seek(0)
|
|
76
|
+
return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))
|
|
77
|
+
|
|
71
78
|
# Read all the ID3 tags (or at least attempt to)
|
|
72
79
|
io.seek(0)
|
|
73
80
|
id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
|
|
@@ -97,7 +104,8 @@ class FormatParser::MP3Parser
|
|
|
97
104
|
# do not tell anything of substance
|
|
98
105
|
num_audio_channels: first_frame.channels,
|
|
99
106
|
audio_sample_rate_hz: first_frame.sample_rate,
|
|
100
|
-
intrinsics: id3tags_hash.merge(id3tags: tags)
|
|
107
|
+
intrinsics: id3tags_hash.merge(id3tags: tags),
|
|
108
|
+
content_type: MP3_MIME_TYPE,
|
|
101
109
|
)
|
|
102
110
|
|
|
103
111
|
extra_file_attirbutes = fetch_extra_attributes_from_id3_tags(id3tags_hash)
|
data/lib/parsers/ogg_parser.rb
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
class FormatParser::OggParser
|
|
4
4
|
include FormatParser::IOUtils
|
|
5
5
|
|
|
6
|
-
# Maximum size of an Ogg page
|
|
7
6
|
MAX_POSSIBLE_PAGE_SIZE = 65307
|
|
7
|
+
OGG_MIME_TYPE = 'audio/ogg'
|
|
8
8
|
|
|
9
9
|
def likely_match?(filename)
|
|
10
10
|
filename =~ /\.ogg$/i
|
|
@@ -45,7 +45,8 @@ class FormatParser::OggParser
|
|
|
45
45
|
format: :ogg,
|
|
46
46
|
audio_sample_rate_hz: sample_rate,
|
|
47
47
|
num_audio_channels: channels,
|
|
48
|
-
media_duration_seconds: duration
|
|
48
|
+
media_duration_seconds: duration,
|
|
49
|
+
content_type: OGG_MIME_TYPE,
|
|
49
50
|
)
|
|
50
51
|
end
|
|
51
52
|
|
data/lib/parsers/pdf_parser.rb
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
class FormatParser::PDFParser
|
|
2
2
|
include FormatParser::IOUtils
|
|
3
|
-
|
|
4
3
|
# First 9 bytes of a PDF should be in this format, according to:
|
|
5
4
|
#
|
|
6
5
|
# https://stackoverflow.com/questions/3108201/detect-if-pdf-file-is-correct-header-pdf
|
|
@@ -8,6 +7,7 @@ class FormatParser::PDFParser
|
|
|
8
7
|
# There are however exceptions, which are left out for now.
|
|
9
8
|
#
|
|
10
9
|
PDF_MARKER = /%PDF-1\.[0-8]{1}/
|
|
10
|
+
PDF_CONTENT_TYPE = 'application/pdf'
|
|
11
11
|
|
|
12
12
|
def likely_match?(filename)
|
|
13
13
|
filename =~ /\.(pdf|ai)$/i
|
|
@@ -18,7 +18,7 @@ class FormatParser::PDFParser
|
|
|
18
18
|
|
|
19
19
|
return unless safe_read(io, 9) =~ PDF_MARKER
|
|
20
20
|
|
|
21
|
-
FormatParser::Document.new(format: :pdf)
|
|
21
|
+
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 1
|
data/lib/parsers/png_parser.rb
CHANGED
|
@@ -14,6 +14,7 @@ class FormatParser::PNGParser
|
|
|
14
14
|
4 => true, # Grayscale with alpha
|
|
15
15
|
6 => true,
|
|
16
16
|
}
|
|
17
|
+
PNG_MIME_TYPE = 'image/png'
|
|
17
18
|
|
|
18
19
|
def likely_match?(filename)
|
|
19
20
|
filename =~ /\.png$/i
|
|
@@ -67,6 +68,7 @@ class FormatParser::PNGParser
|
|
|
67
68
|
color_mode: color_mode,
|
|
68
69
|
has_multiple_frames: has_animation,
|
|
69
70
|
num_animation_or_video_frames: num_frames,
|
|
71
|
+
content_type: PNG_MIME_TYPE,
|
|
70
72
|
)
|
|
71
73
|
end
|
|
72
74
|
|
data/lib/parsers/psd_parser.rb
CHANGED
|
@@ -2,6 +2,7 @@ class FormatParser::PSDParser
|
|
|
2
2
|
include FormatParser::IOUtils
|
|
3
3
|
|
|
4
4
|
PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
|
|
5
|
+
PSD_MIME_TYPE = 'application/x-photoshop'
|
|
5
6
|
|
|
6
7
|
def likely_match?(filename)
|
|
7
8
|
filename =~ /\.psd$/i # Maybe also PSB at some point
|
|
@@ -20,6 +21,7 @@ class FormatParser::PSDParser
|
|
|
20
21
|
format: :psd,
|
|
21
22
|
width_px: w,
|
|
22
23
|
height_px: h,
|
|
24
|
+
content_type: PSD_MIME_TYPE,
|
|
23
25
|
)
|
|
24
26
|
end
|
|
25
27
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
|
@@ -4,6 +4,9 @@ class FormatParser::TIFFParser
|
|
|
4
4
|
|
|
5
5
|
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
|
6
6
|
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
|
7
|
+
HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
|
8
|
+
TIFF_MIME_TYPE = 'image/tiff'
|
|
9
|
+
ARW_MIME_TYPE = 'image/x-sony-arw'
|
|
7
10
|
|
|
8
11
|
def likely_match?(filename)
|
|
9
12
|
filename =~ /\.tiff?$/i
|
|
@@ -12,8 +15,11 @@ class FormatParser::TIFFParser
|
|
|
12
15
|
def call(io)
|
|
13
16
|
io = FormatParser::IOConstraint.new(io)
|
|
14
17
|
|
|
15
|
-
return unless
|
|
16
|
-
|
|
18
|
+
return unless HEADER_BYTES.include?(safe_read(io, 4))
|
|
19
|
+
|
|
20
|
+
# Skip over the offset of the IFD,
|
|
21
|
+
# EXIFR will re-read it anyway
|
|
22
|
+
io.seek(io.pos + 2)
|
|
17
23
|
return if cr2?(io)
|
|
18
24
|
|
|
19
25
|
# The TIFF scanner in EXIFR is plenty good enough,
|
|
@@ -25,14 +31,17 @@ class FormatParser::TIFFParser
|
|
|
25
31
|
w = exif_data.width || exif_data.pixel_x_dimension
|
|
26
32
|
h = exif_data.height || exif_data.pixel_y_dimension
|
|
27
33
|
|
|
34
|
+
format = arw?(exif_data) ? :arw : :tif
|
|
35
|
+
mime_type = arw?(exif_data) ? ARW_MIME_TYPE : TIFF_MIME_TYPE
|
|
28
36
|
FormatParser::Image.new(
|
|
29
|
-
format:
|
|
37
|
+
format: format,
|
|
30
38
|
width_px: w,
|
|
31
39
|
height_px: h,
|
|
32
40
|
display_width_px: exif_data.rotated? ? h : w,
|
|
33
41
|
display_height_px: exif_data.rotated? ? w : h,
|
|
34
42
|
orientation: exif_data.orientation_sym,
|
|
35
43
|
intrinsics: {exif: exif_data},
|
|
44
|
+
content_type: mime_type,
|
|
36
45
|
)
|
|
37
46
|
rescue EXIFR::MalformedTIFF
|
|
38
47
|
nil
|
data/lib/parsers/wav_parser.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
class FormatParser::WAVParser
|
|
2
2
|
include FormatParser::IOUtils
|
|
3
3
|
|
|
4
|
+
WAV_MIME_TYPE = 'audio/x-wav'
|
|
5
|
+
|
|
4
6
|
def likely_match?(filename)
|
|
5
7
|
filename =~ /\.wav$/i
|
|
6
8
|
end
|
|
@@ -96,6 +98,7 @@ class FormatParser::WAVParser
|
|
|
96
98
|
audio_sample_rate_hz: fmt_data[:sample_rate],
|
|
97
99
|
media_duration_frames: sample_frames,
|
|
98
100
|
media_duration_seconds: duration_in_seconds,
|
|
101
|
+
content_type: WAV_MIME_TYPE,
|
|
99
102
|
)
|
|
100
103
|
end
|
|
101
104
|
|
data/lib/parsers/zip_parser.rb
CHANGED
|
@@ -5,6 +5,8 @@ class FormatParser::ZIPParser
|
|
|
5
5
|
include OfficeFormats
|
|
6
6
|
include FormatParser::IOUtils
|
|
7
7
|
|
|
8
|
+
ZIP_MIME_TYPE = 'application/zip'
|
|
9
|
+
|
|
8
10
|
def likely_match?(filename)
|
|
9
11
|
filename =~ /\.(zip|docx|keynote|numbers|pptx|xlsx)$/i
|
|
10
12
|
end
|
|
@@ -25,10 +27,10 @@ class FormatParser::ZIPParser
|
|
|
25
27
|
end
|
|
26
28
|
|
|
27
29
|
if office_document?(filenames_set)
|
|
28
|
-
office_format =
|
|
29
|
-
FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive)
|
|
30
|
+
office_format, mime_type = office_file_format_and_mime_type_from_entry_set(filenames_set)
|
|
31
|
+
FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive, content_type: mime_type)
|
|
30
32
|
else
|
|
31
|
-
FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive)
|
|
33
|
+
FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive, content_type: ZIP_MIME_TYPE)
|
|
32
34
|
end
|
|
33
35
|
rescue FileReader::Error
|
|
34
36
|
# This is not a ZIP, or a broken ZIP.
|
|
@@ -37,15 +37,15 @@ module FormatParser::ZIPParser::OfficeFormats
|
|
|
37
37
|
OFFICE_MARKER_FILES.subset?(filenames_set)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
def
|
|
40
|
+
def office_file_format_and_mime_type_from_entry_set(filenames_set)
|
|
41
41
|
if filenames_set.include?('word/document.xml')
|
|
42
|
-
:docx
|
|
42
|
+
[:docx, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document']
|
|
43
43
|
elsif filenames_set.include?('xl/workbook.xml')
|
|
44
|
-
:xlsx
|
|
44
|
+
[:xlsx, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
|
|
45
45
|
elsif filenames_set.include?('ppt/presentation.xml')
|
|
46
|
-
:pptx
|
|
46
|
+
[:pptx, 'application/vnd.openxmlformats-officedocument.presentationml.presentation']
|
|
47
47
|
else
|
|
48
|
-
:unknown
|
|
48
|
+
[:unknown, 'application/zip']
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
51
|
end
|
data/lib/text.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module FormatParser
|
|
2
|
+
class Text
|
|
3
|
+
include FormatParser::AttributesJSON
|
|
4
|
+
|
|
5
|
+
NATURE = :text
|
|
6
|
+
|
|
7
|
+
attr_accessor :format
|
|
8
|
+
attr_accessor :content_type
|
|
9
|
+
|
|
10
|
+
# Only permits assignments via defined accessors
|
|
11
|
+
def initialize(**attributes)
|
|
12
|
+
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def nature
|
|
16
|
+
NATURE
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
data/lib/video.rb
CHANGED
|
@@ -23,6 +23,9 @@ module FormatParser
|
|
|
23
23
|
# it can be placed here
|
|
24
24
|
attr_accessor :intrinsics
|
|
25
25
|
|
|
26
|
+
# The MIME type of the video
|
|
27
|
+
attr_accessor :content_type
|
|
28
|
+
|
|
26
29
|
# Only permits assignments via defined accessors
|
|
27
30
|
def initialize(**attributes)
|
|
28
31
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/spec/format_parser_spec.rb
CHANGED
|
@@ -173,6 +173,26 @@ describe FormatParser do
|
|
|
173
173
|
prioritized_parsers = FormatParser.parsers_for([:archive, :document, :image, :audio], [:tif, :jpg, :zip, :docx, :mp3, :aiff], 'a-file.zip')
|
|
174
174
|
expect(prioritized_parsers.first).to be_kind_of(FormatParser::ZIPParser)
|
|
175
175
|
end
|
|
176
|
+
|
|
177
|
+
it 'sorts the parsers by priority and name' do
|
|
178
|
+
parsers = FormatParser.parsers_for(
|
|
179
|
+
[:audio, :image],
|
|
180
|
+
[:cr2, :dpx, :fdx, :flac, :gif, :jpg, :mov, :mp4, :m4a, :mp3, :mpg, :mpeg, :ogg, :png, :tif, :wav]
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
expect(parsers.map { |parser| parser.class.name }).to eq([
|
|
184
|
+
'FormatParser::GIFParser',
|
|
185
|
+
'Class',
|
|
186
|
+
'FormatParser::PNGParser',
|
|
187
|
+
'FormatParser::CR2Parser',
|
|
188
|
+
'FormatParser::DPXParser',
|
|
189
|
+
'FormatParser::FLACParser',
|
|
190
|
+
'FormatParser::MP3Parser',
|
|
191
|
+
'FormatParser::OggParser',
|
|
192
|
+
'FormatParser::TIFFParser',
|
|
193
|
+
'FormatParser::WAVParser'
|
|
194
|
+
])
|
|
195
|
+
end
|
|
176
196
|
end
|
|
177
197
|
|
|
178
198
|
describe '.register_parser and .deregister_parser' do
|
|
@@ -10,6 +10,7 @@ describe FormatParser::AIFFParser do
|
|
|
10
10
|
expect(parse_result.num_audio_channels).to eq(2)
|
|
11
11
|
expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
|
|
12
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(1.05)
|
|
13
|
+
expect(parse_result.content_type).to eq('audio/x-aiff')
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
|
|
@@ -13,6 +13,8 @@ describe FormatParser::BMPParser do
|
|
|
13
13
|
expect(parsed.width_px).to eq(40)
|
|
14
14
|
expect(parsed.height_px).to eq(27)
|
|
15
15
|
|
|
16
|
+
expect(parsed.content_type).to eq('image/bmp')
|
|
17
|
+
|
|
16
18
|
expect(parsed.intrinsics).not_to be_nil
|
|
17
19
|
expect(parsed.intrinsics[:vertical_resolution]).to eq(2834)
|
|
18
20
|
expect(parsed.intrinsics[:horizontal_resolution]).to eq(2834)
|
|
@@ -32,6 +34,8 @@ describe FormatParser::BMPParser do
|
|
|
32
34
|
expect(parsed.width_px).to eq(1920)
|
|
33
35
|
expect(parsed.height_px).to eq(1080)
|
|
34
36
|
|
|
37
|
+
expect(parsed.content_type).to eq('image/bmp')
|
|
38
|
+
|
|
35
39
|
expect(parsed.intrinsics).not_to be_nil
|
|
36
40
|
expect(parsed.intrinsics[:vertical_resolution]).to eq(2835)
|
|
37
41
|
expect(parsed.intrinsics[:horizontal_resolution]).to eq(2835)
|
|
@@ -51,6 +55,8 @@ describe FormatParser::BMPParser do
|
|
|
51
55
|
expect(parsed.width_px).to eq(200)
|
|
52
56
|
expect(parsed.height_px).to eq(200)
|
|
53
57
|
|
|
58
|
+
expect(parsed.content_type).to eq('image/bmp')
|
|
59
|
+
|
|
54
60
|
expect(parsed.intrinsics).not_to be_nil
|
|
55
61
|
end
|
|
56
62
|
|
|
@@ -64,6 +70,7 @@ describe FormatParser::BMPParser do
|
|
|
64
70
|
expect(parsed.color_mode).to eq(:rgb)
|
|
65
71
|
expect(parsed.width_px).to eq(40)
|
|
66
72
|
expect(parsed.height_px).to eq(27)
|
|
73
|
+
expect(parsed.content_type).to eq('image/bmp')
|
|
67
74
|
expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
|
|
68
75
|
expect(parsed.intrinsics[:data_order]).to eq(:normal)
|
|
69
76
|
|
|
@@ -76,6 +83,7 @@ describe FormatParser::BMPParser do
|
|
|
76
83
|
expect(parsed.color_mode).to eq(:rgb)
|
|
77
84
|
expect(parsed.width_px).to eq(40)
|
|
78
85
|
expect(parsed.height_px).to eq(27)
|
|
86
|
+
expect(parsed.content_type).to eq('image/bmp')
|
|
79
87
|
expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
|
|
80
88
|
expect(parsed.intrinsics[:data_order]).to eq(:normal)
|
|
81
89
|
end
|
|
@@ -14,6 +14,7 @@ describe FormatParser::FLACParser do
|
|
|
14
14
|
expect(parsed.intrinsics).not_to be_nil
|
|
15
15
|
expect(parsed.media_duration_frames).to eq(33810)
|
|
16
16
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
|
17
|
+
expect(parsed.content_type).to eq('audio/x-flac')
|
|
17
18
|
end
|
|
18
19
|
|
|
19
20
|
it 'decodes and estimates duration for the 16bit FLAC File' do
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe FormatParser::M3UParser do
|
|
4
|
+
let(:parsed_m3u) do
|
|
5
|
+
subject.call(
|
|
6
|
+
File.open(
|
|
7
|
+
Pathname.new(fixtures_dir).join('M3U').join(m3u_file),
|
|
8
|
+
'rb'
|
|
9
|
+
)
|
|
10
|
+
)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
describe 'an m3u file with missing header' do
|
|
14
|
+
let(:m3u_file) { 'plain_text.m3u' }
|
|
15
|
+
|
|
16
|
+
it 'does not parse the file successfully' do
|
|
17
|
+
expect(parsed_m3u).to be_nil
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
describe 'an m3u file with valid header' do
|
|
22
|
+
let(:m3u_file) { 'sample.m3u' }
|
|
23
|
+
|
|
24
|
+
it 'parses the file successfully' do
|
|
25
|
+
expect(parsed_m3u).not_to be_nil
|
|
26
|
+
expect(parsed_m3u.nature).to eq(:text)
|
|
27
|
+
expect(parsed_m3u.format).to eq(:m3u)
|
|
28
|
+
expect(parsed_m3u.content_type).to eq('application/vnd.apple.mpegurl')
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
describe 'an m3u8 file with valid header' do
|
|
33
|
+
let(:m3u_file) { 'sample.m3u8' }
|
|
34
|
+
|
|
35
|
+
it 'parses the file successfully' do
|
|
36
|
+
expect(parsed_m3u).not_to be_nil
|
|
37
|
+
expect(parsed_m3u.nature).to eq(:text)
|
|
38
|
+
expect(parsed_m3u.format).to eq(:m3u)
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -37,7 +37,7 @@ describe FormatParser::MOOVParser do
|
|
|
37
37
|
expect(result.nature).to eq(:audio)
|
|
38
38
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
|
39
39
|
expect(result.media_duration_seconds).to be > 0
|
|
40
|
-
|
|
40
|
+
expect(result.content_type).to be_kind_of(String)
|
|
41
41
|
expect(result.intrinsics).not_to be_nil
|
|
42
42
|
end
|
|
43
43
|
end
|
|
@@ -52,6 +52,7 @@ describe FormatParser::MOOVParser do
|
|
|
52
52
|
expect(result.height_px).to be > 0
|
|
53
53
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
|
54
54
|
expect(result.media_duration_seconds).to be > 0
|
|
55
|
+
expect(result.content_type).to eq('video/mp4')
|
|
55
56
|
|
|
56
57
|
expect(result.intrinsics).not_to be_nil
|
|
57
58
|
end
|
|
@@ -67,6 +68,7 @@ describe FormatParser::MOOVParser do
|
|
|
67
68
|
expect(result.height_px).to be > 0
|
|
68
69
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
|
69
70
|
expect(result.media_duration_seconds).to be > 0
|
|
71
|
+
expect(result.content_type).to eq('video/mp4')
|
|
70
72
|
|
|
71
73
|
expect(result.intrinsics).not_to be_nil
|
|
72
74
|
end
|
|
@@ -79,6 +81,7 @@ describe FormatParser::MOOVParser do
|
|
|
79
81
|
expect(result).not_to be_nil
|
|
80
82
|
expect(result.nature).to eq(:audio)
|
|
81
83
|
expect(result.format).to eq(:m4a)
|
|
84
|
+
expect(result.content_type).to eq('audio/mp4')
|
|
82
85
|
end
|
|
83
86
|
|
|
84
87
|
it 'parses a MOV file and provides the necessary metadata' do
|
|
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
|
|
|
23
23
|
|
|
24
24
|
expect(parsed.nature).to eq(:audio)
|
|
25
25
|
expect(parsed.format).to eq(:mp3)
|
|
26
|
+
expect(parsed.content_type).to eq('audio/mpeg')
|
|
26
27
|
expect(parsed.num_audio_channels).to eq(2)
|
|
27
28
|
expect(parsed.audio_sample_rate_hz).to eq(48000)
|
|
28
29
|
expect(parsed.intrinsics).not_to be_nil
|
|
@@ -205,4 +206,12 @@ describe FormatParser::MP3Parser do
|
|
|
205
206
|
).to eq([ID3Tag::Tag])
|
|
206
207
|
end
|
|
207
208
|
end
|
|
209
|
+
|
|
210
|
+
it 'does not recognize TIFF files as MP3' do
|
|
211
|
+
fpath = fixtures_dir + '/TIFF/test2.tif'
|
|
212
|
+
|
|
213
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
|
214
|
+
|
|
215
|
+
expect(parsed).to be_nil
|
|
216
|
+
end
|
|
208
217
|
end
|
|
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
|
|
|
6
6
|
|
|
7
7
|
expect(parse_result.nature).to eq(:audio)
|
|
8
8
|
expect(parse_result.format).to eq(:ogg)
|
|
9
|
+
expect(parse_result.content_type).to eq('audio/ogg')
|
|
9
10
|
expect(parse_result.num_audio_channels).to eq(1)
|
|
10
11
|
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
|
11
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
|
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
|
|
|
59
59
|
expect(parsed.width_px).to eq(7952)
|
|
60
60
|
expect(parsed.height_px).to eq(5304)
|
|
61
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
|
62
|
+
expect(parsed.content_type).to eq('image/x-sony-arw')
|
|
62
63
|
end
|
|
63
64
|
|
|
64
65
|
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
|
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
|
|
|
14
14
|
expect(result).not_to be_nil
|
|
15
15
|
|
|
16
16
|
expect(result.format).to eq(:zip)
|
|
17
|
+
expect(result.content_type).to eq('application/zip')
|
|
17
18
|
expect(result.nature).to eq(:archive)
|
|
18
19
|
expect(result.entries.length).to eq(0xFFFF + 1)
|
|
19
20
|
|
|
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
|
|
|
58
59
|
result = subject.call(fi_io)
|
|
59
60
|
expect(result.nature).to eq(:document)
|
|
60
61
|
expect(result.format).to eq(:docx)
|
|
62
|
+
expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
|
|
61
63
|
|
|
62
64
|
fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
|
|
63
65
|
fi_io = File.open(fixture_path, 'rb')
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: format_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.25.3
|
|
4
|
+
version: 0.27.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Noah Berman
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2021-01-26 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: ks
|
|
@@ -219,6 +219,7 @@ files:
|
|
|
219
219
|
- lib/parsers/flac_parser.rb
|
|
220
220
|
- lib/parsers/gif_parser.rb
|
|
221
221
|
- lib/parsers/jpeg_parser.rb
|
|
222
|
+
- lib/parsers/m3u_parser.rb
|
|
222
223
|
- lib/parsers/moov_parser.rb
|
|
223
224
|
- lib/parsers/moov_parser/decoder.rb
|
|
224
225
|
- lib/parsers/mp3_parser.rb
|
|
@@ -236,6 +237,7 @@ files:
|
|
|
236
237
|
- lib/read_limiter.rb
|
|
237
238
|
- lib/read_limits_config.rb
|
|
238
239
|
- lib/remote_io.rb
|
|
240
|
+
- lib/text.rb
|
|
239
241
|
- lib/video.rb
|
|
240
242
|
- spec/active_storage/blob_io_spec.rb
|
|
241
243
|
- spec/active_storage/rails_app_spec.rb
|
|
@@ -257,6 +259,7 @@ files:
|
|
|
257
259
|
- spec/parsers/flac_parser_spec.rb
|
|
258
260
|
- spec/parsers/gif_parser_spec.rb
|
|
259
261
|
- spec/parsers/jpeg_parser_spec.rb
|
|
262
|
+
- spec/parsers/m3u_parser_spec.rb
|
|
260
263
|
- spec/parsers/moov_parser_spec.rb
|
|
261
264
|
- spec/parsers/mp3_parser_spec.rb
|
|
262
265
|
- spec/parsers/mpeg_parser_spec.rb
|
|
@@ -292,7 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
292
295
|
- !ruby/object:Gem::Version
|
|
293
296
|
version: '0'
|
|
294
297
|
requirements: []
|
|
295
|
-
rubygems_version: 3.
|
|
298
|
+
rubygems_version: 3.0.3
|
|
296
299
|
signing_key:
|
|
297
300
|
specification_version: 4
|
|
298
301
|
summary: A library for efficient parsing of file metadata
|