format_parser 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -8
- data/format_parser.gemspec +9 -3
- data/lib/audio.rb +37 -0
- data/lib/document.rb +17 -0
- data/lib/format_parser.rb +41 -10
- data/lib/format_parser/version.rb +1 -1
- data/lib/{file_information.rb → image.rb} +5 -26
- data/lib/parsers/aiff_parser.rb +7 -4
- data/lib/parsers/dpx_parser.rb +8 -3
- data/lib/parsers/dsl.rb +29 -0
- data/lib/parsers/fdx_parser.rb +10 -7
- data/lib/parsers/gif_parser.rb +8 -5
- data/lib/parsers/jpeg_parser.rb +8 -12
- data/lib/parsers/moov_parser.rb +9 -7
- data/lib/parsers/mp3_parser.rb +7 -4
- data/lib/parsers/png_parser.rb +9 -5
- data/lib/parsers/psd_parser.rb +8 -5
- data/lib/parsers/tiff_parser.rb +9 -13
- data/lib/parsers/wav_parser.rb +6 -5
- data/lib/video.rb +33 -0
- data/spec/aiff_parser_spec.rb +6 -6
- data/spec/file_information_spec.rb +4 -4
- data/spec/format_parser_spec.rb +30 -2
- data/spec/parsers/dpx_parser_spec.rb +4 -4
- data/spec/parsers/fdx_parser_spec.rb +5 -5
- data/spec/parsers/gif_parser_spec.rb +4 -4
- data/spec/parsers/jpeg_parser_spec.rb +4 -4
- data/spec/parsers/moov_parser_spec.rb +9 -8
- data/spec/parsers/mp3_parser_spec.rb +9 -9
- data/spec/parsers/png_parser_spec.rb +4 -4
- data/spec/parsers/psd_parser_spec.rb +3 -3
- data/spec/parsers/tiff_parser_spec.rb +4 -4
- data/spec/parsers/wav_parser_spec.rb +13 -13
- data/spec/remote_fetching_spec.rb +9 -9
- metadata +14 -5
data/lib/parsers/gif_parser.rb
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
class FormatParser::GIFParser
|
2
|
+
include FormatParser::IOUtils
|
3
|
+
include FormatParser::DSL
|
4
|
+
|
2
5
|
HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
|
3
6
|
NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
|
4
7
|
|
5
|
-
|
8
|
+
natures :image
|
9
|
+
formats :gif
|
6
10
|
|
7
|
-
def
|
11
|
+
def call(io)
|
8
12
|
io = FormatParser::IOConstraint.new(io)
|
9
|
-
|
10
13
|
header = safe_read(io, 6)
|
11
14
|
return unless HEADERS.include?(header)
|
12
15
|
|
@@ -38,8 +41,8 @@ class FormatParser::GIFParser
|
|
38
41
|
potentially_netscape_app_header = safe_read(io, 64)
|
39
42
|
is_animated = potentially_netscape_app_header.include?(NETSCAPE_AND_AUTHENTICATION_CODE)
|
40
43
|
|
41
|
-
FormatParser::
|
42
|
-
|
44
|
+
FormatParser::Image.new(
|
45
|
+
format: :gif,
|
43
46
|
width_px: w,
|
44
47
|
height_px: h,
|
45
48
|
has_multiple_frames: is_animated,
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class FormatParser::JPEGParser
|
2
2
|
include FormatParser::IOUtils
|
3
|
+
include FormatParser::DSL
|
3
4
|
|
4
5
|
class InvalidStructure < StandardError
|
5
6
|
end
|
@@ -10,7 +11,10 @@ class FormatParser::JPEGParser
|
|
10
11
|
SOS_MARKER = 0xDA # start of stream
|
11
12
|
APP1_MARKER = 0xE1 # maybe EXIF
|
12
13
|
|
13
|
-
|
14
|
+
natures :image
|
15
|
+
formats :jpg
|
16
|
+
|
17
|
+
def call(io)
|
14
18
|
@buf = FormatParser::IOConstraint.new(io)
|
15
19
|
@width = nil
|
16
20
|
@height = nil
|
@@ -50,21 +54,13 @@ class FormatParser::JPEGParser
|
|
50
54
|
end
|
51
55
|
|
52
56
|
# Return at the earliest possible opportunity
|
53
|
-
if @width && @height
|
54
|
-
|
55
|
-
|
57
|
+
if @width && @height
|
58
|
+
return FormatParser::Image.new(
|
59
|
+
format: :jpg,
|
56
60
|
width_px: @width,
|
57
61
|
height_px: @height,
|
58
62
|
orientation: @orientation
|
59
63
|
)
|
60
|
-
return file_info
|
61
|
-
elsif @width && @height
|
62
|
-
file_info = FormatParser::FileInformation.image(
|
63
|
-
file_type: :jpg,
|
64
|
-
width_px: @width,
|
65
|
-
height_px: @height
|
66
|
-
)
|
67
|
-
return file_info
|
68
64
|
end
|
69
65
|
end
|
70
66
|
nil # We could not parse anything
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class FormatParser::MOOVParser
|
2
2
|
include FormatParser::IOUtils
|
3
|
-
|
3
|
+
include FormatParser::DSL
|
4
4
|
require_relative 'moov_parser/decoder'
|
5
5
|
|
6
6
|
# Maps values of the "ftyp" atom to something
|
@@ -12,11 +12,14 @@ class FormatParser::MOOVParser
|
|
12
12
|
"m4a " => :m4a,
|
13
13
|
}
|
14
14
|
|
15
|
+
natures :video
|
16
|
+
formats *FTYP_MAP.values
|
17
|
+
|
15
18
|
# It is currently not documented and not particularly well-tested,
|
16
19
|
# so not considered a public API for now
|
17
|
-
private_constant :Decoder
|
20
|
+
private_constant :Decoder
|
18
21
|
|
19
|
-
def
|
22
|
+
def call(io)
|
20
23
|
return nil unless matches_moov_definition?(io)
|
21
24
|
|
22
25
|
# Now we know we are in a MOOV, so go back and parse out the atom structure.
|
@@ -51,9 +54,8 @@ class FormatParser::MOOVParser
|
|
51
54
|
media_duration_s = duration / timescale.to_f
|
52
55
|
end
|
53
56
|
|
54
|
-
FormatParser::
|
55
|
-
|
56
|
-
file_type: file_type_from_moov_type(file_type),
|
57
|
+
FormatParser::Video.new(
|
58
|
+
format: format_from_moov_type(file_type),
|
57
59
|
width_px: width,
|
58
60
|
height_px: height,
|
59
61
|
media_duration_seconds: media_duration_s,
|
@@ -63,7 +65,7 @@ class FormatParser::MOOVParser
|
|
63
65
|
|
64
66
|
private
|
65
67
|
|
66
|
-
def
|
68
|
+
def format_from_moov_type(file_type)
|
67
69
|
FTYP_MAP.fetch(file_type, :mov)
|
68
70
|
end
|
69
71
|
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -23,7 +23,11 @@ class FormatParser::MP3Parser
|
|
23
23
|
# Default frame size for mp3
|
24
24
|
SAMPLES_PER_FRAME = 1152
|
25
25
|
|
26
|
-
|
26
|
+
include FormatParser::DSL
|
27
|
+
natures :audio
|
28
|
+
formats :mp3
|
29
|
+
|
30
|
+
def call(io)
|
27
31
|
# Read the last 128 bytes which might contain ID3v1
|
28
32
|
id3_v1 = ID3V1.attempt_id3_v1_extraction(io)
|
29
33
|
# Read the header bytes that might contain ID3v1
|
@@ -42,9 +46,8 @@ class FormatParser::MP3Parser
|
|
42
46
|
|
43
47
|
first_frame = initial_frames.first
|
44
48
|
|
45
|
-
file_info = FormatParser::
|
46
|
-
|
47
|
-
file_type: :mp3,
|
49
|
+
file_info = FormatParser::Audio.new(
|
50
|
+
format: :mp3,
|
48
51
|
num_audio_channels: first_frame.channels,
|
49
52
|
audio_sample_rate_hz: first_frame.sample_rate,
|
50
53
|
# media_duration_frames is omitted because the frames
|
data/lib/parsers/png_parser.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
class FormatParser::PNGParser
|
2
|
+
include FormatParser::IOUtils
|
3
|
+
include FormatParser::DSL
|
4
|
+
|
5
|
+
natures :image
|
6
|
+
formats :png
|
7
|
+
|
2
8
|
PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
|
3
9
|
COLOR_TYPES = {
|
4
10
|
0 => :grayscale,
|
@@ -13,15 +19,13 @@ class FormatParser::PNGParser
|
|
13
19
|
6 => true,
|
14
20
|
}
|
15
21
|
|
16
|
-
include FormatParser::IOUtils
|
17
22
|
|
18
23
|
def chunk_length_and_type(io)
|
19
24
|
safe_read(io, 8).unpack("Na4")
|
20
25
|
end
|
21
26
|
|
22
|
-
def
|
27
|
+
def call(io)
|
23
28
|
io = FormatParser::IOConstraint.new(io)
|
24
|
-
|
25
29
|
magic_bytes = safe_read(io, PNG_HEADER_BYTES.bytesize)
|
26
30
|
return unless magic_bytes == PNG_HEADER_BYTES
|
27
31
|
|
@@ -66,8 +70,8 @@ class FormatParser::PNGParser
|
|
66
70
|
num_frames, loop_n_times = safe_read(io, 8).unpack('NN')
|
67
71
|
end
|
68
72
|
|
69
|
-
FormatParser::
|
70
|
-
|
73
|
+
FormatParser::Image.new(
|
74
|
+
format: :png,
|
71
75
|
width_px: w,
|
72
76
|
height_px: h,
|
73
77
|
has_transparency: has_transparency,
|
data/lib/parsers/psd_parser.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
class FormatParser::PSDParser
|
2
|
-
PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
|
3
2
|
include FormatParser::IOUtils
|
3
|
+
include FormatParser::DSL
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
|
6
|
+
natures :image
|
7
|
+
formats :psd
|
7
8
|
|
9
|
+
def call(io)
|
10
|
+
io = FormatParser::IOConstraint.new(io)
|
8
11
|
magic_bytes = safe_read(io, 4).unpack("C4")
|
9
12
|
|
10
13
|
return unless magic_bytes == PSD_HEADER
|
@@ -12,8 +15,8 @@ class FormatParser::PSDParser
|
|
12
15
|
# We can be reasonably certain this is a PSD so we grab the height
|
13
16
|
# and width bytes
|
14
17
|
w,h = safe_read(io, 22).unpack("x10N2")
|
15
|
-
FormatParser::
|
16
|
-
|
18
|
+
FormatParser::Image.new(
|
19
|
+
format: :psd,
|
17
20
|
width_px: w,
|
18
21
|
height_px: h,
|
19
22
|
)
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -1,34 +1,30 @@
|
|
1
1
|
class FormatParser::TIFFParser
|
2
|
+
include FormatParser::IOUtils
|
3
|
+
include FormatParser::DSL
|
4
|
+
|
2
5
|
LITTLE_ENDIAN_TIFF_HEADER_BYTES = [0x49, 0x49, 0x2A, 0x0]
|
3
6
|
BIG_ENDIAN_TIFF_HEADER_BYTES = [0x4D, 0x4D, 0x0, 0x2A]
|
4
7
|
WIDTH_TAG = 0x100
|
5
8
|
HEIGHT_TAG = 0x101
|
6
9
|
|
7
|
-
|
10
|
+
natures :image
|
11
|
+
formats :tif
|
8
12
|
|
9
|
-
def
|
13
|
+
def call(io)
|
10
14
|
io = FormatParser::IOConstraint.new(io)
|
11
|
-
|
12
15
|
magic_bytes = safe_read(io, 4).unpack("C4")
|
13
16
|
endianness = scan_tiff_endianness(magic_bytes)
|
14
17
|
return unless endianness
|
15
18
|
w, h = read_tiff_by_endianness(io, endianness)
|
16
19
|
scanner = FormatParser::EXIFParser.new(:tiff, io)
|
17
20
|
scanner.scan_image_exif
|
18
|
-
|
19
|
-
|
20
|
-
file_type: :tif,
|
21
|
+
return FormatParser::Image.new(
|
22
|
+
format: :tif,
|
21
23
|
width_px: w,
|
22
24
|
height_px: h,
|
25
|
+
# might be nil if EXIF metadata wasn't found
|
23
26
|
orientation: scanner.orientation
|
24
27
|
)
|
25
|
-
else
|
26
|
-
FormatParser::FileInformation.image(
|
27
|
-
file_type: :tif,
|
28
|
-
width_px: w,
|
29
|
-
height_px: h
|
30
|
-
)
|
31
|
-
end
|
32
28
|
end
|
33
29
|
|
34
30
|
# TIFFs can be either big or little endian, so we check here
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
class FormatParser::WAVParser
|
2
2
|
include FormatParser::IOUtils
|
3
|
+
include FormatParser::DSL
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
natures :audio
|
6
|
+
formats :wav
|
6
7
|
|
8
|
+
def call(io)
|
7
9
|
# Read the RIFF header. Chunk descriptor should be RIFF, the size should
|
8
10
|
# contain the size of the entire file in bytes minus 8 bytes for the
|
9
11
|
# two fields not included in this count: chunk_id and size.
|
@@ -88,9 +90,8 @@ class FormatParser::WAVParser
|
|
88
90
|
def file_info(fmt_data, sample_frames)
|
89
91
|
return unless fmt_data[:sample_rate] > 0
|
90
92
|
duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
|
91
|
-
FormatParser::
|
92
|
-
|
93
|
-
file_type: :wav,
|
93
|
+
FormatParser::Audio.new(
|
94
|
+
format: :wav,
|
94
95
|
num_audio_channels: fmt_data[:channels],
|
95
96
|
audio_sample_rate_hz: fmt_data[:sample_rate],
|
96
97
|
media_duration_frames: sample_frames,
|
data/lib/video.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
module FormatParser
|
2
|
+
class Video
|
3
|
+
NATURE = :video
|
4
|
+
|
5
|
+
attr_accessor :width_px
|
6
|
+
|
7
|
+
attr_accessor :height_px
|
8
|
+
|
9
|
+
# Type of the file (e.g :mp3)
|
10
|
+
attr_accessor :format
|
11
|
+
|
12
|
+
# Duration of the media object (be it audio or video) in seconds,
|
13
|
+
# as a Float
|
14
|
+
attr_accessor :media_duration_seconds
|
15
|
+
|
16
|
+
# Duration of the media object in addressable frames or samples,
|
17
|
+
# as an Integer
|
18
|
+
attr_accessor :media_duration_frames
|
19
|
+
|
20
|
+
# If a parser wants to provide any extra information to the caller
|
21
|
+
# it can be placed here
|
22
|
+
attr_accessor :intrinsics
|
23
|
+
|
24
|
+
# Only permits assignments via defined accessors
|
25
|
+
def initialize(**attributes)
|
26
|
+
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
27
|
+
end
|
28
|
+
|
29
|
+
def nature
|
30
|
+
NATURE
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/spec/aiff_parser_spec.rb
CHANGED
@@ -2,10 +2,10 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe FormatParser::AIFFParser do
|
4
4
|
it 'parses an AIFF sample file' do
|
5
|
-
parse_result = subject.
|
5
|
+
parse_result = subject.call(File.open(__dir__ + '/fixtures/AIFF/fixture.aiff', 'rb'))
|
6
6
|
|
7
|
-
expect(parse_result.
|
8
|
-
expect(parse_result.
|
7
|
+
expect(parse_result.nature).to eq(:audio)
|
8
|
+
expect(parse_result.format).to eq(:aiff)
|
9
9
|
expect(parse_result.media_duration_frames).to eq(46433)
|
10
10
|
expect(parse_result.num_audio_channels).to eq(2)
|
11
11
|
expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
|
@@ -13,10 +13,10 @@ describe FormatParser::AIFFParser do
|
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
|
16
|
-
parse_result = subject.
|
16
|
+
parse_result = subject.call(File.open(__dir__ + '/fixtures/AIFF/fixture-logic-aiff.aif', 'rb'))
|
17
17
|
|
18
|
-
expect(parse_result.
|
19
|
-
expect(parse_result.
|
18
|
+
expect(parse_result.nature).to eq(:audio)
|
19
|
+
expect(parse_result.format).to eq(:aiff)
|
20
20
|
expect(parse_result.media_duration_frames).to eq(302400)
|
21
21
|
expect(parse_result.num_audio_channels).to eq(2)
|
22
22
|
expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
|
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe FormatParser::
|
3
|
+
describe FormatParser::Image do
|
4
4
|
|
5
5
|
context "File data checks" do
|
6
6
|
it 'succeeds with relevant attributes' do
|
7
|
-
result = described_class.new(
|
8
|
-
expect(result.
|
9
|
-
expect(result.
|
7
|
+
result = described_class.new(format: :jpg, width_px: 42, height_px: 10, image_orientation: 1)
|
8
|
+
expect(result.nature).to eq(:image)
|
9
|
+
expect(result.format).to eq(:jpg)
|
10
10
|
expect(result.width_px).to eq(42)
|
11
11
|
expect(result.height_px).to eq(10)
|
12
12
|
expect(result.image_orientation).to eq(1)
|
data/spec/format_parser_spec.rb
CHANGED
@@ -3,12 +3,12 @@ require 'spec_helper'
|
|
3
3
|
describe FormatParser do
|
4
4
|
it 'returns nil when trying to parse an empty IO' do
|
5
5
|
d = StringIO.new('')
|
6
|
-
expect(FormatParser.parse(d)).to
|
6
|
+
expect(FormatParser.parse(d)).to be_empty
|
7
7
|
end
|
8
8
|
|
9
9
|
it 'returns nil when parsing an IO no parser can make sense of' do
|
10
10
|
d = StringIO.new(Random.new.bytes(1))
|
11
|
-
expect(FormatParser.parse(d)).to
|
11
|
+
expect(FormatParser.parse(d)).to be_empty
|
12
12
|
end
|
13
13
|
|
14
14
|
describe 'with fuzzing' do
|
@@ -21,6 +21,34 @@ describe FormatParser do
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
describe 'multiple values return' do
|
25
|
+
let(:blob) { StringIO.new(Random.new.bytes(512 * 1024)) }
|
26
|
+
let(:audio) { FormatParser::Audio.new(format: :aiff, num_audio_channels: 1) }
|
27
|
+
let(:image) { FormatParser::Image.new(format: :dpx, width_px: 1, height_px: 1) }
|
28
|
+
|
29
|
+
context '#parse called without any option' do
|
30
|
+
before do
|
31
|
+
expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(audio)
|
32
|
+
expect_any_instance_of(FormatParser::DPXParser).to receive(:call).and_return(image)
|
33
|
+
end
|
34
|
+
|
35
|
+
subject { FormatParser.parse(blob) }
|
36
|
+
|
37
|
+
it { is_expected.to include(image) }
|
38
|
+
it { is_expected.to include(audio) }
|
39
|
+
end
|
40
|
+
|
41
|
+
context '#parse called with hash options' do
|
42
|
+
before do
|
43
|
+
expect_any_instance_of(FormatParser::DPXParser).to receive(:call).and_return(image)
|
44
|
+
end
|
45
|
+
|
46
|
+
subject { FormatParser.parse(blob, formats: [:dpx], returns: :one) }
|
47
|
+
|
48
|
+
it { is_expected.to eq(image) }
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
24
52
|
describe 'when parsing fixtures' do
|
25
53
|
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
26
54
|
it "parses #{fixture_path} without raising any errors" do
|
@@ -4,11 +4,11 @@ describe FormatParser::DPXParser do
|
|
4
4
|
describe 'with Depix example files' do
|
5
5
|
Dir.glob(fixtures_dir + '/dpx/*.*').each do |dpx_path|
|
6
6
|
it "is able to parse #{File.basename(dpx_path)}" do
|
7
|
-
parsed = subject.
|
7
|
+
parsed = subject.call(File.open(dpx_path, 'rb'))
|
8
8
|
|
9
9
|
expect(parsed).not_to be_nil
|
10
|
-
expect(parsed.
|
11
|
-
expect(parsed.
|
10
|
+
expect(parsed.nature).to eq(:image)
|
11
|
+
expect(parsed.format).to eq(:dpx)
|
12
12
|
|
13
13
|
# If we have an error in the struct offsets these values are likely to become
|
14
14
|
# the maximum value of a 4-byte uint, which is way higher
|
@@ -21,7 +21,7 @@ describe FormatParser::DPXParser do
|
|
21
21
|
|
22
22
|
it 'correctly reads pixel dimensions' do
|
23
23
|
fi = File.open(fixtures_dir + '/dpx/026_FROM_HERO_TAPE_5-3-1_MOV.0029.dpx', 'rb')
|
24
|
-
parsed = subject.
|
24
|
+
parsed = subject.call(fi)
|
25
25
|
expect(parsed.width_px).to eq(1920)
|
26
26
|
expect(parsed.height_px).to eq(1080)
|
27
27
|
end
|
@@ -4,19 +4,19 @@ describe FormatParser::FDXParser do
|
|
4
4
|
describe 'is able to parse the sample file' do
|
5
5
|
Dir.glob(fixtures_dir + '/XML/*.fdx').each do |fdx_path|
|
6
6
|
it "is able to parse #{File.basename(fdx_path)}" do
|
7
|
-
parsed = subject.
|
7
|
+
parsed = subject.call(File.open(fdx_path, 'rb'))
|
8
8
|
expect(parsed).not_to be_nil
|
9
|
-
expect(parsed.
|
10
|
-
expect(parsed.
|
9
|
+
expect(parsed.nature).to eq(:document)
|
10
|
+
expect(parsed.format).to eq(:fdx)
|
11
11
|
expect(parsed.document_type).to eq(:script)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
describe 'does not parse other XML files as FDX' do
|
17
17
|
Dir.glob(fixtures_dir + '/*.svg').each do |svg_path|
|
18
18
|
it 'returns nil when parsing a non-fdx xml file' do
|
19
|
-
parsed = subject.
|
19
|
+
parsed = subject.call(File.open(svg_path, 'rb'))
|
20
20
|
expect(parsed).to eq(nil)
|
21
21
|
end
|
22
22
|
end
|