format_parser 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,15 @@
1
1
  class FormatParser::GIFParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
+
2
5
  HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
3
6
  NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
4
7
 
5
- include FormatParser::IOUtils
8
+ natures :image
9
+ formats :gif
6
10
 
7
- def information_from_io(io)
11
+ def call(io)
8
12
  io = FormatParser::IOConstraint.new(io)
9
-
10
13
  header = safe_read(io, 6)
11
14
  return unless HEADERS.include?(header)
12
15
 
@@ -38,8 +41,8 @@ class FormatParser::GIFParser
38
41
  potentially_netscape_app_header = safe_read(io, 64)
39
42
  is_animated = potentially_netscape_app_header.include?(NETSCAPE_AND_AUTHENTICATION_CODE)
40
43
 
41
- FormatParser::FileInformation.image(
42
- file_type: :gif,
44
+ FormatParser::Image.new(
45
+ format: :gif,
43
46
  width_px: w,
44
47
  height_px: h,
45
48
  has_multiple_frames: is_animated,
@@ -1,5 +1,6 @@
1
1
  class FormatParser::JPEGParser
2
2
  include FormatParser::IOUtils
3
+ include FormatParser::DSL
3
4
 
4
5
  class InvalidStructure < StandardError
5
6
  end
@@ -10,7 +11,10 @@ class FormatParser::JPEGParser
10
11
  SOS_MARKER = 0xDA # start of stream
11
12
  APP1_MARKER = 0xE1 # maybe EXIF
12
13
 
13
- def information_from_io(io)
14
+ natures :image
15
+ formats :jpg
16
+
17
+ def call(io)
14
18
  @buf = FormatParser::IOConstraint.new(io)
15
19
  @width = nil
16
20
  @height = nil
@@ -50,21 +54,13 @@ class FormatParser::JPEGParser
50
54
  end
51
55
 
52
56
  # Return at the earliest possible opportunity
53
- if @width && @height && @orientation
54
- file_info = FormatParser::FileInformation.image(
55
- file_type: :jpg,
57
+ if @width && @height
58
+ return FormatParser::Image.new(
59
+ format: :jpg,
56
60
  width_px: @width,
57
61
  height_px: @height,
58
62
  orientation: @orientation
59
63
  )
60
- return file_info
61
- elsif @width && @height
62
- file_info = FormatParser::FileInformation.image(
63
- file_type: :jpg,
64
- width_px: @width,
65
- height_px: @height
66
- )
67
- return file_info
68
64
  end
69
65
  end
70
66
  nil # We could not parse anything
@@ -1,6 +1,6 @@
1
1
  class FormatParser::MOOVParser
2
2
  include FormatParser::IOUtils
3
-
3
+ include FormatParser::DSL
4
4
  require_relative 'moov_parser/decoder'
5
5
 
6
6
  # Maps values of the "ftyp" atom to something
@@ -12,11 +12,14 @@ class FormatParser::MOOVParser
12
12
  "m4a " => :m4a,
13
13
  }
14
14
 
15
+ natures :video
16
+ formats *FTYP_MAP.values
17
+
15
18
  # It is currently not documented and not particularly well-tested,
16
19
  # so not considered a public API for now
17
- private_constant :Decoder
20
+ private_constant :Decoder
18
21
 
19
- def information_from_io(io)
22
+ def call(io)
20
23
  return nil unless matches_moov_definition?(io)
21
24
 
22
25
  # Now we know we are in a MOOV, so go back and parse out the atom structure.
@@ -51,9 +54,8 @@ class FormatParser::MOOVParser
51
54
  media_duration_s = duration / timescale.to_f
52
55
  end
53
56
 
54
- FormatParser::FileInformation.new(
55
- file_nature: :video,
56
- file_type: file_type_from_moov_type(file_type),
57
+ FormatParser::Video.new(
58
+ format: format_from_moov_type(file_type),
57
59
  width_px: width,
58
60
  height_px: height,
59
61
  media_duration_seconds: media_duration_s,
@@ -63,7 +65,7 @@ class FormatParser::MOOVParser
63
65
 
64
66
  private
65
67
 
66
- def file_type_from_moov_type(file_type)
68
+ def format_from_moov_type(file_type)
67
69
  FTYP_MAP.fetch(file_type, :mov)
68
70
  end
69
71
 
@@ -23,7 +23,11 @@ class FormatParser::MP3Parser
23
23
  # Default frame size for mp3
24
24
  SAMPLES_PER_FRAME = 1152
25
25
 
26
- def information_from_io(io)
26
+ include FormatParser::DSL
27
+ natures :audio
28
+ formats :mp3
29
+
30
+ def call(io)
27
31
  # Read the last 128 bytes which might contain ID3v1
28
32
  id3_v1 = ID3V1.attempt_id3_v1_extraction(io)
29
33
  # Read the header bytes that might contain ID3v1
@@ -42,9 +46,8 @@ class FormatParser::MP3Parser
42
46
 
43
47
  first_frame = initial_frames.first
44
48
 
45
- file_info = FormatParser::FileInformation.new(
46
- file_nature: :audio,
47
- file_type: :mp3,
49
+ file_info = FormatParser::Audio.new(
50
+ format: :mp3,
48
51
  num_audio_channels: first_frame.channels,
49
52
  audio_sample_rate_hz: first_frame.sample_rate,
50
53
  # media_duration_frames is omitted because the frames
@@ -1,4 +1,10 @@
1
1
  class FormatParser::PNGParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
+
5
+ natures :image
6
+ formats :png
7
+
2
8
  PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
3
9
  COLOR_TYPES = {
4
10
  0 => :grayscale,
@@ -13,15 +19,13 @@ class FormatParser::PNGParser
13
19
  6 => true,
14
20
  }
15
21
 
16
- include FormatParser::IOUtils
17
22
 
18
23
  def chunk_length_and_type(io)
19
24
  safe_read(io, 8).unpack("Na4")
20
25
  end
21
26
 
22
- def information_from_io(io)
27
+ def call(io)
23
28
  io = FormatParser::IOConstraint.new(io)
24
-
25
29
  magic_bytes = safe_read(io, PNG_HEADER_BYTES.bytesize)
26
30
  return unless magic_bytes == PNG_HEADER_BYTES
27
31
 
@@ -66,8 +70,8 @@ class FormatParser::PNGParser
66
70
  num_frames, loop_n_times = safe_read(io, 8).unpack('NN')
67
71
  end
68
72
 
69
- FormatParser::FileInformation.image(
70
- file_type: :png,
73
+ FormatParser::Image.new(
74
+ format: :png,
71
75
  width_px: w,
72
76
  height_px: h,
73
77
  has_transparency: has_transparency,
@@ -1,10 +1,13 @@
1
1
  class FormatParser::PSDParser
2
- PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
3
2
  include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
4
 
5
- def information_from_io(io)
6
- io = FormatParser::IOConstraint.new(io)
5
+ PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
6
+ natures :image
7
+ formats :psd
7
8
 
9
+ def call(io)
10
+ io = FormatParser::IOConstraint.new(io)
8
11
  magic_bytes = safe_read(io, 4).unpack("C4")
9
12
 
10
13
  return unless magic_bytes == PSD_HEADER
@@ -12,8 +15,8 @@ class FormatParser::PSDParser
12
15
  # We can be reasonably certain this is a PSD so we grab the height
13
16
  # and width bytes
14
17
  w,h = safe_read(io, 22).unpack("x10N2")
15
- FormatParser::FileInformation.image(
16
- file_type: :psd,
18
+ FormatParser::Image.new(
19
+ format: :psd,
17
20
  width_px: w,
18
21
  height_px: h,
19
22
  )
@@ -1,34 +1,30 @@
1
1
  class FormatParser::TIFFParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
+
2
5
  LITTLE_ENDIAN_TIFF_HEADER_BYTES = [0x49, 0x49, 0x2A, 0x0]
3
6
  BIG_ENDIAN_TIFF_HEADER_BYTES = [0x4D, 0x4D, 0x0, 0x2A]
4
7
  WIDTH_TAG = 0x100
5
8
  HEIGHT_TAG = 0x101
6
9
 
7
- include FormatParser::IOUtils
10
+ natures :image
11
+ formats :tif
8
12
 
9
- def information_from_io(io)
13
+ def call(io)
10
14
  io = FormatParser::IOConstraint.new(io)
11
-
12
15
  magic_bytes = safe_read(io, 4).unpack("C4")
13
16
  endianness = scan_tiff_endianness(magic_bytes)
14
17
  return unless endianness
15
18
  w, h = read_tiff_by_endianness(io, endianness)
16
19
  scanner = FormatParser::EXIFParser.new(:tiff, io)
17
20
  scanner.scan_image_exif
18
- if scanner.orientation
19
- FormatParser::FileInformation.image(
20
- file_type: :tif,
21
+ return FormatParser::Image.new(
22
+ format: :tif,
21
23
  width_px: w,
22
24
  height_px: h,
25
+ # might be nil if EXIF metadata wasn't found
23
26
  orientation: scanner.orientation
24
27
  )
25
- else
26
- FormatParser::FileInformation.image(
27
- file_type: :tif,
28
- width_px: w,
29
- height_px: h
30
- )
31
- end
32
28
  end
33
29
 
34
30
  # TIFFs can be either big or little endian, so we check here
@@ -1,9 +1,11 @@
1
1
  class FormatParser::WAVParser
2
2
  include FormatParser::IOUtils
3
+ include FormatParser::DSL
3
4
 
4
- def information_from_io(io)
5
- io.seek(0)
5
+ natures :audio
6
+ formats :wav
6
7
 
8
+ def call(io)
7
9
  # Read the RIFF header. Chunk descriptor should be RIFF, the size should
8
10
  # contain the size of the entire file in bytes minus 8 bytes for the
9
11
  # two fields not included in this count: chunk_id and size.
@@ -88,9 +90,8 @@ class FormatParser::WAVParser
88
90
  def file_info(fmt_data, sample_frames)
89
91
  return unless fmt_data[:sample_rate] > 0
90
92
  duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
91
- FormatParser::FileInformation.new(
92
- file_nature: :audio,
93
- file_type: :wav,
93
+ FormatParser::Audio.new(
94
+ format: :wav,
94
95
  num_audio_channels: fmt_data[:channels],
95
96
  audio_sample_rate_hz: fmt_data[:sample_rate],
96
97
  media_duration_frames: sample_frames,
data/lib/video.rb ADDED
@@ -0,0 +1,33 @@
1
+ module FormatParser
2
+ class Video
3
+ NATURE = :video
4
+
5
+ attr_accessor :width_px
6
+
7
+ attr_accessor :height_px
8
+
9
+ # Type of the file (e.g :mp3)
10
+ attr_accessor :format
11
+
12
+ # Duration of the media object (be it audio or video) in seconds,
13
+ # as a Float
14
+ attr_accessor :media_duration_seconds
15
+
16
+ # Duration of the media object in addressable frames or samples,
17
+ # as an Integer
18
+ attr_accessor :media_duration_frames
19
+
20
+ # If a parser wants to provide any extra information to the caller
21
+ # it can be placed here
22
+ attr_accessor :intrinsics
23
+
24
+ # Only permits assignments via defined accessors
25
+ def initialize(**attributes)
26
+ attributes.map { |(k, v)| public_send("#{k}=", v) }
27
+ end
28
+
29
+ def nature
30
+ NATURE
31
+ end
32
+ end
33
+ end
@@ -2,10 +2,10 @@ require 'spec_helper'
2
2
 
3
3
  describe FormatParser::AIFFParser do
4
4
  it 'parses an AIFF sample file' do
5
- parse_result = subject.information_from_io(File.open(__dir__ + '/fixtures/AIFF/fixture.aiff', 'rb'))
5
+ parse_result = subject.call(File.open(__dir__ + '/fixtures/AIFF/fixture.aiff', 'rb'))
6
6
 
7
- expect(parse_result.file_nature).to eq(:audio)
8
- expect(parse_result.file_type).to eq(:aiff)
7
+ expect(parse_result.nature).to eq(:audio)
8
+ expect(parse_result.format).to eq(:aiff)
9
9
  expect(parse_result.media_duration_frames).to eq(46433)
10
10
  expect(parse_result.num_audio_channels).to eq(2)
11
11
  expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
@@ -13,10 +13,10 @@ describe FormatParser::AIFFParser do
13
13
  end
14
14
 
15
15
  it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
16
- parse_result = subject.information_from_io(File.open(__dir__ + '/fixtures/AIFF/fixture-logic-aiff.aif', 'rb'))
16
+ parse_result = subject.call(File.open(__dir__ + '/fixtures/AIFF/fixture-logic-aiff.aif', 'rb'))
17
17
 
18
- expect(parse_result.file_nature).to eq(:audio)
19
- expect(parse_result.file_type).to eq(:aiff)
18
+ expect(parse_result.nature).to eq(:audio)
19
+ expect(parse_result.format).to eq(:aiff)
20
20
  expect(parse_result.media_duration_frames).to eq(302400)
21
21
  expect(parse_result.num_audio_channels).to eq(2)
22
22
  expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
@@ -1,12 +1,12 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe FormatParser::FileInformation do
3
+ describe FormatParser::Image do
4
4
 
5
5
  context "File data checks" do
6
6
  it 'succeeds with relevant attributes' do
7
- result = described_class.new(file_nature: :image, file_type: :jpg, width_px: 42, height_px: 10, image_orientation: 1)
8
- expect(result.file_nature).to eq(:image)
9
- expect(result.file_type).to eq(:jpg)
7
+ result = described_class.new(format: :jpg, width_px: 42, height_px: 10, image_orientation: 1)
8
+ expect(result.nature).to eq(:image)
9
+ expect(result.format).to eq(:jpg)
10
10
  expect(result.width_px).to eq(42)
11
11
  expect(result.height_px).to eq(10)
12
12
  expect(result.image_orientation).to eq(1)
@@ -3,12 +3,12 @@ require 'spec_helper'
3
3
  describe FormatParser do
4
4
  it 'returns nil when trying to parse an empty IO' do
5
5
  d = StringIO.new('')
6
- expect(FormatParser.parse(d)).to be_nil
6
+ expect(FormatParser.parse(d)).to be_empty
7
7
  end
8
8
 
9
9
  it 'returns nil when parsing an IO no parser can make sense of' do
10
10
  d = StringIO.new(Random.new.bytes(1))
11
- expect(FormatParser.parse(d)).to be_nil
11
+ expect(FormatParser.parse(d)).to be_empty
12
12
  end
13
13
 
14
14
  describe 'with fuzzing' do
@@ -21,6 +21,34 @@ describe FormatParser do
21
21
  end
22
22
  end
23
23
 
24
+ describe 'multiple values return' do
25
+ let(:blob) { StringIO.new(Random.new.bytes(512 * 1024)) }
26
+ let(:audio) { FormatParser::Audio.new(format: :aiff, num_audio_channels: 1) }
27
+ let(:image) { FormatParser::Image.new(format: :dpx, width_px: 1, height_px: 1) }
28
+
29
+ context '#parse called without any option' do
30
+ before do
31
+ expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(audio)
32
+ expect_any_instance_of(FormatParser::DPXParser).to receive(:call).and_return(image)
33
+ end
34
+
35
+ subject { FormatParser.parse(blob) }
36
+
37
+ it { is_expected.to include(image) }
38
+ it { is_expected.to include(audio) }
39
+ end
40
+
41
+ context '#parse called with hash options' do
42
+ before do
43
+ expect_any_instance_of(FormatParser::DPXParser).to receive(:call).and_return(image)
44
+ end
45
+
46
+ subject { FormatParser.parse(blob, formats: [:dpx], returns: :one) }
47
+
48
+ it { is_expected.to eq(image) }
49
+ end
50
+ end
51
+
24
52
  describe 'when parsing fixtures' do
25
53
  Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
26
54
  it "parses #{fixture_path} without raising any errors" do
@@ -4,11 +4,11 @@ describe FormatParser::DPXParser do
4
4
  describe 'with Depix example files' do
5
5
  Dir.glob(fixtures_dir + '/dpx/*.*').each do |dpx_path|
6
6
  it "is able to parse #{File.basename(dpx_path)}" do
7
- parsed = subject.information_from_io(File.open(dpx_path, 'rb'))
7
+ parsed = subject.call(File.open(dpx_path, 'rb'))
8
8
 
9
9
  expect(parsed).not_to be_nil
10
- expect(parsed.file_nature).to eq(:image)
11
- expect(parsed.file_type).to eq(:dpx)
10
+ expect(parsed.nature).to eq(:image)
11
+ expect(parsed.format).to eq(:dpx)
12
12
 
13
13
  # If we have an error in the struct offsets these values are likely to become
14
14
  # the maximum value of a 4-byte uint, which is way higher
@@ -21,7 +21,7 @@ describe FormatParser::DPXParser do
21
21
 
22
22
  it 'correctly reads pixel dimensions' do
23
23
  fi = File.open(fixtures_dir + '/dpx/026_FROM_HERO_TAPE_5-3-1_MOV.0029.dpx', 'rb')
24
- parsed = subject.information_from_io(fi)
24
+ parsed = subject.call(fi)
25
25
  expect(parsed.width_px).to eq(1920)
26
26
  expect(parsed.height_px).to eq(1080)
27
27
  end
@@ -4,19 +4,19 @@ describe FormatParser::FDXParser do
4
4
  describe 'is able to parse the sample file' do
5
5
  Dir.glob(fixtures_dir + '/XML/*.fdx').each do |fdx_path|
6
6
  it "is able to parse #{File.basename(fdx_path)}" do
7
- parsed = subject.information_from_io(File.open(fdx_path, 'rb'))
7
+ parsed = subject.call(File.open(fdx_path, 'rb'))
8
8
  expect(parsed).not_to be_nil
9
- expect(parsed.file_nature).to eq(:document)
10
- expect(parsed.file_type).to eq(:fdx)
9
+ expect(parsed.nature).to eq(:document)
10
+ expect(parsed.format).to eq(:fdx)
11
11
  expect(parsed.document_type).to eq(:script)
12
12
  end
13
13
  end
14
14
  end
15
-
15
+
16
16
  describe 'does not parse other XML files as FDX' do
17
17
  Dir.glob(fixtures_dir + '/*.svg').each do |svg_path|
18
18
  it 'returns nil when parsing a non-fdx xml file' do
19
- parsed = subject.information_from_io(File.open(svg_path, 'rb'))
19
+ parsed = subject.call(File.open(svg_path, 'rb'))
20
20
  expect(parsed).to eq(nil)
21
21
  end
22
22
  end