format_parser 0.1.7 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,12 +1,15 @@
1
1
  class FormatParser::GIFParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
+
2
5
  HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
3
6
  NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
4
7
 
5
- include FormatParser::IOUtils
8
+ natures :image
9
+ formats :gif
6
10
 
7
- def information_from_io(io)
11
+ def call(io)
8
12
  io = FormatParser::IOConstraint.new(io)
9
-
10
13
  header = safe_read(io, 6)
11
14
  return unless HEADERS.include?(header)
12
15
 
@@ -38,8 +41,8 @@ class FormatParser::GIFParser
38
41
  potentially_netscape_app_header = safe_read(io, 64)
39
42
  is_animated = potentially_netscape_app_header.include?(NETSCAPE_AND_AUTHENTICATION_CODE)
40
43
 
41
- FormatParser::FileInformation.image(
42
- file_type: :gif,
44
+ FormatParser::Image.new(
45
+ format: :gif,
43
46
  width_px: w,
44
47
  height_px: h,
45
48
  has_multiple_frames: is_animated,
@@ -1,5 +1,6 @@
1
1
  class FormatParser::JPEGParser
2
2
  include FormatParser::IOUtils
3
+ include FormatParser::DSL
3
4
 
4
5
  class InvalidStructure < StandardError
5
6
  end
@@ -10,7 +11,10 @@ class FormatParser::JPEGParser
10
11
  SOS_MARKER = 0xDA # start of stream
11
12
  APP1_MARKER = 0xE1 # maybe EXIF
12
13
 
13
- def information_from_io(io)
14
+ natures :image
15
+ formats :jpg
16
+
17
+ def call(io)
14
18
  @buf = FormatParser::IOConstraint.new(io)
15
19
  @width = nil
16
20
  @height = nil
@@ -50,21 +54,13 @@ class FormatParser::JPEGParser
50
54
  end
51
55
 
52
56
  # Return at the earliest possible opportunity
53
- if @width && @height && @orientation
54
- file_info = FormatParser::FileInformation.image(
55
- file_type: :jpg,
57
+ if @width && @height
58
+ return FormatParser::Image.new(
59
+ format: :jpg,
56
60
  width_px: @width,
57
61
  height_px: @height,
58
62
  orientation: @orientation
59
63
  )
60
- return file_info
61
- elsif @width && @height
62
- file_info = FormatParser::FileInformation.image(
63
- file_type: :jpg,
64
- width_px: @width,
65
- height_px: @height
66
- )
67
- return file_info
68
64
  end
69
65
  end
70
66
  nil # We could not parse anything
@@ -1,6 +1,6 @@
1
1
  class FormatParser::MOOVParser
2
2
  include FormatParser::IOUtils
3
-
3
+ include FormatParser::DSL
4
4
  require_relative 'moov_parser/decoder'
5
5
 
6
6
  # Maps values of the "ftyp" atom to something
@@ -12,11 +12,14 @@ class FormatParser::MOOVParser
12
12
  "m4a " => :m4a,
13
13
  }
14
14
 
15
+ natures :video
16
+ formats *FTYP_MAP.values
17
+
15
18
  # It is currently not documented and not particularly well-tested,
16
19
  # so not considered a public API for now
17
- private_constant :Decoder
20
+ private_constant :Decoder
18
21
 
19
- def information_from_io(io)
22
+ def call(io)
20
23
  return nil unless matches_moov_definition?(io)
21
24
 
22
25
  # Now we know we are in a MOOV, so go back and parse out the atom structure.
@@ -51,9 +54,8 @@ class FormatParser::MOOVParser
51
54
  media_duration_s = duration / timescale.to_f
52
55
  end
53
56
 
54
- FormatParser::FileInformation.new(
55
- file_nature: :video,
56
- file_type: file_type_from_moov_type(file_type),
57
+ FormatParser::Video.new(
58
+ format: format_from_moov_type(file_type),
57
59
  width_px: width,
58
60
  height_px: height,
59
61
  media_duration_seconds: media_duration_s,
@@ -63,7 +65,7 @@ class FormatParser::MOOVParser
63
65
 
64
66
  private
65
67
 
66
- def file_type_from_moov_type(file_type)
68
+ def format_from_moov_type(file_type)
67
69
  FTYP_MAP.fetch(file_type, :mov)
68
70
  end
69
71
 
@@ -23,7 +23,11 @@ class FormatParser::MP3Parser
23
23
  # Default frame size for mp3
24
24
  SAMPLES_PER_FRAME = 1152
25
25
 
26
- def information_from_io(io)
26
+ include FormatParser::DSL
27
+ natures :audio
28
+ formats :mp3
29
+
30
+ def call(io)
27
31
  # Read the last 128 bytes which might contain ID3v1
28
32
  id3_v1 = ID3V1.attempt_id3_v1_extraction(io)
29
33
  # Read the header bytes that might contain ID3v2
@@ -42,9 +46,8 @@ class FormatParser::MP3Parser
42
46
 
43
47
  first_frame = initial_frames.first
44
48
 
45
- file_info = FormatParser::FileInformation.new(
46
- file_nature: :audio,
47
- file_type: :mp3,
49
+ file_info = FormatParser::Audio.new(
50
+ format: :mp3,
48
51
  num_audio_channels: first_frame.channels,
49
52
  audio_sample_rate_hz: first_frame.sample_rate,
50
53
  # media_duration_frames is omitted because the frames
@@ -1,4 +1,10 @@
1
1
  class FormatParser::PNGParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
+
5
+ natures :image
6
+ formats :png
7
+
2
8
  PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
3
9
  COLOR_TYPES = {
4
10
  0 => :grayscale,
@@ -13,15 +19,13 @@ class FormatParser::PNGParser
13
19
  6 => true,
14
20
  }
15
21
 
16
- include FormatParser::IOUtils
17
22
 
18
23
  def chunk_length_and_type(io)
19
24
  safe_read(io, 8).unpack("Na4")
20
25
  end
21
26
 
22
- def information_from_io(io)
27
+ def call(io)
23
28
  io = FormatParser::IOConstraint.new(io)
24
-
25
29
  magic_bytes = safe_read(io, PNG_HEADER_BYTES.bytesize)
26
30
  return unless magic_bytes == PNG_HEADER_BYTES
27
31
 
@@ -66,8 +70,8 @@ class FormatParser::PNGParser
66
70
  num_frames, loop_n_times = safe_read(io, 8).unpack('NN')
67
71
  end
68
72
 
69
- FormatParser::FileInformation.image(
70
- file_type: :png,
73
+ FormatParser::Image.new(
74
+ format: :png,
71
75
  width_px: w,
72
76
  height_px: h,
73
77
  has_transparency: has_transparency,
@@ -1,10 +1,13 @@
1
1
  class FormatParser::PSDParser
2
- PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
3
2
  include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
4
 
5
- def information_from_io(io)
6
- io = FormatParser::IOConstraint.new(io)
5
+ PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
6
+ natures :image
7
+ formats :psd
7
8
 
9
+ def call(io)
10
+ io = FormatParser::IOConstraint.new(io)
8
11
  magic_bytes = safe_read(io, 4).unpack("C4")
9
12
 
10
13
  return unless magic_bytes == PSD_HEADER
@@ -12,8 +15,8 @@ class FormatParser::PSDParser
12
15
  # We can be reasonably certain this is a PSD so we grab the height
13
16
  # and width bytes
14
17
  w,h = safe_read(io, 22).unpack("x10N2")
15
- FormatParser::FileInformation.image(
16
- file_type: :psd,
18
+ FormatParser::Image.new(
19
+ format: :psd,
17
20
  width_px: w,
18
21
  height_px: h,
19
22
  )
@@ -1,34 +1,30 @@
1
1
  class FormatParser::TIFFParser
2
+ include FormatParser::IOUtils
3
+ include FormatParser::DSL
4
+
2
5
  LITTLE_ENDIAN_TIFF_HEADER_BYTES = [0x49, 0x49, 0x2A, 0x0]
3
6
  BIG_ENDIAN_TIFF_HEADER_BYTES = [0x4D, 0x4D, 0x0, 0x2A]
4
7
  WIDTH_TAG = 0x100
5
8
  HEIGHT_TAG = 0x101
6
9
 
7
- include FormatParser::IOUtils
10
+ natures :image
11
+ formats :tif
8
12
 
9
- def information_from_io(io)
13
+ def call(io)
10
14
  io = FormatParser::IOConstraint.new(io)
11
-
12
15
  magic_bytes = safe_read(io, 4).unpack("C4")
13
16
  endianness = scan_tiff_endianness(magic_bytes)
14
17
  return unless endianness
15
18
  w, h = read_tiff_by_endianness(io, endianness)
16
19
  scanner = FormatParser::EXIFParser.new(:tiff, io)
17
20
  scanner.scan_image_exif
18
- if scanner.orientation
19
- FormatParser::FileInformation.image(
20
- file_type: :tif,
21
+ return FormatParser::Image.new(
22
+ format: :tif,
21
23
  width_px: w,
22
24
  height_px: h,
25
+ # might be nil if EXIF metadata wasn't found
23
26
  orientation: scanner.orientation
24
27
  )
25
- else
26
- FormatParser::FileInformation.image(
27
- file_type: :tif,
28
- width_px: w,
29
- height_px: h
30
- )
31
- end
32
28
  end
33
29
 
34
30
  # TIFFs can be either big or little endian, so we check here
@@ -1,9 +1,11 @@
1
1
  class FormatParser::WAVParser
2
2
  include FormatParser::IOUtils
3
+ include FormatParser::DSL
3
4
 
4
- def information_from_io(io)
5
- io.seek(0)
5
+ natures :audio
6
+ formats :wav
6
7
 
8
+ def call(io)
7
9
  # Read the RIFF header. Chunk descriptor should be RIFF, the size should
8
10
  # contain the size of the entire file in bytes minus 8 bytes for the
9
11
  # two fields not included in this count: chunk_id and size.
@@ -88,9 +90,8 @@ class FormatParser::WAVParser
88
90
  def file_info(fmt_data, sample_frames)
89
91
  return unless fmt_data[:sample_rate] > 0
90
92
  duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
91
- FormatParser::FileInformation.new(
92
- file_nature: :audio,
93
- file_type: :wav,
93
+ FormatParser::Audio.new(
94
+ format: :wav,
94
95
  num_audio_channels: fmt_data[:channels],
95
96
  audio_sample_rate_hz: fmt_data[:sample_rate],
96
97
  media_duration_frames: sample_frames,
data/lib/video.rb ADDED
@@ -0,0 +1,33 @@
1
+ module FormatParser
2
+ class Video
3
+ NATURE = :video
4
+
5
+ attr_accessor :width_px
6
+
7
+ attr_accessor :height_px
8
+
9
+ # Type of the file (e.g. :mov)
10
+ attr_accessor :format
11
+
12
+ # Duration of the media object (be it audio or video) in seconds,
13
+ # as a Float
14
+ attr_accessor :media_duration_seconds
15
+
16
+ # Duration of the media object in addressable frames or samples,
17
+ # as an Integer
18
+ attr_accessor :media_duration_frames
19
+
20
+ # If a parser wants to provide any extra information to the caller
21
+ # it can be placed here
22
+ attr_accessor :intrinsics
23
+
24
+ # Only permits assignments via defined accessors
25
+ def initialize(**attributes)
26
+ attributes.map { |(k, v)| public_send("#{k}=", v) }
27
+ end
28
+
29
+ def nature
30
+ NATURE
31
+ end
32
+ end
33
+ end
@@ -2,10 +2,10 @@ require 'spec_helper'
2
2
 
3
3
  describe FormatParser::AIFFParser do
4
4
  it 'parses an AIFF sample file' do
5
- parse_result = subject.information_from_io(File.open(__dir__ + '/fixtures/AIFF/fixture.aiff', 'rb'))
5
+ parse_result = subject.call(File.open(__dir__ + '/fixtures/AIFF/fixture.aiff', 'rb'))
6
6
 
7
- expect(parse_result.file_nature).to eq(:audio)
8
- expect(parse_result.file_type).to eq(:aiff)
7
+ expect(parse_result.nature).to eq(:audio)
8
+ expect(parse_result.format).to eq(:aiff)
9
9
  expect(parse_result.media_duration_frames).to eq(46433)
10
10
  expect(parse_result.num_audio_channels).to eq(2)
11
11
  expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
@@ -13,10 +13,10 @@ describe FormatParser::AIFFParser do
13
13
  end
14
14
 
15
15
  it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
16
- parse_result = subject.information_from_io(File.open(__dir__ + '/fixtures/AIFF/fixture-logic-aiff.aif', 'rb'))
16
+ parse_result = subject.call(File.open(__dir__ + '/fixtures/AIFF/fixture-logic-aiff.aif', 'rb'))
17
17
 
18
- expect(parse_result.file_nature).to eq(:audio)
19
- expect(parse_result.file_type).to eq(:aiff)
18
+ expect(parse_result.nature).to eq(:audio)
19
+ expect(parse_result.format).to eq(:aiff)
20
20
  expect(parse_result.media_duration_frames).to eq(302400)
21
21
  expect(parse_result.num_audio_channels).to eq(2)
22
22
  expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
@@ -1,12 +1,12 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe FormatParser::FileInformation do
3
+ describe FormatParser::Image do
4
4
 
5
5
  context "File data checks" do
6
6
  it 'succeeds with relevant attributes' do
7
- result = described_class.new(file_nature: :image, file_type: :jpg, width_px: 42, height_px: 10, image_orientation: 1)
8
- expect(result.file_nature).to eq(:image)
9
- expect(result.file_type).to eq(:jpg)
7
+ result = described_class.new(format: :jpg, width_px: 42, height_px: 10, image_orientation: 1)
8
+ expect(result.nature).to eq(:image)
9
+ expect(result.format).to eq(:jpg)
10
10
  expect(result.width_px).to eq(42)
11
11
  expect(result.height_px).to eq(10)
12
12
  expect(result.image_orientation).to eq(1)
@@ -3,12 +3,12 @@ require 'spec_helper'
3
3
  describe FormatParser do
4
4
  it 'returns nil when trying to parse an empty IO' do
5
5
  d = StringIO.new('')
6
- expect(FormatParser.parse(d)).to be_nil
6
+ expect(FormatParser.parse(d)).to be_empty
7
7
  end
8
8
 
9
9
  it 'returns nil when parsing an IO no parser can make sense of' do
10
10
  d = StringIO.new(Random.new.bytes(1))
11
- expect(FormatParser.parse(d)).to be_nil
11
+ expect(FormatParser.parse(d)).to be_empty
12
12
  end
13
13
 
14
14
  describe 'with fuzzing' do
@@ -21,6 +21,34 @@ describe FormatParser do
21
21
  end
22
22
  end
23
23
 
24
+ describe 'multiple values return' do
25
+ let(:blob) { StringIO.new(Random.new.bytes(512 * 1024)) }
26
+ let(:audio) { FormatParser::Audio.new(format: :aiff, num_audio_channels: 1) }
27
+ let(:image) { FormatParser::Image.new(format: :dpx, width_px: 1, height_px: 1) }
28
+
29
+ context '#parse called without any option' do
30
+ before do
31
+ expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(audio)
32
+ expect_any_instance_of(FormatParser::DPXParser).to receive(:call).and_return(image)
33
+ end
34
+
35
+ subject { FormatParser.parse(blob) }
36
+
37
+ it { is_expected.to include(image) }
38
+ it { is_expected.to include(audio) }
39
+ end
40
+
41
+ context '#parse called with hash options' do
42
+ before do
43
+ expect_any_instance_of(FormatParser::DPXParser).to receive(:call).and_return(image)
44
+ end
45
+
46
+ subject { FormatParser.parse(blob, formats: [:dpx], returns: :one) }
47
+
48
+ it { is_expected.to eq(image) }
49
+ end
50
+ end
51
+
24
52
  describe 'when parsing fixtures' do
25
53
  Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
26
54
  it "parses #{fixture_path} without raising any errors" do
@@ -4,11 +4,11 @@ describe FormatParser::DPXParser do
4
4
  describe 'with Depix example files' do
5
5
  Dir.glob(fixtures_dir + '/dpx/*.*').each do |dpx_path|
6
6
  it "is able to parse #{File.basename(dpx_path)}" do
7
- parsed = subject.information_from_io(File.open(dpx_path, 'rb'))
7
+ parsed = subject.call(File.open(dpx_path, 'rb'))
8
8
 
9
9
  expect(parsed).not_to be_nil
10
- expect(parsed.file_nature).to eq(:image)
11
- expect(parsed.file_type).to eq(:dpx)
10
+ expect(parsed.nature).to eq(:image)
11
+ expect(parsed.format).to eq(:dpx)
12
12
 
13
13
  # If we have an error in the struct offsets these values are likely to become
14
14
  # the maximum value of a 4-byte uint, which is way higher
@@ -21,7 +21,7 @@ describe FormatParser::DPXParser do
21
21
 
22
22
  it 'correctly reads pixel dimensions' do
23
23
  fi = File.open(fixtures_dir + '/dpx/026_FROM_HERO_TAPE_5-3-1_MOV.0029.dpx', 'rb')
24
- parsed = subject.information_from_io(fi)
24
+ parsed = subject.call(fi)
25
25
  expect(parsed.width_px).to eq(1920)
26
26
  expect(parsed.height_px).to eq(1080)
27
27
  end
@@ -4,19 +4,19 @@ describe FormatParser::FDXParser do
4
4
  describe 'is able to parse the sample file' do
5
5
  Dir.glob(fixtures_dir + '/XML/*.fdx').each do |fdx_path|
6
6
  it "is able to parse #{File.basename(fdx_path)}" do
7
- parsed = subject.information_from_io(File.open(fdx_path, 'rb'))
7
+ parsed = subject.call(File.open(fdx_path, 'rb'))
8
8
  expect(parsed).not_to be_nil
9
- expect(parsed.file_nature).to eq(:document)
10
- expect(parsed.file_type).to eq(:fdx)
9
+ expect(parsed.nature).to eq(:document)
10
+ expect(parsed.format).to eq(:fdx)
11
11
  expect(parsed.document_type).to eq(:script)
12
12
  end
13
13
  end
14
14
  end
15
-
15
+
16
16
  describe 'does not parse other XML files as FDX' do
17
17
  Dir.glob(fixtures_dir + '/*.svg').each do |svg_path|
18
18
  it 'returns nil when parsing a non-fdx xml file' do
19
- parsed = subject.information_from_io(File.open(svg_path, 'rb'))
19
+ parsed = subject.call(File.open(svg_path, 'rb'))
20
20
  expect(parsed).to eq(nil)
21
21
  end
22
22
  end