format_parser 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0ef6923a01b8fbe52f4491979a3be1224ea7018
4
- data.tar.gz: a19859f36a81154f73d1071857834cd0e7e1ca74
3
+ metadata.gz: 99d5518139da4b6ee5131ec57009f9c3982a3a3d
4
+ data.tar.gz: 386ca02d14fac20a0d9d74bce6a43a65d862b73c
5
5
  SHA512:
6
- metadata.gz: ca6bd5d8324a4dcb41d6f3137a1ab621a016acf8e9fafae5983bb8c325f883ecc72b4046d774e423e5e4ca7f35d048dae20a1e6ca2287d842c111c2714e2d606
7
- data.tar.gz: 99d6517341e8b48635c8540e7f92cb48e772ed02047eb6cd56412b21de2411c06d64d43878ffa1a3ce43bf8e82b7e886eef4f00b221bfdfe42bafc19435f3f35
6
+ metadata.gz: 791c835fbd9a2cbbcb8dce9dc07c34417967880fff6c3e044ae07142c5da5e106be378041461b28673136a2639ae42ae24022438a00d4e53366edd126bc7ac1a
7
+ data.tar.gz: 196a9d2137cee58e07621cd9e6140320863358830c4ca20407c11e5d523dce08790a9e132c97045e44a7d48ab82105499b6a0f9aa21314ccb5682136e87922c7
data/README.md CHANGED
@@ -9,7 +9,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
9
9
 
10
10
  ## Currently supported filetypes:
11
11
 
12
- `TIFF, PSD, PNG, MP3, JPEG, GIF, DPX, AIFF`
12
+ `TIFF, PSD, PNG, MP3, JPEG, GIF, DPX, AIFF, WAV`
13
13
 
14
14
  ...with more on the way!
15
15
 
@@ -56,5 +56,12 @@ Therefore we adapt the following approaches:
56
56
 
57
57
  ## Fixture Sources
58
58
 
59
- - MIT licensed fixture files from the FastImage and Dimensions projects
59
+ Unless specified otherwise in this section, the fixture files are MIT licensed and come from the FastImage and Dimensions projects.
60
+
61
+ ### AIFF
60
62
  - fixture.aiff was created by one of the project maintainers and is MIT licensed
63
+
64
+ ### WAV
65
+ - c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
66
+ - c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
67
+ - c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.1.3'
2
+ VERSION = '0.1.4'
3
3
  end
@@ -0,0 +1,102 @@
1
+ class FormatParser::WAVParser
2
+ include FormatParser::IOUtils
3
+
4
+ def information_from_io(io)
5
+ io.seek(0)
6
+
7
+ # Read the RIFF header. Chunk descriptor should be RIFF, the size should
8
+ # contain the size of the entire file in bytes minus 8 bytes for the
9
+ # two fields not included in this count: chunk_id and size.
10
+ chunk_id, size, riff_type = safe_read(io, 12).unpack('a4la4')
11
+
12
+ # The chunk_id and riff_type should be RIFF and WAVE respectively
13
+ return unless chunk_id == 'RIFF' && riff_type == 'WAVE'
14
+
15
+ # There are no restrictions upon the order of the chunks within a WAVE file,
16
+ # with the exception that the Format chunk must precede the Data chunk.
17
+ # The specification does not require the Format chunk to be the first chunk
18
+ # after the RIFF header.
19
+ # http://soundfile.sapp.org/doc/WaveFormat/
20
+ # For WAVE files containing PCM audio format we parse the 'fmt ' and
21
+ # 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
22
+ # chunks. In the latter case the order of appearance of the chunks is
23
+ # arbitrary.
24
+ fmt_processed = false
25
+ fact_processed = false
26
+ fmt_data = {}
27
+ total_sample_frames = 0
28
+ loop do
29
+ chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
30
+ case chunk_type
31
+ when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
32
+ fmt_data = unpack_fmt_chunk(io, chunk_size)
33
+ if fmt_data[:audio_format] != 1 and fact_processed
34
+ return process_non_pcm(fmt_data, total_sample_frames)
35
+ end
36
+ fmt_processed = true
37
+ when 'data'
38
+ return unless fmt_processed # the 'data' chunk cannot precede the 'fmt ' chunk
39
+ return process_pcm(fmt_data, chunk_size) if fmt_data[:audio_format] == 1
40
+ safe_skip(io, chunk_size)
41
+ when 'fact'
42
+ total_sample_frames = safe_read(io, 4).unpack('l').first
43
+ safe_skip(io, chunk_size - 4)
44
+ if fmt_processed and fmt_data[:audio_format] != 1
45
+ return process_non_pcm(fmt_data, total_sample_frames)
46
+ end
47
+ fact_processed = true
48
+ else # Skip this chunk until a known chunk is encountered
49
+ safe_skip(io, chunk_size)
50
+ end
51
+ end
52
+ end
53
+
54
+ def unpack_fmt_chunk(io, chunk_size)
55
+ # The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
56
+ # 1 compression might be in use for storing the data
57
+ # and the fmt chunk might contain extra fields appended to it.
58
+ # The last 5 fields of the fmt chunk are always:
59
+ # * unsigned short channels
60
+ # * unsigned long samples per sec
61
+ # * unsigned long average bytes per sec
62
+ # * unsigned short block align
63
+ # * unsigned short bits per sample
64
+
65
+ fmt_info = safe_read(io, 16).unpack('S_2I2S_2')
66
+ safe_skip(io, chunk_size - 16) # skip the extra fields
67
+
68
+ {
69
+ audio_format: fmt_info[0],
70
+ channels: fmt_info[1],
71
+ sample_rate: fmt_info[2],
72
+ byte_rate: fmt_info[3],
73
+ block_align: fmt_info[4],
74
+ bits_per_sample: fmt_info[5],
75
+ }
76
+ end
77
+
78
+ def process_pcm(fmt_data, data_size)
79
+ return unless fmt_data[:channels] > 0 and fmt_data[:bits_per_sample] > 0
80
+ sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8)
81
+ file_info(fmt_data, sample_frames)
82
+ end
83
+
84
+ def process_non_pcm(fmt_data, total_sample_frames)
85
+ file_info(fmt_data, total_sample_frames)
86
+ end
87
+
88
+ def file_info(fmt_data, sample_frames)
89
+ return unless fmt_data[:sample_rate] > 0
90
+ duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
91
+ FormatParser::FileInformation.new(
92
+ file_nature: :audio,
93
+ file_type: :wav,
94
+ num_audio_channels: fmt_data[:channels],
95
+ audio_sample_rate_hz: fmt_data[:sample_rate],
96
+ media_duration_frames: sample_frames,
97
+ media_duration_seconds: duration_in_seconds,
98
+ )
99
+ end
100
+
101
+ FormatParser.register_parser_constructor self
102
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::WAVParser do
4
+ # Fixtures prefixed with c_ are considered canonical
5
+ # while fixtures prefixed with d_ deviate from the standard.
6
+ Dir.glob(fixtures_dir + '/WAV/c_*.*').each do |wav_path|
7
+ it "is able to parse #{File.basename(wav_path)}" do
8
+ parse_result = subject.information_from_io(File.open(wav_path, 'rb'))
9
+
10
+ expect(parse_result.file_nature).to eq(:audio)
11
+ expect(parse_result.file_type).to eq(:wav)
12
+ end
13
+ end
14
+
15
+ it "returns correct info about pcm files" do
16
+ parse_result = subject.information_from_io(File.open(__dir__ + '/../fixtures/WAV/c_8kmp316.wav', 'rb'))
17
+
18
+ expect(parse_result.file_nature).to eq(:audio)
19
+ expect(parse_result.file_type).to eq(:wav)
20
+ expect(parse_result.num_audio_channels).to eq(1)
21
+ expect(parse_result.audio_sample_rate_hz).to eq(8000)
22
+ expect(parse_result.media_duration_frames).to eq(110488)
23
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
24
+ end
25
+
26
+ it "returns correct info about pcm files with more channels" do
27
+ parse_result = subject.information_from_io(File.open(__dir__ + '/../fixtures/WAV/c_39064__alienbomb__atmo-truck.wav', 'rb'))
28
+
29
+ expect(parse_result.file_nature).to eq(:audio)
30
+ expect(parse_result.file_type).to eq(:wav)
31
+ expect(parse_result.num_audio_channels).to eq(2)
32
+ expect(parse_result.audio_sample_rate_hz).to eq(44100)
33
+ expect(parse_result.media_duration_frames).to eq(162832)
34
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(3.69)
35
+ end
36
+
37
+ it "returns correct info about non pcm files" do
38
+ parse_result = subject.information_from_io(File.open(__dir__ + '/../fixtures/WAV/c_11k16bitpcm.wav', 'rb'))
39
+
40
+ expect(parse_result.file_nature).to eq(:audio)
41
+ expect(parse_result.file_type).to eq(:wav)
42
+ expect(parse_result.num_audio_channels).to eq(1)
43
+ expect(parse_result.audio_sample_rate_hz).to eq(11025)
44
+ expect(parse_result.media_duration_frames).to eq(152267)
45
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
46
+ end
47
+
48
+ it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
49
+ expect {
50
+ subject.information_from_io(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
51
+ }.to raise_error(FormatParser::IOUtils::InvalidRead)
52
+ end
53
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -161,6 +161,7 @@ files:
161
161
  - lib/parsers/png_parser.rb
162
162
  - lib/parsers/psd_parser.rb
163
163
  - lib/parsers/tiff_parser.rb
164
+ - lib/parsers/wav_parser.rb
164
165
  - lib/read_limiter.rb
165
166
  - lib/remote_io.rb
166
167
  - spec/aiff_parser_spec.rb
@@ -176,6 +177,7 @@ files:
176
177
  - spec/parsers/png_parser_spec.rb
177
178
  - spec/parsers/psd_parser_spec.rb
178
179
  - spec/parsers/tiff_parser_spec.rb
180
+ - spec/parsers/wav_parser_spec.rb
179
181
  - spec/read_limiter_spec.rb
180
182
  - spec/remote_fetching_spec.rb
181
183
  - spec/remote_io_spec.rb
@@ -201,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
201
203
  version: '0'
202
204
  requirements: []
203
205
  rubyforge_project:
204
- rubygems_version: 2.5.2
206
+ rubygems_version: 2.5.1
205
207
  signing_key:
206
208
  specification_version: 4
207
209
  summary: A library for efficient parsing of file metadata