format_parser 2.8.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd3557085da1447c801981d805786c74a9f319ac5715a1da54e70509fc7c1e99
4
- data.tar.gz: b21520eac5fd8f7e776b8cb28c06b95bb372b293ce4426482470935ce67dcb55
3
+ metadata.gz: 04b14799c012037cf53b949b0920e0f8e2d5a42e30182363fee0d283d41eade4
4
+ data.tar.gz: 78ff4a354e05fb0e67a7b8dd8368768fceb313afc4dd736d5fa41592dfbc7b35
5
5
  SHA512:
6
- metadata.gz: 1bc06e885bba567bfef0f1d97fe83f6785044e6476cac6b4502349ce40208d0c0f9a5f1c1042c90492f8ca093085de1c84e46345c948fb820cb5d507db52ba9b
7
- data.tar.gz: 61569171e518c30b984155765f823d2ee77633061b70abd0ad8a2ce0122a95f4c2e042be291c7ac48ec9e57a1ef2630bdf4ed5d5216876bea29d9330b84a430a
6
+ metadata.gz: e5792c23191b2d4b4d63a5d9dfbfe5a3b28862f9e8ba977c49e39f3ebaf6bd222a31c296f224743a00fc877f8dbbfb6f97cdd19a0b6d712db6e3a00d29dd30d8
7
+ data.tar.gz: 6fd2ac968375090928b407afa3234c623c933150a36a00da965e75bb97d01ee2dcb502124e71fa100f0f2321b16a0f268c8a8fa94691cd195fbeb65a1fbab177
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
+ ## 2.9.0
2
+ * Improve WAV parser by making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
3
+
1
4
  ## 2.8.0
2
- * Add support for Ruby 3.2 and 3.3
5
+ * Add support for Ruby 3.2 and 3.3.
3
6
 
4
7
  ## 2.7.2
5
8
  * Improved stability for mp4 parser when dealing with corrupted FTYP boxes.
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '2.8.0'
2
+ VERSION = '2.9.0'
3
3
  end
@@ -20,75 +20,60 @@ class FormatParser::WAVParser
20
20
  # with the exception that the Format chunk must precede the Data chunk.
21
21
  # The specification does not require the Format chunk to be the first chunk
22
22
  # after the RIFF header.
23
- # http://soundfile.sapp.org/doc/WaveFormat/
24
- # For WAVE files containing PCM audio format we parse the 'fmt ' and
25
- # 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
26
- # chunks. In the latter case the order of appearance of the chunks is
27
- # arbitrary.
28
- fmt_processed = false
29
- fact_processed = false
23
+ # https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
30
24
  fmt_data = {}
31
- total_sample_frames = 0
25
+ data_size = 0
26
+ total_sample_frames = nil
32
27
  loop do
33
28
  chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
34
29
  case chunk_type
35
30
  when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
36
31
  fmt_data = unpack_fmt_chunk(io, chunk_size)
37
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
38
- fmt_processed = true
39
32
  when 'data'
40
- return unless fmt_processed # the 'data' chunk cannot precede the 'fmt ' chunk
41
- return process_pcm(fmt_data, chunk_size) if fmt_data[:audio_format] == 1
42
- safe_skip(io, chunk_size)
33
+ data_size = chunk_size
43
34
  when 'fact'
44
35
  total_sample_frames = safe_read(io, 4).unpack('l').first
45
36
  safe_skip(io, chunk_size - 4)
46
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
47
- fact_processed = true
48
37
  else
49
38
  # Skip this chunk until a known chunk is encountered
50
39
  safe_skip(io, chunk_size)
51
40
  end
41
+ rescue FormatParser::IOUtils::InvalidRead
42
+ # We've reached EOF, so it's time to make the most out of the metadata we
43
+ # managed to parse
44
+ break
52
45
  end
46
+
47
+ file_info(fmt_data, data_size, total_sample_frames)
53
48
  end
54
49
 
55
50
  def unpack_fmt_chunk(io, chunk_size)
56
51
  # The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
57
52
  # 1 compression might be in use for storing the data
58
53
  # and the fmt chunk might contain extra fields appended to it.
59
- # The last 4 fields of the fmt tag are always:
54
+ # The first 6 fields of the fmt tag are always:
55
+ # * unsigned short audio format
60
56
  # * unsigned short channels
61
57
  # * unsigned long samples per sec
62
58
  # * unsigned long average bytes per sec
63
59
  # * unsigned short block align
64
60
  # * unsigned short bits per sample
65
61
 
66
- fmt_info = safe_read(io, 16).unpack('S_2I2S_2')
62
+ _, channels, sample_rate, byte_rate, _, bits_per_sample = safe_read(io, 16).unpack('S_2I2S_2')
67
63
  safe_skip(io, chunk_size - 16) # skip the extra fields
68
64
 
69
65
  {
70
- audio_format: fmt_info[0],
71
- channels: fmt_info[1],
72
- sample_rate: fmt_info[2],
73
- byte_rate: fmt_info[3],
74
- block_align: fmt_info[4],
75
- bits_per_sample: fmt_info[5],
66
+ channels: channels,
67
+ sample_rate: sample_rate,
68
+ byte_rate: byte_rate,
69
+ bits_per_sample: bits_per_sample,
76
70
  }
77
71
  end
78
72
 
79
- def process_pcm(fmt_data, data_size)
80
- return unless fmt_data[:channels] > 0 and fmt_data[:bits_per_sample] > 0
81
- sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8)
82
- file_info(fmt_data, sample_frames)
83
- end
84
-
85
- def process_non_pcm(fmt_data, total_sample_frames)
86
- file_info(fmt_data, total_sample_frames)
87
- end
88
-
89
- def file_info(fmt_data, sample_frames)
90
- return unless fmt_data[:sample_rate] > 0
91
- duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
73
+ def file_info(fmt_data, data_size, sample_frames)
74
+ # NOTE: Each sample includes information for each channel
75
+ sample_frames ||= data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0
76
+ duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if fmt_data[:sample_rate] > 0
92
77
  FormatParser::Audio.new(
93
78
  format: :wav,
94
79
  num_audio_channels: fmt_data[:channels],
@@ -46,9 +46,14 @@ describe FormatParser::WAVParser do
46
46
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
47
47
  end
48
48
 
49
- it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
50
- expect {
51
- subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
52
- }.to raise_error(FormatParser::IOUtils::InvalidRead)
49
+ it 'returns correct info about non pcm files with no fact chunk' do
50
+ parse_result = subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
51
+
52
+ expect(parse_result.nature).to eq(:audio)
53
+ expect(parse_result.format).to eq(:wav)
54
+ expect(parse_result.num_audio_channels).to eq(6)
55
+ expect(parse_result.audio_sample_rate_hz).to eq(44100)
56
+ expect(parse_result.media_duration_frames).to eq(257411)
57
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(5.83)
53
58
  end
54
59
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.0
4
+ version: 2.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-07-02 00:00:00.000000000 Z
12
+ date: 2024-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr