format_parser 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd3557085da1447c801981d805786c74a9f319ac5715a1da54e70509fc7c1e99
4
- data.tar.gz: b21520eac5fd8f7e776b8cb28c06b95bb372b293ce4426482470935ce67dcb55
3
+ metadata.gz: 04b14799c012037cf53b949b0920e0f8e2d5a42e30182363fee0d283d41eade4
4
+ data.tar.gz: 78ff4a354e05fb0e67a7b8dd8368768fceb313afc4dd736d5fa41592dfbc7b35
5
5
  SHA512:
6
- metadata.gz: 1bc06e885bba567bfef0f1d97fe83f6785044e6476cac6b4502349ce40208d0c0f9a5f1c1042c90492f8ca093085de1c84e46345c948fb820cb5d507db52ba9b
7
- data.tar.gz: 61569171e518c30b984155765f823d2ee77633061b70abd0ad8a2ce0122a95f4c2e042be291c7ac48ec9e57a1ef2630bdf4ed5d5216876bea29d9330b84a430a
6
+ metadata.gz: e5792c23191b2d4b4d63a5d9dfbfe5a3b28862f9e8ba977c49e39f3ebaf6bd222a31c296f224743a00fc877f8dbbfb6f97cdd19a0b6d712db6e3a00d29dd30d8
7
+ data.tar.gz: 6fd2ac968375090928b407afa3234c623c933150a36a00da965e75bb97d01ee2dcb502124e71fa100f0f2321b16a0f268c8a8fa94691cd195fbeb65a1fbab177
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
+ ## 2.9.0
2
+ * Improve WAV parser by making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
3
+
1
4
  ## 2.8.0
2
- * Add support for Ruby 3.2 and 3.3
5
+ * Add support for Ruby 3.2 and 3.3.
3
6
 
4
7
  ## 2.7.2
5
8
  * Improved stability for mp4 parser when dealing with corrupted FTYP boxes.
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '2.8.0'
2
+ VERSION = '2.9.0'
3
3
  end
@@ -20,75 +20,60 @@ class FormatParser::WAVParser
20
20
  # with the exception that the Format chunk must precede the Data chunk.
21
21
  # The specification does not require the Format chunk to be the first chunk
22
22
  # after the RIFF header.
23
- # http://soundfile.sapp.org/doc/WaveFormat/
24
- # For WAVE files containing PCM audio format we parse the 'fmt ' and
25
- # 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
26
- # chunks. In the latter case the order of appearance of the chunks is
27
- # arbitrary.
28
- fmt_processed = false
29
- fact_processed = false
23
+ # https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
30
24
  fmt_data = {}
31
- total_sample_frames = 0
25
+ data_size = 0
26
+ total_sample_frames = nil
32
27
  loop do
33
28
  chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
34
29
  case chunk_type
35
30
  when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
36
31
  fmt_data = unpack_fmt_chunk(io, chunk_size)
37
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
38
- fmt_processed = true
39
32
  when 'data'
40
- return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
41
- return process_pcm(fmt_data, chunk_size) if fmt_data[:audio_format] == 1
42
- safe_skip(io, chunk_size)
33
+ data_size = chunk_size
43
34
  when 'fact'
44
35
  total_sample_frames = safe_read(io, 4).unpack('l').first
45
36
  safe_skip(io, chunk_size - 4)
46
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
47
- fact_processed = true
48
37
  else
49
38
  # Skip this chunk until a known chunk is encountered
50
39
  safe_skip(io, chunk_size)
51
40
  end
41
+ rescue FormatParser::IOUtils::InvalidRead
42
+ # We've reached EOF, so it's time to make the most out of the metadata we
43
+ # managed to parse
44
+ break
52
45
  end
46
+
47
+ file_info(fmt_data, data_size, total_sample_frames)
53
48
  end
54
49
 
55
50
  def unpack_fmt_chunk(io, chunk_size)
56
51
  # The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
57
52
  # 1 compression might be in use for storing the data
58
53
  # and the fmt chunk might contain extra fields appended to it.
59
- # The last 4 fields of the fmt tag are always:
54
+ # The first 6 fields of the fmt tag are always:
55
+ # * unsigned short audio format
60
56
  # * unsigned short channels
61
57
  # * unsigned long samples per sec
62
58
  # * unsigned long average bytes per sec
63
59
  # * unsigned short block align
64
60
  # * unsigned short bits per sample
65
61
 
66
- fmt_info = safe_read(io, 16).unpack('S_2I2S_2')
62
+ _, channels, sample_rate, byte_rate, _, bits_per_sample = safe_read(io, 16).unpack('S_2I2S_2')
67
63
  safe_skip(io, chunk_size - 16) # skip the extra fields
68
64
 
69
65
  {
70
- audio_format: fmt_info[0],
71
- channels: fmt_info[1],
72
- sample_rate: fmt_info[2],
73
- byte_rate: fmt_info[3],
74
- block_align: fmt_info[4],
75
- bits_per_sample: fmt_info[5],
66
+ channels: channels,
67
+ sample_rate: sample_rate,
68
+ byte_rate: byte_rate,
69
+ bits_per_sample: bits_per_sample,
76
70
  }
77
71
  end
78
72
 
79
- def process_pcm(fmt_data, data_size)
80
- return unless fmt_data[:channels] > 0 and fmt_data[:bits_per_sample] > 0
81
- sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8)
82
- file_info(fmt_data, sample_frames)
83
- end
84
-
85
- def process_non_pcm(fmt_data, total_sample_frames)
86
- file_info(fmt_data, total_sample_frames)
87
- end
88
-
89
- def file_info(fmt_data, sample_frames)
90
- return unless fmt_data[:sample_rate] > 0
91
- duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
73
+ def file_info(fmt_data, data_size, sample_frames)
74
+ # NOTE: Each sample includes information for each channel
75
+ sample_frames ||= data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0
76
+ duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if fmt_data[:sample_rate] > 0
92
77
  FormatParser::Audio.new(
93
78
  format: :wav,
94
79
  num_audio_channels: fmt_data[:channels],
@@ -46,9 +46,14 @@ describe FormatParser::WAVParser do
46
46
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
47
47
  end
48
48
 
49
- it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
50
- expect {
51
- subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
52
- }.to raise_error(FormatParser::IOUtils::InvalidRead)
49
+ it 'returns correct info about non pcm files with no fact chunk' do
50
+ parse_result = subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
51
+
52
+ expect(parse_result.nature).to eq(:audio)
53
+ expect(parse_result.format).to eq(:wav)
54
+ expect(parse_result.num_audio_channels).to eq(6)
55
+ expect(parse_result.audio_sample_rate_hz).to eq(44100)
56
+ expect(parse_result.media_duration_frames).to eq(257411)
57
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(5.83)
53
58
  end
54
59
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.0
4
+ version: 2.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-07-02 00:00:00.000000000 Z
12
+ date: 2024-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr