format_parser 2.8.0 → 2.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd3557085da1447c801981d805786c74a9f319ac5715a1da54e70509fc7c1e99
4
- data.tar.gz: b21520eac5fd8f7e776b8cb28c06b95bb372b293ce4426482470935ce67dcb55
3
+ metadata.gz: 922909c34fc304a86a75f6396bd9627cdacf009aba5191da863064811ca2906f
4
+ data.tar.gz: 864b73aa70b7ec650dab326f677b17f7ed1905d05560deb669a564bb9fdc3d37
5
5
  SHA512:
6
- metadata.gz: 1bc06e885bba567bfef0f1d97fe83f6785044e6476cac6b4502349ce40208d0c0f9a5f1c1042c90492f8ca093085de1c84e46345c948fb820cb5d507db52ba9b
7
- data.tar.gz: 61569171e518c30b984155765f823d2ee77633061b70abd0ad8a2ce0122a95f4c2e042be291c7ac48ec9e57a1ef2630bdf4ed5d5216876bea29d9330b84a430a
6
+ metadata.gz: 5f2527d385f3270f9b11976d478030108787a165e4440825c4ffa8c6a56054e67e5a7fb1a88d18d25dcb945ab6c2befc97156e7a2a5d5ad98f2e11e7c558ce2a
7
+ data.tar.gz: 788254d0f2e40625a6f2fd17c3c7f8959456ad1018fceb62305c5a877a3a8fd52ad5587915b0d3711a8ab68c6dc0ca6d283d9556f68a3234bbd3ecb7a50ece95
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
+ ## 2.10.0
2
+ * Improve WAV parser by prioritizing performance over best-effort metadata extraction from files that do not strictly follow the format spec.
3
+
4
+ ## 2.9.0
5
+ * Improve WAV parser by making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
6
+
1
7
  ## 2.8.0
2
- * Add support for Ruby 3.2 and 3.3
8
+ * Add support for Ruby 3.2 and 3.3.
3
9
 
4
10
  ## 2.7.2
5
11
  * Improved stability for mp4 parser when dealing with corrupted FTYP boxes.
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '2.8.0'
2
+ VERSION = '2.10.0'
3
3
  end
@@ -20,75 +20,57 @@ class FormatParser::WAVParser
20
20
  # with the exception that the Format chunk must precede the Data chunk.
21
21
  # The specification does not require the Format chunk to be the first chunk
22
22
  # after the RIFF header.
23
- # http://soundfile.sapp.org/doc/WaveFormat/
24
- # For WAVE files containing PCM audio format we parse the 'fmt ' and
25
- # 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
26
- # chunks. In the latter case the order of appearance of the chunks is
27
- # arbitrary.
23
+ # https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
28
24
  fmt_processed = false
29
- fact_processed = false
25
+ data_processed = false
30
26
  fmt_data = {}
31
- total_sample_frames = 0
27
+ data_size = 0
32
28
  loop do
33
29
  chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
34
30
  case chunk_type
35
31
  when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
36
32
  fmt_data = unpack_fmt_chunk(io, chunk_size)
37
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
38
33
  fmt_processed = true
39
34
  when 'data'
40
- return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
41
- return process_pcm(fmt_data, chunk_size) if fmt_data[:audio_format] == 1
42
- safe_skip(io, chunk_size)
43
- when 'fact'
44
- total_sample_frames = safe_read(io, 4).unpack('l').first
45
- safe_skip(io, chunk_size - 4)
46
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
47
- fact_processed = true
35
+ data_size = chunk_size
36
+ data_processed = true
48
37
  else
49
38
  # Skip this chunk until a known chunk is encountered
50
39
  safe_skip(io, chunk_size)
51
40
  end
41
+ break if fmt_processed && data_processed
52
42
  end
43
+
44
+ file_info(fmt_data, data_size)
53
45
  end
54
46
 
55
47
  def unpack_fmt_chunk(io, chunk_size)
56
48
  # The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
57
49
  # 1 compression might be in use for storing the data
58
50
  # and the fmt chunk might contain extra fields appended to it.
59
- # The last 4 fields of the fmt tag are always:
51
+ # The first 6 fields of the fmt tag are always:
52
+ # * unsigned short audio format
60
53
  # * unsigned short channels
61
54
  # * unsigned long samples per sec
62
55
  # * unsigned long average bytes per sec
63
56
  # * unsigned short block align
64
57
  # * unsigned short bits per sample
65
58
 
66
- fmt_info = safe_read(io, 16).unpack('S_2I2S_2')
59
+ _, channels, sample_rate, byte_rate, _, bits_per_sample = safe_read(io, 16).unpack('S_2I2S_2')
67
60
  safe_skip(io, chunk_size - 16) # skip the extra fields
68
61
 
69
62
  {
70
- audio_format: fmt_info[0],
71
- channels: fmt_info[1],
72
- sample_rate: fmt_info[2],
73
- byte_rate: fmt_info[3],
74
- block_align: fmt_info[4],
75
- bits_per_sample: fmt_info[5],
63
+ channels: channels,
64
+ sample_rate: sample_rate,
65
+ byte_rate: byte_rate,
66
+ bits_per_sample: bits_per_sample,
76
67
  }
77
68
  end
78
69
 
79
- def process_pcm(fmt_data, data_size)
80
- return unless fmt_data[:channels] > 0 and fmt_data[:bits_per_sample] > 0
81
- sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8)
82
- file_info(fmt_data, sample_frames)
83
- end
84
-
85
- def process_non_pcm(fmt_data, total_sample_frames)
86
- file_info(fmt_data, total_sample_frames)
87
- end
88
-
89
- def file_info(fmt_data, sample_frames)
90
- return unless fmt_data[:sample_rate] > 0
91
- duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
70
+ def file_info(fmt_data, data_size)
71
+ # NOTE: Each sample includes information for each channel
72
+ sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0
73
+ duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if sample_frames && fmt_data[:byte_rate] > 0
92
74
  FormatParser::Audio.new(
93
75
  format: :wav,
94
76
  num_audio_channels: fmt_data[:channels],
@@ -20,8 +20,9 @@ describe FormatParser::WAVParser do
20
20
  expect(parse_result.format).to eq(:wav)
21
21
  expect(parse_result.num_audio_channels).to eq(1)
22
22
  expect(parse_result.audio_sample_rate_hz).to eq(8000)
23
- expect(parse_result.media_duration_frames).to eq(110488)
24
- expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
23
+ # Fixture does not define bits_per_sample in the fmt chunk
24
+ expect(parse_result.media_duration_frames).to be_nil
25
+ expect(parse_result.media_duration_seconds).to be_nil
25
26
  end
26
27
 
27
28
  it 'returns correct info about pcm files with more channels' do
@@ -46,9 +47,14 @@ describe FormatParser::WAVParser do
46
47
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
47
48
  end
48
49
 
49
- it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
50
- expect {
51
- subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
52
- }.to raise_error(FormatParser::IOUtils::InvalidRead)
50
+ it 'returns correct info about non pcm files with no fact chunk' do
51
+ parse_result = subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
52
+
53
+ expect(parse_result.nature).to eq(:audio)
54
+ expect(parse_result.format).to eq(:wav)
55
+ expect(parse_result.num_audio_channels).to eq(6)
56
+ expect(parse_result.audio_sample_rate_hz).to eq(44100)
57
+ expect(parse_result.media_duration_frames).to eq(257411)
58
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(5.83)
53
59
  end
54
60
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.0
4
+ version: 2.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-07-02 00:00:00.000000000 Z
12
+ date: 2024-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr