format_parser 2.8.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd3557085da1447c801981d805786c74a9f319ac5715a1da54e70509fc7c1e99
4
- data.tar.gz: b21520eac5fd8f7e776b8cb28c06b95bb372b293ce4426482470935ce67dcb55
3
+ metadata.gz: 922909c34fc304a86a75f6396bd9627cdacf009aba5191da863064811ca2906f
4
+ data.tar.gz: 864b73aa70b7ec650dab326f677b17f7ed1905d05560deb669a564bb9fdc3d37
5
5
  SHA512:
6
- metadata.gz: 1bc06e885bba567bfef0f1d97fe83f6785044e6476cac6b4502349ce40208d0c0f9a5f1c1042c90492f8ca093085de1c84e46345c948fb820cb5d507db52ba9b
7
- data.tar.gz: 61569171e518c30b984155765f823d2ee77633061b70abd0ad8a2ce0122a95f4c2e042be291c7ac48ec9e57a1ef2630bdf4ed5d5216876bea29d9330b84a430a
6
+ metadata.gz: 5f2527d385f3270f9b11976d478030108787a165e4440825c4ffa8c6a56054e67e5a7fb1a88d18d25dcb945ab6c2befc97156e7a2a5d5ad98f2e11e7c558ce2a
7
+ data.tar.gz: 788254d0f2e40625a6f2fd17c3c7f8959456ad1018fceb62305c5a877a3a8fd52ad5587915b0d3711a8ab68c6dc0ca6d283d9556f68a3234bbd3ecb7a50ece95
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
+ ## 2.10.0
2
+ * Improve WAV parser by prioritizing performance over making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
3
+
4
+ ## 2.9.0
5
+ * Improve WAV parser by making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
6
+
1
7
  ## 2.8.0
2
- * Add support for Ruby 3.2 and 3.3
8
+ * Add support for Ruby 3.2 and 3.3.
3
9
 
4
10
  ## 2.7.2
5
11
  * Improved stability for mp4 parser when dealing with corrupted FTYP boxes.
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '2.8.0'
2
+ VERSION = '2.10.0'
3
3
  end
@@ -20,75 +20,57 @@ class FormatParser::WAVParser
20
20
  # with the exception that the Format chunk must precede the Data chunk.
21
21
  # The specification does not require the Format chunk to be the first chunk
22
22
  # after the RIFF header.
23
- # http://soundfile.sapp.org/doc/WaveFormat/
24
- # For WAVE files containing PCM audio format we parse the 'fmt ' and
25
- # 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
26
- # chunks. In the latter case the order fo appearence of the chunks is
27
- # arbitrary.
23
+ # https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
28
24
  fmt_processed = false
29
- fact_processed = false
25
+ data_processed = false
30
26
  fmt_data = {}
31
- total_sample_frames = 0
27
+ data_size = 0
32
28
  loop do
33
29
  chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
34
30
  case chunk_type
35
31
  when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
36
32
  fmt_data = unpack_fmt_chunk(io, chunk_size)
37
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
38
33
  fmt_processed = true
39
34
  when 'data'
40
- return unless fmt_processed # the 'data' chunk cannot preceed the 'fmt ' chunk
41
- return process_pcm(fmt_data, chunk_size) if fmt_data[:audio_format] == 1
42
- safe_skip(io, chunk_size)
43
- when 'fact'
44
- total_sample_frames = safe_read(io, 4).unpack('l').first
45
- safe_skip(io, chunk_size - 4)
46
- return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
47
- fact_processed = true
35
+ data_size = chunk_size
36
+ data_processed = true
48
37
  else
49
38
  # Skip this chunk until a known chunk is encountered
50
39
  safe_skip(io, chunk_size)
51
40
  end
41
+ break if fmt_processed && data_processed
52
42
  end
43
+
44
+ file_info(fmt_data, data_size)
53
45
  end
54
46
 
55
47
  def unpack_fmt_chunk(io, chunk_size)
56
48
  # The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
57
49
  # 1 compression might be in use for storing the data
58
50
  # and the fmt chunk might contain extra fields appended to it.
59
- # The last 4 fields of the fmt tag are always:
51
+ # The first 6 fields of the fmt tag are always:
52
+ # * unsigned short audio format
60
53
  # * unsigned short channels
61
54
  # * unsigned long samples per sec
62
55
  # * unsigned long average bytes per sec
63
56
  # * unsigned short block align
64
57
  # * unsigned short bits per sample
65
58
 
66
- fmt_info = safe_read(io, 16).unpack('S_2I2S_2')
59
+ _, channels, sample_rate, byte_rate, _, bits_per_sample = safe_read(io, 16).unpack('S_2I2S_2')
67
60
  safe_skip(io, chunk_size - 16) # skip the extra fields
68
61
 
69
62
  {
70
- audio_format: fmt_info[0],
71
- channels: fmt_info[1],
72
- sample_rate: fmt_info[2],
73
- byte_rate: fmt_info[3],
74
- block_align: fmt_info[4],
75
- bits_per_sample: fmt_info[5],
63
+ channels: channels,
64
+ sample_rate: sample_rate,
65
+ byte_rate: byte_rate,
66
+ bits_per_sample: bits_per_sample,
76
67
  }
77
68
  end
78
69
 
79
- def process_pcm(fmt_data, data_size)
80
- return unless fmt_data[:channels] > 0 and fmt_data[:bits_per_sample] > 0
81
- sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8)
82
- file_info(fmt_data, sample_frames)
83
- end
84
-
85
- def process_non_pcm(fmt_data, total_sample_frames)
86
- file_info(fmt_data, total_sample_frames)
87
- end
88
-
89
- def file_info(fmt_data, sample_frames)
90
- return unless fmt_data[:sample_rate] > 0
91
- duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
70
+ def file_info(fmt_data, data_size)
71
+ # NOTE: Each sample includes information for each channel
72
+ sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0
73
+ duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if sample_frames && fmt_data[:byte_rate] > 0
92
74
  FormatParser::Audio.new(
93
75
  format: :wav,
94
76
  num_audio_channels: fmt_data[:channels],
@@ -20,8 +20,9 @@ describe FormatParser::WAVParser do
20
20
  expect(parse_result.format).to eq(:wav)
21
21
  expect(parse_result.num_audio_channels).to eq(1)
22
22
  expect(parse_result.audio_sample_rate_hz).to eq(8000)
23
- expect(parse_result.media_duration_frames).to eq(110488)
24
- expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
23
+ # Fixture does not define bits_per_sample in the fmt chunk
24
+ expect(parse_result.media_duration_frames).to be_nil
25
+ expect(parse_result.media_duration_seconds).to be_nil
25
26
  end
26
27
 
27
28
  it 'returns correct info about pcm files with more channels' do
@@ -46,9 +47,14 @@ describe FormatParser::WAVParser do
46
47
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
47
48
  end
48
49
 
49
- it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
50
- expect {
51
- subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
52
- }.to raise_error(FormatParser::IOUtils::InvalidRead)
50
+ it 'returns correct info about non pcm files with no fact chunk' do
51
+ parse_result = subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
52
+
53
+ expect(parse_result.nature).to eq(:audio)
54
+ expect(parse_result.format).to eq(:wav)
55
+ expect(parse_result.num_audio_channels).to eq(6)
56
+ expect(parse_result.audio_sample_rate_hz).to eq(44100)
57
+ expect(parse_result.media_duration_frames).to eq(257411)
58
+ expect(parse_result.media_duration_seconds).to be_within(0.01).of(5.83)
53
59
  end
54
60
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.0
4
+ version: 2.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-07-02 00:00:00.000000000 Z
12
+ date: 2024-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr