awaaz 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Awaaz
4
+ module Utils
5
+ ##
6
+ # A utility class for reading and optionally resampling audio files.
7
+ #
8
+ # This class supports reading `.wav` files using {Extensions::Soundfile}
9
+ # and can automatically resample them using {Utils::Resample}.
10
+ #
11
+ # @example Read and resample a WAV file
12
+ # reader = Awaaz::Utils::Soundread.new("audio.wav", resample_options: { output_rate: 44100 })
13
+ # samples, channels, rate = reader.read
14
+ #
15
+ # @note Currently, only `.wav` files are supported.
16
+ #
17
class Soundread
  ##
  # Supported audio file extensions (lower-case, including the leading dot).
  #
  # @return [Array<String>] List of supported file extensions.
  #
  SUPPORTED_EXTENSIONS = %w[.wav].freeze

  ##
  # Keys accepted inside the `resample_options` hash.
  #
  # @return [Array<Symbol>]
  #
  VALID_RESAMPLE_OPTIONS = %i[output_rate sampling_option].freeze
  private_constant :VALID_RESAMPLE_OPTIONS

  ##
  # Creates a new Soundread instance.
  #
  # @param filename [String] Path to the audio file to read.
  # @param resample_options [Hash, nil] Options for resampling the audio.
  #   Keys may be Symbols or Strings; `nil` is treated as an empty hash.
  #   - `:output_rate` [Integer] Output sample rate (`22050` when the argument is omitted)
  #   - `:sampling_option` [Symbol] Resampling algorithm (default: `:sinc_fastest`)
  #
  def initialize(filename, resample_options: default_resample_options)
    @filename = filename
    @resample_options = resample_options || {}
  end

  ##
  # Reads the audio file, returning its samples and metadata.
  #
  # The sound file handle is closed even when reading the samples raises.
  #
  # @return [Array<(Numo::SFloat, Integer, Integer)>]
  #   A tuple containing:
  #   - samples [Numo::SFloat] — Audio samples as a Numo array.
  #   - channels [Integer] — Number of channels in the audio.
  #   - output_rate [Integer] — Sample rate of the returned audio.
  #
  # @raise [ArgumentError] If the file extension is unsupported or an
  #   invalid resample option key was given.
  # @raise [Awaaz::AudioreadError] If the file cannot be opened.
  #
  def read
    validate_support
    soundfile, sample_rate, frames, channels = open_file

    begin
      samples = parse_soundfile(soundfile, frames, channels)
    ensure
      # Release the native handle on both the success and the failure path.
      close_soundfile(soundfile)
    end

    resample(samples, sample_rate, channels)
  end

  private

  ##
  # Default resampling options, used when `resample_options` is omitted.
  #
  # @return [Hash] Default options with `:output_rate => 22050`.
  #
  def default_resample_options
    { output_rate: 22_050 }
  end

  ##
  # Ensures the file format is supported.
  #
  # @raise [ArgumentError] If the file extension is not in {SUPPORTED_EXTENSIONS}.
  #
  def validate_support
    return if supported?

    raise ArgumentError, "File extension not supported. Supported files: #{SUPPORTED_EXTENSIONS.join(",")}"
  end

  ##
  # Checks if the file extension is supported (case-insensitively, so
  # `AUDIO.WAV` is accepted as well as `audio.wav`).
  #
  # @return [Boolean] `true` if supported, `false` otherwise.
  #
  def supported?
    SUPPORTED_EXTENSIONS.include?(File.extname(@filename).downcase)
  end

  ##
  # Opens the audio file for reading.
  #
  # @return [Array<(FFI::Pointer, Integer, Integer, Integer)>]
  #   A tuple containing:
  #   - soundfile [FFI::Pointer] — Pointer to the opened sound file.
  #   - sample_rate [Integer] — Sample rate of the audio file.
  #   - frames [Integer] — Number of frames in the file.
  #   - channels [Integer] — Number of channels in the file.
  #
  # @raise [Awaaz::AudioreadError] If the file cannot be opened.
  #
  def open_file
    info = Extensions::Soundfile::SF_INFO.new
    sndfile = Extensions::Soundfile.sf_open(@filename, Extensions::Soundfile::SFM_READ, info.to_ptr)

    raise Awaaz::AudioreadError, "Could not read the audio file" if sndfile.null?

    [sndfile, info[:samplerate], info[:frames], info[:channels]]
  end

  ##
  # Reads the raw samples from the file and converts them into a Numo array.
  #
  # @param soundfile [FFI::Pointer] Open sound file pointer.
  # @param frames [Integer] Number of frames to read.
  # @param channels [Integer] Number of channels in the file.
  # @return [Numo::SFloat] The audio samples.
  #
  def parse_soundfile(soundfile, frames, channels)
    buffer = FFI::MemoryPointer.new(:float, frames * channels)
    read_frames = Extensions::Soundfile.sf_readf_float(soundfile, buffer, frames)
    # Only convert the frames that were actually read, which may be fewer than requested.
    Numo::SFloat.cast(buffer.read_array_of_float(read_frames * channels))
  end

  ##
  # Closes the open sound file.
  #
  # @param soundfile [FFI::Pointer] Open sound file pointer.
  # @return [void]
  #
  def close_soundfile(soundfile)
    Extensions::Soundfile.sf_close(soundfile)
  end

  ##
  # Validates the resample options and hands the samples to {Utils::Resample}.
  #
  # The options hash supplied by the caller is never mutated; keys are
  # normalised to Symbols on a copy, so frozen hashes are accepted.
  #
  # NOTE(review): when `:output_rate` is absent from the options (for example
  # only `:sampling_option` was passed, or `nil`/`{}` was given), `nil` is
  # forwarded as the output rate and returned as the third tuple element.
  # The 22050 default only applies when `resample_options` is omitted
  # entirely — confirm whether the default should be merged in here.
  #
  # @param samples [Numo::SFloat] The input samples.
  # @param sample_rate [Integer] Original sample rate.
  # @param channels [Integer] Number of channels.
  # @return [Array<(Numo::SFloat, Integer, Integer)>]
  #
  # @raise [ArgumentError] If an invalid resample option key is passed.
  #
  def resample(samples, sample_rate, channels)
    options = @resample_options.transform_keys(&:to_sym)

    invalid_key = options.keys.find { |key| !VALID_RESAMPLE_OPTIONS.include?(key) }
    if invalid_key
      raise ArgumentError,
            "Invalid option: #{invalid_key}. Available options: #{VALID_RESAMPLE_OPTIONS.join(", ")}"
    end

    output_rate = options[:output_rate]
    sampling_option = options[:sampling_option] || :sinc_fastest

    [
      Utils::Resample.read_and_resample_numo(samples, sample_rate, output_rate, sampling_option:),
      channels,
      output_rate
    ]
  end
end
168
+ end
169
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This namespace contains utility classes and modules
4
+ # used internally by the Awaaz gem for audio processing.
5
+ #
6
+ # It requires and consolidates helper modules for resampling,
7
+ # sample manipulation, configuration, file reading, shell command
8
+ # building, and shell-based audio operations.
9
+ #
10
+ # @see Awaaz
11
+ # @since 0.1.0
12
+ #
13
+ # @example Accessing a utility class
14
+ # Awaaz::Utils::Soundread.new("file.wav").read
15
+ #
16
+ require_relative "resample"
17
+ require_relative "sound_config"
18
+ require_relative "soundread"
19
+ require_relative "shell_command_builder"
20
+ require_relative "via_shell"
21
+
22
+ module Awaaz
23
+ # Namespace for the low-level helper components that perform
24
+ # core audio-related operations in the Awaaz gem.
25
+ # The body is empty: this declaration only ensures the namespace exists.
26
+ # These utilities are generally not intended for direct use by
27
+ # consumers of the gem, but may be useful for advanced integrations.
28
+ module Utils
29
+ end
30
+ end
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Awaaz
4
+ module Utils
5
+ ##
6
+ # Utility module providing shell-based audio decoding support for decoder classes.
7
+ #
8
+ # This module is intended to be mixed into decoder classes that rely on external
9
+ # tools such as `ffmpeg`, `mpg123`, or `sox` to decode audio files. It builds the
10
+ # appropriate shell commands, executes them, and converts the raw audio data into
11
+ # [Numo::NArray] samples.
12
+ #
13
+ # @note This module is `private` and its methods are meant to be used internally by decoders.
14
+ #
15
+ # @example Using within a decoder
16
+ # class Mp3Decoder < B
17
+ # include Awaaz::Utils::ViaShell
18
+ #
19
+ # def load
20
+ # process(*shell_load(sox_options: { raw: true }))
21
+ # end
22
+ # end
23
+ #
24
module ViaShell
  private

  ##
  # Loads audio samples by building and executing a shell command.
  #
  # All arguments are forwarded unchanged to {#build_shell_command}.
  #
  # @return [Array<(Numo::DFloat, Integer, Integer)>] An array containing:
  #   - samples (`Numo::DFloat`)
  #   - number of channels (`Integer`)
  #   - sample rate (`Integer`)
  # @raise [Awaaz::DecoderNotFound] if no usable decoder could be determined.
  def shell_load(...)
    load_samples(build_shell_command(...))
  end

  ##
  # Builds the appropriate shell command for the detected decoder.
  #
  # @param ffmpeg_options [Hash] Additional options for `ffmpeg` commands.
  # @param mpg123_options [Hash] Additional options for `mpg123` commands.
  # @param sox_options [Hash] Additional options for `sox` commands.
  # @return [Utils::ShellCommandBuilder] The command builder object.
  # @raise [Awaaz::DecoderNotFound] if no decoder is available, or the
  #   selected decoder has no command builder in this module.
  def build_shell_command(ffmpeg_options: {}, mpg123_options: {}, sox_options: {})
    case set_decoder
    when :ffmpeg then build_ffmpeg_command(@filename, **ffmpeg_options)
    when :mpg123 then build_mpg123_command(@filename, **mpg123_options)
    when :sox then build_sox_command(@filename, **sox_options)
    else
      # Fail here with a descriptive error instead of returning nil and
      # crashing later when the command is executed.
      raise Awaaz::DecoderNotFound, "No shell command builder for decoder: #{@decoder.inspect}"
    end
  end

  ##
  # Builds an `ffmpeg` command that writes signed 16-bit little-endian PCM to stdout.
  #
  # @param filename [String] Path to the audio file.
  # @return [Utils::ShellCommandBuilder] The configured command.
  def build_ffmpeg_command(filename, **_opts)
    Utils::ShellCommandBuilder
      .new(:ffmpeg)
      .add_flag("-nostdin")
      .add_option("-v", "quiet")
      .add_option("-i", filename)
      .add_option("-f", "s16le")
      .add_option("-acodec", "pcm_s16le")
      .add_option("-ac", channels_flag)
      .add_option("-ar", sample_rate)
      .add_arg("-")
  end

  ##
  # Builds a `mpg123` command to decode audio.
  #
  # The mono flag is added before the filename because the mpg123 synopsis
  # places all options ahead of the file arguments.
  #
  # @param filename [String] Path to the audio file.
  # @return [Utils::ShellCommandBuilder] The configured command.
  def build_mpg123_command(filename, **_opts)
    mpg123_command = Utils::ShellCommandBuilder.new(:mpg123)

    mpg123_command
      .add_flag("-q")
      .add_option("-f", amplification_factor)
      .add_option("-r", sample_rate)
      .add_flag("-s")
    mpg123_command.add_flag(channels_flag) if mono?
    mpg123_command.add_arg(filename)
    mpg123_command
  end

  ##
  # Builds a `sox` command to decode audio.
  #
  # @param filename [String] Path to the audio file.
  # @param opts [Hash] Additional options (e.g., `raw: true`).
  # @return [Utils::ShellCommandBuilder] The configured command.
  def build_sox_command(filename, **opts)
    sox_command = Utils::ShellCommandBuilder.new(:sox)

    sox_command
      .add_arg(filename)
      .add_option("-r", sample_rate)
      .add_option("-e", "signed")
      .add_option("-b", 16)
      .add_option("-c", channels_flag)
    sox_command.add_option("-t", "raw") if opts[:raw]
    sox_command.add_arg("-")
  end

  ##
  # Executes the shell command and loads raw audio samples.
  #
  # The command's stdout is read as binary, interpreted as 16-bit integers,
  # cast to `Numo::DFloat` and divided by `amplification_factor`.
  #
  # NOTE(review): the command's exit status is not checked, so a failing
  # decoder yields whatever bytes it wrote (possibly none) — confirm whether
  # that should raise instead.
  # NOTE(review): the command is run from a single String; this presumably
  # relies on Utils::ShellCommandBuilder quoting the filename — verify.
  #
  # @param shell_command [String, Utils::ShellCommandBuilder] The shell command to execute.
  # @return [Array<(Numo::DFloat, Integer, Integer)>] An array containing:
  #   - samples (`Numo::DFloat`)
  #   - number of channels (`Integer`)
  #   - sample rate (`Integer`)
  def load_samples(shell_command)
    shell_command = shell_command.command unless shell_command.is_a?(String)
    raw_audio = IO.popen(shell_command, "rb", &:read)
    samples = Numo::Int16.from_string(raw_audio).cast_to(Numo::DFloat) / amplification_factor.to_f

    [samples, num_channels, sample_rate.to_i]
  end

  ##
  # Returns the decoder to use, selecting and memoizing one on first use.
  #
  # If `@decoder` is already set it is returned as is. Otherwise
  # {#choose_decoder} is consulted and its result stored in `@decoder`.
  # The chosen decoder is returned on every call, including the first.
  #
  # @raise [Awaaz::DecoderNotFound] if no decoder could be determined.
  # @return [Symbol] The chosen decoder symbol (`:ffmpeg`, `:mpg123`, `:sox`, or a user-provided option).
  def set_decoder
    @decoder ||= choose_decoder
    return @decoder if @decoder

    raise Awaaz::DecoderNotFound,
          "No available decoder detected to decode mp3 files. " \
          "Potential decoders: #{config.potential_decoders.join(", ")}"
  end

  ##
  # Chooses an appropriate decoder based on user preference and system capabilities.
  #
  # Priority order:
  # 1. User-specified decoder option (if listed in `potential_decoders`).
  # 2. `ffmpeg` if available.
  # 3. `mpg123` if available.
  # 4. `sox` if available.
  #
  # NOTE(review): `decoder_option` and the bare `potential_decoders` are not
  # defined in this module and are presumably supplied by the including
  # class, while {#set_decoder} reads `config.potential_decoders` — verify
  # both refer to the same list.
  #
  # @return [Symbol, nil] The chosen decoder symbol (`:ffmpeg`, `:mpg123`, `:sox`,
  #   or a user-provided option), or `nil` if none is available.
  def choose_decoder
    return decoder_option if decoder_option && potential_decoders.include?(decoder_option)
    return :ffmpeg if config.ffmpeg?
    return :mpg123 if config.mpg123?

    :sox if config.sox?
  end

  ##
  # Returns the appropriate channel flag for the decoder.
  #
  # @return [String, Integer] A flag for `mpg123` (`"-m"`) if mono, otherwise the channel count.
  def channels_flag
    return "-m" if mpg123? && mono?

    num_channels
  end

  ##
  # Checks if the current decoder is `mpg123`.
  #
  # @return [Boolean] `true` if the decoder is `mpg123`.
  # @raise [Awaaz::DecoderNotFound] if no decoder could be determined.
  def mpg123?
    set_decoder == :mpg123
  end
end
182
+ end
183
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Awaaz
4
+ # Current version of the Awaaz gem.
5
+ VERSION = "0.1.0"
6
+ end
data/lib/awaaz.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Main namespace for the Awaaz gem.
5
+ #
6
+ # The Awaaz gem provides audio decoding utilities and related tools for working
7
+ # with various audio formats. It relies on FFI for native-library bindings and on
8
+ # Numo::NArray for numerical processing, and includes decoders, utilities, and configuration options.
9
+ #
10
+ # @see Awaaz::Decoders
11
+ # @see Awaaz::Utils
12
+ # @see Awaaz::Config
13
+ module Awaaz
14
+ end
15
+
16
+ require "ffi"
17
+ require "numo/narray"
18
+
19
+ require_relative "awaaz/errors"
20
+ require_relative "awaaz/extensions/extensions"
21
+ require_relative "awaaz/utils/utils"
22
+ require_relative "awaaz/version"
23
+
24
+ require_relative "awaaz/config"
25
+ require_relative "awaaz/decoders/decoders"
data/sig/awaaz.rbs ADDED
@@ -0,0 +1,4 @@
1
+ module Awaaz
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: awaaz
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Saad Azam
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 2025-08-12 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: ffi
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 1.17.2
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 1.17.2
26
+ - !ruby/object:Gem::Dependency
27
+ name: numo-narray
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 0.9.1
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.9.1
40
+ - !ruby/object:Gem::Dependency
41
+ name: ruby-filemagic
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: 0.7.3
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 0.7.3
54
+ description: A gem for loading, decoding, processing and analysing audio.
55
+ email:
56
+ - zeroinside4u@gmail.com
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - ".rubocop.yml"
62
+ - ".ruby-version"
63
+ - CHANGELOG.md
64
+ - CODE_OF_CONDUCT.md
65
+ - GLOSSARY.md
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - TODOS.md
70
+ - lib/awaaz.rb
71
+ - lib/awaaz/config.rb
72
+ - lib/awaaz/decoders/base_decoder.rb
73
+ - lib/awaaz/decoders/decode.rb
74
+ - lib/awaaz/decoders/decoders.rb
75
+ - lib/awaaz/decoders/mp3_decoder.rb
76
+ - lib/awaaz/decoders/wavefile_decoder.rb
77
+ - lib/awaaz/errors.rb
78
+ - lib/awaaz/extensions/extensions.rb
79
+ - lib/awaaz/extensions/samplerate.rb
80
+ - lib/awaaz/extensions/soundfile.rb
81
+ - lib/awaaz/utils/resample.rb
82
+ - lib/awaaz/utils/shell_command_builder.rb
83
+ - lib/awaaz/utils/sound_config.rb
84
+ - lib/awaaz/utils/soundread.rb
85
+ - lib/awaaz/utils/utils.rb
86
+ - lib/awaaz/utils/via_shell.rb
87
+ - lib/awaaz/version.rb
88
+ - sig/awaaz.rbs
89
+ homepage: https://github.com/SadMadLad/awaaz
90
+ licenses:
91
+ - MIT
92
+ metadata:
93
+ allowed_push_host: https://rubygems.org
94
+ homepage_uri: https://github.com/SadMadLad/awaaz
95
+ source_code_uri: https://github.com/SadMadLad/awaaz
96
+ changelog_uri: https://github.com/SadMadLad/awaaz/blob/main/CHANGELOG.md
97
+ rubygems_mfa_required: 'true'
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: 3.4.2
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubygems_version: 3.6.2
113
+ specification_version: 4
114
+ summary: Audio Analysis with Ruby
115
+ test_files: []