atom-audio-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
utils/audio.py ADDED
@@ -0,0 +1,220 @@
1
+ """Audio utility functions."""
2
+
3
+ import struct
4
+ from typing import Optional
5
+
6
+
7
def resample_audio(
    audio: bytes,
    from_rate: int,
    to_rate: int,
    channels: int = 1,
    sample_width: int = 2,
) -> bytes:
    """
    Resample interleaved PCM audio to a different sample rate.

    Uses FFT-based resampling from ``scipy.signal.resample`` when numpy and
    scipy can be imported, and falls back to pure-Python linear
    interpolation otherwise. Each channel is resampled independently so
    interleaved multi-channel audio is not mixed together.

    Args:
        audio: Input audio bytes (little-endian signed PCM, interleaved)
        from_rate: Original sample rate
        to_rate: Target sample rate
        channels: Number of audio channels
        sample_width: Bytes per sample (2 for 16-bit, 4 for 32-bit)

    Returns:
        Resampled audio bytes. The input is returned unchanged when both
        rates are equal. A trailing partial frame in the input is dropped.

    Raises:
        ValueError: If a rate or the channel count is not positive, or the
            sample width is unsupported.
    """
    if from_rate == to_rate:
        return audio

    if from_rate <= 0 or to_rate <= 0:
        raise ValueError(
            f"Sample rates must be positive: from_rate={from_rate}, to_rate={to_rate}"
        )
    if channels < 1:
        raise ValueError(f"Unsupported channel count: {channels}")
    if sample_width not in (2, 4):
        raise ValueError(f"Unsupported sample width: {sample_width}")

    # Only the imports are guarded: a failure inside the numeric code
    # below must surface instead of being mistaken for a missing package.
    try:
        import numpy as np
        from scipy import signal
    except ImportError:
        # Fallback to simple linear interpolation
        return _simple_resample(audio, from_rate, to_rate, sample_width, channels)

    frame_size = sample_width * channels
    num_frames = len(audio) // frame_size
    new_frames = int(num_frames * to_rate / from_rate)
    if new_frames == 0:
        return b""

    # Explicit little-endian dtype keeps this path byte-compatible with the
    # struct-based fallback regardless of host byte order.
    dtype = np.dtype("<i2" if sample_width == 2 else "<i4")

    # One row per frame, one column per channel; `count` skips any trailing
    # partial frame that frombuffer would otherwise reject.
    frames = np.frombuffer(audio, dtype=dtype, count=num_frames * channels)
    frames = frames.reshape(num_frames, channels).astype(np.float64)

    # Resample along the time axis only, so channels stay separate.
    resampled = signal.resample(frames, new_frames, axis=0)

    # FFT resampling can overshoot the input range; round and clip before
    # the integer cast so peaks saturate instead of wrapping around.
    limits = np.iinfo(dtype)
    clipped = np.clip(np.rint(resampled), limits.min, limits.max)
    return clipped.astype(dtype).tobytes()


def _simple_resample(
    audio: bytes,
    from_rate: int,
    to_rate: int,
    sample_width: int = 2,
    channels: int = 1,
) -> bytes:
    """
    Resample interleaved PCM audio with linear interpolation (stdlib only).

    Args:
        audio: Input audio bytes (little-endian signed PCM, interleaved)
        from_rate: Original sample rate
        to_rate: Target sample rate
        sample_width: Bytes per sample (2 for 16-bit, 4 for 32-bit)
        channels: Number of audio channels

    Returns:
        Resampled audio bytes. A trailing partial frame is dropped.

    Raises:
        ValueError: If the sample width is unsupported.
    """
    if sample_width == 2:
        code = "h"
    elif sample_width == 4:
        code = "i"
    else:
        raise ValueError(f"Unsupported sample width: {sample_width}")

    frame_size = sample_width * channels
    num_frames = len(audio) // frame_size
    new_frames = int(num_frames * to_rate / from_rate)
    if new_frames == 0:
        return b""

    # Decode every whole frame in a single call; slicing first discards a
    # trailing partial frame that would make unpack raise.
    samples = struct.unpack(
        f"<{num_frames * channels}{code}", audio[: num_frames * frame_size]
    )

    step = from_rate / to_rate
    last = num_frames - 1
    resampled = []

    for i in range(new_frames):
        pos = i * step
        # Clamp so float rounding can never index past the final frame.
        idx = min(int(pos), last)
        start = idx * channels
        current = samples[start : start + channels]

        if idx < last:
            frac = pos - idx
            following = samples[start + channels : start + 2 * channels]
            # Interpolate each channel between its own neighbouring samples.
            resampled.extend(
                round(a + (b - a) * frac) for a, b in zip(current, following)
            )
        else:
            # No following frame to interpolate towards: repeat the last one.
            resampled.extend(current)

    return struct.pack(f"<{len(resampled)}{code}", *resampled)
84
+
85
+
86
def pcm_to_wav(
    pcm_data: bytes,
    sample_rate: int = 16000,
    channels: int = 1,
    bits_per_sample: int = 16,
) -> bytes:
    """
    Convert raw PCM data to WAV format.

    Prepends a 44-byte RIFF/WAVE header (format tag 1, PCM) to the data.

    Args:
        pcm_data: Raw PCM audio bytes
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        bits_per_sample: Bits per sample (typically 16)

    Returns:
        WAV file as bytes. When ``pcm_data`` has an odd length, one zero
        pad byte follows the data so the chunk ends on an even boundary;
        the data chunk size field still reports the unpadded length.
    """
    byte_rate = sample_rate * channels * bits_per_sample // 8
    block_align = channels * bits_per_sample // 8
    data_size = len(pcm_data)

    # RIFF chunks are word-aligned: an odd-sized chunk body is followed by
    # a pad byte that counts towards the RIFF size but not the chunk size.
    padding = b"\x00" * (data_size % 2)

    header = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF",
        36 + data_size + len(padding),  # bytes following this size field
        b"WAVE",
        b"fmt ",
        16,  # fmt chunk size
        1,  # audio format (PCM)
        channels,
        sample_rate,
        byte_rate,
        block_align,
        bits_per_sample,
        b"data",
        data_size,
    )

    return header + pcm_data + padding
126
+
127
+
128
def wav_to_pcm(wav_data: bytes) -> tuple[bytes, int, int, int]:
    """
    Extract raw PCM data from WAV format.

    Walks the RIFF chunk list, reading the format fields from the ``fmt ``
    chunk and returning as soon as the ``data`` chunk is reached. Chunks
    with other identifiers are skipped.

    Args:
        wav_data: WAV file as bytes

    Returns:
        Tuple of (pcm_data, sample_rate, channels, bits_per_sample). The
        three format values are 0 if no ``fmt `` chunk precedes the
        ``data`` chunk.

    Raises:
        ValueError: If the RIFF/WAVE signature is missing, the ``fmt ``
            chunk is too short, or no ``data`` chunk is found.
    """
    # Parse RIFF header
    if wav_data[:4] != b"RIFF" or wav_data[8:12] != b"WAVE":
        raise ValueError("Invalid WAV file")

    sample_rate = 0
    channels = 0
    bits_per_sample = 0

    pos = 12
    end = len(wav_data)

    # Require a complete 8-byte chunk header so trailing garbage ends the
    # walk instead of raising struct.error.
    while pos + 8 <= end:
        chunk_id, chunk_size = struct.unpack_from("<4sI", wav_data, pos)
        body = pos + 8

        if chunk_id == b"fmt ":
            if chunk_size < 16 or body + 16 > end:
                raise ValueError("Invalid WAV file")
            _, channels, sample_rate, _, _, bits_per_sample = struct.unpack_from(
                "<HHIIHH", wav_data, body
            )
        elif chunk_id == b"data":
            pcm_data = wav_data[body : body + chunk_size]
            return pcm_data, sample_rate, channels, bits_per_sample

        # RIFF chunks are word-aligned: skip the pad byte after an
        # odd-sized chunk so the next header is read from the right offset.
        pos = body + chunk_size + (chunk_size & 1)

    raise ValueError("No data chunk found in WAV file")
163
+
164
+
165
def get_audio_duration(
    audio: bytes,
    sample_rate: int,
    channels: int = 1,
    bits_per_sample: int = 16,
) -> float:
    """
    Return how long a PCM buffer plays for, in seconds.

    Args:
        audio: PCM audio bytes
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        bits_per_sample: Bits per sample

    Returns:
        Duration in seconds, counting whole frames only
    """
    # One frame holds one sample for every channel.
    frame_bytes = (bits_per_sample // 8) * channels
    frame_count = len(audio) // frame_bytes
    return frame_count / sample_rate
186
+
187
+
188
def normalize_audio(audio: bytes, target_db: float = -20.0) -> bytes:
    """
    Normalize audio to a target dB level.

    Scales the samples so their RMS level matches ``target_db`` relative
    to full scale (32768), rounding to the nearest integer and clipping to
    the 16-bit range. Uses numpy when it can be imported and an equivalent
    pure-Python implementation otherwise.

    Args:
        audio: PCM audio bytes (16-bit, little-endian, signed)
        target_db: Target dB level

    Returns:
        Normalized audio bytes, the same length as the input. Empty or
        all-zero input is returned unchanged, and a trailing odd byte is
        copied through untouched.
    """
    import math

    # Only whole 16-bit samples can be scaled; keep any dangling byte aside
    # so it neither breaks decoding nor changes the output length.
    usable = len(audio) - (len(audio) % 2)
    if usable == 0:
        return audio
    body, tail = audio[:usable], audio[usable:]

    # Calculate target RMS
    target_rms = 32768 * (10 ** (target_db / 20))

    try:
        import numpy as np
    except ImportError:
        np = None

    if np is not None:
        # float64 keeps the sum of squared 16-bit samples precise.
        samples = np.frombuffer(body, dtype="<i2").astype(np.float64)

        # Calculate current RMS
        rms = float(np.sqrt(np.mean(samples**2)))
        if rms == 0:
            return audio

        # Scale
        scaled = np.rint(samples * (target_rms / rms))
        normalized = np.clip(scaled, -32768, 32767).astype("<i2")
        return normalized.tobytes() + tail

    # Pure-Python path: same maths without numpy.
    count = usable // 2
    values = struct.unpack(f"<{count}h", body)

    rms = math.sqrt(sum(v * v for v in values) / count)
    if rms == 0:
        return audio

    gain = target_rms / rms
    scaled_values = [max(-32768, min(32767, round(v * gain))) for v in values]
    return struct.pack(f"<{count}h", *scaled_values) + tail