orbitalsai 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ """
2
+ OrbitalsAI Streaming Audio Converter
3
+
4
+ Audio format conversion utilities for streaming.
5
+ """
6
+
7
+ import logging
8
+ from pathlib import Path
9
+ from typing import List, Optional, Tuple, Union
10
+
11
# Logger registered under the "orbitalsai.streaming" name.
logger = logging.getLogger("orbitalsai.streaming")

# Caches for the optional third-party modules. Each one stays None until the
# matching _get_*() helper below imports the module on first use.
_numpy = _librosa = _soundfile = None
17
+
18
+
19
def _get_numpy():
    """
    Return the numpy module, importing it on first use.

    The imported module is stored in the module-level ``_numpy`` variable so
    that later calls return it without importing again.

    Returns:
        The numpy module

    Raises:
        ImportError: If numpy is not installed; the message contains the
            pip command needed to install it
    """
    global _numpy
    if _numpy is None:
        try:
            import numpy as np
        except ImportError as exc:
            # Chain the original failure so the underlying import error is
            # reported as the direct cause of this friendlier message.
            raise ImportError(
                "numpy is required for audio conversion. "
                "Install it with: pip install numpy"
            ) from exc
        _numpy = np
    return _numpy
32
+
33
+
34
def _get_librosa():
    """
    Return the librosa module, importing it on first use.

    The imported module is stored in the module-level ``_librosa`` variable
    so that later calls return it without importing again.

    Returns:
        The librosa module

    Raises:
        ImportError: If librosa is not installed; the message contains the
            pip command needed to install it
    """
    global _librosa
    if _librosa is None:
        try:
            import librosa
        except ImportError as exc:
            # Chain the original failure so the underlying import error is
            # reported as the direct cause of this friendlier message.
            raise ImportError(
                "librosa is required for audio file loading. "
                "Install it with: pip install librosa"
            ) from exc
        _librosa = librosa
    return _librosa
47
+
48
+
49
def _get_soundfile():
    """
    Return the soundfile module, importing it on first use.

    The imported module is stored in the module-level ``_soundfile`` variable
    so that later calls return it without importing again.

    Returns:
        The soundfile module

    Raises:
        ImportError: If soundfile is not installed; the message contains the
            pip command needed to install it
    """
    global _soundfile
    if _soundfile is None:
        try:
            import soundfile as sf
        except ImportError as exc:
            # Chain the original failure so the underlying import error is
            # reported as the direct cause of this friendlier message.
            raise ImportError(
                "soundfile is required for audio file loading. "
                "Install it with: pip install soundfile"
            ) from exc
        _soundfile = sf
    return _soundfile
62
+
63
+
64
class AudioConverter:
    """
    Convert various audio formats to PCM16.

    Provides utilities for loading audio files, converting numpy arrays,
    and splitting audio into chunks for streaming. Every method is a
    static method, so the class is used as a namespace and never needs to
    be instantiated.

    Example:
        # Load and convert an audio file
        audio_bytes, sample_rate = AudioConverter.from_file("speech.mp3")

        # Split into chunks for streaming
        chunks = AudioConverter.split_chunks(audio_bytes, chunk_size=16000)
        for chunk in chunks:
            await client.send_audio(chunk)
    """

    @staticmethod
    def from_file(
        file_path: str,
        target_sample_rate: int = 16000
    ) -> Tuple[bytes, int]:
        """
        Load audio file and convert to PCM16.

        Decoding, down-mixing to mono and resampling are all delegated to
        ``librosa.load``.

        Args:
            file_path: Path to audio file
            target_sample_rate: Target sample rate in Hz (default: 16000)

        Returns:
            Tuple of (PCM16 bytes, target_sample_rate)

        Raises:
            ImportError: If required libraries not installed
            FileNotFoundError: If file doesn't exist
            ValueError: If librosa fails to load the file; the original
                exception is attached as the cause
        """
        librosa = _get_librosa()
        # Fail fast with the install hint if numpy is missing.
        _get_numpy()

        if not Path(file_path).exists():
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        logger.debug("Loading audio file: %s", file_path)

        # librosa resamples to `sr` and mixes down to mono while loading.
        try:
            audio, _ = librosa.load(
                file_path,
                sr=target_sample_rate,
                mono=True
            )
        except Exception as e:
            raise ValueError(f"Failed to load audio file: {e}") from e

        audio_bytes = AudioConverter._float32_to_pcm16_bytes(audio)

        logger.debug(
            "Loaded %d bytes (%.2fs) at %sHz",
            len(audio_bytes),
            AudioConverter.get_duration_seconds(audio_bytes, target_sample_rate),
            target_sample_rate,
        )

        return audio_bytes, target_sample_rate

    @staticmethod
    def from_wav_file(
        file_path: str,
        target_sample_rate: int = 16000
    ) -> Tuple[bytes, int]:
        """
        Load WAV file using soundfile and convert to PCM16.

        librosa is only imported when the file's sample rate differs from
        ``target_sample_rate``, so files already at the target rate can be
        loaded with just soundfile and numpy installed.

        Args:
            file_path: Path to WAV file
            target_sample_rate: Target sample rate in Hz (default: 16000)

        Returns:
            Tuple of (PCM16 bytes, target_sample_rate)

        Raises:
            ImportError: If soundfile or numpy is missing, or if resampling
                is needed and librosa is missing
            FileNotFoundError: If file doesn't exist
        """
        sf = _get_soundfile()
        np = _get_numpy()

        if not Path(file_path).exists():
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        audio, sr = sf.read(file_path)

        # Multi-channel data is averaged over axis 1 to get a mono signal.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if sr != target_sample_rate:
            librosa = _get_librosa()
            audio = librosa.resample(
                audio, orig_sr=sr, target_sr=target_sample_rate
            )

        audio = audio.astype(np.float32)

        return AudioConverter._float32_to_pcm16_bytes(audio), target_sample_rate

    @staticmethod
    def from_numpy(
        audio: 'np.ndarray',
        source_sample_rate: int,
        target_sample_rate: int = 16000
    ) -> bytes:
        """
        Convert numpy array to PCM16 bytes.

        int16 input is scaled by 1/32768; any other non-float32 dtype is
        cast to float32 as-is. If the peak absolute value then exceeds 1.0,
        the whole signal is divided by that peak. Arrays with more than one
        dimension are averaged over axis 1, i.e. they are treated as
        (samples, channels). librosa is only imported when the source and
        target sample rates differ.

        Args:
            audio: Numpy array of audio samples (float32 [-1, 1] or int16)
            source_sample_rate: Sample rate of input audio
            target_sample_rate: Target sample rate in Hz (default: 16000)

        Returns:
            PCM16 bytes (mono, little-endian); empty bytes for empty input

        Raises:
            ImportError: If numpy is missing, or if resampling is needed
                and librosa is missing
        """
        np = _get_numpy()

        audio = np.asarray(audio)

        if audio.dtype == np.int16:
            audio = AudioConverter._int16_to_float32(audio)
        elif audio.dtype != np.float32:
            audio = audio.astype(np.float32)

        # An empty array has no peak to reduce over and nothing to resample.
        if audio.size == 0:
            return b""

        # Normalize to [-1, 1] if needed
        max_val = np.abs(audio).max()
        if max_val > 1.0:
            audio = audio / max_val

        # Convert to mono if stereo
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if source_sample_rate != target_sample_rate:
            librosa = _get_librosa()
            audio = librosa.resample(
                audio,
                orig_sr=source_sample_rate,
                target_sr=target_sample_rate
            )

        return AudioConverter._float32_to_pcm16_bytes(audio)

    @staticmethod
    def split_chunks(audio_bytes: bytes, chunk_size: int) -> List[bytes]:
        """
        Split audio bytes into chunks.

        Args:
            audio_bytes: PCM16 bytes to split
            chunk_size: Number of samples per chunk (2 bytes per sample)

        Returns:
            List of chunk bytes (last chunk may be smaller); an empty list
            for empty input

        Raises:
            ValueError: If chunk_size is not positive
        """
        # A non-positive size would either break range() or silently
        # produce no chunks at all, so reject it up front.
        if chunk_size <= 0:
            raise ValueError(
                f"chunk_size must be a positive number of samples, got {chunk_size}"
            )

        bytes_per_chunk = chunk_size * 2  # 2 bytes per int16 sample

        return [
            audio_bytes[start:start + bytes_per_chunk]
            for start in range(0, len(audio_bytes), bytes_per_chunk)
        ]

    @staticmethod
    def _float32_to_pcm16_bytes(audio: 'np.ndarray') -> bytes:
        """
        Convert float32 audio to PCM16 bytes.

        Samples are clipped to [-1, 1], multiplied by 32767 and cast to
        16-bit integers.

        Args:
            audio: Float32 numpy array in range [-1, 1]

        Returns:
            PCM16 bytes (little-endian)
        """
        np = _get_numpy()

        # Clip to [-1, 1] to prevent overflow
        clipped = np.clip(audio, -1.0, 1.0)

        # '<i2' pins little-endian int16 regardless of the host byte order,
        # which is the layout pcm16_bytes_to_float32 reads back.
        return (clipped * 32767).astype('<i2').tobytes()

    @staticmethod
    def _int16_to_float32(audio: 'np.ndarray') -> 'np.ndarray':
        """
        Convert int16 audio to float32.

        Args:
            audio: Int16 numpy array

        Returns:
            Float32 numpy array in range [-1, 1]
        """
        np = _get_numpy()
        return audio.astype(np.float32) / 32768.0

    @staticmethod
    def pcm16_bytes_to_float32(audio_bytes: bytes) -> 'np.ndarray':
        """
        Convert PCM16 bytes to float32 numpy array.

        Args:
            audio_bytes: PCM16 bytes (little-endian)

        Returns:
            Float32 numpy array in range [-1, 1]
        """
        np = _get_numpy()
        samples = np.frombuffer(audio_bytes, dtype='<i2')
        return AudioConverter._int16_to_float32(samples)

    @staticmethod
    def get_duration_seconds(audio_bytes: bytes, sample_rate: int = 16000) -> float:
        """
        Calculate duration of PCM16 audio in seconds.

        Args:
            audio_bytes: PCM16 bytes
            sample_rate: Sample rate in Hz

        Returns:
            Duration in seconds
        """
        num_samples = len(audio_bytes) // 2  # 2 bytes per int16 sample
        return num_samples / sample_rate

    @staticmethod
    def validate_pcm16(audio_bytes: bytes) -> bool:
        """
        Validate that audio bytes are valid PCM16 format.

        Only the length is inspected: it must be non-zero and even,
        because every sample occupies 2 bytes.

        Args:
            audio_bytes: Bytes to validate

        Returns:
            True if valid PCM16, False otherwise
        """
        size = len(audio_bytes)
        return size > 0 and size % 2 == 0
@@ -0,0 +1,112 @@
1
+ """
2
+ OrbitalsAI Streaming Audio Formats
3
+
4
+ Audio format definitions and utilities.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Optional
9
+ from pathlib import Path
10
+
11
+
12
@dataclass(frozen=True)
class AudioFormat:
    """
    Immutable description of an audio stream layout.

    Attributes:
        encoding: Encoding label (e.g., "pcm_s16le")
        sample_rate: Samples per second, in Hz
        channels: Channel count
        sample_width: Size of a single sample in bytes
        byte_order: Either "little" or "big"; defaults to "little"
    """
    encoding: str
    sample_rate: int
    channels: int
    sample_width: int
    byte_order: str = "little"

    @property
    def frame_size(self) -> int:
        """Return the bytes in one frame (one sample for every channel)."""
        chans, width = self.channels, self.sample_width
        return chans * width

    @property
    def bytes_per_second(self) -> int:
        """Return the data rate of this format in bytes per second."""
        rate, chans, width = self.sample_rate, self.channels, self.sample_width
        return rate * chans * width
39
+
40
+
41
# Standard PCM16 mono format used by the streaming API:
# 16 kHz, one channel, 2-byte little-endian samples.
PCM16_MONO = AudioFormat(
    sample_rate=16_000,
    channels=1,
    sample_width=2,
    byte_order="little",
    encoding="pcm_s16le",
)
49
+
50
+
51
# Supported audio file extensions: lower-case suffixes with the leading dot,
# in the form produced by Path(...).suffix.lower().
SUPPORTED_AUDIO_EXTENSIONS = {
    ".3gp",
    ".3gpp",
    ".aac",
    ".amr",
    ".flac",
    ".m4a",
    ".mp3",
    ".oga",
    ".ogg",
    ".opus",
    ".wav",
    ".wave",
    ".webm",
    ".wma",
}
63
+
64
+
65
# Lower-case file suffix -> format string returned by get_format_for_file().
_FORMAT_BY_EXTENSION = {
    ".wav": "wav",
    ".wave": "wav",
    ".mp3": "mp3",
    ".m4a": "m4a",
    ".aac": "m4a",
    ".ogg": "ogg",
    ".oga": "ogg",
    ".opus": "opus",
    ".flac": "flac",
    ".wma": "wma",
    ".amr": "amr",
    ".3gp": "3gp",
    ".3gpp": "3gp",
    ".webm": "webm",
}


def get_format_for_file(file_path: str) -> Optional[str]:
    """
    Map a file's extension to its audio format name.

    The match is case-insensitive and looks only at the final suffix of
    the path; the file itself is never opened.

    Args:
        file_path: Path to audio file

    Returns:
        Format string (e.g., "wav", "mp3") or None if unsupported
    """
    suffix = Path(file_path).suffix.lower()
    return _FORMAT_BY_EXTENSION.get(suffix)
99
+
100
+
101
def is_supported_audio_file(file_path: str) -> bool:
    """
    Report whether a path has a supported audio file extension.

    The path's final suffix is lower-cased and looked up in
    SUPPORTED_AUDIO_EXTENSIONS; the file itself is never opened.

    Args:
        file_path: Path to audio file

    Returns:
        True if supported, False otherwise
    """
    return Path(file_path).suffix.lower() in SUPPORTED_AUDIO_EXTENSIONS