orbitalsai 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orbitalsai/__init__.py +26 -3
- orbitalsai/async_client.py +25 -1
- orbitalsai/client.py +26 -50
- orbitalsai/models.py +10 -0
- orbitalsai/streaming/__init__.py +117 -0
- orbitalsai/streaming/async_client.py +507 -0
- orbitalsai/streaming/audio/__init__.py +33 -0
- orbitalsai/streaming/audio/buffer.py +171 -0
- orbitalsai/streaming/audio/converter.py +327 -0
- orbitalsai/streaming/audio/formats.py +112 -0
- orbitalsai/streaming/audio/source.py +317 -0
- orbitalsai/streaming/client.py +384 -0
- orbitalsai/streaming/config.py +207 -0
- orbitalsai/streaming/connection.py +298 -0
- orbitalsai/streaming/events.py +360 -0
- orbitalsai/streaming/exceptions.py +179 -0
- orbitalsai/streaming/protocol.py +245 -0
- orbitalsai-1.2.0.dist-info/METADATA +850 -0
- orbitalsai-1.2.0.dist-info/RECORD +24 -0
- {orbitalsai-1.0.0.dist-info → orbitalsai-1.2.0.dist-info}/WHEEL +1 -1
- orbitalsai-1.0.0.dist-info/METADATA +0 -439
- orbitalsai-1.0.0.dist-info/RECORD +0 -11
- {orbitalsai-1.0.0.dist-info → orbitalsai-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {orbitalsai-1.0.0.dist-info → orbitalsai-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OrbitalsAI Streaming Audio Converter
|
|
3
|
+
|
|
4
|
+
Audio format conversion utilities for streaming.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Optional, Tuple, Union
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("orbitalsai.streaming")

# Module-level caches for the optional third-party dependencies. Each one
# stays None until the matching _get_*() helper imports the module on first
# use, so importing this file never requires numpy/librosa/soundfile.
_numpy = _librosa = _soundfile = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _get_numpy():
    """
    Return the numpy module, importing it on first use.

    The imported module is cached in the module-level ``_numpy`` global so
    that subsequent calls return it without going through the import
    machinery again.

    Returns:
        The imported ``numpy`` module.

    Raises:
        ImportError: If numpy is not installed. The original import failure
            is chained as ``__cause__``.
    """
    global _numpy
    if _numpy is None:
        # Keep the try body to the import itself so only a genuinely missing
        # dependency is translated into the install hint below.
        try:
            import numpy as np
        except ImportError as exc:
            raise ImportError(
                "numpy is required for audio conversion. "
                "Install it with: pip install numpy"
            ) from exc
        _numpy = np
    return _numpy
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _get_librosa():
    """
    Return the librosa module, importing it on first use.

    The imported module is cached in the module-level ``_librosa`` global so
    that subsequent calls return it without going through the import
    machinery again.

    Returns:
        The imported ``librosa`` module.

    Raises:
        ImportError: If librosa is not installed. The original import
            failure is chained as ``__cause__``.
    """
    global _librosa
    if _librosa is None:
        # Keep the try body to the import itself so only a genuinely missing
        # dependency is translated into the install hint below.
        try:
            import librosa
        except ImportError as exc:
            raise ImportError(
                "librosa is required for audio file loading. "
                "Install it with: pip install librosa"
            ) from exc
        _librosa = librosa
    return _librosa
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _get_soundfile():
    """
    Return the soundfile module, importing it on first use.

    The imported module is cached in the module-level ``_soundfile`` global
    so that subsequent calls return it without going through the import
    machinery again.

    Returns:
        The imported ``soundfile`` module.

    Raises:
        ImportError: If soundfile is not installed. The original import
            failure is chained as ``__cause__``.
    """
    global _soundfile
    if _soundfile is None:
        # Keep the try body to the import itself so only a genuinely missing
        # dependency is translated into the install hint below.
        try:
            import soundfile as sf
        except ImportError as exc:
            raise ImportError(
                "soundfile is required for audio file loading. "
                "Install it with: pip install soundfile"
            ) from exc
        _soundfile = sf
    return _soundfile
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AudioConverter:
    """
    Convert various audio formats to PCM16.

    Every method is static; the class acts as a namespace for loading audio
    files, converting numpy arrays, and splitting PCM16 audio into chunks
    for streaming.

    Example:
        # Load and convert an audio file
        audio_bytes, sample_rate = AudioConverter.from_file("speech.mp3")

        # Split into chunks for streaming
        chunks = AudioConverter.split_chunks(audio_bytes, chunk_size=16000)
        for chunk in chunks:
            await client.send_audio(chunk)
    """

    @staticmethod
    def from_file(
        file_path: str,
        target_sample_rate: int = 16000
    ) -> Tuple[bytes, int]:
        """
        Load an audio file and convert it to mono PCM16 bytes.

        Decoding, down-mixing to mono and resampling are delegated to
        ``librosa.load``, so the supported formats (WAV, MP3, M4A, FLAC,
        OGG, etc.) are whatever the installed librosa backends can read.

        Args:
            file_path: Path to audio file
            target_sample_rate: Target sample rate in Hz (default: 16000)

        Returns:
            Tuple of (PCM16 bytes, sample rate reported by librosa for the
            decoded audio)

        Raises:
            ImportError: If required libraries are not installed
            FileNotFoundError: If the file doesn't exist
            ValueError: If librosa fails to load the file; the underlying
                error is chained as ``__cause__``
        """
        librosa = _get_librosa()

        if not Path(file_path).exists():
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        logger.debug("Loading audio file: %s", file_path)

        # librosa handles decoding, mono down-mix and resampling in one call.
        try:
            audio, sr = librosa.load(
                file_path,
                sr=target_sample_rate,
                mono=True
            )
        except Exception as e:
            raise ValueError(f"Failed to load audio file: {e}") from e

        # librosa yields float samples in [-1, 1]; the API wants int16 bytes.
        audio_bytes = AudioConverter._float32_to_pcm16_bytes(audio)

        # Report the rate librosa actually returned rather than echoing the
        # argument, so the log line and return value stay meaningful even
        # when the caller did not pin a concrete target rate.
        logger.debug(
            "Loaded %d bytes (%.2fs) at %sHz",
            len(audio_bytes),
            len(audio_bytes) / (sr * 2),
            sr,
        )

        return audio_bytes, sr

    @staticmethod
    def from_wav_file(
        file_path: str,
        target_sample_rate: int = 16000
    ) -> Tuple[bytes, int]:
        """
        Load a WAV file with soundfile and convert it to mono PCM16 bytes.

        librosa is only imported when the file's sample rate differs from
        ``target_sample_rate`` and resampling is therefore required.

        Args:
            file_path: Path to WAV file
            target_sample_rate: Target sample rate in Hz (default: 16000)

        Returns:
            Tuple of (PCM16 bytes, target sample rate)

        Raises:
            ImportError: If soundfile/numpy (or librosa, when resampling is
                needed) are not installed
            FileNotFoundError: If the file doesn't exist
        """
        sf = _get_soundfile()
        np = _get_numpy()

        if not Path(file_path).exists():
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        audio, sr = sf.read(file_path)

        # Multi-channel data is averaged over axis 1 to get a mono signal.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if sr != target_sample_rate:
            audio = _get_librosa().resample(
                audio,
                orig_sr=sr,
                target_sr=target_sample_rate
            )

        audio = audio.astype(np.float32)

        return AudioConverter._float32_to_pcm16_bytes(audio), target_sample_rate

    @staticmethod
    def from_numpy(
        audio: 'np.ndarray',
        source_sample_rate: int,
        target_sample_rate: int = 16000
    ) -> bytes:
        """
        Convert a numpy array of samples to PCM16 bytes.

        int16 input is scaled by 1/32768; any other non-float32 dtype is
        cast to float32 as-is. If the absolute peak then exceeds 1.0 the
        whole signal is divided by that peak. Arrays with more than one
        dimension are averaged over axis 1 to produce mono. librosa is only
        imported when the source and target sample rates differ.

        Args:
            audio: Numpy array of audio samples (float32 [-1, 1] or int16)
            source_sample_rate: Sample rate of input audio
            target_sample_rate: Target sample rate in Hz (default: 16000)

        Returns:
            PCM16 bytes (mono, little-endian); ``b""`` for empty input
        """
        np = _get_numpy()

        audio = np.asarray(audio)

        if audio.dtype == np.int16:
            audio = audio.astype(np.float32) / 32768.0
        elif audio.dtype != np.float32:
            audio = audio.astype(np.float32)

        # An empty array has no peak to normalise against (max() would
        # raise) and nothing to resample, so short-circuit.
        if audio.size == 0:
            return b""

        peak = np.abs(audio).max()
        if peak > 1.0:
            audio = audio / peak

        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if source_sample_rate != target_sample_rate:
            audio = _get_librosa().resample(
                audio,
                orig_sr=source_sample_rate,
                target_sr=target_sample_rate
            )

        return AudioConverter._float32_to_pcm16_bytes(audio)

    @staticmethod
    def split_chunks(audio_bytes: bytes, chunk_size: int) -> List[bytes]:
        """
        Split PCM16 audio bytes into consecutive chunks.

        Args:
            audio_bytes: PCM16 bytes to split
            chunk_size: Number of samples per chunk (each sample is 2 bytes)

        Returns:
            List of chunk bytes (last chunk may be smaller); an empty list
            for empty input

        Raises:
            ValueError: If ``chunk_size`` is not positive
        """
        # A non-positive step would either make range() fail with an opaque
        # message (0) or silently yield no chunks at all (negative).
        if chunk_size <= 0:
            raise ValueError(
                f"chunk_size must be a positive number of samples, got {chunk_size}"
            )

        bytes_per_chunk = chunk_size * 2  # 2 bytes per int16 sample

        return [
            audio_bytes[start:start + bytes_per_chunk]
            for start in range(0, len(audio_bytes), bytes_per_chunk)
        ]

    @staticmethod
    def _float32_to_pcm16_bytes(audio: 'np.ndarray') -> bytes:
        """
        Convert float audio to PCM16 bytes.

        Args:
            audio: Float numpy array, nominally in range [-1, 1]; values
                outside that range are clipped

        Returns:
            PCM16 bytes (little-endian)
        """
        np = _get_numpy()

        # Clip first so the scaled values always fit in int16.
        clipped = np.clip(audio, -1.0, 1.0)

        # Use an explicit little-endian dtype so the byte layout matches the
        # documented format (and pcm16_bytes_to_float32) on every platform,
        # instead of depending on the host's native byte order.
        return (clipped * 32767).astype('<i2').tobytes()

    @staticmethod
    def _int16_to_float32(audio: 'np.ndarray') -> 'np.ndarray':
        """
        Convert int16 audio to float32.

        Args:
            audio: Int16 numpy array

        Returns:
            Float32 numpy array in range [-1, 1]
        """
        np = _get_numpy()
        return audio.astype(np.float32) / 32768.0

    @staticmethod
    def pcm16_bytes_to_float32(audio_bytes: bytes) -> 'np.ndarray':
        """
        Convert PCM16 bytes to a float32 numpy array.

        Args:
            audio_bytes: PCM16 bytes (little-endian)

        Returns:
            Float32 numpy array in range [-1, 1]
        """
        np = _get_numpy()
        samples = np.frombuffer(audio_bytes, dtype='<i2')
        return samples.astype(np.float32) / 32768.0

    @staticmethod
    def get_duration_seconds(audio_bytes: bytes, sample_rate: int = 16000) -> float:
        """
        Calculate the duration of PCM16 audio in seconds.

        Args:
            audio_bytes: PCM16 bytes
            sample_rate: Sample rate in Hz

        Returns:
            Duration in seconds
        """
        # Two bytes per sample; a trailing odd byte is ignored.
        num_samples = len(audio_bytes) // 2
        return num_samples / sample_rate

    @staticmethod
    def validate_pcm16(audio_bytes: bytes) -> bool:
        """
        Check that audio bytes could be PCM16 data.

        Only the length is inspected: the data must be non-empty and made
        up of whole 2-byte samples.

        Args:
            audio_bytes: Bytes to validate

        Returns:
            True if the length is a positive multiple of 2, False otherwise
        """
        size = len(audio_bytes)
        return size > 0 and size % 2 == 0
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OrbitalsAI Streaming Audio Formats
|
|
3
|
+
|
|
4
|
+
Audio format definitions and utilities.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class AudioFormat:
    """
    Immutable description of an audio stream's layout.

    Attributes:
        encoding: Audio encoding (e.g., "pcm_s16le")
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        sample_width: Bytes per sample
        byte_order: Byte order ("little" or "big"); defaults to "little"
    """
    encoding: str
    sample_rate: int
    channels: int
    sample_width: int
    byte_order: str = "little"

    @property
    def bytes_per_second(self) -> int:
        """Data rate of this format: sample rate x channels x sample width."""
        rate = self.sample_rate
        return rate * self.channels * self.sample_width

    @property
    def frame_size(self) -> int:
        """Size in bytes of one frame, i.e. one sample for every channel."""
        channel_count = self.channels
        return channel_count * self.sample_width
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Standard format used by the streaming API: signed 16-bit little-endian PCM,
# one channel, 16 kHz (2 bytes per sample).
PCM16_MONO = AudioFormat(
    encoding="pcm_s16le",
    byte_order="little",
    sample_width=2,
    channels=1,
    sample_rate=16000,
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Recognised audio file suffixes, stored lower-case with the leading dot.
# is_supported_audio_file() lower-cases a path's suffix before looking it up
# here, so matching is effectively case-insensitive.
SUPPORTED_AUDIO_EXTENSIONS = {
    ".3gp",
    ".3gpp",
    ".aac",
    ".amr",
    ".flac",
    ".m4a",
    ".mp3",
    ".oga",
    ".ogg",
    ".opus",
    ".wav",
    ".wave",
    ".webm",
    ".wma",
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_format_for_file(file_path: str) -> Optional[str]:
    """
    Map a file's extension to a canonical audio format name.

    The lookup is case-insensitive and only considers the final suffix of
    the path; the file itself is never opened.

    Args:
        file_path: Path to audio file

    Returns:
        Format string (e.g., "wav", "mp3") or None if unsupported
    """
    # Several suffixes collapse onto the same canonical name.
    format_by_suffix = {
        ".wav": "wav",
        ".wave": "wav",
        ".mp3": "mp3",
        ".m4a": "m4a",
        ".aac": "m4a",
        ".ogg": "ogg",
        ".oga": "ogg",
        ".opus": "opus",
        ".flac": "flac",
        ".wma": "wma",
        ".amr": "amr",
        ".3gp": "3gp",
        ".3gpp": "3gp",
        ".webm": "webm",
    }

    suffix = Path(file_path).suffix.lower()
    return format_by_suffix.get(suffix)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def is_supported_audio_file(file_path: str) -> bool:
    """
    Tell whether a path's extension is a supported audio extension.

    Only the lower-cased final suffix is checked against
    SUPPORTED_AUDIO_EXTENSIONS; the file itself is never opened.

    Args:
        file_path: Path to audio file

    Returns:
        True if supported, False otherwise
    """
    suffix = Path(file_path).suffix
    return suffix.lower() in SUPPORTED_AUDIO_EXTENSIONS
|