orbitalsai 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ """
2
+ OrbitalsAI Streaming Audio Sources
3
+
4
+ Audio source classes for streaming from various inputs.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ from abc import ABC, abstractmethod
10
+ from pathlib import Path
11
+ from typing import AsyncIterator, Iterator, Optional
12
+
13
+ from .converter import AudioConverter
14
+ from .buffer import AudioBuffer
15
+
16
+ logger = logging.getLogger("orbitalsai.streaming")
17
+
18
+
19
class AudioSource(ABC):
    """
    Common interface implemented by every streaming audio input.

    A concrete source (file, microphone, ...) hands out audio as ``bytes``
    chunks through both the synchronous and the asynchronous iteration
    protocols.
    """

    def __init__(self, sample_rate: int = 16000, chunk_size: int = 8000):
        """
        Remember the chunking parameters shared by all sources.

        Args:
            sample_rate: Target sample rate in Hz
            chunk_size: Number of samples per chunk
        """
        self.chunk_size = chunk_size
        self.sample_rate = sample_rate

    @abstractmethod
    def __iter__(self) -> Iterator[bytes]:
        """Yield audio chunks synchronously."""

    @abstractmethod
    def __aiter__(self) -> AsyncIterator[bytes]:
        """Yield audio chunks asynchronously."""

    @property
    def chunk_duration_ms(self) -> float:
        """Length of one chunk in milliseconds (samples / rate * 1000)."""
        seconds_per_chunk = self.chunk_size / self.sample_rate
        return seconds_per_chunk * 1000
52
+
53
+
54
class FileAudioSource(AudioSource):
    """
    Audio source that reads from a file.

    Supports various audio formats (WAV, MP3, M4A, etc.) and handles
    conversion to PCM16 format automatically. Decoding is delegated to
    ``AudioConverter.from_file`` and happens lazily, the first time the
    source is iterated or one of its size properties is read.

    Example:
        source = FileAudioSource("speech.mp3")

        async for chunk in source:
            await client.send_audio(chunk)
    """

    def __init__(
        self,
        file_path: str,
        sample_rate: int = 16000,
        chunk_size: int = 8000,
        realtime: bool = False
    ):
        """
        Initialize file audio source.

        Args:
            file_path: Path to audio file
            sample_rate: Target sample rate in Hz
            chunk_size: Number of samples per chunk
            realtime: If True, yields chunks at real-time pace
        """
        super().__init__(sample_rate, chunk_size)
        self.file_path = file_path
        self.realtime = realtime

        # Both are filled in by _load() on first use and then reused.
        self._audio_bytes: Optional[bytes] = None
        self._chunks: Optional[list] = None

    def _load(self) -> None:
        """
        Load and convert the audio file (no-op once chunks are cached).

        Raises:
            FileNotFoundError: If ``file_path`` does not exist
        """
        if self._chunks is not None:
            return

        path = Path(self.file_path)
        if not path.exists():
            raise FileNotFoundError(f"Audio file not found: {self.file_path}")

        logger.debug(f"Loading audio file: {self.file_path}")

        # from_file returns a pair; only the converted bytes are kept here.
        self._audio_bytes, _ = AudioConverter.from_file(
            self.file_path,
            target_sample_rate=self.sample_rate
        )
        self._chunks = AudioConverter.split_chunks(
            self._audio_bytes,
            chunk_size=self.chunk_size
        )

        logger.debug(f"Loaded {len(self._chunks)} chunks")

    def __iter__(self) -> Iterator[bytes]:
        """
        Iterate over audio chunks synchronously.

        When ``realtime`` is set, sleeps for one chunk duration after each
        yielded chunk.
        """
        import time

        self._load()

        for chunk in self._chunks:
            yield chunk
            if self.realtime:
                time.sleep(self.chunk_duration_ms / 1000)

    async def __aiter__(self) -> AsyncIterator[bytes]:
        """
        Iterate over audio chunks asynchronously.

        The first load reads and converts the whole file, which is blocking
        work; it is pushed to the loop's default executor so other tasks on
        the event loop keep running while the file is decoded.
        """
        if self._chunks is None:
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, self._load)

        for chunk in self._chunks:
            yield chunk
            if self.realtime:
                await asyncio.sleep(self.chunk_duration_ms / 1000)

    @property
    def duration_seconds(self) -> float:
        """Get total audio duration in seconds (triggers loading)."""
        self._load()
        # Two bytes per sample: the converted audio is treated as PCM16.
        return len(self._audio_bytes) / (self.sample_rate * 2)

    @property
    def num_chunks(self) -> int:
        """Get total number of chunks (triggers loading)."""
        self._load()
        return len(self._chunks)
144
+
145
+
146
class RawPCMFileSource(AudioSource):
    """
    Audio source backed by a raw PCM16 file.

    Intended for files that are already PCM16 mono little-endian; the bytes
    are passed through exactly as read, without any conversion.

    Example:
        source = RawPCMFileSource("audio.pcm", sample_rate=16000)

        for chunk in source:
            client.send_audio(chunk)
    """

    def __init__(
        self,
        file_path: str,
        sample_rate: int = 16000,
        chunk_size: int = 8000,
        realtime: bool = False
    ):
        """
        Set up the raw PCM file source.

        Args:
            file_path: Path to raw PCM16 file
            sample_rate: Sample rate of the file
            chunk_size: Number of samples per chunk
            realtime: If True, yields chunks at real-time pace
        """
        super().__init__(sample_rate, chunk_size)
        self.realtime = realtime
        self.file_path = file_path
        # PCM16 stores every sample in two bytes.
        self._bytes_per_chunk = chunk_size * 2

    def __iter__(self) -> Iterator[bytes]:
        """Read the file chunk by chunk, synchronously."""
        import time

        with open(self.file_path, "rb") as pcm_file:
            while True:
                block = pcm_file.read(self._bytes_per_chunk)
                if not block:
                    # Empty read means end of file.
                    break
                yield block
                if self.realtime:
                    time.sleep(self.chunk_duration_ms / 1000)

    async def __aiter__(self) -> AsyncIterator[bytes]:
        """Read the file chunk by chunk through aiofiles."""
        import aiofiles

        async with aiofiles.open(self.file_path, "rb") as pcm_file:
            while True:
                block = await pcm_file.read(self._bytes_per_chunk)
                if not block:
                    # Empty read means end of file.
                    break
                yield block
                if self.realtime:
                    await asyncio.sleep(self.chunk_duration_ms / 1000)
200
+
201
+
202
class MicrophoneSource(AudioSource):
    """
    Audio source that captures from microphone.

    Requires: sounddevice

    Audio is captured as mono int16 blocks of ``chunk_size`` frames through
    a ``sounddevice.InputStream`` callback and handed to the consumer via a
    queue.

    Example:
        source = MicrophoneSource(duration=30)

        async for chunk in source:
            await client.send_audio(chunk)
    """

    def __init__(
        self,
        sample_rate: int = 16000,
        chunk_size: int = 8000,
        device: Optional[int] = None,
        duration: Optional[float] = None
    ):
        """
        Initialize microphone source.

        Args:
            sample_rate: Sample rate in Hz
            chunk_size: Number of samples per chunk
            device: Audio input device index (None for default)
            duration: Recording duration in seconds (None for indefinite)
        """
        super().__init__(sample_rate, chunk_size)
        self.device = device
        self.duration = duration

    @staticmethod
    def _import_sounddevice():
        """
        Import and return the optional ``sounddevice`` module.

        Raises:
            ImportError: With installation instructions if it is missing
        """
        try:
            import sounddevice as sd
        except ImportError as exc:
            raise ImportError(
                "sounddevice is required for microphone input. "
                "Install it with: pip install sounddevice"
            ) from exc
        return sd

    def __iter__(self) -> Iterator[bytes]:
        """
        Iterate over audio chunks synchronously.

        Runs until ``duration`` seconds have elapsed, or forever when
        ``duration`` is None. The duration is checked between chunks.

        Raises:
            ImportError: If sounddevice is not installed
        """
        sd = self._import_sounddevice()

        import queue
        import time

        audio_queue = queue.Queue()

        def callback(indata, frames, time_info, status):
            if status:
                logger.warning(f"Audio status: {status}")
            # Copy: the callback buffer is only valid during the call.
            audio_queue.put(indata.copy())

        # Monotonic clock so system clock adjustments cannot cut the
        # recording short or extend it.
        start_time = time.monotonic()

        with sd.InputStream(
            samplerate=self.sample_rate,
            channels=1,
            dtype="int16",
            blocksize=self.chunk_size,
            device=self.device,
            callback=callback
        ):
            while True:
                # "is not None" so that duration=0 is not mistaken for
                # "record indefinitely".
                if (
                    self.duration is not None
                    and time.monotonic() - start_time > self.duration
                ):
                    break

                try:
                    # Short timeout so the duration limit is re-checked
                    # even when no audio arrives.
                    audio_data = audio_queue.get(timeout=1.0)
                except queue.Empty:
                    continue
                yield audio_data.tobytes()

    async def __aiter__(self) -> AsyncIterator[bytes]:
        """
        Iterate over audio chunks asynchronously.

        Runs until ``duration`` seconds have elapsed, or forever when
        ``duration`` is None. The duration is checked between chunks.

        Raises:
            ImportError: If sounddevice is not installed
        """
        sd = self._import_sounddevice()

        loop = asyncio.get_running_loop()
        audio_queue = asyncio.Queue()

        def callback(indata, frames, time_info, status):
            if status:
                logger.warning(f"Audio status: {status}")
            # sounddevice invokes this callback from its own audio thread.
            # asyncio.Queue is not thread-safe, so the put must be scheduled
            # onto the event loop; this also wakes a loop that is idle
            # waiting on the queue.
            try:
                loop.call_soon_threadsafe(
                    audio_queue.put_nowait, indata.copy()
                )
            except RuntimeError:
                # The loop has already been closed; nobody is left to
                # receive this block.
                pass

        start_time = loop.time()

        with sd.InputStream(
            samplerate=self.sample_rate,
            channels=1,
            dtype="int16",
            blocksize=self.chunk_size,
            device=self.device,
            callback=callback
        ):
            while True:
                # "is not None" so that duration=0 is not mistaken for
                # "record indefinitely".
                if (
                    self.duration is not None
                    and loop.time() - start_time > self.duration
                ):
                    break

                try:
                    # Short timeout so the duration limit is re-checked
                    # even when no audio arrives.
                    audio_data = await asyncio.wait_for(
                        audio_queue.get(),
                        timeout=1.0
                    )
                except asyncio.TimeoutError:
                    continue
                yield audio_data.tobytes()
@@ -0,0 +1,384 @@
1
+ """
2
+ OrbitalsAI Sync Streaming Client
3
+
4
+ Synchronous WebSocket client for real-time streaming transcription.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import queue
10
+ import threading
11
+ from typing import Optional
12
+
13
+ from .config import StreamingConfig
14
+ from .events import StreamingEventHandlers
15
+ from .async_client import AsyncStreamingClient
16
+ from .exceptions import SessionClosedError, StreamingError
17
+
18
+ logger = logging.getLogger("orbitalsai.streaming")
19
+
20
+
21
class StreamingClient:
    """
    Synchronous WebSocket client for streaming transcription.

    Wraps ``AsyncStreamingClient``: a daemon background thread owns an
    asyncio event loop, and each public method submits the matching
    coroutine to that loop with ``asyncio.run_coroutine_threadsafe`` and
    blocks until it completes.

    Example:
        import time
        from orbitalsai.streaming import StreamingClient, PrintingEventHandlers

        with StreamingClient(api_key="your_key") as client:
            client.connect(PrintingEventHandlers())

            with open("audio.pcm", "rb") as f:
                while chunk := f.read(16000):
                    client.send_audio(chunk)
                    time.sleep(0.5)  # Real-time pacing

            client.flush()

    Example with custom handlers:
        class MyHandlers(StreamingEventHandlers):
            def on_transcript_final(self, text, metadata):
                print(f"Transcription: {text}")

        client = StreamingClient(api_key="your_key")
        client.connect(MyHandlers())
        # ... send audio ...
        client.disconnect()
    """

    def __init__(
        self,
        api_key: str,
        config: Optional[StreamingConfig] = None,
        base_url: str = "wss://api.orbitalsai.com"
    ):
        """
        Initialize the synchronous streaming client.

        Args:
            api_key: OrbitalsAI API key or JWT token
            config: Streaming configuration (optional)
            base_url: WebSocket base URL (default: wss://api.orbitalsai.com)
        """
        self.api_key = api_key
        self.config = config or StreamingConfig()
        self.base_url = base_url

        self._async_client: Optional[AsyncStreamingClient] = None
        self._thread: Optional[threading.Thread] = None
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        # Set by the worker thread once connecting has finished, whether it
        # succeeded or failed; a failure is reported through _error.
        self._started = threading.Event()
        self._error: Optional[Exception] = None

    @property
    def is_connected(self) -> bool:
        """Check if WebSocket is connected."""
        if self._async_client:
            return self._async_client.is_connected
        return False

    @property
    def session_id(self) -> Optional[str]:
        """Get current session ID."""
        if self._async_client:
            return self._async_client.session_id
        return None

    def connect(self, handlers: StreamingEventHandlers, timeout: float = 30.0) -> None:
        """
        Start background thread and connect.

        Args:
            handlers: Event handlers for callbacks
            timeout: Connection timeout in seconds

        Raises:
            StreamingError: If already connected
            TimeoutError: If connection times out
            Exception: Whatever the async client's connect() raised is
                re-raised in the calling thread
        """
        if self._thread is not None and self._thread.is_alive():
            raise StreamingError("Already connected")

        # Reset state left over from a previous connection attempt
        self._error = None
        self._started.clear()

        self._async_client = AsyncStreamingClient(
            api_key=self.api_key,
            config=self.config,
            base_url=self.base_url
        )

        self._thread = threading.Thread(
            target=self._run_event_loop,
            args=(handlers,),
            daemon=True,
            name="orbitalsai-streaming"
        )
        self._thread.start()

        # Block until the worker reports that connecting finished
        if not self._started.wait(timeout=timeout):
            self._cleanup()
            raise TimeoutError("Connection timed out")

        # Surface a connection failure captured on the worker thread
        if self._error is not None:
            error, self._error = self._error, None
            raise error

    def _run_event_loop(self, handlers: StreamingEventHandlers) -> None:
        """
        Run the async event loop in background thread.

        Connects the async client, signals ``_started``, then keeps the
        loop running until it is stopped (see ``_cleanup``).

        Args:
            handlers: Event handlers for callbacks
        """
        try:
            # Create new event loop for this thread
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)

            try:
                self._loop.run_until_complete(
                    self._async_client.connect(handlers)
                )
                self._started.set()
            except Exception as e:
                # Hand the failure to connect(), which re-raises it
                self._error = e
                self._started.set()
                return

            # Serve submitted coroutines until the loop is stopped
            try:
                self._loop.run_forever()
            except Exception as e:
                logger.error(f"Event loop error: {e}")

        finally:
            if self._loop:
                try:
                    self._loop.close()
                except Exception as e:
                    # Best-effort cleanup: record the failure, don't raise
                    logger.debug(f"Error closing event loop: {e}")
                self._loop = None

    def _call(self, operation: str, timeout: float, *args, **kwargs) -> None:
        """
        Run one async-client coroutine on the background loop and wait.

        Args:
            operation: Name of the ``AsyncStreamingClient`` coroutine method
            timeout: Maximum time to wait for completion, in seconds
            *args: Positional arguments forwarded to the method
            **kwargs: Keyword arguments forwarded to the method

        Raises:
            SessionClosedError: If session is closed
            TimeoutError: If operation times out
        """
        # Imported here so this class does not rely on the module header.
        from concurrent.futures import TimeoutError as FutureTimeoutError

        # Snapshot both references: the worker thread and _cleanup() may
        # reset them to None at any time.
        client, loop = self._async_client, self._loop
        if client is None or loop is None or not client.is_connected:
            raise SessionClosedError("Session is not connected")

        future = asyncio.run_coroutine_threadsafe(
            getattr(client, operation)(*args, **kwargs),
            loop
        )

        try:
            future.result(timeout=timeout)
        except (FutureTimeoutError, asyncio.TimeoutError) as exc:
            # Future.result() raises concurrent.futures.TimeoutError, which
            # is a different class from asyncio.TimeoutError and from the
            # builtin TimeoutError before Python 3.11, so it has to be
            # caught explicitly to honour the documented contract.
            raise TimeoutError(f"{operation} timed out") from exc

    def send_audio(self, audio_data: bytes, timeout: float = 10.0) -> None:
        """
        Send PCM16 audio chunk (thread-safe).

        Args:
            audio_data: Raw PCM16 mono little-endian bytes
            timeout: Operation timeout in seconds

        Raises:
            SessionClosedError: If session is closed
            TimeoutError: If operation times out
        """
        self._call("send_audio", timeout, audio_data)

    def configure(
        self,
        language: Optional[str] = None,
        sample_rate: Optional[int] = None,
        timeout: float = 10.0
    ) -> None:
        """
        Update session configuration (thread-safe).

        Args:
            language: New transcription language (optional)
            sample_rate: New sample rate in Hz (optional)
            timeout: Operation timeout in seconds

        Raises:
            SessionClosedError: If session is closed
            TimeoutError: If operation times out
        """
        self._call(
            "configure",
            timeout,
            language=language,
            sample_rate=sample_rate
        )

    def flush(self, timeout: float = 30.0) -> None:
        """
        Force transcription of remaining audio buffer (thread-safe).

        Args:
            timeout: Operation timeout in seconds

        Raises:
            SessionClosedError: If session is closed
            TimeoutError: If operation times out
        """
        self._call("flush", timeout)

    def disconnect(self, timeout: float = 10.0) -> None:
        """
        Disconnect and cleanup thread.

        Args:
            timeout: Timeout for graceful shutdown
        """
        self._cleanup(timeout=timeout)

    def _cleanup(self, timeout: float = 10.0) -> None:
        """
        Internal cleanup method.

        Disconnects the async client, stops the event loop, joins the
        worker thread and resets internal state. Errors are logged at debug
        level rather than raised.

        Args:
            timeout: Timeout for graceful shutdown (applied separately to
                the disconnect call and to the thread join)
        """
        # Snapshot: the worker thread sets self._loop to None when it
        # exits, which could otherwise happen between a check and its use.
        client, loop, thread = self._async_client, self._loop, self._thread

        # Disconnect async client
        if client is not None and loop is not None:
            try:
                future = asyncio.run_coroutine_threadsafe(
                    client.disconnect(),
                    loop
                )
                future.result(timeout=timeout)
            except Exception as e:
                logger.debug(f"Error during disconnect: {e}")

        # Stop event loop so run_forever() returns in the worker thread
        if loop is not None and loop.is_running():
            try:
                loop.call_soon_threadsafe(loop.stop)
            except RuntimeError as e:
                # Loop was closed in the meantime; nothing left to stop
                logger.debug(f"Error stopping event loop: {e}")

        # Wait for thread to finish
        if thread is not None and thread.is_alive():
            thread.join(timeout=timeout)

        # Reset state
        self._async_client = None
        self._thread = None
        self._loop = None

    def __enter__(self) -> 'StreamingClient':
        """Context manager entry; does not connect by itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit; disconnects and does not suppress errors."""
        self.disconnect()
301
+
302
+
303
class StreamingTranscriptAccumulator(StreamingEventHandlers):
    """
    Event handlers that collect final transcripts as they arrive.

    Handy for batch-style use: stream the audio, then read back every
    final transcript, the summed cost and the summed audio seconds. All
    state is guarded by a lock.

    Example:
        accumulator = StreamingTranscriptAccumulator()

        with StreamingClient(api_key="...") as client:
            client.connect(accumulator)
            # ... send audio ...
            client.flush()

        print(accumulator.get_full_transcript())
        print(f"Total cost: ${accumulator.total_cost:.4f}")
    """

    def __init__(self):
        """Start with empty transcript/error lists and zeroed totals."""
        self._transcripts: list = []
        self._total_cost: float = 0.0
        self._total_seconds: float = 0.0
        self._errors: list = []
        self._lock = threading.Lock()

    def on_transcript_final(self, transcript: str, metadata: dict) -> None:
        """Store a final transcript and add its cost and duration."""
        with self._lock:
            self._transcripts.append(transcript)
            # Missing metadata keys count as zero.
            chunk_cost = metadata.get("cost", 0)
            self._total_cost = self._total_cost + chunk_cost
            chunk_seconds = metadata.get("audio_seconds", 0)
            self._total_seconds = self._total_seconds + chunk_seconds

    def on_error(self, error: Exception) -> None:
        """Remember an error reported by the client."""
        with self._lock:
            self._errors.append(error)

    def get_transcripts(self) -> list:
        """Return a copy of the final transcripts collected so far."""
        with self._lock:
            snapshot = self._transcripts[:]
        return snapshot

    def get_full_transcript(self, separator: str = " ") -> str:
        """
        Join all collected transcripts into one string.

        Args:
            separator: String to join transcripts with

        Returns:
            Full transcript string
        """
        with self._lock:
            joined = separator.join(self._transcripts)
        return joined

    @property
    def total_cost(self) -> float:
        """Sum of the ``cost`` metadata values (in dollars)."""
        with self._lock:
            cost = self._total_cost
        return cost

    @property
    def total_seconds(self) -> float:
        """Sum of the ``audio_seconds`` metadata values."""
        with self._lock:
            seconds = self._total_seconds
        return seconds

    @property
    def errors(self) -> list:
        """Return a copy of the errors recorded so far."""
        with self._lock:
            snapshot = self._errors[:]
        return snapshot

    def clear(self) -> None:
        """Drop all accumulated transcripts, errors and totals."""
        with self._lock:
            # Emptied in place so the same list objects stay in use.
            del self._transcripts[:]
            del self._errors[:]
            self._total_cost = 0.0
            self._total_seconds = 0.0