amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/audio/audio_recorder.py
CHANGED
|
@@ -1,269 +1,269 @@
|
|
|
1
|
-
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
|
|
4
|
-
# Standard library imports first
|
|
5
|
-
import queue
|
|
6
|
-
import threading
|
|
7
|
-
import time
|
|
8
|
-
|
|
9
|
-
# Third-party imports next
|
|
10
|
-
import numpy as np
|
|
11
|
-
import pyaudio
|
|
12
|
-
|
|
13
|
-
from gaia.logger import get_logger
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class AudioRecorder:
|
|
17
|
-
log = get_logger(__name__)
|
|
18
|
-
|
|
19
|
-
def __init__(
|
|
20
|
-
self,
|
|
21
|
-
device_index=None,
|
|
22
|
-
):
|
|
23
|
-
self.log = self.__class__.log # Use the class-level logger for instances
|
|
24
|
-
|
|
25
|
-
# Add thread attributes
|
|
26
|
-
self.record_thread = None
|
|
27
|
-
self.process_thread = None
|
|
28
|
-
|
|
29
|
-
# Audio parameters - optimized for better quality
|
|
30
|
-
self.CHUNK = 1024 * 2 # Reduced for lower latency while maintaining quality
|
|
31
|
-
self.FORMAT = pyaudio.paFloat32
|
|
32
|
-
self.CHANNELS = 1
|
|
33
|
-
self.RATE = 16000
|
|
34
|
-
self.device_index = (
|
|
35
|
-
self._get_default_input_device() if device_index is None else device_index
|
|
36
|
-
)
|
|
37
|
-
self.is_recording = False
|
|
38
|
-
self.audio_queue = queue.Queue()
|
|
39
|
-
self.stream = None # Add stream as class attribute
|
|
40
|
-
|
|
41
|
-
# Voice detection parameters
|
|
42
|
-
self.SILENCE_THRESHOLD = 0.003
|
|
43
|
-
self.MIN_AUDIO_LENGTH = self.RATE * 0.25
|
|
44
|
-
self.is_speaking = False
|
|
45
|
-
|
|
46
|
-
# New attributes for pause functionality
|
|
47
|
-
self.is_paused = False
|
|
48
|
-
self.pause_lock = threading.Lock()
|
|
49
|
-
|
|
50
|
-
def _get_default_input_device(self):
|
|
51
|
-
"""Get the default input device index."""
|
|
52
|
-
pa = pyaudio.PyAudio()
|
|
53
|
-
try:
|
|
54
|
-
default_device = pa.get_default_input_device_info()
|
|
55
|
-
return default_device["index"]
|
|
56
|
-
except Exception as e:
|
|
57
|
-
self.log.error(f"Error getting default input device: {e}")
|
|
58
|
-
# Fall back to device 0 if no default found
|
|
59
|
-
return 0
|
|
60
|
-
finally:
|
|
61
|
-
pa.terminate()
|
|
62
|
-
|
|
63
|
-
def _is_speech(self, audio_chunk):
|
|
64
|
-
"""Detect if audio chunk contains speech based on amplitude."""
|
|
65
|
-
return np.abs(audio_chunk).mean() > self.SILENCE_THRESHOLD
|
|
66
|
-
|
|
67
|
-
def _record_audio(self):
|
|
68
|
-
"""Internal method to record audio."""
|
|
69
|
-
pa = pyaudio.PyAudio()
|
|
70
|
-
|
|
71
|
-
try:
|
|
72
|
-
device_info = pa.get_device_info_by_index(self.device_index)
|
|
73
|
-
self.log.debug(f"Using audio device: {device_info['name']}")
|
|
74
|
-
|
|
75
|
-
self.stream = pa.open( # Store stream as class attribute
|
|
76
|
-
format=self.FORMAT,
|
|
77
|
-
channels=self.CHANNELS,
|
|
78
|
-
rate=self.RATE,
|
|
79
|
-
input=True,
|
|
80
|
-
input_device_index=self.device_index,
|
|
81
|
-
frames_per_buffer=self.CHUNK,
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
self.log.debug("Recording started...")
|
|
85
|
-
|
|
86
|
-
# For detecting continuous speech
|
|
87
|
-
speech_buffer = np.array([], dtype=np.float32)
|
|
88
|
-
silence_counter = 0
|
|
89
|
-
SILENCE_LIMIT = (
|
|
90
|
-
10 # Number of silent chunks before considering speech ended
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
chunk_count = 0
|
|
94
|
-
self.log.debug(f"Silence threshold: {self.SILENCE_THRESHOLD}")
|
|
95
|
-
|
|
96
|
-
while self.is_recording:
|
|
97
|
-
try:
|
|
98
|
-
# Skip recording if paused
|
|
99
|
-
with self.pause_lock:
|
|
100
|
-
if self.is_paused:
|
|
101
|
-
time.sleep(0.1)
|
|
102
|
-
continue
|
|
103
|
-
|
|
104
|
-
data = np.frombuffer(
|
|
105
|
-
self.stream.read(self.CHUNK, exception_on_overflow=False),
|
|
106
|
-
dtype=np.float32,
|
|
107
|
-
)
|
|
108
|
-
data = np.clip(data, -1, 1)
|
|
109
|
-
|
|
110
|
-
chunk_count += 1
|
|
111
|
-
energy = np.abs(data).mean()
|
|
112
|
-
|
|
113
|
-
# Log every 10th chunk to avoid spam
|
|
114
|
-
if chunk_count % 10 == 0:
|
|
115
|
-
self.log.debug(
|
|
116
|
-
f"Chunk {chunk_count}: energy={energy:.6f}, is_speech={self._is_speech(data)}, buffer_size={len(speech_buffer)}"
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
if self._is_speech(data):
|
|
120
|
-
silence_counter = 0
|
|
121
|
-
speech_buffer = np.concatenate((speech_buffer, data))
|
|
122
|
-
if (
|
|
123
|
-
not self.is_speaking
|
|
124
|
-
and len(speech_buffer) > self.MIN_AUDIO_LENGTH
|
|
125
|
-
):
|
|
126
|
-
self.is_speaking = True
|
|
127
|
-
else:
|
|
128
|
-
silence_counter += 1
|
|
129
|
-
if self.is_speaking:
|
|
130
|
-
speech_buffer = np.concatenate((speech_buffer, data))
|
|
131
|
-
|
|
132
|
-
# If we've had enough silence and were speaking
|
|
133
|
-
if silence_counter >= SILENCE_LIMIT and self.is_speaking:
|
|
134
|
-
if len(speech_buffer) > self.MIN_AUDIO_LENGTH:
|
|
135
|
-
self.log.debug(
|
|
136
|
-
f"Adding speech to queue: {len(speech_buffer)} samples ({len(speech_buffer)/self.RATE:.2f}s)"
|
|
137
|
-
)
|
|
138
|
-
self.audio_queue.put(speech_buffer)
|
|
139
|
-
else:
|
|
140
|
-
self.log.debug(
|
|
141
|
-
f"Speech too short: {len(speech_buffer)} samples < {self.MIN_AUDIO_LENGTH}"
|
|
142
|
-
)
|
|
143
|
-
speech_buffer = np.array([], dtype=np.float32)
|
|
144
|
-
self.is_speaking = False
|
|
145
|
-
silence_counter = 0
|
|
146
|
-
|
|
147
|
-
except Exception as e:
|
|
148
|
-
self.log.error(f"Error reading from stream: {e}")
|
|
149
|
-
break
|
|
150
|
-
|
|
151
|
-
except Exception as e:
|
|
152
|
-
self.log.error(f"Error with device {self.device_index}: {e}")
|
|
153
|
-
raise
|
|
154
|
-
finally:
|
|
155
|
-
try:
|
|
156
|
-
if self.stream is not None:
|
|
157
|
-
self.stream.stop_stream()
|
|
158
|
-
self.stream.close()
|
|
159
|
-
self.stream = None
|
|
160
|
-
except Exception as e:
|
|
161
|
-
self.log.error(f"Error closing audio stream: {e}")
|
|
162
|
-
pa.terminate()
|
|
163
|
-
|
|
164
|
-
def _process_audio(self):
|
|
165
|
-
"""Process recorded audio chunks from the queue."""
|
|
166
|
-
while self.is_recording:
|
|
167
|
-
try:
|
|
168
|
-
# Process any audio in the queue
|
|
169
|
-
if not self.audio_queue.empty():
|
|
170
|
-
_ = self.audio_queue.get_nowait()
|
|
171
|
-
else:
|
|
172
|
-
time.sleep(0.1) # Prevent busy-waiting
|
|
173
|
-
except queue.Empty:
|
|
174
|
-
continue
|
|
175
|
-
except Exception as e:
|
|
176
|
-
self.log.error(f"Error processing audio: {e}")
|
|
177
|
-
break
|
|
178
|
-
|
|
179
|
-
def list_audio_devices(self):
|
|
180
|
-
"""List all available audio input devices."""
|
|
181
|
-
pa = pyaudio.PyAudio()
|
|
182
|
-
info = []
|
|
183
|
-
self.log.info("Available Audio Devices:")
|
|
184
|
-
for i in range(pa.get_device_count()):
|
|
185
|
-
dev_info = pa.get_device_info_by_index(i)
|
|
186
|
-
if dev_info.get("maxInputChannels") > 0:
|
|
187
|
-
self.log.info(f"Index {i}: {dev_info.get('name')}")
|
|
188
|
-
info.append(dev_info)
|
|
189
|
-
pa.terminate()
|
|
190
|
-
return info
|
|
191
|
-
|
|
192
|
-
def get_device_name(self):
|
|
193
|
-
"""Get the name of the current audio device"""
|
|
194
|
-
pa = pyaudio.PyAudio()
|
|
195
|
-
try:
|
|
196
|
-
device_info = pa.get_device_info_by_index(self.device_index)
|
|
197
|
-
return device_info.get("name", f"Device {self.device_index}")
|
|
198
|
-
except Exception as e:
|
|
199
|
-
self.log.error(f"Error getting device name: {str(e)}")
|
|
200
|
-
return f"Device {self.device_index} (Error: {str(e)})"
|
|
201
|
-
finally:
|
|
202
|
-
pa.terminate()
|
|
203
|
-
|
|
204
|
-
def start_recording(self, duration=None):
|
|
205
|
-
"""Start recording and transcription."""
|
|
206
|
-
self.log.debug("Initializing recording...")
|
|
207
|
-
|
|
208
|
-
# Make sure we're not already recording
|
|
209
|
-
if self.is_recording:
|
|
210
|
-
self.log.warning("Recording is already in progress")
|
|
211
|
-
return
|
|
212
|
-
|
|
213
|
-
# Set recording flag before starting threads
|
|
214
|
-
self.is_recording = True
|
|
215
|
-
|
|
216
|
-
# Start record thread
|
|
217
|
-
self.log.debug("Starting record thread...")
|
|
218
|
-
self.record_thread = threading.Thread(target=self._record_audio)
|
|
219
|
-
self.record_thread.start()
|
|
220
|
-
|
|
221
|
-
# Wait a short moment to ensure recording has started
|
|
222
|
-
time.sleep(0.1)
|
|
223
|
-
|
|
224
|
-
# Start process thread
|
|
225
|
-
self.log.debug("Starting process thread...")
|
|
226
|
-
self.process_thread = threading.Thread(target=self._process_audio)
|
|
227
|
-
self.process_thread.start()
|
|
228
|
-
|
|
229
|
-
# Wait another moment to ensure processing has started
|
|
230
|
-
time.sleep(0.1)
|
|
231
|
-
|
|
232
|
-
if duration:
|
|
233
|
-
time.sleep(duration)
|
|
234
|
-
self.stop_recording()
|
|
235
|
-
|
|
236
|
-
def stop_recording(self):
|
|
237
|
-
"""Stop recording and transcription."""
|
|
238
|
-
self.log.debug("Stopping recording...")
|
|
239
|
-
self.is_recording = False
|
|
240
|
-
if self.record_thread:
|
|
241
|
-
self.log.debug("Waiting for record thread to finish...")
|
|
242
|
-
self.record_thread.join()
|
|
243
|
-
if self.process_thread:
|
|
244
|
-
self.log.debug("Waiting for process thread to finish...")
|
|
245
|
-
self.process_thread.join()
|
|
246
|
-
self.log.debug("Recording stopped")
|
|
247
|
-
|
|
248
|
-
def pause_recording(self):
|
|
249
|
-
"""Pause the recording without stopping threads."""
|
|
250
|
-
with self.pause_lock:
|
|
251
|
-
self.is_paused = True
|
|
252
|
-
self.log.debug("Recording paused")
|
|
253
|
-
|
|
254
|
-
def resume_recording(self):
|
|
255
|
-
"""Resume the recording."""
|
|
256
|
-
with self.pause_lock:
|
|
257
|
-
self.is_paused = False
|
|
258
|
-
self.log.debug("Recording resumed")
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
if __name__ == "__main__":
|
|
262
|
-
ar = AudioRecorder()
|
|
263
|
-
|
|
264
|
-
print("Listing available audio devices...")
|
|
265
|
-
ar.list_audio_devices()
|
|
266
|
-
|
|
267
|
-
print("Starting 30-second recording session...")
|
|
268
|
-
ar.start_recording(duration=30)
|
|
269
|
-
print("Recording session completed!")
|
|
1
|
+
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
# Standard library imports first
|
|
5
|
+
import queue
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
# Third-party imports next
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pyaudio
|
|
12
|
+
|
|
13
|
+
from gaia.logger import get_logger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AudioRecorder:
|
|
17
|
+
log = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
def __init__(
|
|
20
|
+
self,
|
|
21
|
+
device_index=None,
|
|
22
|
+
):
|
|
23
|
+
self.log = self.__class__.log # Use the class-level logger for instances
|
|
24
|
+
|
|
25
|
+
# Add thread attributes
|
|
26
|
+
self.record_thread = None
|
|
27
|
+
self.process_thread = None
|
|
28
|
+
|
|
29
|
+
# Audio parameters - optimized for better quality
|
|
30
|
+
self.CHUNK = 1024 * 2 # Reduced for lower latency while maintaining quality
|
|
31
|
+
self.FORMAT = pyaudio.paFloat32
|
|
32
|
+
self.CHANNELS = 1
|
|
33
|
+
self.RATE = 16000
|
|
34
|
+
self.device_index = (
|
|
35
|
+
self._get_default_input_device() if device_index is None else device_index
|
|
36
|
+
)
|
|
37
|
+
self.is_recording = False
|
|
38
|
+
self.audio_queue = queue.Queue()
|
|
39
|
+
self.stream = None # Add stream as class attribute
|
|
40
|
+
|
|
41
|
+
# Voice detection parameters
|
|
42
|
+
self.SILENCE_THRESHOLD = 0.003
|
|
43
|
+
self.MIN_AUDIO_LENGTH = self.RATE * 0.25
|
|
44
|
+
self.is_speaking = False
|
|
45
|
+
|
|
46
|
+
# New attributes for pause functionality
|
|
47
|
+
self.is_paused = False
|
|
48
|
+
self.pause_lock = threading.Lock()
|
|
49
|
+
|
|
50
|
+
def _get_default_input_device(self):
|
|
51
|
+
"""Get the default input device index."""
|
|
52
|
+
pa = pyaudio.PyAudio()
|
|
53
|
+
try:
|
|
54
|
+
default_device = pa.get_default_input_device_info()
|
|
55
|
+
return default_device["index"]
|
|
56
|
+
except Exception as e:
|
|
57
|
+
self.log.error(f"Error getting default input device: {e}")
|
|
58
|
+
# Fall back to device 0 if no default found
|
|
59
|
+
return 0
|
|
60
|
+
finally:
|
|
61
|
+
pa.terminate()
|
|
62
|
+
|
|
63
|
+
def _is_speech(self, audio_chunk):
|
|
64
|
+
"""Detect if audio chunk contains speech based on amplitude."""
|
|
65
|
+
return np.abs(audio_chunk).mean() > self.SILENCE_THRESHOLD
|
|
66
|
+
|
|
67
|
+
def _record_audio(self):
|
|
68
|
+
"""Internal method to record audio."""
|
|
69
|
+
pa = pyaudio.PyAudio()
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
device_info = pa.get_device_info_by_index(self.device_index)
|
|
73
|
+
self.log.debug(f"Using audio device: {device_info['name']}")
|
|
74
|
+
|
|
75
|
+
self.stream = pa.open( # Store stream as class attribute
|
|
76
|
+
format=self.FORMAT,
|
|
77
|
+
channels=self.CHANNELS,
|
|
78
|
+
rate=self.RATE,
|
|
79
|
+
input=True,
|
|
80
|
+
input_device_index=self.device_index,
|
|
81
|
+
frames_per_buffer=self.CHUNK,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
self.log.debug("Recording started...")
|
|
85
|
+
|
|
86
|
+
# For detecting continuous speech
|
|
87
|
+
speech_buffer = np.array([], dtype=np.float32)
|
|
88
|
+
silence_counter = 0
|
|
89
|
+
SILENCE_LIMIT = (
|
|
90
|
+
10 # Number of silent chunks before considering speech ended
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
chunk_count = 0
|
|
94
|
+
self.log.debug(f"Silence threshold: {self.SILENCE_THRESHOLD}")
|
|
95
|
+
|
|
96
|
+
while self.is_recording:
|
|
97
|
+
try:
|
|
98
|
+
# Skip recording if paused
|
|
99
|
+
with self.pause_lock:
|
|
100
|
+
if self.is_paused:
|
|
101
|
+
time.sleep(0.1)
|
|
102
|
+
continue
|
|
103
|
+
|
|
104
|
+
data = np.frombuffer(
|
|
105
|
+
self.stream.read(self.CHUNK, exception_on_overflow=False),
|
|
106
|
+
dtype=np.float32,
|
|
107
|
+
)
|
|
108
|
+
data = np.clip(data, -1, 1)
|
|
109
|
+
|
|
110
|
+
chunk_count += 1
|
|
111
|
+
energy = np.abs(data).mean()
|
|
112
|
+
|
|
113
|
+
# Log every 10th chunk to avoid spam
|
|
114
|
+
if chunk_count % 10 == 0:
|
|
115
|
+
self.log.debug(
|
|
116
|
+
f"Chunk {chunk_count}: energy={energy:.6f}, is_speech={self._is_speech(data)}, buffer_size={len(speech_buffer)}"
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
if self._is_speech(data):
|
|
120
|
+
silence_counter = 0
|
|
121
|
+
speech_buffer = np.concatenate((speech_buffer, data))
|
|
122
|
+
if (
|
|
123
|
+
not self.is_speaking
|
|
124
|
+
and len(speech_buffer) > self.MIN_AUDIO_LENGTH
|
|
125
|
+
):
|
|
126
|
+
self.is_speaking = True
|
|
127
|
+
else:
|
|
128
|
+
silence_counter += 1
|
|
129
|
+
if self.is_speaking:
|
|
130
|
+
speech_buffer = np.concatenate((speech_buffer, data))
|
|
131
|
+
|
|
132
|
+
# If we've had enough silence and were speaking
|
|
133
|
+
if silence_counter >= SILENCE_LIMIT and self.is_speaking:
|
|
134
|
+
if len(speech_buffer) > self.MIN_AUDIO_LENGTH:
|
|
135
|
+
self.log.debug(
|
|
136
|
+
f"Adding speech to queue: {len(speech_buffer)} samples ({len(speech_buffer)/self.RATE:.2f}s)"
|
|
137
|
+
)
|
|
138
|
+
self.audio_queue.put(speech_buffer)
|
|
139
|
+
else:
|
|
140
|
+
self.log.debug(
|
|
141
|
+
f"Speech too short: {len(speech_buffer)} samples < {self.MIN_AUDIO_LENGTH}"
|
|
142
|
+
)
|
|
143
|
+
speech_buffer = np.array([], dtype=np.float32)
|
|
144
|
+
self.is_speaking = False
|
|
145
|
+
silence_counter = 0
|
|
146
|
+
|
|
147
|
+
except Exception as e:
|
|
148
|
+
self.log.error(f"Error reading from stream: {e}")
|
|
149
|
+
break
|
|
150
|
+
|
|
151
|
+
except Exception as e:
|
|
152
|
+
self.log.error(f"Error with device {self.device_index}: {e}")
|
|
153
|
+
raise
|
|
154
|
+
finally:
|
|
155
|
+
try:
|
|
156
|
+
if self.stream is not None:
|
|
157
|
+
self.stream.stop_stream()
|
|
158
|
+
self.stream.close()
|
|
159
|
+
self.stream = None
|
|
160
|
+
except Exception as e:
|
|
161
|
+
self.log.error(f"Error closing audio stream: {e}")
|
|
162
|
+
pa.terminate()
|
|
163
|
+
|
|
164
|
+
def _process_audio(self):
|
|
165
|
+
"""Process recorded audio chunks from the queue."""
|
|
166
|
+
while self.is_recording:
|
|
167
|
+
try:
|
|
168
|
+
# Process any audio in the queue
|
|
169
|
+
if not self.audio_queue.empty():
|
|
170
|
+
_ = self.audio_queue.get_nowait()
|
|
171
|
+
else:
|
|
172
|
+
time.sleep(0.1) # Prevent busy-waiting
|
|
173
|
+
except queue.Empty:
|
|
174
|
+
continue
|
|
175
|
+
except Exception as e:
|
|
176
|
+
self.log.error(f"Error processing audio: {e}")
|
|
177
|
+
break
|
|
178
|
+
|
|
179
|
+
def list_audio_devices(self):
|
|
180
|
+
"""List all available audio input devices."""
|
|
181
|
+
pa = pyaudio.PyAudio()
|
|
182
|
+
info = []
|
|
183
|
+
self.log.info("Available Audio Devices:")
|
|
184
|
+
for i in range(pa.get_device_count()):
|
|
185
|
+
dev_info = pa.get_device_info_by_index(i)
|
|
186
|
+
if dev_info.get("maxInputChannels") > 0:
|
|
187
|
+
self.log.info(f"Index {i}: {dev_info.get('name')}")
|
|
188
|
+
info.append(dev_info)
|
|
189
|
+
pa.terminate()
|
|
190
|
+
return info
|
|
191
|
+
|
|
192
|
+
def get_device_name(self):
|
|
193
|
+
"""Get the name of the current audio device"""
|
|
194
|
+
pa = pyaudio.PyAudio()
|
|
195
|
+
try:
|
|
196
|
+
device_info = pa.get_device_info_by_index(self.device_index)
|
|
197
|
+
return device_info.get("name", f"Device {self.device_index}")
|
|
198
|
+
except Exception as e:
|
|
199
|
+
self.log.error(f"Error getting device name: {str(e)}")
|
|
200
|
+
return f"Device {self.device_index} (Error: {str(e)})"
|
|
201
|
+
finally:
|
|
202
|
+
pa.terminate()
|
|
203
|
+
|
|
204
|
+
def start_recording(self, duration=None):
|
|
205
|
+
"""Start recording and transcription."""
|
|
206
|
+
self.log.debug("Initializing recording...")
|
|
207
|
+
|
|
208
|
+
# Make sure we're not already recording
|
|
209
|
+
if self.is_recording:
|
|
210
|
+
self.log.warning("Recording is already in progress")
|
|
211
|
+
return
|
|
212
|
+
|
|
213
|
+
# Set recording flag before starting threads
|
|
214
|
+
self.is_recording = True
|
|
215
|
+
|
|
216
|
+
# Start record thread
|
|
217
|
+
self.log.debug("Starting record thread...")
|
|
218
|
+
self.record_thread = threading.Thread(target=self._record_audio)
|
|
219
|
+
self.record_thread.start()
|
|
220
|
+
|
|
221
|
+
# Wait a short moment to ensure recording has started
|
|
222
|
+
time.sleep(0.1)
|
|
223
|
+
|
|
224
|
+
# Start process thread
|
|
225
|
+
self.log.debug("Starting process thread...")
|
|
226
|
+
self.process_thread = threading.Thread(target=self._process_audio)
|
|
227
|
+
self.process_thread.start()
|
|
228
|
+
|
|
229
|
+
# Wait another moment to ensure processing has started
|
|
230
|
+
time.sleep(0.1)
|
|
231
|
+
|
|
232
|
+
if duration:
|
|
233
|
+
time.sleep(duration)
|
|
234
|
+
self.stop_recording()
|
|
235
|
+
|
|
236
|
+
def stop_recording(self):
|
|
237
|
+
"""Stop recording and transcription."""
|
|
238
|
+
self.log.debug("Stopping recording...")
|
|
239
|
+
self.is_recording = False
|
|
240
|
+
if self.record_thread:
|
|
241
|
+
self.log.debug("Waiting for record thread to finish...")
|
|
242
|
+
self.record_thread.join()
|
|
243
|
+
if self.process_thread:
|
|
244
|
+
self.log.debug("Waiting for process thread to finish...")
|
|
245
|
+
self.process_thread.join()
|
|
246
|
+
self.log.debug("Recording stopped")
|
|
247
|
+
|
|
248
|
+
def pause_recording(self):
|
|
249
|
+
"""Pause the recording without stopping threads."""
|
|
250
|
+
with self.pause_lock:
|
|
251
|
+
self.is_paused = True
|
|
252
|
+
self.log.debug("Recording paused")
|
|
253
|
+
|
|
254
|
+
def resume_recording(self):
|
|
255
|
+
"""Resume the recording."""
|
|
256
|
+
with self.pause_lock:
|
|
257
|
+
self.is_paused = False
|
|
258
|
+
self.log.debug("Recording resumed")
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
if __name__ == "__main__":
|
|
262
|
+
ar = AudioRecorder()
|
|
263
|
+
|
|
264
|
+
print("Listing available audio devices...")
|
|
265
|
+
ar.list_audio_devices()
|
|
266
|
+
|
|
267
|
+
print("Starting 30-second recording session...")
|
|
268
|
+
ar.start_recording(duration=30)
|
|
269
|
+
print("Recording session completed!")
|