noesium 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/agents/askura_agent/__init__.py +22 -0
- noesium/agents/askura_agent/askura_agent.py +480 -0
- noesium/agents/askura_agent/conversation.py +164 -0
- noesium/agents/askura_agent/extractor.py +175 -0
- noesium/agents/askura_agent/memory.py +14 -0
- noesium/agents/askura_agent/models.py +239 -0
- noesium/agents/askura_agent/prompts.py +202 -0
- noesium/agents/askura_agent/reflection.py +234 -0
- noesium/agents/askura_agent/summarizer.py +30 -0
- noesium/agents/askura_agent/utils.py +6 -0
- noesium/agents/deep_research/__init__.py +13 -0
- noesium/agents/deep_research/agent.py +398 -0
- noesium/agents/deep_research/prompts.py +84 -0
- noesium/agents/deep_research/schemas.py +42 -0
- noesium/agents/deep_research/state.py +54 -0
- noesium/agents/search/__init__.py +5 -0
- noesium/agents/search/agent.py +474 -0
- noesium/agents/search/state.py +28 -0
- noesium/core/__init__.py +1 -1
- noesium/core/agent/base.py +10 -2
- noesium/core/goalith/decomposer/llm_decomposer.py +1 -1
- noesium/core/llm/__init__.py +1 -1
- noesium/core/llm/base.py +2 -2
- noesium/core/llm/litellm.py +42 -21
- noesium/core/llm/llamacpp.py +25 -4
- noesium/core/llm/ollama.py +43 -22
- noesium/core/llm/openai.py +25 -5
- noesium/core/llm/openrouter.py +1 -1
- noesium/core/toolify/base.py +9 -2
- noesium/core/toolify/config.py +2 -2
- noesium/core/toolify/registry.py +21 -5
- noesium/core/tracing/opik_tracing.py +7 -7
- noesium/core/vector_store/__init__.py +2 -2
- noesium/core/vector_store/base.py +1 -1
- noesium/core/vector_store/pgvector.py +10 -13
- noesium/core/vector_store/weaviate.py +2 -1
- noesium/toolkits/__init__.py +1 -0
- noesium/toolkits/arxiv_toolkit.py +310 -0
- noesium/toolkits/audio_aliyun_toolkit.py +441 -0
- noesium/toolkits/audio_toolkit.py +370 -0
- noesium/toolkits/bash_toolkit.py +332 -0
- noesium/toolkits/document_toolkit.py +454 -0
- noesium/toolkits/file_edit_toolkit.py +552 -0
- noesium/toolkits/github_toolkit.py +395 -0
- noesium/toolkits/gmail_toolkit.py +575 -0
- noesium/toolkits/image_toolkit.py +425 -0
- noesium/toolkits/memory_toolkit.py +398 -0
- noesium/toolkits/python_executor_toolkit.py +334 -0
- noesium/toolkits/search_toolkit.py +451 -0
- noesium/toolkits/serper_toolkit.py +623 -0
- noesium/toolkits/tabular_data_toolkit.py +537 -0
- noesium/toolkits/user_interaction_toolkit.py +365 -0
- noesium/toolkits/video_toolkit.py +168 -0
- noesium/toolkits/wikipedia_toolkit.py +420 -0
- noesium-0.2.1.dist-info/METADATA +253 -0
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/RECORD +59 -23
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/licenses/LICENSE +1 -1
- noesium-0.1.0.dist-info/METADATA +0 -525
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/WHEEL +0 -0
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Audio processing toolkit using Aliyun NLS (Natural Language Service) for transcription.
|
|
3
|
+
|
|
4
|
+
Provides tools for audio transcription using Aliyun's Lingjie AI service and
|
|
5
|
+
audio content analysis using LLMs. This toolkit migrates the functionality
|
|
6
|
+
from the smartvoice module to the toolify framework.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from typing import Any, Callable, Dict, Optional
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
from aliyunsdkcore.acs_exception.exceptions import ClientException, ServerException
|
|
16
|
+
from aliyunsdkcore.client import AcsClient
|
|
17
|
+
from aliyunsdkcore.request import CommonRequest
|
|
18
|
+
|
|
19
|
+
ALIYUN_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
ClientException = None
|
|
22
|
+
ServerException = None
|
|
23
|
+
AcsClient = None
|
|
24
|
+
CommonRequest = None
|
|
25
|
+
ALIYUN_AVAILABLE = False
|
|
26
|
+
|
|
27
|
+
from noesium.core.toolify.base import AsyncBaseToolkit
|
|
28
|
+
from noesium.core.toolify.config import ToolkitConfig
|
|
29
|
+
from noesium.core.toolify.registry import register_toolkit
|
|
30
|
+
from noesium.core.utils.logging import get_logger
|
|
31
|
+
|
|
32
|
+
logger = get_logger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@register_toolkit("audio_aliyun")
|
|
36
|
+
class AudioAliyunToolkit(AsyncBaseToolkit):
|
|
37
|
+
"""
|
|
38
|
+
Toolkit for audio processing and analysis using Aliyun NLS service.
|
|
39
|
+
|
|
40
|
+
This toolkit provides capabilities for:
|
|
41
|
+
- Audio transcription using Aliyun's Lingjie AI service
|
|
42
|
+
- Audio content analysis and Q&A using LLMs
|
|
43
|
+
- Async/await support for better performance
|
|
44
|
+
|
|
45
|
+
Features:
|
|
46
|
+
- Direct transcription from publicly accessible audio URLs
|
|
47
|
+
- LLM-powered audio content analysis
|
|
48
|
+
- Optimized for Chinese language content
|
|
49
|
+
|
|
50
|
+
Required configuration:
|
|
51
|
+
- Aliyun Access Key ID and Secret
|
|
52
|
+
- NLS App Key
|
|
53
|
+
- LLM configuration for analysis
|
|
54
|
+
|
|
55
|
+
Note: Audio files must be publicly accessible URLs for Aliyun NLS service.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(self, config: ToolkitConfig = None):
|
|
59
|
+
"""
|
|
60
|
+
Initialize the Aliyun audio toolkit.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
config: Toolkit configuration containing API keys and settings
|
|
64
|
+
"""
|
|
65
|
+
if not ALIYUN_AVAILABLE:
|
|
66
|
+
raise ImportError("Aliyun packages are not installed. Install them with: pip install 'noesium[aliyun]'")
|
|
67
|
+
|
|
68
|
+
super().__init__(config)
|
|
69
|
+
|
|
70
|
+
# Aliyun credentials
|
|
71
|
+
self.ak_id = self.config.config.get("ALIYUN_ACCESS_KEY_ID") or os.getenv("ALIYUN_ACCESS_KEY_ID")
|
|
72
|
+
self.ak_secret = self.config.config.get("ALIYUN_ACCESS_KEY_SECRET") or os.getenv("ALIYUN_ACCESS_KEY_SECRET")
|
|
73
|
+
self.app_key = self.config.config.get("ALIYUN_NLS_APP_KEY") or os.getenv("ALIYUN_NLS_APP_KEY")
|
|
74
|
+
self.region_id = self.config.config.get("ALIYUN_REGION_ID", "cn-shanghai")
|
|
75
|
+
|
|
76
|
+
if not all([self.ak_id, self.ak_secret, self.app_key]):
|
|
77
|
+
raise ValueError(
|
|
78
|
+
"Aliyun credentials not found. Please set ALIYUN_ACCESS_KEY_ID, "
|
|
79
|
+
"ALIYUN_ACCESS_KEY_SECRET, and ALIYUN_NLS_APP_KEY in config or environment"
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Configuration - minimal setup, no caching like smart_voice.py
|
|
83
|
+
|
|
84
|
+
# Aliyun NLS service constants
|
|
85
|
+
self.PRODUCT = "nls-filetrans"
|
|
86
|
+
self.DOMAIN = f"filetrans.{self.region_id}.aliyuncs.com"
|
|
87
|
+
self.API_VERSION = "2018-08-17"
|
|
88
|
+
self.POST_REQUEST_ACTION = "SubmitTask"
|
|
89
|
+
self.GET_REQUEST_ACTION = "GetTaskResult"
|
|
90
|
+
|
|
91
|
+
# Request parameters
|
|
92
|
+
self.KEY_APP_KEY = "appkey"
|
|
93
|
+
self.KEY_FILE_LINK = "file_link"
|
|
94
|
+
self.KEY_VERSION = "version"
|
|
95
|
+
self.KEY_ENABLE_WORDS = "enable_words"
|
|
96
|
+
self.KEY_AUTO_SPLIT = "auto_split"
|
|
97
|
+
|
|
98
|
+
# Response parameters
|
|
99
|
+
self.KEY_TASK = "Task"
|
|
100
|
+
self.KEY_TASK_ID = "TaskId"
|
|
101
|
+
self.KEY_STATUS_TEXT = "StatusText"
|
|
102
|
+
self.KEY_RESULT = "Result"
|
|
103
|
+
|
|
104
|
+
# Status values
|
|
105
|
+
self.STATUS_SUCCESS = "SUCCESS"
|
|
106
|
+
self.STATUS_RUNNING = "RUNNING"
|
|
107
|
+
self.STATUS_QUEUEING = "QUEUEING"
|
|
108
|
+
|
|
109
|
+
# Create AcsClient instance
|
|
110
|
+
self.client = AcsClient(self.ak_id, self.ak_secret, self.region_id)
|
|
111
|
+
|
|
112
|
+
async def _transcribe_file_aliyun(self, file_link: str) -> Optional[Dict[str, Any]]:
    """
    Perform file transcription using Aliyun NLS service.

    Submits a SubmitTask request, then polls GetTaskResult every 10 seconds
    (up to 10 minutes) until the task leaves the RUNNING/QUEUEING states.
    This follows the same logic as smart_voice.py but with async support.

    Args:
        file_link: URL of the audio file to transcribe (must be publicly
            accessible to the Aliyun NLS service).

    Returns:
        Transcription result dictionary or None if failed.
    """
    # Fix: asyncio.get_event_loop() is deprecated inside coroutines since
    # Python 3.10; get_running_loop() is the correct call here. It is also
    # hoisted so the submit and poll phases share the same loop reference.
    loop = asyncio.get_running_loop()

    # Build the SubmitTask request.
    post_request = CommonRequest()
    post_request.set_domain(self.DOMAIN)
    post_request.set_version(self.API_VERSION)
    post_request.set_product(self.PRODUCT)
    post_request.set_action_name(self.POST_REQUEST_ACTION)
    post_request.set_method("POST")

    # Configure task parameters.
    # Use version 4.0 for new integrations, set enable_words to False by default
    task = {
        self.KEY_APP_KEY: self.app_key,
        self.KEY_FILE_LINK: file_link,
        self.KEY_VERSION: "4.0",
        self.KEY_ENABLE_WORDS: False,
    }

    # Uncomment to enable auto split for multi-speaker scenarios
    # task[self.KEY_AUTO_SPLIT] = True

    task_json = json.dumps(task)
    self.logger.info(f"Submitting task: {task_json}")
    post_request.add_body_params(self.KEY_TASK, task_json)

    task_id = ""
    try:
        # The SDK client is blocking; run it in an executor so the event
        # loop stays responsive.
        post_response = await loop.run_in_executor(None, self.client.do_action_with_exception, post_request)
        post_response_json = json.loads(post_response)
        self.logger.info(f"Submit response: {post_response_json}")

        status_text = post_response_json[self.KEY_STATUS_TEXT]
        if status_text == self.STATUS_SUCCESS:
            self.logger.info("File transcription request submitted successfully!")
            task_id = post_response_json[self.KEY_TASK_ID]
        else:
            self.logger.error(f"File transcription request failed: {status_text}")
            return None
    except ServerException as e:
        self.logger.error(f"Server error: {e}")
        return None
    except ClientException as e:
        self.logger.error(f"Client error: {e}")
        return None

    if not task_id:
        self.logger.error("No task ID received")
        return None

    # Build the GetTaskResult request used for polling.
    get_request = CommonRequest()
    get_request.set_domain(self.DOMAIN)
    get_request.set_version(self.API_VERSION)
    get_request.set_product(self.PRODUCT)
    get_request.set_action_name(self.GET_REQUEST_ACTION)
    get_request.set_method("GET")
    get_request.add_query_param(self.KEY_TASK_ID, task_id)

    # Poll for results.
    self.logger.info(f"Polling for results with task ID: {task_id}")
    status_text = ""
    max_attempts = 60  # Maximum 10 minutes (60 * 10 seconds)
    attempt = 0

    while attempt < max_attempts:
        try:
            # Blocking SDK call, again pushed to the executor.
            get_response = await loop.run_in_executor(None, self.client.do_action_with_exception, get_request)
            get_response_json = json.loads(get_response)
            self.logger.info(f"Poll response (attempt {attempt + 1}): {get_response_json}")

            status_text = get_response_json[self.KEY_STATUS_TEXT]
            if status_text == self.STATUS_RUNNING or status_text == self.STATUS_QUEUEING:
                # Task not finished yet; wait before the next poll.
                await asyncio.sleep(10)
                attempt += 1
            else:
                # Terminal status reached; stop polling.
                break
        except ServerException as e:
            self.logger.error(f"Server error during polling: {e}")
            return None
        except ClientException as e:
            self.logger.error(f"Client error during polling: {e}")
            return None

    if status_text == self.STATUS_SUCCESS:
        self.logger.info("File transcription completed successfully!")
        return get_response_json.get(self.KEY_RESULT)
    else:
        self.logger.error(f"File transcription failed with status: {status_text}")
        return None
|
|
216
|
+
|
|
217
|
+
def _extract_transcription_text(self, result: Dict[str, Any]) -> Optional[str]:
    """
    Extract transcription text from the lingji_ai result.

    Sentences appear once per audio channel (same text, different
    ChannelId), so duplicates must be removed. Fix: the previous
    implementation collected texts into a set and then sorted them
    alphabetically, which scrambled the temporal order of the transcript;
    de-duplication now preserves first-occurrence order via dict.fromkeys.

    Args:
        result: The result from transcribe_file function.

    Returns:
        Extracted transcription text or None if extraction fails.
    """
    try:
        # The result structure from lingji_ai contains sentences with text.
        if isinstance(result, dict) and "Sentences" in result:
            sentences = result["Sentences"]
            if isinstance(sentences, list):
                # Collect non-empty texts in order of appearance.
                texts = [
                    sentence["Text"].strip()
                    for sentence in sentences
                    if isinstance(sentence, dict) and "Text" in sentence and sentence["Text"].strip()
                ]
                # dict.fromkeys drops duplicates while keeping the first
                # occurrence's position (dicts preserve insertion order).
                unique_texts = list(dict.fromkeys(texts))
                if unique_texts:
                    return " ".join(unique_texts)

        # If the structure is different, try to find text in the result.
        if isinstance(result, dict):
            # Look for common transcription result keys.
            for key in ["text", "transcription", "content", "result"]:
                if key in result:
                    return str(result[key])

            # No direct text found; serialize the whole structure.
            return json.dumps(result, ensure_ascii=False)

        # If result is already a string, return it as-is.
        if isinstance(result, str):
            return result

    except Exception as e:
        self.logger.error("Error extracting transcription text: %s", str(e))
        return None

    return None
|
|
267
|
+
|
|
268
|
+
async def _transcribe_audio_aliyun(self, md5_hash: str) -> Dict:
    """
    Transcribe audio file using Aliyun NLS service.

    NOTE(review): this method reads ``self.cache_dir`` and
    ``self.md5_to_path``, but neither attribute is assigned in
    ``__init__`` (which explicitly states "no caching like
    smart_voice.py"). As written, any call raises AttributeError on the
    first line below — confirm whether this is dead code carried over
    from the smartvoice module or whether the cache attributes should be
    initialized. It is also not exposed in get_tools_map().

    Args:
        md5_hash: MD5 hash of the audio file

    Returns:
        Transcription result with text and metadata
    """
    # Check cache first
    # assumes cache_dir is a pathlib.Path (uses the `/` operator) — TODO confirm
    cache_file = self.cache_dir / f"{md5_hash}.json"
    if cache_file.exists():
        with open(cache_file, "r") as f:
            return json.load(f)

    # Get file path
    # assumes md5_to_path maps md5 -> URL/path strings — TODO confirm
    if md5_hash not in self.md5_to_path:
        raise ValueError(f"Audio file with MD5 {md5_hash} not found in cache")

    file_path = self.md5_to_path[md5_hash]

    try:
        self.logger.info(f"Transcribing audio file with Aliyun NLS: {file_path}")

        # For Aliyun NLS, we need to provide a URL to the file
        # If it's a local file, we need to upload it or provide a URL
        # For now, we'll assume the file_path is accessible as a URL
        # In production, you might need to upload the file to OSS first

        # Perform transcription
        aliyun_result = await self._transcribe_file_aliyun(file_path)

        if aliyun_result is None:
            raise Exception("Aliyun NLS transcription failed")

        # Extract text from Aliyun result
        transcription_text = self._extract_transcription_text(aliyun_result)

        if transcription_text is None:
            raise Exception("Failed to extract text from Aliyun NLS result")

        # Create standardized result format
        result = {
            "text": transcription_text,
            "language": "zh",  # Aliyun NLS primarily supports Chinese
            "aliyun_result": aliyun_result,  # Keep original result for reference
            "provider": "aliyun_nls",
            "duration": None,  # Aliyun NLS doesn't provide duration in the same format
        }

        # Cache the result
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Aliyun NLS transcription completed")
        return result

    except Exception as e:
        # Log and re-raise so callers can handle/report the failure.
        self.logger.error(f"Aliyun NLS transcription failed: {e}")
        raise
|
|
329
|
+
|
|
330
|
+
async def transcribe_audio(self, audio_path: str) -> Dict:
    """
    Transcribe an audio file to text using Aliyun NLS service.

    The audio_path must be a publicly accessible URL; the Aliyun service
    fetches the file itself. Same approach as SmartVoice.transcribe(),
    but with async support.

    Args:
        audio_path: URL of the audio file to transcribe (must be publicly accessible).

    Returns:
        On success: {"text": ..., "aliyun_result": ..., "provider": "aliyun_nls"}
        where "aliyun_result" is the original NLS payload kept for reference.
        On failure: {"error": <message>, "text": ""}.

    Example:
        result = await transcribe_audio("https://example.com/audio.mp3")
        print(result["text"])  # Full transcription
    """
    try:
        # Step 1: run the NLS submit/poll cycle.
        raw_result = await self._transcribe_file_aliyun(audio_path)
        if raw_result is None:
            return {"error": "Aliyun NLS transcription failed", "text": ""}

        # Step 2: pull the plain text out of the NLS payload.
        text = self._extract_transcription_text(raw_result)
        if text is None:
            return {"error": "Failed to extract text from Aliyun NLS result", "text": ""}

        return {"text": text, "aliyun_result": raw_result, "provider": "aliyun_nls"}

    except Exception as exc:
        message = f"Aliyun audio transcription failed: {str(exc)}"
        self.logger.error(message)
        return {"error": message, "text": ""}
|
|
368
|
+
|
|
369
|
+
async def audio_qa(self, audio_path: str, question: str) -> str:
    """
    Answer a question about an audio file's content.

    The audio is transcribed first (via ``transcribe_audio``, i.e. Aliyun
    NLS), then the transcript plus the question are handed to the LLM for
    analysis. Particularly effective for Chinese audio content.

    Args:
        audio_path: URL of the audio file to transcribe (must be publicly accessible).
        question: Question to ask about the audio content.

    Returns:
        The LLM's answer, or a human-readable error string on failure.
    """
    self.logger.info(f"Processing Aliyun audio Q&A for: {audio_path}")
    self.logger.info(f"Question: {question}")

    try:
        # Obtain the transcript; bail out early on any transcription error.
        result = await self.transcribe_audio(audio_path)
        if "error" in result:
            return f"Failed to transcribe audio: {result['error']}"

        transcript = result.get("text", "")
        if not transcript.strip():
            return "No speech detected in the audio file."

        # Build the analysis prompt around the transcript.
        prompt = f"""基于以下音频转录内容,请回答问题。

音频文件: {audio_path}
转录服务: 阿里云语音识别 (Aliyun NLS)
转录内容:
{transcript}

问题: {question}

请基于上述音频内容提供清晰、详细的答案。如果转录内容不足以回答问题,请明确说明。"""

        # Ask the LLM to answer based on the transcript only.
        messages = [
            {
                "role": "system",
                "content": "你是一个专门分析音频内容的助手。请基于提供的转录内容提供清晰、准确的答案。",
            },
            {"role": "user", "content": prompt},
        ]
        answer = await self.llm_client.completion(messages=messages, temperature=0.1, max_tokens=1000)
        return answer.strip()

    except Exception as exc:
        failure = f"Aliyun audio Q&A failed: {str(exc)}"
        self.logger.error(failure)
        return failure
|
|
430
|
+
|
|
431
|
+
async def get_tools_map(self) -> Dict[str, Callable]:
    """
    Expose this toolkit's public tools.

    Returns:
        Mapping from tool name to the bound coroutine implementing it.
    """
    tools: Dict[str, Callable] = {}
    tools["transcribe_audio"] = self.transcribe_audio
    tools["audio_qa"] = self.audio_qa
    return tools
|