allvoicelabs-mcp 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- allvoicelabs_mcp/__init__.py +1 -0
- allvoicelabs_mcp/__pycache__/__init__.cpython-311.pyc +0 -0
- allvoicelabs_mcp/__pycache__/server.cpython-311.pyc +0 -0
- allvoicelabs_mcp/server.py +276 -0
- allvoicelabs_mcp-0.0.1.dist-info/METADATA +38 -0
- allvoicelabs_mcp-0.0.1.dist-info/RECORD +20 -0
- allvoicelabs_mcp-0.0.1.dist-info/WHEEL +5 -0
- allvoicelabs_mcp-0.0.1.dist-info/entry_points.txt +2 -0
- allvoicelabs_mcp-0.0.1.dist-info/top_level.txt +2 -0
- client/__init__.py +3 -0
- client/__pycache__/__init__.cpython-311.pyc +0 -0
- client/__pycache__/all_voice_lab.cpython-311.pyc +0 -0
- client/__pycache__/audio_isolation.cpython-311.pyc +0 -0
- client/__pycache__/get_all_voices.cpython-311.pyc +0 -0
- client/__pycache__/get_supported_voice_model.cpython-311.pyc +0 -0
- client/__pycache__/model.cpython-311.pyc +0 -0
- client/__pycache__/speech_to_speech.cpython-311.pyc +0 -0
- client/__pycache__/text_to_speech.cpython-311.pyc +0 -0
- client/all_voice_lab.py +313 -0
- client/model.py +97 -0
@@ -0,0 +1 @@
|
|
1
|
+
# AllVoiceLabs MCP package
|
Binary file
|
Binary file
|
@@ -0,0 +1,276 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import sys
|
4
|
+
|
5
|
+
from mcp.server.fastmcp import FastMCP
|
6
|
+
from mcp.types import TextContent
|
7
|
+
|
8
|
+
from client import AllVoiceLab
|
9
|
+
|
10
|
+
|
11
|
+
def main():
    """Run the AllVoiceLabs MCP server.

    Reads ``ALLVOICELABS_API_KEY`` (required) and
    ``ALLVOICELABS_API_DOMAIN`` (optional) from the environment, builds
    an AllVoiceLab client, registers the voice tools on a FastMCP
    server, and blocks in ``mcp.run()``.
    """
    # Read configuration from the environment.
    api_key = os.getenv("ALLVOICELABS_API_KEY")
    api_domain = os.getenv("ALLVOICELABS_API_DOMAIN")

    if not api_key:
        # Diagnostics go to stderr: with the stdio MCP transport,
        # stdout is reserved for the JSON-RPC protocol stream.
        print("错误: 未设置ALLVOICELABS_API_KEY环境变量", file=sys.stderr)
        sys.exit(1)

    # Create the AllVoiceLab API client. Only pass the domain when it
    # is actually set; forwarding None would override the client's
    # default and crash on `api_domain.rstrip('/')`.
    if api_domain:
        all_voice_lab = AllVoiceLab(api_key, api_domain)
    else:
        all_voice_lab = AllVoiceLab(api_key)

    # Create the MCP server.
    mcp = FastMCP("AllVoiceLabs")

    @mcp.tool(
        name="get_supported_voice_model",
        description="""【千音工具】获取可用的语音模型"""
    )
    def get_supported_models() -> TextContent:
        """Return the available speech models as a formatted text block."""
        try:
            resp = all_voice_lab.get_supported_voice_model()
            models = resp.models
            if not models:
                return TextContent(
                    type="text",
                    text="未找到可用的语音模型"
                )
            # One entry per model, separated (and terminated) by a dashed line.
            buffer = []
            for i, model in enumerate(models):
                if i > 0:
                    buffer.append("---------------------\n")
                buffer.append(f"- id: {model.model_id}\n")
                buffer.append(f"- 名称:{model.name}\n")
                buffer.append(f"- 描述:{model.description}\n")
            buffer.append("---------------------\n")
            return TextContent(
                type="text",
                text="".join(buffer)
            )
        except Exception as e:
            logging.error(e)
            return TextContent(
                type="text",
                text="获取模型失败,工具暂时不可用"
            )

    @mcp.tool(
        name="get_all_voices",
        description="""【千音工具】获取可用的音色"""
    )
    def get_all_voices() -> TextContent:
        """Return the available voices as a formatted text block."""
        try:
            resp = all_voice_lab.get_all_voices()
            voices = resp.voices
            if not voices:
                return TextContent(
                    type="text",
                    text="未找到可用的音色"
                )
            # One entry per voice, separated (and terminated) by a dashed line.
            buffer = []
            for i, voice in enumerate(voices):
                if i > 0:
                    buffer.append("---------------------\n")
                buffer.append(f"- id: {voice.voice_id}\n")
                buffer.append(f"- 名称:{voice.name}\n")
                buffer.append(f"- 描述:{voice.description}\n")
                # Language and gender are optional labels on a voice.
                if "language" in voice.labels:
                    buffer.append(f"- 语言:{voice.labels['language']}\n")
                if "gender" in voice.labels:
                    buffer.append(f"- 性别:{voice.labels['gender']}\n")
            buffer.append("---------------------\n")
            return TextContent(
                type="text",
                text="".join(buffer)
            )
        except Exception as e:
            logging.error(e)
            return TextContent(
                type="text",
                text="获取音色失败,工具暂时不可用"
            )

    @mcp.tool(
        name="text_to_speech",
        description="""【千音工具】根据提供的文本合成语音。

        Args:
            text: 需要合成语音的目标文本
            voice_id: 合成语音时使用的音色的音色id,用户没有指定的时候,获取可用的音色并使用第一个
            model_id: 合成语音时使用的模型id,用户没有指定的时候,获取可用的语音模型并使用第一个
            speed: 语速, 范围[-5, 5], 默认值1
            output_dir: 输出目录,默认为用户桌面

        Returns:
            TextContent containing file path to the generated audio file.
        """
    )
    def text_to_speech(
            text: str,
            voice_id: str,
            model_id: str,
            speed: int = 1,
            output_dir: str = os.path.join(os.path.expanduser("~"), "Desktop")
    ) -> TextContent:
        """Synthesize speech for *text* and report the saved file path."""
        # Validate required parameters; report problems as tool text,
        # not exceptions, so the model can self-correct.
        if not text:
            return TextContent(
                type="text",
                text="text参数不能为空"
            )
        if not voice_id:
            return TextContent(
                type="text",
                text="voice_id参数不能为空"
            )
        if not model_id:
            return TextContent(
                type="text",
                text="model_id参数不能为空"
            )

        try:
            file_path = all_voice_lab.text_to_speech(text, voice_id, model_id, output_dir, speed)
            return TextContent(
                type="text",
                text=f"语音生成完成,文件保存在:{file_path}\n"
            )
        except Exception as e:
            logging.error(e)
            return TextContent(
                type="text",
                text="合成失败,工具暂时不可用"
            )

    @mcp.tool(
        name="speech_to_speech",
        description="""【千音工具】将音频转换为另一个音色。

        Args:
            audio_file_path: 音频文件路径
            voice_id: 合成语音时使用的音色的音色id
            similarity: 相似度,范围[0, 1],默认值1
            remove_background_noise: 是否去除背景噪音,默认值False
            output_dir: 输出目录,默认为用户桌面

        Returns:
            TextContent containing file path to the generated audio file.
        """
    )
    def speech_to_speech(
            audio_file_path: str,
            voice_id: str,
            similarity: float = 1,
            remove_background_noise: bool = False,
            output_dir: str = os.path.join(os.path.expanduser("~"), "Desktop")
    ) -> TextContent:
        """Convert an audio file to another voice and report the saved path."""
        # Validate required parameters.
        if not audio_file_path:
            return TextContent(
                type="text",
                text="audio_file_path参数不能为空"
            )
        if not voice_id:
            return TextContent(
                type="text",
                text="voice_id参数不能为空"
            )

        # Enforce the documented similarity range.
        if similarity < 0 or similarity > 1:
            return TextContent(
                type="text",
                text="similarity参数必须在0到1之间"
            )

        try:
            file_path = all_voice_lab.speech_to_speech(audio_file_path, voice_id, output_dir, similarity,
                                                       remove_background_noise)
            return TextContent(
                type="text",
                text=f"音频转换完成,文件保存在:{file_path}\n"
            )
        except FileNotFoundError as e:
            logging.error(e)
            return TextContent(
                type="text",
                text=f"音频文件不存在: {audio_file_path}"
            )
        except Exception as e:
            logging.error(e)
            return TextContent(
                type="text",
                text="转换失败,工具暂时不可用"
            )

    @mcp.tool(
        name="audio_isolation",
        description="""【千音工具】这是一个人声分离功能,消除音频的背景噪音,仅保留人声。

        Args:
            audio_file_path: 音频文件路径
            output_dir: 输出目录,默认为用户桌面

        Returns:
            TextContent containing file path to the generated audio file.
        """
    )
    def audio_isolation(
            audio_file_path: str,
            output_dir: str = os.path.join(os.path.expanduser("~"), "Desktop")
    ) -> TextContent:
        """Strip background noise from an audio file, keeping only vocals."""
        # Validate required parameters.
        if not audio_file_path:
            return TextContent(
                type="text",
                text="audio_file_path参数不能为空"
            )

        try:
            file_path = all_voice_lab.audio_isolation(audio_file_path, output_dir)
            return TextContent(
                type="text",
                text=f"人声分离完成,文件保存在:{file_path}\n"
            )
        except FileNotFoundError as e:
            logging.error(e)
            return TextContent(
                type="text",
                text=f"音频文件不存在: {audio_file_path}"
            )
        except Exception as e:
            logging.error(e)
            return TextContent(
                type="text",
                text="人声分离失败,工具暂时不可用"
            )

    # Startup notice on stderr — stdout carries the MCP protocol.
    print("Starting AllVoiceLabs MCP server", file=sys.stderr)
    mcp.run()


if __name__ == "__main__":
    main()
@@ -0,0 +1,38 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: allvoicelabs-mcp
|
3
|
+
Version: 0.0.1
|
4
|
+
Summary: AllVoiceLabs MCP Server
|
5
|
+
Author-email: AllVoiceLabs <support@example.com>
|
6
|
+
Keywords: allvoicelabs,mcp,text-to-speech,speech-to-speech,voice-isolation
|
7
|
+
Requires-Python: >=3.11
|
8
|
+
Description-Content-Type: text/markdown
|
9
|
+
Requires-Dist: build>=1.2.2.post1
|
10
|
+
Requires-Dist: mcp[cli]>=1.6.0
|
11
|
+
Requires-Dist: requests>=2.32.3
|
12
|
+
Requires-Dist: setuptools>=45
|
13
|
+
Requires-Dist: twine>=6.1.0
|
14
|
+
|
15
|
+
# AllVoiceLabs MCP
|
16
|
+
|
17
|
+
This is an AllVoiceLabs MCP (Model Control Protocol) service that provides voice synthesis and conversion functionality.
|
18
|
+
|
19
|
+
## Quickstart
|
20
|
+
|
21
|
+
1. Get your API key from [AllVoiceLab](https://www.allvoicelab.com/).
|
22
|
+
2. Install `uv` (Python package manager), install with `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
23
|
+
3. Use Cline for example:
|
24
|
+
|
25
|
+
```json
|
26
|
+
{
|
27
|
+
"mcpServers": {
|
28
|
+
"AllVoiceLabs": {
|
29
|
+
"command": "uvx",
|
30
|
+
"args": ["allvoicelabs-mcp"],
|
31
|
+
"env": {
|
32
|
+
"ALLVOICELABS_API_KEY": "<insert-your-api-key-here>",
|
33
|
+
"ALLVOICELABS_API_DOMAIN": "<insert-api-domain-here>"
|
34
|
+
}
|
35
|
+
}
|
36
|
+
}
|
37
|
+
}
|
38
|
+
```
|
@@ -0,0 +1,20 @@
|
|
1
|
+
allvoicelabs_mcp/__init__.py,sha256=8-U4es3BzXwCStMWsQkGNSRofDvjXWboOYdlCDBY1b8,22
|
2
|
+
allvoicelabs_mcp/server.py,sha256=oob5vEaGbRwTRbyiDKRNuWXN62vMwl-rSdbdWS-9__0,9147
|
3
|
+
allvoicelabs_mcp/__pycache__/__init__.cpython-311.pyc,sha256=UuAB8stq2Hhg8yU8EeKhPwxKtjEgjQLiN6UCIHlYRVg,183
|
4
|
+
allvoicelabs_mcp/__pycache__/server.cpython-311.pyc,sha256=3ZqCMJ61wWcKGdB4_PwuDheR2koGlEZbsjJV5t4eMHc,11444
|
5
|
+
client/__init__.py,sha256=H12-2aUEjckmXWDiMP6TkPH7FJw89wNQtCsIAInfhAo,65
|
6
|
+
client/all_voice_lab.py,sha256=iFVjt7HVKclBnNX3to2DO7xM2Fa-uWBP2TWI6LFw5GU,10271
|
7
|
+
client/model.py,sha256=WRI1SQSMtuColwmV_YO5E-ChJIEHpy4xWS56_BfAQxA,2798
|
8
|
+
client/__pycache__/__init__.cpython-311.pyc,sha256=bdxyQkRPcfVJ2HHfY5iVgpy55-7s8qTvV6XdRIN04-M,261
|
9
|
+
client/__pycache__/all_voice_lab.cpython-311.pyc,sha256=cUB76dN0EUwQcFQlVMdDh0440oVOw2rUc99wTa1i3KM,12135
|
10
|
+
client/__pycache__/audio_isolation.cpython-311.pyc,sha256=6xJn442ydg9KOtHEC1pPk49NWqDuDV9jABBH-49k3fY,3007
|
11
|
+
client/__pycache__/get_all_voices.cpython-311.pyc,sha256=OCPNseNONuZZMnrq-4QR-y-wfsyzYBr3_BBphtbwbD0,5172
|
12
|
+
client/__pycache__/get_supported_voice_model.cpython-311.pyc,sha256=ifG4AcYSGMxdc8atkY-uMv-2mgD17inOJJ8R_8VeQ0A,4447
|
13
|
+
client/__pycache__/model.cpython-311.pyc,sha256=kcywkYqKMvfHGOC49O9WDDwjf3nUAPITN2z-vGwU0xw,6644
|
14
|
+
client/__pycache__/speech_to_speech.cpython-311.pyc,sha256=YeM_RNvpY35PQWAqVGoAh8wtdpbTFbjztNoINPTXS6M,3488
|
15
|
+
client/__pycache__/text_to_speech.cpython-311.pyc,sha256=BGNsjwBv9YICI60hoq1-WhkhIdej1WizwJiFVBuct_k,2768
|
16
|
+
allvoicelabs_mcp-0.0.1.dist-info/METADATA,sha256=rzsv_QuczE400Ip7tntCR-yNa-Qd_JesNo-QT6bpo6g,1061
|
17
|
+
allvoicelabs_mcp-0.0.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
18
|
+
allvoicelabs_mcp-0.0.1.dist-info/entry_points.txt,sha256=Bb6tkeBuBROB82Hgl3EHn0SprJOo3sl6xQYoIgwglC8,66
|
19
|
+
allvoicelabs_mcp-0.0.1.dist-info/top_level.txt,sha256=Uf_WW0U55B5f-gihggnnoYeVVvQ3pPFOJw4asw6CQME,24
|
20
|
+
allvoicelabs_mcp-0.0.1.dist-info/RECORD,,
|
client/__init__.py
ADDED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
client/all_voice_lab.py
ADDED
@@ -0,0 +1,313 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import random
|
4
|
+
import re
|
5
|
+
import time
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Dict, Optional
|
8
|
+
|
9
|
+
import requests
|
10
|
+
|
11
|
+
from client.model import GetAllVoicesResponse, GetSupportedVoiceModelResponse
|
12
|
+
|
13
|
+
|
14
|
+
class AllVoiceLab:
    """Client for the AllVoiceLab HTTP API.

    Bundles every endpoint wrapper (voice listing, model listing,
    text-to-speech, speech-to-speech, audio isolation) behind one
    object holding the API key and base domain.
    """

    # Seconds to wait on the JSON metadata endpoints.
    _JSON_TIMEOUT = 30
    # Seconds to wait on the audio-producing endpoints, which are slower.
    _MEDIA_TIMEOUT = 120

    def __init__(self, api_key: str, api_domain: str = "https://api-testing.all-voice.cn"):
        """
        Initialize the client.

        Args:
            api_key: API key, sent as the ``ai-api-key`` header.
            api_domain: Base API URL; defaults to the testing
                environment. A trailing slash is stripped so the URL
                f-strings below stay well-formed.
        """
        self.api_key = api_key
        self.api_domain = api_domain.rstrip('/')

    def _get_headers(self, content_type: str = "application/json") -> Dict[str, str]:
        """
        Build the default JSON request headers.

        Args:
            content_type: Value for the ``Content-Type`` header.

        Returns:
            Header dict including the API-key header.
        """
        return {
            "Content-Type": content_type,
            "Accept": "application/json",
            "ai-api-key": self.api_key
        }

    @staticmethod
    def _resolve_filename(response, default_prefix: str) -> str:
        """
        Choose an output filename for a binary audio response.

        Prefers the name advertised in the ``Content-Disposition``
        header; otherwise generates a unique
        ``<prefix>_<timestamp>_<random>.mp3`` name.

        Args:
            response: A ``requests`` response (only ``headers`` is read).
            default_prefix: Prefix for the generated fallback name.

        Returns:
            The chosen filename (no directory component).
        """
        content_disposition = response.headers.get('Content-Disposition')
        if content_disposition:
            filename_match = re.search(r'filename="?([^"]+)"?', content_disposition)
            if filename_match:
                return filename_match.group(1)
        timestamp = int(time.time())
        random_suffix = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=6))
        return f"{default_prefix}_{timestamp}_{random_suffix}.mp3"

    def get_all_voices(self) -> Optional[GetAllVoicesResponse]:
        """
        Fetch all available voices.

        Returns:
            Parsed voice-list response, truncated to the first 10
            entries to keep downstream text output small.

        Raises:
            Exception: If the API returns a non-200 status code.
        """
        # Query-string filters mirror what the service expects:
        # status=[2] plus skipping disabled legacy voices.
        url = f"{self.api_domain}/v1/voices/internal_get_all_voices?status=[2]&legacy_voice_skip_disabled=true"

        response = requests.get(url, headers=self._get_headers(), timeout=self._JSON_TIMEOUT)

        if response.status_code != 200:
            logging.error(f"请求失败,状态码: {response.status_code}")
            raise Exception(f"请求失败,状态码: {response.status_code}")

        response_data = response.text
        logging.info(response_data)

        # Parse the JSON body straight into the response dataclass.
        json_data = json.loads(response_data)
        api_resp = GetAllVoicesResponse.from_dict(json_data)

        # Keep only the first 10 voices.
        if len(api_resp.voices) > 10:
            api_resp.voices = api_resp.voices[:10]

        return api_resp

    def get_supported_voice_model(self) -> GetSupportedVoiceModelResponse:
        """
        Fetch the speech models supported by the service.

        Returns:
            Parsed supported-models response.

        Raises:
            Exception: If the API returns a non-200 status code.
        """
        url = f"{self.api_domain}/v1/voices/get_supported_model"

        response = requests.get(url, headers=self._get_headers(), timeout=self._JSON_TIMEOUT)

        if response.status_code != 200:
            logging.error(f"请求失败,状态码: {response.status_code}")
            raise Exception(f"请求失败,状态码: {response.status_code}")

        response_data = response.text
        logging.info(response_data)

        json_data = json.loads(response_data)
        return GetSupportedVoiceModelResponse.from_dict(json_data)

    def audio_isolation(self, audio_file_path: str, output_dir: str) -> str:
        """
        Send an audio file to the vocal-isolation API and save the result.

        Args:
            audio_file_path: Path of the input audio file.
            output_dir: Directory to write the output into (created if
                missing).

        Returns:
            Path of the saved output file.

        Raises:
            FileNotFoundError: If ``audio_file_path`` does not exist.
            requests.HTTPError: If the API responds with an error status.
        """
        audio_path = Path(audio_file_path)
        if not audio_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {audio_file_path}")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        url = f"{self.api_domain}/v1/audio-isolation/create"

        # `with` keeps the upload handle closed even if the request raises.
        with open(audio_path, 'rb') as audio_file:
            files = {
                'audio': (audio_path.name, audio_file)
            }
            headers = {
                'Accept': '*/*',
                'ai-api-key': self.api_key
            }
            # Bounded timeout so a stalled upload cannot hang the tool forever.
            response = requests.post(url, files=files, headers=headers, timeout=self._MEDIA_TIMEOUT)

        response.raise_for_status()

        filename = self._resolve_filename(response, "audio_isolation")
        file_path = output_path / filename

        with open(file_path, 'wb') as f:
            f.write(response.content)

        return str(file_path)

    def speech_to_speech(self, audio_file_path: str, voice_id_str: str, output_dir: str, similarity: float = 1,
                         remove_background_noise: bool = False) -> str:
        """
        Convert an audio file to another voice and save it locally.

        Args:
            audio_file_path: Path of the input audio file.
            voice_id_str: Target voice id.
            output_dir: Directory to write the output into (created if
                missing).
            similarity: Similarity factor in [0, 1].
            remove_background_noise: Whether to strip background noise.

        Returns:
            Path of the saved output file.

        Raises:
            FileNotFoundError: If ``audio_file_path`` does not exist.
            requests.HTTPError: If the API responds with an error status.
        """
        audio_path = Path(audio_file_path)
        if not audio_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {audio_file_path}")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        url = f"{self.api_domain}/v1/speech-to-speech/create"

        # Multipart form fields must all be strings.
        data = {
            'voice_id': voice_id_str,
            'similarity': str(similarity),
            'remove_background_noise': str(remove_background_noise).lower()
        }

        headers = {
            'Accept': '*/*',
            'ai-api-key': self.api_key
        }

        with open(audio_path, 'rb') as audio_file:
            files = {
                'audio': (audio_path.name, audio_file)
            }
            # Bounded timeout so a stalled upload cannot hang the tool forever.
            response = requests.post(url, headers=headers, data=data, files=files, timeout=self._MEDIA_TIMEOUT)

        response.raise_for_status()

        filename = self._resolve_filename(response, "s2s")
        file_path = output_path / filename

        with open(file_path, 'wb') as f:
            f.write(response.content)

        return str(file_path)

    def text_to_speech(self, text: str, voice_id, model_id: str, output_dir: str, speed: float = 1.0) -> str:
        """
        Convert text to speech and save the audio to a file.

        Args:
            text: Text to synthesize.
            voice_id: Voice id; must be convertible to ``int``.
            model_id: Speech model id.
            output_dir: Directory to write the output into (created if
                missing).
            speed: Speaking speed.

        Returns:
            Path of the saved output file.

        Raises:
            ValueError: If ``voice_id`` is not numeric.
            requests.HTTPError: If the API responds with an error status.
        """
        # Request body; the language is auto-detected server-side.
        request_body = {
            "text": text,
            "language_code": "auto",
            # The API expects a numeric voice id; a non-numeric value
            # fails fast here instead of as a server-side error.
            "voice_id": int(voice_id),
            "model_id": model_id,
            "voice_settings": {
                "speed": float(speed)
            }
        }

        url = f"{self.api_domain}/v1/text-to-speech/create"

        response = requests.post(
            url=url,
            json=request_body,
            headers=self._get_headers(),
            stream=True,  # stream so large audio is not buffered in memory
            timeout=self._MEDIA_TIMEOUT
        )

        response.raise_for_status()

        filename = self._resolve_filename(response, "tts")

        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        file_path = output_dir / filename

        # Write the audio in chunks to keep memory flat.
        with open(file_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        return str(file_path)
|
client/model.py
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
from dataclasses import dataclass, field
|
2
|
+
from typing import Dict, List
|
3
|
+
|
4
|
+
|
5
|
+
@dataclass
class Voice:
    """A single voice entry as returned by the voices API."""
    voice_id: str = ""
    name: str = ""
    description: str = ""
    files: List[str] = field(default_factory=list)
    is_legacy: bool = False
    is_favor: bool = False
    labels: Dict[str, str] = field(default_factory=dict)
    voice_settings: Dict[str, int] = field(default_factory=dict)
    icon_url: str = ""
    status: int = 0
    fail_code: int = 0
    is_pvc: bool = False
    is_disabled: bool = False
    created_time_sec: int = 0

    @classmethod
    def from_dict(cls, data: Dict) -> 'Voice':
        """Build a Voice from a raw API dict; missing keys get defaults."""
        # Table of field name -> fallback value; fresh list/dict fallbacks
        # are created per call, so instances never share mutable state.
        fallbacks = {
            'voice_id': '',
            'name': '',
            'description': '',
            'files': [],
            'is_legacy': False,
            'is_favor': False,
            'labels': {},
            'voice_settings': {},
            'icon_url': '',
            'status': 0,
            'fail_code': 0,
            'is_pvc': False,
            'is_disabled': False,
            'created_time_sec': 0,
        }
        kwargs = {key: data.get(key, default) for key, default in fallbacks.items()}
        return cls(**kwargs)
|
40
|
+
|
41
|
+
|
42
|
+
@dataclass
class GetAllVoicesResponse:
    """Response wrapper for the get-all-voices endpoint."""
    voices: List[Voice] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict) -> 'GetAllVoicesResponse':
        """Parse the raw response dict into a list of Voice objects."""
        raw_entries = data.get('voices', [])
        parsed = list(map(Voice.from_dict, raw_entries))
        return cls(voices=parsed)
|
51
|
+
|
52
|
+
|
53
|
+
@dataclass
class GetSupportedVoiceModelResponse:
    """Response wrapper for the supported-models endpoint."""
    models: List['VoiceModel']

    @classmethod
    def from_dict(cls, data):
        """Parse the raw response dict into VoiceModel entries."""
        raw_entries = data.get('models', [])
        return cls(models=list(map(VoiceModel.from_dict, raw_entries)))
|
62
|
+
|
63
|
+
|
64
|
+
# Speech-model descriptor.
@dataclass
class VoiceModel:
    model_id: str
    name: str
    can_do_text_to_speech: bool
    can_do_voice_conversion: bool
    description: str
    languages: List['VoiceModelLanguage']

    @classmethod
    def from_dict(cls, data):
        """Build a model descriptor from a raw API dict."""
        parsed_languages = [
            VoiceModelLanguage.from_dict(entry)
            for entry in data.get('languages', [])
        ]
        return cls(
            model_id=data.get('model_id', ''),
            name=data.get('name', ''),
            can_do_text_to_speech=data.get('can_do_text_to_speech', False),
            can_do_voice_conversion=data.get('can_do_voice_conversion', False),
            description=data.get('description', ''),
            languages=parsed_languages,
        )
|
84
|
+
|
85
|
+
|
86
|
+
# Language supported by a speech model.
@dataclass
class VoiceModelLanguage:
    language_id: str
    name: str

    @classmethod
    def from_dict(cls, data):
        """Build a language entry from a raw API dict."""
        lang_id = data.get('language_id', '')
        lang_name = data.get('name', '')
        return cls(language_id=lang_id, name=lang_name)
|