@ww_nero/media 1.0.3 → 1.0.5

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -230,15 +230,8 @@ const runAsrScript = (audioPath, outputPath, uploadUrl, apiKey) => {
230
230
  const isWsl = isWslEnvironment();
231
231
 
232
232
  // 参考 bash.js 的调用方式,通过 bash -ic 执行命令
233
- let cmd, args;
234
- if (isWsl || process.platform === 'linux') {
235
- cmd = 'bash';
236
- args = ['-ic', pythonCommand];
237
- } else {
238
- // Windows 环境,通过 wsl 调用 bash
239
- cmd = 'wsl';
240
- args = ['-e', 'bash', '-ic', pythonCommand];
241
- }
233
+ cmd = 'bash';
234
+ args = ['-ic', pythonCommand];
242
235
 
243
236
  const child = spawn(cmd, args, {
244
237
  stdio: ['pipe', 'pipe', 'pipe'],
@@ -279,26 +272,7 @@ const runAsrScript = (audioPath, outputPath, uploadUrl, apiKey) => {
279
272
  if (code === 0) {
280
273
  resolve(stdout.trim());
281
274
  } else {
282
- // 尝试解析结构化的 ASR 错误信息
283
- const stderrContent = stderr.trim();
284
- const asrErrorMatch = stderrContent.match(/ASR_ERROR:\s*(\{.*\})/);
285
- if (asrErrorMatch) {
286
- try {
287
- const errorInfo = JSON.parse(asrErrorMatch[1]);
288
- const parts = [errorInfo.error || '识别失败'];
289
- if (errorInfo.audio_url) {
290
- parts.push(`静态资源地址: ${errorInfo.audio_url}`);
291
- }
292
- if (errorInfo.details) {
293
- parts.push(`服务器错误信息: ${errorInfo.details}`);
294
- }
295
- reject(new Error(parts.join('\n')));
296
- return;
297
- } catch {
298
- // JSON 解析失败,使用原始错误信息
299
- }
300
- }
301
- const errorMsg = stderrContent || stdout.trim() || `进程退出码: ${code}`;
275
+ const errorMsg = stderr.trim() || stdout.trim() || `进程退出码: ${code}`;
302
276
  reject(new Error(errorMsg));
303
277
  }
304
278
  });
@@ -343,7 +317,7 @@ const asr = async ({ working_directory, audio_file }) => {
343
317
  const server = new Server(
344
318
  {
345
319
  name: 'media',
346
- version: '1.0.3',
320
+ version: '1.0.5',
347
321
  },
348
322
  {
349
323
  capabilities: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ww_nero/media",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "MCP server for media processing, including ASR speech recognition",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -97,34 +97,28 @@ def upload_audio(upload_url: str, audio_path: str) -> str:
97
97
  if not data.get('success') or 'data' not in data or 'path' not in data['data']:
98
98
  raise Exception(f"上传响应格式错误: {data}")
99
99
 
100
- return Path(data['data']['path']).name
100
+ # 返回完整的相对路径,用于拼接到服务器地址后面
101
+ return data['data']['path']
101
102
 
102
103
 
103
- def get_static_url(upload_url: str, filename: str) -> str:
104
+ def get_static_url(upload_url: str, file_path: str) -> str:
104
105
  """
105
- 根据上传接口 URL 和文件名构建静态资源 URL
106
+ 根据上传接口 URL 和文件路径构建静态资源 URL
106
107
 
107
108
  Args:
108
109
  upload_url: 上传接口的完整 URL (如 http://server.domain.com/upload)
109
- filename: 上传后的文件名
110
+ file_path: 上传后返回的文件路径 (如 /tmp/xxx.wav)
110
111
 
111
112
  Returns:
112
113
  静态资源的完整 URL
113
114
  """
114
115
  # 从上传 URL 中提取基础 URL
115
- # 例如: http://server.domain.com/upload -> http://server.domain.com/
116
+ # 例如: http://server.domain.com/upload -> http://server.domain.com
116
117
  from urllib.parse import urlparse, urlunparse
117
118
  parsed = urlparse(upload_url)
118
- base_url = urlunparse((parsed.scheme, parsed.netloc, '/', '', '', ''))
119
- return f"{base_url.rstrip('/')}/{filename}"
120
-
121
-
122
- class AsrError(Exception):
123
- """ASR 识别错误,包含静态资源 URL 和错误详情"""
124
- def __init__(self, message: str, audio_url: str = None, details: str = None):
125
- self.audio_url = audio_url
126
- self.details = details
127
- super().__init__(message)
119
+ base_url = urlunparse((parsed.scheme, parsed.netloc, '', '', '', ''))
120
+ # file_path 已经是以 / 开头的完整相对路径,直接拼接
121
+ return f"{base_url}{file_path}"
128
122
 
129
123
 
130
124
  def transcribe_audio(audio_url: str, api_key: str) -> list:
@@ -141,17 +135,14 @@ def transcribe_audio(audio_url: str, api_key: str) -> list:
141
135
  dashscope.api_key = api_key
142
136
 
143
137
  # 发起异步识别请求
144
- try:
145
- transcribe_response = Transcription.async_call(
146
- model='paraformer-v2',
147
- file_urls=[audio_url],
148
- language_hints=['zh', 'en', 'ja']
149
- )
150
- except Exception as e:
151
- raise AsrError(f"ASR 请求失败: {e}", audio_url=audio_url, details=str(e))
138
+ transcribe_response = Transcription.async_call(
139
+ model='paraformer-v2',
140
+ file_urls=[audio_url],
141
+ language_hints=['zh', 'en', 'ja']
142
+ )
152
143
 
153
144
  if not transcribe_response or not hasattr(transcribe_response, 'output'):
154
- raise AsrError("ASR 请求失败: 无效的响应", audio_url=audio_url)
145
+ raise Exception("ASR 请求失败: 无效的响应")
155
146
 
156
147
  # 轮询等待识别完成
157
148
  while True:
@@ -162,26 +153,10 @@ def transcribe_audio(audio_url: str, api_key: str) -> list:
162
153
  transcribe_response = Transcription.fetch(task=transcribe_response.output.task_id)
163
154
 
164
155
  if transcribe_response.status_code != HTTPStatus.OK:
165
- error_msg = getattr(transcribe_response, 'message', str(transcribe_response.status_code))
166
- raise AsrError(
167
- f"ASR 识别失败: {transcribe_response.status_code}",
168
- audio_url=audio_url,
169
- details=error_msg
170
- )
156
+ raise Exception(f"ASR 识别失败: {transcribe_response.status_code}")
171
157
 
172
158
  if transcribe_response.output.task_status == 'FAILED':
173
- # 尝试获取更详细的错误信息
174
- error_details = None
175
- results = transcribe_response.output.get('results', [])
176
- for result in results:
177
- if result.get('subtask_status') == 'FAILED':
178
- error_details = result.get('message', '未知错误')
179
- break
180
- raise AsrError(
181
- "ASR 识别任务失败",
182
- audio_url=audio_url,
183
- details=error_details
184
- )
159
+ raise Exception("ASR 识别任务失败")
185
160
 
186
161
  # 获取识别结果
187
162
  results = transcribe_response.output.get('results', [])
@@ -217,15 +192,14 @@ def main():
217
192
  print("错误: 请通过 --api-key 参数或 ASR_API_KEY 环境变量提供 API Key", file=sys.stderr)
218
193
  sys.exit(1)
219
194
 
220
- audio_url = None
221
195
  try:
222
196
  # 1. 上传音频文件
223
197
  print(f"正在上传音频文件: {args.audio}")
224
- filename = upload_audio(args.upload_url, args.audio)
225
- print(f"上传成功: {filename}")
198
+ file_path = upload_audio(args.upload_url, args.audio)
199
+ print(f"上传成功: {file_path}")
226
200
 
227
201
  # 2. 构建静态资源 URL
228
- audio_url = get_static_url(args.upload_url, filename)
202
+ audio_url = get_static_url(args.upload_url, file_path)
229
203
  print(f"音频 URL: {audio_url}")
230
204
 
231
205
  # 3. 调用 ASR 识别
@@ -233,7 +207,8 @@ def main():
233
207
  transcriptions = transcribe_audio(audio_url, api_key)
234
208
 
235
209
  if not transcriptions:
236
- raise AsrError("未获取到识别结果", audio_url=audio_url)
210
+ print("警告: 未获取到识别结果", file=sys.stderr)
211
+ sys.exit(1)
237
212
 
238
213
  # 4. 生成 SRT 文件
239
214
  subtitle_count = asr_to_srt(transcriptions, args.output)
@@ -243,23 +218,8 @@ def main():
243
218
  except FileNotFoundError as e:
244
219
  print(f"错误: {e}", file=sys.stderr)
245
220
  sys.exit(1)
246
- except AsrError as e:
247
- # 输出结构化的错误信息,包含静态资源 URL
248
- error_info = {
249
- 'error': str(e),
250
- 'audio_url': e.audio_url or audio_url,
251
- 'details': e.details
252
- }
253
- print(f"ASR_ERROR: {json.dumps(error_info, ensure_ascii=False)}", file=sys.stderr)
254
- sys.exit(1)
255
221
  except Exception as e:
256
- # 其他错误也尝试包含静态资源 URL
257
- error_info = {
258
- 'error': str(e),
259
- 'audio_url': audio_url,
260
- 'details': None
261
- }
262
- print(f"ASR_ERROR: {json.dumps(error_info, ensure_ascii=False)}", file=sys.stderr)
222
+ print(f"错误: {e}", file=sys.stderr)
263
223
  sys.exit(1)
264
224
 
265
225