@ww_nero/media 1.3.1 → 1.3.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/index.js +1 -1
  2. package/package.json +1 -1
  3. package/utils/asr.js +56 -8
package/index.js CHANGED
@@ -114,7 +114,7 @@ const resolveAudioFile = (workingDir, rawPath) => {
114
114
  const server = new Server(
115
115
  {
116
116
  name: 'media',
117
- version: '1.3.1',
117
+ version: '1.3.2',
118
118
  },
119
119
  {
120
120
  capabilities: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ww_nero/media",
3
- "version": "1.3.1",
3
+ "version": "1.3.2",
4
4
  "description": "MCP server for media processing, including ASR speech recognition and TTS speech synthesis",
5
5
  "main": "index.js",
6
6
  "bin": {
package/utils/asr.js CHANGED
@@ -8,6 +8,8 @@ const ASR_SERVICE_URL = 'https://dashscope.aliyuncs.com/api/v1/services/audio/as
8
8
  const SUPPORTED_AUDIO_TYPES = ['.mp3', '.wav'];
9
9
  const MAX_FILE_SIZE_BYTES = 120 * 1024 * 1024; // 120MB
10
10
  const LANGUAGE_HINTS = ['zh', 'en', 'ja'];
11
+ const SENTENCE_SPLIT_REGEX = /[\u3002\uff01\uff1f\uff0c\u3001\uff1b\uff1a.,!?;:]+/u;
12
+ const PUNCTUATION_REMOVE_REGEX = /[\p{P}\p{S}]/gu;
11
13
 
12
14
  /**
13
15
  * 将毫秒转换为 SRT 时间格式 HH:MM:SS,mmm
@@ -20,8 +22,37 @@ const msToSrtTime = (ms) => {
20
22
  return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')},${String(milliseconds).padStart(3, '0')}`;
21
23
  };
22
24
 
25
/**
 * Strip punctuation and symbols from a clause and measure its length.
 *
 * Every character matched by PUNCTUATION_REMOVE_REGEX is replaced with a
 * space, runs of whitespace are collapsed to a single space, and the result
 * is trimmed.
 *
 * @param {string} text - Raw clause text.
 * @returns {{ text: string, charCount: number }} The cleaned text and the
 *   number of non-whitespace characters it contains. Empty or non-string
 *   input yields `{ text: '', charCount: 0 }`.
 */
const normalizeClause = (text) => {
  // Guard non-strings too, so a stray number/object cannot crash on .replace().
  if (typeof text !== 'string' || text === '') {
    return { text: '', charCount: 0 };
  }

  const withoutPunctuation = text
    .replace(PUNCTUATION_REMOVE_REGEX, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  // Count code points rather than UTF-16 code units: String.length would
  // count a supplementary-plane character (surrogate pair) as two.
  const charCount = [...withoutPunctuation.replace(/\s+/g, '')].length;
  return { text: withoutPunctuation, charCount };
};
36
+
37
/**
 * Break a sentence into cleaned clauses.
 *
 * The sentence is split on SENTENCE_SPLIT_REGEX, each fragment is passed
 * through normalizeClause, and fragments that come back with no text or a
 * zero character count are discarded.
 *
 * @param {string} text - Sentence text to split.
 * @returns {Array<{ text: string, charCount: number }>} Clauses in their
 *   original order; an empty array for empty or non-string input.
 */
const splitSentence = (text) => {
  if (typeof text !== 'string' || text.length === 0) {
    return [];
  }

  return text
    .split(SENTENCE_SPLIT_REGEX)
    .map((fragment) => normalizeClause(fragment))
    .filter((normalized) => normalized.text && normalized.charCount > 0)
    .map((normalized) => ({ text: normalized.text, charCount: normalized.charCount }));
};
51
+
23
52
  /**
24
53
  * 将 ASR 识别结果转换为 SRT 字幕内容
54
+ * 1. 句子按标点拆分为子句,去除所有标点符号
55
+ * 2. 按子句字符数比例分配句子时间轴
25
56
  */
26
57
  const asrToSrt = (asrData) => {
27
58
  const srtEntries = [];
@@ -35,17 +66,34 @@ const asrToSrt = (asrData) => {
35
66
  const sentences = transcript.sentences || [];
36
67
 
37
68
  for (const sentence of sentences) {
38
- const beginTime = sentence.begin_time || 0;
39
- const endTime = sentence.end_time || 0;
40
- const text = (sentence.text || '').trim();
69
+ const beginTime = Number.isFinite(sentence.begin_time) ? sentence.begin_time : 0;
70
+ const endTime = Number.isFinite(sentence.end_time) ? sentence.end_time : 0;
71
+ const duration = Math.max(endTime - beginTime, 0);
72
+ const clauses = splitSentence(sentence.text || '');
73
+
74
+ const totalChars = clauses.reduce((sum, c) => sum + c.charCount, 0);
75
+ if (!totalChars || !clauses.length) {
76
+ continue;
77
+ }
78
+
79
+ let accumulatedChars = 0;
41
80
 
42
- if (text) {
43
- const startStr = msToSrtTime(beginTime);
44
- const endStr = msToSrtTime(endTime);
81
+ clauses.forEach((clause, idx) => {
82
+ const startMs = Math.round(
83
+ beginTime + (duration * accumulatedChars) / totalChars
84
+ );
85
+ accumulatedChars += clause.charCount;
86
+ const endMs =
87
+ idx === clauses.length - 1
88
+ ? endTime
89
+ : Math.round(beginTime + (duration * accumulatedChars) / totalChars);
45
90
 
46
- srtEntries.push(`${subtitleIndex}\n${startStr} --> ${endStr}\n${text}\n`);
91
+ const startStr = msToSrtTime(startMs);
92
+ const endStr = msToSrtTime(endMs);
93
+
94
+ srtEntries.push(`${subtitleIndex}\n${startStr} --> ${endStr}\n${clause.text}\n`);
47
95
  subtitleIndex++;
48
- }
96
+ });
49
97
  }
50
98
  }
51
99
  }