@hsiehchenwei/mcp-gemini-transcriber 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +3 -3
  2. package/server.mjs +255 -54
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "@hsiehchenwei/mcp-gemini-transcriber",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "type": "module",
5
5
  "description": "MCP 音訊轉逐字稿工具(使用 Gemini API)- 支援語者識別與情緒分析",
6
6
  "main": "server.mjs",
7
7
  "bin": {
8
- "mcp-gemini-transcriber": "./server.mjs"
8
+ "mcp-gemini-transcriber": "server.mjs"
9
9
  },
10
10
  "scripts": {
11
11
  "start": "node server.mjs"
@@ -24,7 +24,7 @@
24
24
  "license": "MIT",
25
25
  "repository": {
26
26
  "type": "git",
27
- "url": "https://github.com/chenwei/MCPTools.git",
27
+ "url": "git+https://github.com/chenwei/MCPTools.git",
28
28
  "directory": "mcp-gemini-transcriber"
29
29
  },
30
30
  "dependencies": {
package/server.mjs CHANGED
@@ -45,6 +45,138 @@ const MODEL_NAME = 'gemini-3-flash-preview';
45
45
  const SUPPORTED_AUDIO_FORMATS = ['.mp3', '.m4a', '.wav', '.webm', '.ogg', '.flac', '.aiff', '.aac'];
46
46
  const SUPPORTED_IMAGE_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.heic', '.heif'];
47
47
 
48
+ // Gemini 3 Flash Preview 價格(每 100 萬 tokens,美元)
49
+ const PRICING = {
50
+ input: {
51
+ text: 0.50, // 文字/圖片/影片
52
+ audio: 1.00 // 音訊
53
+ },
54
+ output: 3.00 // 輸出(含思考代幣)
55
+ };
56
+
57
+ /**
58
+ * Token 用量追蹤器
59
+ */
60
+ class UsageTracker {
61
+ constructor() {
62
+ this.reset();
63
+ }
64
+
65
+ reset() {
66
+ this.inputTokens = { text: 0, audio: 0, image: 0, video: 0 };
67
+ this.outputTokens = 0;
68
+ this.thoughtTokens = 0;
69
+ this.apiCalls = 0;
70
+ }
71
+
72
+ // 從 API response 中提取並累加 usage
73
+ addFromResponse(response) {
74
+ const usage = response?.usageMetadata;
75
+ if (!usage) return;
76
+
77
+ this.apiCalls++;
78
+
79
+ // 輸出 tokens(含思考)
80
+ this.outputTokens += usage.candidatesTokenCount || 0;
81
+ this.thoughtTokens += usage.thoughtsTokenCount || 0;
82
+
83
+ // 輸入 tokens(按 modality 分類)
84
+ if (usage.promptTokensDetails) {
85
+ for (const detail of usage.promptTokensDetails) {
86
+ const modality = (detail.modality || 'TEXT').toLowerCase();
87
+ const count = detail.tokenCount || 0;
88
+ if (modality === 'audio') {
89
+ this.inputTokens.audio += count;
90
+ } else if (modality === 'image') {
91
+ this.inputTokens.image += count;
92
+ } else if (modality === 'video') {
93
+ this.inputTokens.video += count;
94
+ } else {
95
+ this.inputTokens.text += count;
96
+ }
97
+ }
98
+ } else {
99
+ // fallback:沒有 details 時全部算 text
100
+ this.inputTokens.text += usage.promptTokenCount || 0;
101
+ }
102
+ }
103
+
104
+ // 計算費用(美元)
105
+ calculateCost() {
106
+ const inputCost =
107
+ (this.inputTokens.text + this.inputTokens.image + this.inputTokens.video) / 1_000_000 * PRICING.input.text +
108
+ this.inputTokens.audio / 1_000_000 * PRICING.input.audio;
109
+
110
+ // 輸出費用包含思考 tokens
111
+ const outputCost = (this.outputTokens + this.thoughtTokens) / 1_000_000 * PRICING.output;
112
+
113
+ return {
114
+ inputCost,
115
+ outputCost,
116
+ totalCost: inputCost + outputCost
117
+ };
118
+ }
119
+
120
+ // 取得摘要
121
+ getSummary() {
122
+ const cost = this.calculateCost();
123
+ const totalInput = this.inputTokens.text + this.inputTokens.audio + this.inputTokens.image + this.inputTokens.video;
124
+ const totalOutput = this.outputTokens + this.thoughtTokens;
125
+
126
+ return {
127
+ apiCalls: this.apiCalls,
128
+ tokens: {
129
+ input: {
130
+ text: this.inputTokens.text,
131
+ audio: this.inputTokens.audio,
132
+ image: this.inputTokens.image,
133
+ video: this.inputTokens.video,
134
+ total: totalInput
135
+ },
136
+ output: this.outputTokens,
137
+ thought: this.thoughtTokens,
138
+ totalOutput: totalOutput,
139
+ total: totalInput + totalOutput
140
+ },
141
+ cost: {
142
+ input: cost.inputCost,
143
+ output: cost.outputCost,
144
+ total: cost.totalCost,
145
+ formatted: `$${cost.totalCost.toFixed(6)}`
146
+ }
147
+ };
148
+ }
149
+
150
+ // 格式化輸出(用於顯示)
151
+ formatSummary() {
152
+ const summary = this.getSummary();
153
+ let lines = [];
154
+
155
+ lines.push(`📊 **API 使用量統計**`);
156
+ lines.push(`- API 呼叫次數:${summary.apiCalls} 次`);
157
+ lines.push(`- 輸入 Tokens:${summary.tokens.input.total.toLocaleString()}`);
158
+ if (summary.tokens.input.audio > 0) {
159
+ lines.push(` - 音訊:${summary.tokens.input.audio.toLocaleString()}`);
160
+ }
161
+ if (summary.tokens.input.text > 0) {
162
+ lines.push(` - 文字:${summary.tokens.input.text.toLocaleString()}`);
163
+ }
164
+ lines.push(`- 輸出 Tokens:${summary.tokens.totalOutput.toLocaleString()}`);
165
+ if (summary.tokens.thought > 0) {
166
+ lines.push(` - 思考:${summary.tokens.thought.toLocaleString()}`);
167
+ lines.push(` - 回應:${summary.tokens.output.toLocaleString()}`);
168
+ }
169
+ lines.push(`- 總 Tokens:${summary.tokens.total.toLocaleString()}`);
170
+ lines.push('');
171
+ lines.push(`💰 **預估費用**`);
172
+ lines.push(`- 輸入費用:$${summary.cost.input.toFixed(6)}`);
173
+ lines.push(`- 輸出費用:$${summary.cost.output.toFixed(6)}`);
174
+ lines.push(`- **總費用:${summary.cost.formatted}**`);
175
+
176
+ return lines.join('\n');
177
+ }
178
+ }
179
+
48
180
  /**
49
181
  * 計算動態 timeout
50
182
  */
@@ -62,6 +194,48 @@ function withTimeout(promise, ms, message = 'Operation timed out') {
62
194
  ]);
63
195
  }
64
196
 
197
+ /**
198
+ * 檢查路徑是否包含非 ASCII 字元
199
+ */
200
+ function hasNonAscii(str) {
201
+ return /[^\x00-\x7F]/.test(str);
202
+ }
203
+
204
+ /**
205
+ * 安全上傳檔案(處理非 ASCII 路徑)
206
+ * @google/genai SDK 無法處理包含中文字元的檔案路徑
207
+ */
208
+ async function safeUploadFile(ai, filePath, mimeType, timeout) {
209
+ let tempPath = null;
210
+ let actualPath = filePath;
211
+
212
+ // 如果路徑包含非 ASCII 字元,複製到臨時路徑
213
+ if (hasNonAscii(filePath)) {
214
+ const tempDir = path.join(tmpdir(), `upload_${Date.now()}`);
215
+ await fs.mkdir(tempDir, { recursive: true });
216
+ const ext = path.extname(filePath);
217
+ tempPath = path.join(tempDir, `audio_${createHash('md5').update(filePath).digest('hex').slice(0, 8)}${ext}`);
218
+ await fs.copyFile(filePath, tempPath);
219
+ actualPath = tempPath;
220
+ console.error(` 📋 已複製到臨時路徑(避免中文路徑問題)`);
221
+ }
222
+
223
+ try {
224
+ const uploadedFile = await withTimeout(
225
+ ai.files.upload({ file: actualPath, config: { mimeType } }),
226
+ timeout,
227
+ `上傳超時 (${timeout/1000}s)`
228
+ );
229
+ return { uploadedFile, tempPath };
230
+ } catch (error) {
231
+ // 清理臨時檔案
232
+ if (tempPath) {
233
+ await fs.rm(path.dirname(tempPath), { recursive: true, force: true }).catch(() => {});
234
+ }
235
+ throw error;
236
+ }
237
+ }
238
+
65
239
  /**
66
240
  * 取得音檔長度(秒)
67
241
  */
@@ -467,7 +641,7 @@ function escapeRegex(str) {
467
641
  /**
468
642
  * 快速模式:簡單轉錄單一片段(不做語者識別)
469
643
  */
470
- async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_RETRIES) {
644
+ async function transcribeSingleSegmentFast(ai, segment, model, tracker = null, maxRetries = MAX_RETRIES) {
471
645
  const { index, path: segmentPath, offset, duration = SEGMENT_DURATION } = segment;
472
646
  let uploadedFile = null;
473
647
  const segmentTimeout = calculateTimeout(duration);
@@ -486,13 +660,12 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
486
660
 
487
661
  開始:`;
488
662
 
663
+ let uploadTempPath = null;
489
664
  for (let attempt = 0; attempt < maxRetries; attempt++) {
490
665
  try {
491
- uploadedFile = await withTimeout(
492
- ai.files.upload({ file: segmentPath, config: { mimeType: 'audio/mpeg' } }),
493
- segmentTimeout,
494
- `上傳超時 (${segmentTimeout/1000}s)`
495
- );
666
+ const uploadResult = await safeUploadFile(ai, segmentPath, 'audio/mpeg', segmentTimeout);
667
+ uploadedFile = uploadResult.uploadedFile;
668
+ uploadTempPath = uploadResult.tempPath;
496
669
 
497
670
  const response = await withTimeout(
498
671
  ai.models.generateContent({
@@ -507,7 +680,13 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
507
680
  `轉錄超時 (${segmentTimeout/1000}s)`
508
681
  );
509
682
 
683
+ // 追蹤 token 用量
684
+ if (tracker) tracker.addFromResponse(response);
685
+
510
686
  ai.files.delete({ name: uploadedFile.name }).catch(() => {});
687
+ if (uploadTempPath) {
688
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
689
+ }
511
690
 
512
691
  let transcript = response.text;
513
692
 
@@ -524,6 +703,10 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
524
703
  ai.files.delete({ name: uploadedFile.name }).catch(() => {});
525
704
  uploadedFile = null;
526
705
  }
706
+ if (uploadTempPath) {
707
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
708
+ uploadTempPath = null;
709
+ }
527
710
 
528
711
  if (attempt < maxRetries - 1) {
529
712
  const delay = RETRY_BASE_DELAY * Math.pow(2, attempt);
@@ -541,7 +724,7 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
541
724
  * 語者識別模式:單一 API 呼叫同時轉錄 + 語者識別
542
725
  * 回傳格式包含逐字稿和語者資訊,減少 API 呼叫次數
543
726
  */
544
- async function transcribeSingleSegmentSpeaker(ai, segment, model, speakerProfiles = {}, previousEnding = '', maxRetries = MAX_RETRIES) {
727
+ async function transcribeSingleSegmentSpeaker(ai, segment, model, speakerProfiles = {}, previousEnding = '', tracker = null, maxRetries = MAX_RETRIES) {
545
728
  const { index, path: segmentPath, offset, duration = SEGMENT_DURATION } = segment;
546
729
  let uploadedFile = null;
547
730
  const segmentTimeout = calculateTimeout(duration);
@@ -596,13 +779,12 @@ ${previousEnding.slice(-400)}
596
779
 
597
780
  開始轉錄:`;
598
781
 
782
+ let uploadTempPath = null;
599
783
  for (let attempt = 0; attempt < maxRetries; attempt++) {
600
784
  try {
601
- uploadedFile = await withTimeout(
602
- ai.files.upload({ file: segmentPath, config: { mimeType: 'audio/mpeg' } }),
603
- segmentTimeout,
604
- `上傳超時 (${segmentTimeout/1000}s)`
605
- );
785
+ const uploadResult = await safeUploadFile(ai, segmentPath, 'audio/mpeg', segmentTimeout);
786
+ uploadedFile = uploadResult.uploadedFile;
787
+ uploadTempPath = uploadResult.tempPath;
606
788
 
607
789
  const response = await withTimeout(
608
790
  ai.models.generateContent({
@@ -617,7 +799,13 @@ ${previousEnding.slice(-400)}
617
799
  `轉錄超時 (${segmentTimeout/1000}s)`
618
800
  );
619
801
 
802
+ // 追蹤 token 用量
803
+ if (tracker) tracker.addFromResponse(response);
804
+
620
805
  ai.files.delete({ name: uploadedFile.name }).catch(() => {});
806
+ if (uploadTempPath) {
807
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
808
+ }
621
809
 
622
810
  const fullText = response.text;
623
811
 
@@ -682,7 +870,7 @@ ${previousEnding.slice(-400)}
682
870
  return {
683
871
  index,
684
872
  transcript,
685
- emotion, // 新增
873
+ emotion,
686
874
  success: true,
687
875
  speakerProfiles: newSpeakerProfiles
688
876
  };
@@ -692,6 +880,10 @@ ${previousEnding.slice(-400)}
692
880
  ai.files.delete({ name: uploadedFile.name }).catch(() => {});
693
881
  uploadedFile = null;
694
882
  }
883
+ if (uploadTempPath) {
884
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
885
+ uploadTempPath = null;
886
+ }
695
887
 
696
888
  if (attempt < maxRetries - 1) {
697
889
  const delay = RETRY_BASE_DELAY * Math.pow(2, attempt);
@@ -1103,6 +1295,7 @@ ${transcript}
1103
1295
  async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin) {
1104
1296
  const segments = await splitAudioFast(audioPath);
1105
1297
  let tempDir = null;
1298
+ const tracker = new UsageTracker();
1106
1299
 
1107
1300
  if (segments.length > 1 && !segments[0].isOriginal) {
1108
1301
  tempDir = path.dirname(segments[0].path);
@@ -1112,10 +1305,7 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
1112
1305
 
1113
1306
  if (segments.length === 1) {
1114
1307
  console.error('⬆️ 上傳中...');
1115
- const uploadedFile = await ai.files.upload({
1116
- file: audioPath,
1117
- config: { mimeType: 'audio/mpeg' }
1118
- });
1308
+ const { uploadedFile, tempPath: uploadTempPath } = await safeUploadFile(ai, audioPath, 'audio/mpeg', 120000);
1119
1309
  console.error('✅ 上傳完成');
1120
1310
 
1121
1311
  const response = await ai.models.generateContent({
@@ -1129,8 +1319,12 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
1129
1319
  config: { maxOutputTokens: 65536, temperature: 0.1 }
1130
1320
  });
1131
1321
 
1322
+ tracker.addFromResponse(response);
1132
1323
  transcript = response.text;
1133
1324
  try { await ai.files.delete({ name: uploadedFile.name }); } catch {}
1325
+ if (uploadTempPath) {
1326
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
1327
+ }
1134
1328
 
1135
1329
  } else {
1136
1330
  // 平行處理
@@ -1139,7 +1333,7 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
1139
1333
 
1140
1334
  for (let i = 0; i < segments.length; i += MAX_WORKERS) {
1141
1335
  const batch = segments.slice(i, i + MAX_WORKERS);
1142
- const batchPromises = batch.map(seg => transcribeSingleSegmentFast(ai, seg, model));
1336
+ const batchPromises = batch.map(seg => transcribeSingleSegmentFast(ai, seg, model, tracker));
1143
1337
  const batchResults = await Promise.all(batchPromises);
1144
1338
 
1145
1339
  for (const r of batchResults) {
@@ -1158,6 +1352,10 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
1158
1352
  await fs.rm(tempDir, { recursive: true, force: true });
1159
1353
  }
1160
1354
 
1355
+ // 取得使用量摘要
1356
+ const usage = tracker.getSummary();
1357
+ console.error(`💰 費用:${usage.cost.formatted}(${usage.tokens.total.toLocaleString()} tokens)`);
1358
+
1161
1359
  // 儲存結果
1162
1360
  const outPath = outputPath || audioPath.replace(/\.[^/.]+$/, '.md');
1163
1361
  const formattedTimestamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
@@ -1188,7 +1386,8 @@ ${transcript}
1188
1386
  duration: `${Math.round(durationMin)} 分鐘`,
1189
1387
  segments: segments.length,
1190
1388
  summary: '(快速模式不產生摘要)',
1191
- keywords: ''
1389
+ keywords: '',
1390
+ usage
1192
1391
  };
1193
1392
  }
1194
1393
 
@@ -1198,6 +1397,7 @@ ${transcript}
1198
1397
  async function transcribeAudioSpeaker(audioPath, outputPath, model, ai, durationMin) {
1199
1398
  const segments = await splitAudioSpeaker(audioPath);
1200
1399
  let tempDir = null;
1400
+ const tracker = new UsageTracker();
1201
1401
 
1202
1402
  if (segments.length > 1 && !segments[0].isOriginal) {
1203
1403
  tempDir = path.dirname(segments[0].path);
@@ -1228,7 +1428,7 @@ async function transcribeAudioSpeaker(audioPath, outputPath, model, ai, duration
1228
1428
 
1229
1429
  // 單一 API 呼叫:同時取得逐字稿 + 情緒分析 + 語者資訊
1230
1430
  const result = await transcribeSingleSegmentSpeaker(
1231
- ai, segment, model, speakerProfiles, previousEnding
1431
+ ai, segment, model, speakerProfiles, previousEnding, tracker
1232
1432
  );
1233
1433
 
1234
1434
  if (!result.success) {
@@ -1278,6 +1478,7 @@ async function transcribeAudioSpeaker(audioPath, outputPath, model, ai, duration
1278
1478
  ]),
1279
1479
  config: { maxOutputTokens: 2048, temperature: 0.3 }
1280
1480
  });
1481
+ tracker.addFromResponse(summaryResponse);
1281
1482
  const summary = summaryResponse.text;
1282
1483
 
1283
1484
  // 生成簡短情緒摘要(一段文字,包含經典句子)
@@ -1304,6 +1505,7 @@ ${emotionResults.map(r => `[${r.time}分鐘] ${r.emotion.slice(0, 200)}`).join('
1304
1505
  contents: createUserContent([emotionSummaryPrompt]),
1305
1506
  config: { maxOutputTokens: 1024, temperature: 0.3 }
1306
1507
  });
1508
+ tracker.addFromResponse(emotionSummaryResponse);
1307
1509
  emotionSummary = emotionSummaryResponse.text;
1308
1510
  console.error(' ✅ 情緒摘要完成');
1309
1511
  } catch (error) {
@@ -1409,13 +1611,18 @@ ${finalTranscript}
1409
1611
  await fs.writeFile(outPath, finalContent, 'utf8');
1410
1612
  console.error(`📄 [語者識別模式] 逐字稿已儲存: ${outPath}`);
1411
1613
 
1614
+ // 取得使用量摘要
1615
+ const usage = tracker.getSummary();
1616
+ console.error(`💰 費用:${usage.cost.formatted}(${usage.tokens.total.toLocaleString()} tokens)`);
1617
+
1412
1618
  return {
1413
1619
  outputPath: outPath,
1414
1620
  duration: metadata.duration,
1415
1621
  segments: segments.length,
1416
1622
  summary,
1417
1623
  keywords: '',
1418
- speakerProfiles
1624
+ speakerProfiles,
1625
+ usage
1419
1626
  };
1420
1627
  }
1421
1628
 
@@ -1612,10 +1819,7 @@ async function describeImage(imagePath, outputPath = null, detailLevel = 'detail
1612
1819
 
1613
1820
  const prompt = detailPrompts[detailLevel] || detailPrompts.detailed;
1614
1821
 
1615
- const uploadedFile = await ai.files.upload({
1616
- file: imagePath,
1617
- config: { mimeType: 'image/jpeg' }
1618
- });
1822
+ const { uploadedFile, tempPath: uploadTempPath } = await safeUploadFile(ai, imagePath, 'image/jpeg', 60000);
1619
1823
 
1620
1824
  const response = await ai.models.generateContent({
1621
1825
  model: MODEL_NAME,
@@ -1627,6 +1831,9 @@ async function describeImage(imagePath, outputPath = null, detailLevel = 'detail
1627
1831
  });
1628
1832
 
1629
1833
  try { await ai.files.delete({ name: uploadedFile.name }); } catch { /* ignore */ }
1834
+ if (uploadTempPath) {
1835
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
1836
+ }
1630
1837
 
1631
1838
  const description = response.text;
1632
1839
  const outPath = outputPath || imagePath.replace(/\.[^/.]+$/, '.md');
@@ -1687,43 +1894,36 @@ server.tool(
1687
1894
 
1688
1895
  // 根據模式顯示不同結果(從結果中取得實際使用的模式)
1689
1896
  const actualMode = result.mode || (mode || process.env.DEFAULT_MODE || 'fast');
1690
- if (actualMode === 'fast') {
1691
- return {
1692
- content: [{
1693
- type: 'text',
1694
- text: `✅ [快速模式] 音訊轉錄完成!
1897
+
1898
+ // 格式化費用資訊(簡潔版)
1899
+ const usageInfo = result.usage ? `
1695
1900
 
1696
- 📁 **輸出檔案**: ${result.outputPath}
1697
- ⏱️ **總時長**: ${result.duration}
1698
- 📊 **處理區塊**: ${result.segments} 個`
1699
- }]
1700
- };
1701
- } else if (selectedMode === 'hybrid') {
1702
- const speakerInfo = result.speakerProfiles ?
1703
- Object.entries(result.speakerProfiles)
1704
- .map(([name, info]) => `- **${name}**(${[info.gender, info.role].filter(Boolean).join(',') || '?'})`)
1705
- .join('\n') : '';
1901
+ 💰 **費用統計**
1902
+ - 輸入 Tokens:${result.usage.tokens.input.total.toLocaleString()}
1903
+ - 輸出 Tokens:${result.usage.tokens.totalOutput.toLocaleString()}
1904
+ - **總費用:${result.usage.cost.formatted}**` : '';
1706
1905
 
1906
+ if (actualMode === 'fast') {
1707
1907
  return {
1708
1908
  content: [{
1709
1909
  type: 'text',
1710
- text: `✅ [Hybrid 模式] 音訊轉錄完成!
1910
+ text: `✅ [快速模式] 音訊轉錄完成!
1711
1911
 
1712
1912
  📁 **輸出檔案**: ${result.outputPath}
1713
1913
  ⏱️ **總時長**: ${result.duration}
1714
1914
  📊 **處理區塊**: ${result.segments} 個
1715
-
1716
- ## 語者資訊
1717
- ${speakerInfo || '(未識別)'}
1718
-
1719
- ## 摘要
1720
- ${result.summary}`
1915
+ ${usageInfo}`
1721
1916
  }]
1722
1917
  };
1723
1918
  } else {
1919
+ // speaker 模式
1724
1920
  const speakerInfo = result.speakerProfiles ?
1725
1921
  Object.entries(result.speakerProfiles)
1726
- .map(([id, info]) => `- ${info.name || id}(${info.role || '?'})`)
1922
+ .map(([id, info]) => {
1923
+ const name = info.name || id;
1924
+ const details = [info.gender, info.role, info.traits].filter(Boolean).join(',');
1925
+ return `- **${name}**:${details || '待識別'}`;
1926
+ })
1727
1927
  .join('\n') : '';
1728
1928
 
1729
1929
  return {
@@ -1739,7 +1939,8 @@ ${result.summary}`
1739
1939
  ${speakerInfo || '(未識別)'}
1740
1940
 
1741
1941
  ## 摘要
1742
- ${result.summary}`
1942
+ ${result.summary || '(無)'}
1943
+ ${usageInfo}`
1743
1944
  }]
1744
1945
  };
1745
1946
  }
@@ -1849,11 +2050,8 @@ server.tool(
1849
2050
  ffmpeg.on('error', reject);
1850
2051
  });
1851
2052
 
1852
- // 上傳並分析
1853
- const uploadedFile = await ai.files.upload({
1854
- file: segmentPath,
1855
- config: { mimeType: 'audio/mpeg' }
1856
- });
2053
+ // 上傳並分析(使用 safeUploadFile 處理中文路徑)
2054
+ const { uploadedFile, tempPath: uploadTempPath } = await safeUploadFile(ai, segmentPath, 'audio/mpeg', 60000);
1857
2055
 
1858
2056
  const defaultQuestion = `分析這段音頻中說話的人:
1859
2057
 
@@ -1876,6 +2074,9 @@ server.tool(
1876
2074
 
1877
2075
  // 清理
1878
2076
  ai.files.delete({ name: uploadedFile.name }).catch(() => {});
2077
+ if (uploadTempPath) {
2078
+ await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
2079
+ }
1879
2080
  await fs.rm(tempDir, { recursive: true, force: true });
1880
2081
 
1881
2082
  const timeRange = `${timestamp || '00:00'} - ${formatTime(startSeconds + duration)}`;