@hsiehchenwei/mcp-gemini-transcriber 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/server.mjs +255 -54
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hsiehchenwei/mcp-gemini-transcriber",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MCP 音訊轉逐字稿工具(使用 Gemini API)- 支援語者識別與情緒分析",
|
|
6
6
|
"main": "server.mjs",
|
|
7
7
|
"bin": {
|
|
8
|
-
"mcp-gemini-transcriber": "
|
|
8
|
+
"mcp-gemini-transcriber": "server.mjs"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
11
|
"start": "node server.mjs"
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
"license": "MIT",
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|
|
27
|
-
"url": "https://github.com/chenwei/MCPTools.git",
|
|
27
|
+
"url": "git+https://github.com/chenwei/MCPTools.git",
|
|
28
28
|
"directory": "mcp-gemini-transcriber"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
package/server.mjs
CHANGED
|
@@ -45,6 +45,138 @@ const MODEL_NAME = 'gemini-3-flash-preview';
|
|
|
45
45
|
const SUPPORTED_AUDIO_FORMATS = ['.mp3', '.m4a', '.wav', '.webm', '.ogg', '.flac', '.aiff', '.aac'];
|
|
46
46
|
const SUPPORTED_IMAGE_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.heic', '.heif'];
|
|
47
47
|
|
|
48
|
+
/**
 * Gemini 3 Flash Preview pricing, in US dollars per one million tokens.
 *
 * - `input.text`  : rate applied to text, image and video input tokens.
 * - `input.audio` : rate applied to audio input tokens.
 * - `output`      : rate applied to output tokens (thinking tokens included).
 *
 * The table is frozen (including the nested `input` object) because it is a
 * shared module-level constant: an accidental write from any call site would
 * silently skew every later cost calculation.
 */
const PRICING = Object.freeze({
  input: Object.freeze({
    text: 0.50, // text / image / video
    audio: 1.00, // audio
  }),
  output: 3.00, // output (including thinking tokens)
});
|
|
56
|
+
|
|
57
|
+
/**
 * Token usage tracker.
 *
 * Accumulates the `usageMetadata` of API responses across several calls and
 * turns the totals into a cost estimate based on the module-level `PRICING`
 * table (US dollars per one million tokens).
 */
class UsageTracker {
  constructor() {
    this.reset();
  }

  /** Clear every counter back to zero. */
  reset() {
    this.inputTokens = { text: 0, audio: 0, image: 0, video: 0 };
    this.outputTokens = 0;
    this.thoughtTokens = 0;
    this.apiCalls = 0;
  }

  /**
   * Extract the usage metadata from an API response and add it to the totals.
   *
   * Responses without `usageMetadata` (including `null`/`undefined`) are
   * ignored and do not count as an API call.
   *
   * @param {object} [response] - API response; only `usageMetadata` is read.
   */
  addFromResponse(response) {
    const usage = response?.usageMetadata;
    if (!usage) return;

    this.apiCalls++;

    // Output tokens; thinking tokens are tracked separately.
    this.outputTokens += usage.candidatesTokenCount || 0;
    this.thoughtTokens += usage.thoughtsTokenCount || 0;

    // Input tokens, classified by modality.
    const details = usage.promptTokensDetails;
    if (Array.isArray(details) && details.length > 0) {
      for (const detail of details) {
        const modality = String(detail?.modality || 'TEXT').toLowerCase();
        const count = detail?.tokenCount || 0;
        // Any modality without a dedicated counter is counted as text.
        const bucket = ['audio', 'image', 'video'].includes(modality) ? modality : 'text';
        this.inputTokens[bucket] += count;
      }
    } else {
      // Fallback: with no usable per-modality breakdown (missing, empty or
      // not an array) the whole prompt is counted as text, so the input
      // tokens are never silently dropped.
      this.inputTokens.text += usage.promptTokenCount || 0;
    }
  }

  /**
   * Compute the estimated cost in US dollars.
   *
   * @returns {{inputCost: number, outputCost: number, totalCost: number}}
   */
  calculateCost() {
    // Image and video tokens are charged at the text rate; audio has its own.
    const inputCost =
      (this.inputTokens.text + this.inputTokens.image + this.inputTokens.video) / 1_000_000 * PRICING.input.text +
      this.inputTokens.audio / 1_000_000 * PRICING.input.audio;

    // The output cost includes thinking tokens.
    const outputCost = (this.outputTokens + this.thoughtTokens) / 1_000_000 * PRICING.output;

    return {
      inputCost,
      outputCost,
      totalCost: inputCost + outputCost
    };
  }

  /**
   * Build a structured summary of the calls, tokens and cost recorded so far.
   *
   * @returns {object} `{ apiCalls, tokens: { input, output, thought,
   *   totalOutput, total }, cost: { input, output, total, formatted } }`.
   */
  getSummary() {
    const cost = this.calculateCost();
    const totalInput = this.inputTokens.text + this.inputTokens.audio + this.inputTokens.image + this.inputTokens.video;
    const totalOutput = this.outputTokens + this.thoughtTokens;

    return {
      apiCalls: this.apiCalls,
      tokens: {
        input: {
          text: this.inputTokens.text,
          audio: this.inputTokens.audio,
          image: this.inputTokens.image,
          video: this.inputTokens.video,
          total: totalInput
        },
        output: this.outputTokens,
        thought: this.thoughtTokens,
        totalOutput: totalOutput,
        total: totalInput + totalOutput
      },
      cost: {
        input: cost.inputCost,
        output: cost.outputCost,
        total: cost.totalCost,
        formatted: `$${cost.totalCost.toFixed(6)}`
      }
    };
  }

  /**
   * Render the summary as Markdown text for display.
   *
   * Per-modality input lines are printed only for modalities that were
   * actually used; every modality that contributes to the input total has a
   * line, so the breakdown adds up to the total shown above it.
   *
   * @returns {string} Newline-joined Markdown.
   */
  formatSummary() {
    const summary = this.getSummary();
    const lines = [];

    lines.push(`📊 **API 使用量統計**`);
    lines.push(`- API 呼叫次數：${summary.apiCalls} 次`);
    lines.push(`- 輸入 Tokens：${summary.tokens.input.total.toLocaleString()}`);
    if (summary.tokens.input.audio > 0) {
      lines.push(` - 音訊：${summary.tokens.input.audio.toLocaleString()}`);
    }
    if (summary.tokens.input.text > 0) {
      lines.push(` - 文字：${summary.tokens.input.text.toLocaleString()}`);
    }
    if (summary.tokens.input.image > 0) {
      lines.push(` - 圖片：${summary.tokens.input.image.toLocaleString()}`);
    }
    if (summary.tokens.input.video > 0) {
      lines.push(` - 影片：${summary.tokens.input.video.toLocaleString()}`);
    }
    lines.push(`- 輸出 Tokens：${summary.tokens.totalOutput.toLocaleString()}`);
    if (summary.tokens.thought > 0) {
      lines.push(` - 思考：${summary.tokens.thought.toLocaleString()}`);
      lines.push(` - 回應：${summary.tokens.output.toLocaleString()}`);
    }
    lines.push(`- 總 Tokens：${summary.tokens.total.toLocaleString()}`);
    lines.push('');
    lines.push(`💰 **預估費用**`);
    lines.push(`- 輸入費用：$${summary.cost.input.toFixed(6)}`);
    lines.push(`- 輸出費用：$${summary.cost.output.toFixed(6)}`);
    lines.push(`- **總費用：${summary.cost.formatted}**`);

    return lines.join('\n');
  }
}
|
|
179
|
+
|
|
48
180
|
/**
|
|
49
181
|
* 計算動態 timeout
|
|
50
182
|
*/
|
|
@@ -62,6 +194,48 @@ function withTimeout(promise, ms, message = 'Operation timed out') {
|
|
|
62
194
|
]);
|
|
63
195
|
}
|
|
64
196
|
|
|
197
|
+
/**
 * Tell whether a path contains any character outside the 7-bit ASCII range.
 *
 * @param {string} str - Path (or any text) to inspect.
 * @returns {boolean} `true` when at least one UTF-16 code unit is above 0x7F.
 */
function hasNonAscii(str) {
  // Coerce exactly like RegExp.prototype.test does before scanning.
  const text = `${str}`;
  for (let i = 0; i < text.length; i += 1) {
    if (text.charCodeAt(i) > 0x7f) {
      return true;
    }
  }
  return false;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Upload a file safely, working around non-ASCII paths.
 *
 * The @google/genai SDK cannot handle file paths that contain Chinese
 * characters, so a path with any non-ASCII character is first copied to an
 * ASCII-named file inside a fresh temporary directory and uploaded from there.
 *
 * On success the caller owns the temporary copy and is expected to remove
 * `path.dirname(tempPath)` once it is done. On failure the temporary
 * directory is removed here before the error is rethrown.
 *
 * @param {object} ai - Client exposing `files.upload({ file, config })`.
 * @param {string} filePath - Path of the file to upload.
 * @param {string} mimeType - MIME type passed through to the upload config.
 * @param {number} timeout - Upload timeout in milliseconds.
 * @returns {Promise<{uploadedFile: object, tempPath: (string|null)}>}
 *   The upload result plus the temporary copy's path (`null` when the
 *   original path was uploaded directly).
 * @throws Rethrows any copy, upload or timeout error unchanged.
 */
async function safeUploadFile(ai, filePath, mimeType, timeout) {
  let tempDir = null;
  let tempPath = null;

  try {
    let actualPath = filePath;

    // If the path contains non-ASCII characters, copy it to a temporary path.
    if (hasNonAscii(filePath)) {
      // mkdtemp guarantees a directory unique to this call. Uploads run in
      // parallel, and a timestamp-based name can be shared by two calls in
      // the same millisecond, letting one call's cleanup delete the other's
      // file while it is still uploading.
      tempDir = await fs.mkdtemp(path.join(tmpdir(), 'upload_'));
      const ext = path.extname(filePath);
      const digest = createHash('md5').update(filePath).digest('hex').slice(0, 8);
      tempPath = path.join(tempDir, `audio_${digest}${ext}`);
      await fs.copyFile(filePath, tempPath);
      actualPath = tempPath;
      console.error(` 📋 已複製到臨時路徑（避免中文路徑問題）`);
    }

    const uploadedFile = await withTimeout(
      ai.files.upload({ file: actualPath, config: { mimeType } }),
      timeout,
      `上傳超時 (${timeout/1000}s)`
    );
    return { uploadedFile, tempPath };
  } catch (error) {
    // Clean up the temporary directory; this also covers a failed copy,
    // which would otherwise leave the directory behind.
    if (tempDir) {
      await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    }
    throw error;
  }
}
|
|
238
|
+
|
|
65
239
|
/**
|
|
66
240
|
* 取得音檔長度(秒)
|
|
67
241
|
*/
|
|
@@ -467,7 +641,7 @@ function escapeRegex(str) {
|
|
|
467
641
|
/**
|
|
468
642
|
* 快速模式:簡單轉錄單一片段(不做語者識別)
|
|
469
643
|
*/
|
|
470
|
-
async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_RETRIES) {
|
|
644
|
+
async function transcribeSingleSegmentFast(ai, segment, model, tracker = null, maxRetries = MAX_RETRIES) {
|
|
471
645
|
const { index, path: segmentPath, offset, duration = SEGMENT_DURATION } = segment;
|
|
472
646
|
let uploadedFile = null;
|
|
473
647
|
const segmentTimeout = calculateTimeout(duration);
|
|
@@ -486,13 +660,12 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
|
|
|
486
660
|
|
|
487
661
|
開始:`;
|
|
488
662
|
|
|
663
|
+
let uploadTempPath = null;
|
|
489
664
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
490
665
|
try {
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
`上傳超時 (${segmentTimeout/1000}s)`
|
|
495
|
-
);
|
|
666
|
+
const uploadResult = await safeUploadFile(ai, segmentPath, 'audio/mpeg', segmentTimeout);
|
|
667
|
+
uploadedFile = uploadResult.uploadedFile;
|
|
668
|
+
uploadTempPath = uploadResult.tempPath;
|
|
496
669
|
|
|
497
670
|
const response = await withTimeout(
|
|
498
671
|
ai.models.generateContent({
|
|
@@ -507,7 +680,13 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
|
|
|
507
680
|
`轉錄超時 (${segmentTimeout/1000}s)`
|
|
508
681
|
);
|
|
509
682
|
|
|
683
|
+
// 追蹤 token 用量
|
|
684
|
+
if (tracker) tracker.addFromResponse(response);
|
|
685
|
+
|
|
510
686
|
ai.files.delete({ name: uploadedFile.name }).catch(() => {});
|
|
687
|
+
if (uploadTempPath) {
|
|
688
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
689
|
+
}
|
|
511
690
|
|
|
512
691
|
let transcript = response.text;
|
|
513
692
|
|
|
@@ -524,6 +703,10 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
|
|
|
524
703
|
ai.files.delete({ name: uploadedFile.name }).catch(() => {});
|
|
525
704
|
uploadedFile = null;
|
|
526
705
|
}
|
|
706
|
+
if (uploadTempPath) {
|
|
707
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
708
|
+
uploadTempPath = null;
|
|
709
|
+
}
|
|
527
710
|
|
|
528
711
|
if (attempt < maxRetries - 1) {
|
|
529
712
|
const delay = RETRY_BASE_DELAY * Math.pow(2, attempt);
|
|
@@ -541,7 +724,7 @@ async function transcribeSingleSegmentFast(ai, segment, model, maxRetries = MAX_
|
|
|
541
724
|
* 語者識別模式:單一 API 呼叫同時轉錄 + 語者識別
|
|
542
725
|
* 回傳格式包含逐字稿和語者資訊,減少 API 呼叫次數
|
|
543
726
|
*/
|
|
544
|
-
async function transcribeSingleSegmentSpeaker(ai, segment, model, speakerProfiles = {}, previousEnding = '', maxRetries = MAX_RETRIES) {
|
|
727
|
+
async function transcribeSingleSegmentSpeaker(ai, segment, model, speakerProfiles = {}, previousEnding = '', tracker = null, maxRetries = MAX_RETRIES) {
|
|
545
728
|
const { index, path: segmentPath, offset, duration = SEGMENT_DURATION } = segment;
|
|
546
729
|
let uploadedFile = null;
|
|
547
730
|
const segmentTimeout = calculateTimeout(duration);
|
|
@@ -596,13 +779,12 @@ ${previousEnding.slice(-400)}
|
|
|
596
779
|
|
|
597
780
|
開始轉錄:`;
|
|
598
781
|
|
|
782
|
+
let uploadTempPath = null;
|
|
599
783
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
600
784
|
try {
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
`上傳超時 (${segmentTimeout/1000}s)`
|
|
605
|
-
);
|
|
785
|
+
const uploadResult = await safeUploadFile(ai, segmentPath, 'audio/mpeg', segmentTimeout);
|
|
786
|
+
uploadedFile = uploadResult.uploadedFile;
|
|
787
|
+
uploadTempPath = uploadResult.tempPath;
|
|
606
788
|
|
|
607
789
|
const response = await withTimeout(
|
|
608
790
|
ai.models.generateContent({
|
|
@@ -617,7 +799,13 @@ ${previousEnding.slice(-400)}
|
|
|
617
799
|
`轉錄超時 (${segmentTimeout/1000}s)`
|
|
618
800
|
);
|
|
619
801
|
|
|
802
|
+
// 追蹤 token 用量
|
|
803
|
+
if (tracker) tracker.addFromResponse(response);
|
|
804
|
+
|
|
620
805
|
ai.files.delete({ name: uploadedFile.name }).catch(() => {});
|
|
806
|
+
if (uploadTempPath) {
|
|
807
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
808
|
+
}
|
|
621
809
|
|
|
622
810
|
const fullText = response.text;
|
|
623
811
|
|
|
@@ -682,7 +870,7 @@ ${previousEnding.slice(-400)}
|
|
|
682
870
|
return {
|
|
683
871
|
index,
|
|
684
872
|
transcript,
|
|
685
|
-
emotion,
|
|
873
|
+
emotion,
|
|
686
874
|
success: true,
|
|
687
875
|
speakerProfiles: newSpeakerProfiles
|
|
688
876
|
};
|
|
@@ -692,6 +880,10 @@ ${previousEnding.slice(-400)}
|
|
|
692
880
|
ai.files.delete({ name: uploadedFile.name }).catch(() => {});
|
|
693
881
|
uploadedFile = null;
|
|
694
882
|
}
|
|
883
|
+
if (uploadTempPath) {
|
|
884
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
885
|
+
uploadTempPath = null;
|
|
886
|
+
}
|
|
695
887
|
|
|
696
888
|
if (attempt < maxRetries - 1) {
|
|
697
889
|
const delay = RETRY_BASE_DELAY * Math.pow(2, attempt);
|
|
@@ -1103,6 +1295,7 @@ ${transcript}
|
|
|
1103
1295
|
async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin) {
|
|
1104
1296
|
const segments = await splitAudioFast(audioPath);
|
|
1105
1297
|
let tempDir = null;
|
|
1298
|
+
const tracker = new UsageTracker();
|
|
1106
1299
|
|
|
1107
1300
|
if (segments.length > 1 && !segments[0].isOriginal) {
|
|
1108
1301
|
tempDir = path.dirname(segments[0].path);
|
|
@@ -1112,10 +1305,7 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
|
|
|
1112
1305
|
|
|
1113
1306
|
if (segments.length === 1) {
|
|
1114
1307
|
console.error('⬆️ 上傳中...');
|
|
1115
|
-
const uploadedFile = await ai
|
|
1116
|
-
file: audioPath,
|
|
1117
|
-
config: { mimeType: 'audio/mpeg' }
|
|
1118
|
-
});
|
|
1308
|
+
const { uploadedFile, tempPath: uploadTempPath } = await safeUploadFile(ai, audioPath, 'audio/mpeg', 120000);
|
|
1119
1309
|
console.error('✅ 上傳完成');
|
|
1120
1310
|
|
|
1121
1311
|
const response = await ai.models.generateContent({
|
|
@@ -1129,8 +1319,12 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
|
|
|
1129
1319
|
config: { maxOutputTokens: 65536, temperature: 0.1 }
|
|
1130
1320
|
});
|
|
1131
1321
|
|
|
1322
|
+
tracker.addFromResponse(response);
|
|
1132
1323
|
transcript = response.text;
|
|
1133
1324
|
try { await ai.files.delete({ name: uploadedFile.name }); } catch {}
|
|
1325
|
+
if (uploadTempPath) {
|
|
1326
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
1327
|
+
}
|
|
1134
1328
|
|
|
1135
1329
|
} else {
|
|
1136
1330
|
// 平行處理
|
|
@@ -1139,7 +1333,7 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
|
|
|
1139
1333
|
|
|
1140
1334
|
for (let i = 0; i < segments.length; i += MAX_WORKERS) {
|
|
1141
1335
|
const batch = segments.slice(i, i + MAX_WORKERS);
|
|
1142
|
-
const batchPromises = batch.map(seg => transcribeSingleSegmentFast(ai, seg, model));
|
|
1336
|
+
const batchPromises = batch.map(seg => transcribeSingleSegmentFast(ai, seg, model, tracker));
|
|
1143
1337
|
const batchResults = await Promise.all(batchPromises);
|
|
1144
1338
|
|
|
1145
1339
|
for (const r of batchResults) {
|
|
@@ -1158,6 +1352,10 @@ async function transcribeAudioFast(audioPath, outputPath, model, ai, durationMin
|
|
|
1158
1352
|
await fs.rm(tempDir, { recursive: true, force: true });
|
|
1159
1353
|
}
|
|
1160
1354
|
|
|
1355
|
+
// 取得使用量摘要
|
|
1356
|
+
const usage = tracker.getSummary();
|
|
1357
|
+
console.error(`💰 費用:${usage.cost.formatted}(${usage.tokens.total.toLocaleString()} tokens)`);
|
|
1358
|
+
|
|
1161
1359
|
// 儲存結果
|
|
1162
1360
|
const outPath = outputPath || audioPath.replace(/\.[^/.]+$/, '.md');
|
|
1163
1361
|
const formattedTimestamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
|
|
@@ -1188,7 +1386,8 @@ ${transcript}
|
|
|
1188
1386
|
duration: `${Math.round(durationMin)} 分鐘`,
|
|
1189
1387
|
segments: segments.length,
|
|
1190
1388
|
summary: '(快速模式不產生摘要)',
|
|
1191
|
-
keywords: ''
|
|
1389
|
+
keywords: '',
|
|
1390
|
+
usage
|
|
1192
1391
|
};
|
|
1193
1392
|
}
|
|
1194
1393
|
|
|
@@ -1198,6 +1397,7 @@ ${transcript}
|
|
|
1198
1397
|
async function transcribeAudioSpeaker(audioPath, outputPath, model, ai, durationMin) {
|
|
1199
1398
|
const segments = await splitAudioSpeaker(audioPath);
|
|
1200
1399
|
let tempDir = null;
|
|
1400
|
+
const tracker = new UsageTracker();
|
|
1201
1401
|
|
|
1202
1402
|
if (segments.length > 1 && !segments[0].isOriginal) {
|
|
1203
1403
|
tempDir = path.dirname(segments[0].path);
|
|
@@ -1228,7 +1428,7 @@ async function transcribeAudioSpeaker(audioPath, outputPath, model, ai, duration
|
|
|
1228
1428
|
|
|
1229
1429
|
// 單一 API 呼叫:同時取得逐字稿 + 情緒分析 + 語者資訊
|
|
1230
1430
|
const result = await transcribeSingleSegmentSpeaker(
|
|
1231
|
-
ai, segment, model, speakerProfiles, previousEnding
|
|
1431
|
+
ai, segment, model, speakerProfiles, previousEnding, tracker
|
|
1232
1432
|
);
|
|
1233
1433
|
|
|
1234
1434
|
if (!result.success) {
|
|
@@ -1278,6 +1478,7 @@ async function transcribeAudioSpeaker(audioPath, outputPath, model, ai, duration
|
|
|
1278
1478
|
]),
|
|
1279
1479
|
config: { maxOutputTokens: 2048, temperature: 0.3 }
|
|
1280
1480
|
});
|
|
1481
|
+
tracker.addFromResponse(summaryResponse);
|
|
1281
1482
|
const summary = summaryResponse.text;
|
|
1282
1483
|
|
|
1283
1484
|
// 生成簡短情緒摘要(一段文字,包含經典句子)
|
|
@@ -1304,6 +1505,7 @@ ${emotionResults.map(r => `[${r.time}分鐘] ${r.emotion.slice(0, 200)}`).join('
|
|
|
1304
1505
|
contents: createUserContent([emotionSummaryPrompt]),
|
|
1305
1506
|
config: { maxOutputTokens: 1024, temperature: 0.3 }
|
|
1306
1507
|
});
|
|
1508
|
+
tracker.addFromResponse(emotionSummaryResponse);
|
|
1307
1509
|
emotionSummary = emotionSummaryResponse.text;
|
|
1308
1510
|
console.error(' ✅ 情緒摘要完成');
|
|
1309
1511
|
} catch (error) {
|
|
@@ -1409,13 +1611,18 @@ ${finalTranscript}
|
|
|
1409
1611
|
await fs.writeFile(outPath, finalContent, 'utf8');
|
|
1410
1612
|
console.error(`📄 [語者識別模式] 逐字稿已儲存: ${outPath}`);
|
|
1411
1613
|
|
|
1614
|
+
// 取得使用量摘要
|
|
1615
|
+
const usage = tracker.getSummary();
|
|
1616
|
+
console.error(`💰 費用:${usage.cost.formatted}(${usage.tokens.total.toLocaleString()} tokens)`);
|
|
1617
|
+
|
|
1412
1618
|
return {
|
|
1413
1619
|
outputPath: outPath,
|
|
1414
1620
|
duration: metadata.duration,
|
|
1415
1621
|
segments: segments.length,
|
|
1416
1622
|
summary,
|
|
1417
1623
|
keywords: '',
|
|
1418
|
-
speakerProfiles
|
|
1624
|
+
speakerProfiles,
|
|
1625
|
+
usage
|
|
1419
1626
|
};
|
|
1420
1627
|
}
|
|
1421
1628
|
|
|
@@ -1612,10 +1819,7 @@ async function describeImage(imagePath, outputPath = null, detailLevel = 'detail
|
|
|
1612
1819
|
|
|
1613
1820
|
const prompt = detailPrompts[detailLevel] || detailPrompts.detailed;
|
|
1614
1821
|
|
|
1615
|
-
const uploadedFile = await ai
|
|
1616
|
-
file: imagePath,
|
|
1617
|
-
config: { mimeType: 'image/jpeg' }
|
|
1618
|
-
});
|
|
1822
|
+
const { uploadedFile, tempPath: uploadTempPath } = await safeUploadFile(ai, imagePath, 'image/jpeg', 60000);
|
|
1619
1823
|
|
|
1620
1824
|
const response = await ai.models.generateContent({
|
|
1621
1825
|
model: MODEL_NAME,
|
|
@@ -1627,6 +1831,9 @@ async function describeImage(imagePath, outputPath = null, detailLevel = 'detail
|
|
|
1627
1831
|
});
|
|
1628
1832
|
|
|
1629
1833
|
try { await ai.files.delete({ name: uploadedFile.name }); } catch { /* ignore */ }
|
|
1834
|
+
if (uploadTempPath) {
|
|
1835
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
1836
|
+
}
|
|
1630
1837
|
|
|
1631
1838
|
const description = response.text;
|
|
1632
1839
|
const outPath = outputPath || imagePath.replace(/\.[^/.]+$/, '.md');
|
|
@@ -1687,43 +1894,36 @@ server.tool(
|
|
|
1687
1894
|
|
|
1688
1895
|
// 根據模式顯示不同結果(從結果中取得實際使用的模式)
|
|
1689
1896
|
const actualMode = result.mode || (mode || process.env.DEFAULT_MODE || 'fast');
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
type: 'text',
|
|
1694
|
-
text: `✅ [快速模式] 音訊轉錄完成!
|
|
1897
|
+
|
|
1898
|
+
// 格式化費用資訊(簡潔版)
|
|
1899
|
+
const usageInfo = result.usage ? `
|
|
1695
1900
|
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
};
|
|
1701
|
-
} else if (selectedMode === 'hybrid') {
|
|
1702
|
-
const speakerInfo = result.speakerProfiles ?
|
|
1703
|
-
Object.entries(result.speakerProfiles)
|
|
1704
|
-
.map(([name, info]) => `- **${name}**(${[info.gender, info.role].filter(Boolean).join(',') || '?'})`)
|
|
1705
|
-
.join('\n') : '';
|
|
1901
|
+
💰 **費用統計**
|
|
1902
|
+
- 輸入 Tokens:${result.usage.tokens.input.total.toLocaleString()}
|
|
1903
|
+
- 輸出 Tokens:${result.usage.tokens.totalOutput.toLocaleString()}
|
|
1904
|
+
- **總費用:${result.usage.cost.formatted}**` : '';
|
|
1706
1905
|
|
|
1906
|
+
if (actualMode === 'fast') {
|
|
1707
1907
|
return {
|
|
1708
1908
|
content: [{
|
|
1709
1909
|
type: 'text',
|
|
1710
|
-
text: `✅ [
|
|
1910
|
+
text: `✅ [快速模式] 音訊轉錄完成!
|
|
1711
1911
|
|
|
1712
1912
|
📁 **輸出檔案**: ${result.outputPath}
|
|
1713
1913
|
⏱️ **總時長**: ${result.duration}
|
|
1714
1914
|
📊 **處理區塊**: ${result.segments} 個
|
|
1715
|
-
|
|
1716
|
-
## 語者資訊
|
|
1717
|
-
${speakerInfo || '(未識別)'}
|
|
1718
|
-
|
|
1719
|
-
## 摘要
|
|
1720
|
-
${result.summary}`
|
|
1915
|
+
${usageInfo}`
|
|
1721
1916
|
}]
|
|
1722
1917
|
};
|
|
1723
1918
|
} else {
|
|
1919
|
+
// speaker 模式
|
|
1724
1920
|
const speakerInfo = result.speakerProfiles ?
|
|
1725
1921
|
Object.entries(result.speakerProfiles)
|
|
1726
|
-
.map(([id, info]) =>
|
|
1922
|
+
.map(([id, info]) => {
|
|
1923
|
+
const name = info.name || id;
|
|
1924
|
+
const details = [info.gender, info.role, info.traits].filter(Boolean).join(',');
|
|
1925
|
+
return `- **${name}**:${details || '待識別'}`;
|
|
1926
|
+
})
|
|
1727
1927
|
.join('\n') : '';
|
|
1728
1928
|
|
|
1729
1929
|
return {
|
|
@@ -1739,7 +1939,8 @@ ${result.summary}`
|
|
|
1739
1939
|
${speakerInfo || '(未識別)'}
|
|
1740
1940
|
|
|
1741
1941
|
## 摘要
|
|
1742
|
-
${result.summary}
|
|
1942
|
+
${result.summary || '(無)'}
|
|
1943
|
+
${usageInfo}`
|
|
1743
1944
|
}]
|
|
1744
1945
|
};
|
|
1745
1946
|
}
|
|
@@ -1849,11 +2050,8 @@ server.tool(
|
|
|
1849
2050
|
ffmpeg.on('error', reject);
|
|
1850
2051
|
});
|
|
1851
2052
|
|
|
1852
|
-
//
|
|
1853
|
-
const uploadedFile = await ai
|
|
1854
|
-
file: segmentPath,
|
|
1855
|
-
config: { mimeType: 'audio/mpeg' }
|
|
1856
|
-
});
|
|
2053
|
+
// 上傳並分析(使用 safeUploadFile 處理中文路徑)
|
|
2054
|
+
const { uploadedFile, tempPath: uploadTempPath } = await safeUploadFile(ai, segmentPath, 'audio/mpeg', 60000);
|
|
1857
2055
|
|
|
1858
2056
|
const defaultQuestion = `分析這段音頻中說話的人:
|
|
1859
2057
|
|
|
@@ -1876,6 +2074,9 @@ server.tool(
|
|
|
1876
2074
|
|
|
1877
2075
|
// 清理
|
|
1878
2076
|
ai.files.delete({ name: uploadedFile.name }).catch(() => {});
|
|
2077
|
+
if (uploadTempPath) {
|
|
2078
|
+
await fs.rm(path.dirname(uploadTempPath), { recursive: true, force: true }).catch(() => {});
|
|
2079
|
+
}
|
|
1879
2080
|
await fs.rm(tempDir, { recursive: true, force: true });
|
|
1880
2081
|
|
|
1881
2082
|
const timeRange = `${timestamp || '00:00'} - ${formatTime(startSeconds + duration)}`;
|