@willh/subtitle-correction-agent 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +18 -1
  2. package/dist/index.js +537 -159
  3. package/package.json +27 -33
package/README.md CHANGED
@@ -31,6 +31,13 @@ npx @willh/subtitle-correction-agent <vtt-file-path>
31
31
 
32
32
  ## 環境設定
33
33
 
34
+ 本工具會依環境變數自動選擇 AI 提供者:
35
+
36
+ - 若已完整設定 `AZURE_OPENAI_*` 相關環境變數,則使用 **Azure OpenAI (BYOK)**。
37
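+ - 若未設定 Azure,但已設定 `OPENAI_API_KEY`,則使用 **OpenAI**;其餘情況使用 **GitHub Copilot CLI (Default)**(需先安裝並完成登入,且終端機可執行 `copilot`)。也可透過 `AI_PROVIDER` 環境變數或 `--ai-provider` 選項明確指定提供者(`azure`、`openai`、`github`)。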
38
+
39
+ ### 使用 Azure OpenAI (BYOK)
40
+
34
41
  複製 `.env.example` 為 `.env` 並填入 Azure OpenAI 的設定:
35
42
 
36
43
  ```bash
@@ -46,6 +53,13 @@ cp .env.example .env
46
53
  | `AZURE_OPENAI_DEPLOYMENT` | Azure OpenAI 部署名稱 |
47
54
  | `AZURE_OPENAI_API_VERSION` | API 版本 (預設: 2024-10-21) |
48
55
 
56
+ ### 使用 GitHub Copilot CLI (Default)
57
+
58
+ 若你要使用預設的 GitHub Copilot CLI:
59
+
60
+ - 請不要設定 `AZURE_OPENAI_ENDPOINT` / `AZURE_OPENAI_API_KEY` / `AZURE_OPENAI_DEPLOYMENT`(可不建立 `.env`)。
61
+ - 若已複製 `.env.example`,請把上述三個變數的值清空或移除,避免誤判為 Azure 模式。
62
+
49
63
  ## 使用方式
50
64
 
51
65
  ```bash
@@ -55,6 +69,9 @@ subtitle-correction-agent <vtt-file-path>
55
69
  # 指定輸出目錄
56
70
  subtitle-correction-agent <vtt-file-path> --output-dir <output-directory>
57
71
 
72
+ # 顯示版本與環境資訊
73
+ subtitle-correction-agent --version
74
+
58
75
  # 範例
59
76
  subtitle-correction-agent ./examples/sample.vtt
60
77
  subtitle-correction-agent "C:\Videos\lecture.vtt" --output-dir ./output
@@ -63,7 +80,7 @@ subtitle-correction-agent "C:\Videos\lecture.vtt" --output-dir ./output
63
80
  ## Telegram Bot
64
81
 
65
82
  只要使用者上傳 `.vtt` 檔案,Bot 會自動校正字幕並回傳所有輸出檔案。
66
- 同樣需要設定 Azure OpenAI 的環境變數。
83
+ 目前 Telegram Bot 只支援 Azure OpenAI (BYOK),因此需要設定 Azure OpenAI 的環境變數。
67
84
 
68
85
  ### 環境變數
69
86
 
package/dist/index.js CHANGED
@@ -3270,9 +3270,10 @@ var require_main = __commonJS((exports) => {
3270
3270
  });
3271
3271
 
3272
3272
  // src/index.ts
3273
- import * as path5 from "path";
3273
+ import * as path6 from "path";
3274
3274
  import * as fs5 from "fs";
3275
3275
  import { fileURLToPath } from "url";
3276
+ import { execSync } from "child_process";
3276
3277
 
3277
3278
  // node_modules/@github/copilot-sdk/dist/client.js
3278
3279
  var import_node = __toESM(require_main(), 1);
@@ -3942,81 +3943,295 @@ class CopilotClient {
3942
3943
 
3943
3944
  // src/agent.ts
3944
3945
  import * as fs4 from "fs";
3945
- import * as path4 from "path";
3946
+ import * as path5 from "path";
3946
3947
 
3947
3948
  // src/tools/read-vtt.ts
3948
3949
  import * as fs from "fs";
3949
- import * as path from "path";
3950
+ import * as path2 from "path";
3950
3951
 
3951
3952
  // src/utils/vtt-parser.ts
3952
- function parseTimeToSeconds(timeStr) {
3953
- const parts = timeStr.trim().split(":");
3954
- if (parts.length === 3) {
3955
- const [hours, minutes, seconds] = parts;
3956
- return parseInt(hours, 10) * 3600 + parseInt(minutes, 10) * 60 + parseFloat(seconds);
3957
- } else if (parts.length === 2) {
3958
- const [minutes, seconds] = parts;
3959
- return parseInt(minutes, 10) * 60 + parseFloat(seconds);
3960
- }
3961
- return parseFloat(timeStr);
3953
+ import * as path from "path";
3954
+
3955
+ // src/utils/caption-converter.ts
3956
// Fixed preamble placed at the top of every rendered .ass document: the
// [Script Info] and [V4+ Styles] sections followed by the [Events] section
// header and its column layout. renderAss appends its "Dialogue:" lines
// after this text.
var ASS_HEADER = `[Script Info]
Title: Converted from WebVTT
ScriptType: v4.00+
WrapStyle: 0
PlayResX: 1280
PlayResY: 720
ScaledBorderAndShadow: yes

[V4+ Styles]
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding
Style: Default,LINE Seed TW_OTF Bold,48,&H0080FFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,1,0,1,2,0,2,1,1,20,1
Style: Secondary,Helvetica,12,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,1,1,20,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`;
// Subtitle file extensions (lowercase, including the leading dot) that the
// converter can parse and render.
var SUPPORTED_EXTS = new Set([".srt", ".vtt", ".ass"]);
3973
/**
 * Canonicalizes raw subtitle text before it is parsed.
 *
 * @param {string} content - Raw file contents.
 * @returns {string} The text with every CRLF / lone CR turned into LF and a
 *   leading byte-order mark (U+FEFF) removed.
 */
function normalizeInput(content) {
  // "\r\n?" covers both Windows (CRLF) and old-Mac (CR) line endings.
  const unified = content.replace(/\r\n?/g, "\n");
  const startsWithBom = unified.charCodeAt(0) === 0xfeff;
  return startsWithBom ? unified.slice(1) : unified;
}
3963
- function parseVtt(content, filePath) {
3964
- const lines = content.replace(/\r\n/g, `
3965
- `).split(`
3982
/**
 * Splits `text` on `delimiter` into at most `limit` pieces. Unlike
 * `String.prototype.split(sep, limit)`, the remainder is NOT discarded: the
 * last piece keeps every further delimiter occurrence verbatim.
 *
 * @param {string} text - String to split.
 * @param {string} delimiter - Separator; may be longer than one character.
 * @param {number} limit - Maximum number of pieces to return.
 * @returns {string[]} Between 1 and `limit` pieces (always at least one).
 */
function splitWithLimit(text, delimiter, limit) {
  // An empty delimiter can never match; return the text as a single piece.
  if (delimiter.length === 0) {
    return [text];
  }
  const parts = [];
  let cursor = 0;
  // Jump from match to match with indexOf instead of concatenating the
  // result one character at a time.
  while (parts.length < limit - 1) {
    const hit = text.indexOf(delimiter, cursor);
    if (hit === -1) {
      break;
    }
    parts.push(text.slice(cursor, hit));
    cursor = hit + delimiter.length;
  }
  parts.push(text.slice(cursor));
  return parts;
}
3999
/**
 * Parses an SRT or WebVTT timestamp into milliseconds.
 *
 * Accepted shapes are `HH:MM:SS.mmm` and `MM:SS.mmm`, with either "," (SRT)
 * or "." (WebVTT) before the fraction. The fraction may have 1-3 digits and
 * is right-padded with zeros ("5" means 500 ms).
 *
 * @param {string} raw - Timestamp text; surrounding whitespace is ignored.
 * @returns {number|null} Milliseconds, or null when the text is malformed.
 */
function parseSrtVttTime(raw) {
  const clean = raw.trim().replace(",", ".");
  const pieces = clean.split(".");
  if (pieces.length !== 2) {
    return null;
  }
  const [timePart, msPart] = pieces;
  if (!/^\d{1,3}$/.test(msPart)) {
    return null;
  }
  const timePieces = timePart.split(":");
  if (timePieces.length !== 2 && timePieces.length !== 3) {
    return null;
  }
  // Require plain decimal digits. Number() alone would accept "" (as 0),
  // signs, hex ("0x10") and exponents ("1e3"), letting malformed timing
  // lines through with bogus or negative values.
  if (!timePieces.every((piece) => /^\d+$/.test(piece))) {
    return null;
  }
  const numbers = timePieces.map(Number);
  const [hours, minutes, seconds] = numbers.length === 3 ? numbers : [0, numbers[0], numbers[1]];
  const ms = Number(msPart.padEnd(3, "0"));
  return ((hours * 60 + minutes) * 60 + seconds) * 1000 + ms;
}
4022
/**
 * Parses SRT or WebVTT text into a list of cues.
 *
 * The input is split into blocks on blank lines. In each block the first
 * line containing "-->" is the timing line; anything before it (cue number
 * or identifier) is ignored and everything after it is the cue text. Blocks
 * without a valid timing line (e.g. the "WEBVTT" header) are skipped.
 *
 * @param {string} content - Raw file contents.
 * @returns {{start: number, end: number, text: string}[]} Cues with times in
 *   milliseconds and trailing whitespace removed from the text.
 */
function parseSrtVtt(content) {
  const cues = [];
  // Treat lines holding only spaces/tabs as blank too. Splitting on "\n\n"
  // alone would glue two cues together whenever the separator line carries
  // stray whitespace, leaking the second timing line into the first cue.
  const blocks = normalizeInput(content).split(/\n(?:[ \t]*\n)+/);
  for (const block of blocks) {
    const lines = block.split("\n");
    const timeIndex = lines.findIndex((line) => line.includes("-->"));
    if (timeIndex === -1) {
      continue;
    }
    const match = lines[timeIndex].match(/(.+?)\s*-->\s*(.+)/);
    if (!match) {
      continue;
    }
    const start = parseSrtVttTime(match[1].trim());
    // Only the first token after the arrow is the end time; anything that
    // follows it on the line (such as WebVTT cue settings) is dropped.
    const end = parseSrtVttTime(match[2].trim().split(/\s+/)[0]);
    if (start === null || end === null) {
      continue;
    }
    const text = lines.slice(timeIndex + 1).join("\n").replace(/\s+$/, "");
    cues.push({ start, end, text });
  }
  return cues;
}
4053
/**
 * Parses an ASS timestamp of the form `H:MM:SS.cc` into milliseconds.
 *
 * The fraction is in centiseconds; a single digit is right-padded with a
 * zero ("5" means 50 cs).
 *
 * @param {string} raw - Timestamp text; surrounding whitespace is ignored.
 * @returns {number|null} Milliseconds, or null if the text does not match.
 */
function parseAssTime(raw) {
  const parsed = /^(\d+):(\d{2}):(\d{2})\.(\d{1,2})$/.exec(raw.trim());
  if (parsed === null) {
    return null;
  }
  // Every captured group consists solely of digits, so none of these
  // conversions can produce NaN.
  const hours = Number(parsed[1]);
  const minutes = Number(parsed[2]);
  const seconds = Number(parsed[3]);
  const centiseconds = Number(parsed[4].padEnd(2, "0"));
  const wholeSeconds = (hours * 60 + minutes) * 60 + seconds;
  return wholeSeconds * 1000 + centiseconds * 10;
}
4067
/**
 * Converts ASS dialogue text to plain text.
 *
 * @param {string} text - Raw text field of a Dialogue line.
 * @returns {string} The text with `{...}` override blocks removed, `\N` and
 *   `\n` turned into line breaks, and `\h` turned into a space.
 */
function assToPlainText(text) {
  return text
    .replace(/\{[^}]*\}/g, "")
    .replace(/\\[Nn]/g, "\n")
    .replace(/\\h/g, " ");
}
4076
/**
 * Parses the [Events] section of an ASS/SSA document into cues.
 *
 * Only lines inside `[Events]` are considered. The section's `Format:` line
 * determines which comma-separated columns hold Start, End and Text;
 * `Dialogue:` lines seen before a usable Format line, with too few fields,
 * or with unparseable times are skipped. All other lines are ignored.
 *
 * @param {string} content - Raw file contents.
 * @returns {{start: number, end: number, text: string}[]} Cues with times in
 *   milliseconds and text converted by assToPlainText.
 */
function parseAss(content) {
  const cues = [];
  let insideEvents = false;
  let columns = null;
  let startCol = -1;
  let endCol = -1;
  let textCol = -1;
  for (const rawLine of normalizeInput(content).split("\n")) {
    const line = rawLine.trim();
    const lowered = line.toLowerCase();
    // A "[Section]" header switches event parsing on or off.
    if (line.startsWith("[") && line.endsWith("]")) {
      insideEvents = lowered === "[events]";
      continue;
    }
    if (!insideEvents) {
      continue;
    }
    if (lowered.startsWith("format:")) {
      columns = line.slice(7).split(",").map((name) => name.trim());
      const lookup = columns.map((name) => name.toLowerCase());
      startCol = lookup.indexOf("start");
      endCol = lookup.indexOf("end");
      textCol = lookup.indexOf("text");
      continue;
    }
    const usable = lowered.startsWith("dialogue:") && columns !== null && startCol !== -1 && endCol !== -1 && textCol !== -1;
    if (!usable) {
      continue;
    }
    // Capping the split at the column count keeps commas that belong to the
    // final field inside that field.
    const fields = splitWithLimit(line.slice(9).trim(), ",", columns.length);
    if (fields.length < columns.length) {
      continue;
    }
    const start = parseAssTime(fields[startCol] ?? "");
    const end = parseAssTime(fields[endCol] ?? "");
    if (start === null || end === null) {
      continue;
    }
    cues.push({ start, end, text: assToPlainText(fields[textCol] ?? "") });
  }
  return cues;
}
4124
/**
 * Left-pads a value with zeros.
 *
 * @param {*} value - Value to render; converted with String().
 * @param {number} size - Minimum width of the result.
 * @returns {string} The value as text, at least `size` characters wide.
 */
function pad(value, size) {
  const digits = String(value);
  return digits.padStart(size, "0");
}
4127
/**
 * Formats a millisecond offset as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * @param {number} ms - Offset in milliseconds; rounded to the nearest
 *   integer and clamped to be non-negative.
 * @returns {string} Timestamp with a comma before the milliseconds.
 */
function formatSrtTime(ms) {
  const totalMs = Math.max(0, Math.round(ms));
  const totalSeconds = Math.floor(totalMs / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const hh = pad(Math.floor(totalMinutes / 60), 2);
  const mm = pad(totalMinutes % 60, 2);
  const ss = pad(totalSeconds % 60, 2);
  const fraction = pad(totalMs % 1000, 3);
  return `${hh}:${mm}:${ss},${fraction}`;
}
4135
/**
 * Formats a millisecond offset as a WebVTT timestamp (`HH:MM:SS.mmm`).
 *
 * @param {number} ms - Offset in milliseconds; rounded to the nearest
 *   integer and clamped to be non-negative.
 * @returns {string} Timestamp with a dot before the milliseconds.
 */
function formatVttTime(ms) {
  const clamped = Math.max(0, Math.round(ms));
  const clock = [
    Math.floor(clamped / 3600000),
    Math.floor((clamped % 3600000) / 60000),
    Math.floor((clamped % 60000) / 1000),
  ]
    .map((unit) => pad(unit, 2))
    .join(":");
  return `${clock}.${pad(clamped % 1000, 3)}`;
}
4143
/**
 * Formats a millisecond offset as an ASS timestamp (`H:MM:SS.cc`).
 *
 * @param {number} ms - Offset in milliseconds; rounded to the nearest
 *   centisecond and clamped to be non-negative.
 * @returns {string} Timestamp with unpadded hours and two-digit centiseconds.
 */
function formatAssTime(ms) {
  const CS_PER_SECOND = 100;
  const CS_PER_MINUTE = 60 * CS_PER_SECOND;
  const CS_PER_HOUR = 60 * CS_PER_MINUTE;
  const totalCs = Math.max(0, Math.round(ms / 10));
  const hours = Math.floor(totalCs / CS_PER_HOUR);
  const minutes = Math.floor((totalCs % CS_PER_HOUR) / CS_PER_MINUTE);
  const seconds = Math.floor((totalCs % CS_PER_MINUTE) / CS_PER_SECOND);
  const cs = totalCs % CS_PER_SECOND;
  return `${hours}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(cs, 2)}`;
}
4153
/**
 * Encodes plain cue text for an ASS Dialogue line.
 *
 * @param {string} text - Plain text, possibly multi-line.
 * @returns {string} The text with carriage returns removed and each line
 *   break written as the two-character sequence `\N`.
 */
function plainToAssText(text) {
  const withoutCr = text.replace(/\r/g, "");
  return withoutCr.replace(/\n/g, "\\N");
}
4157
/**
 * Renders cues as SRT text.
 *
 * Each cue becomes a block of "<1-based number>", "<start> --> <end>" and
 * the cue text; blocks are separated by one blank line.
 *
 * @param {{start: number, end: number, text?: string}[]} cues - Cues with
 *   times in milliseconds; a missing text is rendered as empty.
 * @returns {string} SRT document ending in a newline, or "" for no cues.
 */
function renderSrt(cues) {
  const body = cues
    .map((cue, position) => {
      const timing = `${formatSrtTime(cue.start)} --> ${formatSrtTime(cue.end)}`;
      return [String(position + 1), timing, cue.text ?? ""].join("\n");
    })
    .join("\n\n");
  return cues.length > 0 ? `${body}\n` : body;
}
4169
/**
 * Renders cues as a WebVTT document.
 *
 * The output starts with the "WEBVTT" header and a blank line, followed by
 * one "<start> --> <end>" + text block per cue, separated by blank lines.
 *
 * @param {{start: number, end: number, text?: string}[]} cues - Cues with
 *   times in milliseconds; a missing text is rendered as empty.
 * @returns {string} WebVTT text; with no cues it is just the header and a
 *   blank line.
 */
function renderVtt(cues) {
  const blocks = cues.map((cue) => {
    const timing = `${formatVttTime(cue.start)} --> ${formatVttTime(cue.end)}`;
    return `${timing}\n${cue.text ?? ""}`;
  });
  const trailer = blocks.length > 0 ? "\n" : "";
  return ["WEBVTT", "", blocks.join("\n\n")].join("\n") + trailer;
}
4182
/**
 * Renders cues as an ASS document.
 *
 * The output is ASS_HEADER (without trailing whitespace) followed by one
 * "Dialogue:" line per cue using layer 0, the "Default" style, zero margins
 * and empty Name/Effect fields.
 *
 * @param {{start: number, end: number, text?: string}[]} cues - Cues with
 *   times in milliseconds; a missing text is rendered as empty.
 * @returns {string} ASS text ending in a newline.
 */
function renderAss(cues) {
  let output = ASS_HEADER.trimEnd();
  for (const { start, end, text } of cues) {
    const encoded = plainToAssText(text ?? "");
    output += `\nDialogue: 0,${formatAssTime(start)},${formatAssTime(end)},Default,,0,0,0,,${encoded}`;
  }
  return `${output}\n`;
}
4192
/**
 * Parses subtitle text with the parser matching a file extension.
 *
 * @param {string} ext - Extension including the dot; case-insensitive.
 * @param {string} content - Raw file contents.
 * @returns {{start: number, end: number, text: string}[]} Parsed cues, or an
 *   empty array when the extension is not .srt, .vtt or .ass.
 */
function parseByExt(ext, content) {
  const kind = ext.toLowerCase();
  // SRT and WebVTT share one parser.
  if (kind === ".srt" || kind === ".vtt") {
    return parseSrtVtt(content);
  }
  if (kind === ".ass") {
    return parseAss(content);
  }
  return [];
}
4204
/**
 * Renders cues in the format matching a file extension.
 *
 * @param {string} ext - Extension including the dot; case-insensitive.
 * @param {{start: number, end: number, text?: string}[]} cues - Cues with
 *   times in milliseconds.
 * @returns {string} The rendered document, or "" when the extension is not
 *   .srt, .vtt or .ass.
 */
function renderByExt(ext, cues) {
  const renderers = new Map([
    [".srt", renderSrt],
    [".vtt", renderVtt],
    [".ass", renderAss],
  ]);
  const render = renderers.get(ext.toLowerCase());
  return render === undefined ? "" : render(cues);
}
4216
+
4217
+ // src/utils/vtt-parser.ts
4218
/**
 * Parses a subtitle file's contents into the cue structure used by the
 * agent's tools.
 *
 * The parser is chosen from the extension of `filePath` (.vtt, .srt or
 * .ass). Any other or missing extension is parsed as WebVTT.
 *
 * @param {string} content - Raw file contents.
 * @param {string} filePath - Path the contents came from; stored on the
 *   result and used to pick the parser.
 * @returns {{filePath: string, cues: object[], rawContent: string}} Each cue
 *   has `index` (0-based), `startTime`/`endTime` (WebVTT-formatted text),
 *   `startSeconds`/`endSeconds` (numbers) and `text`.
 */
function parseVtt(content, filePath) {
  const declaredExt = path.extname(filePath);
  // parseByExt returns no cues for an unrecognised extension, which would
  // silently turn such a file into an empty subtitle set. Fall back to the
  // WebVTT parser so files without a known extension still load.
  const ext = SUPPORTED_EXTS.has(declaredExt.toLowerCase()) ? declaredExt : ".vtt";
  const cues = parseByExt(ext, content).map((cue, idx) => ({
    index: idx,
    startTime: formatVttTime(cue.start),
    endTime: formatVttTime(cue.end),
    // Parsed cue times are in milliseconds; the tools work in seconds.
    startSeconds: cue.start / 1000,
    endSeconds: cue.end / 1000,
    text: cue.text
  }));
  return {
    filePath,
    cues,
    rawContent: content
  };
}
4010
- function formatVtt(vttFile) {
4011
- const lines = ["WEBVTT", ""];
4012
- for (const cue of vttFile.cues) {
4013
- lines.push(`${cue.startTime} --> ${cue.endTime}`);
4014
- lines.push(cue.text);
4015
- lines.push("");
4016
- }
4017
- return lines.join(`
4018
- `);
4019
- }
4020
4235
  function formatCuesForDisplay(vttFile) {
4021
4236
  return vttFile.cues.map((cue) => `[${cue.index}] ${cue.startTime} --> ${cue.endTime}
4022
4237
  ${cue.text}`).join(`
@@ -4031,18 +4246,18 @@ function getCachedVttFile() {
4031
4246
  }
4032
4247
  var readVttTool = {
4033
4248
  name: "read_vtt_file",
4034
- description: "讀取並解析 VTT 字幕檔案,回傳所有字幕的時間戳和內容。",
4249
+ description: "讀取並解析字幕檔案 (支援 .vtt, .srt, .ass),回傳所有字幕的時間戳和內容。",
4035
4250
  parameters: {
4036
4251
  type: "object",
4037
4252
  properties: {
4038
- filePath: { type: "string", description: "VTT 檔案的完整路徑" }
4253
+ filePath: { type: "string", description: "字幕檔案的完整路徑" }
4039
4254
  },
4040
4255
  required: ["filePath"]
4041
4256
  },
4042
4257
  handler: async ({ filePath }) => {
4043
4258
  try {
4044
4259
  console.log(`[DEBUG] readVttTool: Starting to read ${filePath}`);
4045
- const absolutePath = path.resolve(filePath);
4260
+ const absolutePath = path2.resolve(filePath);
4046
4261
  if (!fs.existsSync(absolutePath)) {
4047
4262
  console.log(`[DEBUG] readVttTool: File not found ${absolutePath}`);
4048
4263
  return { success: false, error: `檔案不存在: ${absolutePath}` };
@@ -4096,7 +4311,7 @@ var getVttCuesTool = {
4096
4311
  };
4097
4312
  // src/tools/write-file.ts
4098
4313
  import * as fs2 from "fs";
4099
- import * as path2 from "path";
4314
+ import * as path3 from "path";
4100
4315
  var writeFileTool = {
4101
4316
  name: "write_file",
4102
4317
  description: "將內容寫入到指定的檔案中。用於產生修正清單、章節資訊、總結等輸出檔案。",
@@ -4111,8 +4326,8 @@ var writeFileTool = {
4111
4326
  },
4112
4327
  handler: async ({ filePath, content, encoding = "utf-8" }) => {
4113
4328
  try {
4114
- const absolutePath = path2.resolve(filePath);
4115
- const dir = path2.dirname(absolutePath);
4329
+ const absolutePath = path3.resolve(filePath);
4330
+ const dir = path3.dirname(absolutePath);
4116
4331
  if (!fs2.existsSync(dir)) {
4117
4332
  fs2.mkdirSync(dir, { recursive: true });
4118
4333
  }
@@ -4140,7 +4355,7 @@ var appendFileTool = {
4140
4355
  },
4141
4356
  handler: async ({ filePath, content }) => {
4142
4357
  try {
4143
- const absolutePath = path2.resolve(filePath);
4358
+ const absolutePath = path3.resolve(filePath);
4144
4359
  fs2.appendFileSync(absolutePath, content, "utf-8");
4145
4360
  return {
4146
4361
  success: true,
@@ -4154,7 +4369,7 @@ var appendFileTool = {
4154
4369
  };
4155
4370
  // src/tools/update-vtt.ts
4156
4371
  import * as fs3 from "fs";
4157
- import * as path3 from "path";
4372
+ import * as path4 from "path";
4158
4373
  var updateCueTool = {
4159
4374
  name: "update_cue",
4160
4375
  description: "更新指定索引的字幕內容。用於校正字幕中的錯字。",
@@ -4233,7 +4448,7 @@ var batchUpdateCuesTool = {
4233
4448
  };
4234
4449
  var saveVttTool = {
4235
4450
  name: "save_vtt",
4236
- description: "將修改後的 VTT 字幕儲存到檔案。",
4451
+ description: "將修改後的字幕儲存到檔案 (支援 .vtt, .srt, .ass)。",
4237
4452
  parameters: {
4238
4453
  type: "object",
4239
4454
  properties: {
@@ -4247,8 +4462,14 @@ var saveVttTool = {
4247
4462
  return { success: false, error: "請先使用 read_vtt_file 載入 VTT 檔案" };
4248
4463
  }
4249
4464
  try {
4250
- const targetPath = outputPath ? path3.resolve(outputPath) : vttFile.filePath;
4251
- const content = formatVtt(vttFile);
4465
+ const targetPath = outputPath ? path4.resolve(outputPath) : vttFile.filePath;
4466
+ const ext = path4.extname(targetPath);
4467
+ const captionCues = vttFile.cues.map((c) => ({
4468
+ start: c.startSeconds * 1000,
4469
+ end: c.endSeconds * 1000,
4470
+ text: c.text
4471
+ }));
4472
+ const content = renderByExt(ext, captionCues);
4252
4473
  fs3.writeFileSync(targetPath, content, "utf-8");
4253
4474
  return {
4254
4475
  success: true,
@@ -4348,23 +4569,49 @@ async function sendAndWait(session, prompt, eventLogger) {
4348
4569
  });
4349
4570
  });
4350
4571
  }
4572
/**
 * Tells whether a thrown value is the Copilot response-timeout error.
 *
 * @param {unknown} error - Value caught from a failed request.
 * @returns {boolean} True only for an Error whose message contains the
 *   timeout marker text.
 */
function isCopilotTimeoutError(error) {
  const timeoutMarker = "Timeout: 等待 Copilot 回應超過 120 秒";
  return error instanceof Error && error.message.includes(timeoutMarker);
}
4578
/**
 * Calls sendAndWait and retries it when it fails with a Copilot timeout.
 *
 * Errors that isCopilotTimeoutError does not recognise are rethrown at once.
 * Before retry number k the function waits 2 s x k, capped at 10 s, and logs
 * a warning.
 *
 * @param session - Passed through to sendAndWait.
 * @param prompt - Passed through to sendAndWait.
 * @param eventLogger - Passed through to sendAndWait.
 * @param {number} [maxRetries=5] - Retries allowed after the first attempt.
 * @returns Whatever sendAndWait resolves with.
 * @throws The last error once it is not a timeout or retries are exhausted.
 */
async function sendAndWaitWithRetry(session, prompt, eventLogger, maxRetries = 5) {
  for (let retriesUsed = 0; ; retriesUsed += 1) {
    try {
      return await sendAndWait(session, prompt, eventLogger);
    } catch (error) {
      if (!isCopilotTimeoutError(error) || retriesUsed >= maxRetries) {
        throw error;
      }
      const attempt = retriesUsed + 1;
      // Linear back-off that stops growing at ten seconds.
      const waitMs = Math.min(2000 * attempt, 1e4);
      console.warn(`發生 Copilot Timeout,將在 ${waitMs / 1000} 秒後重試 (${attempt}/${maxRetries})...`);
      await new Promise((wake) => setTimeout(wake, waitMs));
    }
  }
}
4351
4594
  async function runSubtitleCorrectionAgent(config) {
4352
- const { vttFilePath } = config;
4353
- const absolutePath = path4.resolve(vttFilePath);
4354
- const dir = config.outputDir || path4.dirname(absolutePath);
4355
- const baseName = path4.basename(absolutePath, path4.extname(absolutePath));
4595
+ const { vttFilePath, overwrite, aiProvider } = config;
4596
+ const absolutePath = path5.resolve(vttFilePath);
4597
+ const dir = config.outputDir || path5.dirname(absolutePath);
4598
+ const originalExt = path5.extname(absolutePath);
4599
+ const baseName = path5.basename(absolutePath, originalExt);
4600
+ const outputBaseName = overwrite ? baseName : `${baseName}-corrected`;
4601
+ const correctedVttPath = path5.join(dir, `${outputBaseName}${originalExt}`);
4602
+ const correctionsPath = path5.join(dir, `${outputBaseName}-corrections.txt`);
4603
+ const chaptersPath = path5.join(dir, `${outputBaseName}-chapters.txt`);
4604
+ const summaryPath = path5.join(dir, `${outputBaseName}-summary.txt`);
4356
4605
  const debugEvents = isDebugEnabled();
4357
- const eventLogPath = debugEvents ? path4.join(dir, `${baseName}-copilot-events.json`) : undefined;
4606
+ const eventLogPath = debugEvents ? path5.join(dir, `${baseName}-copilot-events.json`) : undefined;
4358
4607
  const eventLogger = eventLogPath ? new EventLogger(eventLogPath) : undefined;
4359
- const correctedVttPath = path4.join(dir, `${baseName}-corrected.vtt`);
4360
- const correctionsPath = path4.join(dir, `${baseName}-corrections.txt`);
4361
- const chaptersPath = path4.join(dir, `${baseName}-chapters.txt`);
4362
- const summaryPath = path4.join(dir, `${baseName}-summary.txt`);
4363
4608
  console.log("=".repeat(60));
4364
4609
  console.log("字幕校正代理人 - 啟動");
4365
4610
  console.log("=".repeat(60));
4366
4611
  console.log(`輸入檔案: ${absolutePath}`);
4367
4612
  console.log(`輸出目錄: ${dir}`);
4613
+ console.log(`覆寫模式: ${overwrite ? "開啟" : "關閉"}`);
4614
+ console.log(`AI Provider: ${aiProvider || process.env.AI_PROVIDER || "(Auto Detect)"}`);
4368
4615
  console.log("=".repeat(60));
4369
4616
  if (eventLogPath) {
4370
4617
  console.log(`偵錯: Copilot events 記錄到 ${eventLogPath}`);
@@ -4375,6 +4622,9 @@ async function runSubtitleCorrectionAgent(config) {
4375
4622
  const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT;
4376
4623
  const azureApiKey = process.env.AZURE_OPENAI_API_KEY;
4377
4624
  const azureDeployment = process.env.AZURE_OPENAI_DEPLOYMENT;
4625
+ const openaiApiKey = process.env.OPENAI_API_KEY;
4626
+ const openaiApiBase = process.env.OPENAI_API_BASE;
4627
+ const selectedProvider = aiProvider || process.env.AI_PROVIDER;
4378
4628
  const tools = [
4379
4629
  readVttTool,
4380
4630
  getVttCuesTool,
@@ -4385,8 +4635,11 @@ async function runSubtitleCorrectionAgent(config) {
4385
4635
  saveVttTool
4386
4636
  ];
4387
4637
  let session;
4388
- if (azureEndpoint && azureApiKey && azureDeployment) {
4638
+ if (selectedProvider === "azure" || !selectedProvider && azureEndpoint && azureApiKey && azureDeployment) {
4389
4639
  console.log("使用 Azure OpenAI 作為模型提供者");
4640
+ if (!azureEndpoint || !azureApiKey || !azureDeployment) {
4641
+ throw new Error("選擇了 Azure Provider 但未設定 AZURE_OPENAI_* 環境變數");
4642
+ }
4390
4643
  session = await client.createSession({
4391
4644
  model: azureDeployment,
4392
4645
  streaming: true,
@@ -4400,16 +4653,31 @@ async function runSubtitleCorrectionAgent(config) {
4400
4653
  },
4401
4654
  tools
4402
4655
  });
4656
+ } else if (selectedProvider === "openai" || !selectedProvider && openaiApiKey) {
4657
+ console.log("使用 OpenAI 作為模型提供者");
4658
+ if (!openaiApiKey) {
4659
+ throw new Error("選擇了 OpenAI Provider 但未設定 OPENAI_API_KEY 環境變數");
4660
+ }
4661
+ session = await client.createSession({
4662
+ model: "gpt-4o",
4663
+ streaming: true,
4664
+ provider: {
4665
+ type: "openai",
4666
+ apiKey: openaiApiKey,
4667
+ baseUrl: openaiApiBase || "https://api.openai.com/v1"
4668
+ },
4669
+ tools
4670
+ });
4403
4671
  } else {
4404
- console.log("未找到完整的 Azure 配置,將使用預設的 GitHub Copilot 認證");
4672
+ console.log("使用 GitHub Copilot 作為模型提供者");
4405
4673
  session = await client.createSession({
4406
4674
  tools
4407
4675
  });
4408
4676
  }
4409
4677
  console.log(`
4410
- 【步驟 1/7】讀取 VTT 檔案並分析主題方向...
4678
+ 【步驟 1/7】讀取字幕檔案並分析主題方向...
4411
4679
  `);
4412
- await sendAndWait(session, `請使用 read_vtt_file 工具讀取 VTT 檔案: "${absolutePath}"
4680
+ await sendAndWaitWithRetry(session, `請使用 read_vtt_file 工具讀取字幕檔案: "${absolutePath}"
4413
4681
 
4414
4682
  讀取完成後,請仔細閱讀整個字幕內容,分析並整理出:
4415
4683
  1. 這部影片的主要談論內容是什麼?
@@ -4417,60 +4685,133 @@ async function runSubtitleCorrectionAgent(config) {
4417
4685
  3. 這是什麼類型的影片?(教學、訪談、講座、會議等)
4418
4686
 
4419
4687
  請給出簡潔但完整的分析。`, eventLogger);
4688
+ const initialVtt = getCachedVttFile();
4689
+ if (!initialVtt) {
4690
+ throw new Error("無法取得字幕檔案內容");
4691
+ }
4692
+ const originalCues = JSON.parse(JSON.stringify(initialVtt.cues));
4420
4693
  console.log(`
4421
4694
  【步驟 2/7】整理主題相關的關鍵字與專有名詞...
4422
4695
  `);
4423
- await sendAndWait(session, `根據你剛才分析出的影片主題方向,請整理出這個主題領域中常見的:
4696
+ await sendAndWaitWithRetry(session, `根據你剛才分析出的影片主題方向,請整理出這個主題領域中常見的:
4424
4697
  1. 專業術語與關鍵字
4425
4698
  2. 專有名詞(人名、產品名、技術名稱等)
4426
4699
  3. 常見的縮寫與其全稱
4427
4700
 
4428
4701
  【重要】請不要參考字幕的實際內容來整理這些詞彙(因為字幕可能有轉錄錯誤)。
4429
4702
  請根據你對這個主題領域的知識,列出這些詞彙的「正確寫法」,這將用於後續的字幕校正。`, eventLogger);
4703
+ const CHUNK_SIZE = config.chunkSize || 100;
4704
+ const totalCues = initialVtt.cues.length;
4705
+ const totalChunks = Math.ceil(totalCues / CHUNK_SIZE);
4706
+ const tempPass1Path = path5.join(dir, `${baseName}-temp-pass1${originalExt}`);
4707
+ console.log(`
4708
+ 【步驟 3-4 (Phase 1)/7】第一階段校正:修正聽寫與拼寫錯誤...
4709
+ `);
4710
+ console.log(`總共有 ${totalCues} 個字幕片段,將分為 ${totalChunks} 個批次進行處理。
4711
+ `);
4712
+ for (let i = 0;i < totalChunks; i++) {
4713
+ const startIdx = i * CHUNK_SIZE;
4714
+ const endIdx = Math.min((i + 1) * CHUNK_SIZE, totalCues);
4715
+ const currentChunk = getCachedVttFile().cues.slice(startIdx, endIdx);
4716
+ console.log(`[Phase 1] 正在處理第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} - ${endIdx - 1})...`);
4717
+ const chunkContent = currentChunk.map((c) => `[${c.index}] ${c.startTime} --> ${c.endTime}
4718
+ ${c.text}`).join(`
4719
+
4720
+ `);
4721
+ await sendAndWaitWithRetry(session, `這是第一階段校正,第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} 到 ${endIdx - 1})。
4722
+
4723
+ ${chunkContent}
4724
+
4725
+ 請根據主題上下文,修正上述字幕中的**聽寫錯誤**與**錯別字**。
4726
+
4727
+ 原則:
4728
+ 1. **僅修正錯誤**:專注於同音異字(如「再/在」)、拼寫錯誤、或明顯的轉錄失誤。
4729
+ 2. **保留原意**:嚴禁重寫句子或加入不存在的資訊。
4730
+ 3. **術語一致**:參考之前的關鍵字列表,確保專有名詞正確。
4731
+
4732
+ 請使用 batch_update_cues 工具進行修正。如果沒有錯誤則無需操作。`, eventLogger);
4733
+ }
4734
+ console.log(`
4735
+ [Phase 1] 完成,儲存暫存檔至: ${tempPass1Path}`);
4736
+ await sendAndWaitWithRetry(session, `請使用 save_vtt 工具將目前進度儲存到 "${tempPass1Path}"`, eventLogger);
4430
4737
  console.log(`
4431
- 【步驟 3-4/7】分析字幕正確性並進行校正...
4738
+ 【步驟 4.5 (Phase 2)/7】第二階段校正:檢查一致性與遺漏...
4432
4739
  `);
4433
- await sendAndWait(session, `現在請逐一分析已載入的 VTT 字幕檔案中每一段 Cue 的正確性。
4740
+ console.log(`正在讀取第一階段校正結果,準備進行複查...
4741
+ `);
4742
+ for (let i = 0;i < totalChunks; i++) {
4743
+ const startIdx = i * CHUNK_SIZE;
4744
+ const endIdx = Math.min((i + 1) * CHUNK_SIZE, totalCues);
4745
+ const currentChunk = getCachedVttFile().cues.slice(startIdx, endIdx);
4746
+ console.log(`[Phase 2] 正在複查第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} - ${endIdx - 1})...`);
4747
+ const chunkContent = currentChunk.map((c) => `[${c.index}] ${c.startTime} --> ${c.endTime}
4748
+ ${c.text}`).join(`
4749
+
4750
+ `);
4751
+ await sendAndWaitWithRetry(session, `這是第二階段複查,第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} 到 ${endIdx - 1})。
4752
+ 這些字幕已經經過初步校正。
4434
4753
 
4435
- 對於每個 Cue:
4436
- 1. 檢查是否有潛在的錯字、同音字錯誤、轉錄錯誤
4437
- 2. 根據上下文和你整理出的正確關鍵字/專有名詞,推理出正確的文字
4438
- 3. 如果需要校正,請使用 batch_update_cues 工具批次更新校正後的字幕
4754
+ ${chunkContent}
4439
4755
 
4440
- 請特別注意:
4441
- - 專業術語和專有名詞的正確性
4442
- - 同音異義字的正確使用
4443
- - 語句的通順和邏輯性
4444
- - 標點符號的使用
4756
+ 請進行最後檢查:
4757
+ 1. **檢查遺漏**:是否有第一階段漏掉的明顯錯誤?
4758
+ 2. **檢查一致性**:專有名詞的使用是否前後一致?
4759
+ 3. **檢查過度校正**:確認第一階段沒有改變原意或產生幻覺(若有,請改回原意)。
4445
4760
 
4446
- 處理完成後,請報告發現了多少個需要校正的問題。`, eventLogger);
4761
+ 請使用 batch_update_cues 工具修正任何殘留問題。若完美則無需操作。`, eventLogger);
4762
+ }
4447
4763
  console.log(`
4448
- 【步驟 5/7】產生修正清單...
4764
+ 所有批次複查完成。
4449
4765
  `);
4450
- await sendAndWait(session, `請整理所有剛才進行的字幕校正,使用 write_file 工具將修正清單寫入到:
4451
- "${correctionsPath}"
4452
-
4453
- 修正清單的格式應該包含:
4454
- 1. 標題:字幕修正清單
4455
- 2. 修正日期與時間
4456
- 3. 原始檔案路徑
4457
- 4. 修正統計(總共修正了多少處)
4458
- 5. 每一筆修正的詳細資訊:
4459
- - Cue 索引與時間戳
4460
- - 原始文字
4461
- - 校正後文字
4462
- - 修正原因說明
4463
-
4464
- 請以清晰易讀的格式呈現。`, eventLogger);
4465
4766
  console.log(`
4466
- 【儲存校正後的 VTT 檔案】
4767
+ 【步驟 5/7】產生修正清單 (自動比對)...
4768
+ `);
4769
+ const finalCues = getCachedVttFile().cues;
4770
+ let correctionsCount = 0;
4771
+ let correctionsContent = `字幕修正清單
4772
+ 產生日期: ${new Date().toLocaleString()}
4773
+ 原始檔案: ${absolutePath}
4774
+
4775
+ `;
4776
+ correctionsContent += `| 索引 | 時間 | 原始文字 | 校正後文字 |
4777
+ `;
4778
+ correctionsContent += `| --- | --- | --- | --- |
4779
+ `;
4780
+ for (let i = 0;i < finalCues.length; i++) {
4781
+ const original = originalCues[i];
4782
+ const final = finalCues[i];
4783
+ if (original.text !== final.text) {
4784
+ correctionsCount++;
4785
+ const oldText = original.text.replace(/\n/g, " ");
4786
+ const newText = final.text.replace(/\n/g, " ");
4787
+ correctionsContent += `| ${final.index} | ${final.startTime} | ${oldText} | ${newText} |
4788
+ `;
4789
+ }
4790
+ }
4791
+ correctionsContent = `總共修正: ${correctionsCount} 處
4792
+
4793
+ ` + correctionsContent;
4794
+ fs4.writeFileSync(correctionsPath, correctionsContent, "utf-8");
4795
+ console.log(`修正清單已寫入: ${correctionsPath} (共 ${correctionsCount} 處修正)`);
4796
+ console.log(`
4797
+ 【儲存校正後的字幕檔案】
4467
4798
  `);
4468
- await sendAndWait(session, `請使用 save_vtt 工具將校正後的字幕儲存到:
4469
- "${correctedVttPath}"`, eventLogger);
4799
+ const vttFile = getCachedVttFile();
4800
+ const captionCues = vttFile.cues.map((c) => ({
4801
+ start: c.startSeconds * 1000,
4802
+ end: c.endSeconds * 1000,
4803
+ text: c.text
4804
+ }));
4805
+ for (const ext of SUPPORTED_EXTS) {
4806
+ const targetPath = path5.join(dir, `${outputBaseName}${ext}`);
4807
+ const content = renderByExt(ext, captionCues);
4808
+ fs4.writeFileSync(targetPath, content, "utf-8");
4809
+ console.log(`已儲存: ${targetPath}`);
4810
+ }
4470
4811
  console.log(`
4471
4812
  【步驟 6/7】建立分段章節...
4472
4813
  `);
4473
- await sendAndWait(session, `你現在是一個 **Video Chapter Segmenter**,負責分析字幕內容並將影片分割成清晰、合理的章節。
4814
+ await sendAndWaitWithRetry(session, `你現在是一個 **Video Chapter Segmenter**,負責分析字幕內容並將影片分割成清晰、合理的章節。
4474
4815
 
4475
4816
  ## 分析準則
4476
4817
  - **主題轉換**: 識別主題或討論重點的變化
@@ -4486,8 +4827,8 @@ async function runSubtitleCorrectionAgent(config) {
4486
4827
  - **風格一致**: 所有標題保持統一的格式和語調
4487
4828
 
4488
4829
  ## 輸出格式要求
4489
- - **嚴格格式**: \`HH:mm:ss 章節標題\`
4490
- - **第一章節**: 必須以 \`00:00:00 開始\` 開頭
4830
+ - **嚴格格式**:
4831
+ - **第一章節**: 必須以
4491
4832
  - **時間精確度**: 使用字幕檔案中的精確時間戳
4492
4833
  - **每行一章**: 每個章節獨立一行
4493
4834
  - **無額外格式**: 不使用項目符號、編號或特殊字元
@@ -4499,7 +4840,6 @@ async function runSubtitleCorrectionAgent(config) {
4499
4840
  - **完整覆蓋**: 確保從頭到尾完整覆蓋影片
4500
4841
 
4501
4842
  ## 輸出範例
4502
- \`\`\`
4503
4843
  00:00:00 開始
4504
4844
  00:00:16 開發過程的混亂
4505
4845
  00:00:28 規格驅動開發的啟發
@@ -4508,7 +4848,6 @@ async function runSubtitleCorrectionAgent(config) {
4508
4848
  00:01:38 團隊協作的重要性
4509
4849
  00:02:04 從混沌到清晰的轉變
4510
4850
  00:02:29 終章:持續改進的承諾
4511
- \`\`\`
4512
4851
 
4513
4852
  請根據校正後的字幕內容,深入分析並自動建立分段章節。
4514
4853
  使用 write_file 工具將章節資訊寫入到:
@@ -4518,7 +4857,7 @@ async function runSubtitleCorrectionAgent(config) {
4518
4857
  console.log(`
4519
4858
  【步驟 7/7】產生內容總結...
4520
4859
  `);
4521
- await sendAndWait(session, `請根據校正後的字幕內容,用一段話總結這部影片的主要內容。
4860
+ await sendAndWaitWithRetry(session, `請根據校正後的字幕內容,用一段話總結這部影片的主要內容。
4522
4861
 
4523
4862
  總結應該:
4524
4863
  1. 概括影片的核心主題
@@ -4534,7 +4873,7 @@ async function runSubtitleCorrectionAgent(config) {
4534
4873
  console.log("=".repeat(60));
4535
4874
  console.log("字幕校正完成!");
4536
4875
  console.log("=".repeat(60));
4537
- console.log(`校正後字幕: ${correctedVttPath}`);
4876
+ console.log(`校正後字幕 (Primary): ${correctedVttPath}`);
4538
4877
  console.log(`修正清單: ${correctionsPath}`);
4539
4878
  console.log(`章節資訊: ${chaptersPath}`);
4540
4879
  console.log(`內容總結: ${summaryPath}`);
@@ -4564,41 +4903,40 @@ function printUsage() {
4564
4903
  ========================
4565
4904
 
4566
4905
  使用方式:
4567
- subtitle-correction-agent <vtt-file-path> [options]
4906
+ subtitle-correction-agent <file-path> [options]
4568
4907
  subtitle-correction-agent --version
4569
4908
 
4570
4909
  參數:
4571
- <vtt-file-path> VTT 字幕檔案的路徑 (必須)
4910
+ <file-path> 字幕檔案的路徑 (支援 .vtt, .srt, .ass)
4572
4911
 
4573
4912
  選項:
4574
- --output-dir <dir> 指定輸出目錄 (預設為 VTT 檔案所在目錄)
4575
- --version, -v 顯示版本與環境資訊
4576
- --help, -h 顯示此說明
4913
+ --output-dir <dir> 指定輸出目錄 (預設為字幕檔案所在目錄)
4914
+ --chunk-size <number> 指定分段處理的大小 (預設: 100)
4915
+ --overwrite 直接覆寫原始檔案 (僅限 Git 專案且無未提交變更時可用)
4916
+ --ai-provider <name> 指定 AI 提供者 (azure, openai, github,預設優先使用環境變數設定)
4917
+ --version, -v 顯示版本與環境資訊
4918
+ --help, -h 顯示此說明
4577
4919
 
4578
4920
  範例:
4579
4921
  subtitle-correction-agent ./video.vtt
4922
+ subtitle-correction-agent ./video.srt --overwrite
4923
+ subtitle-correction-agent ./video.ass --ai-provider azure
4580
4924
  subtitle-correction-agent ./video.vtt --output-dir ./output
4581
- subtitle-correction-agent --version
4582
- npx @willh/subtitle-correction-agent ./video.vtt
4583
- subtitle-correction-agent "C:\\Videos\\lecture.vtt"
4584
-
4585
- 輸出檔案:
4586
- {filename}-corrected.vtt 校正後的字幕檔案
4587
- {filename}-corrections.txt 修正清單
4588
- {filename}-chapters.txt 章節資訊
4589
- {filename}-summary.txt 內容總結
4590
4925
 
4591
4926
  環境變數:
4592
4927
  AZURE_OPENAI_ENDPOINT Azure OpenAI 端點 URL
4593
4928
  AZURE_OPENAI_API_KEY Azure OpenAI API 金鑰
4594
4929
  AZURE_OPENAI_DEPLOYMENT Azure OpenAI 部署名稱
4595
- AZURE_OPENAI_API_VERSION API 版本 (預設: 2024-10-21)
4930
+ OPENAI_API_KEY OpenAI API 金鑰
4931
+ OPENAI_API_BASE OpenAI API Base URL (選填,可用於 Ollama)
4932
+ AI_PROVIDER 預設 AI 提供者 (azure, openai, github)
4596
4933
  `);
4597
4934
  }
4598
4935
  function parseArgs(args) {
4599
4936
  const result = {
4600
4937
  help: false,
4601
- version: false
4938
+ version: false,
4939
+ overwrite: false
4602
4940
  };
4603
4941
  for (let i = 0;i < args.length; i++) {
4604
4942
  const arg = args[i];
@@ -4608,6 +4946,15 @@ function parseArgs(args) {
4608
4946
  result.version = true;
4609
4947
  } else if (arg === "--output-dir" && i + 1 < args.length) {
4610
4948
  result.outputDir = args[++i];
4949
+ } else if (arg === "--chunk-size" && i + 1 < args.length) {
4950
+ const val = parseInt(args[++i], 10);
4951
+ if (!isNaN(val) && val > 0) {
4952
+ result.chunkSize = val;
4953
+ }
4954
+ } else if (arg === "--overwrite") {
4955
+ result.overwrite = true;
4956
+ } else if (arg === "--ai-provider" && i + 1 < args.length) {
4957
+ result.aiProvider = args[++i];
4611
4958
  } else if (!arg.startsWith("-") && !result.vttFilePath) {
4612
4959
  result.vttFilePath = arg;
4613
4960
  }
@@ -4616,13 +4963,13 @@ function parseArgs(args) {
4616
4963
  }
4617
4964
  function resolveProjectRoot() {
4618
4965
  const currentFile = fileURLToPath(import.meta.url);
4619
- let dir = path5.dirname(currentFile);
4966
+ let dir = path6.dirname(currentFile);
4620
4967
  while (true) {
4621
- const candidate = path5.join(dir, "package.json");
4968
+ const candidate = path6.join(dir, "package.json");
4622
4969
  if (fs5.existsSync(candidate)) {
4623
4970
  return dir;
4624
4971
  }
4625
- const parent = path5.dirname(dir);
4972
+ const parent = path6.dirname(dir);
4626
4973
  if (parent === dir) {
4627
4974
  return null;
4628
4975
  }
@@ -4641,7 +4988,7 @@ function resolveAppVersion(projectRoot) {
4641
4988
  if (!projectRoot) {
4642
4989
  return "未知";
4643
4990
  }
4644
- const pkgJson = readJsonFile(path5.join(projectRoot, "package.json"));
4991
+ const pkgJson = readJsonFile(path6.join(projectRoot, "package.json"));
4645
4992
  if (pkgJson && typeof pkgJson.version === "string") {
4646
4993
  return pkgJson.version;
4647
4994
  }
@@ -4651,12 +4998,12 @@ function resolveCopilotSdkVersion(projectRoot) {
4651
4998
  if (!projectRoot) {
4652
4999
  return "未知";
4653
5000
  }
4654
- const modulePath = path5.join(projectRoot, "node_modules", "@github", "copilot-sdk", "package.json");
5001
+ const modulePath = path6.join(projectRoot, "node_modules", "@github", "copilot-sdk", "package.json");
4655
5002
  const moduleJson = readJsonFile(modulePath);
4656
5003
  if (moduleJson && typeof moduleJson.version === "string") {
4657
5004
  return moduleJson.version;
4658
5005
  }
4659
- const pkgJson = readJsonFile(path5.join(projectRoot, "package.json"));
5006
+ const pkgJson = readJsonFile(path6.join(projectRoot, "package.json"));
4660
5007
  if (!pkgJson) {
4661
5008
  return "未知";
4662
5009
  }
@@ -4712,14 +5059,9 @@ AZURE_OPENAI_ENDPOINT: ${endpoint ? formatEnvValue(endpoint) : "未設定"}
4712
5059
  AZURE_OPENAI_API_KEY: ${maskSecret(apiKey)}
4713
5060
  AZURE_OPENAI_DEPLOYMENT: ${formatEnvValue(deployment)}
4714
5061
  AZURE_OPENAI_API_VERSION: ${formatEnvValue(process.env.AZURE_OPENAI_API_VERSION)}
4715
- TELEGRAM_BOT_TOKEN: ${maskSecret(process.env.TELEGRAM_BOT_TOKEN)}
4716
- TELEGRAM_STARTUP_CHAT_ID: ${formatEnvValue(process.env.TELEGRAM_STARTUP_CHAT_ID)}
4717
- OWNER_CHAT_ID: ${formatEnvValue(process.env.OWNER_CHAT_ID)}
4718
- BOT_MAX_CONCURRENT: ${formatEnvValue(process.env.BOT_MAX_CONCURRENT)}
4719
- KEEP_TEMP_FILES: ${formatEnvValue(process.env.KEEP_TEMP_FILES)}
4720
- BOT_LOG_LEVEL: ${formatEnvValue(process.env.BOT_LOG_LEVEL)}
4721
- BOT_DEBUG: ${formatEnvValue(process.env.BOT_DEBUG)}
4722
- BOT_DATA_DIR: ${formatEnvValue(process.env.BOT_DATA_DIR)}
5062
+ OPENAI_API_KEY: ${maskSecret(process.env.OPENAI_API_KEY)}
5063
+ OPENAI_API_BASE: ${formatEnvValue(process.env.OPENAI_API_BASE)}
5064
+ AI_PROVIDER: ${formatEnvValue(process.env.AI_PROVIDER)}
4723
5065
 
4724
5066
  Copilot SDK
4725
5067
  -----------
@@ -4732,6 +5074,24 @@ Provider.apiVersion: ${apiVersion}
4732
5074
  Provider.apiKey: ${maskSecret(apiKey)}` : ""}
4733
5075
  `);
4734
5076
  }
5077
/**
 * Guard for the `--overwrite` option: ensures `dir` is inside a Git work tree
 * and that `git status --porcelain` reports no pending changes, so that an
 * in-place overwrite can always be reverted through Git.
 *
 * @param {string} dir - Directory used as the working directory for the git commands.
 * @returns {void}
 * @throws {Error} When `dir` is not inside a Git work tree (or git cannot be run there),
 *   when the git status command itself fails, or when the work tree has
 *   uncommitted changes. The underlying `execSync` failure, if any, is
 *   attached as `error.cause`.
 */
function checkGitStatus(dir) {
  try {
    // Exits non-zero (and therefore throws) when `dir` is not inside a work tree.
    execSync("git rev-parse --is-inside-work-tree", { cwd: dir, stdio: "ignore" });
  } catch (error) {
    throw new Error("使用 --overwrite 參數時,目標目錄必須受 Git 版本控制", { cause: error });
  }

  let status;
  try {
    status = execSync("git status --porcelain", { cwd: dir, encoding: "utf-8" });
  } catch (error) {
    throw new Error("無法檢查 Git 狀態,請確認 git 指令可正常執行", { cause: error });
  }

  // Checked outside the try block so this error is never mistaken for a git
  // invocation failure and does not need to be recognised by its message text.
  if (status.trim().length > 0) {
    throw new Error("使用 --overwrite 參數時,工作目錄必須是乾淨的 (請先 Commit 或 Stash 您的變更)");
  }
}
4735
5095
  async function main() {
4736
5096
  const args = parseArgs(process.argv.slice(2));
4737
5097
  if (args.version) {
@@ -4742,21 +5102,39 @@ async function main() {
4742
5102
  printUsage();
4743
5103
  process.exit(args.help ? 0 : 1);
4744
5104
  }
4745
- const vttFilePath = path5.resolve(args.vttFilePath);
5105
+ const vttFilePath = path6.resolve(args.vttFilePath);
4746
5106
  if (!fs5.existsSync(vttFilePath)) {
4747
5107
  console.error(`錯誤: 找不到檔案 "${vttFilePath}"`);
4748
5108
  process.exit(1);
4749
5109
  }
4750
- if (!vttFilePath.toLowerCase().endsWith(".vtt")) {
4751
- console.error(`錯誤: 檔案必須是 .vtt 格式`);
5110
+ const ext = path6.extname(vttFilePath).toLowerCase();
5111
+ if (!SUPPORTED_EXTS.has(ext)) {
5112
+ console.error(`錯誤: 不支援的檔案格式 "${ext}"。支援的格式: ${Array.from(SUPPORTED_EXTS).join(", ")}`);
4752
5113
  process.exit(1);
4753
5114
  }
4754
- const vttDir = path5.dirname(vttFilePath);
5115
+ process.on("SIGINT", () => {
5116
+ console.log(`
5117
+
5118
+ 收到中斷訊號,正在停止程式...`);
5119
+ process.exit(130);
5120
+ });
5121
+ const vttDir = path6.dirname(vttFilePath);
5122
+ if (args.overwrite) {
5123
+ try {
5124
+ checkGitStatus(vttDir);
5125
+ } catch (error) {
5126
+ console.error(`錯誤: ${error.message}`);
5127
+ process.exit(1);
5128
+ }
5129
+ }
4755
5130
  process.chdir(vttDir);
4756
5131
  console.log(`工作目錄: ${vttDir}`);
4757
5132
  const result = await runSubtitleCorrectionAgent({
4758
5133
  vttFilePath,
4759
- outputDir: args.outputDir
5134
+ outputDir: args.outputDir,
5135
+ chunkSize: args.chunkSize,
5136
+ overwrite: args.overwrite,
5137
+ aiProvider: args.aiProvider
4760
5138
  });
4761
5139
  if (!result.success) {
4762
5140
  console.error(`錯誤: ${result.error}`);
@@ -4770,7 +5148,7 @@ var isDirectRun = (() => {
4770
5148
  return metaMain;
4771
5149
  }
4772
5150
  const entryPath = fileURLToPath(import.meta.url);
4773
- return Boolean(process.argv[1]) && path5.resolve(process.argv[1]) === path5.resolve(entryPath);
5151
+ return Boolean(process.argv[1]) && path6.resolve(process.argv[1]) === path6.resolve(entryPath);
4774
5152
  })();
4775
5153
  if (isDirectRun) {
4776
5154
  main().catch((error) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@willh/subtitle-correction-agent",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "影片字幕校正代理人 - 使用 GitHub Copilot SDK",
5
5
  "type": "module",
6
6
  "files": [
@@ -9,36 +9,30 @@
9
9
  "bin": {
10
10
  "subtitle-correction-agent": "dist/index.js"
11
11
  },
12
- "main": "dist/index.js",
13
- "scripts": {
14
- "bump": "npm version patch --no-git-tag-version && npm install --package-lock-only",
15
-
16
- "build": "bun build src/index.ts --outdir dist --target node --format esm",
17
- "start": "npm run build && bun run src/index.ts",
18
- "dev": "npm run build && bun run src/index.ts",
19
-
20
- "build:bot": "bun build src/telegram-bot.ts --outdir dist --target node --format esm",
21
- "bot": "npm run build:bot && bun run src/telegram-bot.ts",
22
-
23
- "prepack": "npm run build",
24
- "test": "bun test",
25
- "typecheck": "tsc --noEmit"
26
- },
27
- "keywords": [
28
- "subtitle",
29
- "vtt",
30
- "correction",
31
- "copilot-sdk",
32
- "agent"
33
- ],
34
- "author": "Will 保哥",
35
- "license": "MIT",
36
- "dependencies": {
37
- "@github/copilot-sdk": "^0.1.0",
38
- "telegraf": "^4.16.3"
39
- },
40
- "devDependencies": {
41
- "@types/node": "^20.11.0",
42
- "typescript": "^5.3.3"
12
+ "main": "dist/index.js",
13
+ "scripts": {
14
+ "bump": "npm version patch --no-git-tag-version && npm install --package-lock-only",
15
+ "build": "bun build src/index.ts --outdir dist --target node --format esm",
16
+ "start": "npm run build && bun run src/index.ts",
17
+ "dev": "npm run build && bun run src/index.ts",
18
+ "prepack": "npm run build",
19
+ "test": "bun test",
20
+ "typecheck": "tsc --noEmit"
21
+ },
22
+ "keywords": [
23
+ "subtitle",
24
+ "vtt",
25
+ "correction",
26
+ "copilot-sdk",
27
+ "agent"
28
+ ],
29
+ "author": "Will 保哥",
30
+ "license": "MIT",
31
+ "dependencies": {
32
+ "@github/copilot-sdk": "^0.1.0"
33
+ },
34
+ "devDependencies": {
35
+ "@types/node": "^20.11.0",
36
+ "typescript": "^5.3.3"
37
+ }
43
38
  }
44
- }