@willh/subtitle-correction-agent 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +18 -1
  2. package/dist/index.js +542 -160
  3. package/package.json +2 -8
package/README.md CHANGED
@@ -31,6 +31,13 @@ npx @willh/subtitle-correction-agent <vtt-file-path>
31
31
 
32
32
  ## 環境設定
33
33
 
34
+ 本工具會依環境變數自動選擇 AI 提供者:
35
+
36
+ - 若已完整設定 `AZURE_OPENAI_*` 相關環境變數,則使用 **Azure OpenAI (BYOK)**。
37
+ - 否則使用 **GitHub Copilot CLI (Default)**(需先安裝並完成登入,且終端機可執行 `copilot`)。
38
+
39
+ ### 使用 Azure OpenAI (BYOK)
40
+
34
41
  複製 `.env.example` 為 `.env` 並填入 Azure OpenAI 的設定:
35
42
 
36
43
  ```bash
@@ -46,6 +53,13 @@ cp .env.example .env
46
53
  | `AZURE_OPENAI_DEPLOYMENT` | Azure OpenAI 部署名稱 |
47
54
  | `AZURE_OPENAI_API_VERSION` | API 版本 (預設: 2024-10-21) |
48
55
 
56
+ ### 使用 GitHub Copilot CLI (Default)
57
+
58
+ 若你要使用預設的 GitHub Copilot CLI:
59
+
60
+ - 請不要設定 `AZURE_OPENAI_ENDPOINT` / `AZURE_OPENAI_API_KEY` / `AZURE_OPENAI_DEPLOYMENT`(可不建立 `.env`)。
61
+ - 若已複製 `.env.example`,請把上述三個變數的值清空或移除,避免誤判為 Azure 模式。
62
+
49
63
  ## 使用方式
50
64
 
51
65
  ```bash
@@ -55,6 +69,9 @@ subtitle-correction-agent <vtt-file-path>
55
69
  # 指定輸出目錄
56
70
  subtitle-correction-agent <vtt-file-path> --output-dir <output-directory>
57
71
 
72
+ # 顯示版本與環境資訊
73
+ subtitle-correction-agent --version
74
+
58
75
  # 範例
59
76
  subtitle-correction-agent ./examples/sample.vtt
60
77
  subtitle-correction-agent "C:\Videos\lecture.vtt" --output-dir ./output
@@ -63,7 +80,7 @@ subtitle-correction-agent "C:\Videos\lecture.vtt" --output-dir ./output
63
80
  ## Telegram Bot
64
81
 
65
82
  只要使用者上傳 `.vtt` 檔案,Bot 會自動校正字幕並回傳所有輸出檔案。
66
- 同樣需要設定 Azure OpenAI 的環境變數。
83
+ 目前 Telegram Bot 只支援 Azure OpenAI (BYOK),因此需要設定 Azure OpenAI 的環境變數。
67
84
 
68
85
  ### 環境變數
69
86
 
package/dist/index.js CHANGED
@@ -3270,9 +3270,10 @@ var require_main = __commonJS((exports) => {
3270
3270
  });
3271
3271
 
3272
3272
  // src/index.ts
3273
- import * as path5 from "path";
3273
+ import * as path6 from "path";
3274
3274
  import * as fs5 from "fs";
3275
3275
  import { fileURLToPath } from "url";
3276
+ import { execSync } from "child_process";
3276
3277
 
3277
3278
  // node_modules/@github/copilot-sdk/dist/client.js
3278
3279
  var import_node = __toESM(require_main(), 1);
@@ -3942,81 +3943,295 @@ class CopilotClient {
3942
3943
 
3943
3944
  // src/agent.ts
3944
3945
  import * as fs4 from "fs";
3945
- import * as path4 from "path";
3946
+ import * as path5 from "path";
3946
3947
 
3947
3948
  // src/tools/read-vtt.ts
3948
3949
  import * as fs from "fs";
3949
- import * as path from "path";
3950
+ import * as path2 from "path";
3950
3951
 
3951
3952
  // src/utils/vtt-parser.ts
3952
- function parseTimeToSeconds(timeStr) {
3953
- const parts = timeStr.trim().split(":");
3954
- if (parts.length === 3) {
3955
- const [hours, minutes, seconds] = parts;
3956
- return parseInt(hours, 10) * 3600 + parseInt(minutes, 10) * 60 + parseFloat(seconds);
3957
- } else if (parts.length === 2) {
3958
- const [minutes, seconds] = parts;
3959
- return parseInt(minutes, 10) * 60 + parseFloat(seconds);
3960
- }
3961
- return parseFloat(timeStr);
3953
+ import * as path from "path";
3954
+
3955
+ // src/utils/caption-converter.ts
3956
+ var ASS_HEADER = `[Script Info]
3957
+ Title: Converted from WebVTT
3958
+ ScriptType: v4.00+
3959
+ WrapStyle: 0
3960
+ PlayResX: 1280
3961
+ PlayResY: 720
3962
+ ScaledBorderAndShadow: yes
3963
+
3964
+ [V4+ Styles]
3965
+ Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding
3966
+ Style: Default,LINE Seed TW_OTF Bold,48,&H0080FFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,1,0,1,2,0,2,1,1,20,1
3967
+ Style: Secondary,Helvetica,12,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,1,1,20,1
3968
+
3969
+ [Events]
3970
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
3971
+ `;
3972
+ var SUPPORTED_EXTS = new Set([".srt", ".vtt", ".ass"]);
3973
+ function normalizeInput(content) {
3974
+ let text = content.replace(/\r\n/g, `
3975
+ `).replace(/\r/g, `
3976
+ `);
3977
+ if (text.charCodeAt(0) === 65279) {
3978
+ text = text.slice(1);
3979
+ }
3980
+ return text;
3962
3981
  }
3963
- function parseVtt(content, filePath) {
3964
- const lines = content.replace(/\r\n/g, `
3965
- `).split(`
3982
+ function splitWithLimit(text, delimiter, limit) {
3983
+ const parts = [];
3984
+ let current = "";
3985
+ let count = 0;
3986
+ for (let i = 0;i < text.length; i += 1) {
3987
+ const char = text[i];
3988
+ if (char === delimiter && count < limit - 1) {
3989
+ parts.push(current);
3990
+ current = "";
3991
+ count += 1;
3992
+ } else {
3993
+ current += char;
3994
+ }
3995
+ }
3996
+ parts.push(current);
3997
+ return parts;
3998
+ }
3999
+ function parseSrtVttTime(raw) {
4000
+ const clean = raw.trim().replace(",", ".");
4001
+ const pieces = clean.split(".");
4002
+ if (pieces.length !== 2) {
4003
+ return null;
4004
+ }
4005
+ const timePart = pieces[0];
4006
+ const msPart = pieces[1];
4007
+ if (!/^\d{1,3}$/.test(msPart)) {
4008
+ return null;
4009
+ }
4010
+ const timePieces = timePart.split(":");
4011
+ if (timePieces.length !== 2 && timePieces.length !== 3) {
4012
+ return null;
4013
+ }
4014
+ const numbers = timePieces.map((value) => Number(value));
4015
+ if (numbers.some((value) => Number.isNaN(value))) {
4016
+ return null;
4017
+ }
4018
+ const [hours, minutes, seconds] = timePieces.length === 3 ? numbers : [0, numbers[0], numbers[1]];
4019
+ const ms = Number(msPart.padEnd(3, "0"));
4020
+ return ((hours * 60 + minutes) * 60 + seconds) * 1000 + ms;
4021
+ }
4022
+ function parseSrtVtt(content) {
4023
+ const normalized = normalizeInput(content);
4024
+ const blocks = normalized.split(/\n{2,}/);
4025
+ const cues = [];
4026
+ for (const block of blocks) {
4027
+ const lines = block.split(`
4028
+ `);
4029
+ const timeIndex = lines.findIndex((line) => line.includes("-->"));
4030
+ if (timeIndex === -1) {
4031
+ continue;
4032
+ }
4033
+ const timeLine = lines[timeIndex];
4034
+ const match = timeLine.match(/(.+?)\s*-->\s*(.+)/);
4035
+ if (!match) {
4036
+ continue;
4037
+ }
4038
+ const startRaw = match[1].trim();
4039
+ let endRaw = match[2].trim();
4040
+ endRaw = endRaw.split(/\s+/)[0];
4041
+ const start = parseSrtVttTime(startRaw);
4042
+ const end = parseSrtVttTime(endRaw);
4043
+ if (start === null || end === null) {
4044
+ continue;
4045
+ }
4046
+ const textLines = lines.slice(timeIndex + 1);
4047
+ const text = textLines.join(`
4048
+ `).replace(/\s+$/, "");
4049
+ cues.push({ start, end, text });
4050
+ }
4051
+ return cues;
4052
+ }
4053
+ function parseAssTime(raw) {
4054
+ const match = raw.trim().match(/^(\d+):(\d{2}):(\d{2})\.(\d{1,2})$/);
4055
+ if (!match) {
4056
+ return null;
4057
+ }
4058
+ const hours = Number(match[1]);
4059
+ const minutes = Number(match[2]);
4060
+ const seconds = Number(match[3]);
4061
+ const cs = Number(match[4].padEnd(2, "0"));
4062
+ if ([hours, minutes, seconds, cs].some((value) => Number.isNaN(value))) {
4063
+ return null;
4064
+ }
4065
+ return ((hours * 60 + minutes) * 60 + seconds) * 1000 + cs * 10;
4066
+ }
4067
+ function assToPlainText(text) {
4068
+ let cleaned = text.replace(/\{[^}]*\}/g, "");
4069
+ cleaned = cleaned.replace(/\\N/g, `
4070
+ `);
4071
+ cleaned = cleaned.replace(/\\n/g, `
4072
+ `);
4073
+ cleaned = cleaned.replace(/\\h/g, " ");
4074
+ return cleaned;
4075
+ }
4076
+ function parseAss(content) {
4077
+ const normalized = normalizeInput(content);
4078
+ const lines = normalized.split(`
3966
4079
  `);
3967
4080
  const cues = [];
3968
- let index = 0;
3969
- let i = 0;
3970
- while (i < lines.length && !lines[i].includes("-->")) {
3971
- i++;
3972
- }
3973
- while (i < lines.length) {
3974
- const line = lines[i].trim();
3975
- if (line.includes("-->")) {
3976
- const timeParts = line.split("-->");
3977
- if (timeParts.length >= 2) {
3978
- const startTime = timeParts[0].trim().split(" ")[0];
3979
- const endTime = timeParts[1].trim().split(" ")[0];
3980
- const textLines = [];
3981
- i++;
3982
- while (i < lines.length && lines[i].trim() !== "" && !lines[i].includes("-->")) {
3983
- textLines.push(lines[i].trim());
3984
- i++;
3985
- }
3986
- if (textLines.length > 0) {
3987
- cues.push({
3988
- index: index++,
3989
- startTime,
3990
- endTime,
3991
- startSeconds: parseTimeToSeconds(startTime),
3992
- endSeconds: parseTimeToSeconds(endTime),
3993
- text: textLines.join(`
3994
- `)
3995
- });
3996
- }
3997
- } else {
3998
- i++;
3999
- }
4000
- } else {
4001
- i++;
4081
+ let inEvents = false;
4082
+ let format = null;
4083
+ let indices = null;
4084
+ for (const line of lines) {
4085
+ const trimmed = line.trim();
4086
+ if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
4087
+ inEvents = trimmed.toLowerCase() === "[events]";
4088
+ continue;
4089
+ }
4090
+ if (!inEvents) {
4091
+ continue;
4092
+ }
4093
+ if (trimmed.toLowerCase().startsWith("format:")) {
4094
+ format = trimmed.slice(7).split(",").map((part) => part.trim());
4095
+ indices = {
4096
+ start: format.findIndex((entry) => entry.toLowerCase() === "start"),
4097
+ end: format.findIndex((entry) => entry.toLowerCase() === "end"),
4098
+ text: format.findIndex((entry) => entry.toLowerCase() === "text")
4099
+ };
4100
+ continue;
4101
+ }
4102
+ if (!trimmed.toLowerCase().startsWith("dialogue:")) {
4103
+ continue;
4104
+ }
4105
+ if (!format || !indices || indices.start === -1 || indices.end === -1 || indices.text === -1) {
4106
+ continue;
4002
4107
  }
4108
+ const payload = trimmed.slice(9).trim();
4109
+ const fields = splitWithLimit(payload, ",", format.length);
4110
+ if (fields.length < format.length) {
4111
+ continue;
4112
+ }
4113
+ const start = parseAssTime(fields[indices.start] ?? "");
4114
+ const end = parseAssTime(fields[indices.end] ?? "");
4115
+ if (start === null || end === null) {
4116
+ continue;
4117
+ }
4118
+ const textRaw = fields[indices.text] ?? "";
4119
+ const text = assToPlainText(textRaw);
4120
+ cues.push({ start, end, text });
4121
+ }
4122
+ return cues;
4123
+ }
4124
+ function pad(value, size) {
4125
+ return String(value).padStart(size, "0");
4126
+ }
4127
+ function formatSrtTime(ms) {
4128
+ const total = Math.max(0, Math.round(ms));
4129
+ const hours = Math.floor(total / 3600000);
4130
+ const minutes = Math.floor(total % 3600000 / 60000);
4131
+ const seconds = Math.floor(total % 60000 / 1000);
4132
+ const millis = total % 1000;
4133
+ return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(millis, 3)}`;
4134
+ }
4135
+ function formatVttTime(ms) {
4136
+ const total = Math.max(0, Math.round(ms));
4137
+ const hours = Math.floor(total / 3600000);
4138
+ const minutes = Math.floor(total % 3600000 / 60000);
4139
+ const seconds = Math.floor(total % 60000 / 1000);
4140
+ const millis = total % 1000;
4141
+ return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`;
4142
+ }
4143
+ function formatAssTime(ms) {
4144
+ const totalCs = Math.max(0, Math.round(ms / 10));
4145
+ const cs = totalCs % 100;
4146
+ const totalSeconds = Math.floor(totalCs / 100);
4147
+ const seconds = totalSeconds % 60;
4148
+ const totalMinutes = Math.floor(totalSeconds / 60);
4149
+ const minutes = totalMinutes % 60;
4150
+ const hours = Math.floor(totalMinutes / 60);
4151
+ return `${hours}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(cs, 2)}`;
4152
+ }
4153
+ function plainToAssText(text) {
4154
+ return text.replace(/\r/g, "").split(`
4155
+ `).join("\\N");
4156
+ }
4157
+ function renderSrt(cues) {
4158
+ const blocks = cues.map((cue, index) => {
4159
+ const text = cue.text ?? "";
4160
+ return `${index + 1}
4161
+ ${formatSrtTime(cue.start)} --> ${formatSrtTime(cue.end)}
4162
+ ${text}`;
4163
+ });
4164
+ return blocks.join(`
4165
+
4166
+ `) + (blocks.length ? `
4167
+ ` : "");
4168
+ }
4169
+ function renderVtt(cues) {
4170
+ const blocks = cues.map((cue) => {
4171
+ const text = cue.text ?? "";
4172
+ return `${formatVttTime(cue.start)} --> ${formatVttTime(cue.end)}
4173
+ ${text}`;
4174
+ });
4175
+ return `WEBVTT
4176
+
4177
+ ${blocks.join(`
4178
+
4179
+ `)}${blocks.length ? `
4180
+ ` : ""}`;
4181
+ }
4182
+ function renderAss(cues) {
4183
+ const lines = [ASS_HEADER.trimEnd()];
4184
+ for (const cue of cues) {
4185
+ const text = plainToAssText(cue.text ?? "");
4186
+ lines.push(`Dialogue: 0,${formatAssTime(cue.start)},${formatAssTime(cue.end)},Default,,0,0,0,,${text}`);
4187
+ }
4188
+ return `${lines.join(`
4189
+ `)}
4190
+ `;
4191
+ }
4192
+ function parseByExt(ext, content) {
4193
+ switch (ext.toLowerCase()) {
4194
+ case ".srt":
4195
+ return parseSrtVtt(content);
4196
+ case ".vtt":
4197
+ return parseSrtVtt(content);
4198
+ case ".ass":
4199
+ return parseAss(content);
4200
+ default:
4201
+ return [];
4003
4202
  }
4203
+ }
4204
+ function renderByExt(ext, cues) {
4205
+ switch (ext.toLowerCase()) {
4206
+ case ".srt":
4207
+ return renderSrt(cues);
4208
+ case ".vtt":
4209
+ return renderVtt(cues);
4210
+ case ".ass":
4211
+ return renderAss(cues);
4212
+ default:
4213
+ return "";
4214
+ }
4215
+ }
4216
+
4217
+ // src/utils/vtt-parser.ts
4218
+ function parseVtt(content, filePath) {
4219
+ const ext = path.extname(filePath);
4220
+ const captionCues = parseByExt(ext, content);
4221
+ const cues = captionCues.map((cue, idx) => ({
4222
+ index: idx,
4223
+ startTime: formatVttTime(cue.start),
4224
+ endTime: formatVttTime(cue.end),
4225
+ startSeconds: cue.start / 1000,
4226
+ endSeconds: cue.end / 1000,
4227
+ text: cue.text
4228
+ }));
4004
4229
  return {
4005
4230
  filePath,
4006
4231
  cues,
4007
4232
  rawContent: content
4008
4233
  };
4009
4234
  }
4010
- function formatVtt(vttFile) {
4011
- const lines = ["WEBVTT", ""];
4012
- for (const cue of vttFile.cues) {
4013
- lines.push(`${cue.startTime} --> ${cue.endTime}`);
4014
- lines.push(cue.text);
4015
- lines.push("");
4016
- }
4017
- return lines.join(`
4018
- `);
4019
- }
4020
4235
  function formatCuesForDisplay(vttFile) {
4021
4236
  return vttFile.cues.map((cue) => `[${cue.index}] ${cue.startTime} --> ${cue.endTime}
4022
4237
  ${cue.text}`).join(`
@@ -4031,18 +4246,18 @@ function getCachedVttFile() {
4031
4246
  }
4032
4247
  var readVttTool = {
4033
4248
  name: "read_vtt_file",
4034
- description: "讀取並解析 VTT 字幕檔案,回傳所有字幕的時間戳和內容。",
4249
+ description: "讀取並解析字幕檔案 (支援 .vtt, .srt, .ass),回傳所有字幕的時間戳和內容。",
4035
4250
  parameters: {
4036
4251
  type: "object",
4037
4252
  properties: {
4038
- filePath: { type: "string", description: "VTT 檔案的完整路徑" }
4253
+ filePath: { type: "string", description: "字幕檔案的完整路徑" }
4039
4254
  },
4040
4255
  required: ["filePath"]
4041
4256
  },
4042
4257
  handler: async ({ filePath }) => {
4043
4258
  try {
4044
4259
  console.log(`[DEBUG] readVttTool: Starting to read ${filePath}`);
4045
- const absolutePath = path.resolve(filePath);
4260
+ const absolutePath = path2.resolve(filePath);
4046
4261
  if (!fs.existsSync(absolutePath)) {
4047
4262
  console.log(`[DEBUG] readVttTool: File not found ${absolutePath}`);
4048
4263
  return { success: false, error: `檔案不存在: ${absolutePath}` };
@@ -4096,7 +4311,7 @@ var getVttCuesTool = {
4096
4311
  };
4097
4312
  // src/tools/write-file.ts
4098
4313
  import * as fs2 from "fs";
4099
- import * as path2 from "path";
4314
+ import * as path3 from "path";
4100
4315
  var writeFileTool = {
4101
4316
  name: "write_file",
4102
4317
  description: "將內容寫入到指定的檔案中。用於產生修正清單、章節資訊、總結等輸出檔案。",
@@ -4111,8 +4326,8 @@ var writeFileTool = {
4111
4326
  },
4112
4327
  handler: async ({ filePath, content, encoding = "utf-8" }) => {
4113
4328
  try {
4114
- const absolutePath = path2.resolve(filePath);
4115
- const dir = path2.dirname(absolutePath);
4329
+ const absolutePath = path3.resolve(filePath);
4330
+ const dir = path3.dirname(absolutePath);
4116
4331
  if (!fs2.existsSync(dir)) {
4117
4332
  fs2.mkdirSync(dir, { recursive: true });
4118
4333
  }
@@ -4140,7 +4355,7 @@ var appendFileTool = {
4140
4355
  },
4141
4356
  handler: async ({ filePath, content }) => {
4142
4357
  try {
4143
- const absolutePath = path2.resolve(filePath);
4358
+ const absolutePath = path3.resolve(filePath);
4144
4359
  fs2.appendFileSync(absolutePath, content, "utf-8");
4145
4360
  return {
4146
4361
  success: true,
@@ -4154,7 +4369,7 @@ var appendFileTool = {
4154
4369
  };
4155
4370
  // src/tools/update-vtt.ts
4156
4371
  import * as fs3 from "fs";
4157
- import * as path3 from "path";
4372
+ import * as path4 from "path";
4158
4373
  var updateCueTool = {
4159
4374
  name: "update_cue",
4160
4375
  description: "更新指定索引的字幕內容。用於校正字幕中的錯字。",
@@ -4233,7 +4448,7 @@ var batchUpdateCuesTool = {
4233
4448
  };
4234
4449
  var saveVttTool = {
4235
4450
  name: "save_vtt",
4236
- description: "將修改後的 VTT 字幕儲存到檔案。",
4451
+ description: "將修改後的字幕儲存到檔案 (支援 .vtt, .srt, .ass)。",
4237
4452
  parameters: {
4238
4453
  type: "object",
4239
4454
  properties: {
@@ -4247,8 +4462,14 @@ var saveVttTool = {
4247
4462
  return { success: false, error: "請先使用 read_vtt_file 載入 VTT 檔案" };
4248
4463
  }
4249
4464
  try {
4250
- const targetPath = outputPath ? path3.resolve(outputPath) : vttFile.filePath;
4251
- const content = formatVtt(vttFile);
4465
+ const targetPath = outputPath ? path4.resolve(outputPath) : vttFile.filePath;
4466
+ const ext = path4.extname(targetPath);
4467
+ const captionCues = vttFile.cues.map((c) => ({
4468
+ start: c.startSeconds * 1000,
4469
+ end: c.endSeconds * 1000,
4470
+ text: c.text
4471
+ }));
4472
+ const content = renderByExt(ext, captionCues);
4252
4473
  fs3.writeFileSync(targetPath, content, "utf-8");
4253
4474
  return {
4254
4475
  success: true,
@@ -4348,33 +4569,63 @@ async function sendAndWait(session, prompt, eventLogger) {
4348
4569
  });
4349
4570
  });
4350
4571
  }
4572
+ function isCopilotTimeoutError(error) {
4573
+ if (!(error instanceof Error)) {
4574
+ return false;
4575
+ }
4576
+ return error.message.includes("Timeout: 等待 Copilot 回應超過 120 秒");
4577
+ }
4578
+ async function sendAndWaitWithRetry(session, prompt, eventLogger, maxRetries = 5) {
4579
+ let attempt = 0;
4580
+ while (true) {
4581
+ try {
4582
+ return await sendAndWait(session, prompt, eventLogger);
4583
+ } catch (error) {
4584
+ if (!isCopilotTimeoutError(error) || attempt >= maxRetries) {
4585
+ throw error;
4586
+ }
4587
+ attempt += 1;
4588
+ const waitMs = Math.min(2000 * attempt, 1e4);
4589
+ console.warn(`發生 Copilot Timeout,將在 ${waitMs / 1000} 秒後重試 (${attempt}/${maxRetries})...`);
4590
+ await new Promise((resolve5) => setTimeout(resolve5, waitMs));
4591
+ }
4592
+ }
4593
+ }
4351
4594
  async function runSubtitleCorrectionAgent(config) {
4352
- const { vttFilePath } = config;
4353
- const absolutePath = path4.resolve(vttFilePath);
4354
- const dir = config.outputDir || path4.dirname(absolutePath);
4355
- const baseName = path4.basename(absolutePath, path4.extname(absolutePath));
4595
+ const { vttFilePath, overwrite, aiProvider } = config;
4596
+ const absolutePath = path5.resolve(vttFilePath);
4597
+ const dir = config.outputDir || path5.dirname(absolutePath);
4598
+ const originalExt = path5.extname(absolutePath);
4599
+ const baseName = path5.basename(absolutePath, originalExt);
4600
+ const outputBaseName = overwrite ? baseName : `${baseName}-corrected`;
4601
+ const correctedVttPath = path5.join(dir, `${outputBaseName}${originalExt}`);
4602
+ const correctionsPath = path5.join(dir, `${outputBaseName}-corrections.txt`);
4603
+ const chaptersPath = path5.join(dir, `${outputBaseName}-chapters.txt`);
4604
+ const summaryPath = path5.join(dir, `${outputBaseName}-summary.txt`);
4356
4605
  const debugEvents = isDebugEnabled();
4357
- const eventLogPath = debugEvents ? path4.join(dir, `${baseName}-copilot-events.json`) : undefined;
4606
+ const eventLogPath = debugEvents ? path5.join(dir, `${baseName}-copilot-events.json`) : undefined;
4358
4607
  const eventLogger = eventLogPath ? new EventLogger(eventLogPath) : undefined;
4359
- const correctedVttPath = path4.join(dir, `${baseName}-corrected.vtt`);
4360
- const correctionsPath = path4.join(dir, `${baseName}-corrections.txt`);
4361
- const chaptersPath = path4.join(dir, `${baseName}-chapters.txt`);
4362
- const summaryPath = path4.join(dir, `${baseName}-summary.txt`);
4363
4608
  console.log("=".repeat(60));
4364
4609
  console.log("字幕校正代理人 - 啟動");
4365
4610
  console.log("=".repeat(60));
4366
4611
  console.log(`輸入檔案: ${absolutePath}`);
4367
4612
  console.log(`輸出目錄: ${dir}`);
4613
+ console.log(`覆寫模式: ${overwrite ? "開啟" : "關閉"}`);
4614
+ console.log(`AI Provider: ${aiProvider || process.env.AI_PROVIDER || "(Auto Detect)"}`);
4368
4615
  console.log("=".repeat(60));
4369
4616
  if (eventLogPath) {
4370
4617
  console.log(`偵錯: Copilot events 記錄到 ${eventLogPath}`);
4371
4618
  }
4372
- const client = new CopilotClient;
4619
+ const client = new CopilotClient({ cliArgs: ["--allow-all-paths"] });
4620
+ const autoApprovePermission = async () => ({ kind: "approved" });
4373
4621
  try {
4374
4622
  await client.start();
4375
4623
  const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT;
4376
4624
  const azureApiKey = process.env.AZURE_OPENAI_API_KEY;
4377
4625
  const azureDeployment = process.env.AZURE_OPENAI_DEPLOYMENT;
4626
+ const openaiApiKey = process.env.OPENAI_API_KEY;
4627
+ const openaiApiBase = process.env.OPENAI_API_BASE;
4628
+ const selectedProvider = aiProvider || process.env.AI_PROVIDER;
4378
4629
  const tools = [
4379
4630
  readVttTool,
4380
4631
  getVttCuesTool,
@@ -4385,10 +4636,14 @@ async function runSubtitleCorrectionAgent(config) {
4385
4636
  saveVttTool
4386
4637
  ];
4387
4638
  let session;
4388
- if (azureEndpoint && azureApiKey && azureDeployment) {
4639
+ if (selectedProvider === "azure" || !selectedProvider && azureEndpoint && azureApiKey && azureDeployment) {
4389
4640
  console.log("使用 Azure OpenAI 作為模型提供者");
4641
+ if (!azureEndpoint || !azureApiKey || !azureDeployment) {
4642
+ throw new Error("選擇了 Azure Provider 但未設定 AZURE_OPENAI_* 環境變數");
4643
+ }
4390
4644
  session = await client.createSession({
4391
4645
  model: azureDeployment,
4646
+ onPermissionRequest: autoApprovePermission,
4392
4647
  streaming: true,
4393
4648
  provider: {
4394
4649
  type: "azure",
@@ -4400,16 +4655,33 @@ async function runSubtitleCorrectionAgent(config) {
4400
4655
  },
4401
4656
  tools
4402
4657
  });
4658
+ } else if (selectedProvider === "openai" || !selectedProvider && openaiApiKey) {
4659
+ console.log("使用 OpenAI 作為模型提供者");
4660
+ if (!openaiApiKey) {
4661
+ throw new Error("選擇了 OpenAI Provider 但未設定 OPENAI_API_KEY 環境變數");
4662
+ }
4663
+ session = await client.createSession({
4664
+ model: "gpt-4o",
4665
+ onPermissionRequest: autoApprovePermission,
4666
+ streaming: true,
4667
+ provider: {
4668
+ type: "openai",
4669
+ apiKey: openaiApiKey,
4670
+ baseUrl: openaiApiBase || "https://api.openai.com/v1"
4671
+ },
4672
+ tools
4673
+ });
4403
4674
  } else {
4404
- console.log("未找到完整的 Azure 配置,將使用預設的 GitHub Copilot 認證");
4675
+ console.log("使用 GitHub Copilot 作為模型提供者");
4405
4676
  session = await client.createSession({
4677
+ onPermissionRequest: autoApprovePermission,
4406
4678
  tools
4407
4679
  });
4408
4680
  }
4409
4681
  console.log(`
4410
- 【步驟 1/7】讀取 VTT 檔案並分析主題方向...
4682
+ 【步驟 1/7】讀取字幕檔案並分析主題方向...
4411
4683
  `);
4412
- await sendAndWait(session, `請使用 read_vtt_file 工具讀取 VTT 檔案: "${absolutePath}"
4684
+ await sendAndWaitWithRetry(session, `請使用 read_vtt_file 工具讀取字幕檔案: "${absolutePath}"
4413
4685
 
4414
4686
  讀取完成後,請仔細閱讀整個字幕內容,分析並整理出:
4415
4687
  1. 這部影片的主要談論內容是什麼?
@@ -4417,60 +4689,133 @@ async function runSubtitleCorrectionAgent(config) {
4417
4689
  3. 這是什麼類型的影片?(教學、訪談、講座、會議等)
4418
4690
 
4419
4691
  請給出簡潔但完整的分析。`, eventLogger);
4692
+ const initialVtt = getCachedVttFile();
4693
+ if (!initialVtt) {
4694
+ throw new Error("無法取得字幕檔案內容");
4695
+ }
4696
+ const originalCues = JSON.parse(JSON.stringify(initialVtt.cues));
4420
4697
  console.log(`
4421
4698
  【步驟 2/7】整理主題相關的關鍵字與專有名詞...
4422
4699
  `);
4423
- await sendAndWait(session, `根據你剛才分析出的影片主題方向,請整理出這個主題領域中常見的:
4700
+ await sendAndWaitWithRetry(session, `根據你剛才分析出的影片主題方向,請整理出這個主題領域中常見的:
4424
4701
  1. 專業術語與關鍵字
4425
4702
  2. 專有名詞(人名、產品名、技術名稱等)
4426
4703
  3. 常見的縮寫與其全稱
4427
4704
 
4428
4705
  【重要】請不要參考字幕的實際內容來整理這些詞彙(因為字幕可能有轉錄錯誤)。
4429
4706
  請根據你對這個主題領域的知識,列出這些詞彙的「正確寫法」,這將用於後續的字幕校正。`, eventLogger);
4707
+ const CHUNK_SIZE = config.chunkSize || 100;
4708
+ const totalCues = initialVtt.cues.length;
4709
+ const totalChunks = Math.ceil(totalCues / CHUNK_SIZE);
4710
+ const tempPass1Path = path5.join(dir, `${baseName}-temp-pass1${originalExt}`);
4711
+ console.log(`
4712
+ 【步驟 3-4 (Phase 1)/7】第一階段校正:修正聽寫與拼寫錯誤...
4713
+ `);
4714
+ console.log(`總共有 ${totalCues} 個字幕片段,將分為 ${totalChunks} 個批次進行處理。
4715
+ `);
4716
+ for (let i = 0;i < totalChunks; i++) {
4717
+ const startIdx = i * CHUNK_SIZE;
4718
+ const endIdx = Math.min((i + 1) * CHUNK_SIZE, totalCues);
4719
+ const currentChunk = getCachedVttFile().cues.slice(startIdx, endIdx);
4720
+ console.log(`[Phase 1] 正在處理第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} - ${endIdx - 1})...`);
4721
+ const chunkContent = currentChunk.map((c) => `[${c.index}] ${c.startTime} --> ${c.endTime}
4722
+ ${c.text}`).join(`
4723
+
4724
+ `);
4725
+ await sendAndWaitWithRetry(session, `這是第一階段校正,第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} 到 ${endIdx - 1})。
4726
+
4727
+ ${chunkContent}
4728
+
4729
+ 請根據主題上下文,修正上述字幕中的**聽寫錯誤**與**錯別字**。
4730
+
4731
+ 原則:
4732
+ 1. **僅修正錯誤**:專注於同音異字(如「再/在」)、拼寫錯誤、或明顯的轉錄失誤。
4733
+ 2. **保留原意**:嚴禁重寫句子或加入不存在的資訊。
4734
+ 3. **術語一致**:參考之前的關鍵字列表,確保專有名詞正確。
4735
+
4736
+ 請使用 batch_update_cues 工具進行修正。如果沒有錯誤則無需操作。`, eventLogger);
4737
+ }
4738
+ console.log(`
4739
+ [Phase 1] 完成,儲存暫存檔至: ${tempPass1Path}`);
4740
+ await sendAndWaitWithRetry(session, `請使用 save_vtt 工具將目前進度儲存到 "${tempPass1Path}"`, eventLogger);
4430
4741
  console.log(`
4431
- 【步驟 3-4/7】分析字幕正確性並進行校正...
4742
+ 【步驟 4.5 (Phase 2)/7】第二階段校正:檢查一致性與遺漏...
4432
4743
  `);
4433
- await sendAndWait(session, `現在請逐一分析已載入的 VTT 字幕檔案中每一段 Cue 的正確性。
4744
+ console.log(`正在讀取第一階段校正結果,準備進行複查...
4745
+ `);
4746
+ for (let i = 0;i < totalChunks; i++) {
4747
+ const startIdx = i * CHUNK_SIZE;
4748
+ const endIdx = Math.min((i + 1) * CHUNK_SIZE, totalCues);
4749
+ const currentChunk = getCachedVttFile().cues.slice(startIdx, endIdx);
4750
+ console.log(`[Phase 2] 正在複查第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} - ${endIdx - 1})...`);
4751
+ const chunkContent = currentChunk.map((c) => `[${c.index}] ${c.startTime} --> ${c.endTime}
4752
+ ${c.text}`).join(`
4753
+
4754
+ `);
4755
+ await sendAndWaitWithRetry(session, `這是第二階段複查,第 ${i + 1}/${totalChunks} 批次 (索引 ${startIdx} 到 ${endIdx - 1})。
4756
+ 這些字幕已經經過初步校正。
4434
4757
 
4435
- 對於每個 Cue:
4436
- 1. 檢查是否有潛在的錯字、同音字錯誤、轉錄錯誤
4437
- 2. 根據上下文和你整理出的正確關鍵字/專有名詞,推理出正確的文字
4438
- 3. 如果需要校正,請使用 batch_update_cues 工具批次更新校正後的字幕
4758
+ ${chunkContent}
4439
4759
 
4440
- 請特別注意:
4441
- - 專業術語和專有名詞的正確性
4442
- - 同音異義字的正確使用
4443
- - 語句的通順和邏輯性
4444
- - 標點符號的使用
4760
+ 請進行最後檢查:
4761
+ 1. **檢查遺漏**:是否有第一階段漏掉的明顯錯誤?
4762
+ 2. **檢查一致性**:專有名詞的使用是否前後一致?
4763
+ 3. **檢查過度校正**:確認第一階段沒有改變原意或產生幻覺(若有,請改回原意)。
4445
4764
 
4446
- 處理完成後,請報告發現了多少個需要校正的問題。`, eventLogger);
4765
+ 請使用 batch_update_cues 工具修正任何殘留問題。若完美則無需操作。`, eventLogger);
4766
+ }
4447
4767
  console.log(`
4448
- 【步驟 5/7】產生修正清單...
4768
+ 所有批次複查完成。
4449
4769
  `);
4450
- await sendAndWait(session, `請整理所有剛才進行的字幕校正,使用 write_file 工具將修正清單寫入到:
4451
- "${correctionsPath}"
4452
-
4453
- 修正清單的格式應該包含:
4454
- 1. 標題:字幕修正清單
4455
- 2. 修正日期與時間
4456
- 3. 原始檔案路徑
4457
- 4. 修正統計(總共修正了多少處)
4458
- 5. 每一筆修正的詳細資訊:
4459
- - Cue 索引與時間戳
4460
- - 原始文字
4461
- - 校正後文字
4462
- - 修正原因說明
4463
-
4464
- 請以清晰易讀的格式呈現。`, eventLogger);
4465
4770
  console.log(`
4466
- 【儲存校正後的 VTT 檔案】
4771
+ 【步驟 5/7】產生修正清單 (自動比對)...
4772
+ `);
4773
+ const finalCues = getCachedVttFile().cues;
4774
+ let correctionsCount = 0;
4775
+ let correctionsContent = `字幕修正清單
4776
+ 產生日期: ${new Date().toLocaleString()}
4777
+ 原始檔案: ${absolutePath}
4778
+
4779
+ `;
4780
+ correctionsContent += `| 索引 | 時間 | 原始文字 | 校正後文字 |
4781
+ `;
4782
+ correctionsContent += `| --- | --- | --- | --- |
4783
+ `;
4784
+ for (let i = 0;i < finalCues.length; i++) {
4785
+ const original = originalCues[i];
4786
+ const final = finalCues[i];
4787
+ if (original.text !== final.text) {
4788
+ correctionsCount++;
4789
+ const oldText = original.text.replace(/\n/g, " ");
4790
+ const newText = final.text.replace(/\n/g, " ");
4791
+ correctionsContent += `| ${final.index} | ${final.startTime} | ${oldText} | ${newText} |
4792
+ `;
4793
+ }
4794
+ }
4795
+ correctionsContent = `總共修正: ${correctionsCount} 處
4796
+
4797
+ ` + correctionsContent;
4798
+ fs4.writeFileSync(correctionsPath, correctionsContent, "utf-8");
4799
+ console.log(`修正清單已寫入: ${correctionsPath} (共 ${correctionsCount} 處修正)`);
4800
+ console.log(`
4801
+ 【儲存校正後的字幕檔案】
4467
4802
  `);
4468
- await sendAndWait(session, `請使用 save_vtt 工具將校正後的字幕儲存到:
4469
- "${correctedVttPath}"`, eventLogger);
4803
+ const vttFile = getCachedVttFile();
4804
+ const captionCues = vttFile.cues.map((c) => ({
4805
+ start: c.startSeconds * 1000,
4806
+ end: c.endSeconds * 1000,
4807
+ text: c.text
4808
+ }));
4809
+ for (const ext of SUPPORTED_EXTS) {
4810
+ const targetPath = path5.join(dir, `${outputBaseName}${ext}`);
4811
+ const content = renderByExt(ext, captionCues);
4812
+ fs4.writeFileSync(targetPath, content, "utf-8");
4813
+ console.log(`已儲存: ${targetPath}`);
4814
+ }
4470
4815
  console.log(`
4471
4816
  【步驟 6/7】建立分段章節...
4472
4817
  `);
4473
- await sendAndWait(session, `你現在是一個 **Video Chapter Segmenter**,負責分析字幕內容並將影片分割成清晰、合理的章節。
4818
+ await sendAndWaitWithRetry(session, `你現在是一個 **Video Chapter Segmenter**,負責分析字幕內容並將影片分割成清晰、合理的章節。
4474
4819
 
4475
4820
  ## 分析準則
4476
4821
  - **主題轉換**: 識別主題或討論重點的變化
@@ -4486,8 +4831,8 @@ async function runSubtitleCorrectionAgent(config) {
4486
4831
  - **風格一致**: 所有標題保持統一的格式和語調
4487
4832
 
4488
4833
  ## 輸出格式要求
4489
- - **嚴格格式**: \`HH:mm:ss 章節標題\`
4490
- - **第一章節**: 必須以 \`00:00:00 開始\` 開頭
4834
+ - **嚴格格式**:
4835
+ - **第一章節**: 必須以
4491
4836
  - **時間精確度**: 使用字幕檔案中的精確時間戳
4492
4837
  - **每行一章**: 每個章節獨立一行
4493
4838
  - **無額外格式**: 不使用項目符號、編號或特殊字元
@@ -4499,7 +4844,6 @@ async function runSubtitleCorrectionAgent(config) {
4499
4844
  - **完整覆蓋**: 確保從頭到尾完整覆蓋影片
4500
4845
 
4501
4846
  ## 輸出範例
4502
- \`\`\`
4503
4847
  00:00:00 開始
4504
4848
  00:00:16 開發過程的混亂
4505
4849
  00:00:28 規格驅動開發的啟發
@@ -4508,7 +4852,6 @@ async function runSubtitleCorrectionAgent(config) {
4508
4852
  00:01:38 團隊協作的重要性
4509
4853
  00:02:04 從混沌到清晰的轉變
4510
4854
  00:02:29 終章:持續改進的承諾
4511
- \`\`\`
4512
4855
 
4513
4856
  請根據校正後的字幕內容,深入分析並自動建立分段章節。
4514
4857
  使用 write_file 工具將章節資訊寫入到:
@@ -4518,7 +4861,7 @@ async function runSubtitleCorrectionAgent(config) {
4518
4861
  console.log(`
4519
4862
  【步驟 7/7】產生內容總結...
4520
4863
  `);
4521
- await sendAndWait(session, `請根據校正後的字幕內容,用一段話總結這部影片的主要內容。
4864
+ await sendAndWaitWithRetry(session, `請根據校正後的字幕內容,用一段話總結這部影片的主要內容。
4522
4865
 
4523
4866
  總結應該:
4524
4867
  1. 概括影片的核心主題
@@ -4534,7 +4877,7 @@ async function runSubtitleCorrectionAgent(config) {
4534
4877
  console.log("=".repeat(60));
4535
4878
  console.log("字幕校正完成!");
4536
4879
  console.log("=".repeat(60));
4537
- console.log(`校正後字幕: ${correctedVttPath}`);
4880
+ console.log(`校正後字幕 (Primary): ${correctedVttPath}`);
4538
4881
  console.log(`修正清單: ${correctionsPath}`);
4539
4882
  console.log(`章節資訊: ${chaptersPath}`);
4540
4883
  console.log(`內容總結: ${summaryPath}`);
@@ -4564,41 +4907,40 @@ function printUsage() {
4564
4907
  ========================
4565
4908
 
4566
4909
  使用方式:
4567
- subtitle-correction-agent <vtt-file-path> [options]
4910
+ subtitle-correction-agent <file-path> [options]
4568
4911
  subtitle-correction-agent --version
4569
4912
 
4570
4913
  參數:
4571
- <vtt-file-path> VTT 字幕檔案的路徑 (必須)
4914
+ <file-path> 字幕檔案的路徑 (支援 .vtt, .srt, .ass)
4572
4915
 
4573
4916
  選項:
4574
- --output-dir <dir> 指定輸出目錄 (預設為 VTT 檔案所在目錄)
4575
- --version, -v 顯示版本與環境資訊
4576
- --help, -h 顯示此說明
4917
+ --output-dir <dir> 指定輸出目錄 (預設為字幕檔案所在目錄)
4918
+ --chunk-size <number> 指定分段處理的大小 (預設: 100)
4919
+ --overwrite 直接覆寫原始檔案 (僅限 Git 專案且無未提交變更時可用)
4920
+ --ai-provider <name> 指定 AI 提供者 (azure, openai, github,預設優先使用環境變數設定)
4921
+ --version, -v 顯示版本與環境資訊
4922
+ --help, -h 顯示此說明
4577
4923
 
4578
4924
  範例:
4579
4925
  subtitle-correction-agent ./video.vtt
4926
+ subtitle-correction-agent ./video.srt --overwrite
4927
+ subtitle-correction-agent ./video.ass --ai-provider azure
4580
4928
  subtitle-correction-agent ./video.vtt --output-dir ./output
4581
- subtitle-correction-agent --version
4582
- npx @willh/subtitle-correction-agent ./video.vtt
4583
- subtitle-correction-agent "C:\\Videos\\lecture.vtt"
4584
-
4585
- 輸出檔案:
4586
- {filename}-corrected.vtt 校正後的字幕檔案
4587
- {filename}-corrections.txt 修正清單
4588
- {filename}-chapters.txt 章節資訊
4589
- {filename}-summary.txt 內容總結
4590
4929
 
4591
4930
  環境變數:
4592
4931
  AZURE_OPENAI_ENDPOINT Azure OpenAI 端點 URL
4593
4932
  AZURE_OPENAI_API_KEY Azure OpenAI API 金鑰
4594
4933
  AZURE_OPENAI_DEPLOYMENT Azure OpenAI 部署名稱
4595
- AZURE_OPENAI_API_VERSION API 版本 (預設: 2024-10-21)
4934
+ OPENAI_API_KEY OpenAI API 金鑰
4935
+ OPENAI_API_BASE OpenAI API Base URL (選填,可用於 Ollama)
4936
+ AI_PROVIDER 預設 AI 提供者 (azure, openai, github)
4596
4937
  `);
4597
4938
  }
4598
4939
  function parseArgs(args) {
4599
4940
  const result = {
4600
4941
  help: false,
4601
- version: false
4942
+ version: false,
4943
+ overwrite: false
4602
4944
  };
4603
4945
  for (let i = 0;i < args.length; i++) {
4604
4946
  const arg = args[i];
@@ -4608,6 +4950,15 @@ function parseArgs(args) {
4608
4950
  result.version = true;
4609
4951
  } else if (arg === "--output-dir" && i + 1 < args.length) {
4610
4952
  result.outputDir = args[++i];
4953
+ } else if (arg === "--chunk-size" && i + 1 < args.length) {
4954
+ const val = parseInt(args[++i], 10);
4955
+ if (!isNaN(val) && val > 0) {
4956
+ result.chunkSize = val;
4957
+ }
4958
+ } else if (arg === "--overwrite") {
4959
+ result.overwrite = true;
4960
+ } else if (arg === "--ai-provider" && i + 1 < args.length) {
4961
+ result.aiProvider = args[++i];
4611
4962
  } else if (!arg.startsWith("-") && !result.vttFilePath) {
4612
4963
  result.vttFilePath = arg;
4613
4964
  }
@@ -4616,13 +4967,13 @@ function parseArgs(args) {
4616
4967
  }
4617
4968
  function resolveProjectRoot() {
4618
4969
  const currentFile = fileURLToPath(import.meta.url);
4619
- let dir = path5.dirname(currentFile);
4970
+ let dir = path6.dirname(currentFile);
4620
4971
  while (true) {
4621
- const candidate = path5.join(dir, "package.json");
4972
+ const candidate = path6.join(dir, "package.json");
4622
4973
  if (fs5.existsSync(candidate)) {
4623
4974
  return dir;
4624
4975
  }
4625
- const parent = path5.dirname(dir);
4976
+ const parent = path6.dirname(dir);
4626
4977
  if (parent === dir) {
4627
4978
  return null;
4628
4979
  }
@@ -4641,7 +4992,7 @@ function resolveAppVersion(projectRoot) {
4641
4992
  if (!projectRoot) {
4642
4993
  return "未知";
4643
4994
  }
4644
- const pkgJson = readJsonFile(path5.join(projectRoot, "package.json"));
4995
+ const pkgJson = readJsonFile(path6.join(projectRoot, "package.json"));
4645
4996
  if (pkgJson && typeof pkgJson.version === "string") {
4646
4997
  return pkgJson.version;
4647
4998
  }
@@ -4651,12 +5002,12 @@ function resolveCopilotSdkVersion(projectRoot) {
4651
5002
  if (!projectRoot) {
4652
5003
  return "未知";
4653
5004
  }
4654
- const modulePath = path5.join(projectRoot, "node_modules", "@github", "copilot-sdk", "package.json");
5005
+ const modulePath = path6.join(projectRoot, "node_modules", "@github", "copilot-sdk", "package.json");
4655
5006
  const moduleJson = readJsonFile(modulePath);
4656
5007
  if (moduleJson && typeof moduleJson.version === "string") {
4657
5008
  return moduleJson.version;
4658
5009
  }
4659
- const pkgJson = readJsonFile(path5.join(projectRoot, "package.json"));
5010
+ const pkgJson = readJsonFile(path6.join(projectRoot, "package.json"));
4660
5011
  if (!pkgJson) {
4661
5012
  return "未知";
4662
5013
  }
@@ -4712,14 +5063,9 @@ AZURE_OPENAI_ENDPOINT: ${endpoint ? formatEnvValue(endpoint) : "未設定"}
4712
5063
  AZURE_OPENAI_API_KEY: ${maskSecret(apiKey)}
4713
5064
  AZURE_OPENAI_DEPLOYMENT: ${formatEnvValue(deployment)}
4714
5065
  AZURE_OPENAI_API_VERSION: ${formatEnvValue(process.env.AZURE_OPENAI_API_VERSION)}
4715
- TELEGRAM_BOT_TOKEN: ${maskSecret(process.env.TELEGRAM_BOT_TOKEN)}
4716
- TELEGRAM_STARTUP_CHAT_ID: ${formatEnvValue(process.env.TELEGRAM_STARTUP_CHAT_ID)}
4717
- OWNER_CHAT_ID: ${formatEnvValue(process.env.OWNER_CHAT_ID)}
4718
- BOT_MAX_CONCURRENT: ${formatEnvValue(process.env.BOT_MAX_CONCURRENT)}
4719
- KEEP_TEMP_FILES: ${formatEnvValue(process.env.KEEP_TEMP_FILES)}
4720
- BOT_LOG_LEVEL: ${formatEnvValue(process.env.BOT_LOG_LEVEL)}
4721
- BOT_DEBUG: ${formatEnvValue(process.env.BOT_DEBUG)}
4722
- BOT_DATA_DIR: ${formatEnvValue(process.env.BOT_DATA_DIR)}
5066
+ OPENAI_API_KEY: ${maskSecret(process.env.OPENAI_API_KEY)}
5067
+ OPENAI_API_BASE: ${formatEnvValue(process.env.OPENAI_API_BASE)}
5068
+ AI_PROVIDER: ${formatEnvValue(process.env.AI_PROVIDER)}
4723
5069
 
4724
5070
  Copilot SDK
4725
5071
  -----------
@@ -4732,6 +5078,24 @@ Provider.apiVersion: ${apiVersion}
4732
5078
  Provider.apiKey: ${maskSecret(apiKey)}` : ""}
4733
5079
  `);
4734
5080
  }
5081
+ function checkGitStatus(dir) {
5082
+ try {
5083
+ execSync("git rev-parse --is-inside-work-tree", { cwd: dir, stdio: "ignore" });
5084
+ } catch {
5085
+ throw new Error("使用 --overwrite 參數時,目標目錄必須受 Git 版本控制");
5086
+ }
5087
+ try {
5088
+ const status = execSync("git status --porcelain", { cwd: dir, encoding: "utf-8" });
5089
+ if (status.trim().length > 0) {
5090
+ throw new Error("使用 --overwrite 參數時,工作目錄必須是乾淨的 (請先 Commit 或 Stash 您的變更)");
5091
+ }
5092
+ } catch (error) {
5093
+ if (error.message.includes("工作目錄必須是乾淨的")) {
5094
+ throw error;
5095
+ }
5096
+ throw new Error("無法檢查 Git 狀態,請確認 git 指令可正常執行");
5097
+ }
5098
+ }
4735
5099
  async function main() {
4736
5100
  const args = parseArgs(process.argv.slice(2));
4737
5101
  if (args.version) {
@@ -4742,21 +5106,39 @@ async function main() {
4742
5106
  printUsage();
4743
5107
  process.exit(args.help ? 0 : 1);
4744
5108
  }
4745
- const vttFilePath = path5.resolve(args.vttFilePath);
5109
+ const vttFilePath = path6.resolve(args.vttFilePath);
4746
5110
  if (!fs5.existsSync(vttFilePath)) {
4747
5111
  console.error(`錯誤: 找不到檔案 "${vttFilePath}"`);
4748
5112
  process.exit(1);
4749
5113
  }
4750
- if (!vttFilePath.toLowerCase().endsWith(".vtt")) {
4751
- console.error(`錯誤: 檔案必須是 .vtt 格式`);
5114
+ const ext = path6.extname(vttFilePath).toLowerCase();
5115
+ if (!SUPPORTED_EXTS.has(ext)) {
5116
+ console.error(`錯誤: 不支援的檔案格式 "${ext}"。支援的格式: ${Array.from(SUPPORTED_EXTS).join(", ")}`);
4752
5117
  process.exit(1);
4753
5118
  }
4754
- const vttDir = path5.dirname(vttFilePath);
5119
+ process.on("SIGINT", () => {
5120
+ console.log(`
5121
+
5122
+ 收到中斷訊號,正在停止程式...`);
5123
+ process.exit(130);
5124
+ });
5125
+ const vttDir = path6.dirname(vttFilePath);
5126
+ if (args.overwrite) {
5127
+ try {
5128
+ checkGitStatus(vttDir);
5129
+ } catch (error) {
5130
+ console.error(`錯誤: ${error.message}`);
5131
+ process.exit(1);
5132
+ }
5133
+ }
4755
5134
  process.chdir(vttDir);
4756
5135
  console.log(`工作目錄: ${vttDir}`);
4757
5136
  const result = await runSubtitleCorrectionAgent({
4758
5137
  vttFilePath,
4759
- outputDir: args.outputDir
5138
+ outputDir: args.outputDir,
5139
+ chunkSize: args.chunkSize,
5140
+ overwrite: args.overwrite,
5141
+ aiProvider: args.aiProvider
4760
5142
  });
4761
5143
  if (!result.success) {
4762
5144
  console.error(`錯誤: ${result.error}`);
@@ -4770,7 +5152,7 @@ var isDirectRun = (() => {
4770
5152
  return metaMain;
4771
5153
  }
4772
5154
  const entryPath = fileURLToPath(import.meta.url);
4773
- return Boolean(process.argv[1]) && path5.resolve(process.argv[1]) === path5.resolve(entryPath);
5155
+ return Boolean(process.argv[1]) && path6.resolve(process.argv[1]) === path6.resolve(entryPath);
4774
5156
  })();
4775
5157
  if (isDirectRun) {
4776
5158
  main().catch((error) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@willh/subtitle-correction-agent",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "影片字幕校正代理人 - 使用 GitHub Copilot SDK",
5
5
  "type": "module",
6
6
  "files": [
@@ -12,14 +12,9 @@
12
12
  "main": "dist/index.js",
13
13
  "scripts": {
14
14
  "bump": "npm version patch --no-git-tag-version && npm install --package-lock-only",
15
-
16
15
  "build": "bun build src/index.ts --outdir dist --target node --format esm",
17
16
  "start": "npm run build && bun run src/index.ts",
18
17
  "dev": "npm run build && bun run src/index.ts",
19
-
20
- "build:bot": "bun build src/telegram-bot.ts --outdir dist --target node --format esm",
21
- "bot": "npm run build:bot && bun run src/telegram-bot.ts",
22
-
23
18
  "prepack": "npm run build",
24
19
  "test": "bun test",
25
20
  "typecheck": "tsc --noEmit"
@@ -34,8 +29,7 @@
34
29
  "author": "Will 保哥",
35
30
  "license": "MIT",
36
31
  "dependencies": {
37
- "@github/copilot-sdk": "^0.1.0",
38
- "telegraf": "^4.16.3"
32
+ "@github/copilot-sdk": "^0.1.32"
39
33
  },
40
34
  "devDependencies": {
41
35
  "@types/node": "^20.11.0",