listener-ai 2.6.0 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -36,10 +36,13 @@ var __importStar = (this && this.__importStar) || (function () {
36
36
  Object.defineProperty(exports, "__esModule", { value: true });
37
37
  const crypto_1 = require("crypto");
38
38
  const fs = __importStar(require("fs"));
39
+ const readline = __importStar(require("readline"));
39
40
  const path = __importStar(require("path"));
40
41
  const agentService_1 = require("./agentService");
42
+ const aiProvider_1 = require("./aiProvider");
41
43
  const audioFormats_1 = require("./audioFormats");
42
44
  const configService_1 = require("./configService");
45
+ const codexOAuth_1 = require("./codexOAuth");
43
46
  const dataPath_1 = require("./dataPath");
44
47
  const geminiService_1 = require("./geminiService");
45
48
  const outputService_1 = require("./outputService");
@@ -81,6 +84,7 @@ const USAGE_TEXT = 'Usage: listener <file> [--output <dir>] Transcribe an aud
81
84
  ' re-transcribe end-to-end, and save as a new note\n' +
82
85
  ' listener ask <question> [--ref <ref>]\n' +
83
86
  ' Ask the AI agent about saved meetings or settings\n' +
87
+ ' listener codex login|logout|status Manage OpenAI Codex OAuth sign-in\n' +
84
88
  ' listener config list|get|set|unset|path\n' +
85
89
  ' Manage configuration\n' +
86
90
  '\n' +
@@ -106,9 +110,12 @@ function showHelp() {
106
110
  process.exit(0);
107
111
  }
108
112
  const KNOWN_CONFIG_KEYS = [
113
+ 'aiProvider',
109
114
  'geminiApiKey',
110
115
  'geminiModel',
111
116
  'geminiFlashModel',
117
+ 'codexModel',
118
+ 'codexTranscriptionModel',
112
119
  'notionApiKey',
113
120
  'notionDatabaseId',
114
121
  'autoMode',
@@ -126,7 +133,7 @@ const KNOWN_CONFIG_KEYS = [
126
133
  ];
127
134
  function isSensitiveKey(key) {
128
135
  const lk = key.toLowerCase();
129
- return lk.includes('key') || lk.includes('webhook');
136
+ return lk.includes('key') || lk.includes('webhook') || lk.includes('oauth');
130
137
  }
131
138
  function maskValue(key, value) {
132
139
  if (value == null || value === '')
@@ -168,6 +175,14 @@ function parseKnownWords(v) {
168
175
  }
169
176
  function applyConfigSet(config, key, value) {
170
177
  switch (key) {
178
+ case 'aiProvider': {
179
+ if (!(0, aiProvider_1.isAiProvider)(value)) {
180
+ process.stderr.write('Error: aiProvider must be "gemini" or "codex"\n');
181
+ process.exit(1);
182
+ }
183
+ config.setAiProvider(value);
184
+ return;
185
+ }
171
186
  case 'geminiApiKey':
172
187
  config.setGeminiApiKey(value);
173
188
  return;
@@ -177,6 +192,12 @@ function applyConfigSet(config, key, value) {
177
192
  case 'geminiFlashModel':
178
193
  config.setGeminiFlashModel(value);
179
194
  return;
195
+ case 'codexModel':
196
+ config.setCodexModel(value);
197
+ return;
198
+ case 'codexTranscriptionModel':
199
+ config.setCodexTranscriptionModel(value);
200
+ return;
180
201
  case 'notionApiKey':
181
202
  config.setNotionApiKey(value);
182
203
  return;
@@ -221,6 +242,87 @@ function applyConfigSet(config, key, value) {
221
242
  return;
222
243
  }
223
244
  }
245
/**
 * Builds the message printed when the active AI provider has no usable
 * credentials.
 *
 * @param config Object exposing `getAiProvider()`.
 * @returns {string} A two-line, newline-terminated message. The Codex wording
 *   is used only when the provider is exactly `'codex'`; every other value
 *   gets the Gemini wording.
 */
function formatAiCredentialsError(config) {
    const [headline, remedy] = config.getAiProvider() === 'codex'
        ? [
            'Error: Codex OAuth is not configured.',
            'Run `listener codex login` or set aiProvider back to gemini with a Gemini API key.',
        ]
        : [
            'Error: Gemini API key not found.',
            'Set GEMINI_API_KEY env var, run `listener config set geminiApiKey <key>`, or run `listener codex login`.',
        ];
    return `${headline}\n${remedy}\n`;
}
253
/**
 * Creates the transcription service (`GeminiService`) from the current
 * configuration.
 *
 * @param config Configuration object supplying provider, credentials, models
 *   and known words through its getters.
 * @param dataPath Data directory handed through to the service unchanged.
 * @returns A new `GeminiService` instance.
 */
function createTranscriptionService(config, dataPath) {
    const provider = config.getAiProvider();
    const apiKey = config.getGeminiApiKey();
    const codexOAuth = config.getCodexOAuth();
    // Refreshed tokens are written back only when the credentials already
    // live in config.json. Env-only credentials must stay ephemeral;
    // persisting them would silently write env-provided OAuth tokens to disk
    // on every refresh.
    let onCodexOAuthUpdate;
    if (config.hasStoredCodexOAuth()) {
        onCodexOAuthUpdate = (credentials) => config.setCodexOAuth(credentials);
    }
    const serviceOptions = {
        provider,
        apiKey,
        codexOAuth,
        onCodexOAuthUpdate,
        dataPath,
        knownWords: config.getKnownWords(),
        proModel: config.getGeminiModel(),
        flashModel: config.getGeminiFlashModel(),
        codexModel: config.getCodexModel(),
        codexTranscriptionModel: config.getCodexTranscriptionModel(),
    };
    return new geminiService_1.GeminiService(serviceOptions);
}
272
/**
 * Creates the `AgentService` used to answer questions, configured from the
 * current configuration.
 *
 * @param config Configuration object; also passed to the service as
 *   `configService`.
 * @param dataPath Data directory handed through to the service unchanged.
 * @returns A new `AgentService` instance.
 */
function createAgentService(config, dataPath) {
    const provider = config.getAiProvider();
    const apiKey = config.getGeminiApiKey();
    const codexOAuth = config.getCodexOAuth();
    // Refreshed OAuth credentials are persisted only when they are already
    // stored in the config; otherwise no update callback is supplied.
    const persistRefreshed = config.hasStoredCodexOAuth()
        ? (credentials) => config.setCodexOAuth(credentials)
        : undefined;
    const agentOptions = {
        provider,
        apiKey,
        codexOAuth,
        onCodexOAuthUpdate: persistRefreshed,
        dataPath,
        configService: config,
        codexModel: config.getCodexModel(),
    };
    return new agentService_1.AgentService(agentOptions);
}
286
/**
 * Asks a single question on the terminal and resolves with the typed answer.
 *
 * The prompt is written to stderr and the answer is read from stdin.
 *
 * @param {string} message Prompt text; a single space is appended before the
 *   cursor.
 * @returns {Promise<string>} Resolves with the line the user entered.
 * @throws {Error} (as a rejection) when the input closes before any answer is
 *   given, e.g. stdin hits EOF. Without this the promise would never settle
 *   and the caller would stall or the process would exit without finishing.
 */
function promptLine(message) {
    const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
    return new Promise((resolve, reject) => {
        // Registered before asking so an immediate EOF is not missed. After a
        // normal answer the promise is already resolved, so the rejection
        // triggered by our own rl.close() below is a no-op.
        rl.once('close', () => {
            reject(new Error('Input closed before a response was entered.'));
        });
        rl.question(`${message} `, (answer) => {
            resolve(answer);
            rl.close();
        });
    });
}
295
/**
 * Implements `listener codex <login|logout|status>`.
 *
 * - `status` prints the provider, whether Codex OAuth is configured, and both
 *   Codex model names to stdout as `key=value` lines.
 * - `logout` clears the Codex OAuth credentials held by the config.
 * - `login` runs the OAuth login flow, stores the returned credentials and
 *   sets the provider to `codex`.
 * - Anything else prints a usage error to stderr and exits with code 1.
 *
 * @param {string[]} args Arguments after `codex`; only the first is used.
 * @returns {Promise<void>}
 */
async function handleCodex(args) {
    const sub = args[0];
    const dataPath = (0, dataPath_1.getDataPath)();
    const config = new configService_1.ConfigService(dataPath);
    switch (sub) {
        case 'status': {
            // Each value is read immediately before its own line is written.
            const fields = [
                ['aiProvider', () => config.getAiProvider()],
                ['codexOAuthConfigured', () => config.hasCodexOAuth()],
                ['codexModel', () => config.getCodexModel()],
                ['codexTranscriptionModel', () => config.getCodexTranscriptionModel()],
            ];
            for (const [label, read] of fields) {
                process.stdout.write(`${label}=${read()}\n`);
            }
            return;
        }
        case 'logout':
            config.clearCodexOAuth();
            process.stderr.write('Signed out of Codex OAuth.\n');
            return;
        case 'login':
            break;
        default:
            process.stderr.write('Error: Unknown codex command. Usage: listener codex login|logout|status\n');
            process.exit(1);
    }
    // The URL is only printed, not opened, and all progress goes to stderr.
    const loginHooks = {
        openUrl: (url) => {
            process.stderr.write(`Open this URL in your browser:\n${url}\n`);
        },
        onPrompt: async (prompt) => await promptLine(prompt.message),
        onProgress: (message) => process.stderr.write(`${message}\n`),
    };
    const credentials = await (0, codexOAuth_1.loginCodexOAuth)(loginHooks);
    config.setCodexOAuth(credentials);
    config.setAiProvider('codex');
    process.stderr.write('Signed in with Codex OAuth and set aiProvider=codex.\n');
}
224
326
  function handleConfig(subArgs) {
225
327
  const dataPath = (0, dataPath_1.getDataPath)();
226
328
  const config = new configService_1.ConfigService(dataPath);
@@ -558,9 +660,8 @@ async function handleMerge(args) {
558
660
  }
559
661
  const dataPath = (0, dataPath_1.getDataPath)();
560
662
  const config = new configService_1.ConfigService(dataPath);
561
- const apiKey = config.getGeminiApiKey();
562
- if (!apiKey) {
563
- process.stderr.write('Error: Gemini API key not found. Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
663
+ if (!config.hasAiAuth()) {
664
+ process.stderr.write(formatAiCredentialsError(config));
564
665
  process.exit(1);
565
666
  }
566
667
  // Resolve every ref to a folder + audio path before doing any expensive work
@@ -598,13 +699,7 @@ async function handleMerge(args) {
598
699
  outputPath: mergedAudioPath,
599
700
  });
600
701
  process.stderr.write(` -> ${mergedAudioPath}\n`);
601
- const gemini = new geminiService_1.GeminiService({
602
- apiKey,
603
- dataPath,
604
- knownWords: config.getKnownWords(),
605
- proModel: config.getGeminiModel(),
606
- flashModel: config.getGeminiFlashModel(),
607
- });
702
+ const gemini = createTranscriptionService(config, dataPath);
608
703
  process.stderr.write('Transcribing merged recording...\n');
609
704
  const result = await gemini.transcribeAudio(mergedAudioPath, (_percent, message) => {
610
705
  process.stderr.write(` ${message}\n`);
@@ -647,9 +742,8 @@ async function handleAsk(args) {
647
742
  }
648
743
  const dataPath = (0, dataPath_1.getDataPath)();
649
744
  const config = new configService_1.ConfigService(dataPath);
650
- const apiKey = config.getGeminiApiKey();
651
- if (!apiKey) {
652
- process.stderr.write('Error: Gemini API key not found. Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
745
+ if (!config.hasAiAuth()) {
746
+ process.stderr.write(formatAiCredentialsError(config));
653
747
  process.exit(1);
654
748
  }
655
749
  let scope = { kind: 'all' };
@@ -657,7 +751,7 @@ async function handleAsk(args) {
657
751
  const folderPath = await resolveRef(ref, dataPath);
658
752
  scope = { kind: 'single', folderName: path.basename(folderPath) };
659
753
  }
660
- const agent = new agentService_1.AgentService({ apiKey, dataPath, configService: config });
754
+ const agent = createAgentService(config, dataPath);
661
755
  const confirm = async (proposal) => {
662
756
  process.stderr.write('\n');
663
757
  return promptYesNo(`Proposed change -> ${proposal.description}\nApply?`);
@@ -712,10 +806,8 @@ async function handleTranscript(args) {
712
806
  }
713
807
  const dataPath = (0, dataPath_1.getDataPath)();
714
808
  const config = new configService_1.ConfigService(dataPath);
715
- const apiKey = config.getGeminiApiKey();
716
- if (!apiKey) {
717
- process.stderr.write('Error: Gemini API key not found.\n' +
718
- 'Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
809
+ if (!config.hasAiAuth()) {
810
+ process.stderr.write(formatAiCredentialsError(config));
719
811
  process.exit(1);
720
812
  }
721
813
  // Resolve --output before the expensive transcription so we fail fast on a
@@ -743,13 +835,7 @@ async function handleTranscript(args) {
743
835
  }
744
836
  }
745
837
  }
746
- const gemini = new geminiService_1.GeminiService({
747
- apiKey,
748
- dataPath,
749
- knownWords: config.getKnownWords(),
750
- proModel: config.getGeminiModel(),
751
- flashModel: config.getGeminiFlashModel(),
752
- });
838
+ const gemini = createTranscriptionService(config, dataPath);
753
839
  process.stderr.write(`Processing: ${filePath}\n`);
754
840
  const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
755
841
  process.stderr.write(` ${message}\n`);
@@ -782,6 +868,10 @@ async function main() {
782
868
  handleConfig(args.slice(1));
783
869
  return;
784
870
  }
871
+ if (args[0] === 'codex') {
872
+ await handleCodex(args.slice(1));
873
+ return;
874
+ }
785
875
  if (args[0] === 'list') {
786
876
  await handleList(args.slice(1));
787
877
  return;
@@ -845,22 +935,13 @@ async function main() {
845
935
  if (outputDir) {
846
936
  outputDir = path.resolve(outputDir);
847
937
  }
848
- // Get API key
849
938
  const dataPath = (0, dataPath_1.getDataPath)();
850
939
  const config = new configService_1.ConfigService(dataPath);
851
- const apiKey = config.getGeminiApiKey();
852
- if (!apiKey) {
853
- process.stderr.write('Error: Gemini API key not found.\n' +
854
- 'Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
940
+ if (!config.hasAiAuth()) {
941
+ process.stderr.write(formatAiCredentialsError(config));
855
942
  process.exit(1);
856
943
  }
857
- const gemini = new geminiService_1.GeminiService({
858
- apiKey,
859
- dataPath,
860
- knownWords: config.getKnownWords(),
861
- proModel: config.getGeminiModel(),
862
- flashModel: config.getGeminiFlashModel(),
863
- });
944
+ const gemini = createTranscriptionService(config, dataPath);
864
945
  process.stderr.write(`Processing: ${filePath}\n`);
865
946
  const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
866
947
  process.stderr.write(` ${message}\n`);
@@ -0,0 +1,68 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getCodexOAuthEnvCredentials = getCodexOAuthEnvCredentials;
4
+ exports.hasCodexOAuthEnvCredentials = hasCodexOAuthEnvCredentials;
5
+ exports.resolveCodexAccessToken = resolveCodexAccessToken;
6
+ exports.requireCodexAccessToken = requireCodexAccessToken;
7
+ exports.loginCodexOAuth = loginCodexOAuth;
8
+ const esmImport_1 = require("./esmImport");
9
// Holds the in-flight or completed import of the OAuth runtime so the module
// is requested at most once per process.
let runtimePromise;
/**
 * Loads the `@earendil-works/pi-ai/oauth` module on first use and returns the
 * same loaded module on every later call.
 *
 * @returns {Promise<object>} Whatever `importEsm` resolves with for that
 *   specifier.
 */
async function loadCodexOAuthRuntime() {
    if (runtimePromise == null) {
        runtimePromise = (0, esmImport_1.importEsm)('@earendil-works/pi-ai/oauth');
    }
    return await runtimePromise;
}
14
/**
 * Reads Codex OAuth credentials from environment variables.
 *
 * Each value is looked up under a `CODEX_OAUTH_*` name first and an
 * `OPENAI_CODEX_*` name second. Access and refresh tokens are trimmed, and a
 * blank value falls through to the next name.
 *
 * @returns {{access: string, refresh: string, expires: number} | undefined}
 *   `undefined` unless both an access and a refresh token are present.
 *   `expires` is the base-10 integer parsed from the expiry variable, or
 *   30 minutes from now when that variable is missing or not a number.
 *   NOTE(review): the expiry variable is presumably epoch milliseconds, to
 *   match the Date.now()-based fallback — confirm against the consumer.
 */
function getCodexOAuthEnvCredentials() {
    const { env } = process;
    const firstNonBlank = (...names) => {
        for (const name of names) {
            const value = env[name]?.trim();
            if (value) {
                return value;
            }
        }
        return '';
    };
    const access = firstNonBlank('CODEX_OAUTH_ACCESS_TOKEN', 'OPENAI_CODEX_ACCESS_TOKEN');
    const refresh = firstNonBlank('CODEX_OAUTH_REFRESH_TOKEN', 'OPENAI_CODEX_REFRESH_TOKEN');
    if (!(access && refresh)) {
        return undefined;
    }
    const expiresRaw = env.CODEX_OAUTH_EXPIRES || env.OPENAI_CODEX_EXPIRES;
    const parsedExpiry = expiresRaw ? Number.parseInt(expiresRaw, 10) : Number.NaN;
    const expires = Number.isFinite(parsedExpiry) ? parsedExpiry : Date.now() + 30 * 60000;
    return { access, refresh, expires };
}
31
/**
 * Reports whether `getCodexOAuthEnvCredentials()` currently yields
 * credentials.
 *
 * @returns {boolean} `true` when that call returns a truthy value.
 */
function hasCodexOAuthEnvCredentials() {
    const fromEnv = getCodexOAuthEnvCredentials();
    return Boolean(fromEnv);
}
34
/**
 * Resolves an API key for the `openai-codex` provider from OAuth credentials.
 *
 * @param params
 * @param params.credentials Credentials to use; when nullish, the credentials
 *   from `getCodexOAuthEnvCredentials()` are used instead.
 * @param params.onCredentialsChanged Optional callback, awaited with the new
 *   credentials when their access, refresh or expires value differs from the
 *   ones passed in.
 * @returns {Promise<string | undefined>} The resolved API key, or `undefined`
 *   when there are no credentials or the runtime returns nothing.
 */
async function resolveCodexAccessToken(params) {
    const current = params.credentials ?? getCodexOAuthEnvCredentials();
    if (!current) {
        return undefined;
    }
    const { getOAuthApiKey: fetchApiKey } = await loadCodexOAuthRuntime();
    const outcome = await fetchApiKey('openai-codex', { 'openai-codex': current });
    if (!outcome) {
        return undefined;
    }
    const updated = outcome.newCredentials;
    // Notify the caller only when at least one credential field changed.
    const rotated = ['access', 'refresh', 'expires'].some((field) => updated[field] !== current[field]);
    if (rotated) {
        await params.onCredentialsChanged?.(updated);
    }
    return outcome.apiKey;
}
50
/**
 * Like `resolveCodexAccessToken`, but treats a missing token as an error.
 *
 * @param params Passed through unchanged to `resolveCodexAccessToken`.
 * @returns {Promise<string>} The resolved token.
 * @throws {Error} When no token (or an empty one) is resolved.
 */
async function requireCodexAccessToken(params) {
    const token = await resolveCodexAccessToken(params);
    if (token) {
        return token;
    }
    throw new Error('Codex OAuth is not configured.');
}
57
/**
 * Runs the OpenAI Codex OAuth login flow through the lazily loaded runtime.
 *
 * @param params
 * @param params.openUrl Called with the authorization URL; its result is
 *   deliberately not awaited.
 * @param params.onPrompt Forwarded unchanged to the runtime.
 * @param params.onProgress Forwarded unchanged to the runtime.
 * @returns {Promise<object>} The credentials returned by the runtime's
 *   `loginOpenAICodex`.
 */
async function loginCodexOAuth(params) {
    const { loginOpenAICodex } = await loadCodexOAuthRuntime();
    const forwardAuthUrl = ({ url }) => {
        void params.openUrl(url);
    };
    return await loginOpenAICodex({
        originator: 'listener-ai',
        onAuth: forwardAuthUrl,
        onPrompt: params.onPrompt,
        onProgress: params.onProgress,
    });
}
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ // Encapsulates Codex OAuth credential state for services that need a fresh
3
+ // access token per request. Replaces the parallel `this.codexOAuth +
4
+ // this.onCodexOAuthUpdate + getToken()` fields that lived inside
5
+ // AgentService and GeminiService -- keeping them in sync was error-prone and
6
+ // the rotation invariant (caller persists when source is config, skips when
7
+ // source is env) is easy to get wrong if scattered.
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.CodexOAuthHolder = void 0;
10
+ const codexOAuth_1 = require("./codexOAuth");
11
/**
 * Keeps the current Codex OAuth credentials and hands out an access token on
 * demand.
 *
 * When token resolution reports changed credentials, the holder stores them
 * and then awaits the optional `onUpdate` callback with the new value.
 */
class CodexOAuthHolder {
    /**
     * @param options
     * @param options.credentials Initial credentials (may be undefined).
     * @param options.onUpdate Optional callback invoked with replaced
     *   credentials.
     */
    constructor(options) {
        const { credentials, onUpdate } = options;
        this.credentials = credentials;
        this.onUpdate = onUpdate;
    }
    /**
     * @returns {Promise<string>} The token from `requireCodexAccessToken`;
     *   rejects when that call rejects.
     */
    async getToken() {
        const rememberAndNotify = async (next) => {
            // Update local state first so later calls use the new credentials
            // even if the callback fails.
            this.credentials = next;
            await this.onUpdate?.(next);
        };
        return await (0, codexOAuth_1.requireCodexAccessToken)({
            credentials: this.credentials,
            onCredentialsChanged: rememberAndNotify,
        });
    }
}
26
+ exports.CodexOAuthHolder = CodexOAuthHolder;
@@ -0,0 +1,168 @@
1
+ "use strict";
2
+ // Minimal wrapper around OpenAI's `/v1/audio/transcriptions` endpoint.
3
+ //
4
+ // We keep this here (rather than going through pi-ai) because pi-ai is a
5
+ // chat/tool-call unified API -- it has no audio transcription surface. The
6
+ // Codex transcription flow needs only a multipart POST, so a thin direct
7
+ // fetch is simpler than wedging audio into pi-ai's chat model.
8
+ //
9
+ // Two output shapes, branched on model id:
10
+ // - `gpt-4o-transcribe-diarize` (default) returns `diarized_json` with
11
+ // speaker-labeled segments. We re-label "Speaker 0/1/..." onto the
12
+ // same `참가자N` convention the Gemini path uses so downstream code
13
+ // (summarization, transcript.md, Notion) doesn't have to care which
14
+ // transcription engine produced the text. This model rejects `prompt`,
15
+ // so user-supplied glossaries (`knownWords`) are dropped on this path.
16
+ // - `gpt-4o-transcribe` (and `whisper-1`) return `{text}` and accept
17
+ // `prompt` for vocabulary biasing, but produce no speaker labels.
18
+ //
19
+ // Format support: OpenAI accepts mp3, mp4, mpeg, mpga, m4a, wav, webm. Inputs
20
+ // outside that set are remuxed upstream in geminiService.ts via ffmpeg before
21
+ // reaching this helper.
22
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
23
+ if (k2 === undefined) k2 = k;
24
+ var desc = Object.getOwnPropertyDescriptor(m, k);
25
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
26
+ desc = { enumerable: true, get: function() { return m[k]; } };
27
+ }
28
+ Object.defineProperty(o, k2, desc);
29
+ }) : (function(o, m, k, k2) {
30
+ if (k2 === undefined) k2 = k;
31
+ o[k2] = m[k];
32
+ }));
33
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
34
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
35
+ }) : function(o, v) {
36
+ o["default"] = v;
37
+ });
38
+ var __importStar = (this && this.__importStar) || (function () {
39
+ var ownKeys = function(o) {
40
+ ownKeys = Object.getOwnPropertyNames || function (o) {
41
+ var ar = [];
42
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
43
+ return ar;
44
+ };
45
+ return ownKeys(o);
46
+ };
47
+ return function (mod) {
48
+ if (mod && mod.__esModule) return mod;
49
+ var result = {};
50
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
51
+ __setModuleDefault(result, mod);
52
+ return result;
53
+ };
54
+ })();
55
+ Object.defineProperty(exports, "__esModule", { value: true });
56
+ exports.OPENAI_TRANSCRIPTION_EXTENSIONS = void 0;
57
+ exports.isDiarizeModel = isDiarizeModel;
58
+ exports.transcribeCodexAudio = transcribeCodexAudio;
59
+ exports.formatDiarizedSegments = formatDiarizedSegments;
60
+ const fs = __importStar(require("fs"));
61
+ const path = __importStar(require("path"));
62
+ const audioFormats_1 = require("./audioFormats");
63
+ const OPENAI_API_BASE_URL = 'https://api.openai.com/v1';
64
+ const DIARIZE_MODEL_ID = 'gpt-4o-transcribe-diarize';
65
+ exports.OPENAI_TRANSCRIPTION_EXTENSIONS = new Set([
66
+ '.mp3',
67
+ '.mp4',
68
+ '.mpeg',
69
+ '.mpga',
70
+ '.m4a',
71
+ '.wav',
72
+ '.webm',
73
+ ]);
74
/**
 * Tells whether a model id names the diarizing transcription model.
 *
 * @param {string} model Model id; surrounding whitespace is ignored.
 * @returns {boolean} `true` when the trimmed id equals `DIARIZE_MODEL_ID`.
 */
function isDiarizeModel(model) {
    const normalized = model.trim();
    return normalized === DIARIZE_MODEL_ID;
}
77
/**
 * Uploads one audio file to the `/audio/transcriptions` endpoint and returns
 * the transcript text.
 *
 * @param params
 * @param {string} params.audioFilePath File to read and upload.
 * @param {string} params.model Model id (trimmed before use).
 * @param {string} [params.language] Sent as `language` when truthy.
 * @param {string} [params.prompt] Sent (trimmed) as `prompt`, but only for
 *   non-diarize models.
 * @param {() => Promise<string>} params.getToken Supplies the bearer token.
 * @returns {Promise<string>} For the diarize model, the result of
 *   `formatDiarizedSegments` on the response's `segments`; otherwise the
 *   response's `text`.
 * @throws {Error} On a non-OK HTTP status (message includes at most 500
 *   characters of the body) or when a non-diarize response has no text.
 */
async function transcribeCodexAudio(params) {
    const audioData = fs.readFileSync(params.audioFilePath);
    const ext = path.extname(params.audioFilePath);
    const fileName = path.basename(params.audioFilePath);
    const model = params.model.trim();
    const diarize = isDiarizeModel(model);
    const form = new FormData();
    form.append('model', model);
    if (params.language) {
        form.append('language', params.language);
    }
    if (diarize) {
        // The diarize model gets the diarized response format plus automatic
        // chunking, and is never sent a prompt.
        form.append('response_format', 'diarized_json');
        form.append('chunking_strategy', 'auto');
    }
    else {
        const vocabularyHint = params.prompt?.trim();
        if (vocabularyHint) {
            form.append('prompt', vocabularyHint);
        }
    }
    const audioBlob = new Blob([audioData], { type: (0, audioFormats_1.mimeTypeForExtension)(ext) });
    form.append('file', audioBlob, fileName);
    const sizeMB = (audioData.byteLength / (1024 * 1024)).toFixed(2);
    let modeNote = '';
    if (diarize) {
        modeNote = ' diarize=true';
    }
    else if (params.prompt) {
        // Reports the length of the prompt as supplied, not the trimmed one.
        modeNote = ` prompt=${params.prompt.length}chars`;
    }
    const languageNote = params.language ? ` lang=${params.language}` : '';
    const startedAt = Date.now();
    console.log(`[codex-transcribe] -> ${fileName} ${sizeMB}MB model=${model}${modeNote}${languageNote}`);
    // The token is fetched inside the timed window, so `elapsed` below also
    // covers the time spent in getToken().
    const response = await fetch(`${OPENAI_API_BASE_URL}/audio/transcriptions`, {
        method: 'POST',
        headers: { Authorization: `Bearer ${await params.getToken()}` },
        body: form,
    });
    const elapsed = Date.now() - startedAt;
    console.log(`[codex-transcribe] <- ${elapsed}ms status=${response.status} ${response.statusText}`);
    if (!response.ok) {
        // Cap the echoed body so a verbose upstream error does not flood logs
        // or error strings.
        const body = await response.text().catch(() => '');
        const trimmed = body.length > 500 ? `${body.slice(0, 500)}...` : body;
        const detail = trimmed ? `: ${trimmed}` : '';
        throw new Error(`OpenAI transcription failed (${response.status} ${response.statusText})${detail}`);
    }
    const payload = await response.json();
    if (diarize) {
        return formatDiarizedSegments(payload.segments);
    }
    const hasText = typeof payload.text === 'string' && payload.text.trim().length > 0;
    if (!hasText) {
        throw new Error('OpenAI transcription response missing text');
    }
    return payload.text;
}
126
/**
 * Turns diarized transcription segments into speaker-labelled text.
 *
 * Raw speaker ids are re-labelled as `참가자1`, `참가자2`, ... in order of
 * first appearance; a segment without a speaker is grouped under a shared
 * "unknown" speaker. Consecutive segments from the same speaker are merged
 * onto one line (joined with a space) so one speaker is not split into many
 * short stubs. Segments whose text is empty after trimming are dropped.
 *
 * Malformed input is tolerated rather than crashing with a TypeError:
 * a non-array `segments` value is treated as "no segments", and entries that
 * are null/undefined or whose `text` is not a string are skipped.
 *
 * @param {Array<{speaker?: string, text?: string}>} segments Segments in
 *   spoken order.
 * @returns {string} Lines of the form `참가자N: text`, separated by a blank
 *   line.
 * @throws {Error} When there are no segments, or none contains usable text.
 */
function formatDiarizedSegments(segments) {
    if (!Array.isArray(segments) || segments.length === 0) {
        throw new Error('OpenAI diarized transcription returned no segments');
    }
    const speakerIdx = new Map();
    const lines = [];
    let activeLabel;
    let activeBuffer = '';
    for (const seg of segments) {
        // Guard against null entries and non-string text in the payload.
        const text = typeof seg?.text === 'string' ? seg.text.trim() : '';
        if (!text) {
            continue;
        }
        const rawSpeaker = seg.speaker ?? 'unknown';
        let idx = speakerIdx.get(rawSpeaker);
        if (idx === undefined) {
            // Numbering is 1-based and follows first appearance.
            idx = speakerIdx.size + 1;
            speakerIdx.set(rawSpeaker, idx);
        }
        const label = `참가자${idx}`;
        if (label === activeLabel) {
            activeBuffer += ` ${text}`;
            continue;
        }
        // Speaker changed: flush the previous speaker's merged line.
        if (activeLabel !== undefined) {
            lines.push(`${activeLabel}: ${activeBuffer}`);
        }
        activeLabel = label;
        activeBuffer = text;
    }
    if (activeLabel !== undefined) {
        lines.push(`${activeLabel}: ${activeBuffer}`);
    }
    if (lines.length === 0) {
        throw new Error('OpenAI diarized transcription had segments but no usable text');
    }
    return lines.join('\n\n');
}