listener-ai 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -22
- package/THIRD_PARTY_NOTICES.md +27 -0
- package/dist/agentService.js +142 -119
- package/dist/aiProvider.js +35 -0
- package/dist/cli.js +119 -38
- package/dist/codexOAuth.js +68 -0
- package/dist/codexOAuthHolder.js +26 -0
- package/dist/codexTranscription.js +168 -0
- package/dist/configService.js +171 -25
- package/dist/dataPath.js +30 -10
- package/dist/esmImport.js +15 -0
- package/dist/geminiService.js +203 -39
- package/dist/main.js +84 -17
- package/dist/piAiClient.js +102 -0
- package/package.json +13 -4
package/dist/cli.js
CHANGED
|
@@ -36,10 +36,13 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
36
36
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
37
|
const crypto_1 = require("crypto");
|
|
38
38
|
const fs = __importStar(require("fs"));
|
|
39
|
+
const readline = __importStar(require("readline"));
|
|
39
40
|
const path = __importStar(require("path"));
|
|
40
41
|
const agentService_1 = require("./agentService");
|
|
42
|
+
const aiProvider_1 = require("./aiProvider");
|
|
41
43
|
const audioFormats_1 = require("./audioFormats");
|
|
42
44
|
const configService_1 = require("./configService");
|
|
45
|
+
const codexOAuth_1 = require("./codexOAuth");
|
|
43
46
|
const dataPath_1 = require("./dataPath");
|
|
44
47
|
const geminiService_1 = require("./geminiService");
|
|
45
48
|
const outputService_1 = require("./outputService");
|
|
@@ -81,6 +84,7 @@ const USAGE_TEXT = 'Usage: listener <file> [--output <dir>] Transcribe an aud
|
|
|
81
84
|
' re-transcribe end-to-end, and save as a new note\n' +
|
|
82
85
|
' listener ask <question> [--ref <ref>]\n' +
|
|
83
86
|
' Ask the AI agent about saved meetings or settings\n' +
|
|
87
|
+
' listener codex login|logout|status Manage OpenAI Codex OAuth sign-in\n' +
|
|
84
88
|
' listener config list|get|set|unset|path\n' +
|
|
85
89
|
' Manage configuration\n' +
|
|
86
90
|
'\n' +
|
|
@@ -106,9 +110,12 @@ function showHelp() {
|
|
|
106
110
|
process.exit(0);
|
|
107
111
|
}
|
|
108
112
|
const KNOWN_CONFIG_KEYS = [
|
|
113
|
+
'aiProvider',
|
|
109
114
|
'geminiApiKey',
|
|
110
115
|
'geminiModel',
|
|
111
116
|
'geminiFlashModel',
|
|
117
|
+
'codexModel',
|
|
118
|
+
'codexTranscriptionModel',
|
|
112
119
|
'notionApiKey',
|
|
113
120
|
'notionDatabaseId',
|
|
114
121
|
'autoMode',
|
|
@@ -126,7 +133,7 @@ const KNOWN_CONFIG_KEYS = [
|
|
|
126
133
|
];
|
|
127
134
|
function isSensitiveKey(key) {
|
|
128
135
|
const lk = key.toLowerCase();
|
|
129
|
-
return lk.includes('key') || lk.includes('webhook');
|
|
136
|
+
return lk.includes('key') || lk.includes('webhook') || lk.includes('oauth');
|
|
130
137
|
}
|
|
131
138
|
function maskValue(key, value) {
|
|
132
139
|
if (value == null || value === '')
|
|
@@ -168,6 +175,14 @@ function parseKnownWords(v) {
|
|
|
168
175
|
}
|
|
169
176
|
function applyConfigSet(config, key, value) {
|
|
170
177
|
switch (key) {
|
|
178
|
+
case 'aiProvider': {
|
|
179
|
+
if (!(0, aiProvider_1.isAiProvider)(value)) {
|
|
180
|
+
process.stderr.write('Error: aiProvider must be "gemini" or "codex"\n');
|
|
181
|
+
process.exit(1);
|
|
182
|
+
}
|
|
183
|
+
config.setAiProvider(value);
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
171
186
|
case 'geminiApiKey':
|
|
172
187
|
config.setGeminiApiKey(value);
|
|
173
188
|
return;
|
|
@@ -177,6 +192,12 @@ function applyConfigSet(config, key, value) {
|
|
|
177
192
|
case 'geminiFlashModel':
|
|
178
193
|
config.setGeminiFlashModel(value);
|
|
179
194
|
return;
|
|
195
|
+
case 'codexModel':
|
|
196
|
+
config.setCodexModel(value);
|
|
197
|
+
return;
|
|
198
|
+
case 'codexTranscriptionModel':
|
|
199
|
+
config.setCodexTranscriptionModel(value);
|
|
200
|
+
return;
|
|
180
201
|
case 'notionApiKey':
|
|
181
202
|
config.setNotionApiKey(value);
|
|
182
203
|
return;
|
|
@@ -221,6 +242,87 @@ function applyConfigSet(config, key, value) {
|
|
|
221
242
|
return;
|
|
222
243
|
}
|
|
223
244
|
}
|
|
245
|
+
/**
 * Builds the message printed when no usable AI credentials are available.
 *
 * The wording depends on the configured provider: Codex users are pointed at
 * `listener codex login`, everyone else gets the Gemini API key instructions.
 *
 * @param config Object exposing `getAiProvider()`.
 * @returns {string} A two-line, newline-terminated error message.
 */
function formatAiCredentialsError(config) {
    if (config.getAiProvider() !== 'codex') {
        return [
            'Error: Gemini API key not found.\n',
            'Set GEMINI_API_KEY env var, run `listener config set geminiApiKey <key>`, or run `listener codex login`.\n',
        ].join('');
    }
    return [
        'Error: Codex OAuth is not configured.\n',
        'Run `listener codex login` or set aiProvider back to gemini with a Gemini API key.\n',
    ].join('');
}
|
|
253
|
+
/**
 * Constructs the transcription service from the current configuration.
 *
 * @param config Configuration service supplying provider, credentials and models.
 * @param dataPath Data directory handed through to the service.
 * @returns A new `GeminiService` instance.
 */
function createTranscriptionService(config, dataPath) {
    const persistRefreshedTokens = (credentials) => config.setCodexOAuth(credentials);
    const options = {
        provider: config.getAiProvider(),
        apiKey: config.getGeminiApiKey(),
        codexOAuth: config.getCodexOAuth(),
        // Refreshed tokens are written back only when the credentials already
        // live in config.json. Credentials supplied purely through the
        // environment stay ephemeral, so a refresh never silently copies
        // env-provided OAuth tokens to disk.
        onCodexOAuthUpdate: config.hasStoredCodexOAuth() ? persistRefreshedTokens : undefined,
        dataPath,
        knownWords: config.getKnownWords(),
        proModel: config.getGeminiModel(),
        flashModel: config.getGeminiFlashModel(),
        codexModel: config.getCodexModel(),
        codexTranscriptionModel: config.getCodexTranscriptionModel(),
    };
    return new geminiService_1.GeminiService(options);
}
|
|
272
|
+
/**
 * Constructs the question-answering agent from the current configuration.
 *
 * @param config Configuration service; also passed to the agent as `configService`.
 * @param dataPath Data directory handed through to the agent.
 * @returns A new `AgentService` instance.
 */
function createAgentService(config, dataPath) {
    const persistRefreshedTokens = (credentials) => config.setCodexOAuth(credentials);
    const options = {
        provider: config.getAiProvider(),
        apiKey: config.getGeminiApiKey(),
        codexOAuth: config.getCodexOAuth(),
        // Write refreshed tokens back only when credentials are stored in the
        // config; otherwise leave the callback unset so nothing is persisted.
        onCodexOAuthUpdate: config.hasStoredCodexOAuth() ? persistRefreshedTokens : undefined,
        dataPath,
        configService: config,
        codexModel: config.getCodexModel(),
    };
    return new agentService_1.AgentService(options);
}
|
|
286
|
+
/**
 * Asks a single question on the terminal and resolves with the typed answer.
 *
 * The prompt is written to stderr so stdout stays free for command output.
 * If the input stream ends (EOF, closed pipe) or the interface is closed
 * before an answer arrives, readline never invokes the `question` callback;
 * in that case the promise resolves with an empty string instead of staying
 * pending forever.
 *
 * @param {string} message Prompt text; a single space is appended before input.
 * @returns {Promise<string>} The entered line, or `''` when input ended early.
 */
function promptLine(message) {
    const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
    return new Promise((resolve) => {
        // Fires on EOF as well as after our own rl.close() below; in the
        // latter case the promise is already resolved and this is a no-op.
        rl.once('close', () => resolve(''));
        rl.question(`${message} `, (answer) => {
            resolve(answer);
            rl.close();
        });
    });
}
|
|
295
|
+
/**
 * Implements `listener codex login|logout|status`.
 *
 * - `status` prints the provider, whether Codex OAuth is configured and the
 *   configured Codex models to stdout.
 * - `logout` clears the Codex OAuth credentials held by the config service.
 * - `login` runs the interactive OAuth flow, stores the resulting credentials
 *   and switches `aiProvider` to `codex`.
 * - anything else prints a usage error and exits with status 1.
 *
 * @param {string[]} args Arguments following the `codex` command word.
 */
async function handleCodex(args) {
    const [sub] = args;
    const dataPath = (0, dataPath_1.getDataPath)();
    const config = new configService_1.ConfigService(dataPath);
    switch (sub) {
        case 'status':
            process.stdout.write(`aiProvider=${config.getAiProvider()}\n`);
            process.stdout.write(`codexOAuthConfigured=${config.hasCodexOAuth()}\n`);
            process.stdout.write(`codexModel=${config.getCodexModel()}\n`);
            process.stdout.write(`codexTranscriptionModel=${config.getCodexTranscriptionModel()}\n`);
            return;
        case 'logout':
            config.clearCodexOAuth();
            process.stderr.write('Signed out of Codex OAuth.\n');
            return;
        case 'login':
            break;
        default:
            process.stderr.write('Error: Unknown codex command. Usage: listener codex login|logout|status\n');
            process.exit(1);
    }
    // The CLI does not launch a browser itself; it prints the URL for the user.
    const showUrl = (url) => {
        process.stderr.write(`Open this URL in your browser:\n${url}\n`);
    };
    const askUser = async (prompt) => await promptLine(prompt.message);
    const reportProgress = (message) => process.stderr.write(`${message}\n`);
    const credentials = await (0, codexOAuth_1.loginCodexOAuth)({
        openUrl: showUrl,
        onPrompt: askUser,
        onProgress: reportProgress,
    });
    config.setCodexOAuth(credentials);
    config.setAiProvider('codex');
    process.stderr.write('Signed in with Codex OAuth and set aiProvider=codex.\n');
}
|
|
224
326
|
function handleConfig(subArgs) {
|
|
225
327
|
const dataPath = (0, dataPath_1.getDataPath)();
|
|
226
328
|
const config = new configService_1.ConfigService(dataPath);
|
|
@@ -558,9 +660,8 @@ async function handleMerge(args) {
|
|
|
558
660
|
}
|
|
559
661
|
const dataPath = (0, dataPath_1.getDataPath)();
|
|
560
662
|
const config = new configService_1.ConfigService(dataPath);
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
process.stderr.write('Error: Gemini API key not found. Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
|
|
663
|
+
if (!config.hasAiAuth()) {
|
|
664
|
+
process.stderr.write(formatAiCredentialsError(config));
|
|
564
665
|
process.exit(1);
|
|
565
666
|
}
|
|
566
667
|
// Resolve every ref to a folder + audio path before doing any expensive work
|
|
@@ -598,13 +699,7 @@ async function handleMerge(args) {
|
|
|
598
699
|
outputPath: mergedAudioPath,
|
|
599
700
|
});
|
|
600
701
|
process.stderr.write(` -> ${mergedAudioPath}\n`);
|
|
601
|
-
const gemini =
|
|
602
|
-
apiKey,
|
|
603
|
-
dataPath,
|
|
604
|
-
knownWords: config.getKnownWords(),
|
|
605
|
-
proModel: config.getGeminiModel(),
|
|
606
|
-
flashModel: config.getGeminiFlashModel(),
|
|
607
|
-
});
|
|
702
|
+
const gemini = createTranscriptionService(config, dataPath);
|
|
608
703
|
process.stderr.write('Transcribing merged recording...\n');
|
|
609
704
|
const result = await gemini.transcribeAudio(mergedAudioPath, (_percent, message) => {
|
|
610
705
|
process.stderr.write(` ${message}\n`);
|
|
@@ -647,9 +742,8 @@ async function handleAsk(args) {
|
|
|
647
742
|
}
|
|
648
743
|
const dataPath = (0, dataPath_1.getDataPath)();
|
|
649
744
|
const config = new configService_1.ConfigService(dataPath);
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
process.stderr.write('Error: Gemini API key not found. Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
|
|
745
|
+
if (!config.hasAiAuth()) {
|
|
746
|
+
process.stderr.write(formatAiCredentialsError(config));
|
|
653
747
|
process.exit(1);
|
|
654
748
|
}
|
|
655
749
|
let scope = { kind: 'all' };
|
|
@@ -657,7 +751,7 @@ async function handleAsk(args) {
|
|
|
657
751
|
const folderPath = await resolveRef(ref, dataPath);
|
|
658
752
|
scope = { kind: 'single', folderName: path.basename(folderPath) };
|
|
659
753
|
}
|
|
660
|
-
const agent =
|
|
754
|
+
const agent = createAgentService(config, dataPath);
|
|
661
755
|
const confirm = async (proposal) => {
|
|
662
756
|
process.stderr.write('\n');
|
|
663
757
|
return promptYesNo(`Proposed change -> ${proposal.description}\nApply?`);
|
|
@@ -712,10 +806,8 @@ async function handleTranscript(args) {
|
|
|
712
806
|
}
|
|
713
807
|
const dataPath = (0, dataPath_1.getDataPath)();
|
|
714
808
|
const config = new configService_1.ConfigService(dataPath);
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
process.stderr.write('Error: Gemini API key not found.\n' +
|
|
718
|
-
'Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
|
|
809
|
+
if (!config.hasAiAuth()) {
|
|
810
|
+
process.stderr.write(formatAiCredentialsError(config));
|
|
719
811
|
process.exit(1);
|
|
720
812
|
}
|
|
721
813
|
// Resolve --output before the expensive transcription so we fail fast on a
|
|
@@ -743,13 +835,7 @@ async function handleTranscript(args) {
|
|
|
743
835
|
}
|
|
744
836
|
}
|
|
745
837
|
}
|
|
746
|
-
const gemini =
|
|
747
|
-
apiKey,
|
|
748
|
-
dataPath,
|
|
749
|
-
knownWords: config.getKnownWords(),
|
|
750
|
-
proModel: config.getGeminiModel(),
|
|
751
|
-
flashModel: config.getGeminiFlashModel(),
|
|
752
|
-
});
|
|
838
|
+
const gemini = createTranscriptionService(config, dataPath);
|
|
753
839
|
process.stderr.write(`Processing: ${filePath}\n`);
|
|
754
840
|
const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
|
|
755
841
|
process.stderr.write(` ${message}\n`);
|
|
@@ -782,6 +868,10 @@ async function main() {
|
|
|
782
868
|
handleConfig(args.slice(1));
|
|
783
869
|
return;
|
|
784
870
|
}
|
|
871
|
+
if (args[0] === 'codex') {
|
|
872
|
+
await handleCodex(args.slice(1));
|
|
873
|
+
return;
|
|
874
|
+
}
|
|
785
875
|
if (args[0] === 'list') {
|
|
786
876
|
await handleList(args.slice(1));
|
|
787
877
|
return;
|
|
@@ -845,22 +935,13 @@ async function main() {
|
|
|
845
935
|
if (outputDir) {
|
|
846
936
|
outputDir = path.resolve(outputDir);
|
|
847
937
|
}
|
|
848
|
-
// Get API key
|
|
849
938
|
const dataPath = (0, dataPath_1.getDataPath)();
|
|
850
939
|
const config = new configService_1.ConfigService(dataPath);
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
process.stderr.write('Error: Gemini API key not found.\n' +
|
|
854
|
-
'Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
|
|
940
|
+
if (!config.hasAiAuth()) {
|
|
941
|
+
process.stderr.write(formatAiCredentialsError(config));
|
|
855
942
|
process.exit(1);
|
|
856
943
|
}
|
|
857
|
-
const gemini =
|
|
858
|
-
apiKey,
|
|
859
|
-
dataPath,
|
|
860
|
-
knownWords: config.getKnownWords(),
|
|
861
|
-
proModel: config.getGeminiModel(),
|
|
862
|
-
flashModel: config.getGeminiFlashModel(),
|
|
863
|
-
});
|
|
944
|
+
const gemini = createTranscriptionService(config, dataPath);
|
|
864
945
|
process.stderr.write(`Processing: ${filePath}\n`);
|
|
865
946
|
const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
|
|
866
947
|
process.stderr.write(` ${message}\n`);
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getCodexOAuthEnvCredentials = getCodexOAuthEnvCredentials;
|
|
4
|
+
exports.hasCodexOAuthEnvCredentials = hasCodexOAuthEnvCredentials;
|
|
5
|
+
exports.resolveCodexAccessToken = resolveCodexAccessToken;
|
|
6
|
+
exports.requireCodexAccessToken = requireCodexAccessToken;
|
|
7
|
+
exports.loginCodexOAuth = loginCodexOAuth;
|
|
8
|
+
const esmImport_1 = require("./esmImport");
|
|
9
|
+
let runtimePromise;
|
|
10
|
+
/**
 * Loads the `@earendil-works/pi-ai/oauth` module through `importEsm`, reusing
 * the same promise on every later call so the import is only started once.
 *
 * @returns The loaded OAuth runtime module.
 */
async function loadCodexOAuthRuntime() {
    if (runtimePromise == null) {
        runtimePromise = (0, esmImport_1.importEsm)('@earendil-works/pi-ai/oauth');
    }
    return await runtimePromise;
}
|
|
14
|
+
/**
 * Reads Codex OAuth credentials from the environment.
 *
 * Each value is looked up under its `CODEX_OAUTH_*` name first and its
 * `OPENAI_CODEX_*` alias second. Values are trimmed, and a blank value counts
 * as unset so the alias is still consulted -- this now applies to the expiry
 * as well as to the tokens.
 *
 * @returns {{access: string, refresh: string, expires: number} | undefined}
 *   The credentials, or `undefined` unless both an access token and a refresh
 *   token are present. `expires` is the parsed integer from the environment;
 *   when it is missing or not a number it defaults to 30 minutes from now.
 */
function getCodexOAuthEnvCredentials() {
    const DEFAULT_LIFETIME_MS = 30 * 60000;
    // First non-blank (after trimming) value among the given variable names.
    const readEnv = (...names) => {
        for (const name of names) {
            const value = process.env[name]?.trim();
            if (value) {
                return value;
            }
        }
        return '';
    };
    const access = readEnv('CODEX_OAUTH_ACCESS_TOKEN', 'OPENAI_CODEX_ACCESS_TOKEN');
    const refresh = readEnv('CODEX_OAUTH_REFRESH_TOKEN', 'OPENAI_CODEX_REFRESH_TOKEN');
    if (!access || !refresh) {
        return undefined;
    }
    const expiresRaw = readEnv('CODEX_OAUTH_EXPIRES', 'OPENAI_CODEX_EXPIRES');
    const parsed = expiresRaw ? Number.parseInt(expiresRaw, 10) : Number.NaN;
    return {
        access,
        refresh,
        expires: Number.isFinite(parsed) ? parsed : Date.now() + DEFAULT_LIFETIME_MS,
    };
}
|
|
31
|
+
/**
 * Reports whether `getCodexOAuthEnvCredentials()` currently yields credentials.
 *
 * @returns {boolean} `true` when environment credentials are available.
 */
function hasCodexOAuthEnvCredentials() {
    const fromEnv = getCodexOAuthEnvCredentials();
    return Boolean(fromEnv);
}
|
|
34
|
+
/**
 * Resolves an API key for the `openai-codex` provider.
 *
 * Uses `params.credentials` when given, otherwise the environment
 * credentials. When the runtime hands back credentials that differ from the
 * input in `access`, `refresh` or `expires`, `params.onCredentialsChanged`
 * (if provided) is awaited with the new credentials before returning.
 *
 * @param params `{ credentials?, onCredentialsChanged? }`.
 * @returns The resolved API key, or `undefined` when no credentials exist or
 *   the runtime resolves nothing.
 */
async function resolveCodexAccessToken(params) {
    const current = params.credentials ?? getCodexOAuthEnvCredentials();
    if (!current) {
        return undefined;
    }
    const { getOAuthApiKey } = await loadCodexOAuthRuntime();
    const resolved = await getOAuthApiKey('openai-codex', { 'openai-codex': current });
    if (!resolved) {
        return undefined;
    }
    const refreshed = resolved.newCredentials;
    const rotated = ['access', 'refresh', 'expires'].some((field) => refreshed[field] !== current[field]);
    if (rotated) {
        await params.onCredentialsChanged?.(refreshed);
    }
    return resolved.apiKey;
}
|
|
50
|
+
/**
 * Same as `resolveCodexAccessToken`, but rejects instead of returning an
 * empty result.
 *
 * @param params Passed through unchanged to `resolveCodexAccessToken`.
 * @returns The resolved access token.
 * @throws {Error} 'Codex OAuth is not configured.' when no token is resolved.
 */
async function requireCodexAccessToken(params) {
    const token = await resolveCodexAccessToken(params);
    if (token) {
        return token;
    }
    throw new Error('Codex OAuth is not configured.');
}
|
|
57
|
+
/**
 * Runs the interactive OpenAI Codex OAuth login via the pi-ai OAuth runtime.
 *
 * @param params
 *   - `openUrl(url)`: shows or opens the sign-in URL; may be sync or async.
 *   - `onPrompt`: forwarded unchanged to the runtime.
 *   - `onProgress`: forwarded unchanged to the runtime; also used to report a
 *     failure to open the URL.
 * @returns The credentials produced by `loginOpenAICodex`.
 */
async function loginCodexOAuth(params) {
    const { loginOpenAICodex } = await loadCodexOAuthRuntime();
    const credentials = await loginOpenAICodex({
        originator: 'listener-ai',
        onAuth: (info) => {
            // openUrl is still invoked synchronously. If it returns a promise
            // that rejects (e.g. no browser could be launched), surface the
            // URL through onProgress instead of leaving an unhandled rejection.
            Promise.resolve(params.openUrl(info.url)).catch((err) => {
                const reason = err instanceof Error ? err.message : String(err);
                params.onProgress?.(`Could not open the sign-in URL automatically (${reason}). Open it manually: ${info.url}`);
            });
        },
        onPrompt: params.onPrompt,
        onProgress: params.onProgress,
    });
    return credentials;
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Encapsulates Codex OAuth credential state for services that need a fresh
|
|
3
|
+
// access token per request. Replaces the parallel `this.codexOAuth +
|
|
4
|
+
// this.onCodexOAuthUpdate + getToken()` fields that lived inside
|
|
5
|
+
// AgentService and GeminiService -- keeping them in sync was error-prone and
|
|
6
|
+
// the rotation invariant (caller persists when source is config, skips when
|
|
7
|
+
// source is env) is easy to get wrong if scattered.
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.CodexOAuthHolder = void 0;
|
|
10
|
+
const codexOAuth_1 = require("./codexOAuth");
|
|
11
|
+
/**
 * Holds the current Codex OAuth credentials together with an optional
 * callback that is told whenever they change.
 */
class CodexOAuthHolder {
    /**
     * @param options `{ credentials, onUpdate }`; both are stored as given.
     */
    constructor(options) {
        const { credentials, onUpdate } = options;
        this.credentials = credentials;
        this.onUpdate = onUpdate;
    }
    /**
     * Obtains an access token for the held credentials. When the credentials
     * change during resolution, the new ones replace the held copy and are
     * then passed to `onUpdate` (if one was supplied).
     *
     * @returns The access token; rejects when none can be resolved.
     */
    async getToken() {
        const rememberAndNotify = async (next) => {
            this.credentials = next;
            await this.onUpdate?.(next);
        };
        return await (0, codexOAuth_1.requireCodexAccessToken)({
            credentials: this.credentials,
            onCredentialsChanged: rememberAndNotify,
        });
    }
}
|
|
26
|
+
exports.CodexOAuthHolder = CodexOAuthHolder;
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Minimal wrapper around OpenAI's `/v1/audio/transcriptions` endpoint.
|
|
3
|
+
//
|
|
4
|
+
// We keep this here (rather than going through pi-ai) because pi-ai is a
|
|
5
|
+
// chat/tool-call unified API -- it has no audio transcription surface. The
|
|
6
|
+
// Codex transcription flow needs only a multipart POST, so a thin direct
|
|
7
|
+
// fetch is simpler than wedging audio into pi-ai's chat model.
|
|
8
|
+
//
|
|
9
|
+
// Two output shapes, branched on model id:
|
|
10
|
+
// - `gpt-4o-transcribe-diarize` (default) returns `diarized_json` with
|
|
11
|
+
// speaker-labeled segments. We re-label "Speaker 0/1/..." onto the
|
|
12
|
+
// same `참가자N` convention the Gemini path uses so downstream code
|
|
13
|
+
// (summarization, transcript.md, Notion) doesn't have to care which
|
|
14
|
+
// transcription engine produced the text. This model rejects `prompt`,
|
|
15
|
+
// so user-supplied glossaries (`knownWords`) are dropped on this path.
|
|
16
|
+
// - `gpt-4o-transcribe` (and `whisper-1`) return `{text}` and accept
|
|
17
|
+
// `prompt` for vocabulary biasing, but produce no speaker labels.
|
|
18
|
+
//
|
|
19
|
+
// Format support: OpenAI accepts mp3, mp4, mpeg, mpga, m4a, wav, webm. Inputs
|
|
20
|
+
// outside that set are remuxed upstream in geminiService.ts via ffmpeg before
|
|
21
|
+
// reaching this helper.
|
|
22
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
25
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
26
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
27
|
+
}
|
|
28
|
+
Object.defineProperty(o, k2, desc);
|
|
29
|
+
}) : (function(o, m, k, k2) {
|
|
30
|
+
if (k2 === undefined) k2 = k;
|
|
31
|
+
o[k2] = m[k];
|
|
32
|
+
}));
|
|
33
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
34
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
35
|
+
}) : function(o, v) {
|
|
36
|
+
o["default"] = v;
|
|
37
|
+
});
|
|
38
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
39
|
+
var ownKeys = function(o) {
|
|
40
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
41
|
+
var ar = [];
|
|
42
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
43
|
+
return ar;
|
|
44
|
+
};
|
|
45
|
+
return ownKeys(o);
|
|
46
|
+
};
|
|
47
|
+
return function (mod) {
|
|
48
|
+
if (mod && mod.__esModule) return mod;
|
|
49
|
+
var result = {};
|
|
50
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
51
|
+
__setModuleDefault(result, mod);
|
|
52
|
+
return result;
|
|
53
|
+
};
|
|
54
|
+
})();
|
|
55
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
56
|
+
exports.OPENAI_TRANSCRIPTION_EXTENSIONS = void 0;
|
|
57
|
+
exports.isDiarizeModel = isDiarizeModel;
|
|
58
|
+
exports.transcribeCodexAudio = transcribeCodexAudio;
|
|
59
|
+
exports.formatDiarizedSegments = formatDiarizedSegments;
|
|
60
|
+
const fs = __importStar(require("fs"));
|
|
61
|
+
const path = __importStar(require("path"));
|
|
62
|
+
const audioFormats_1 = require("./audioFormats");
|
|
63
|
+
const OPENAI_API_BASE_URL = 'https://api.openai.com/v1';
|
|
64
|
+
const DIARIZE_MODEL_ID = 'gpt-4o-transcribe-diarize';
|
|
65
|
+
exports.OPENAI_TRANSCRIPTION_EXTENSIONS = new Set([
|
|
66
|
+
'.mp3',
|
|
67
|
+
'.mp4',
|
|
68
|
+
'.mpeg',
|
|
69
|
+
'.mpga',
|
|
70
|
+
'.m4a',
|
|
71
|
+
'.wav',
|
|
72
|
+
'.webm',
|
|
73
|
+
]);
|
|
74
|
+
/**
 * Tells whether a model id, ignoring surrounding whitespace, is the diarizing
 * transcription model (`DIARIZE_MODEL_ID`).
 *
 * @param {string} model Model id to check.
 * @returns {boolean} `true` for the diarize model, `false` otherwise.
 */
function isDiarizeModel(model) {
    const normalized = model.trim();
    return DIARIZE_MODEL_ID === normalized;
}
|
|
77
|
+
/**
 * Uploads one audio file to the `/audio/transcriptions` endpoint and returns
 * the transcript text.
 *
 * For the diarize model the request asks for `diarized_json` with automatic
 * chunking and the segments are formatted by `formatDiarizedSegments`; the
 * prompt is not sent on that path. For any other model the trimmed prompt is
 * sent when non-empty and the response's `text` field is returned.
 *
 * @param params
 *   - `audioFilePath`: path of the file to upload.
 *   - `model`: transcription model id (surrounding whitespace is ignored).
 *   - `language`: optional language hint, sent as-is when truthy.
 *   - `prompt`: optional vocabulary prompt (non-diarize models only).
 *   - `getToken()`: async provider of the bearer token.
 * @returns {Promise<string>} The transcript.
 * @throws {Error} When the file cannot be read, the HTTP response is not OK,
 *   or the response carries no usable text/segments.
 */
async function transcribeCodexAudio(params) {
    const fileName = path.basename(params.audioFilePath);
    // Async read: this runs inside an async request path, so avoid blocking
    // the event loop while the whole file is loaded.
    const audioData = await fs.promises.readFile(params.audioFilePath);
    const ext = path.extname(params.audioFilePath);
    const model = params.model.trim();
    const diarize = isDiarizeModel(model);
    const prompt = params.prompt?.trim();
    const form = new FormData();
    form.append('model', model);
    if (params.language) {
        form.append('language', params.language);
    }
    if (diarize) {
        // Both fields accompany the diarize model; `chunking_strategy=auto`
        // leaves the splitting of long audio to the API.
        form.append('response_format', 'diarized_json');
        form.append('chunking_strategy', 'auto');
    }
    else if (prompt) {
        form.append('prompt', prompt);
    }
    form.append('file', new Blob([audioData], { type: (0, audioFormats_1.mimeTypeForExtension)(ext) }), fileName);
    const sizeMB = (audioData.byteLength / (1024 * 1024)).toFixed(2);
    // Log what is actually sent: the prompt length is that of the trimmed prompt.
    const detail = diarize ? ' diarize=true' : prompt ? ` prompt=${prompt.length}chars` : '';
    console.log(`[codex-transcribe] -> ${fileName} ${sizeMB}MB model=${model}${detail}${params.language ? ` lang=${params.language}` : ''}`);
    // Resolve the token before starting the timer so the logged latency
    // covers only the HTTP request, not a possible token refresh.
    const token = await params.getToken();
    const startedAt = Date.now();
    const response = await fetch(`${OPENAI_API_BASE_URL}/audio/transcriptions`, {
        method: 'POST',
        headers: { Authorization: `Bearer ${token}` },
        body: form,
    });
    const elapsed = Date.now() - startedAt;
    console.log(`[codex-transcribe] <- ${elapsed}ms status=${response.status} ${response.statusText}`);
    if (!response.ok) {
        // Cap the error body at 500 characters so a verbose upstream response
        // does not flood logs and error strings.
        const body = await response.text().catch(() => '');
        const trimmed = body.length > 500 ? `${body.slice(0, 500)}...` : body;
        throw new Error(`OpenAI transcription failed (${response.status} ${response.statusText})${trimmed ? `: ${trimmed}` : ''}`);
    }
    // `?.` guards against a literal `null` JSON body, which would otherwise
    // surface as a TypeError instead of the descriptive errors below.
    const payload = await response.json();
    if (diarize) {
        return formatDiarizedSegments(payload?.segments);
    }
    const text = payload?.text;
    if (typeof text !== 'string' || text.trim().length === 0) {
        throw new Error('OpenAI transcription response missing text');
    }
    return text;
}
|
|
126
|
+
/**
 * Turns diarized transcription segments into a speaker-labelled transcript.
 *
 * Raw speaker ids are renumbered in order of first appearance onto the
 * `참가자N` label convention (a missing speaker is grouped under 'unknown').
 * Segments without usable text are dropped, and consecutive segments from the
 * same speaker are joined with a space onto one line so a single turn is not
 * split into many short stubs. Lines are separated by a blank line.
 *
 * The input comes from an external API response, so malformed data is
 * tolerated: a non-array `segments` value is treated as "no segments", and
 * entries that are null or whose `text` is not a string are skipped.
 *
 * @param {Array<{speaker?: string, text?: string}>} segments Diarized segments.
 * @returns {string} The formatted transcript.
 * @throws {Error} When there are no segments, or none of them has text.
 */
function formatDiarizedSegments(segments) {
    if (!Array.isArray(segments) || segments.length === 0) {
        throw new Error('OpenAI diarized transcription returned no segments');
    }
    const speakerIdx = new Map();
    const turns = [];
    for (const seg of segments) {
        const text = typeof seg?.text === 'string' ? seg.text.trim() : '';
        if (!text) {
            continue;
        }
        const rawSpeaker = seg.speaker ?? 'unknown';
        let idx = speakerIdx.get(rawSpeaker);
        if (idx === undefined) {
            // Speakers are numbered from 1 in order of first appearance.
            idx = speakerIdx.size + 1;
            speakerIdx.set(rawSpeaker, idx);
        }
        const lastTurn = turns[turns.length - 1];
        if (lastTurn !== undefined && lastTurn.idx === idx) {
            lastTurn.parts.push(text);
        }
        else {
            turns.push({ idx, parts: [text] });
        }
    }
    if (turns.length === 0) {
        throw new Error('OpenAI diarized transcription had segments but no usable text');
    }
    return turns
        .map(({ idx, parts }) => `참가자${idx}: ${parts.join(' ')}`)
        .join('\n\n');
}
|