listener-ai 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -22
- package/THIRD_PARTY_NOTICES.md +27 -0
- package/dist/agentService.js +142 -119
- package/dist/aiProvider.js +35 -0
- package/dist/cli.js +119 -38
- package/dist/codexOAuth.js +68 -0
- package/dist/codexOAuthHolder.js +26 -0
- package/dist/codexTranscription.js +168 -0
- package/dist/configService.js +171 -25
- package/dist/dataPath.js +30 -10
- package/dist/esmImport.js +15 -0
- package/dist/geminiService.js +203 -39
- package/dist/main.js +84 -17
- package/dist/piAiClient.js +102 -0
- package/package.json +13 -4
package/dist/geminiService.js
CHANGED
|
@@ -39,8 +39,12 @@ const path = __importStar(require("path"));
|
|
|
39
39
|
const child_process_1 = require("child_process");
|
|
40
40
|
const util_1 = require("util");
|
|
41
41
|
const genai_1 = require("@google/genai");
|
|
42
|
+
const aiProvider_1 = require("./aiProvider");
|
|
42
43
|
const audioFormats_1 = require("./audioFormats");
|
|
44
|
+
const codexOAuthHolder_1 = require("./codexOAuthHolder");
|
|
45
|
+
const codexTranscription_1 = require("./codexTranscription");
|
|
43
46
|
const outputService_1 = require("./outputService");
|
|
47
|
+
const piAiClient_1 = require("./piAiClient");
|
|
44
48
|
const ffmpegManager_1 = require("./services/ffmpegManager");
|
|
45
49
|
const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
|
|
46
50
|
// Append a section to the summary prompt instructing Gemini to enrich each
|
|
@@ -145,12 +149,99 @@ class GeminiService {
|
|
|
145
149
|
return process.platform === 'win32' ? 'ffmpeg.exe' : 'ffmpeg';
|
|
146
150
|
}
|
|
147
151
|
constructor(options) {
|
|
148
|
-
this.
|
|
149
|
-
this.
|
|
152
|
+
this.provider = options.provider ?? 'gemini';
|
|
153
|
+
if (this.provider === 'gemini') {
|
|
154
|
+
if (!options.apiKey) {
|
|
155
|
+
throw new Error('Gemini API key is required for the Gemini provider.');
|
|
156
|
+
}
|
|
157
|
+
this.ai = new genai_1.GoogleGenAI({ apiKey: options.apiKey });
|
|
158
|
+
this.geminiApiKey = options.apiKey;
|
|
159
|
+
}
|
|
160
|
+
else {
|
|
161
|
+
this.codexAuth = new codexOAuthHolder_1.CodexOAuthHolder({
|
|
162
|
+
credentials: options.codexOAuth,
|
|
163
|
+
onUpdate: options.onCodexOAuthUpdate,
|
|
164
|
+
});
|
|
165
|
+
}
|
|
150
166
|
this.ffmpegManager = new ffmpegManager_1.FFmpegManager(options.dataPath);
|
|
151
167
|
this.knownWords = options.knownWords || [];
|
|
152
168
|
this.proModel = options.proModel;
|
|
153
169
|
this.flashModel = options.flashModel;
|
|
170
|
+
this.codexModel = options.codexModel || aiProvider_1.DEFAULT_CODEX_MODEL;
|
|
171
|
+
this.codexTranscriptionModel =
|
|
172
|
+
options.codexTranscriptionModel || aiProvider_1.DEFAULT_CODEX_TRANSCRIPTION_MODEL;
|
|
173
|
+
}
|
|
174
|
+
gemini() {
|
|
175
|
+
if (!this.ai) {
|
|
176
|
+
throw new Error('Gemini client is not configured for the selected AI provider.');
|
|
177
|
+
}
|
|
178
|
+
return this.ai;
|
|
179
|
+
}
|
|
180
|
+
async getCodexToken() {
|
|
181
|
+
if (!this.codexAuth) {
|
|
182
|
+
throw new Error('Codex OAuth holder is not configured.');
|
|
183
|
+
}
|
|
184
|
+
return await this.codexAuth.getToken();
|
|
185
|
+
}
|
|
186
|
+
requireGeminiApiKey() {
|
|
187
|
+
if (!this.geminiApiKey) {
|
|
188
|
+
throw new Error('Gemini API key is not configured.');
|
|
189
|
+
}
|
|
190
|
+
return this.geminiApiKey;
|
|
191
|
+
}
|
|
192
|
+
// Pi-ai's GoogleOptions doesn't expose Gemini's `responseMimeType=application/json`
|
|
193
|
+
// knob, so models may wrap the JSON in ```json``` fences. The summary-text
|
|
194
|
+
// consumer strips fences before parsing (see stripJsonFences in
|
|
195
|
+
// transcribeWithTwoSteps).
|
|
196
|
+
async generateSummary(promptText, transcript) {
|
|
197
|
+
const modelId = this.provider === 'codex' ? this.codexModel : this.proModel;
|
|
198
|
+
const apiKey = this.provider === 'codex' ? await this.getCodexToken() : this.requireGeminiApiKey();
|
|
199
|
+
const model = await (0, piAiClient_1.getModel)(this.provider, modelId);
|
|
200
|
+
const context = {
|
|
201
|
+
messages: [
|
|
202
|
+
{
|
|
203
|
+
role: 'user',
|
|
204
|
+
content: `${promptText}\n\nTranscript:\n${transcript}`,
|
|
205
|
+
timestamp: Date.now(),
|
|
206
|
+
},
|
|
207
|
+
],
|
|
208
|
+
};
|
|
209
|
+
const response = await (0, piAiClient_1.complete)(model, context, {
|
|
210
|
+
apiKey,
|
|
211
|
+
temperature: 0.2,
|
|
212
|
+
maxTokens: 32768,
|
|
213
|
+
});
|
|
214
|
+
return (0, piAiClient_1.extractFinalText)(response);
|
|
215
|
+
}
|
|
216
|
+
async prepareAudioForProvider(audioFilePath) {
|
|
217
|
+
if (this.provider !== 'codex')
|
|
218
|
+
return { audioFilePath };
|
|
219
|
+
const ext = path.extname(audioFilePath).toLowerCase();
|
|
220
|
+
if (codexTranscription_1.OPENAI_TRANSCRIPTION_EXTENSIONS.has(ext))
|
|
221
|
+
return { audioFilePath };
|
|
222
|
+
const outputPath = path.join(path.dirname(audioFilePath), `${path.basename(audioFilePath, ext)}_codex_${Date.now()}.webm`);
|
|
223
|
+
const ffmpegPath = await this.getFFmpegPath();
|
|
224
|
+
await execFileAsync(ffmpegPath, [
|
|
225
|
+
'-i',
|
|
226
|
+
audioFilePath,
|
|
227
|
+
'-vn',
|
|
228
|
+
'-c:a',
|
|
229
|
+
'libopus',
|
|
230
|
+
'-b:a',
|
|
231
|
+
'48k',
|
|
232
|
+
outputPath,
|
|
233
|
+
]);
|
|
234
|
+
return {
|
|
235
|
+
audioFilePath: outputPath,
|
|
236
|
+
cleanup: () => {
|
|
237
|
+
try {
|
|
238
|
+
fs.unlinkSync(outputPath);
|
|
239
|
+
}
|
|
240
|
+
catch {
|
|
241
|
+
/* ignore */
|
|
242
|
+
}
|
|
243
|
+
},
|
|
244
|
+
};
|
|
154
245
|
}
|
|
155
246
|
buildGlossaryBlock() {
|
|
156
247
|
if (this.knownWords.length === 0)
|
|
@@ -178,16 +269,17 @@ class GeminiService {
|
|
|
178
269
|
suggestedTitle: 'Stubbed Title',
|
|
179
270
|
};
|
|
180
271
|
}
|
|
272
|
+
const prepared = await this.prepareAudioForProvider(audioFilePath);
|
|
181
273
|
try {
|
|
182
274
|
// Check file size
|
|
183
|
-
const stats = fs.statSync(audioFilePath);
|
|
275
|
+
const stats = fs.statSync(prepared.audioFilePath);
|
|
184
276
|
const fileSizeInMB = stats.size / (1024 * 1024);
|
|
185
277
|
console.error(`Audio file size: ${fileSizeInMB.toFixed(2)} MB`);
|
|
186
278
|
if (progressCallback) {
|
|
187
279
|
progressCallback(15, `Processing ${fileSizeInMB.toFixed(1)} MB audio file...`);
|
|
188
280
|
}
|
|
189
281
|
// Get audio duration using ffmpeg
|
|
190
|
-
const duration = await this.getAudioDuration(audioFilePath);
|
|
282
|
+
const duration = await this.getAudioDuration(prepared.audioFilePath);
|
|
191
283
|
console.error(`Audio duration: ${duration} seconds`);
|
|
192
284
|
// If duration is 0, log a warning but continue processing
|
|
193
285
|
if (duration === 0) {
|
|
@@ -195,14 +287,16 @@ class GeminiService {
|
|
|
195
287
|
}
|
|
196
288
|
// Always use the two-step approach for consistency
|
|
197
289
|
console.error('Using two-step transcription approach...');
|
|
198
|
-
return await this.transcribeWithTwoSteps(audioFilePath, duration, progressCallback, summaryPrompt, liveNotes, options);
|
|
290
|
+
return await this.transcribeWithTwoSteps(prepared.audioFilePath, duration, progressCallback, summaryPrompt, liveNotes, options);
|
|
199
291
|
}
|
|
200
292
|
catch (error) {
|
|
201
293
|
console.error('Error transcribing audio:', error);
|
|
202
294
|
// Provide more specific error messages
|
|
203
295
|
if (error instanceof Error) {
|
|
204
296
|
if (error.message.includes('API key')) {
|
|
205
|
-
throw new Error(
|
|
297
|
+
throw new Error(this.provider === 'codex'
|
|
298
|
+
? 'Invalid Codex OAuth token. Please sign in again.'
|
|
299
|
+
: 'Invalid API key. Please check your Gemini API key configuration.');
|
|
206
300
|
}
|
|
207
301
|
else if (error.message.includes('quota')) {
|
|
208
302
|
throw new Error('API quota exceeded. Please try again later.');
|
|
@@ -213,6 +307,9 @@ class GeminiService {
|
|
|
213
307
|
}
|
|
214
308
|
throw new Error(`Failed to transcribe audio: ${error instanceof Error ? error.message : String(error)}`);
|
|
215
309
|
}
|
|
310
|
+
finally {
|
|
311
|
+
prepared.cleanup?.();
|
|
312
|
+
}
|
|
216
313
|
}
|
|
217
314
|
// Get audio duration using ffmpeg
|
|
218
315
|
async getAudioDuration(audioFilePath) {
|
|
@@ -264,15 +361,35 @@ class GeminiService {
|
|
|
264
361
|
}
|
|
265
362
|
}
|
|
266
363
|
// Split audio file into segments
|
|
267
|
-
async splitAudioIntoSegments(audioFilePath, segmentDuration = 300
|
|
364
|
+
async splitAudioIntoSegments(audioFilePath, segmentDuration = 300,
|
|
365
|
+
// re-encode segments instead of `-c copy`. ffmpeg's segment muxer can
|
|
366
|
+
// only cut at keyframes when copying, and webm-opus has near-zero
|
|
367
|
+
// keyframes by default -- so `-c copy -segment_time 300` silently
|
|
368
|
+
// produces 30+ minute segments that blow past gpt-4o-transcribe's
|
|
369
|
+
// 1400-second per-request limit. Caller passes `reencode: true` for
|
|
370
|
+
// the Codex transcription path; Gemini's API is tolerant of long
|
|
371
|
+
// inputs and stays on the faster `-c copy` path.
|
|
372
|
+
reencode = false) {
|
|
268
373
|
const outputDir = path.dirname(audioFilePath);
|
|
269
374
|
const baseName = path.basename(audioFilePath, path.extname(audioFilePath));
|
|
270
375
|
const ext = path.extname(audioFilePath);
|
|
271
|
-
|
|
376
|
+
// When re-encoding to opus we MUST force a container that supports
|
|
377
|
+
// opus -- ffmpeg picks the muxer from the output extension, so leaving
|
|
378
|
+
// an imported `.mp3`/`.m4a`/`.wav` source as `.mp3` makes ffmpeg pick
|
|
379
|
+
// the MP3 muxer and reject the opus stream. `.webm` is in OpenAI's
|
|
380
|
+
// supported transcription extensions, so the segments still upload.
|
|
381
|
+
const segmentExt = reencode ? '.webm' : ext;
|
|
382
|
+
const segmentPath = path.join(outputDir, `${baseName}_segment_%03d${segmentExt}`);
|
|
272
383
|
// Get the bundled FFmpeg path
|
|
273
384
|
const ffmpegPath = await this.getFFmpegPath();
|
|
274
385
|
try {
|
|
275
|
-
|
|
386
|
+
const codecArgs = reencode ? ['-c:a', 'libopus', '-b:a', '48k'] : ['-c', 'copy'];
|
|
387
|
+
// Split audio into segments. `-reset_timestamps 1` makes each segment
|
|
388
|
+
// start at PTS 0 and gives it its own container duration. Without it,
|
|
389
|
+
// webm output keeps the source file's total duration in the header --
|
|
390
|
+
// and OpenAI rejects the request based on the header value even when
|
|
391
|
+
// the actual encoded audio is short (`audio duration N seconds is
|
|
392
|
+
// longer than 1400` errors on small last-segment files).
|
|
276
393
|
await execFileAsync(ffmpegPath, [
|
|
277
394
|
'-i',
|
|
278
395
|
audioFilePath,
|
|
@@ -280,14 +397,17 @@ class GeminiService {
|
|
|
280
397
|
'segment',
|
|
281
398
|
'-segment_time',
|
|
282
399
|
String(segmentDuration),
|
|
283
|
-
'-
|
|
284
|
-
'
|
|
400
|
+
'-reset_timestamps',
|
|
401
|
+
'1',
|
|
402
|
+
...codecArgs,
|
|
285
403
|
segmentPath,
|
|
286
404
|
]);
|
|
287
|
-
// Find all created segment files
|
|
405
|
+
// Find all created segment files. Match on the EXTENSION WE TOLD
|
|
406
|
+
// FFMPEG TO WRITE -- when re-encoding, that's `.webm` regardless of
|
|
407
|
+
// the source's original extension.
|
|
288
408
|
const segmentFiles = fs
|
|
289
409
|
.readdirSync(outputDir)
|
|
290
|
-
.filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(
|
|
410
|
+
.filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(segmentExt))
|
|
291
411
|
.map((file) => path.join(outputDir, file))
|
|
292
412
|
.sort();
|
|
293
413
|
console.error(`Split audio into ${segmentFiles.length} segments`);
|
|
@@ -339,11 +459,24 @@ class GeminiService {
|
|
|
339
459
|
async transcribeWithTwoSteps(audioFilePath, duration, progressCallback, customSummaryPrompt, liveNotes, options = {}) {
|
|
340
460
|
try {
|
|
341
461
|
let fullTranscript = '';
|
|
462
|
+
const stats = fs.statSync(audioFilePath);
|
|
463
|
+
const fileSizeInMB = stats.size / (1024 * 1024);
|
|
464
|
+
// Segment intentionally for parallelism: even when the API would
|
|
465
|
+
// accept the whole file (Gemini long-context, gpt-4o-transcribe-diarize
|
|
466
|
+
// via chunking_strategy=auto), N parallel 5-min requests finish much
|
|
467
|
+
// faster than one big sequential pass. Trade-off for the diarize
|
|
468
|
+
// model: speaker IDs are mapped fresh per segment ("Speaker 0" in
|
|
469
|
+
// segment 1 may not be the same physical person as "Speaker 0" in
|
|
470
|
+
// segment 2). See docs/model-pricing.md.
|
|
471
|
+
const shouldSegment = duration > 300 || (this.provider === 'codex' && fileSizeInMB > 24);
|
|
472
|
+
const segmentDuration = this.provider === 'codex' && duration > 0 && fileSizeInMB > 20
|
|
473
|
+
? Math.max(30, Math.min(300, Math.floor((20 / fileSizeInMB) * duration)))
|
|
474
|
+
: 300;
|
|
342
475
|
// Step 1: Get transcript
|
|
343
|
-
if (
|
|
476
|
+
if (shouldSegment) {
|
|
344
477
|
// Use segmented approach for long audio
|
|
345
|
-
console.error('
|
|
346
|
-
fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback, options.transcriptionPrompt);
|
|
478
|
+
console.error('Using segmented transcription...');
|
|
479
|
+
fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback, options.transcriptionPrompt, segmentDuration);
|
|
347
480
|
}
|
|
348
481
|
else {
|
|
349
482
|
// Get transcript for short audio
|
|
@@ -380,16 +513,7 @@ Return as JSON:
|
|
|
380
513
|
const enrichableNotes = (liveNotes ?? []).filter((n) => (n.text ?? '').trim().length > 0);
|
|
381
514
|
const highlightsBlock = buildHighlightsPromptBlock(enrichableNotes);
|
|
382
515
|
const summaryPrompt = highlightsBlock ? `${basePrompt}\n\n${highlightsBlock}` : basePrompt;
|
|
383
|
-
const
|
|
384
|
-
model: this.proModel,
|
|
385
|
-
contents: [{ role: 'user', parts: [{ text: summaryPrompt }, { text: fullTranscript }] }],
|
|
386
|
-
config: {
|
|
387
|
-
temperature: 0.2,
|
|
388
|
-
maxOutputTokens: 32768,
|
|
389
|
-
responseMimeType: 'application/json',
|
|
390
|
-
},
|
|
391
|
-
});
|
|
392
|
-
const summaryText = summaryResult.text || '';
|
|
516
|
+
const summaryText = await this.generateSummary(summaryPrompt, fullTranscript);
|
|
393
517
|
let summaryData = {
|
|
394
518
|
suggestedTitle: '',
|
|
395
519
|
summary: '',
|
|
@@ -407,8 +531,16 @@ Return as JSON:
|
|
|
407
531
|
]);
|
|
408
532
|
const customFields = {};
|
|
409
533
|
let rawHighlights;
|
|
534
|
+
// Pi-ai's unified API doesn't pass through Gemini's responseMimeType
|
|
535
|
+
// knob, so models can wrap the JSON in ```json``` fences or add leading
|
|
536
|
+
// chatter. Strip a single fenced block if present, otherwise feed the
|
|
537
|
+
// raw text to JSON.parse and fall back to a regex extract.
|
|
538
|
+
const stripJsonFences = (text) => {
|
|
539
|
+
const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
540
|
+
return fenced ? fenced[1].trim() : text.trim();
|
|
541
|
+
};
|
|
410
542
|
try {
|
|
411
|
-
const parsed = JSON.parse(summaryText);
|
|
543
|
+
const parsed = JSON.parse(stripJsonFences(summaryText));
|
|
412
544
|
summaryData = parsed;
|
|
413
545
|
rawHighlights = parsed.highlights;
|
|
414
546
|
// Extract custom fields (any keys not in the known set)
|
|
@@ -454,6 +586,23 @@ Return as JSON:
|
|
|
454
586
|
if (progressCallback) {
|
|
455
587
|
progressCallback(20, 'Processing audio file...');
|
|
456
588
|
}
|
|
589
|
+
const transcriptPrompt = `${this.buildGlossaryBlock()}${customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT}`;
|
|
590
|
+
if (this.provider === 'codex') {
|
|
591
|
+
return await (0, codexTranscription_1.transcribeCodexAudio)({
|
|
592
|
+
getToken: () => this.getCodexToken(),
|
|
593
|
+
audioFilePath,
|
|
594
|
+
model: this.codexTranscriptionModel,
|
|
595
|
+
// `prompt` is dropped inside transcribeCodexAudio when the
|
|
596
|
+
// diarize model is active. Keep passing it -- the helper picks
|
|
597
|
+
// the right shape per model.
|
|
598
|
+
prompt: transcriptPrompt,
|
|
599
|
+
// Intentionally NOT passing `language: 'ko'`. Whisper-derived
|
|
600
|
+
// transcription auto-detects from the first ~30s, which handles
|
|
601
|
+
// bilingual/code-switched meetings (Korean primary, English
|
|
602
|
+
// acronyms/quotes) better than forcing a single language.
|
|
603
|
+
});
|
|
604
|
+
}
|
|
605
|
+
const ai = this.gemini();
|
|
457
606
|
// Use Files API for files over 20MB
|
|
458
607
|
let fileUri = null;
|
|
459
608
|
if (fileSizeInMB > 20) {
|
|
@@ -463,17 +612,17 @@ Return as JSON:
|
|
|
463
612
|
}
|
|
464
613
|
const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
|
|
465
614
|
const fileData = fs.readFileSync(audioFilePath);
|
|
466
|
-
const uploadResult = await
|
|
615
|
+
const uploadResult = await ai.files.upload({
|
|
467
616
|
file: new Blob([fileData], { type: mimeType }),
|
|
468
617
|
});
|
|
469
618
|
fileUri = uploadResult.uri || '';
|
|
470
619
|
// Wait for file to be active
|
|
471
|
-
let file = await
|
|
620
|
+
let file = await ai.files.get({ name: uploadResult.name || '' });
|
|
472
621
|
let retries = 0;
|
|
473
622
|
while (file.state === 'PROCESSING' && retries < 30) {
|
|
474
623
|
console.error(`Waiting for file to be processed... (attempt ${retries + 1}/30)`);
|
|
475
624
|
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
476
|
-
file = await
|
|
625
|
+
file = await ai.files.get({ name: uploadResult.name || '' });
|
|
477
626
|
retries++;
|
|
478
627
|
}
|
|
479
628
|
if (file.state !== 'ACTIVE') {
|
|
@@ -483,11 +632,10 @@ Return as JSON:
|
|
|
483
632
|
if (progressCallback) {
|
|
484
633
|
progressCallback(50, 'Transcribing audio...');
|
|
485
634
|
}
|
|
486
|
-
const transcriptPrompt = `${this.buildGlossaryBlock()}${customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT}`;
|
|
487
635
|
let result;
|
|
488
636
|
if (fileUri) {
|
|
489
637
|
const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
|
|
490
|
-
result = await
|
|
638
|
+
result = await ai.models.generateContent({
|
|
491
639
|
model: this.flashModel,
|
|
492
640
|
contents: [
|
|
493
641
|
{
|
|
@@ -513,7 +661,7 @@ Return as JSON:
|
|
|
513
661
|
const audioData = fs.readFileSync(audioFilePath);
|
|
514
662
|
const base64Audio = audioData.toString('base64');
|
|
515
663
|
const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
|
|
516
|
-
result = await
|
|
664
|
+
result = await ai.models.generateContent({
|
|
517
665
|
model: this.flashModel,
|
|
518
666
|
contents: [
|
|
519
667
|
{
|
|
@@ -567,10 +715,23 @@ Return as JSON:
|
|
|
567
715
|
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
568
716
|
try {
|
|
569
717
|
console.error(`Starting transcription for segment ${segmentIndex + 1}/${totalSegments} (attempt ${attempt}/${maxRetries})...`);
|
|
718
|
+
if (this.provider === 'codex') {
|
|
719
|
+
const transcript = await (0, codexTranscription_1.transcribeCodexAudio)({
|
|
720
|
+
getToken: () => this.getCodexToken(),
|
|
721
|
+
audioFilePath: segmentFile,
|
|
722
|
+
model: this.codexTranscriptionModel,
|
|
723
|
+
prompt: segmentPrompt,
|
|
724
|
+
});
|
|
725
|
+
console.error(`Completed transcription for segment ${segmentIndex + 1}/${totalSegments}`);
|
|
726
|
+
return {
|
|
727
|
+
index: segmentIndex,
|
|
728
|
+
content: this.createSegmentHeader(segmentIndex, segmentStartTime, segmentEndTime) + transcript,
|
|
729
|
+
};
|
|
730
|
+
}
|
|
570
731
|
const audioData = fs.readFileSync(segmentFile);
|
|
571
732
|
const base64Audio = audioData.toString('base64');
|
|
572
733
|
const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(segmentFile));
|
|
573
|
-
const result = await this.
|
|
734
|
+
const result = await this.gemini().models.generateContent({
|
|
574
735
|
model: this.flashModel,
|
|
575
736
|
contents: [
|
|
576
737
|
{
|
|
@@ -619,17 +780,20 @@ Return as JSON:
|
|
|
619
780
|
};
|
|
620
781
|
}
|
|
621
782
|
// Get segmented transcript (renamed from transcribeAudioSegmented)
|
|
622
|
-
async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt) {
|
|
783
|
+
async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt, segmentDuration = 300) {
|
|
623
784
|
try {
|
|
624
|
-
// Split audio into 5-minute segments
|
|
625
|
-
|
|
785
|
+
// Split audio into 5-minute segments. Codex transcription requires
|
|
786
|
+
// accurate cut times (gpt-4o-transcribe rejects >1400s/segment), so
|
|
787
|
+
// force re-encode there; Gemini's API tolerates long inputs and we
|
|
788
|
+
// keep the cheaper `-c copy` path for it.
|
|
789
|
+
const segmentFiles = await this.splitAudioIntoSegments(audioFilePath, segmentDuration, this.provider === 'codex');
|
|
626
790
|
if (progressCallback) {
|
|
627
791
|
progressCallback(20, `Processing ${segmentFiles.length} segments...`);
|
|
628
792
|
}
|
|
629
793
|
// Create promises for all segment transcriptions
|
|
630
794
|
const transcriptionPromises = segmentFiles.map(async (segmentFile, i) => {
|
|
631
|
-
const segmentStartTime = i *
|
|
632
|
-
const segmentEndTime = Math.min(segmentStartTime +
|
|
795
|
+
const segmentStartTime = i * segmentDuration;
|
|
796
|
+
const segmentEndTime = Math.min(segmentStartTime + segmentDuration, duration);
|
|
633
797
|
return this.transcribeSingleSegment(segmentFile, i, segmentFiles.length, segmentStartTime, segmentEndTime, customPrompt);
|
|
634
798
|
});
|
|
635
799
|
// Track progress of concurrent transcriptions
|
package/dist/main.js
CHANGED
|
@@ -42,6 +42,8 @@ const electron_1 = require("electron");
|
|
|
42
42
|
const agentService_1 = require("./agentService");
|
|
43
43
|
const audioFormats_1 = require("./audioFormats");
|
|
44
44
|
const configService_1 = require("./configService");
|
|
45
|
+
const codexOAuth_1 = require("./codexOAuth");
|
|
46
|
+
const dataPath_1 = require("./dataPath");
|
|
45
47
|
const displayDetectorService_1 = require("./displayDetectorService");
|
|
46
48
|
const geminiService_1 = require("./geminiService");
|
|
47
49
|
const meetingDetectorService_1 = require("./meetingDetectorService");
|
|
@@ -69,6 +71,7 @@ if (process.platform === 'darwin') {
|
|
|
69
71
|
electron_1.app.commandLine.appendSwitch('enable-features', 'MacSckSystemAudioLoopbackCapture,MacCatapSystemAudioLoopbackCapture');
|
|
70
72
|
}
|
|
71
73
|
global.isQuitting = false;
|
|
74
|
+
electron_1.app.setPath('userData', (0, dataPath_1.getDataPath)());
|
|
72
75
|
let mainWindow = null;
|
|
73
76
|
const audioRecorder = new simpleAudioRecorder_1.SimpleAudioRecorder();
|
|
74
77
|
const systemAudioService = new systemAudioService_1.SystemAudioService();
|
|
@@ -83,14 +86,30 @@ let geminiService = null;
|
|
|
83
86
|
let notionService = null;
|
|
84
87
|
let slackService = null;
|
|
85
88
|
let agentService = null;
|
|
89
|
+
function formatAiCredentialsError() {
|
|
90
|
+
return configService.getAiProvider() === 'codex'
|
|
91
|
+
? 'Codex OAuth is not configured. Sign in with Codex OAuth or switch back to Gemini.'
|
|
92
|
+
: 'Gemini API key not configured.';
|
|
93
|
+
}
|
|
86
94
|
function getAgentService() {
|
|
87
95
|
if (agentService)
|
|
88
96
|
return agentService;
|
|
89
|
-
|
|
90
|
-
if (!apiKey)
|
|
97
|
+
if (!configService.hasAiAuth())
|
|
91
98
|
return null;
|
|
92
99
|
agentService = new agentService_1.AgentService({
|
|
93
|
-
|
|
100
|
+
provider: configService.getAiProvider(),
|
|
101
|
+
apiKey: configService.getGeminiApiKey(),
|
|
102
|
+
codexOAuth: configService.getCodexOAuth(),
|
|
103
|
+
// Only persist refreshed tokens when the credentials originated in config.json.
|
|
104
|
+
// Env-only credentials must stay ephemeral -- writing refreshed tokens to disk
|
|
105
|
+
// would leak ephemeral env creds into the persistent store.
|
|
106
|
+
onCodexOAuthUpdate: configService.hasStoredCodexOAuth()
|
|
107
|
+
? (credentials) => {
|
|
108
|
+
configService.setCodexOAuth(credentials);
|
|
109
|
+
broadcastConfigChanged();
|
|
110
|
+
}
|
|
111
|
+
: undefined,
|
|
112
|
+
codexModel: configService.getCodexModel(),
|
|
94
113
|
dataPath: electron_1.app.getPath('userData'),
|
|
95
114
|
configService,
|
|
96
115
|
});
|
|
@@ -134,14 +153,25 @@ function trackFinalize(work) {
|
|
|
134
153
|
pendingFinalize = pendingFinalize.then(() => work).catch(() => { });
|
|
135
154
|
}
|
|
136
155
|
function createGeminiService() {
|
|
137
|
-
|
|
138
|
-
if (!apiKey)
|
|
156
|
+
if (!configService.hasAiAuth())
|
|
139
157
|
return null;
|
|
140
158
|
return new geminiService_1.GeminiService({
|
|
141
|
-
|
|
159
|
+
provider: configService.getAiProvider(),
|
|
160
|
+
apiKey: configService.getGeminiApiKey(),
|
|
161
|
+
codexOAuth: configService.getCodexOAuth(),
|
|
162
|
+
// See note in getAgentService(): persist refreshed tokens only for stored creds.
|
|
163
|
+
onCodexOAuthUpdate: configService.hasStoredCodexOAuth()
|
|
164
|
+
? (credentials) => {
|
|
165
|
+
configService.setCodexOAuth(credentials);
|
|
166
|
+
broadcastConfigChanged();
|
|
167
|
+
}
|
|
168
|
+
: undefined,
|
|
142
169
|
knownWords: configService.getKnownWords(),
|
|
143
170
|
proModel: configService.getGeminiModel(),
|
|
144
171
|
flashModel: configService.getGeminiFlashModel(),
|
|
172
|
+
codexModel: configService.getCodexModel(),
|
|
173
|
+
codexTranscriptionModel: configService.getCodexTranscriptionModel(),
|
|
174
|
+
dataPath: electron_1.app.getPath('userData'),
|
|
145
175
|
});
|
|
146
176
|
}
|
|
147
177
|
function registerGlobalShortcut() {
|
|
@@ -827,9 +857,8 @@ electron_1.ipcMain.handle('merge-recordings', async (_, opts) => {
|
|
|
827
857
|
resolvedInputs.push(resolved);
|
|
828
858
|
}
|
|
829
859
|
if (!geminiService) {
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
return { success: false, error: 'Gemini API key not configured' };
|
|
860
|
+
if (!configService.hasAiAuth()) {
|
|
861
|
+
return { success: false, error: formatAiCredentialsError() };
|
|
833
862
|
}
|
|
834
863
|
geminiService = createGeminiService();
|
|
835
864
|
}
|
|
@@ -1075,9 +1104,13 @@ electron_1.ipcMain.handle('abort-recording', async () => {
|
|
|
1075
1104
|
// IPC and the agent-chat flow when set_config mutations land.
|
|
1076
1105
|
function applyConfigSideEffects(changed) {
|
|
1077
1106
|
if (changed.knownWords !== undefined ||
|
|
1107
|
+
changed.aiProvider !== undefined ||
|
|
1078
1108
|
changed.geminiApiKey !== undefined ||
|
|
1079
1109
|
changed.geminiModel !== undefined ||
|
|
1080
|
-
changed.geminiFlashModel !== undefined
|
|
1110
|
+
changed.geminiFlashModel !== undefined ||
|
|
1111
|
+
changed.codexOAuth !== undefined ||
|
|
1112
|
+
changed.codexModel !== undefined ||
|
|
1113
|
+
changed.codexTranscriptionModel !== undefined) {
|
|
1081
1114
|
geminiService = createGeminiService();
|
|
1082
1115
|
agentService = null;
|
|
1083
1116
|
}
|
|
@@ -1169,6 +1202,38 @@ electron_1.ipcMain.handle('save-config', async (_, config) => {
|
|
|
1169
1202
|
electron_1.ipcMain.handle('get-config', async () => {
|
|
1170
1203
|
return configService.getAllConfig();
|
|
1171
1204
|
});
|
|
1205
|
+
electron_1.ipcMain.handle('codex-oauth-login', async () => {
|
|
1206
|
+
try {
|
|
1207
|
+
const credentials = await (0, codexOAuth_1.loginCodexOAuth)({
|
|
1208
|
+
openUrl: (url) => electron_1.shell.openExternal(url),
|
|
1209
|
+
onPrompt: async (_prompt) => {
|
|
1210
|
+
throw new Error('Codex OAuth manual callback is only supported from the CLI. Run `listener codex login` if browser sign-in does not complete.');
|
|
1211
|
+
},
|
|
1212
|
+
onProgress: (message) => console.log(`Codex OAuth: ${message}`),
|
|
1213
|
+
});
|
|
1214
|
+
configService.setCodexOAuth(credentials);
|
|
1215
|
+
configService.setAiProvider('codex');
|
|
1216
|
+
applyConfigSideEffects({ aiProvider: 'codex', codexOAuth: credentials });
|
|
1217
|
+
broadcastConfigChanged();
|
|
1218
|
+
return { success: true, config: configService.getAllConfig() };
|
|
1219
|
+
}
|
|
1220
|
+
catch (error) {
|
|
1221
|
+
console.error('Codex OAuth login failed:', error);
|
|
1222
|
+
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
|
1223
|
+
}
|
|
1224
|
+
});
|
|
1225
|
+
electron_1.ipcMain.handle('codex-oauth-clear', async () => {
|
|
1226
|
+
try {
|
|
1227
|
+
configService.clearCodexOAuth();
|
|
1228
|
+
applyConfigSideEffects({ aiProvider: configService.getAiProvider() });
|
|
1229
|
+
broadcastConfigChanged();
|
|
1230
|
+
return { success: true, config: configService.getAllConfig() };
|
|
1231
|
+
}
|
|
1232
|
+
catch (error) {
|
|
1233
|
+
console.error('Codex OAuth clear failed:', error);
|
|
1234
|
+
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
|
1235
|
+
}
|
|
1236
|
+
});
|
|
1172
1237
|
electron_1.ipcMain.handle('get-all-releases', async () => {
|
|
1173
1238
|
console.log('Release list IPC: get-all-releases invoked');
|
|
1174
1239
|
const results = await (0, releaseNotesService_1.fetchAllReleases)();
|
|
@@ -1191,6 +1256,9 @@ electron_1.ipcMain.handle('update:simulate', async (_, event, data) => {
|
|
|
1191
1256
|
electron_1.ipcMain.handle('check-config', async () => {
|
|
1192
1257
|
return {
|
|
1193
1258
|
hasConfig: configService.hasRequiredConfig(),
|
|
1259
|
+
hasAiAuth: configService.hasAiAuth(),
|
|
1260
|
+
aiProvider: configService.getAiProvider(),
|
|
1261
|
+
codexOAuthConfigured: configService.hasCodexOAuth(),
|
|
1194
1262
|
missing: configService.getMissingConfigs(),
|
|
1195
1263
|
};
|
|
1196
1264
|
});
|
|
@@ -1241,15 +1309,14 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath, liveNotesRaw)
|
|
|
1241
1309
|
if (mainWindow) {
|
|
1242
1310
|
mainWindow.webContents.send('transcription-progress', {
|
|
1243
1311
|
percent: 0,
|
|
1244
|
-
message: 'Initializing
|
|
1312
|
+
message: 'Initializing AI service...',
|
|
1245
1313
|
});
|
|
1246
1314
|
}
|
|
1247
|
-
// Initialize
|
|
1315
|
+
// Initialize AI service if not already initialized
|
|
1248
1316
|
if (!geminiService) {
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
return { success: false, error: 'Gemini API key not configured' };
|
|
1317
|
+
console.log('AI credentials configured:', configService.hasAiAuth());
|
|
1318
|
+
if (!configService.hasAiAuth()) {
|
|
1319
|
+
return { success: false, error: formatAiCredentialsError() };
|
|
1253
1320
|
}
|
|
1254
1321
|
geminiService = createGeminiService();
|
|
1255
1322
|
}
|
|
@@ -1667,7 +1734,7 @@ electron_1.ipcMain.handle('agent-chat', async (_event, opts) => {
|
|
|
1667
1734
|
try {
|
|
1668
1735
|
const agent = getAgentService();
|
|
1669
1736
|
if (!agent) {
|
|
1670
|
-
return { success: false, error:
|
|
1737
|
+
return { success: false, error: formatAiCredentialsError() };
|
|
1671
1738
|
}
|
|
1672
1739
|
const question = (opts?.question ?? '').trim();
|
|
1673
1740
|
if (!question)
|