listener-ai 2.6.0 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,8 +39,12 @@ const path = __importStar(require("path"));
39
39
  const child_process_1 = require("child_process");
40
40
  const util_1 = require("util");
41
41
  const genai_1 = require("@google/genai");
42
+ const aiProvider_1 = require("./aiProvider");
42
43
  const audioFormats_1 = require("./audioFormats");
44
+ const codexOAuthHolder_1 = require("./codexOAuthHolder");
45
+ const codexTranscription_1 = require("./codexTranscription");
43
46
  const outputService_1 = require("./outputService");
47
+ const piAiClient_1 = require("./piAiClient");
44
48
  const ffmpegManager_1 = require("./services/ffmpegManager");
45
49
  const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
46
50
  // Append a section to the summary prompt instructing Gemini to enrich each
@@ -145,12 +149,99 @@ class GeminiService {
145
149
  return process.platform === 'win32' ? 'ffmpeg.exe' : 'ffmpeg';
146
150
  }
147
151
  constructor(options) {
148
- this.apiKey = options.apiKey;
149
- this.ai = new genai_1.GoogleGenAI({ apiKey: options.apiKey });
152
+ this.provider = options.provider ?? 'gemini';
153
+ if (this.provider === 'gemini') {
154
+ if (!options.apiKey) {
155
+ throw new Error('Gemini API key is required for the Gemini provider.');
156
+ }
157
+ this.ai = new genai_1.GoogleGenAI({ apiKey: options.apiKey });
158
+ this.geminiApiKey = options.apiKey;
159
+ }
160
+ else {
161
+ this.codexAuth = new codexOAuthHolder_1.CodexOAuthHolder({
162
+ credentials: options.codexOAuth,
163
+ onUpdate: options.onCodexOAuthUpdate,
164
+ });
165
+ }
150
166
  this.ffmpegManager = new ffmpegManager_1.FFmpegManager(options.dataPath);
151
167
  this.knownWords = options.knownWords || [];
152
168
  this.proModel = options.proModel;
153
169
  this.flashModel = options.flashModel;
170
+ this.codexModel = options.codexModel || aiProvider_1.DEFAULT_CODEX_MODEL;
171
+ this.codexTranscriptionModel =
172
+ options.codexTranscriptionModel || aiProvider_1.DEFAULT_CODEX_TRANSCRIPTION_MODEL;
173
+ }
174
+ gemini() {
175
+ if (!this.ai) {
176
+ throw new Error('Gemini client is not configured for the selected AI provider.');
177
+ }
178
+ return this.ai;
179
+ }
180
+ async getCodexToken() {
181
+ if (!this.codexAuth) {
182
+ throw new Error('Codex OAuth holder is not configured.');
183
+ }
184
+ return await this.codexAuth.getToken();
185
+ }
186
+ requireGeminiApiKey() {
187
+ if (!this.geminiApiKey) {
188
+ throw new Error('Gemini API key is not configured.');
189
+ }
190
+ return this.geminiApiKey;
191
+ }
192
+ // Pi-ai's GoogleOptions doesn't expose Gemini's `responseMimeType=application/json`
193
+ // knob, so models may wrap the JSON in ```json``` fences. The summary-text
194
+ // consumer strips fences before parsing (see stripJsonFences in
195
+ // transcribeWithTwoSteps).
196
+ async generateSummary(promptText, transcript) {
197
+ const modelId = this.provider === 'codex' ? this.codexModel : this.proModel;
198
+ const apiKey = this.provider === 'codex' ? await this.getCodexToken() : this.requireGeminiApiKey();
199
+ const model = await (0, piAiClient_1.getModel)(this.provider, modelId);
200
+ const context = {
201
+ messages: [
202
+ {
203
+ role: 'user',
204
+ content: `${promptText}\n\nTranscript:\n${transcript}`,
205
+ timestamp: Date.now(),
206
+ },
207
+ ],
208
+ };
209
+ const response = await (0, piAiClient_1.complete)(model, context, {
210
+ apiKey,
211
+ temperature: 0.2,
212
+ maxTokens: 32768,
213
+ });
214
+ return (0, piAiClient_1.extractFinalText)(response);
215
+ }
216
+ async prepareAudioForProvider(audioFilePath) {
217
+ if (this.provider !== 'codex')
218
+ return { audioFilePath };
219
+ const ext = path.extname(audioFilePath).toLowerCase();
220
+ if (codexTranscription_1.OPENAI_TRANSCRIPTION_EXTENSIONS.has(ext))
221
+ return { audioFilePath };
222
+ const outputPath = path.join(path.dirname(audioFilePath), `${path.basename(audioFilePath, ext)}_codex_${Date.now()}.webm`);
223
+ const ffmpegPath = await this.getFFmpegPath();
224
+ await execFileAsync(ffmpegPath, [
225
+ '-i',
226
+ audioFilePath,
227
+ '-vn',
228
+ '-c:a',
229
+ 'libopus',
230
+ '-b:a',
231
+ '48k',
232
+ outputPath,
233
+ ]);
234
+ return {
235
+ audioFilePath: outputPath,
236
+ cleanup: () => {
237
+ try {
238
+ fs.unlinkSync(outputPath);
239
+ }
240
+ catch {
241
+ /* ignore */
242
+ }
243
+ },
244
+ };
154
245
  }
155
246
  buildGlossaryBlock() {
156
247
  if (this.knownWords.length === 0)
@@ -178,16 +269,17 @@ class GeminiService {
178
269
  suggestedTitle: 'Stubbed Title',
179
270
  };
180
271
  }
272
+ const prepared = await this.prepareAudioForProvider(audioFilePath);
181
273
  try {
182
274
  // Check file size
183
- const stats = fs.statSync(audioFilePath);
275
+ const stats = fs.statSync(prepared.audioFilePath);
184
276
  const fileSizeInMB = stats.size / (1024 * 1024);
185
277
  console.error(`Audio file size: ${fileSizeInMB.toFixed(2)} MB`);
186
278
  if (progressCallback) {
187
279
  progressCallback(15, `Processing ${fileSizeInMB.toFixed(1)} MB audio file...`);
188
280
  }
189
281
  // Get audio duration using ffmpeg
190
- const duration = await this.getAudioDuration(audioFilePath);
282
+ const duration = await this.getAudioDuration(prepared.audioFilePath);
191
283
  console.error(`Audio duration: ${duration} seconds`);
192
284
  // If duration is 0, log a warning but continue processing
193
285
  if (duration === 0) {
@@ -195,14 +287,16 @@ class GeminiService {
195
287
  }
196
288
  // Always use the two-step approach for consistency
197
289
  console.error('Using two-step transcription approach...');
198
- return await this.transcribeWithTwoSteps(audioFilePath, duration, progressCallback, summaryPrompt, liveNotes, options);
290
+ return await this.transcribeWithTwoSteps(prepared.audioFilePath, duration, progressCallback, summaryPrompt, liveNotes, options);
199
291
  }
200
292
  catch (error) {
201
293
  console.error('Error transcribing audio:', error);
202
294
  // Provide more specific error messages
203
295
  if (error instanceof Error) {
204
296
  if (error.message.includes('API key')) {
205
- throw new Error('Invalid API key. Please check your Gemini API key configuration.');
297
+ throw new Error(this.provider === 'codex'
298
+ ? 'Invalid Codex OAuth token. Please sign in again.'
299
+ : 'Invalid API key. Please check your Gemini API key configuration.');
206
300
  }
207
301
  else if (error.message.includes('quota')) {
208
302
  throw new Error('API quota exceeded. Please try again later.');
@@ -213,6 +307,9 @@ class GeminiService {
213
307
  }
214
308
  throw new Error(`Failed to transcribe audio: ${error instanceof Error ? error.message : String(error)}`);
215
309
  }
310
+ finally {
311
+ prepared.cleanup?.();
312
+ }
216
313
  }
217
314
  // Get audio duration using ffmpeg
218
315
  async getAudioDuration(audioFilePath) {
@@ -264,15 +361,35 @@ class GeminiService {
264
361
  }
265
362
  }
266
363
  // Split audio file into segments
267
- async splitAudioIntoSegments(audioFilePath, segmentDuration = 300) {
364
+ async splitAudioIntoSegments(audioFilePath, segmentDuration = 300,
365
+ // re-encode segments instead of `-c copy`. ffmpeg's segment muxer can
366
+ // only cut at keyframes when copying, and webm-opus has near-zero
367
+ // keyframes by default -- so `-c copy -segment_time 300` silently
368
+ // produces 30+ minute segments that blow past gpt-4o-transcribe's
369
+ // 1400-second per-request limit. Caller passes `reencode: true` for
370
+ // the Codex transcription path; Gemini's API is tolerant of long
371
+ // inputs and stays on the faster `-c copy` path.
372
+ reencode = false) {
268
373
  const outputDir = path.dirname(audioFilePath);
269
374
  const baseName = path.basename(audioFilePath, path.extname(audioFilePath));
270
375
  const ext = path.extname(audioFilePath);
271
- const segmentPath = path.join(outputDir, `${baseName}_segment_%03d${ext}`);
376
+ // When re-encoding to opus we MUST force a container that supports
377
+ // opus -- ffmpeg picks the muxer from the output extension, so leaving
378
+ // an imported `.mp3`/`.m4a`/`.wav` source as `.mp3` makes ffmpeg pick
379
+ // the MP3 muxer and reject the opus stream. `.webm` is in OpenAI's
380
+ // supported transcription extensions, so the segments still upload.
381
+ const segmentExt = reencode ? '.webm' : ext;
382
+ const segmentPath = path.join(outputDir, `${baseName}_segment_%03d${segmentExt}`);
272
383
  // Get the bundled FFmpeg path
273
384
  const ffmpegPath = await this.getFFmpegPath();
274
385
  try {
275
- // Split audio into segments
386
+ const codecArgs = reencode ? ['-c:a', 'libopus', '-b:a', '48k'] : ['-c', 'copy'];
387
+ // Split audio into segments. `-reset_timestamps 1` makes each segment
388
+ // start at PTS 0 and gives it its own container duration. Without it,
389
+ // webm output keeps the source file's total duration in the header --
390
+ // and OpenAI rejects the request based on the header value even when
391
+ // the actual encoded audio is short (`audio duration N seconds is
392
+ // longer than 1400` errors on small last-segment files).
276
393
  await execFileAsync(ffmpegPath, [
277
394
  '-i',
278
395
  audioFilePath,
@@ -280,14 +397,17 @@ class GeminiService {
280
397
  'segment',
281
398
  '-segment_time',
282
399
  String(segmentDuration),
283
- '-c',
284
- 'copy',
400
+ '-reset_timestamps',
401
+ '1',
402
+ ...codecArgs,
285
403
  segmentPath,
286
404
  ]);
287
- // Find all created segment files
405
+ // Find all created segment files. Match on the EXTENSION WE TOLD
406
+ // FFMPEG TO WRITE -- when re-encoding, that's `.webm` regardless of
407
+ // the source's original extension.
288
408
  const segmentFiles = fs
289
409
  .readdirSync(outputDir)
290
- .filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(ext))
410
+ .filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(segmentExt))
291
411
  .map((file) => path.join(outputDir, file))
292
412
  .sort();
293
413
  console.error(`Split audio into ${segmentFiles.length} segments`);
@@ -339,11 +459,24 @@ class GeminiService {
339
459
  async transcribeWithTwoSteps(audioFilePath, duration, progressCallback, customSummaryPrompt, liveNotes, options = {}) {
340
460
  try {
341
461
  let fullTranscript = '';
462
+ const stats = fs.statSync(audioFilePath);
463
+ const fileSizeInMB = stats.size / (1024 * 1024);
464
+ // Segment intentionally for parallelism: even when the API would
465
+ // accept the whole file (Gemini long-context, gpt-4o-transcribe-diarize
466
+ // via chunking_strategy=auto), N parallel 5-min requests finish much
467
+ // faster than one big sequential pass. Trade-off for the diarize
468
+ // model: speaker IDs are mapped fresh per segment ("Speaker 0" in
469
+ // segment 1 may not be the same physical person as "Speaker 0" in
470
+ // segment 2). See docs/model-pricing.md.
471
+ const shouldSegment = duration > 300 || (this.provider === 'codex' && fileSizeInMB > 24);
472
+ const segmentDuration = this.provider === 'codex' && duration > 0 && fileSizeInMB > 20
473
+ ? Math.max(30, Math.min(300, Math.floor((20 / fileSizeInMB) * duration)))
474
+ : 300;
342
475
  // Step 1: Get transcript
343
- if (duration > 300) {
476
+ if (shouldSegment) {
344
477
  // Use segmented approach for long audio
345
- console.error('Audio is longer than 5 minutes, using segmented transcription...');
346
- fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback, options.transcriptionPrompt);
478
+ console.error('Using segmented transcription...');
479
+ fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback, options.transcriptionPrompt, segmentDuration);
347
480
  }
348
481
  else {
349
482
  // Get transcript for short audio
@@ -380,16 +513,7 @@ Return as JSON:
380
513
  const enrichableNotes = (liveNotes ?? []).filter((n) => (n.text ?? '').trim().length > 0);
381
514
  const highlightsBlock = buildHighlightsPromptBlock(enrichableNotes);
382
515
  const summaryPrompt = highlightsBlock ? `${basePrompt}\n\n${highlightsBlock}` : basePrompt;
383
- const summaryResult = await this.ai.models.generateContent({
384
- model: this.proModel,
385
- contents: [{ role: 'user', parts: [{ text: summaryPrompt }, { text: fullTranscript }] }],
386
- config: {
387
- temperature: 0.2,
388
- maxOutputTokens: 32768,
389
- responseMimeType: 'application/json',
390
- },
391
- });
392
- const summaryText = summaryResult.text || '';
516
+ const summaryText = await this.generateSummary(summaryPrompt, fullTranscript);
393
517
  let summaryData = {
394
518
  suggestedTitle: '',
395
519
  summary: '',
@@ -407,8 +531,16 @@ Return as JSON:
407
531
  ]);
408
532
  const customFields = {};
409
533
  let rawHighlights;
534
+ // Pi-ai's unified API doesn't pass through Gemini's responseMimeType
535
+ // knob, so models can wrap the JSON in ```json``` fences or add leading
536
+ // chatter. Strip a single fenced block if present, otherwise feed the
537
+ // raw text to JSON.parse and fall back to a regex extract.
538
+ const stripJsonFences = (text) => {
539
+ const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
540
+ return fenced ? fenced[1].trim() : text.trim();
541
+ };
410
542
  try {
411
- const parsed = JSON.parse(summaryText);
543
+ const parsed = JSON.parse(stripJsonFences(summaryText));
412
544
  summaryData = parsed;
413
545
  rawHighlights = parsed.highlights;
414
546
  // Extract custom fields (any keys not in the known set)
@@ -454,6 +586,23 @@ Return as JSON:
454
586
  if (progressCallback) {
455
587
  progressCallback(20, 'Processing audio file...');
456
588
  }
589
+ const transcriptPrompt = `${this.buildGlossaryBlock()}${customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT}`;
590
+ if (this.provider === 'codex') {
591
+ return await (0, codexTranscription_1.transcribeCodexAudio)({
592
+ getToken: () => this.getCodexToken(),
593
+ audioFilePath,
594
+ model: this.codexTranscriptionModel,
595
+ // `prompt` is dropped inside transcribeCodexAudio when the
596
+ // diarize model is active. Keep passing it -- the helper picks
597
+ // the right shape per model.
598
+ prompt: transcriptPrompt,
599
+ // Intentionally NOT passing `language: 'ko'`. Whisper-derived
600
+ // transcription auto-detects from the first ~30s, which handles
601
+ // bilingual/code-switched meetings (Korean primary, English
602
+ // acronyms/quotes) better than forcing a single language.
603
+ });
604
+ }
605
+ const ai = this.gemini();
457
606
  // Use Files API for files over 20MB
458
607
  let fileUri = null;
459
608
  if (fileSizeInMB > 20) {
@@ -463,17 +612,17 @@ Return as JSON:
463
612
  }
464
613
  const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
465
614
  const fileData = fs.readFileSync(audioFilePath);
466
- const uploadResult = await this.ai.files.upload({
615
+ const uploadResult = await ai.files.upload({
467
616
  file: new Blob([fileData], { type: mimeType }),
468
617
  });
469
618
  fileUri = uploadResult.uri || '';
470
619
  // Wait for file to be active
471
- let file = await this.ai.files.get({ name: uploadResult.name || '' });
620
+ let file = await ai.files.get({ name: uploadResult.name || '' });
472
621
  let retries = 0;
473
622
  while (file.state === 'PROCESSING' && retries < 30) {
474
623
  console.error(`Waiting for file to be processed... (attempt ${retries + 1}/30)`);
475
624
  await new Promise((resolve) => setTimeout(resolve, 2000));
476
- file = await this.ai.files.get({ name: uploadResult.name || '' });
625
+ file = await ai.files.get({ name: uploadResult.name || '' });
477
626
  retries++;
478
627
  }
479
628
  if (file.state !== 'ACTIVE') {
@@ -483,11 +632,10 @@ Return as JSON:
483
632
  if (progressCallback) {
484
633
  progressCallback(50, 'Transcribing audio...');
485
634
  }
486
- const transcriptPrompt = `${this.buildGlossaryBlock()}${customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT}`;
487
635
  let result;
488
636
  if (fileUri) {
489
637
  const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
490
- result = await this.ai.models.generateContent({
638
+ result = await ai.models.generateContent({
491
639
  model: this.flashModel,
492
640
  contents: [
493
641
  {
@@ -513,7 +661,7 @@ Return as JSON:
513
661
  const audioData = fs.readFileSync(audioFilePath);
514
662
  const base64Audio = audioData.toString('base64');
515
663
  const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
516
- result = await this.ai.models.generateContent({
664
+ result = await ai.models.generateContent({
517
665
  model: this.flashModel,
518
666
  contents: [
519
667
  {
@@ -567,10 +715,23 @@ Return as JSON:
567
715
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
568
716
  try {
569
717
  console.error(`Starting transcription for segment ${segmentIndex + 1}/${totalSegments} (attempt ${attempt}/${maxRetries})...`);
718
+ if (this.provider === 'codex') {
719
+ const transcript = await (0, codexTranscription_1.transcribeCodexAudio)({
720
+ getToken: () => this.getCodexToken(),
721
+ audioFilePath: segmentFile,
722
+ model: this.codexTranscriptionModel,
723
+ prompt: segmentPrompt,
724
+ });
725
+ console.error(`Completed transcription for segment ${segmentIndex + 1}/${totalSegments}`);
726
+ return {
727
+ index: segmentIndex,
728
+ content: this.createSegmentHeader(segmentIndex, segmentStartTime, segmentEndTime) + transcript,
729
+ };
730
+ }
570
731
  const audioData = fs.readFileSync(segmentFile);
571
732
  const base64Audio = audioData.toString('base64');
572
733
  const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(segmentFile));
573
- const result = await this.ai.models.generateContent({
734
+ const result = await this.gemini().models.generateContent({
574
735
  model: this.flashModel,
575
736
  contents: [
576
737
  {
@@ -619,17 +780,20 @@ Return as JSON:
619
780
  };
620
781
  }
621
782
  // Get segmented transcript (renamed from transcribeAudioSegmented)
622
- async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt) {
783
+ async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt, segmentDuration = 300) {
623
784
  try {
624
- // Split audio into 5-minute segments
625
- const segmentFiles = await this.splitAudioIntoSegments(audioFilePath, 300);
785
+ // Split audio into `segmentDuration`-second segments (300 s by default). Codex transcription requires
786
+ // accurate cut times (gpt-4o-transcribe rejects >1400s/segment), so
787
+ // force re-encode there; Gemini's API tolerates long inputs and we
788
+ // keep the cheaper `-c copy` path for it.
789
+ const segmentFiles = await this.splitAudioIntoSegments(audioFilePath, segmentDuration, this.provider === 'codex');
626
790
  if (progressCallback) {
627
791
  progressCallback(20, `Processing ${segmentFiles.length} segments...`);
628
792
  }
629
793
  // Create promises for all segment transcriptions
630
794
  const transcriptionPromises = segmentFiles.map(async (segmentFile, i) => {
631
- const segmentStartTime = i * 300; // 5 minutes in seconds
632
- const segmentEndTime = Math.min(segmentStartTime + 300, duration);
795
+ const segmentStartTime = i * segmentDuration;
796
+ const segmentEndTime = Math.min(segmentStartTime + segmentDuration, duration);
633
797
  return this.transcribeSingleSegment(segmentFile, i, segmentFiles.length, segmentStartTime, segmentEndTime, customPrompt);
634
798
  });
635
799
  // Track progress of concurrent transcriptions
package/dist/main.js CHANGED
@@ -42,6 +42,8 @@ const electron_1 = require("electron");
42
42
  const agentService_1 = require("./agentService");
43
43
  const audioFormats_1 = require("./audioFormats");
44
44
  const configService_1 = require("./configService");
45
+ const codexOAuth_1 = require("./codexOAuth");
46
+ const dataPath_1 = require("./dataPath");
45
47
  const displayDetectorService_1 = require("./displayDetectorService");
46
48
  const geminiService_1 = require("./geminiService");
47
49
  const meetingDetectorService_1 = require("./meetingDetectorService");
@@ -69,6 +71,7 @@ if (process.platform === 'darwin') {
69
71
  electron_1.app.commandLine.appendSwitch('enable-features', 'MacSckSystemAudioLoopbackCapture,MacCatapSystemAudioLoopbackCapture');
70
72
  }
71
73
  global.isQuitting = false;
74
+ electron_1.app.setPath('userData', (0, dataPath_1.getDataPath)());
72
75
  let mainWindow = null;
73
76
  const audioRecorder = new simpleAudioRecorder_1.SimpleAudioRecorder();
74
77
  const systemAudioService = new systemAudioService_1.SystemAudioService();
@@ -83,14 +86,30 @@ let geminiService = null;
83
86
  let notionService = null;
84
87
  let slackService = null;
85
88
  let agentService = null;
89
+ function formatAiCredentialsError() {
90
+ return configService.getAiProvider() === 'codex'
91
+ ? 'Codex OAuth is not configured. Sign in with Codex OAuth or switch back to Gemini.'
92
+ : 'Gemini API key not configured.';
93
+ }
86
94
  function getAgentService() {
87
95
  if (agentService)
88
96
  return agentService;
89
- const apiKey = configService.getGeminiApiKey();
90
- if (!apiKey)
97
+ if (!configService.hasAiAuth())
91
98
  return null;
92
99
  agentService = new agentService_1.AgentService({
93
- apiKey,
100
+ provider: configService.getAiProvider(),
101
+ apiKey: configService.getGeminiApiKey(),
102
+ codexOAuth: configService.getCodexOAuth(),
103
+ // Only persist refreshed tokens when the credentials originated in config.json.
104
+ // Env-only credentials must stay ephemeral -- writing refreshed tokens to disk
105
+ // would leak ephemeral env creds into the persistent store.
106
+ onCodexOAuthUpdate: configService.hasStoredCodexOAuth()
107
+ ? (credentials) => {
108
+ configService.setCodexOAuth(credentials);
109
+ broadcastConfigChanged();
110
+ }
111
+ : undefined,
112
+ codexModel: configService.getCodexModel(),
94
113
  dataPath: electron_1.app.getPath('userData'),
95
114
  configService,
96
115
  });
@@ -134,14 +153,25 @@ function trackFinalize(work) {
134
153
  pendingFinalize = pendingFinalize.then(() => work).catch(() => { });
135
154
  }
136
155
  function createGeminiService() {
137
- const apiKey = configService.getGeminiApiKey();
138
- if (!apiKey)
156
+ if (!configService.hasAiAuth())
139
157
  return null;
140
158
  return new geminiService_1.GeminiService({
141
- apiKey,
159
+ provider: configService.getAiProvider(),
160
+ apiKey: configService.getGeminiApiKey(),
161
+ codexOAuth: configService.getCodexOAuth(),
162
+ // See note in getAgentService(): persist refreshed tokens only for stored creds.
163
+ onCodexOAuthUpdate: configService.hasStoredCodexOAuth()
164
+ ? (credentials) => {
165
+ configService.setCodexOAuth(credentials);
166
+ broadcastConfigChanged();
167
+ }
168
+ : undefined,
142
169
  knownWords: configService.getKnownWords(),
143
170
  proModel: configService.getGeminiModel(),
144
171
  flashModel: configService.getGeminiFlashModel(),
172
+ codexModel: configService.getCodexModel(),
173
+ codexTranscriptionModel: configService.getCodexTranscriptionModel(),
174
+ dataPath: electron_1.app.getPath('userData'),
145
175
  });
146
176
  }
147
177
  function registerGlobalShortcut() {
@@ -827,9 +857,8 @@ electron_1.ipcMain.handle('merge-recordings', async (_, opts) => {
827
857
  resolvedInputs.push(resolved);
828
858
  }
829
859
  if (!geminiService) {
830
- const apiKey = configService.getGeminiApiKey();
831
- if (!apiKey) {
832
- return { success: false, error: 'Gemini API key not configured' };
860
+ if (!configService.hasAiAuth()) {
861
+ return { success: false, error: formatAiCredentialsError() };
833
862
  }
834
863
  geminiService = createGeminiService();
835
864
  }
@@ -1075,9 +1104,13 @@ electron_1.ipcMain.handle('abort-recording', async () => {
1075
1104
  // IPC and the agent-chat flow when set_config mutations land.
1076
1105
  function applyConfigSideEffects(changed) {
1077
1106
  if (changed.knownWords !== undefined ||
1107
+ changed.aiProvider !== undefined ||
1078
1108
  changed.geminiApiKey !== undefined ||
1079
1109
  changed.geminiModel !== undefined ||
1080
- changed.geminiFlashModel !== undefined) {
1110
+ changed.geminiFlashModel !== undefined ||
1111
+ changed.codexOAuth !== undefined ||
1112
+ changed.codexModel !== undefined ||
1113
+ changed.codexTranscriptionModel !== undefined) {
1081
1114
  geminiService = createGeminiService();
1082
1115
  agentService = null;
1083
1116
  }
@@ -1169,6 +1202,38 @@ electron_1.ipcMain.handle('save-config', async (_, config) => {
1169
1202
  electron_1.ipcMain.handle('get-config', async () => {
1170
1203
  return configService.getAllConfig();
1171
1204
  });
1205
+ electron_1.ipcMain.handle('codex-oauth-login', async () => {
1206
+ try {
1207
+ const credentials = await (0, codexOAuth_1.loginCodexOAuth)({
1208
+ openUrl: (url) => electron_1.shell.openExternal(url),
1209
+ onPrompt: async (_prompt) => {
1210
+ throw new Error('Codex OAuth manual callback is only supported from the CLI. Run `listener codex login` if browser sign-in does not complete.');
1211
+ },
1212
+ onProgress: (message) => console.log(`Codex OAuth: ${message}`),
1213
+ });
1214
+ configService.setCodexOAuth(credentials);
1215
+ configService.setAiProvider('codex');
1216
+ applyConfigSideEffects({ aiProvider: 'codex', codexOAuth: credentials });
1217
+ broadcastConfigChanged();
1218
+ return { success: true, config: configService.getAllConfig() };
1219
+ }
1220
+ catch (error) {
1221
+ console.error('Codex OAuth login failed:', error);
1222
+ return { success: false, error: error instanceof Error ? error.message : String(error) };
1223
+ }
1224
+ });
1225
+ electron_1.ipcMain.handle('codex-oauth-clear', async () => {
1226
+ try {
1227
+ configService.clearCodexOAuth();
1228
+ applyConfigSideEffects({ aiProvider: configService.getAiProvider() });
1229
+ broadcastConfigChanged();
1230
+ return { success: true, config: configService.getAllConfig() };
1231
+ }
1232
+ catch (error) {
1233
+ console.error('Codex OAuth clear failed:', error);
1234
+ return { success: false, error: error instanceof Error ? error.message : String(error) };
1235
+ }
1236
+ });
1172
1237
  electron_1.ipcMain.handle('get-all-releases', async () => {
1173
1238
  console.log('Release list IPC: get-all-releases invoked');
1174
1239
  const results = await (0, releaseNotesService_1.fetchAllReleases)();
@@ -1191,6 +1256,9 @@ electron_1.ipcMain.handle('update:simulate', async (_, event, data) => {
1191
1256
  electron_1.ipcMain.handle('check-config', async () => {
1192
1257
  return {
1193
1258
  hasConfig: configService.hasRequiredConfig(),
1259
+ hasAiAuth: configService.hasAiAuth(),
1260
+ aiProvider: configService.getAiProvider(),
1261
+ codexOAuthConfigured: configService.hasCodexOAuth(),
1194
1262
  missing: configService.getMissingConfigs(),
1195
1263
  };
1196
1264
  });
@@ -1241,15 +1309,14 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath, liveNotesRaw)
1241
1309
  if (mainWindow) {
1242
1310
  mainWindow.webContents.send('transcription-progress', {
1243
1311
  percent: 0,
1244
- message: 'Initializing Gemini service...',
1312
+ message: 'Initializing AI service...',
1245
1313
  });
1246
1314
  }
1247
- // Initialize Gemini service if not already initialized
1315
+ // Initialize AI service if not already initialized
1248
1316
  if (!geminiService) {
1249
- const apiKey = configService.getGeminiApiKey();
1250
- console.log('API key configured:', !!apiKey);
1251
- if (!apiKey) {
1252
- return { success: false, error: 'Gemini API key not configured' };
1317
+ console.log('AI credentials configured:', configService.hasAiAuth());
1318
+ if (!configService.hasAiAuth()) {
1319
+ return { success: false, error: formatAiCredentialsError() };
1253
1320
  }
1254
1321
  geminiService = createGeminiService();
1255
1322
  }
@@ -1667,7 +1734,7 @@ electron_1.ipcMain.handle('agent-chat', async (_event, opts) => {
1667
1734
  try {
1668
1735
  const agent = getAgentService();
1669
1736
  if (!agent) {
1670
- return { success: false, error: 'Gemini API key not configured.' };
1737
+ return { success: false, error: formatAiCredentialsError() };
1671
1738
  }
1672
1739
  const question = (opts?.question ?? '').trim();
1673
1740
  if (!question)