npm - listener-ai - Versions diffs - 2.6.0 → 2.7.1 - Mend

listener-ai 2.6.0 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +87 -22
package/THIRD_PARTY_NOTICES.md +27 -0
package/dist/agentService.js +142 -119
package/dist/aiProvider.js +35 -0
package/dist/cli.js +119 -38
package/dist/codexOAuth.js +68 -0
package/dist/codexOAuthHolder.js +26 -0
package/dist/codexTranscription.js +168 -0
package/dist/configService.js +171 -25
package/dist/dataPath.js +30 -10
package/dist/esmImport.js +15 -0
package/dist/geminiService.js +203 -39
package/dist/main.js +84 -17
package/dist/piAiClient.js +102 -0
package/package.json +13 -4

package/dist/cli.js CHANGED Viewed

@@ -36,10 +36,13 @@ var __importStar = (this && this.__importStar) || (function () {
 Object.defineProperty(exports, "__esModule", { value: true });
 const crypto_1 = require("crypto");
 const fs = __importStar(require("fs"));
+const readline = __importStar(require("readline"));
 const path = __importStar(require("path"));
 const agentService_1 = require("./agentService");
+const aiProvider_1 = require("./aiProvider");
 const audioFormats_1 = require("./audioFormats");
 const configService_1 = require("./configService");
+const codexOAuth_1 = require("./codexOAuth");
 const dataPath_1 = require("./dataPath");
 const geminiService_1 = require("./geminiService");
 const outputService_1 = require("./outputService");
@@ -81,6 +84,7 @@ const USAGE_TEXT = 'Usage: listener <file> [--output <dir>]    Transcribe an aud
     '                                           re-transcribe end-to-end, and save as a new note\n' +
     '       listener ask <question> [--ref <ref>]\n' +
     '                                           Ask the AI agent about saved meetings or settings\n' +
+    '       listener codex login|logout|status  Manage OpenAI Codex OAuth sign-in\n' +
     '       listener config list|get|set|unset|path\n' +
     '                                           Manage configuration\n' +
     '\n' +
@@ -106,9 +110,12 @@ function showHelp() {
     process.exit(0);
 }
 const KNOWN_CONFIG_KEYS = [
+    'aiProvider',
     'geminiApiKey',
     'geminiModel',
     'geminiFlashModel',
+    'codexModel',
+    'codexTranscriptionModel',
     'notionApiKey',
     'notionDatabaseId',
     'autoMode',
@@ -126,7 +133,7 @@ const KNOWN_CONFIG_KEYS = [
 ];
+// Reports whether a config key should be treated as sensitive: true when its
+// lowercased name contains "key", "webhook", or "oauth" (plain substring match).
 function isSensitiveKey(key) {
     const lk = key.toLowerCase();
-    return lk.includes('key') || lk.includes('webhook');
+    return lk.includes('key') || lk.includes('webhook') || lk.includes('oauth');
 }
 function maskValue(key, value) {
     if (value == null || value === '')
@@ -168,6 +175,14 @@ function parseKnownWords(v) {
 }
 function applyConfigSet(config, key, value) {
     switch (key) {
+        case 'aiProvider': {
+            if (!(0, aiProvider_1.isAiProvider)(value)) {
+                process.stderr.write('Error: aiProvider must be "gemini" or "codex"\n');
+                process.exit(1);
+            }
+            config.setAiProvider(value);
+            return;
+        }
         case 'geminiApiKey':
             config.setGeminiApiKey(value);
             return;
@@ -177,6 +192,12 @@ function applyConfigSet(config, key, value) {
         case 'geminiFlashModel':
             config.setGeminiFlashModel(value);
             return;
+        case 'codexModel':
+            config.setCodexModel(value);
+            return;
+        case 'codexTranscriptionModel':
+            config.setCodexTranscriptionModel(value);
+            return;
         case 'notionApiKey':
             config.setNotionApiKey(value);
             return;
@@ -221,6 +242,87 @@ function applyConfigSet(config, key, value) {
             return;
     }
 }
+function formatAiCredentialsError(config) {
+    if (config.getAiProvider() === 'codex') {
+        return ('Error: Codex OAuth is not configured.\n' +
+            'Run `listener codex login` or set aiProvider back to gemini with a Gemini API key.\n');
+    }
+    return ('Error: Gemini API key not found.\n' +
+        'Set GEMINI_API_KEY env var, run `listener config set geminiApiKey <key>`, or run `listener codex login`.\n');
+}
+function createTranscriptionService(config, dataPath) {
+    return new geminiService_1.GeminiService({
+        provider: config.getAiProvider(),
+        apiKey: config.getGeminiApiKey(),
+        codexOAuth: config.getCodexOAuth(),
+        // Persist refreshed tokens only when credentials are stored in config.json.
+        // Env-only credentials must stay ephemeral; persisting them silently writes
+        // env-provided OAuth tokens to disk on every refresh.
+        onCodexOAuthUpdate: config.hasStoredCodexOAuth()
+            ? (credentials) => config.setCodexOAuth(credentials)
+            : undefined,
+        dataPath,
+        knownWords: config.getKnownWords(),
+        proModel: config.getGeminiModel(),
+        flashModel: config.getGeminiFlashModel(),
+        codexModel: config.getCodexModel(),
+        codexTranscriptionModel: config.getCodexTranscriptionModel(),
+    });
+}
+function createAgentService(config, dataPath) {
+    return new agentService_1.AgentService({
+        provider: config.getAiProvider(),
+        apiKey: config.getGeminiApiKey(),
+        codexOAuth: config.getCodexOAuth(),
+        // See note in createTranscriptionService(): persist only for stored creds.
+        onCodexOAuthUpdate: config.hasStoredCodexOAuth()
+            ? (credentials) => config.setCodexOAuth(credentials)
+            : undefined,
+        dataPath,
+        configService: config,
+        codexModel: config.getCodexModel(),
+    });
+}
+function promptLine(message) {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
+    return new Promise((resolve) => {
+        rl.question(`${message} `, (answer) => {
+            rl.close();
+            resolve(answer);
+        });
+    });
+}
+async function handleCodex(args) {
+    const sub = args[0];
+    const dataPath = (0, dataPath_1.getDataPath)();
+    const config = new configService_1.ConfigService(dataPath);
+    if (sub === 'status') {
+        process.stdout.write(`aiProvider=${config.getAiProvider()}\n`);
+        process.stdout.write(`codexOAuthConfigured=${config.hasCodexOAuth()}\n`);
+        process.stdout.write(`codexModel=${config.getCodexModel()}\n`);
+        process.stdout.write(`codexTranscriptionModel=${config.getCodexTranscriptionModel()}\n`);
+        return;
+    }
+    if (sub === 'logout') {
+        config.clearCodexOAuth();
+        process.stderr.write('Signed out of Codex OAuth.\n');
+        return;
+    }
+    if (sub !== 'login') {
+        process.stderr.write('Error: Unknown codex command. Usage: listener codex login|logout|status\n');
+        process.exit(1);
+    }
+    const credentials = await (0, codexOAuth_1.loginCodexOAuth)({
+        openUrl: (url) => {
+            process.stderr.write(`Open this URL in your browser:\n${url}\n`);
+        },
+        onPrompt: async (prompt) => await promptLine(prompt.message),
+        onProgress: (message) => process.stderr.write(`${message}\n`),
+    });
+    config.setCodexOAuth(credentials);
+    config.setAiProvider('codex');
+    process.stderr.write('Signed in with Codex OAuth and set aiProvider=codex.\n');
+}
 function handleConfig(subArgs) {
     const dataPath = (0, dataPath_1.getDataPath)();
     const config = new configService_1.ConfigService(dataPath);
@@ -558,9 +660,8 @@ async function handleMerge(args) {
     }
     const dataPath = (0, dataPath_1.getDataPath)();
     const config = new configService_1.ConfigService(dataPath);
-    const apiKey = config.getGeminiApiKey();
-    if (!apiKey) {
-        process.stderr.write('Error: Gemini API key not found. Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
+    if (!config.hasAiAuth()) {
+        process.stderr.write(formatAiCredentialsError(config));
         process.exit(1);
     }
     // Resolve every ref to a folder + audio path before doing any expensive work
@@ -598,13 +699,7 @@ async function handleMerge(args) {
         outputPath: mergedAudioPath,
     });
     process.stderr.write(`  -> ${mergedAudioPath}\n`);
-    const gemini = new geminiService_1.GeminiService({
-        apiKey,
-        dataPath,
-        knownWords: config.getKnownWords(),
-        proModel: config.getGeminiModel(),
-        flashModel: config.getGeminiFlashModel(),
-    });
+    const gemini = createTranscriptionService(config, dataPath);
     process.stderr.write('Transcribing merged recording...\n');
     const result = await gemini.transcribeAudio(mergedAudioPath, (_percent, message) => {
         process.stderr.write(`  ${message}\n`);
@@ -647,9 +742,8 @@ async function handleAsk(args) {
     }
     const dataPath = (0, dataPath_1.getDataPath)();
     const config = new configService_1.ConfigService(dataPath);
-    const apiKey = config.getGeminiApiKey();
-    if (!apiKey) {
-        process.stderr.write('Error: Gemini API key not found. Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
+    if (!config.hasAiAuth()) {
+        process.stderr.write(formatAiCredentialsError(config));
         process.exit(1);
     }
     let scope = { kind: 'all' };
@@ -657,7 +751,7 @@ async function handleAsk(args) {
         const folderPath = await resolveRef(ref, dataPath);
         scope = { kind: 'single', folderName: path.basename(folderPath) };
     }
-    const agent = new agentService_1.AgentService({ apiKey, dataPath, configService: config });
+    const agent = createAgentService(config, dataPath);
     const confirm = async (proposal) => {
         process.stderr.write('\n');
         return promptYesNo(`Proposed change -> ${proposal.description}\nApply?`);
@@ -712,10 +806,8 @@ async function handleTranscript(args) {
     }
     const dataPath = (0, dataPath_1.getDataPath)();
     const config = new configService_1.ConfigService(dataPath);
-    const apiKey = config.getGeminiApiKey();
-    if (!apiKey) {
-        process.stderr.write('Error: Gemini API key not found.\n' +
-            'Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
+    if (!config.hasAiAuth()) {
+        process.stderr.write(formatAiCredentialsError(config));
         process.exit(1);
     }
     // Resolve --output before the expensive transcription so we fail fast on a
@@ -743,13 +835,7 @@ async function handleTranscript(args) {
             }
         }
     }
-    const gemini = new geminiService_1.GeminiService({
-        apiKey,
-        dataPath,
-        knownWords: config.getKnownWords(),
-        proModel: config.getGeminiModel(),
-        flashModel: config.getGeminiFlashModel(),
-    });
+    const gemini = createTranscriptionService(config, dataPath);
     process.stderr.write(`Processing: ${filePath}\n`);
     const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
         process.stderr.write(`  ${message}\n`);
@@ -782,6 +868,10 @@ async function main() {
         handleConfig(args.slice(1));
         return;
     }
+    if (args[0] === 'codex') {
+        await handleCodex(args.slice(1));
+        return;
+    }
     if (args[0] === 'list') {
         await handleList(args.slice(1));
         return;
@@ -845,22 +935,13 @@ async function main() {
     if (outputDir) {
         outputDir = path.resolve(outputDir);
     }
-    // Get API key
     const dataPath = (0, dataPath_1.getDataPath)();
     const config = new configService_1.ConfigService(dataPath);
-    const apiKey = config.getGeminiApiKey();
-    if (!apiKey) {
-        process.stderr.write('Error: Gemini API key not found.\n' +
-            'Set GEMINI_API_KEY env var or configure via the Listener.AI app.\n');
+    if (!config.hasAiAuth()) {
+        process.stderr.write(formatAiCredentialsError(config));
         process.exit(1);
     }
-    const gemini = new geminiService_1.GeminiService({
-        apiKey,
-        dataPath,
-        knownWords: config.getKnownWords(),
-        proModel: config.getGeminiModel(),
-        flashModel: config.getGeminiFlashModel(),
-    });
+    const gemini = createTranscriptionService(config, dataPath);
     process.stderr.write(`Processing: ${filePath}\n`);
     const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
         process.stderr.write(`  ${message}\n`);

package/dist/codexOAuth.js ADDED Viewed

@@ -0,0 +1,68 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.getCodexOAuthEnvCredentials = getCodexOAuthEnvCredentials;
+exports.hasCodexOAuthEnvCredentials = hasCodexOAuthEnvCredentials;
+exports.resolveCodexAccessToken = resolveCodexAccessToken;
+exports.requireCodexAccessToken = requireCodexAccessToken;
+exports.loginCodexOAuth = loginCodexOAuth;
+const esmImport_1 = require("./esmImport");
+let runtimePromise;
+async function loadCodexOAuthRuntime() {
+    runtimePromise ?? (runtimePromise = (0, esmImport_1.importEsm)('@earendil-works/pi-ai/oauth'));
+    return await runtimePromise;
+}
+function getCodexOAuthEnvCredentials() {
+    const access = process.env.CODEX_OAUTH_ACCESS_TOKEN?.trim() ||
+        process.env.OPENAI_CODEX_ACCESS_TOKEN?.trim() ||
+        '';
+    const refresh = process.env.CODEX_OAUTH_REFRESH_TOKEN?.trim() ||
+        process.env.OPENAI_CODEX_REFRESH_TOKEN?.trim() ||
+        '';
+    if (!access || !refresh)
+        return undefined;
+    const expiresRaw = process.env.CODEX_OAUTH_EXPIRES || process.env.OPENAI_CODEX_EXPIRES;
+    const expires = expiresRaw ? Number.parseInt(expiresRaw, 10) : Date.now() + 30 * 60000;
+    return {
+        access,
+        refresh,
+        expires: Number.isFinite(expires) ? expires : Date.now() + 30 * 60000,
+    };
+}
+function hasCodexOAuthEnvCredentials() {
+    return !!getCodexOAuthEnvCredentials();
+}
+async function resolveCodexAccessToken(params) {
+    const credentials = params.credentials ?? getCodexOAuthEnvCredentials();
+    if (!credentials)
+        return undefined;
+    const { getOAuthApiKey } = await loadCodexOAuthRuntime();
+    const resolved = await getOAuthApiKey('openai-codex', { 'openai-codex': credentials });
+    if (!resolved)
+        return undefined;
+    const nextCredentials = resolved.newCredentials;
+    if (nextCredentials.access !== credentials.access ||
+        nextCredentials.refresh !== credentials.refresh ||
+        nextCredentials.expires !== credentials.expires) {
+        await params.onCredentialsChanged?.(nextCredentials);
+    }
+    return resolved.apiKey;
+}
+async function requireCodexAccessToken(params) {
+    const token = await resolveCodexAccessToken(params);
+    if (!token) {
+        throw new Error('Codex OAuth is not configured.');
+    }
+    return token;
+}
+async function loginCodexOAuth(params) {
+    const { loginOpenAICodex } = await loadCodexOAuthRuntime();
+    const credentials = await loginOpenAICodex({
+        originator: 'listener-ai',
+        onAuth: (info) => {
+            void params.openUrl(info.url);
+        },
+        onPrompt: params.onPrompt,
+        onProgress: params.onProgress,
+    });
+    return credentials;
+}

package/dist/codexOAuthHolder.js ADDED Viewed

@@ -0,0 +1,26 @@
+"use strict";
+// Encapsulates Codex OAuth credential state for services that need a fresh
+// access token per request. Replaces the parallel `this.codexOAuth +
+// this.onCodexOAuthUpdate + getToken()` fields that lived inside
+// AgentService and GeminiService -- keeping them in sync was error-prone and
+// the rotation invariant (caller persists when source is config, skips when
+// source is env) is easy to get wrong if scattered.
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CodexOAuthHolder = void 0;
+const codexOAuth_1 = require("./codexOAuth");
+class CodexOAuthHolder {
+    constructor(options) {
+        this.credentials = options.credentials;
+        this.onUpdate = options.onUpdate;
+    }
+    async getToken() {
+        return await (0, codexOAuth_1.requireCodexAccessToken)({
+            credentials: this.credentials,
+            onCredentialsChanged: async (next) => {
+                this.credentials = next;
+                await this.onUpdate?.(next);
+            },
+        });
+    }
+}
+exports.CodexOAuthHolder = CodexOAuthHolder;

package/dist/codexTranscription.js ADDED Viewed

@@ -0,0 +1,168 @@
+"use strict";
+// Minimal wrapper around OpenAI's `/v1/audio/transcriptions` endpoint.
+//
+// We keep this here (rather than going through pi-ai) because pi-ai is a
+// chat/tool-call unified API -- it has no audio transcription surface. The
+// Codex transcription flow needs only a multipart POST, so a thin direct
+// fetch is simpler than wedging audio into pi-ai's chat model.
+//
+// Two output shapes, branched on model id:
+//   - `gpt-4o-transcribe-diarize` (default) returns `diarized_json` with
+//     speaker-labeled segments. We re-label "Speaker 0/1/..." onto the
+//     same `참가자N` convention the Gemini path uses so downstream code
+//     (summarization, transcript.md, Notion) doesn't have to care which
+//     transcription engine produced the text. This model rejects `prompt`,
+//     so user-supplied glossaries (`knownWords`) are dropped on this path.
+//   - `gpt-4o-transcribe` (and `whisper-1`) return `{text}` and accept
+//     `prompt` for vocabulary biasing, but produce no speaker labels.
+//
+// Format support: OpenAI accepts mp3, mp4, mpeg, mpga, m4a, wav, webm. Inputs
+// outside that set are remuxed upstream in geminiService.ts via ffmpeg before
+// reaching this helper.
+// CommonJS interop helpers (this looks like the standard boilerplate a
+// TypeScript compiler emits for `import * as x` -- not hand-written code).
+// Each helper reuses a version already present on `this` when there is one.
+//   __createBinding     exposes m[k] on o as k2 (k when k2 is omitted), via a getter where Object.create exists
+//   __setModuleDefault  sets o.default to v
+//   __importStar        returns ES-module objects unchanged; otherwise copies every own
+//                       non-"default" property onto a new object whose `default` is the module
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.OPENAI_TRANSCRIPTION_EXTENSIONS = void 0;
+exports.isDiarizeModel = isDiarizeModel;
+exports.transcribeCodexAudio = transcribeCodexAudio;
+exports.formatDiarizedSegments = formatDiarizedSegments;
+const fs = __importStar(require("fs"));
+const path = __importStar(require("path"));
+const audioFormats_1 = require("./audioFormats");
+// Base URL that the `/audio/transcriptions` request URL is built from.
+const OPENAI_API_BASE_URL = 'https://api.openai.com/v1';
+// Model id that switches transcribeCodexAudio() onto the diarized response path.
+const DIARIZE_MODEL_ID = 'gpt-4o-transcribe-diarize';
+// Lowercase, dot-prefixed extensions of the audio formats OpenAI accepts.
+// Exported only; nothing in the visible part of this module reads it.
+exports.OPENAI_TRANSCRIPTION_EXTENSIONS = new Set([
+    '.mp3',
+    '.mp4',
+    '.mpeg',
+    '.mpga',
+    '.m4a',
+    '.wav',
+    '.webm',
+]);
+function isDiarizeModel(model) {
+    return model.trim() === DIARIZE_MODEL_ID;
+}
+async function transcribeCodexAudio(params) {
+    const audioData = fs.readFileSync(params.audioFilePath);
+    const ext = path.extname(params.audioFilePath);
+    const model = params.model.trim();
+    const diarize = isDiarizeModel(model);
+    const form = new FormData();
+    form.append('model', model);
+    if (params.language) {
+        form.append('language', params.language);
+    }
+    if (diarize) {
+        // Required for the diarize model. `chunking_strategy=auto` lets OpenAI
+        // split long audio internally while keeping speaker identity coherent
+        // across chunks -- so we can hand it a whole 50-minute meeting (subject
+        // to the 25MB file-size limit upstream).
+        form.append('response_format', 'diarized_json');
+        form.append('chunking_strategy', 'auto');
+    }
+    else if (params.prompt?.trim()) {
+        form.append('prompt', params.prompt.trim());
+    }
+    form.append('file', new Blob([audioData], { type: (0, audioFormats_1.mimeTypeForExtension)(ext) }), path.basename(params.audioFilePath));
+    const sizeMB = (audioData.byteLength / (1024 * 1024)).toFixed(2);
+    const startedAt = Date.now();
+    console.log(`[codex-transcribe] -> ${path.basename(params.audioFilePath)} ${sizeMB}MB model=${model}${diarize ? ' diarize=true' : params.prompt ? ` prompt=${params.prompt.length}chars` : ''}${params.language ? ` lang=${params.language}` : ''}`);
+    const response = await fetch(`${OPENAI_API_BASE_URL}/audio/transcriptions`, {
+        method: 'POST',
+        headers: { Authorization: `Bearer ${await params.getToken()}` },
+        body: form,
+    });
+    const elapsed = Date.now() - startedAt;
+    console.log(`[codex-transcribe] <- ${elapsed}ms status=${response.status} ${response.statusText}`);
+    if (!response.ok) {
+        // Truncate the error body so a verbose upstream response doesn't leak
+        // headers/debug payload into logs and IPC error strings.
+        const body = await response.text().catch(() => '');
+        const trimmed = body.length > 500 ? `${body.slice(0, 500)}...` : body;
+        throw new Error(`OpenAI transcription failed (${response.status} ${response.statusText})${trimmed ? `: ${trimmed}` : ''}`);
+    }
+    if (diarize) {
+        const payload = (await response.json());
+        return formatDiarizedSegments(payload.segments);
+    }
+    const payload = (await response.json());
+    if (typeof payload.text !== 'string' || payload.text.trim().length === 0) {
+        throw new Error('OpenAI transcription response missing text');
+    }
+    return payload.text;
+}
+// Re-label OpenAI's raw speaker ids ("Speaker 0", "Speaker 1", or the names
+// supplied via `known_speaker_names[]` if used) onto our `참가자N` convention,
+// matching the format Gemini emits when prompted for speaker labels. Empty
+// segments are dropped; consecutive segments from the same speaker are merged
+// onto a single line so downstream consumers don't see one speaker split into
+// 30+ "참가자1: ..." stubs.
+function formatDiarizedSegments(segments) {
+    if (!segments || segments.length === 0) {
+        throw new Error('OpenAI diarized transcription returned no segments');
+    }
+    const speakerIdx = new Map();
+    let nextIdx = 1;
+    const lines = [];
+    let activeLabel;
+    let activeBuffer = '';
+    for (const seg of segments) {
+        const text = (seg.text ?? '').trim();
+        if (!text)
+            continue;
+        const rawSpeaker = seg.speaker ?? 'unknown';
+        let idx = speakerIdx.get(rawSpeaker);
+        if (idx === undefined) {
+            idx = nextIdx++;
+            speakerIdx.set(rawSpeaker, idx);
+        }
+        const label = `참가자${idx}`;
+        if (label === activeLabel) {
+            activeBuffer += ` ${text}`;
+        }
+        else {
+            if (activeLabel !== undefined)
+                lines.push(`${activeLabel}: ${activeBuffer}`);
+            activeLabel = label;
+            activeBuffer = text;
+        }
+    }
+    if (activeLabel !== undefined)
+        lines.push(`${activeLabel}: ${activeBuffer}`);
+    if (lines.length === 0) {
+        throw new Error('OpenAI diarized transcription had segments but no usable text');
+    }
+    return lines.join('\n\n');
+}