npm - listener-ai - Versions diffs - 2.6.0 → 2.7.1 - Mend

listener-ai 2.6.0 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +87 -22
package/THIRD_PARTY_NOTICES.md +27 -0
package/dist/agentService.js +142 -119
package/dist/aiProvider.js +35 -0
package/dist/cli.js +119 -38
package/dist/codexOAuth.js +68 -0
package/dist/codexOAuthHolder.js +26 -0
package/dist/codexTranscription.js +168 -0
package/dist/configService.js +171 -25
package/dist/dataPath.js +30 -10
package/dist/esmImport.js +15 -0
package/dist/geminiService.js +203 -39
package/dist/main.js +84 -17
package/dist/piAiClient.js +102 -0
package/package.json +13 -4

package/README.md CHANGED

@@ -1,8 +1,38 @@
 # Listener.AI
-AI-powered audio transcription with meeting summaries, key points, and action items.
+Listener.AI is a desktop meeting recorder and CLI that turns audio into searchable AI meeting notes. It records meetings, imports existing audio, transcribes with Gemini, generates Korean summaries, key points, and action items, then keeps everything in a local archive you can send to Notion or Slack.
-Available as a **CLI tool** (via npm) and a **desktop app** (via [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases)).
+Available as a **desktop app** via [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases) and as a **CLI tool** via npm.
+![Listener.AI desktop home screen](assets/readme/listener-ai-home.png)
+## What It Does
+1. Record a meeting from your microphone, or import an existing audio file.
+2. Capture timestamped live highlights while the meeting is running.
+3. Transcribe the audio and generate a structured meeting note.
+4. Search, reopen, merge, export, or re-transcribe past recordings.
+5. Share completed notes to Notion or Slack when your integrations are configured.
+## Desktop App
+Download from [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases):
+- **macOS**: Intel (x64) and Apple Silicon (arm64) DMG
+- **Windows**: x64 installer
+The desktop app includes:
+- One-click recording with meeting title, mic selection, and elapsed timer
+- Optional macOS system audio capture for Zoom, Meet, Teams, browser tabs, and other app audio
+- Drag-and-drop or paste import for audio files
+- Live highlights and timestamped flags during recording
+- Recent recordings with search, transcript status, merge, Finder reveal, and M4A export actions
+- Meeting detection and external display prompts for recording automation
+- Automatic FFmpeg setup when transcription needs it
+- Local configuration shared with the CLI
+![Listener.AI settings and integrations](assets/readme/listener-ai-settings.png)
 ## CLI
@@ -21,12 +51,20 @@ npx listener-ai <audio-file>
 ### Prerequisites
 - **FFmpeg** installed on your system (`brew install ffmpeg` / `apt install ffmpeg`)
-- **Google Gemini API key** from [Google AI Studio](https://makersuite.google.com/app/apikey)
+- One of:
+  - **Google Gemini API key** from [Google AI Studio](https://makersuite.google.com/app/apikey), or
+  - **ChatGPT Plus / Pro subscription** (Codex OAuth)
 ### Setup
+Pick one AI provider. Gemini uses a static API key; Codex uses a ChatGPT subscription via OAuth sign-in.
 ```bash
+# Option A -- Gemini
 listener config set geminiApiKey <your-key>
+# Option B -- Codex (uses your ChatGPT Plus/Pro account)
+listener codex login                   # browser sign-in, sets aiProvider=codex
 ```
 Optional Notion integration:
@@ -36,20 +74,37 @@ listener config set notionApiKey <your-key>
 listener config set notionDatabaseId <your-id>
 ```
+Optional Slack integration:
+```bash
+listener config set slackWebhookUrl <your-webhook-url>
+listener config set slackAutoShare true  # Auto-share when auto mode is enabled
+```
 ### Usage
 ```bash
-listener recording.mp3                # Transcribe to default output dir
-listener recording.m4a --output ./    # Transcribe to current directory
-listener transcript recording.wav     # Print transcript to stdout (no summary)
+listener recording.mp3                # Transcribe to the default output directory
+listener recording.m4a --output ./    # Transcribe to the current directory
+listener transcript recording.wav     # Print transcript to stdout without summary
 listener transcript recording.wav -o out.txt
                                       # Write transcript to a file
 listener transcript recording.wav --prompt "Translate to English while transcribing"
                                       # Override the default transcription instruction
-listener config list                  # Show all config values (secrets masked)
+listener list                         # List saved transcriptions
+listener show <ref>                   # Print a saved meeting summary
+listener search "roadmap"             # Search past meeting notes
+listener export <ref> --transcript    # Export a saved note with transcript
+listener merge <ref1> <ref2>          # Merge and re-transcribe multiple notes
+listener ask "What did we decide?" --ref <ref>
+                                      # Ask about a saved meeting
+listener codex login                  # Sign in with ChatGPT and set aiProvider=codex
+listener codex status                 # Show Codex OAuth + provider/model status
+listener codex logout                 # Clear stored Codex OAuth credentials
+listener config list                  # Show all config values with secrets masked
 listener config get <key>             # Print one config value
 listener config set <key> <value>     # Set a config value
-listener config unset <key>           # Clear a config value (falls back to default)
+listener config unset <key>           # Clear a config value
 listener config path                  # Print config file path
 listener --version                    # Print CLI version
 listener --help                       # Show usage
@@ -57,33 +112,36 @@ listener --help                       # Show usage
 Supported formats: mp3, m4a, wav, ogg, flac, aac, wma, opus, webm
-Output is a folder containing `transcript.md` and `summary.md` with speaker identification, Korean summary, key points, and action items.
-## Desktop App
-Download from [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases):
-- **macOS**: Intel (x64) and Apple Silicon (arm64) DMG
-- **Windows**: x64 installer
-The desktop app includes one-click recording, auto-transcription, Notion upload, and automatic FFmpeg download.
+Full meeting-note output is a folder containing `transcript.md` and `summary.md` with speaker identification, Korean summary, key points, and action items. Transcript-only output can print plain text to stdout or write directly to a file.
 ## Configuration
 Config is stored in your system application data folder:
-- **macOS**: `~/Library/Application Support/Listener.AI/config.json`
-- **Windows**: `%APPDATA%/Listener.AI/config.json`
-- **Linux**: `~/.config/Listener.AI/config.json`
-CLI and desktop app share the same config file.
+- **macOS**: `~/Library/Application Support/listener-ai/config.json`
+- **Windows**: `%APPDATA%/listener-ai/config.json`
+- **Linux**: `~/.config/listener-ai/config.json`
+CLI and desktop app share the same config file. Existing installs that already have a `Listener.AI` data folder continue to use it when `listener-ai` is not present.
 ### Getting API Keys
 #### Google Gemini API
 1. Visit [Google AI Studio](https://makersuite.google.com/app/apikey)
 2. Click "Create API Key"
 3. Copy the generated key
-#### Notion Integration (optional)
+#### Codex OAuth (ChatGPT Plus/Pro)
+1. Run `listener codex login` (or sign in from the desktop app's Settings panel).
+2. Complete the browser sign-in to ChatGPT.
+3. Confirm `listener codex status` shows `codexOAuthConfigured=true`.
+Codex transcription, summarization, and the Ask Listener agent all go through your ChatGPT subscription -- no separate API key needed.
+#### Notion Integration
 1. Go to [Notion Integrations](https://www.notion.so/my-integrations)
 2. Create a new integration named "Listener.AI"
 3. Grant permissions: Read, Insert, Update content
@@ -91,11 +149,18 @@ CLI and desktop app share the same config file.
 5. Share your database with the integration
 6. Get database ID from URL: `notion.so/workspace/DATABASE_ID`
+#### Slack Integration
+1. Create a Slack app with an Incoming Webhook.
+2. Add the webhook to the channel where meeting notes should be posted.
+3. Copy the webhook URL and save it with `listener config set slackWebhookUrl <url>`.
 ## Development
 ```bash
 pnpm install
 pnpm run start        # Run Electron app
+pnpm run dev:renderer # Run renderer-only preview with sample data
 pnpm run cli          # Run CLI locally
 pnpm run dist:mac     # Build macOS
 pnpm run dist:win     # Build Windows

package/THIRD_PARTY_NOTICES.md ADDED

@@ -0,0 +1,27 @@
+# Third-Party Notices
+This project uses third-party packages under their own licenses. The entries below cover packages added or newly bundled for Codex OAuth support.
+## @earendil-works/pi-ai
+- Version: 0.74.0
+- License: MIT
+- Copyright: Mario Zechner
+- Source: https://github.com/earendil-works/pi-mono/tree/main/packages/ai
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+## openai
+- Version: 6.26.0 (transitive dependency of `@earendil-works/pi-ai`)
+- License: Apache-2.0
+- Copyright: OpenAI
+- Source: https://github.com/openai/openai-node
+The full Apache License, Version 2.0 text is included in the published `openai` package as `LICENSE`.

package/dist/agentService.js CHANGED

@@ -38,7 +38,9 @@ exports.coerceConfigValue = coerceConfigValue;
 exports.isValidFolderName = isValidFolderName;
 exports.describeProposal = describeProposal;
 const path = __importStar(require("path"));
-const genai_1 = require("@google/genai");
+const aiProvider_1 = require("./aiProvider");
+const codexOAuthHolder_1 = require("./codexOAuthHolder");
+const piAiClient_1 = require("./piAiClient");
 const outputService_1 = require("./outputService");
 const searchService_1 = require("./searchService");
 exports.WRITABLE_CONFIG_KEYS = [
@@ -53,8 +55,11 @@ exports.WRITABLE_CONFIG_KEYS = [
 ];
 exports.READABLE_CONFIG_KEYS = [
     ...exports.WRITABLE_CONFIG_KEYS,
+    'aiProvider',
     'geminiModel',
     'geminiFlashModel',
+    'codexModel',
+    'codexTranscriptionModel',
 ];
 function isWritableKey(key) {
     return exports.WRITABLE_CONFIG_KEYS.includes(key);
@@ -94,84 +99,63 @@ function coerceConfigValue(key, raw) {
         }
     }
 }
-function buildTools(scope, hasConfirm) {
+// Pi-ai validates tool arguments against TypeBox schemas. We build them lazily
+// because TypeBox lives inside the ESM-only pi-ai package; resolving the
+// schemas in module scope would fire a synchronous require() before pi-ai is
+// loaded.
+async function buildTools(scope, hasConfirm) {
+    const Type = await (0, piAiClient_1.getTypeBox)();
     const tools = [];
     if (scope.kind === 'all') {
         tools.push({
             name: 'search_transcriptions',
             description: 'Full-text search across saved meeting transcriptions. Returns top-k hits with title, date, snippet, and folder name. Use this to find meetings relevant to the user question.',
-            parameters: {
-                type: genai_1.Type.OBJECT,
-                properties: {
-                    query: { type: genai_1.Type.STRING, description: 'Search keywords. Can be Korean or English.' },
-                    limit: { type: genai_1.Type.INTEGER, description: 'Max hits to return (default 5).' },
-                    include_transcript: {
-                        type: genai_1.Type.BOOLEAN,
-                        description: 'Also search the full transcript body (slower). Default false.',
-                    },
-                },
-                required: ['query'],
-            },
+            parameters: Type.Object({
+                query: Type.String({ description: 'Search keywords. Can be Korean or English.' }),
+                limit: Type.Optional(Type.Integer({ description: 'Max hits to return (default 5).' })),
+                include_transcript: Type.Optional(Type.Boolean({
+                    description: 'Also search the full transcript body (slower). Default false.',
+                })),
+            }),
         });
         tools.push({
             name: 'list_recent_transcriptions',
             description: 'List the most recent saved transcriptions, newest first. Use when the user asks "what did we talk about recently" or "show me yesterday\'s meetings".',
-            parameters: {
-                type: genai_1.Type.OBJECT,
-                properties: {
-                    limit: { type: genai_1.Type.INTEGER, description: 'Max entries (default 10).' },
-                },
-            },
+            parameters: Type.Object({
+                limit: Type.Optional(Type.Integer({ description: 'Max entries (default 10).' })),
+            }),
         });
         tools.push({
             name: 'get_transcription',
             description: 'Fetch a saved meeting record (summary, key points, action items) by folder name. Pass include_transcript=true only when you need the verbatim transcript body; omit it for summary-level questions to keep the response compact.',
-            parameters: {
-                type: genai_1.Type.OBJECT,
-                properties: {
-                    folder_name: {
-                        type: genai_1.Type.STRING,
-                        description: 'The folderName returned by search_transcriptions or list_recent_transcriptions.',
-                    },
-                    include_transcript: {
-                        type: genai_1.Type.BOOLEAN,
-                        description: 'Include the full transcript body. Default false.',
-                    },
-                },
-                required: ['folder_name'],
-            },
+            parameters: Type.Object({
+                folder_name: Type.String({
+                    description: 'The folderName returned by search_transcriptions or list_recent_transcriptions.',
+                }),
+                include_transcript: Type.Optional(Type.Boolean({ description: 'Include the full transcript body. Default false.' })),
+            }),
         });
     }
     tools.push({
         name: 'get_config',
         description: `Read a single Listener.AI setting value. Allowed keys: ${exports.READABLE_CONFIG_KEYS.join(', ')}. API keys and database IDs are never readable here.`,
-        parameters: {
-            type: genai_1.Type.OBJECT,
-            properties: {
-                key: { type: genai_1.Type.STRING, description: `One of: ${exports.READABLE_CONFIG_KEYS.join(', ')}` },
-            },
-            required: ['key'],
-        },
+        parameters: Type.Object({
+            key: Type.String({ description: `One of: ${exports.READABLE_CONFIG_KEYS.join(', ')}` }),
+        }),
     });
     if (hasConfirm) {
         tools.push({
             name: 'set_config',
             description: `Propose a change to a Listener.AI setting. Requires user confirmation before taking effect. Allowed keys: ${exports.WRITABLE_CONFIG_KEYS.join(', ')}. Do NOT try to set API keys, Notion database ID, or other credentials here.`,
-            parameters: {
-                type: genai_1.Type.OBJECT,
-                properties: {
-                    key: { type: genai_1.Type.STRING, description: `One of: ${exports.WRITABLE_CONFIG_KEYS.join(', ')}` },
-                    value: {
-                        type: genai_1.Type.STRING,
-                        description: 'The new value. For booleans pass "true"/"false"; for numbers pass the digits as a string; for strings pass the string.',
-                    },
-                    reason: {
-                        type: genai_1.Type.STRING,
-                        description: 'Short human-readable reason shown to the user in the confirmation prompt.',
-                    },
-                },
-                required: ['key', 'value'],
-            },
+            parameters: Type.Object({
+                key: Type.String({ description: `One of: ${exports.WRITABLE_CONFIG_KEYS.join(', ')}` }),
+                value: Type.String({
+                    description: 'The new value. For booleans pass "true"/"false"; for numbers pass the digits as a string; for strings pass the string.',
+                }),
+                reason: Type.Optional(Type.String({
+                    description: 'Short human-readable reason shown to the user in the confirmation prompt.',
+                })),
+            }),
         });
     }
     return tools;
@@ -216,103 +200,142 @@ function buildSinglePrimer(data) {
     }
     return lines.join('\n');
 }
-function historyToContents(history) {
+// Replay prior conversation as pi-ai Messages. Model turns are replayed in
+// full (assistant content + tool results) when `piaiMessages` is present so
+// the model can reason about its earlier tool use. Without those, we degrade
+// gracefully to plain text -- this is the path old-format history entries
+// (pre-migration) take, and the path the renderer takes on a fresh session.
+// Replay an old AgentChatMessage as a pi-ai assistant message when the
+// caller didn't carry the full `piaiMessages` cluster forward. The api /
+// provider / model fields on assistant messages drive cross-provider handoff
+// transformations inside pi-ai, but plain-text replay carries no thinking or
+// tool-call content for pi-ai to massage -- the values just need to parse.
+function synthAssistantText(text, provider) {
+    const isCodex = provider === 'codex';
+    return {
+        role: 'assistant',
+        content: [{ type: 'text', text }],
+        api: isCodex ? 'openai-codex-responses' : 'google-generative-ai',
+        provider: isCodex ? 'openai-codex' : 'google',
+        model: '',
+    };
+}
+function historyToMessages(history, provider) {
     const out = [];
     for (const m of history) {
-        // Model messages replay their full turn cluster (text + function calls +
-        // tool responses) so the agent can reason about prior tool use.
-        if (m.role === 'model' && m.turns && m.turns.length > 0) {
-            out.push(...m.turns);
+        if (m.role === 'model' && m.piaiMessages && m.piaiMessages.length > 0) {
+            out.push(...m.piaiMessages);
+            continue;
+        }
+        if (m.role === 'model') {
+            out.push(synthAssistantText(m.text, provider));
             continue;
         }
-        out.push({ role: m.role, parts: [{ text: m.text }] });
+        out.push({ role: 'user', content: m.text, timestamp: Date.now() });
     }
     return out;
 }
-function extractFinalText(parts) {
-    if (!parts)
-        return '';
-    return parts
-        .map((p) => (typeof p.text === 'string' ? p.text : ''))
-        .filter(Boolean)
-        .join('\n')
-        .trim();
+function extractToolCalls(message) {
+    return message.content.filter((b) => b.type === 'toolCall');
 }
 class AgentService {
     constructor(opts) {
-        this.ai = new genai_1.GoogleGenAI({ apiKey: opts.apiKey });
+        this.provider = opts.provider ?? 'gemini';
+        if (this.provider === 'gemini') {
+            if (!opts.apiKey) {
+                throw new Error('Gemini API key is required for the Gemini provider.');
+            }
+            this.geminiApiKey = opts.apiKey;
+        }
+        else {
+            this.codexAuth = new codexOAuthHolder_1.CodexOAuthHolder({
+                credentials: opts.codexOAuth,
+                onUpdate: opts.onCodexOAuthUpdate,
+            });
+        }
         this.dataPath = opts.dataPath;
         this.configService = opts.configService;
-        this.defaultModel = opts.defaultModel ?? opts.configService.getGeminiFlashModel();
+        this.defaultModel =
+            opts.defaultModel ??
+                (this.provider === 'codex'
+                    ? opts.codexModel || aiProvider_1.DEFAULT_CODEX_MODEL
+                    : opts.configService.getGeminiFlashModel());
+    }
+    // For Codex we mint a fresh access token per request (the holder rotates
+    // it transparently). For Gemini we already have the static key in hand.
+    async resolveApiKey() {
+        if (this.codexAuth)
+            return await this.codexAuth.getToken();
+        if (!this.geminiApiKey) {
+            throw new Error('Gemini API key is not configured.');
+        }
+        return this.geminiApiKey;
     }
     async run(opts) {
-        const model = opts.model ?? this.defaultModel;
+        const modelId = opts.model ?? this.defaultModel;
         const maxSteps = opts.maxSteps ?? 6;
-        const tools = buildTools(opts.scope, !!opts.confirm);
-        // Load the single-meeting record once if needed; title + primer derive from it.
+        const tools = await buildTools(opts.scope, !!opts.confirm);
+        // Load the single-meeting record once so the system prompt can name the
+        // meeting and the primer message can carry its body.
         const singleData = opts.scope.kind === 'single' && isValidFolderName(opts.scope.folderName)
             ? await (0, outputService_1.readTranscription)(path.join((0, outputService_1.getTranscriptionsDir)(this.dataPath), opts.scope.folderName))
             : null;
-        const systemInstruction = systemInstructionFor(opts.scope, singleData?.title);
         const history = opts.history ? [...opts.history] : [];
-        // For single-meeting scope the primer must precede all prior turns so the
-        // model sees the meeting context before its own earlier responses about it.
-        const contents = [];
+        const context = {
+            systemPrompt: systemInstructionFor(opts.scope, singleData?.title),
+            messages: [],
+            tools: tools.length > 0 ? tools : undefined,
+        };
+        // Single-meeting primer goes first so the model sees the meeting body
+        // before any of its own prior turns about it.
         if (singleData) {
-            contents.push({ role: 'user', parts: [{ text: buildSinglePrimer(singleData) }] });
+            context.messages.push({
+                role: 'user',
+                content: buildSinglePrimer(singleData),
+                timestamp: Date.now(),
+            });
         }
-        for (const c of historyToContents(history))
-            contents.push(c);
-        contents.push({ role: 'user', parts: [{ text: opts.question }] });
+        for (const m of historyToMessages(history, this.provider))
+            context.messages.push(m);
+        context.messages.push({ role: 'user', content: opts.question, timestamp: Date.now() });
         history.push({ role: 'user', text: opts.question });
-        // Track turns added from here on so they can be attached to the model
-        // message for multi-turn tool memory.
-        const modelTurnsStart = contents.length;
+        const model = await (0, piAiClient_1.getModel)(this.provider, modelId);
+        const turnsStart = context.messages.length;
         const applied = [];
         let finalAnswer = '';
         for (let step = 0; step < maxSteps; step++) {
-            const response = await this.ai.models.generateContent({
-                model,
-                contents,
-                config: {
-                    systemInstruction,
-                    temperature: 0.3,
-                    tools: tools.length > 0 ? [{ functionDeclarations: tools }] : undefined,
-                },
-            });
-            const candidate = response.candidates?.[0];
-            const parts = candidate?.content?.parts ?? [];
-            const functionCalls = response.functionCalls ?? [];
-            // Record model turn verbatim (keeps function call history correct).
-            if (candidate?.content) {
-                contents.push(candidate.content);
-            }
-            if (functionCalls.length === 0) {
-                finalAnswer = extractFinalText(parts);
+            const apiKey = await this.resolveApiKey();
+            const response = await (0, piAiClient_1.complete)(model, context, { apiKey, temperature: 0.3 });
+            context.messages.push(response);
+            const toolCalls = extractToolCalls(response);
+            if (toolCalls.length === 0) {
+                finalAnswer = (0, piAiClient_1.extractFinalText)(response);
                 break;
             }
-            // Dispatch all tool calls from this turn in parallel. Read-only tools
-            // (search/list/get) benefit directly; set_config awaits a user click but
-            // that still happens concurrently with the reads rather than after them.
-            const results = await Promise.all(functionCalls.map((call) => this.dispatchTool(call, opts, applied)));
-            const toolResponseParts = functionCalls.map((call, i) => ({
-                functionResponse: {
-                    id: call.id,
-                    name: call.name ?? '',
-                    response: results[i],
-                },
-            }));
-            contents.push({ role: 'user', parts: toolResponseParts });
+            // Read-only tools (search/list/get) run in parallel; set_config awaits a
+            // user click but that still happens concurrently with the reads rather
+            // than serializing the round-trip.
+            const results = await Promise.all(toolCalls.map((call) => this.dispatchTool(call, opts, applied)));
+            for (let i = 0; i < toolCalls.length; i++) {
+                context.messages.push({
+                    role: 'toolResult',
+                    toolCallId: toolCalls[i].id,
+                    toolName: toolCalls[i].name,
+                    content: [{ type: 'text', text: JSON.stringify(results[i]) }],
+                    isError: false,
+                    timestamp: Date.now(),
+                });
+            }
         }
         if (!finalAnswer) {
             finalAnswer = '(no answer produced within step limit)';
         }
-        const modelTurns = contents.slice(modelTurnsStart);
-        history.push({ role: 'model', text: finalAnswer, turns: modelTurns });
+        const piaiMessages = context.messages.slice(turnsStart);
+        history.push({ role: 'model', text: finalAnswer, piaiMessages });
         return { answer: finalAnswer, appliedActions: applied, history };
     }
     async dispatchTool(call, opts, applied) {
-        const args = (call.args ?? {});
+        const args = call.arguments ?? {};
         try {
             switch (call.name) {
                 case 'search_transcriptions': {

package/dist/aiProvider.js ADDED

@@ -0,0 +1,35 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CODEX_TRANSCRIPTION_NON_DIARIZE_MODEL = exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = exports.DEFAULT_CODEX_MODEL = exports.DEFAULT_GEMINI_FLASH_MODEL = exports.DEFAULT_GEMINI_MODEL = exports.AI_PROVIDERS = void 0;
+exports.isAiProvider = isAiProvider;
+exports.normalizeAiProvider = normalizeAiProvider;
+exports.toPiAiProvider = toPiAiProvider;
+exports.AI_PROVIDERS = ['gemini', 'codex'];
+exports.DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro';
+exports.DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash';
+exports.DEFAULT_CODEX_MODEL = 'gpt-5.5';
+// gpt-4o-transcribe-diarize ships native speaker diarization at the same
+// per-minute price ($0.006/min) as the non-diarize model. Trade-offs vs
+// gpt-4o-transcribe (see docs/model-pricing.md):
+//   - doesn't accept the `prompt` parameter, so user glossaries
+//     (`knownWords`) are silently dropped on this path
+//   - we still segment audio into 5-min chunks for parallel-upload speed,
+//     so "Speaker 0" in chunk 1 is not guaranteed to be the same physical
+//     person as "Speaker 0" in chunk 2
+exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = 'gpt-4o-transcribe-diarize';
+// Pre-diarize model id. Useful for users who want the older prompt-driven
+// behavior (vocabulary hints via `knownWords`) at the cost of speaker
+// labels. Switch via `listener config set codexTranscriptionModel gpt-4o-transcribe`.
+exports.CODEX_TRANSCRIPTION_NON_DIARIZE_MODEL = 'gpt-4o-transcribe';
+function isAiProvider(value) {
+    return exports.AI_PROVIDERS.includes(value);
+}
+function normalizeAiProvider(value) {
+    if (typeof value !== 'string')
+        return undefined;
+    const normalized = value.trim().toLowerCase();
+    return isAiProvider(normalized) ? normalized : undefined;
+}
+function toPiAiProvider(provider) {
+    return provider === 'codex' ? 'openai-codex' : 'google';
+}