listener-ai 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -22
- package/THIRD_PARTY_NOTICES.md +27 -0
- package/dist/agentService.js +142 -119
- package/dist/aiProvider.js +35 -0
- package/dist/cli.js +119 -38
- package/dist/codexOAuth.js +68 -0
- package/dist/codexOAuthHolder.js +26 -0
- package/dist/codexTranscription.js +168 -0
- package/dist/configService.js +171 -25
- package/dist/dataPath.js +30 -10
- package/dist/esmImport.js +15 -0
- package/dist/geminiService.js +203 -39
- package/dist/main.js +84 -17
- package/dist/piAiClient.js +102 -0
- package/package.json +13 -4
package/README.md
CHANGED
|
@@ -1,8 +1,38 @@
|
|
|
1
1
|
# Listener.AI
|
|
2
2
|
|
|
3
|
-
AI
|
|
3
|
+
Listener.AI is a desktop meeting recorder and CLI that turns audio into searchable AI meeting notes. It records meetings, imports existing audio, transcribes with Gemini or Codex (ChatGPT Plus/Pro), generates Korean summaries, key points, and action items, then keeps everything in a local archive you can send to Notion or Slack.
|
|
4
4
|
|
|
5
|
-
Available as a **
|
|
5
|
+
Available as a **desktop app** via [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases) and as a **CLI tool** via npm.
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
## What It Does
|
|
10
|
+
|
|
11
|
+
1. Record a meeting from your microphone, or import an existing audio file.
|
|
12
|
+
2. Capture timestamped live highlights while the meeting is running.
|
|
13
|
+
3. Transcribe the audio and generate a structured meeting note.
|
|
14
|
+
4. Search, reopen, merge, export, or re-transcribe past recordings.
|
|
15
|
+
5. Share completed notes to Notion or Slack when your integrations are configured.
|
|
16
|
+
|
|
17
|
+
## Desktop App
|
|
18
|
+
|
|
19
|
+
Download from [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases):
|
|
20
|
+
|
|
21
|
+
- **macOS**: Intel (x64) and Apple Silicon (arm64) DMG
|
|
22
|
+
- **Windows**: x64 installer
|
|
23
|
+
|
|
24
|
+
The desktop app includes:
|
|
25
|
+
|
|
26
|
+
- One-click recording with meeting title, mic selection, and elapsed timer
|
|
27
|
+
- Optional macOS system audio capture for Zoom, Meet, Teams, browser tabs, and other app audio
|
|
28
|
+
- Drag-and-drop or paste import for audio files
|
|
29
|
+
- Live highlights and timestamped flags during recording
|
|
30
|
+
- Recent recordings with search, transcript status, merge, Finder reveal, and M4A export actions
|
|
31
|
+
- Meeting detection and external display prompts for recording automation
|
|
32
|
+
- Automatic FFmpeg setup when transcription needs it
|
|
33
|
+
- Local configuration shared with the CLI
|
|
34
|
+
|
|
35
|
+

|
|
6
36
|
|
|
7
37
|
## CLI
|
|
8
38
|
|
|
@@ -21,12 +51,20 @@ npx listener-ai <audio-file>
|
|
|
21
51
|
### Prerequisites
|
|
22
52
|
|
|
23
53
|
- **FFmpeg** installed on your system (`brew install ffmpeg` / `apt install ffmpeg`)
|
|
24
|
-
-
|
|
54
|
+
- One of:
|
|
55
|
+
- **Google Gemini API key** from [Google AI Studio](https://makersuite.google.com/app/apikey), or
|
|
56
|
+
- **ChatGPT Plus / Pro subscription** (Codex OAuth)
|
|
25
57
|
|
|
26
58
|
### Setup
|
|
27
59
|
|
|
60
|
+
Pick one AI provider. Gemini uses a static API key; Codex uses a ChatGPT subscription via OAuth sign-in.
|
|
61
|
+
|
|
28
62
|
```bash
|
|
63
|
+
# Option A -- Gemini
|
|
29
64
|
listener config set geminiApiKey <your-key>
|
|
65
|
+
|
|
66
|
+
# Option B -- Codex (uses your ChatGPT Plus/Pro account)
|
|
67
|
+
listener codex login # browser sign-in, sets aiProvider=codex
|
|
30
68
|
```
|
|
31
69
|
|
|
32
70
|
Optional Notion integration:
|
|
@@ -36,20 +74,37 @@ listener config set notionApiKey <your-key>
|
|
|
36
74
|
listener config set notionDatabaseId <your-id>
|
|
37
75
|
```
|
|
38
76
|
|
|
77
|
+
Optional Slack integration:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
listener config set slackWebhookUrl <your-webhook-url>
|
|
81
|
+
listener config set slackAutoShare true # Auto-share when auto mode is enabled
|
|
82
|
+
```
|
|
83
|
+
|
|
39
84
|
### Usage
|
|
40
85
|
|
|
41
86
|
```bash
|
|
42
|
-
listener recording.mp3 # Transcribe to default output
|
|
43
|
-
listener recording.m4a --output ./ # Transcribe to current directory
|
|
44
|
-
listener transcript recording.wav # Print transcript to stdout
|
|
87
|
+
listener recording.mp3 # Transcribe to the default output directory
|
|
88
|
+
listener recording.m4a --output ./ # Transcribe to the current directory
|
|
89
|
+
listener transcript recording.wav # Print transcript to stdout without summary
|
|
45
90
|
listener transcript recording.wav -o out.txt
|
|
46
91
|
# Write transcript to a file
|
|
47
92
|
listener transcript recording.wav --prompt "Translate to English while transcribing"
|
|
48
93
|
# Override the default transcription instruction
|
|
49
|
-
listener
|
|
94
|
+
listener list # List saved transcriptions
|
|
95
|
+
listener show <ref> # Print a saved meeting summary
|
|
96
|
+
listener search "roadmap" # Search past meeting notes
|
|
97
|
+
listener export <ref> --transcript # Export a saved note with transcript
|
|
98
|
+
listener merge <ref1> <ref2> # Merge and re-transcribe multiple notes
|
|
99
|
+
listener ask "What did we decide?" --ref <ref>
|
|
100
|
+
# Ask about a saved meeting
|
|
101
|
+
listener codex login # Sign in with ChatGPT and set aiProvider=codex
|
|
102
|
+
listener codex status # Show Codex OAuth + provider/model status
|
|
103
|
+
listener codex logout # Clear stored Codex OAuth credentials
|
|
104
|
+
listener config list # Show all config values with secrets masked
|
|
50
105
|
listener config get <key> # Print one config value
|
|
51
106
|
listener config set <key> <value> # Set a config value
|
|
52
|
-
listener config unset <key> # Clear a config value
|
|
107
|
+
listener config unset <key> # Clear a config value
|
|
53
108
|
listener config path # Print config file path
|
|
54
109
|
listener --version # Print CLI version
|
|
55
110
|
listener --help # Show usage
|
|
@@ -57,33 +112,36 @@ listener --help # Show usage
|
|
|
57
112
|
|
|
58
113
|
Supported formats: mp3, m4a, wav, ogg, flac, aac, wma, opus, webm
|
|
59
114
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
## Desktop App
|
|
63
|
-
|
|
64
|
-
Download from [GitHub Releases](https://github.com/asleep-ai/listener-ai/releases):
|
|
65
|
-
- **macOS**: Intel (x64) and Apple Silicon (arm64) DMG
|
|
66
|
-
- **Windows**: x64 installer
|
|
67
|
-
|
|
68
|
-
The desktop app includes one-click recording, auto-transcription, Notion upload, and automatic FFmpeg download.
|
|
115
|
+
Full meeting-note output is a folder containing `transcript.md` and `summary.md` with speaker identification, Korean summary, key points, and action items. Transcript-only output can print plain text to stdout or write directly to a file.
|
|
69
116
|
|
|
70
117
|
## Configuration
|
|
71
118
|
|
|
72
119
|
Config is stored in your system application data folder:
|
|
73
|
-
- **macOS**: `~/Library/Application Support/Listener.AI/config.json`
|
|
74
|
-
- **Windows**: `%APPDATA%/Listener.AI/config.json`
|
|
75
|
-
- **Linux**: `~/.config/Listener.AI/config.json`
|
|
76
120
|
|
|
77
|
-
|
|
121
|
+
- **macOS**: `~/Library/Application Support/listener-ai/config.json`
|
|
122
|
+
- **Windows**: `%APPDATA%/listener-ai/config.json`
|
|
123
|
+
- **Linux**: `~/.config/listener-ai/config.json`
|
|
124
|
+
|
|
125
|
+
The CLI and desktop app share the same config file. Existing installs that already have a `Listener.AI` data folder continue to use it as long as no `listener-ai` data folder exists.
|
|
78
126
|
|
|
79
127
|
### Getting API Keys
|
|
80
128
|
|
|
81
129
|
#### Google Gemini API
|
|
130
|
+
|
|
82
131
|
1. Visit [Google AI Studio](https://makersuite.google.com/app/apikey)
|
|
83
132
|
2. Click "Create API Key"
|
|
84
133
|
3. Copy the generated key
|
|
85
134
|
|
|
86
|
-
####
|
|
135
|
+
#### Codex OAuth (ChatGPT Plus/Pro)
|
|
136
|
+
|
|
137
|
+
1. Run `listener codex login` (or sign in from the desktop app's Settings panel).
|
|
138
|
+
2. Complete the browser sign-in to ChatGPT.
|
|
139
|
+
3. Confirm `listener codex status` shows `codexOAuthConfigured=true`.
|
|
140
|
+
|
|
141
|
+
Codex transcription, summarization, and the Ask Listener agent all go through your ChatGPT subscription -- no separate API key needed.
|
|
142
|
+
|
|
143
|
+
#### Notion Integration
|
|
144
|
+
|
|
87
145
|
1. Go to [Notion Integrations](https://www.notion.so/my-integrations)
|
|
88
146
|
2. Create a new integration named "Listener.AI"
|
|
89
147
|
3. Grant permissions: Read, Insert, Update content
|
|
@@ -91,11 +149,18 @@ CLI and desktop app share the same config file.
|
|
|
91
149
|
5. Share your database with the integration
|
|
92
150
|
6. Get the database ID from the URL: `notion.so/workspace/DATABASE_ID`
|
|
93
151
|
|
|
152
|
+
#### Slack Integration
|
|
153
|
+
|
|
154
|
+
1. Create a Slack app with an Incoming Webhook.
|
|
155
|
+
2. Add the webhook to the channel where meeting notes should be posted.
|
|
156
|
+
3. Copy the webhook URL and save it with `listener config set slackWebhookUrl <url>`.
|
|
157
|
+
|
|
94
158
|
## Development
|
|
95
159
|
|
|
96
160
|
```bash
|
|
97
161
|
pnpm install
|
|
98
162
|
pnpm run start # Run Electron app
|
|
163
|
+
pnpm run dev:renderer # Run renderer-only preview with sample data
|
|
99
164
|
pnpm run cli # Run CLI locally
|
|
100
165
|
pnpm run dist:mac # Build macOS
|
|
101
166
|
pnpm run dist:win # Build Windows
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Third-Party Notices
|
|
2
|
+
|
|
3
|
+
This project uses third-party packages under their own licenses. The entries below cover packages added or newly bundled for Codex OAuth support.
|
|
4
|
+
|
|
5
|
+
## @earendil-works/pi-ai
|
|
6
|
+
|
|
7
|
+
- Version: 0.74.0
|
|
8
|
+
- License: MIT
|
|
9
|
+
- Copyright: Mario Zechner
|
|
10
|
+
- Source: https://github.com/earendil-works/pi-mono/tree/main/packages/ai
|
|
11
|
+
|
|
12
|
+
MIT License
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
17
|
+
|
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
19
|
+
|
|
20
|
+
## openai
|
|
21
|
+
|
|
22
|
+
- Version: 6.26.0 (transitive dependency of `@earendil-works/pi-ai`)
|
|
23
|
+
- License: Apache-2.0
|
|
24
|
+
- Copyright: OpenAI
|
|
25
|
+
- Source: https://github.com/openai/openai-node
|
|
26
|
+
|
|
27
|
+
The full Apache License, Version 2.0 text is included in the published `openai` package as `LICENSE`.
|
package/dist/agentService.js
CHANGED
|
@@ -38,7 +38,9 @@ exports.coerceConfigValue = coerceConfigValue;
|
|
|
38
38
|
exports.isValidFolderName = isValidFolderName;
|
|
39
39
|
exports.describeProposal = describeProposal;
|
|
40
40
|
const path = __importStar(require("path"));
|
|
41
|
-
const
|
|
41
|
+
const aiProvider_1 = require("./aiProvider");
|
|
42
|
+
const codexOAuthHolder_1 = require("./codexOAuthHolder");
|
|
43
|
+
const piAiClient_1 = require("./piAiClient");
|
|
42
44
|
const outputService_1 = require("./outputService");
|
|
43
45
|
const searchService_1 = require("./searchService");
|
|
44
46
|
exports.WRITABLE_CONFIG_KEYS = [
|
|
@@ -53,8 +55,11 @@ exports.WRITABLE_CONFIG_KEYS = [
|
|
|
53
55
|
];
|
|
54
56
|
exports.READABLE_CONFIG_KEYS = [
|
|
55
57
|
...exports.WRITABLE_CONFIG_KEYS,
|
|
58
|
+
'aiProvider',
|
|
56
59
|
'geminiModel',
|
|
57
60
|
'geminiFlashModel',
|
|
61
|
+
'codexModel',
|
|
62
|
+
'codexTranscriptionModel',
|
|
58
63
|
];
|
|
59
64
|
function isWritableKey(key) {
|
|
60
65
|
return exports.WRITABLE_CONFIG_KEYS.includes(key);
|
|
@@ -94,84 +99,63 @@ function coerceConfigValue(key, raw) {
|
|
|
94
99
|
}
|
|
95
100
|
}
|
|
96
101
|
}
|
|
97
|
-
|
|
102
|
+
// Pi-ai validates tool arguments against TypeBox schemas. We build them lazily
|
|
103
|
+
// because TypeBox lives inside the ESM-only pi-ai package; resolving the
|
|
104
|
+
// schemas in module scope would fire a synchronous require() before pi-ai is
|
|
105
|
+
// loaded.
|
|
106
|
+
async function buildTools(scope, hasConfirm) {
|
|
107
|
+
const Type = await (0, piAiClient_1.getTypeBox)();
|
|
98
108
|
const tools = [];
|
|
99
109
|
if (scope.kind === 'all') {
|
|
100
110
|
tools.push({
|
|
101
111
|
name: 'search_transcriptions',
|
|
102
112
|
description: 'Full-text search across saved meeting transcriptions. Returns top-k hits with title, date, snippet, and folder name. Use this to find meetings relevant to the user question.',
|
|
103
|
-
parameters: {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
description: 'Also search the full transcript body (slower). Default false.',
|
|
111
|
-
},
|
|
112
|
-
},
|
|
113
|
-
required: ['query'],
|
|
114
|
-
},
|
|
113
|
+
parameters: Type.Object({
|
|
114
|
+
query: Type.String({ description: 'Search keywords. Can be Korean or English.' }),
|
|
115
|
+
limit: Type.Optional(Type.Integer({ description: 'Max hits to return (default 5).' })),
|
|
116
|
+
include_transcript: Type.Optional(Type.Boolean({
|
|
117
|
+
description: 'Also search the full transcript body (slower). Default false.',
|
|
118
|
+
})),
|
|
119
|
+
}),
|
|
115
120
|
});
|
|
116
121
|
tools.push({
|
|
117
122
|
name: 'list_recent_transcriptions',
|
|
118
123
|
description: 'List the most recent saved transcriptions, newest first. Use when the user asks "what did we talk about recently" or "show me yesterday\'s meetings".',
|
|
119
|
-
parameters: {
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
limit: { type: genai_1.Type.INTEGER, description: 'Max entries (default 10).' },
|
|
123
|
-
},
|
|
124
|
-
},
|
|
124
|
+
parameters: Type.Object({
|
|
125
|
+
limit: Type.Optional(Type.Integer({ description: 'Max entries (default 10).' })),
|
|
126
|
+
}),
|
|
125
127
|
});
|
|
126
128
|
tools.push({
|
|
127
129
|
name: 'get_transcription',
|
|
128
130
|
description: 'Fetch a saved meeting record (summary, key points, action items) by folder name. Pass include_transcript=true only when you need the verbatim transcript body; omit it for summary-level questions to keep the response compact.',
|
|
129
|
-
parameters: {
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
},
|
|
136
|
-
include_transcript: {
|
|
137
|
-
type: genai_1.Type.BOOLEAN,
|
|
138
|
-
description: 'Include the full transcript body. Default false.',
|
|
139
|
-
},
|
|
140
|
-
},
|
|
141
|
-
required: ['folder_name'],
|
|
142
|
-
},
|
|
131
|
+
parameters: Type.Object({
|
|
132
|
+
folder_name: Type.String({
|
|
133
|
+
description: 'The folderName returned by search_transcriptions or list_recent_transcriptions.',
|
|
134
|
+
}),
|
|
135
|
+
include_transcript: Type.Optional(Type.Boolean({ description: 'Include the full transcript body. Default false.' })),
|
|
136
|
+
}),
|
|
143
137
|
});
|
|
144
138
|
}
|
|
145
139
|
tools.push({
|
|
146
140
|
name: 'get_config',
|
|
147
141
|
description: `Read a single Listener.AI setting value. Allowed keys: ${exports.READABLE_CONFIG_KEYS.join(', ')}. API keys and database IDs are never readable here.`,
|
|
148
|
-
parameters: {
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
key: { type: genai_1.Type.STRING, description: `One of: ${exports.READABLE_CONFIG_KEYS.join(', ')}` },
|
|
152
|
-
},
|
|
153
|
-
required: ['key'],
|
|
154
|
-
},
|
|
142
|
+
parameters: Type.Object({
|
|
143
|
+
key: Type.String({ description: `One of: ${exports.READABLE_CONFIG_KEYS.join(', ')}` }),
|
|
144
|
+
}),
|
|
155
145
|
});
|
|
156
146
|
if (hasConfirm) {
|
|
157
147
|
tools.push({
|
|
158
148
|
name: 'set_config',
|
|
159
149
|
description: `Propose a change to a Listener.AI setting. Requires user confirmation before taking effect. Allowed keys: ${exports.WRITABLE_CONFIG_KEYS.join(', ')}. Do NOT try to set API keys, Notion database ID, or other credentials here.`,
|
|
160
|
-
parameters: {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
type: genai_1.Type.STRING,
|
|
170
|
-
description: 'Short human-readable reason shown to the user in the confirmation prompt.',
|
|
171
|
-
},
|
|
172
|
-
},
|
|
173
|
-
required: ['key', 'value'],
|
|
174
|
-
},
|
|
150
|
+
parameters: Type.Object({
|
|
151
|
+
key: Type.String({ description: `One of: ${exports.WRITABLE_CONFIG_KEYS.join(', ')}` }),
|
|
152
|
+
value: Type.String({
|
|
153
|
+
description: 'The new value. For booleans pass "true"/"false"; for numbers pass the digits as a string; for strings pass the string.',
|
|
154
|
+
}),
|
|
155
|
+
reason: Type.Optional(Type.String({
|
|
156
|
+
description: 'Short human-readable reason shown to the user in the confirmation prompt.',
|
|
157
|
+
})),
|
|
158
|
+
}),
|
|
175
159
|
});
|
|
176
160
|
}
|
|
177
161
|
return tools;
|
|
@@ -216,103 +200,142 @@ function buildSinglePrimer(data) {
|
|
|
216
200
|
}
|
|
217
201
|
return lines.join('\n');
|
|
218
202
|
}
|
|
219
|
-
|
|
203
|
+
// historyToMessages: replay prior conversation as pi-ai Messages. Model turns are replayed in
|
|
204
|
+
// full (assistant content + tool results) when `piaiMessages` is present so
|
|
205
|
+
// the model can reason about its earlier tool use. Without those, we degrade
|
|
206
|
+
// gracefully to plain text -- this is the path old-format history entries
|
|
207
|
+
// (pre-migration) take, and the path the renderer takes on a fresh session.
|
|
208
|
+
// synthAssistantText: replay an old AgentChatMessage as a pi-ai assistant message when the
|
|
209
|
+
// caller didn't carry the full `piaiMessages` cluster forward. The api /
|
|
210
|
+
// provider / model fields on assistant messages drive cross-provider handoff
|
|
211
|
+
// transformations inside pi-ai, but plain-text replay carries no thinking or
|
|
212
|
+
// tool-call content for pi-ai to massage -- the values just need to parse.
|
|
213
|
+
function synthAssistantText(text, provider) {
|
|
214
|
+
const isCodex = provider === 'codex';
|
|
215
|
+
return {
|
|
216
|
+
role: 'assistant',
|
|
217
|
+
content: [{ type: 'text', text }],
|
|
218
|
+
api: isCodex ? 'openai-codex-responses' : 'google-generative-ai',
|
|
219
|
+
provider: isCodex ? 'openai-codex' : 'google',
|
|
220
|
+
model: '',
|
|
221
|
+
};
|
|
222
|
+
}
|
|
223
|
+
function historyToMessages(history, provider) {
|
|
220
224
|
const out = [];
|
|
221
225
|
for (const m of history) {
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
+
if (m.role === 'model' && m.piaiMessages && m.piaiMessages.length > 0) {
|
|
227
|
+
out.push(...m.piaiMessages);
|
|
228
|
+
continue;
|
|
229
|
+
}
|
|
230
|
+
if (m.role === 'model') {
|
|
231
|
+
out.push(synthAssistantText(m.text, provider));
|
|
226
232
|
continue;
|
|
227
233
|
}
|
|
228
|
-
out.push({ role:
|
|
234
|
+
out.push({ role: 'user', content: m.text, timestamp: Date.now() });
|
|
229
235
|
}
|
|
230
236
|
return out;
|
|
231
237
|
}
|
|
232
|
-
function
|
|
233
|
-
|
|
234
|
-
return '';
|
|
235
|
-
return parts
|
|
236
|
-
.map((p) => (typeof p.text === 'string' ? p.text : ''))
|
|
237
|
-
.filter(Boolean)
|
|
238
|
-
.join('\n')
|
|
239
|
-
.trim();
|
|
238
|
+
function extractToolCalls(message) {
|
|
239
|
+
return message.content.filter((b) => b.type === 'toolCall');
|
|
240
240
|
}
|
|
241
241
|
class AgentService {
|
|
242
242
|
constructor(opts) {
|
|
243
|
-
this.
|
|
243
|
+
this.provider = opts.provider ?? 'gemini';
|
|
244
|
+
if (this.provider === 'gemini') {
|
|
245
|
+
if (!opts.apiKey) {
|
|
246
|
+
throw new Error('Gemini API key is required for the Gemini provider.');
|
|
247
|
+
}
|
|
248
|
+
this.geminiApiKey = opts.apiKey;
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
this.codexAuth = new codexOAuthHolder_1.CodexOAuthHolder({
|
|
252
|
+
credentials: opts.codexOAuth,
|
|
253
|
+
onUpdate: opts.onCodexOAuthUpdate,
|
|
254
|
+
});
|
|
255
|
+
}
|
|
244
256
|
this.dataPath = opts.dataPath;
|
|
245
257
|
this.configService = opts.configService;
|
|
246
|
-
this.defaultModel =
|
|
258
|
+
this.defaultModel =
|
|
259
|
+
opts.defaultModel ??
|
|
260
|
+
(this.provider === 'codex'
|
|
261
|
+
? opts.codexModel || aiProvider_1.DEFAULT_CODEX_MODEL
|
|
262
|
+
: opts.configService.getGeminiFlashModel());
|
|
263
|
+
}
|
|
264
|
+
// For Codex we mint a fresh access token per request (the holder rotates
|
|
265
|
+
// it transparently). For Gemini we already have the static key in hand.
|
|
266
|
+
async resolveApiKey() {
|
|
267
|
+
if (this.codexAuth)
|
|
268
|
+
return await this.codexAuth.getToken();
|
|
269
|
+
if (!this.geminiApiKey) {
|
|
270
|
+
throw new Error('Gemini API key is not configured.');
|
|
271
|
+
}
|
|
272
|
+
return this.geminiApiKey;
|
|
247
273
|
}
|
|
248
274
|
async run(opts) {
|
|
249
|
-
const
|
|
275
|
+
const modelId = opts.model ?? this.defaultModel;
|
|
250
276
|
const maxSteps = opts.maxSteps ?? 6;
|
|
251
|
-
const tools = buildTools(opts.scope, !!opts.confirm);
|
|
252
|
-
// Load the single-meeting record once
|
|
277
|
+
const tools = await buildTools(opts.scope, !!opts.confirm);
|
|
278
|
+
// Load the single-meeting record once so the system prompt can name the
|
|
279
|
+
// meeting and the primer message can carry its body.
|
|
253
280
|
const singleData = opts.scope.kind === 'single' && isValidFolderName(opts.scope.folderName)
|
|
254
281
|
? await (0, outputService_1.readTranscription)(path.join((0, outputService_1.getTranscriptionsDir)(this.dataPath), opts.scope.folderName))
|
|
255
282
|
: null;
|
|
256
|
-
const systemInstruction = systemInstructionFor(opts.scope, singleData?.title);
|
|
257
283
|
const history = opts.history ? [...opts.history] : [];
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
284
|
+
const context = {
|
|
285
|
+
systemPrompt: systemInstructionFor(opts.scope, singleData?.title),
|
|
286
|
+
messages: [],
|
|
287
|
+
tools: tools.length > 0 ? tools : undefined,
|
|
288
|
+
};
|
|
289
|
+
// Single-meeting primer goes first so the model sees the meeting body
|
|
290
|
+
// before any of its own prior turns about it.
|
|
261
291
|
if (singleData) {
|
|
262
|
-
|
|
292
|
+
context.messages.push({
|
|
293
|
+
role: 'user',
|
|
294
|
+
content: buildSinglePrimer(singleData),
|
|
295
|
+
timestamp: Date.now(),
|
|
296
|
+
});
|
|
263
297
|
}
|
|
264
|
-
for (const
|
|
265
|
-
|
|
266
|
-
|
|
298
|
+
for (const m of historyToMessages(history, this.provider))
|
|
299
|
+
context.messages.push(m);
|
|
300
|
+
context.messages.push({ role: 'user', content: opts.question, timestamp: Date.now() });
|
|
267
301
|
history.push({ role: 'user', text: opts.question });
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
const modelTurnsStart = contents.length;
|
|
302
|
+
const model = await (0, piAiClient_1.getModel)(this.provider, modelId);
|
|
303
|
+
const turnsStart = context.messages.length;
|
|
271
304
|
const applied = [];
|
|
272
305
|
let finalAnswer = '';
|
|
273
306
|
for (let step = 0; step < maxSteps; step++) {
|
|
274
|
-
const
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
tools: tools.length > 0 ? [{ functionDeclarations: tools }] : undefined,
|
|
281
|
-
},
|
|
282
|
-
});
|
|
283
|
-
const candidate = response.candidates?.[0];
|
|
284
|
-
const parts = candidate?.content?.parts ?? [];
|
|
285
|
-
const functionCalls = response.functionCalls ?? [];
|
|
286
|
-
// Record model turn verbatim (keeps function call history correct).
|
|
287
|
-
if (candidate?.content) {
|
|
288
|
-
contents.push(candidate.content);
|
|
289
|
-
}
|
|
290
|
-
if (functionCalls.length === 0) {
|
|
291
|
-
finalAnswer = extractFinalText(parts);
|
|
307
|
+
const apiKey = await this.resolveApiKey();
|
|
308
|
+
const response = await (0, piAiClient_1.complete)(model, context, { apiKey, temperature: 0.3 });
|
|
309
|
+
context.messages.push(response);
|
|
310
|
+
const toolCalls = extractToolCalls(response);
|
|
311
|
+
if (toolCalls.length === 0) {
|
|
312
|
+
finalAnswer = (0, piAiClient_1.extractFinalText)(response);
|
|
292
313
|
break;
|
|
293
314
|
}
|
|
294
|
-
//
|
|
295
|
-
//
|
|
296
|
-
//
|
|
297
|
-
const results = await Promise.all(
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
315
|
+
// Read-only tools (search/list/get) run in parallel; set_config awaits a
|
|
316
|
+
// user click but that still happens concurrently with the reads rather
|
|
317
|
+
// than serializing the round-trip.
|
|
318
|
+
const results = await Promise.all(toolCalls.map((call) => this.dispatchTool(call, opts, applied)));
|
|
319
|
+
for (let i = 0; i < toolCalls.length; i++) {
|
|
320
|
+
context.messages.push({
|
|
321
|
+
role: 'toolResult',
|
|
322
|
+
toolCallId: toolCalls[i].id,
|
|
323
|
+
toolName: toolCalls[i].name,
|
|
324
|
+
content: [{ type: 'text', text: JSON.stringify(results[i]) }],
|
|
325
|
+
isError: false,
|
|
326
|
+
timestamp: Date.now(),
|
|
327
|
+
});
|
|
328
|
+
}
|
|
306
329
|
}
|
|
307
330
|
if (!finalAnswer) {
|
|
308
331
|
finalAnswer = '(no answer produced within step limit)';
|
|
309
332
|
}
|
|
310
|
-
const
|
|
311
|
-
history.push({ role: 'model', text: finalAnswer,
|
|
333
|
+
const piaiMessages = context.messages.slice(turnsStart);
|
|
334
|
+
history.push({ role: 'model', text: finalAnswer, piaiMessages });
|
|
312
335
|
return { answer: finalAnswer, appliedActions: applied, history };
|
|
313
336
|
}
|
|
314
337
|
async dispatchTool(call, opts, applied) {
|
|
315
|
-
const args =
|
|
338
|
+
const args = call.arguments ?? {};
|
|
316
339
|
try {
|
|
317
340
|
switch (call.name) {
|
|
318
341
|
case 'search_transcriptions': {
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CODEX_TRANSCRIPTION_NON_DIARIZE_MODEL = exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = exports.DEFAULT_CODEX_MODEL = exports.DEFAULT_GEMINI_FLASH_MODEL = exports.DEFAULT_GEMINI_MODEL = exports.AI_PROVIDERS = void 0;
|
|
4
|
+
exports.isAiProvider = isAiProvider;
|
|
5
|
+
exports.normalizeAiProvider = normalizeAiProvider;
|
|
6
|
+
exports.toPiAiProvider = toPiAiProvider;
|
|
7
|
+
exports.AI_PROVIDERS = ['gemini', 'codex'];
|
|
8
|
+
exports.DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro';
|
|
9
|
+
exports.DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash';
|
|
10
|
+
exports.DEFAULT_CODEX_MODEL = 'gpt-5.5';
|
|
11
|
+
// gpt-4o-transcribe-diarize ships native speaker diarization at the same
|
|
12
|
+
// per-minute price ($0.006/min) as the non-diarize model. Trade-offs vs
|
|
13
|
+
// gpt-4o-transcribe (see docs/model-pricing.md):
|
|
14
|
+
// - doesn't accept the `prompt` parameter, so user glossaries
|
|
15
|
+
// (`knownWords`) are silently dropped on this path
|
|
16
|
+
// - we still segment audio into 5-min chunks for parallel-upload speed,
|
|
17
|
+
// so "Speaker 0" in chunk 1 is not guaranteed to be the same physical
|
|
18
|
+
// person as "Speaker 0" in chunk 2
|
|
19
|
+
exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = 'gpt-4o-transcribe-diarize';
|
|
20
|
+
// Pre-diarize model id. Useful for users who want the older prompt-driven
|
|
21
|
+
// behavior (vocabulary hints via `knownWords`) at the cost of speaker
|
|
22
|
+
// labels. Switch via `listener config set codexTranscriptionModel gpt-4o-transcribe`.
|
|
23
|
+
exports.CODEX_TRANSCRIPTION_NON_DIARIZE_MODEL = 'gpt-4o-transcribe';
|
|
24
|
+
function isAiProvider(value) {
|
|
25
|
+
return exports.AI_PROVIDERS.includes(value);
|
|
26
|
+
}
|
|
27
|
+
function normalizeAiProvider(value) {
|
|
28
|
+
if (typeof value !== 'string')
|
|
29
|
+
return undefined;
|
|
30
|
+
const normalized = value.trim().toLowerCase();
|
|
31
|
+
return isAiProvider(normalized) ? normalized : undefined;
|
|
32
|
+
}
|
|
33
|
+
function toPiAiProvider(provider) {
|
|
34
|
+
return provider === 'codex' ? 'openai-codex' : 'google';
|
|
35
|
+
}
|