listener-ai 2.7.0 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/aiProvider.js +14 -2
- package/dist/codexTranscription.js +83 -2
- package/dist/configService.js +25 -0
- package/dist/geminiService.js +62 -9
- package/dist/piAiClient.js +61 -3
- package/package.json +1 -1
package/dist/aiProvider.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = exports.DEFAULT_CODEX_MODEL = exports.DEFAULT_GEMINI_FLASH_MODEL = exports.DEFAULT_GEMINI_MODEL = exports.AI_PROVIDERS = void 0;
|
|
3
|
+
exports.CODEX_TRANSCRIPTION_NON_DIARIZE_MODEL = exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = exports.DEFAULT_CODEX_MODEL = exports.DEFAULT_GEMINI_FLASH_MODEL = exports.DEFAULT_GEMINI_MODEL = exports.AI_PROVIDERS = void 0;
|
|
4
4
|
exports.isAiProvider = isAiProvider;
|
|
5
5
|
exports.normalizeAiProvider = normalizeAiProvider;
|
|
6
6
|
exports.toPiAiProvider = toPiAiProvider;
|
|
@@ -8,7 +8,19 @@ exports.AI_PROVIDERS = ['gemini', 'codex'];
|
|
|
8
8
|
exports.DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro';
|
|
9
9
|
exports.DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash';
|
|
10
10
|
exports.DEFAULT_CODEX_MODEL = 'gpt-5.5';
|
|
11
|
-
|
|
11
|
+
// gpt-4o-transcribe-diarize ships native speaker diarization at the same
|
|
12
|
+
// per-minute price ($0.006/min) as the non-diarize model. Trade-offs vs
|
|
13
|
+
// gpt-4o-transcribe (see docs/model-pricing.md):
|
|
14
|
+
// - doesn't accept the `prompt` parameter, so user glossaries
|
|
15
|
+
// (`knownWords`) are silently dropped on this path
|
|
16
|
+
// - we still segment audio into 5-min chunks for parallel-upload speed,
|
|
17
|
+
// so "Speaker 0" in chunk 1 is not guaranteed to be the same physical
|
|
18
|
+
// person as "Speaker 0" in chunk 2
|
|
19
|
+
exports.DEFAULT_CODEX_TRANSCRIPTION_MODEL = 'gpt-4o-transcribe-diarize';
|
|
20
|
+
// Pre-diarize model id. Useful for users who want the older prompt-driven
|
|
21
|
+
// behavior (vocabulary hints via `knownWords`) at the cost of speaker
|
|
22
|
+
// labels. Switch via `listener config set codexTranscriptionModel gpt-4o-transcribe`.
|
|
23
|
+
exports.CODEX_TRANSCRIPTION_NON_DIARIZE_MODEL = 'gpt-4o-transcribe';
|
|
12
24
|
/**
 * Check whether a value is one of the provider ids listed in
 * `exports.AI_PROVIDERS`.
 *
 * @param {unknown} value - Candidate provider id.
 * @returns {boolean} `true` when `value` is an element of the provider list.
 */
function isAiProvider(value) {
    const knownProviders = exports.AI_PROVIDERS;
    return knownProviders.includes(value);
}
|
|
@@ -6,6 +6,16 @@
|
|
|
6
6
|
// Codex transcription flow needs only a multipart POST, so a thin direct
|
|
7
7
|
// fetch is simpler than wedging audio into pi-ai's chat model.
|
|
8
8
|
//
|
|
9
|
+
// Two output shapes, branched on model id:
|
|
10
|
+
// - `gpt-4o-transcribe-diarize` (default) returns `diarized_json` with
|
|
11
|
+
// speaker-labeled segments. We re-label "Speaker 0/1/..." onto the
|
|
12
|
+
// same `참가자N` convention the Gemini path uses so downstream code
|
|
13
|
+
// (summarization, transcript.md, Notion) doesn't have to care which
|
|
14
|
+
// transcription engine produced the text. This model rejects `prompt`,
|
|
15
|
+
// so user-supplied glossaries (`knownWords`) are dropped on this path.
|
|
16
|
+
// - `gpt-4o-transcribe` (and `whisper-1`) return `{text}` and accept
|
|
17
|
+
// `prompt` for vocabulary biasing, but produce no speaker labels.
|
|
18
|
+
//
|
|
9
19
|
// Format support: OpenAI accepts mp3, mp4, mpeg, mpga, m4a, wav, webm. Inputs
|
|
10
20
|
// outside that set are remuxed upstream in geminiService.ts via ffmpeg before
|
|
11
21
|
// reaching this helper.
|
|
@@ -44,11 +54,14 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
44
54
|
})();
|
|
45
55
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
56
|
exports.OPENAI_TRANSCRIPTION_EXTENSIONS = void 0;
|
|
57
|
+
exports.isDiarizeModel = isDiarizeModel;
|
|
47
58
|
exports.transcribeCodexAudio = transcribeCodexAudio;
|
|
59
|
+
exports.formatDiarizedSegments = formatDiarizedSegments;
|
|
48
60
|
const fs = __importStar(require("fs"));
|
|
49
61
|
const path = __importStar(require("path"));
|
|
50
62
|
const audioFormats_1 = require("./audioFormats");
|
|
51
63
|
const OPENAI_API_BASE_URL = 'https://api.openai.com/v1';
|
|
64
|
+
const DIARIZE_MODEL_ID = 'gpt-4o-transcribe-diarize';
|
|
52
65
|
exports.OPENAI_TRANSCRIPTION_EXTENSIONS = new Set([
|
|
53
66
|
'.mp3',
|
|
54
67
|
'.mp4',
|
|
@@ -58,20 +71,41 @@ exports.OPENAI_TRANSCRIPTION_EXTENSIONS = new Set([
|
|
|
58
71
|
'.wav',
|
|
59
72
|
'.webm',
|
|
60
73
|
]);
|
|
74
|
+
/**
 * Report whether `model` names the speaker-diarizing transcription model.
 *
 * Surrounding whitespace is ignored. Anything that is not a string (for
 * example an absent model id) yields `false` instead of throwing on `.trim()`.
 *
 * @param {unknown} model - Candidate model id.
 * @returns {boolean} `true` only when the trimmed id equals DIARIZE_MODEL_ID.
 */
function isDiarizeModel(model) {
    return typeof model === 'string' && model.trim() === DIARIZE_MODEL_ID;
}
|
|
61
77
|
async function transcribeCodexAudio(params) {
|
|
62
78
|
const audioData = fs.readFileSync(params.audioFilePath);
|
|
63
79
|
const ext = path.extname(params.audioFilePath);
|
|
80
|
+
const model = params.model.trim();
|
|
81
|
+
const diarize = isDiarizeModel(model);
|
|
64
82
|
const form = new FormData();
|
|
65
|
-
form.append('model',
|
|
66
|
-
if (params.
|
|
83
|
+
form.append('model', model);
|
|
84
|
+
if (params.language) {
|
|
85
|
+
form.append('language', params.language);
|
|
86
|
+
}
|
|
87
|
+
if (diarize) {
|
|
88
|
+
// Required for the diarize model. `chunking_strategy=auto` lets OpenAI
|
|
89
|
+
// split long audio internally while keeping speaker identity coherent
|
|
90
|
+
// across chunks -- so we can hand it a whole 50-minute meeting (subject
|
|
91
|
+
// to the 25MB file-size limit upstream).
|
|
92
|
+
form.append('response_format', 'diarized_json');
|
|
93
|
+
form.append('chunking_strategy', 'auto');
|
|
94
|
+
}
|
|
95
|
+
else if (params.prompt?.trim()) {
|
|
67
96
|
form.append('prompt', params.prompt.trim());
|
|
68
97
|
}
|
|
69
98
|
form.append('file', new Blob([audioData], { type: (0, audioFormats_1.mimeTypeForExtension)(ext) }), path.basename(params.audioFilePath));
|
|
99
|
+
const sizeMB = (audioData.byteLength / (1024 * 1024)).toFixed(2);
|
|
100
|
+
const startedAt = Date.now();
|
|
101
|
+
console.log(`[codex-transcribe] -> ${path.basename(params.audioFilePath)} ${sizeMB}MB model=${model}${diarize ? ' diarize=true' : params.prompt ? ` prompt=${params.prompt.length}chars` : ''}${params.language ? ` lang=${params.language}` : ''}`);
|
|
70
102
|
const response = await fetch(`${OPENAI_API_BASE_URL}/audio/transcriptions`, {
|
|
71
103
|
method: 'POST',
|
|
72
104
|
headers: { Authorization: `Bearer ${await params.getToken()}` },
|
|
73
105
|
body: form,
|
|
74
106
|
});
|
|
107
|
+
const elapsed = Date.now() - startedAt;
|
|
108
|
+
console.log(`[codex-transcribe] <- ${elapsed}ms status=${response.status} ${response.statusText}`);
|
|
75
109
|
if (!response.ok) {
|
|
76
110
|
// Truncate the error body so a verbose upstream response doesn't leak
|
|
77
111
|
// headers/debug payload into logs and IPC error strings.
|
|
@@ -79,9 +113,56 @@ async function transcribeCodexAudio(params) {
|
|
|
79
113
|
const trimmed = body.length > 500 ? `${body.slice(0, 500)}...` : body;
|
|
80
114
|
throw new Error(`OpenAI transcription failed (${response.status} ${response.statusText})${trimmed ? `: ${trimmed}` : ''}`);
|
|
81
115
|
}
|
|
116
|
+
if (diarize) {
|
|
117
|
+
const payload = (await response.json());
|
|
118
|
+
return formatDiarizedSegments(payload.segments);
|
|
119
|
+
}
|
|
82
120
|
const payload = (await response.json());
|
|
83
121
|
if (typeof payload.text !== 'string' || payload.text.trim().length === 0) {
|
|
84
122
|
throw new Error('OpenAI transcription response missing text');
|
|
85
123
|
}
|
|
86
124
|
return payload.text;
|
|
87
125
|
}
|
|
126
|
+
// Re-label OpenAI's raw speaker ids ("Speaker 0", "Speaker 1", or the names
|
|
127
|
+
// supplied via `known_speaker_names[]` if used) onto our `참가자N` convention,
|
|
128
|
+
// matching the format Gemini emits when prompted for speaker labels. Empty
|
|
129
|
+
// segments are dropped; consecutive segments from the same speaker are merged
|
|
130
|
+
// onto a single line so downstream consumers don't see one speaker split into
|
|
131
|
+
// 30+ "참가자1: ..." stubs.
|
|
132
|
+
/**
 * Turn diarized segments into a speaker-labelled transcript.
 *
 * Each distinct `speaker` value is numbered in order of first appearance and
 * rendered as `참가자N`; segments without a `speaker` share the 'unknown'
 * bucket. Consecutive segments from the same speaker are joined with a single
 * space, and speaker turns are separated by a blank line.
 *
 * @param {Array<{speaker?: string, text?: string}>} segments - Segment list
 *   taken from the diarized transcription response.
 * @returns {string} Lines of the form `참가자N: text`, joined by "\n\n".
 * @throws {Error} If `segments` is missing, not an array, or empty, or if no
 *   segment carries usable text.
 */
function formatDiarizedSegments(segments) {
    // A malformed payload (e.g. `segments` arriving as an object) must surface
    // as the same descriptive error as a missing list, not as a bare
    // "not iterable" TypeError from the loop below.
    if (!Array.isArray(segments) || segments.length === 0) {
        throw new Error('OpenAI diarized transcription returned no segments');
    }
    const speakerIdx = new Map();
    const turns = [];
    for (const seg of segments) {
        // Null entries and non-string text are treated like empty segments
        // rather than crashing on `.trim()`.
        const text = typeof seg?.text === 'string' ? seg.text.trim() : '';
        if (!text) {
            continue;
        }
        const rawSpeaker = seg.speaker ?? 'unknown';
        let idx = speakerIdx.get(rawSpeaker);
        if (idx === undefined) {
            // Numbering is 1-based and follows first appearance.
            idx = speakerIdx.size + 1;
            speakerIdx.set(rawSpeaker, idx);
        }
        const label = `참가자${idx}`;
        const lastTurn = turns[turns.length - 1];
        if (lastTurn !== undefined && lastTurn.label === label) {
            // Same speaker keeps talking: extend the current turn.
            lastTurn.parts.push(text);
        }
        else {
            turns.push({ label, parts: [text] });
        }
    }
    if (turns.length === 0) {
        throw new Error('OpenAI diarized transcription had segments but no usable text');
    }
    return turns.map((turn) => `${turn.label}: ${turn.parts.join(' ')}`).join('\n\n');
}
|
package/dist/configService.js
CHANGED
|
@@ -80,6 +80,31 @@ class ConfigService {
|
|
|
80
80
|
}
|
|
81
81
|
this.configPath = path.join(userDataPath, 'config.json');
|
|
82
82
|
this.loadConfig();
|
|
83
|
+
this.migrateLegacyDefaults();
|
|
84
|
+
}
|
|
85
|
+
// One-shot upgrade hook for keys that older versions auto-persisted from
|
|
86
|
+
// their then-current default. The settings modal in those versions wrote
|
|
87
|
+
// back the full payload on save -- including fields the user never
|
|
88
|
+
// touched -- so the next default change can't reach existing installs.
|
|
89
|
+
// Today's case: `codexTranscriptionModel: 'gpt-4o-transcribe'` was the
|
|
90
|
+
// legacy default before gpt-4o-transcribe-diarize shipped; clearing it
|
|
91
|
+
// here lets `getCodexTranscriptionModel()` return the current default
|
|
92
|
+
// (diarize) without forcing every user to manually unset it.
|
|
93
|
+
//
|
|
94
|
+
// The marker semantics are "we've considered migrating this user" --
|
|
95
|
+
// it lands on EVERY install on first launch, not just the ones we
|
|
96
|
+
// actually had to migrate. That way if a user later opts back into
|
|
97
|
+
// `gpt-4o-transcribe` deliberately (e.g. for glossary support), the
|
|
98
|
+
// next ConfigService construction sees the marker and skips the
|
|
99
|
+
// migration entirely instead of clobbering their explicit choice.
|
|
100
|
+
migrateLegacyDefaults() {
|
|
101
|
+
if (this.config.codexTranscriptionMigratedToDiarize)
|
|
102
|
+
return;
|
|
103
|
+
if (this.config.codexTranscriptionModel === 'gpt-4o-transcribe') {
|
|
104
|
+
this.setKey('codexTranscriptionModel', undefined);
|
|
105
|
+
}
|
|
106
|
+
this.setKey('codexTranscriptionMigratedToDiarize', true);
|
|
107
|
+
this.saveConfig();
|
|
83
108
|
}
|
|
84
109
|
loadConfig() {
|
|
85
110
|
try {
|
package/dist/geminiService.js
CHANGED
|
@@ -197,7 +197,14 @@ class GeminiService {
|
|
|
197
197
|
const modelId = this.provider === 'codex' ? this.codexModel : this.proModel;
|
|
198
198
|
const apiKey = this.provider === 'codex' ? await this.getCodexToken() : this.requireGeminiApiKey();
|
|
199
199
|
const model = await (0, piAiClient_1.getModel)(this.provider, modelId);
|
|
200
|
+
// Force formal Korean register (합니다체). Codex (GPT-5.x) defaults to
|
|
201
|
+
// mixed/해요체 in Korean output; Gemini tends to 합니다체 already but the
|
|
202
|
+
// explicit constraint keeps both providers consistent. Applied as a system
|
|
203
|
+
// prompt so it overrides whatever tone the user's customSummaryPrompt
|
|
204
|
+
// implies for summary/keyPoints/actionItems bodies.
|
|
205
|
+
const koreanToneSystem = '모든 한국어 출력은 격식체(합니다/입니다 어미)로 작성하세요. 반말이나 해요체를 쓰지 마세요. summary, keyPoints, actionItems 본문 모두 동일하게 적용합니다.';
|
|
200
206
|
const context = {
|
|
207
|
+
systemPrompt: koreanToneSystem,
|
|
201
208
|
messages: [
|
|
202
209
|
{
|
|
203
210
|
role: 'user',
|
|
@@ -210,6 +217,12 @@ class GeminiService {
|
|
|
210
217
|
apiKey,
|
|
211
218
|
temperature: 0.2,
|
|
212
219
|
maxTokens: 32768,
|
|
220
|
+
// Codex-only knobs; pi-ai's google provider ignores unknown keys.
|
|
221
|
+
// pi-ai omits `reasoning.effort` by default (server default ~medium); we
|
|
222
|
+
// force xhigh for deepest analysis -- gpt-5.5's thinkingLevelMap maps
|
|
223
|
+
// xhigh -> "max". Verbosity stays at pi-ai's "low" default (terse output
|
|
224
|
+
// is fine; reasoning depth is what was missing).
|
|
225
|
+
reasoningEffort: 'xhigh',
|
|
213
226
|
});
|
|
214
227
|
return (0, piAiClient_1.extractFinalText)(response);
|
|
215
228
|
}
|
|
@@ -361,15 +374,35 @@ class GeminiService {
|
|
|
361
374
|
}
|
|
362
375
|
}
|
|
363
376
|
// Split audio file into segments
|
|
364
|
-
async splitAudioIntoSegments(audioFilePath, segmentDuration = 300
|
|
377
|
+
async splitAudioIntoSegments(audioFilePath, segmentDuration = 300,
|
|
378
|
+
// re-encode segments instead of `-c copy`. ffmpeg's segment muxer can
|
|
379
|
+
// only cut at keyframes when copying, and webm-opus has near-zero
|
|
380
|
+
// keyframes by default -- so `-c copy -segment_time 300` silently
|
|
381
|
+
// produces 30+ minute segments that blow past gpt-4o-transcribe's
|
|
382
|
+
// 1400-second per-request limit. Caller passes `reencode: true` for
|
|
383
|
+
// the Codex transcription path; Gemini's API is tolerant of long
|
|
384
|
+
// inputs and stays on the faster `-c copy` path.
|
|
385
|
+
reencode = false) {
|
|
365
386
|
const outputDir = path.dirname(audioFilePath);
|
|
366
387
|
const baseName = path.basename(audioFilePath, path.extname(audioFilePath));
|
|
367
388
|
const ext = path.extname(audioFilePath);
|
|
368
|
-
|
|
389
|
+
// When re-encoding to opus we MUST force a container that supports
|
|
390
|
+
// opus -- ffmpeg picks the muxer from the output extension, so leaving
|
|
391
|
+
// an imported `.mp3`/`.m4a`/`.wav` source as `.mp3` makes ffmpeg pick
|
|
392
|
+
// the MP3 muxer and reject the opus stream. `.webm` is in OpenAI's
|
|
393
|
+
// supported transcription extensions, so the segments still upload.
|
|
394
|
+
const segmentExt = reencode ? '.webm' : ext;
|
|
395
|
+
const segmentPath = path.join(outputDir, `${baseName}_segment_%03d${segmentExt}`);
|
|
369
396
|
// Get the bundled FFmpeg path
|
|
370
397
|
const ffmpegPath = await this.getFFmpegPath();
|
|
371
398
|
try {
|
|
372
|
-
|
|
399
|
+
const codecArgs = reencode ? ['-c:a', 'libopus', '-b:a', '48k'] : ['-c', 'copy'];
|
|
400
|
+
// Split audio into segments. `-reset_timestamps 1` makes each segment
|
|
401
|
+
// start at PTS 0 and gives it its own container duration. Without it,
|
|
402
|
+
// webm output keeps the source file's total duration in the header --
|
|
403
|
+
// and OpenAI rejects the request based on the header value even when
|
|
404
|
+
// the actual encoded audio is short (`audio duration N seconds is
|
|
405
|
+
// longer than 1400` errors on small last-segment files).
|
|
373
406
|
await execFileAsync(ffmpegPath, [
|
|
374
407
|
'-i',
|
|
375
408
|
audioFilePath,
|
|
@@ -377,14 +410,17 @@ class GeminiService {
|
|
|
377
410
|
'segment',
|
|
378
411
|
'-segment_time',
|
|
379
412
|
String(segmentDuration),
|
|
380
|
-
'-
|
|
381
|
-
'
|
|
413
|
+
'-reset_timestamps',
|
|
414
|
+
'1',
|
|
415
|
+
...codecArgs,
|
|
382
416
|
segmentPath,
|
|
383
417
|
]);
|
|
384
|
-
// Find all created segment files
|
|
418
|
+
// Find all created segment files. Match on the EXTENSION WE TOLD
|
|
419
|
+
// FFMPEG TO WRITE -- when re-encoding, that's `.webm` regardless of
|
|
420
|
+
// the source's original extension.
|
|
385
421
|
const segmentFiles = fs
|
|
386
422
|
.readdirSync(outputDir)
|
|
387
|
-
.filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(
|
|
423
|
+
.filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(segmentExt))
|
|
388
424
|
.map((file) => path.join(outputDir, file))
|
|
389
425
|
.sort();
|
|
390
426
|
console.error(`Split audio into ${segmentFiles.length} segments`);
|
|
@@ -438,6 +474,13 @@ class GeminiService {
|
|
|
438
474
|
let fullTranscript = '';
|
|
439
475
|
const stats = fs.statSync(audioFilePath);
|
|
440
476
|
const fileSizeInMB = stats.size / (1024 * 1024);
|
|
477
|
+
// Segment intentionally for parallelism: even when the API would
|
|
478
|
+
// accept the whole file (Gemini long-context, gpt-4o-transcribe-diarize
|
|
479
|
+
// via chunking_strategy=auto), N parallel 5-min requests finish much
|
|
480
|
+
// faster than one big sequential pass. Trade-off for the diarize
|
|
481
|
+
// model: speaker IDs are mapped fresh per segment ("Speaker 0" in
|
|
482
|
+
// segment 1 may not be the same physical person as "Speaker 0" in
|
|
483
|
+
// segment 2). See docs/model-pricing.md.
|
|
441
484
|
const shouldSegment = duration > 300 || (this.provider === 'codex' && fileSizeInMB > 24);
|
|
442
485
|
const segmentDuration = this.provider === 'codex' && duration > 0 && fileSizeInMB > 20
|
|
443
486
|
? Math.max(30, Math.min(300, Math.floor((20 / fileSizeInMB) * duration)))
|
|
@@ -562,7 +605,14 @@ Return as JSON:
|
|
|
562
605
|
getToken: () => this.getCodexToken(),
|
|
563
606
|
audioFilePath,
|
|
564
607
|
model: this.codexTranscriptionModel,
|
|
608
|
+
// `prompt` is dropped inside transcribeCodexAudio when the
|
|
609
|
+
// diarize model is active. Keep passing it -- the helper picks
|
|
610
|
+
// the right shape per model.
|
|
565
611
|
prompt: transcriptPrompt,
|
|
612
|
+
// Intentionally NOT passing `language: 'ko'`. Whisper-derived
|
|
613
|
+
// transcription auto-detects from the first ~30s, which handles
|
|
614
|
+
// bilingual/code-switched meetings (Korean primary, English
|
|
615
|
+
// acronyms/quotes) better than forcing a single language.
|
|
566
616
|
});
|
|
567
617
|
}
|
|
568
618
|
const ai = this.gemini();
|
|
@@ -745,8 +795,11 @@ Return as JSON:
|
|
|
745
795
|
// Get segmented transcript (renamed from transcribeAudioSegmented)
|
|
746
796
|
async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt, segmentDuration = 300) {
|
|
747
797
|
try {
|
|
748
|
-
// Split audio into 5-minute segments
|
|
749
|
-
|
|
798
|
+
// Split audio into 5-minute segments. Codex transcription requires
|
|
799
|
+
// accurate cut times (gpt-4o-transcribe rejects >1400s/segment), so
|
|
800
|
+
// force re-encode there; Gemini's API tolerates long inputs and we
|
|
801
|
+
// keep the cheaper `-c copy` path for it.
|
|
802
|
+
const segmentFiles = await this.splitAudioIntoSegments(audioFilePath, segmentDuration, this.provider === 'codex');
|
|
750
803
|
if (progressCallback) {
|
|
751
804
|
progressCallback(20, `Processing ${segmentFiles.length} segments...`);
|
|
752
805
|
}
|
package/dist/piAiClient.js
CHANGED
|
@@ -23,11 +23,69 @@ async function getModel(provider, modelId) {
|
|
|
23
23
|
// path for non-literal ids ("Custom Models" in pi-ai's README).
|
|
24
24
|
return m.getModel(piId, modelId);
|
|
25
25
|
}
|
|
26
|
+
/**
 * Build a one-line size summary of a request context for logging.
 *
 * Text is counted for `user`, `assistant` and `toolResult` messages: string
 * content by its length, array content by the length of each `text` block.
 * `toolCall` blocks are counted on assistant messages, and every `toolResult`
 * message increments the tool-result count. The system prompt length is added
 * to the character total and also reported on its own.
 *
 * This helper only feeds a log line, so malformed input (missing `messages`,
 * null content, non-array content, blocks without text) is counted as zero
 * instead of throwing and aborting the caller.
 *
 * @param {{messages?: Array<object>, systemPrompt?: string, tools?: Array<object>}} context
 *   Context about to be sent to the model.
 * @returns {string} Summary of the form
 *   `messages=N chars=N (system=N) toolCalls=N toolResults=N tools=N`.
 */
function summarizeContextSize(context) {
    const messages = Array.isArray(context?.messages) ? context.messages : [];
    let chars = 0;
    let toolCalls = 0;
    let toolResults = 0;
    for (const msg of messages) {
        const role = msg?.role;
        if (role !== 'user' && role !== 'assistant' && role !== 'toolResult') {
            continue;
        }
        if (role === 'toolResult') {
            toolResults++;
        }
        const content = msg.content;
        if (typeof content === 'string') {
            chars += content.length;
            continue;
        }
        if (!Array.isArray(content)) {
            continue;
        }
        for (const block of content) {
            if (block?.type === 'text') {
                chars += typeof block.text === 'string' ? block.text.length : 0;
            }
            else if (role === 'assistant' && block?.type === 'toolCall') {
                toolCalls++;
            }
        }
    }
    const systemChars = typeof context?.systemPrompt === 'string' ? context.systemPrompt.length : 0;
    const toolCount = context?.tools?.length ?? 0;
    return `messages=${messages.length} chars=${chars + systemChars} (system=${systemChars}) toolCalls=${toolCalls} toolResults=${toolResults} tools=${toolCount}`;
}
|
|
55
|
+
// Strip options the target provider doesn't accept. OpenAI Codex routes
|
|
56
|
+
// through GPT-5.x reasoning models which reject sampling parameters
|
|
57
|
+
// (`Unsupported parameter: temperature`). pi-ai forwards options verbatim,
|
|
58
|
+
// so the adjustment has to happen at our boundary -- doing it here keeps
|
|
59
|
+
// callsites free of provider conditionals.
|
|
60
|
+
/**
 * Produce the options object to forward for `model`.
 *
 * Always returns a fresh shallow copy, never the caller's object. For a Codex
 * model (api 'openai-codex-responses' or provider 'openai-codex') the
 * `temperature` key is left out of the copy.
 *
 * @param {{api?: string, provider?: string}} model - Target model descriptor.
 * @param {object} [options] - Caller-supplied completion options.
 * @returns {object|undefined} Copied options, or `undefined` when none given.
 */
function adjustOptionsForModel(model, options) {
    if (!options) {
        return undefined;
    }
    const targetsCodex = model.api === 'openai-codex-responses' || model.provider === 'openai-codex';
    const forwarded = { ...options };
    if (targetsCodex) {
        // Only the copy is touched; the caller's object keeps its temperature.
        delete forwarded.temperature;
    }
    return forwarded;
}
|
|
26
70
|
/**
 * Run a completion through the lazily loaded pi-ai module, logging a request
 * line before the call and a response line after it.
 *
 * Options are passed through `adjustOptionsForModel` first. A response whose
 * `stopReason` is 'error' is turned into a thrown Error carrying the
 * response's `errorMessage`.
 *
 * @param {object} model - Model descriptor; `provider` and `id` are used in log tags.
 * @param {object} context - Conversation context forwarded to pi-ai.
 * @param {object} [options] - Completion options.
 * @returns {Promise<object>} The pi-ai response when it did not stop with 'error'.
 * @throws {Error} When the response reports `stopReason === 'error'`.
 */
async function complete(model, context, options) {
    const piAi = await loadPiAi();
    const tag = `[pi-ai ${model.provider}/${model.id}]`;
    const startedAt = Date.now();
    console.log(`${tag} -> ${summarizeContextSize(context)}`);
    const response = await piAi.complete(model, context, adjustOptionsForModel(model, options));
    const elapsed = Date.now() - startedAt;
    const stop = response.stopReason ?? 'unknown';
    const textChars = extractFinalText(response).length;
    const usageIn = response.usage?.input ?? '?';
    const usageOut = response.usage?.output ?? '?';
    // Cap the logged error text so a verbose upstream message stays on one short line.
    const errorSuffix = response.errorMessage ? ` errorMessage=${response.errorMessage.slice(0, 300)}` : '';
    console.log(`${tag} <- ${elapsed}ms stop=${stop} textChars=${textChars} usage=in:${usageIn}/out:${usageOut}${errorSuffix}`);
    // The error stop reason is reported on the response object, so it is
    // promoted to an exception here for callers to see.
    if (response.stopReason === 'error') {
        throw new Error(`Pi-ai ${model.provider}/${model.id} failed: ${response.errorMessage ?? 'no errorMessage'}`);
    }
    return response;
}
|
|
32
90
|
async function getTypeBox() {
|
|
33
91
|
const m = await loadPiAi();
|