bloby-bot 0.47.2 → 0.47.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
@@ -8,6 +8,7 @@
  * Endpoint: POST {baseUrl}/models/{modelId}:streamGenerateContent?alt=sse&key={apiKey}
  * Stream: SSE — each `data: {...}` is one candidate update.
  */
+import { log } from '../../../../shared/logger.js';
 import type {
   PiStreamRequest,
   PiStreamEvent,
@@ -66,11 +67,15 @@ function mapStopReason(reason?: string): PiStopReason {
   switch (reason) {
     case 'STOP':
     case 'FINISH_REASON_STOP':
+    case undefined:
       return 'end_turn';
     case 'MAX_TOKENS':
       return 'max_tokens';
     case 'SAFETY':
     case 'RECITATION':
+    case 'BLOCKLIST':
+    case 'PROHIBITED_CONTENT':
+    case 'SPII':
     case 'OTHER':
       return 'error';
     default:
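
Read together, the widened switch now treats a missing finishReason as a clean stop and folds Gemini's newer block reasons into Pi's 'error' stop reason. A small illustrative check (an editor's sketch, not package code; it assumes mapStopReason is in scope and that the elided default branch is unchanged):

const cases: Array<[string | undefined, string]> = [
  ['STOP', 'end_turn'],
  ['FINISH_REASON_STOP', 'end_turn'],
  [undefined, 'end_turn'],          // new in 0.47.4: no finishReason counts as a clean stop
  ['MAX_TOKENS', 'max_tokens'],
  ['SAFETY', 'error'],
  ['RECITATION', 'error'],
  ['BLOCKLIST', 'error'],           // new
  ['PROHIBITED_CONTENT', 'error'],  // new
  ['SPII', 'error'],                // new
];
for (const [reason, expected] of cases) {
  console.assert(mapStopReason(reason) === expected, `mapStopReason(${String(reason)})`);
}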
@@ -78,6 +83,24 @@ function mapStopReason(reason?: string): PiStopReason {
   }
 }
 
+function finishReasonMessage(reason?: string): string {
+  switch (reason) {
+    case 'MAX_TOKENS':
+      return 'Response cut off — the model hit its output-token budget before finishing.';
+    case 'SAFETY':
+      return 'Response blocked by Gemini safety filters.';
+    case 'RECITATION':
+      return 'Response blocked by recitation policy.';
+    case 'BLOCKLIST':
+    case 'PROHIBITED_CONTENT':
+    case 'SPII':
+      return `Response blocked by Gemini policy (${reason}).`;
+    case 'OTHER':
+    default:
+      return `Gemini stopped without producing output (finishReason=${reason || 'unknown'}).`;
+  }
+}
+
 export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStreamEvent> {
   const url =
     `${req.baseUrl.replace(/\/+$/, '')}/models/${encodeURIComponent(req.modelId)}:streamGenerateContent` +
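
For a sense of what users will see, the new helper's return values for a few inputs, read straight off the hunk above (illustrative calls, not package code):

finishReasonMessage('MAX_TOKENS');
// -> 'Response cut off — the model hit its output-token budget before finishing.'
finishReasonMessage('SPII');
// -> 'Response blocked by Gemini policy (SPII).'
finishReasonMessage(undefined);
// -> 'Gemini stopped without producing output (finishReason=unknown).'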
@@ -89,10 +112,14 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
     .map((m) => ({ role: toGeminiRole(m.role), parts: toGeminiParts(m.content) }))
     .filter((m) => m.parts.length > 0);
 
+  // Default to a generous cap because thinking-capable Gemini models (2.5+,
+  // 3.x) consume `maxOutputTokens` for both reasoning AND final text — a small
+  // cap silently truncates the answer to nothing. Pi's catalog lists 65 536
+  // as the model's hard ceiling.
   const body: any = {
     contents,
     generationConfig: {
-      maxOutputTokens: req.maxOutputTokens ??
+      maxOutputTokens: req.maxOutputTokens ?? 32768,
     },
   };
   if (req.systemPrompt?.trim()) {
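
The practical effect: when a caller leaves req.maxOutputTokens unset, the request body now carries an explicit 32768-token cap instead of the previous default (truncated in this diff). A sketch of the resulting body with a single illustrative turn (systemInstruction wiring sits outside this hunk):

const exampleBody = {
  contents: [
    { role: 'user', parts: [{ text: 'Summarise the attached notes.' }] },
  ],
  generationConfig: {
    maxOutputTokens: 32768, // new default via req.maxOutputTokens ?? 32768
  },
};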
@@ -121,16 +148,35 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
 
   let accumulated = '';
   let lastFinish: string | undefined;
+  let promptBlockReason: string | undefined;
   let usage: { inputTokens?: number; outputTokens?: number } | undefined;
+  // Debug counters — drop once this stabilises.
+  let chunkCount = 0;
+  let thoughtPartCount = 0;
+  let emptyTextPartCount = 0;
+  let firstChunkSummary = '';
 
   try {
     for await (const chunk of parseSse(res)) {
+      chunkCount++;
+      if (chunkCount === 1) {
+        try { firstChunkSummary = JSON.stringify(chunk).slice(0, 600); } catch {}
+      }
+      // The whole prompt can be rejected before we even get a candidate.
+      if (chunk?.promptFeedback?.blockReason) {
+        promptBlockReason = chunk.promptFeedback.blockReason;
+      }
       const candidate = chunk?.candidates?.[0];
       const parts: any[] = candidate?.content?.parts || [];
       for (const part of parts) {
+        // Thinking models emit reasoning parts with `thought: true`. They
+        // shouldn't be shown to the user as part of the visible answer.
+        if (part?.thought) { thoughtPartCount++; continue; }
         if (typeof part?.text === 'string' && part.text.length > 0) {
           accumulated += part.text;
           yield { type: 'text_delta', delta: part.text };
+        } else {
+          emptyTextPartCount++;
         }
       }
       if (candidate?.finishReason) lastFinish = candidate.finishReason;
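
For orientation, the chunks this loop consumes look roughly like the following. These are hand-written illustrations of Gemini streamGenerateContent payloads based on the fields the code reads, not captured output:

// Reasoning part from a thinking model: counted, skipped, never yielded.
const thinkingChunk = {
  candidates: [{ content: { parts: [{ text: 'Weighing options...', thought: true }] } }],
};
// Visible text: accumulated and yielded as a text_delta event.
const textChunk = {
  candidates: [{
    content: { parts: [{ text: 'Here is the answer: ' }] },
    finishReason: 'STOP',
  }],
};
// Prompt rejected outright: only promptFeedback comes back, no candidate.
const blockedChunk = {
  promptFeedback: { blockReason: 'PROHIBITED_CONTENT' },
};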
@@ -151,6 +197,40 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
       return;
     }
 
-
+    log.info(
+      `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
+      `thoughtParts=${thoughtPartCount} emptyTextParts=${emptyTextPartCount} ` +
+      `finishReason=${lastFinish || 'none'} ` +
+      `promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
+    );
+    if (chunkCount > 0 && !accumulated) {
+      log.info(`[pi/google] first chunk (truncated): ${firstChunkSummary}`);
+    } else if (chunkCount === 0) {
+      log.warn(`[pi/google] SSE stream parsed zero chunks — check response shape (status=${res.status} content-type=${res.headers.get('content-type') || ''})`);
+    }
+
+    // Prompt-level block: nothing came back at all.
+    if (promptBlockReason) {
+      yield { type: 'error', error: `Gemini blocked the prompt (${promptBlockReason}).` };
+      yield { type: 'done', stopReason: 'error', usage };
+      return;
+    }
+
+    // We finished cleanly but the model produced no visible text. That's almost
+    // always a finish-reason problem (MAX_TOKENS, SAFETY, ...) we'd otherwise
+    // silently swallow. Surface it.
+    if (!accumulated) {
+      const reason = lastFinish && lastFinish !== 'STOP' && lastFinish !== 'FINISH_REASON_STOP'
+        ? lastFinish
+        : undefined;
+      const hint = thoughtPartCount > 0 && !lastFinish
+        ? ' (model emitted thinking but never the final answer — try a non-thinking model like gemini-2.5-flash, or raise maxOutputTokens)'
+        : '';
+      yield { type: 'error', error: finishReasonMessage(reason) + hint };
+      yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
+      return;
+    }
+
+    yield { type: 'text_end', text: accumulated };
     yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
   }
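
Downstream, a consumer can now tell an explained-empty stream from a silent one. A minimal consumption sketch, assuming the event names shown in this diff (text_delta, error, done) and a req constructed elsewhere:

// Illustrative caller (not package code); req construction omitted.
for await (const ev of streamGoogle(req)) {
  if (ev.type === 'text_delta') process.stdout.write(ev.delta);
  else if (ev.type === 'error') console.error('\n' + ev.error); // e.g. the new MAX_TOKENS/SAFETY messages
  else if (ev.type === 'done') console.log('\n[stop=' + ev.stopReason + ']');
}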