bloby-bot 0.47.2 → 0.47.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.47.2",
3
+ "version": "0.47.3",
4
4
  "releaseNotes": [
5
5
  "1. # voice note (PTT bubble)",
6
6
  "2. # audio file + caption",
@@ -66,11 +66,15 @@ function mapStopReason(reason?: string): PiStopReason {
66
66
  switch (reason) {
67
67
  case 'STOP':
68
68
  case 'FINISH_REASON_STOP':
69
+ case undefined:
69
70
  return 'end_turn';
70
71
  case 'MAX_TOKENS':
71
72
  return 'max_tokens';
72
73
  case 'SAFETY':
73
74
  case 'RECITATION':
75
+ case 'BLOCKLIST':
76
+ case 'PROHIBITED_CONTENT':
77
+ case 'SPII':
74
78
  case 'OTHER':
75
79
  return 'error';
76
80
  default:
@@ -78,6 +82,24 @@ function mapStopReason(reason?: string): PiStopReason {
78
82
  }
79
83
  }
80
84
 
85
+ function finishReasonMessage(reason?: string): string {
86
+ switch (reason) {
87
+ case 'MAX_TOKENS':
88
+ return 'Response cut off — the model hit its output-token budget before finishing.';
89
+ case 'SAFETY':
90
+ return 'Response blocked by Gemini safety filters.';
91
+ case 'RECITATION':
92
+ return 'Response blocked by recitation policy.';
93
+ case 'BLOCKLIST':
94
+ case 'PROHIBITED_CONTENT':
95
+ case 'SPII':
96
+ return `Response blocked by Gemini policy (${reason}).`;
97
+ case 'OTHER':
98
+ default:
99
+ return `Gemini stopped without producing output (finishReason=${reason || 'unknown'}).`;
100
+ }
101
+ }
102
+
81
103
  export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStreamEvent> {
82
104
  const url =
83
105
  `${req.baseUrl.replace(/\/+$/, '')}/models/${encodeURIComponent(req.modelId)}:streamGenerateContent` +
@@ -89,10 +111,14 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
89
111
  .map((m) => ({ role: toGeminiRole(m.role), parts: toGeminiParts(m.content) }))
90
112
  .filter((m) => m.parts.length > 0);
91
113
 
114
+ // Default to a generous cap because thinking-capable Gemini models (2.5+,
115
+ // 3.x) consume `maxOutputTokens` for both reasoning AND final text — a small
116
+ // cap silently truncates the answer to nothing. Pi's catalog lists 65 536
117
+ // as the model's hard ceiling.
92
118
  const body: any = {
93
119
  contents,
94
120
  generationConfig: {
95
- maxOutputTokens: req.maxOutputTokens ?? 4096,
121
+ maxOutputTokens: req.maxOutputTokens ?? 32768,
96
122
  },
97
123
  };
98
124
  if (req.systemPrompt?.trim()) {
@@ -121,13 +147,21 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
121
147
 
122
148
  let accumulated = '';
123
149
  let lastFinish: string | undefined;
150
+ let promptBlockReason: string | undefined;
124
151
  let usage: { inputTokens?: number; outputTokens?: number } | undefined;
125
152
 
126
153
  try {
127
154
  for await (const chunk of parseSse(res)) {
155
+ // The whole prompt can be rejected before we even get a candidate.
156
+ if (chunk?.promptFeedback?.blockReason) {
157
+ promptBlockReason = chunk.promptFeedback.blockReason;
158
+ }
128
159
  const candidate = chunk?.candidates?.[0];
129
160
  const parts: any[] = candidate?.content?.parts || [];
130
161
  for (const part of parts) {
162
+ // Thinking models emit reasoning parts with `thought: true`. They
163
+ // shouldn't be shown to the user as part of the visible answer.
164
+ if (part?.thought) continue;
131
165
  if (typeof part?.text === 'string' && part.text.length > 0) {
132
166
  accumulated += part.text;
133
167
  yield { type: 'text_delta', delta: part.text };
@@ -151,6 +185,25 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
151
185
  return;
152
186
  }
153
187
 
154
- if (accumulated) yield { type: 'text_end', text: accumulated };
188
+ // Prompt-level block: nothing came back at all.
189
+ if (promptBlockReason) {
190
+ yield { type: 'error', error: `Gemini blocked the prompt (${promptBlockReason}).` };
191
+ yield { type: 'done', stopReason: 'error', usage };
192
+ return;
193
+ }
194
+
195
+ // We finished cleanly but the model produced no visible text. That's almost
196
+ // always a finish-reason problem (MAX_TOKENS, SAFETY, ...) we'd otherwise
197
+ // silently swallow. Surface it.
198
+ if (!accumulated) {
199
+ const reason = lastFinish && lastFinish !== 'STOP' && lastFinish !== 'FINISH_REASON_STOP'
200
+ ? lastFinish
201
+ : undefined;
202
+ yield { type: 'error', error: finishReasonMessage(reason) };
203
+ yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
204
+ return;
205
+ }
206
+
207
+ yield { type: 'text_end', text: accumulated };
155
208
  yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
156
209
  }