@blockrun/franklin 3.15.84 → 3.15.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -133,6 +133,7 @@ export declare class ModelClient {
133
133
  */
134
134
  private resolveVirtualModel;
135
135
  streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
136
+ private parseNonStreamingMessage;
136
137
  /**
137
138
  * Non-streaming completion for simple requests.
138
139
  */
package/dist/agent/llm.js CHANGED
@@ -337,6 +337,8 @@ export class ModelClient {
337
337
  }
338
338
  const isAnthropic = request.model.startsWith('anthropic/');
339
339
  const isGLM = request.model.startsWith('zai/') || request.model.includes('glm');
340
+ const isGeminiThinkingRequired = request.model.startsWith('google/gemini-3.1') ||
341
+ request.model.startsWith('google/gemini-2.5-pro');
340
342
  // Build the request payload, injecting model-specific optimizations
341
343
  let requestPayload = { ...request, stream: true };
342
344
  // Safety: tool_choice without tools causes upstream 400. Strip rather
@@ -368,6 +370,30 @@ export class ModelClient {
368
370
  requestPayload['thinking'] = { type: 'enabled' };
369
371
  }
370
372
  }
373
+ // Gemini Pro reasoning models reject a missing/zero thinking budget. Normalize
374
+ // the gateway default so fallback routing doesn't fail with "Budget 0 is invalid."
375
+ if (isGeminiThinkingRequired) {
376
+ // The gateway's streaming path currently drops Gemini's thinking budget;
377
+ // non-streaming preserves it. We convert the JSON response back into the
378
+ // same internal chunks below so callers keep one code path.
379
+ requestPayload['stream'] = false;
380
+ const maxOut = request.max_tokens ?? 16_384;
381
+ const budgetTokens = Math.min(maxOut, 8_192);
382
+ const thinking = requestPayload['thinking'];
383
+ if (thinking && typeof thinking === 'object' && !Array.isArray(thinking)) {
384
+ requestPayload['thinking'] = {
385
+ ...thinking,
386
+ type: 'enabled',
387
+ budget_tokens: budgetTokens,
388
+ };
389
+ }
390
+ else {
391
+ requestPayload['thinking'] = {
392
+ type: 'enabled',
393
+ budget_tokens: budgetTokens,
394
+ };
395
+ }
396
+ }
371
397
  if (isAnthropic) {
372
398
  // ─ Anthropic extended thinking ──────────────────────────────────────
373
399
  // Enable the `thinking` API block only for models that accept it.
@@ -529,6 +555,10 @@ export class ModelClient {
529
555
  return;
530
556
  }
531
557
  }
558
+ if (requestPayload['stream'] === false) {
559
+ yield* this.parseNonStreamingMessage(response, request.model);
560
+ return;
561
+ }
532
562
  // Parse SSE stream
533
563
  yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model);
534
564
  }
@@ -536,6 +566,51 @@ export class ModelClient {
536
566
  unlinkAbort();
537
567
  }
538
568
  }
569
+ async *parseNonStreamingMessage(response, model) {
570
+ const parsed = await response.json();
571
+ yield { kind: 'message_start', payload: { message: parsed } };
572
+ const content = Array.isArray(parsed['content']) ? parsed['content'] : [];
573
+ for (let index = 0; index < content.length; index++) {
574
+ const block = content[index];
575
+ yield { kind: 'content_block_start', payload: { index, content_block: block } };
576
+ if (block.type === 'text' && typeof block.text === 'string') {
577
+ yield {
578
+ kind: 'content_block_delta',
579
+ payload: { index, delta: { type: 'text_delta', text: block.text } },
580
+ };
581
+ }
582
+ else if (block.type === 'thinking' && typeof block.thinking === 'string') {
583
+ yield {
584
+ kind: 'content_block_delta',
585
+ payload: { index, delta: { type: 'thinking_delta', thinking: block.thinking } },
586
+ };
587
+ if (typeof block.signature === 'string') {
588
+ yield {
589
+ kind: 'content_block_delta',
590
+ payload: { index, delta: { type: 'signature_delta', signature: block.signature } },
591
+ };
592
+ }
593
+ }
594
+ else if (block.type === 'tool_use') {
595
+ yield {
596
+ kind: 'content_block_delta',
597
+ payload: { index, delta: { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) } },
598
+ };
599
+ }
600
+ yield { kind: 'content_block_stop', payload: { index } };
601
+ }
602
+ yield {
603
+ kind: 'message_delta',
604
+ payload: {
605
+ delta: { stop_reason: parsed['stop_reason'] ?? 'end_turn' },
606
+ usage: parsed['usage'] ?? {},
607
+ },
608
+ };
609
+ yield { kind: 'message_stop', payload: {} };
610
+ if (this.debug) {
611
+ console.error(`[franklin] Parsed non-streaming response for ${model}`);
612
+ }
613
+ }
539
614
  /**
540
615
  * Non-streaming completion for simple requests.
541
616
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.84",
3
+ "version": "3.15.85",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {