@blockrun/franklin 3.15.83 → 3.15.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/llm.d.ts +1 -0
- package/dist/agent/llm.js +75 -0
- package/dist/stats/audit.js +11 -1
- package/package.json +1 -1
package/dist/agent/llm.d.ts
CHANGED
|
@@ -133,6 +133,7 @@ export declare class ModelClient {
|
|
|
133
133
|
*/
|
|
134
134
|
private resolveVirtualModel;
|
|
135
135
|
streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
|
|
136
|
+
private parseNonStreamingMessage;
|
|
136
137
|
/**
|
|
137
138
|
* Non-streaming completion for simple requests.
|
|
138
139
|
*/
|
package/dist/agent/llm.js
CHANGED
|
@@ -337,6 +337,8 @@ export class ModelClient {
|
|
|
337
337
|
}
|
|
338
338
|
const isAnthropic = request.model.startsWith('anthropic/');
|
|
339
339
|
const isGLM = request.model.startsWith('zai/') || request.model.includes('glm');
|
|
340
|
+
const isGeminiThinkingRequired = request.model.startsWith('google/gemini-3.1') ||
|
|
341
|
+
request.model.startsWith('google/gemini-2.5-pro');
|
|
340
342
|
// Build the request payload, injecting model-specific optimizations
|
|
341
343
|
let requestPayload = { ...request, stream: true };
|
|
342
344
|
// Safety: tool_choice without tools causes upstream 400. Strip rather
|
|
@@ -368,6 +370,30 @@ export class ModelClient {
|
|
|
368
370
|
requestPayload['thinking'] = { type: 'enabled' };
|
|
369
371
|
}
|
|
370
372
|
}
|
|
373
|
+
// Gemini Pro reasoning models reject a missing/zero thinking budget. Normalize
|
|
374
|
+
// the gateway default so fallback routing doesn't fail with "Budget 0 is invalid."
|
|
375
|
+
if (isGeminiThinkingRequired) {
|
|
376
|
+
// The gateway's streaming path currently drops Gemini's thinking budget;
|
|
377
|
+
// non-streaming preserves it. We convert the JSON response back into the
|
|
378
|
+
// same internal chunks below so callers keep one code path.
|
|
379
|
+
requestPayload['stream'] = false;
|
|
380
|
+
const maxOut = request.max_tokens ?? 16_384;
|
|
381
|
+
const budgetTokens = Math.min(maxOut, 8_192);
|
|
382
|
+
const thinking = requestPayload['thinking'];
|
|
383
|
+
if (thinking && typeof thinking === 'object' && !Array.isArray(thinking)) {
|
|
384
|
+
requestPayload['thinking'] = {
|
|
385
|
+
...thinking,
|
|
386
|
+
type: 'enabled',
|
|
387
|
+
budget_tokens: budgetTokens,
|
|
388
|
+
};
|
|
389
|
+
}
|
|
390
|
+
else {
|
|
391
|
+
requestPayload['thinking'] = {
|
|
392
|
+
type: 'enabled',
|
|
393
|
+
budget_tokens: budgetTokens,
|
|
394
|
+
};
|
|
395
|
+
}
|
|
396
|
+
}
|
|
371
397
|
if (isAnthropic) {
|
|
372
398
|
// ─ Anthropic extended thinking ──────────────────────────────────────
|
|
373
399
|
// Enable the `thinking` API block only for models that accept it.
|
|
@@ -529,6 +555,10 @@ export class ModelClient {
|
|
|
529
555
|
return;
|
|
530
556
|
}
|
|
531
557
|
}
|
|
558
|
+
if (requestPayload['stream'] === false) {
|
|
559
|
+
yield* this.parseNonStreamingMessage(response, request.model);
|
|
560
|
+
return;
|
|
561
|
+
}
|
|
532
562
|
// Parse SSE stream
|
|
533
563
|
yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model);
|
|
534
564
|
}
|
|
@@ -536,6 +566,51 @@ export class ModelClient {
|
|
|
536
566
|
unlinkAbort();
|
|
537
567
|
}
|
|
538
568
|
}
|
|
569
|
+
async *parseNonStreamingMessage(response, model) {
|
|
570
|
+
const parsed = await response.json();
|
|
571
|
+
yield { kind: 'message_start', payload: { message: parsed } };
|
|
572
|
+
const content = Array.isArray(parsed['content']) ? parsed['content'] : [];
|
|
573
|
+
for (let index = 0; index < content.length; index++) {
|
|
574
|
+
const block = content[index];
|
|
575
|
+
yield { kind: 'content_block_start', payload: { index, content_block: block } };
|
|
576
|
+
if (block.type === 'text' && typeof block.text === 'string') {
|
|
577
|
+
yield {
|
|
578
|
+
kind: 'content_block_delta',
|
|
579
|
+
payload: { index, delta: { type: 'text_delta', text: block.text } },
|
|
580
|
+
};
|
|
581
|
+
}
|
|
582
|
+
else if (block.type === 'thinking' && typeof block.thinking === 'string') {
|
|
583
|
+
yield {
|
|
584
|
+
kind: 'content_block_delta',
|
|
585
|
+
payload: { index, delta: { type: 'thinking_delta', thinking: block.thinking } },
|
|
586
|
+
};
|
|
587
|
+
if (typeof block.signature === 'string') {
|
|
588
|
+
yield {
|
|
589
|
+
kind: 'content_block_delta',
|
|
590
|
+
payload: { index, delta: { type: 'signature_delta', signature: block.signature } },
|
|
591
|
+
};
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
else if (block.type === 'tool_use') {
|
|
595
|
+
yield {
|
|
596
|
+
kind: 'content_block_delta',
|
|
597
|
+
payload: { index, delta: { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) } },
|
|
598
|
+
};
|
|
599
|
+
}
|
|
600
|
+
yield { kind: 'content_block_stop', payload: { index } };
|
|
601
|
+
}
|
|
602
|
+
yield {
|
|
603
|
+
kind: 'message_delta',
|
|
604
|
+
payload: {
|
|
605
|
+
delta: { stop_reason: parsed['stop_reason'] ?? 'end_turn' },
|
|
606
|
+
usage: parsed['usage'] ?? {},
|
|
607
|
+
},
|
|
608
|
+
};
|
|
609
|
+
yield { kind: 'message_stop', payload: {} };
|
|
610
|
+
if (this.debug) {
|
|
611
|
+
console.error(`[franklin] Parsed non-streaming response for ${model}`);
|
|
612
|
+
}
|
|
613
|
+
}
|
|
539
614
|
/**
|
|
540
615
|
* Non-streaming completion for simple requests.
|
|
541
616
|
*/
|
package/dist/stats/audit.js
CHANGED
|
@@ -109,8 +109,18 @@ export function readAudit() {
|
|
|
109
109
|
* Regex: SCREAMING-CASE bracketed label like `[SYSTEM NOTE]`,
|
|
110
110
|
* `[FRANKLIN HARNESS PREFETCH]`, `[GROUNDING CHECK FAILED]`. Used to detect
|
|
111
111
|
* harness-injected text that masks the real user prompt in audit forensics.
|
|
112
|
+
*
|
|
113
|
+
* Character class includes:
|
|
114
|
+
* A-Z 0-9 bare label content
|
|
115
|
+
* space multi-word labels
|
|
116
|
+
* _ - underscore + hyphen
|
|
117
|
+
* — – : em dash, en dash, colon — common in extended labels like
|
|
118
|
+
* `[GROUNDING CHECK FAILED — RETRY ROUND]` or
|
|
119
|
+
* `[ESCALATION: stronger model]`. Verified 2026-05-07 from a
|
|
120
|
+
* real Predexon-side audit slice where the em-dash form
|
|
121
|
+
* slipped through the previous `[A-Z _-]` regex.
|
|
112
122
|
*/
|
|
113
|
-
const SYNTHETIC_LABEL = /\[[A-Z][A-Z _-]+\]/;
|
|
123
|
+
const SYNTHETIC_LABEL = /\[[A-Z][A-Z0-9 _\-—–:]+\]/;
|
|
114
124
|
/** Pull the last user message from a Dialogue history, flatten, and strip newlines. */
|
|
115
125
|
export function extractLastUserPrompt(history) {
|
|
116
126
|
for (let i = history.length - 1; i >= 0; i--) {
|
package/package.json
CHANGED