@vybestack/llxprt-code-core 0.5.0-nightly.251103.c825fa57 → 0.5.0-nightly.251104.319bfefc
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -14
- package/dist/src/core/geminiChat.d.ts +19 -2
- package/dist/src/core/geminiChat.js +153 -56
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/turn.d.ts +6 -2
- package/dist/src/core/turn.js +6 -0
- package/dist/src/core/turn.js.map +1 -1
- package/dist/src/debug/ConfigurationManager.js +6 -0
- package/dist/src/debug/ConfigurationManager.js.map +1 -1
- package/dist/src/providers/LoggingProviderWrapper.d.ts +1 -0
- package/dist/src/providers/LoggingProviderWrapper.js +89 -4
- package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
- package/dist/src/providers/gemini/GeminiProvider.js +5 -6
- package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
- package/dist/src/services/complexity-analyzer.d.ts +3 -1
- package/dist/src/services/complexity-analyzer.js +66 -17
- package/dist/src/services/complexity-analyzer.js.map +1 -1
- package/dist/src/telemetry/sdk.js +2 -2
- package/dist/src/telemetry/sdk.js.map +1 -1
- package/dist/src/tools/edit.js +27 -7
- package/dist/src/tools/edit.js.map +1 -1
- package/dist/src/tools/fuzzy-replacer.d.ts +61 -0
- package/dist/src/tools/fuzzy-replacer.js +450 -0
- package/dist/src/tools/fuzzy-replacer.js.map +1 -0
- package/dist/src/tools/mcp-tool.d.ts +1 -1
- package/dist/src/tools/mcp-tool.js +1 -1
- package/dist/src/tools/tool-registry.js +1 -1
- package/dist/src/tools/tool-registry.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -8,23 +8,15 @@
 
 
 
-LLxprt Code is a
-
-## What's new in 0.4.5
-
-- **Startup configuration:** supply ephemeral settings via `--set key=value` (same keys as `/set`), ideal for CI and automation.
-- **Resilient streaming:** unified retry defaults (6 attempts / 4 s) and better handling of transient SSE disconnects.
-- **Smarter todos:** complex request detection now nudges you to create todo lists and escalates reminders when none exist.
-- **Configurable todo UI:** control the Todo panel via `/settings → UI → Show Todo Panel`; when hidden, todo tool output appears inline in scrollback.
-- **Simplified Gemini UX:** the "Paid Mode" badge and flash fallback were removed; monitor usage with `/stats` or provider dashboards instead.
-- **Token budgeting clarity:** `context-limit` now clearly counts system prompts + `LLXPRT.md`, with improved error messaging and docs.
+LLxprt Code is a CLI-based LLM assisted coding tool. It is highly configurable and can support nearly any provider or model as well as local/self-hosted models.
 
 ## Key Features
 
-- **Multi-Provider Support**: Direct access to OpenAI (
+- **Multi-Provider Support**: Direct access to OpenAI (gpt-5), Anthropic (Claude Opus/Sonnet), Google Gemini, plus OpenRouter, Fireworks, Synthetic, Cerebras, Chutes, Z.ai and local models
+- **Authenticate** to use free: Gemini and Qwen models as well as using your Claude Pro/Max account. Use `/auth` to enable/disable/logout of Google/Anthropic/Qwen.
 - **Installable Provider Aliases**: Save `/provider` setups as reusable configs and load OpenAI-compatible endpoints instantly
-- **
-- **
+- **Multi-model/Provider Subagents**: Use `/subagent` to define specialized subagents with isolated contexts
+- **Configuration Profiles**: define and save specific model/provider settings using `/profile` for instance temperature or custom headers
 - **Local Model Support**: Run models locally with LM Studio, llama.cpp, or any OpenAI-compatible server
 - **Flexible Configuration**: Switch providers, models, and API keys on the fly
 - **Advanced Settings & Profiles**: Fine-tune model parameters, manage ephemeral settings, and save configurations for reuse. [Learn more →](./docs/settings-and-profiles.md)
@@ -75,7 +67,7 @@ You have two options to install LLxprt Code.
 
 ### Using OpenAI
 
-Direct access to
+Direct access to GPT-5, and other OpenAI models:
 
 1. Get your API key from [OpenAI](https://platform.openai.com/api-keys)
 2. Configure LLxprt Code:
package/dist/src/core/geminiChat.d.ts
CHANGED
@@ -3,7 +3,7 @@
  * Copyright 2025 Google LLC
  * SPDX-License-Identifier: Apache-2.0
  */
-import { GenerateContentResponse, Content, GenerateContentConfig, SendMessageParameters, GenerateContentResponseUsageMetadata, Tool, PartListUnion } from '@google/genai';
+import { GenerateContentResponse, Content, GenerateContentConfig, SendMessageParameters, Part, GenerateContentResponseUsageMetadata, Tool, PartListUnion } from '@google/genai';
 import { ContentGenerator } from './contentGenerator.js';
 import { HistoryService } from '../services/history/HistoryService.js';
 import type { IContent } from '../services/history/IContent.js';
@@ -22,9 +22,21 @@ export type StreamEvent = {
     type: StreamEventType.RETRY;
 };
 /**
- *
+ * Checks if a part contains valid non-thought text content.
+ * This helps in consolidating text parts properly during stream processing.
+ */
+export declare function isValidNonThoughtTextPart(part: Part): boolean;
+/**
+ * Custom error to signal that a stream completed with invalid content,
  * which should trigger a retry.
  */
+export declare class InvalidStreamError extends Error {
+    readonly type: 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT';
+    constructor(message: string, type: 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT');
+}
+/**
+ * Legacy error class for backward compatibility.
+ */
 export declare class EmptyStreamError extends Error {
     constructor(message: string);
 }
@@ -42,6 +54,7 @@ export declare class GeminiChat {
     private compressionPromise;
     private logger;
     private cachedCompressionThreshold;
+    private lastPromptTokenCount;
     private readonly generationConfig;
     /**
      * Runtime state for stateless operation (Phase 6)
@@ -52,6 +65,10 @@ export declare class GeminiChat {
     private readonly runtimeState;
     private readonly historyService;
     private readonly runtimeContext;
+    /**
+     * Gets the last prompt token count.
+     */
+    getLastPromptTokenCount(): number;
     /**
      * @plan PLAN-20251028-STATELESS6.P10
      * @requirement REQ-STAT6-001.2, REQ-STAT6-002.2, REQ-STAT6-002.3
package/dist/src/core/geminiChat.js
CHANGED
@@ -152,9 +152,23 @@ function normalizeToolInteractionInput(message) {
     return result;
 }
 const INVALID_CONTENT_RETRY_OPTIONS = {
-    maxAttempts:
+    maxAttempts: 2, // 1 initial call + 1 retry
     initialDelayMs: 500,
 };
+/**
+ * Checks if a part contains valid non-thought text content.
+ * This helps in consolidating text parts properly during stream processing.
+ */
+export function isValidNonThoughtTextPart(part) {
+    return (typeof part.text === 'string' &&
+        !part.thought &&
+        // Technically, the model should never generate parts that have text and
+        // any of these but we don't trust them so check anyways.
+        !part.functionCall &&
+        !part.functionResponse &&
+        !part.inlineData &&
+        !part.fileData);
+}
 /**
  * Returns true if the response is valid, false otherwise.
  */
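The helper above feeds the text-part consolidation added later in this file. A minimal sketch of how it classifies parts; the import from the package root is an assumption (in the published files the function is declared in dist/src/core/geminiChat.js):

import type { Part } from '@google/genai';
// Import path assumed; the function itself is the one added in the hunk above.
import { isValidNonThoughtTextPart } from '@vybestack/llxprt-code-core';

const plainText: Part = { text: 'Hello' };
const thought: Part = { text: 'internal reasoning', thought: true };
const toolCall: Part = { functionCall: { name: 'edit', args: {} } };

console.log(isValidNonThoughtTextPart(plainText)); // true: plain text, no thought or tool fields
console.log(isValidNonThoughtTextPart(thought));   // false: thought parts are excluded
console.log(isValidNonThoughtTextPart(toolCall));  // false: no text payload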
@@ -233,9 +247,20 @@ function extractCuratedHistory(comprehensiveHistory) {
     return curatedHistory;
 }
 /**
- * Custom error to signal that a stream completed
+ * Custom error to signal that a stream completed with invalid content,
  * which should trigger a retry.
  */
+export class InvalidStreamError extends Error {
+    type;
+    constructor(message, type) {
+        super(message);
+        this.name = 'InvalidStreamError';
+        this.type = type;
+    }
+}
+/**
+ * Legacy error class for backward compatibility.
+ */
 export class EmptyStreamError extends Error {
     constructor(message) {
         super(message);
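Both error classes are treated as retryable "content errors" by the retry loop changed later in this diff. A hedged sketch of how a caller might classify them (the import path is an assumption; the helper name here is illustrative):

import { InvalidStreamError, EmptyStreamError } from '@vybestack/llxprt-code-core';

async function runWithOneRetry(consumeStream: () => Promise<void>): Promise<void> {
  try {
    await consumeStream();
  } catch (error) {
    const retryable =
      error instanceof InvalidStreamError || error instanceof EmptyStreamError;
    if (!retryable) throw error;
    if (error instanceof InvalidStreamError) {
      // error.type narrows which validation failed:
      // 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT'
      console.warn(`retrying after invalid stream: ${error.type}`);
    }
    await consumeStream(); // one retry, matching INVALID_CONTENT_RETRY_OPTIONS.maxAttempts: 2
  }
}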
@@ -260,6 +285,7 @@ export class GeminiChat {
     logger = new DebugLogger('llxprt:gemini:chat');
     // Cache the compression threshold to avoid recalculating
     cachedCompressionThreshold = null;
+    lastPromptTokenCount = 0;
     generationConfig;
     /**
      * Runtime state for stateless operation (Phase 6)
@@ -270,6 +296,12 @@ export class GeminiChat {
     runtimeState;
     historyService;
     runtimeContext;
+    /**
+     * Gets the last prompt token count.
+     */
+    getLastPromptTokenCount() {
+        return this.lastPromptTokenCount;
+    }
     /**
      * @plan PLAN-20251028-STATELESS6.P10
      * @requirement REQ-STAT6-001.2, REQ-STAT6-002.2, REQ-STAT6-002.3
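A brief usage sketch of the new accessor; whether GeminiChat is re-exported from the package root is an assumption, and the chat construction is elided:

import { GeminiChat } from '@vybestack/llxprt-code-core'; // import path assumed

declare const chat: GeminiChat; // an already-constructed chat instance (setup elided)

// lastPromptTokenCount is refreshed from usageMetadata.promptTokenCount while
// chunks stream through processStreamResponse (see the hunk near the end of this
// diff), so after a turn completes it reflects the size of the most recent prompt.
const promptTokens = chat.getLastPromptTokenCount();
console.log(`last prompt used ${promptTokens} tokens`);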
@@ -553,6 +585,7 @@ export class GeminiChat {
             runtime: runtimeContext,
             settings: runtimeContext.settingsService,
             metadata: runtimeContext.metadata,
+            userMemory: runtimeContext.config?.getUserMemory?.(),
         });
         // Collect all chunks from the stream
         let lastResponse;
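The same `userMemory` plumbing is repeated at the other provider call sites later in this file. A small sketch of what the optional chaining resolves to; the runtime-context shape here is illustrative, not the package's actual type:

interface RuntimeContextLike {
  config?: { getUserMemory?: () => string };
}

// userMemory is only populated when both config and getUserMemory exist;
// otherwise the provider receives undefined and behaves as before.
function resolveUserMemory(runtimeContext: RuntimeContextLike): string | undefined {
  return runtimeContext.config?.getUserMemory?.();
}

console.log(resolveUserMemory({}));                                           // undefined
console.log(resolveUserMemory({ config: {} }));                               // undefined
console.log(resolveUserMemory({ config: { getUserMemory: () => 'notes' } })); // 'notes'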
@@ -712,7 +745,26 @@ export class GeminiChat {
                 if (attempt > 0) {
                     yield { type: StreamEventType.RETRY };
                 }
-
+                // If this is a retry, adjust temperature to encourage different output.
+                // Use temperature 1 as baseline (or the original temperature if it's higher than 1) and add increasing variation to avoid repetition.
+                const currentParams = { ...params };
+                if (attempt > 0) {
+                    // Use 1 as the baseline temperature for retries, or the original if it's higher
+                    const baselineTemperature = Math.max(params.config?.temperature ?? 1, 1);
+                    // Add increasing variation for each retry attempt to encourage different output
+                    const variation = attempt * 0.1;
+                    let newTemperature = baselineTemperature + variation;
+                    // Ensure temperature stays within valid range [0, 2] for Gemini models
+                    newTemperature = Math.min(Math.max(newTemperature, 0), 2);
+                    // Ensure config exists
+                    currentParams.config = currentParams.config || {};
+                    currentParams.config = {
+                        ...currentParams.config,
+                        temperature: newTemperature,
+                    };
+                }
+                const stream = await instance.makeApiCallAndProcessStream(currentParams, // Use the modified params with temperature
+                prompt_id, pendingTokens, userContent);
                 for await (const chunk of stream) {
                     yield { type: StreamEventType.CHUNK, value: chunk };
                 }
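To make the retry schedule concrete, here is a standalone sketch of the same arithmetic; the function name is illustrative and not part of the package:

// Mirrors the escalation added above: attempt 0 keeps the caller's config untouched;
// each retry raises the temperature from a baseline of at least 1.0 by 0.1 per
// attempt, clamped to Gemini's valid [0, 2] range.
function retryTemperature(original: number | undefined, attempt: number): number | undefined {
  if (attempt === 0) return original; // first attempt: no adjustment
  const baseline = Math.max(original ?? 1, 1);
  return Math.min(Math.max(baseline + attempt * 0.1, 0), 2);
}

console.log(retryTemperature(0.7, 1)); // 1.1 (baseline raised to 1.0, plus 0.1)
console.log(retryTemperature(1.5, 1)); // 1.6 (an original above 1 is kept as the baseline)

Since INVALID_CONTENT_RETRY_OPTIONS allows only one retry, attempt never exceeds 1 in practice.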
@@ -721,7 +773,8 @@ export class GeminiChat {
             }
             catch (error) {
                 lastError = error;
-                const isContentError = error instanceof
+                const isContentError = error instanceof InvalidStreamError ||
+                    error instanceof EmptyStreamError;
                 if (isContentError) {
                     // Check if we have more attempts left.
                     if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
@@ -787,6 +840,7 @@ export class GeminiChat {
             runtime: runtimeContext,
             settings: runtimeContext.settingsService,
             metadata: runtimeContext.metadata,
+            userMemory: runtimeContext.config?.getUserMemory?.(),
         });
         let lastResponse;
         for await (const iContent of streamResponse) {
@@ -848,7 +902,7 @@ export class GeminiChat {
             throw error;
         }
     }
-    async makeApiCallAndProcessStream(
+    async makeApiCallAndProcessStream(params, promptId, pendingTokens, userContent) {
         // Get the active provider
         let provider = this.getActiveProvider();
         if (!provider) {
@@ -918,7 +972,18 @@ export class GeminiChat {
             baseUrl: providerBaseUrl,
             authType: activeAuthType,
         });
-
+        // Create a runtime context that incorporates the config from params
+        const baseRuntimeContext = this.buildProviderRuntime('GeminiChat.generateRequest', { historyLength: requestContents.length });
+        // If params has config, merge it with the runtime context config
+        const runtimeContext = params.config
+            ? {
+                ...baseRuntimeContext,
+                config: {
+                    ...baseRuntimeContext.config,
+                    ...params.config,
+                },
+            }
+            : baseRuntimeContext;
         const streamResponse = provider.generateChatCompletion({
             contents: requestContents,
             tools: tools,
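A small sketch of the merge precedence established above: keys in `params.config` (for example a retry's adjusted temperature) override the base runtime context's config, while the rest of the runtime context is preserved. The object shapes below are illustrative only:

// Later spreads win, so params.config.temperature replaces the base value
// without touching other runtime fields such as metadata.
const baseRuntimeContext = {
  metadata: { source: 'GeminiChat.generateRequest' },
  config: { temperature: 0.7, topP: 0.9 },
};
const params = { config: { temperature: 1.1 } };

const runtimeContext = params.config
  ? { ...baseRuntimeContext, config: { ...baseRuntimeContext.config, ...params.config } }
  : baseRuntimeContext;

// runtimeContext.config -> { temperature: 1.1, topP: 0.9 }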
@@ -926,6 +991,7 @@ export class GeminiChat {
             runtime: runtimeContext,
             settings: runtimeContext.settingsService,
             metadata: runtimeContext.metadata,
+            userMemory: baseRuntimeContext.config?.getUserMemory?.(),
         });
         // Convert the IContent stream to GenerateContentResponse stream
         return (async function* (instance) {
@@ -1349,6 +1415,7 @@ export class GeminiChat {
             runtime: runtimeContext,
             settings: runtimeContext.settingsService,
             metadata: runtimeContext.metadata,
+            userMemory: runtimeContext.config?.getUserMemory?.(),
         });
         // Collect response
         let summary = '';
@@ -1399,61 +1466,93 @@ export class GeminiChat {
     }
     async *processStreamResponse(streamResponse, userInput) {
         const modelResponseParts = [];
-        let
-        let
-        let
-
-        let streamingUsageMetadata = null;
+        let hasToolCall = false;
+        let hasFinishReason = false;
+        let hasTextResponse = false;
+        const allChunks = [];
         for await (const chunk of streamResponse) {
-
-
-            // Capture usage metadata from IContent chunks (from providers that yield IContent)
-            const chunkWithMetadata = chunk;
-            if (chunkWithMetadata?.metadata?.usage) {
-                streamingUsageMetadata = chunkWithMetadata.metadata.usage;
-            }
+            hasFinishReason =
+                chunk?.candidates?.some((candidate) => candidate.finishReason) ?? false;
             if (isValidResponse(chunk)) {
                 const content = chunk.candidates?.[0]?.content;
-                if (content) {
-
-
-
-
-
-
-
-
+                if (content?.parts) {
+                    if (content.parts.some((part) => part.functionCall)) {
+                        hasToolCall = true;
+                    }
+                    // Check if any part has text content (not just thoughts)
+                    if (content.parts.some((part) => part.text &&
+                        typeof part.text === 'string' &&
+                        part.text.trim() !== '')) {
+                        hasTextResponse = true;
                    }
                    // Filter out thought parts from being added to history.
-                    if (!this.isThoughtContent(content)
-                        modelResponseParts.push(...content.parts);
+                    if (!this.isThoughtContent(content)) {
+                        modelResponseParts.push(...content.parts.filter((part) => !part.thought));
                    }
                }
            }
-
-
+            // Record token usage if this chunk has usageMetadata
+            if (chunk.usageMetadata) {
+                if (chunk.usageMetadata.promptTokenCount !== undefined) {
+                    this.lastPromptTokenCount = chunk.usageMetadata.promptTokenCount;
+                }
            }
+            allChunks.push(chunk);
             yield chunk; // Yield every chunk to the UI immediately.
         }
-        //
-
-
-
-
-
-
-
-
-
-
-
+        // String thoughts and consolidate text parts.
+        const consolidatedParts = [];
+        for (const part of modelResponseParts) {
+            const lastPart = consolidatedParts[consolidatedParts.length - 1];
+            if (lastPart?.text &&
+                isValidNonThoughtTextPart(lastPart) &&
+                isValidNonThoughtTextPart(part)) {
+                lastPart.text += part.text;
+            }
+            else {
+                consolidatedParts.push(part);
+            }
+        }
+        const responseText = consolidatedParts
+            .filter((part) => part.text)
+            .map((part) => part.text)
+            .join('')
+            .trim();
+        // Enhanced stream validation logic: A stream is considered successful if:
+        // 1. There's a tool call (tool calls can end without explicit finish reasons), OR
+        // 2. There's a finish reason AND we have non-empty response text, OR
+        // 3. We detected text content during streaming (hasTextResponse = true)
+        //
+        // We throw an error only when there's no tool call AND:
+        // - No finish reason AND no text response during streaming, OR
+        // - Empty response text after consolidation (e.g., only thoughts with no actual content)
+        if (!hasToolCall &&
+            ((!hasFinishReason && !hasTextResponse) || !responseText)) {
+            if (!hasFinishReason && !hasTextResponse) {
+                throw new InvalidStreamError('Model stream ended without a finish reason and no text response.', 'NO_FINISH_REASON_NO_TEXT');
+            }
+            else {
+                throw new InvalidStreamError('Model stream ended with empty response text.', 'NO_RESPONSE_TEXT');
            }
        }
         // Use recordHistory to correctly save the conversation turn.
         const modelOutput = [
-            { role: 'model', parts:
+            { role: 'model', parts: consolidatedParts },
         ];
+        // Capture usage metadata from the stream
+        let streamingUsageMetadata = null;
+        // Find the last chunk that has usage metadata (similar to getLastChunkWithMetadata logic)
+        const lastChunkWithMetadata = allChunks
+            .slice()
+            .reverse()
+            .find((chunk) => chunk.usageMetadata);
+        if (lastChunkWithMetadata && lastChunkWithMetadata.usageMetadata) {
+            streamingUsageMetadata = {
+                promptTokens: lastChunkWithMetadata.usageMetadata.promptTokenCount || 0,
+                completionTokens: lastChunkWithMetadata.usageMetadata.candidatesTokenCount || 0,
+                totalTokens: lastChunkWithMetadata.usageMetadata.totalTokenCount || 0,
+            };
+        }
         this.recordHistory(userInput, modelOutput, undefined, streamingUsageMetadata);
     }
     recordHistory(userInput, modelOutput, automaticFunctionCallingHistory, usageMetadata) {
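A standalone sketch of the consolidation pass added above, using illustrative part values; the import paths are assumptions:

import type { Part } from '@google/genai';
import { isValidNonThoughtTextPart } from '@vybestack/llxprt-code-core'; // path assumed

// Adjacent plain-text parts are merged; tool-call (and thought) parts break the run.
const modelResponseParts: Part[] = [
  { text: 'Hello, ' },
  { text: 'world.' },
  { functionCall: { name: 'read_file', args: { path: 'README.md' } } },
  { text: 'Done.' },
];

const consolidated: Part[] = [];
for (const part of modelResponseParts) {
  const last = consolidated[consolidated.length - 1];
  if (last?.text && isValidNonThoughtTextPart(last) && isValidNonThoughtTextPart(part)) {
    last.text += part.text;
  } else {
    consolidated.push(part);
  }
}
// consolidated: [{ text: 'Hello, world.' }, { functionCall: ... }, { text: 'Done.' }]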
@@ -1771,17 +1870,15 @@ export class GeminiChat {
         return (typeof provider
             .generateChatCompletion === 'function');
     }
-    resolveProviderBaseUrl(
-
-
-
-
-
-
-
-
-        }
-        return candidate.baseURL;
+    resolveProviderBaseUrl(_provider) {
+        // REQ-SP4-004: ONLY read baseURL from runtime state, NEVER from provider instance.
+        // This ensures each agent/subagent can have its own baseURL even when using
+        // the same provider (e.g., main uses OpenRouter, subagent uses Cerebras, both via openai).
+        //
+        // If runtime state has baseURL → use it
+        // If runtime state has no baseURL → return undefined (provider uses default endpoint)
+        // NEVER read from provider instance - that violates stateless pattern and causes bugs
+        return this.runtimeState.baseUrl;
     }
 }
 /** Visible for Testing */
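To illustrate the stateless rule the new comments describe, a sketch of per-runtime baseURL resolution; the class, field names beyond `runtimeState.baseUrl`, and the example endpoints are illustrative:

// Each chat carries its own runtime state, so two chats sharing the same
// provider implementation can still target different endpoints.
interface RuntimeState {
  baseUrl?: string;
}

class ChatRuntime {
  constructor(private readonly runtimeState: RuntimeState) {}

  resolveProviderBaseUrl(): string | undefined {
    // undefined means "use the provider's default endpoint"
    return this.runtimeState.baseUrl;
  }
}

const main = new ChatRuntime({ baseUrl: 'https://openrouter.ai/api/v1' });
const subagent = new ChatRuntime({ baseUrl: 'https://api.cerebras.ai/v1' });
console.log(main.resolveProviderBaseUrl(), subagent.resolveProviderBaseUrl());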