@vybestack/llxprt-code-core 0.5.0-nightly.251103.c825fa57 → 0.5.0-nightly.251104.b1b63628

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,23 +8,15 @@
 
 ![LLxprt Code Screenshot](./docs/assets/llxprt-screenshot.png)
 
- LLxprt Code is a powerful fork of [Google's Gemini CLI](https://github.com/google-gemini/gemini-cli), enhanced with multi-provider support and improved theming. We thank Google for their excellent foundation and will continue to track and merge upstream changes as long as practical.
-
- ## What's new in 0.4.5
-
- - **Startup configuration:** supply ephemeral settings via `--set key=value` (same keys as `/set`), ideal for CI and automation.
- - **Resilient streaming:** unified retry defaults (6 attempts / 4 s) and better handling of transient SSE disconnects.
- - **Smarter todos:** complex request detection now nudges you to create todo lists and escalates reminders when none exist.
- - **Configurable todo UI:** control the Todo panel via `/settings → UI → Show Todo Panel`; when hidden, todo tool output appears inline in scrollback.
- - **Simplified Gemini UX:** the "Paid Mode" badge and flash fallback were removed; monitor usage with `/stats` or provider dashboards instead.
- - **Token budgeting clarity:** `context-limit` now clearly counts system prompts + `LLXPRT.md`, with improved error messaging and docs.
+ LLxprt Code is a CLI-based, LLM-assisted coding tool. It is highly configurable and can support nearly any provider or model, as well as local/self-hosted models.
 
 ## Key Features
 
- - **Multi-Provider Support**: Direct access to OpenAI (o3), Anthropic (Claude), Google Gemini, plus OpenRouter, Fireworks, and local models
+ - **Multi-Provider Support**: Direct access to OpenAI (gpt-5), Anthropic (Claude Opus/Sonnet), Google Gemini, plus OpenRouter, Fireworks, Synthetic, Cerebras, Chutes, Z.ai, and local models
+ - **Authenticate to use for free**: Gemini and Qwen models, as well as your Claude Pro/Max account. Use `/auth` to enable, disable, or log out of Google/Anthropic/Qwen.
 - **Installable Provider Aliases**: Save `/provider` setups as reusable configs and load OpenAI-compatible endpoints instantly
- - **Enhanced Theme Support**: Beautiful themes applied consistently across the entire tool
- - **Full Gemini CLI Compatibility**: All original features work seamlessly, including Google authentication via `/auth`
+ - **Multi-model/Provider Subagents**: Use `/subagent` to define specialized subagents with isolated contexts
+ - **Configuration Profiles**: define and save specific model/provider settings (for instance, temperature or custom headers) using `/profile`
 - **Local Model Support**: Run models locally with LM Studio, llama.cpp, or any OpenAI-compatible server
 - **Flexible Configuration**: Switch providers, models, and API keys on the fly
 - **Advanced Settings & Profiles**: Fine-tune model parameters, manage ephemeral settings, and save configurations for reuse. [Learn more →](./docs/settings-and-profiles.md)
@@ -75,7 +67,7 @@ You have two options to install LLxprt Code.
 
 ### Using OpenAI
 
- Direct access to o3, o1, GPT-4.1, and other OpenAI models:
+ Direct access to GPT-5 and other OpenAI models:
 
 1. Get your API key from [OpenAI](https://platform.openai.com/api-keys)
 2. Configure LLxprt Code:
@@ -3,7 +3,7 @@
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
- import { GenerateContentResponse, Content, GenerateContentConfig, SendMessageParameters, GenerateContentResponseUsageMetadata, Tool, PartListUnion } from '@google/genai';
+ import { GenerateContentResponse, Content, GenerateContentConfig, SendMessageParameters, Part, GenerateContentResponseUsageMetadata, Tool, PartListUnion } from '@google/genai';
 import { ContentGenerator } from './contentGenerator.js';
 import { HistoryService } from '../services/history/HistoryService.js';
 import type { IContent } from '../services/history/IContent.js';
@@ -22,9 +22,21 @@ export type StreamEvent = {
 type: StreamEventType.RETRY;
 };
 /**
- * Custom error to signal that a stream completed without valid content,
+ * Checks if a part contains valid non-thought text content.
+ * This helps in consolidating text parts properly during stream processing.
+ */
+ export declare function isValidNonThoughtTextPart(part: Part): boolean;
+ /**
+ * Custom error to signal that a stream completed with invalid content,
 * which should trigger a retry.
 */
+ export declare class InvalidStreamError extends Error {
+ readonly type: 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT';
+ constructor(message: string, type: 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT');
+ }
+ /**
+ * Legacy error class for backward compatibility.
+ */
 export declare class EmptyStreamError extends Error {
 constructor(message: string);
 }
@@ -42,6 +54,7 @@ export declare class GeminiChat {
 private compressionPromise;
 private logger;
 private cachedCompressionThreshold;
+ private lastPromptTokenCount;
 private readonly generationConfig;
 /**
 * Runtime state for stateless operation (Phase 6)
@@ -52,6 +65,10 @@ export declare class GeminiChat {
 private readonly runtimeState;
 private readonly historyService;
 private readonly runtimeContext;
+ /**
+ * Gets the last prompt token count.
+ */
+ getLastPromptTokenCount(): number;
 /**
 * @plan PLAN-20251028-STATELESS6.P10
 * @requirement REQ-STAT6-001.2, REQ-STAT6-002.2, REQ-STAT6-002.3
@@ -152,9 +152,23 @@ function normalizeToolInteractionInput(message) {
 return result;
 }
 const INVALID_CONTENT_RETRY_OPTIONS = {
- maxAttempts: 3, // 1 initial call + 2 retries
+ maxAttempts: 2, // 1 initial call + 1 retry
 initialDelayMs: 500,
 };
+ /**
+ * Checks if a part contains valid non-thought text content.
+ * This helps in consolidating text parts properly during stream processing.
+ */
+ export function isValidNonThoughtTextPart(part) {
+ return (typeof part.text === 'string' &&
+ !part.thought &&
+ // Technically, the model should never generate parts that have text and
+ // any of these but we don't trust them so check anyways.
+ !part.functionCall &&
+ !part.functionResponse &&
+ !part.inlineData &&
+ !part.fileData);
+ }
 /**
 * Returns true if the response is valid, false otherwise.
 */
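For orientation, a minimal usage sketch of the new predicate. The import path and the `Part` literals are illustrative (the real signature takes a `@google/genai` `Part`); the expected results follow directly from the checks above.

    import { isValidNonThoughtTextPart } from '@vybestack/llxprt-code-core';

    // Plain text part: accepted, so it can be merged with adjacent text parts.
    isValidNonThoughtTextPart({ text: 'Hello' }); // true

    // Thought part: rejected, keeping reasoning traces out of consolidated history text.
    isValidNonThoughtTextPart({ text: 'thinking...', thought: true }); // false

    // Mixed part carrying a function call: rejected even though text is present.
    isValidNonThoughtTextPart({ text: 'calling a tool', functionCall: { name: 'listFiles', args: {} } }); // false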
@@ -233,9 +247,20 @@ function extractCuratedHistory(comprehensiveHistory) {
 return curatedHistory;
 }
 /**
- * Custom error to signal that a stream completed without valid content,
+ * Custom error to signal that a stream completed with invalid content,
 * which should trigger a retry.
 */
+ export class InvalidStreamError extends Error {
+ type;
+ constructor(message, type) {
+ super(message);
+ this.name = 'InvalidStreamError';
+ this.type = type;
+ }
+ }
+ /**
+ * Legacy error class for backward compatibility.
+ */
 export class EmptyStreamError extends Error {
 constructor(message) {
 super(message);
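A hedged sketch of how a caller can tell the new error apart from the legacy one. The `runStream` helper and the import path are hypothetical, but the instanceof checks mirror the retry guard that appears later in this diff.

    import { InvalidStreamError, EmptyStreamError } from '@vybestack/llxprt-code-core'; // illustrative path

    async function handleTurn(runStream: () => Promise<void>) {
      try {
        await runStream(); // hypothetical helper that drains the model stream
      } catch (error) {
        if (error instanceof InvalidStreamError) {
          // error.type is 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT'
          console.warn(`Invalid stream (${error.type}): ${error.message}`);
        } else if (error instanceof EmptyStreamError) {
          // Legacy class, kept for backward compatibility.
          console.warn(`Empty stream: ${error.message}`);
        } else {
          throw error; // non-content errors are not retried as content errors
        }
      }
    }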
@@ -260,6 +285,7 @@ export class GeminiChat {
 logger = new DebugLogger('llxprt:gemini:chat');
 // Cache the compression threshold to avoid recalculating
 cachedCompressionThreshold = null;
+ lastPromptTokenCount = 0;
 generationConfig;
 /**
 * Runtime state for stateless operation (Phase 6)
@@ -270,6 +296,12 @@ export class GeminiChat {
 runtimeState;
 historyService;
 runtimeContext;
+ /**
+ * Gets the last prompt token count.
+ */
+ getLastPromptTokenCount() {
+ return this.lastPromptTokenCount;
+ }
 /**
 * @plan PLAN-20251028-STATELESS6.P10
 * @requirement REQ-STAT6-001.2, REQ-STAT6-002.2, REQ-STAT6-002.3
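A small sketch of how the new accessor could be read once a turn has finished. The `chat` instance and the timing are assumptions; the field is populated from `usageMetadata.promptTokenCount` later in this diff.

    // Assumes `chat` is an already-constructed GeminiChat whose response stream
    // has been fully consumed; until then the value stays at its initial 0.
    const promptTokens: number = chat.getLastPromptTokenCount();
    console.log(`Last request sent ${promptTokens} prompt tokens to the model.`);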
@@ -553,6 +585,7 @@ export class GeminiChat {
 runtime: runtimeContext,
 settings: runtimeContext.settingsService,
 metadata: runtimeContext.metadata,
+ userMemory: runtimeContext.config?.getUserMemory?.(),
 });
 // Collect all chunks from the stream
 let lastResponse;
@@ -712,7 +745,26 @@ export class GeminiChat {
 if (attempt > 0) {
 yield { type: StreamEventType.RETRY };
 }
- const stream = await instance.makeApiCallAndProcessStream(params, prompt_id, pendingTokens, userContent);
+ // If this is a retry, adjust temperature to encourage different output.
+ // Use temperature 1 as baseline (or the original temperature if it's higher than 1) and add increasing variation to avoid repetition.
+ const currentParams = { ...params };
+ if (attempt > 0) {
+ // Use 1 as the baseline temperature for retries, or the original if it's higher
+ const baselineTemperature = Math.max(params.config?.temperature ?? 1, 1);
+ // Add increasing variation for each retry attempt to encourage different output
+ const variation = attempt * 0.1;
+ let newTemperature = baselineTemperature + variation;
+ // Ensure temperature stays within valid range [0, 2] for Gemini models
+ newTemperature = Math.min(Math.max(newTemperature, 0), 2);
+ // Ensure config exists
+ currentParams.config = currentParams.config || {};
+ currentParams.config = {
+ ...currentParams.config,
+ temperature: newTemperature,
+ };
+ }
+ const stream = await instance.makeApiCallAndProcessStream(currentParams, // Use the modified params with temperature
+ prompt_id, pendingTokens, userContent);
 for await (const chunk of stream) {
 yield { type: StreamEventType.CHUNK, value: chunk };
 }
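To make the retry temperature schedule concrete, here is the same arithmetic as a standalone sketch; the function name is illustrative, not part of the package.

    // Baseline is max(original temperature, 1); each retry adds 0.1, clamped to [0, 2].
    function retryTemperature(originalTemperature: number | undefined, attempt: number): number {
      const baseline = Math.max(originalTemperature ?? 1, 1);
      return Math.min(Math.max(baseline + attempt * 0.1, 0), 2);
    }

    retryTemperature(0.2, 1); // 1.1 - a low original temperature is lifted to the 1.0 baseline first
    retryTemperature(1.5, 2); // 1.7 - a higher original temperature keeps its own baseline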
@@ -721,7 +773,8 @@ export class GeminiChat {
 }
 catch (error) {
 lastError = error;
- const isContentError = error instanceof EmptyStreamError;
+ const isContentError = error instanceof InvalidStreamError ||
+ error instanceof EmptyStreamError;
 if (isContentError) {
 // Check if we have more attempts left.
 if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
@@ -787,6 +840,7 @@ export class GeminiChat {
 runtime: runtimeContext,
 settings: runtimeContext.settingsService,
 metadata: runtimeContext.metadata,
+ userMemory: runtimeContext.config?.getUserMemory?.(),
 });
 let lastResponse;
 for await (const iContent of streamResponse) {
@@ -848,7 +902,7 @@ export class GeminiChat {
 throw error;
 }
 }
- async makeApiCallAndProcessStream(_params, promptId, pendingTokens, userContent) {
+ async makeApiCallAndProcessStream(params, promptId, pendingTokens, userContent) {
 // Get the active provider
 let provider = this.getActiveProvider();
 if (!provider) {
@@ -918,7 +972,18 @@ export class GeminiChat {
 baseUrl: providerBaseUrl,
 authType: activeAuthType,
 });
- const runtimeContext = this.buildProviderRuntime('GeminiChat.generateRequest', { historyLength: requestContents.length });
+ // Create a runtime context that incorporates the config from params
+ const baseRuntimeContext = this.buildProviderRuntime('GeminiChat.generateRequest', { historyLength: requestContents.length });
+ // If params has config, merge it with the runtime context config
+ const runtimeContext = params.config
+ ? {
+ ...baseRuntimeContext,
+ config: {
+ ...baseRuntimeContext.config,
+ ...params.config,
+ },
+ }
+ : baseRuntimeContext;
 const streamResponse = provider.generateChatCompletion({
 contents: requestContents,
 tools: tools,
@@ -926,6 +991,7 @@ export class GeminiChat {
 runtime: runtimeContext,
 settings: runtimeContext.settingsService,
 metadata: runtimeContext.metadata,
+ userMemory: baseRuntimeContext.config?.getUserMemory?.(),
 });
 // Convert the IContent stream to GenerateContentResponse stream
 return (async function* (instance) {
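A condensed sketch of the merge these hunks perform, with simplified types. The names are illustrative, but the precedence (per-call config wins over the base runtime config) matches the code above.

    type RuntimeContext = { config?: Record<string, unknown> } & Record<string, unknown>;

    // Per-call overrides (e.g., a retry's adjusted temperature) are layered over
    // the base runtime config; everything else on the context is left untouched.
    function withCallConfig(base: RuntimeContext, callConfig?: Record<string, unknown>): RuntimeContext {
      return callConfig ? { ...base, config: { ...base.config, ...callConfig } } : base;
    }

    withCallConfig({ config: { temperature: 0.7, topP: 0.9 } }, { temperature: 1.1 });
    // => { config: { temperature: 1.1, topP: 0.9 } }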
@@ -1349,6 +1415,7 @@ export class GeminiChat {
 runtime: runtimeContext,
 settings: runtimeContext.settingsService,
 metadata: runtimeContext.metadata,
+ userMemory: runtimeContext.config?.getUserMemory?.(),
 });
 // Collect response
 let summary = '';
@@ -1399,61 +1466,93 @@ export class GeminiChat {
 }
 async *processStreamResponse(streamResponse, userInput) {
 const modelResponseParts = [];
- let hasReceivedValidContent = false;
- let hasReceivedAnyChunk = false;
- let invalidChunkCount = 0;
- let totalChunkCount = 0;
- let streamingUsageMetadata = null;
+ let hasToolCall = false;
+ let hasFinishReason = false;
+ let hasTextResponse = false;
+ const allChunks = [];
 for await (const chunk of streamResponse) {
- hasReceivedAnyChunk = true;
- totalChunkCount++;
- // Capture usage metadata from IContent chunks (from providers that yield IContent)
- const chunkWithMetadata = chunk;
- if (chunkWithMetadata?.metadata?.usage) {
- streamingUsageMetadata = chunkWithMetadata.metadata.usage;
- }
+ hasFinishReason =
+ chunk?.candidates?.some((candidate) => candidate.finishReason) ?? false;
 if (isValidResponse(chunk)) {
 const content = chunk.candidates?.[0]?.content;
- if (content) {
- // Check if this chunk has meaningful content (text or function calls)
- if (content.parts && content.parts.length > 0) {
- const hasMeaningfulContent = content.parts.some((part) => part.text ||
- 'functionCall' in part ||
- 'functionResponse' in part);
- if (hasMeaningfulContent) {
- hasReceivedValidContent = true;
- }
+ if (content?.parts) {
+ if (content.parts.some((part) => part.functionCall)) {
+ hasToolCall = true;
+ }
+ // Check if any part has text content (not just thoughts)
+ if (content.parts.some((part) => part.text &&
+ typeof part.text === 'string' &&
+ part.text.trim() !== '')) {
+ hasTextResponse = true;
 }
 // Filter out thought parts from being added to history.
- if (!this.isThoughtContent(content) && content.parts) {
- modelResponseParts.push(...content.parts);
+ if (!this.isThoughtContent(content)) {
+ modelResponseParts.push(...content.parts.filter((part) => !part.thought));
 }
 }
 }
- else {
- invalidChunkCount++;
+ // Record token usage if this chunk has usageMetadata
+ if (chunk.usageMetadata) {
+ if (chunk.usageMetadata.promptTokenCount !== undefined) {
+ this.lastPromptTokenCount = chunk.usageMetadata.promptTokenCount;
+ }
 }
+ allChunks.push(chunk);
 yield chunk; // Yield every chunk to the UI immediately.
 }
- // Now that the stream is finished, make a decision.
- // Only throw an error if:
- // 1. We received no chunks at all, OR
- // 2. We received chunks but NONE had valid content (all were invalid or empty)
- // This allows models like Qwen to send empty chunks at the end of a stream
- // as long as they sent valid content earlier.
- if (!hasReceivedAnyChunk ||
- (!hasReceivedValidContent && totalChunkCount > 0)) {
- // Only throw if this looks like a genuinely empty/invalid stream
- // Not just a stream that ended with some invalid chunks
- if (invalidChunkCount === totalChunkCount ||
- modelResponseParts.length === 0) {
- throw new EmptyStreamError('Model stream was invalid or completed without valid content.');
+ // String thoughts and consolidate text parts.
+ const consolidatedParts = [];
+ for (const part of modelResponseParts) {
+ const lastPart = consolidatedParts[consolidatedParts.length - 1];
+ if (lastPart?.text &&
+ isValidNonThoughtTextPart(lastPart) &&
+ isValidNonThoughtTextPart(part)) {
+ lastPart.text += part.text;
+ }
+ else {
+ consolidatedParts.push(part);
+ }
+ }
+ const responseText = consolidatedParts
+ .filter((part) => part.text)
+ .map((part) => part.text)
+ .join('')
+ .trim();
+ // Enhanced stream validation logic: A stream is considered successful if:
+ // 1. There's a tool call (tool calls can end without explicit finish reasons), OR
+ // 2. There's a finish reason AND we have non-empty response text, OR
+ // 3. We detected text content during streaming (hasTextResponse = true)
+ //
+ // We throw an error only when there's no tool call AND:
+ // - No finish reason AND no text response during streaming, OR
+ // - Empty response text after consolidation (e.g., only thoughts with no actual content)
+ if (!hasToolCall &&
+ ((!hasFinishReason && !hasTextResponse) || !responseText)) {
+ if (!hasFinishReason && !hasTextResponse) {
+ throw new InvalidStreamError('Model stream ended without a finish reason and no text response.', 'NO_FINISH_REASON_NO_TEXT');
+ }
+ else {
+ throw new InvalidStreamError('Model stream ended with empty response text.', 'NO_RESPONSE_TEXT');
 }
 }
 // Use recordHistory to correctly save the conversation turn.
 const modelOutput = [
- { role: 'model', parts: modelResponseParts },
+ { role: 'model', parts: consolidatedParts },
 ];
+ // Capture usage metadata from the stream
+ let streamingUsageMetadata = null;
+ // Find the last chunk that has usage metadata (similar to getLastChunkWithMetadata logic)
+ const lastChunkWithMetadata = allChunks
+ .slice()
+ .reverse()
+ .find((chunk) => chunk.usageMetadata);
+ if (lastChunkWithMetadata && lastChunkWithMetadata.usageMetadata) {
+ streamingUsageMetadata = {
+ promptTokens: lastChunkWithMetadata.usageMetadata.promptTokenCount || 0,
+ completionTokens: lastChunkWithMetadata.usageMetadata.candidatesTokenCount || 0,
+ totalTokens: lastChunkWithMetadata.usageMetadata.totalTokenCount || 0,
+ };
+ }
 this.recordHistory(userInput, modelOutput, undefined, streamingUsageMetadata);
 }
 recordHistory(userInput, modelOutput, automaticFunctionCallingHistory, usageMetadata) {
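To summarize the new acceptance rule in isolation, a compact sketch using the same inputs the code above computes; the function name is illustrative.

    type StreamOutcome = 'ok' | 'NO_FINISH_REASON_NO_TEXT' | 'NO_RESPONSE_TEXT';

    // A tool call always passes; otherwise the stream must have produced a finish
    // reason or visible text while streaming, and the consolidated non-thought
    // text must be non-empty.
    function classifyStream(hasToolCall: boolean, hasFinishReason: boolean,
                            hasTextResponse: boolean, responseText: string): StreamOutcome {
      if (hasToolCall) return 'ok';
      if (!hasFinishReason && !hasTextResponse) return 'NO_FINISH_REASON_NO_TEXT';
      if (!responseText) return 'NO_RESPONSE_TEXT';
      return 'ok';
    }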
@@ -1771,17 +1870,15 @@ export class GeminiChat {
 return (typeof provider
 .generateChatCompletion === 'function');
 }
- resolveProviderBaseUrl(provider) {
- const candidate = provider;
- try {
- if (typeof candidate.getBaseURL === 'function') {
- return candidate.getBaseURL();
- }
- }
- catch {
- // Ignore failures from provider-specific base URL accessors
- }
- return candidate.baseURL;
+ resolveProviderBaseUrl(_provider) {
+ // REQ-SP4-004: ONLY read baseURL from runtime state, NEVER from provider instance.
+ // This ensures each agent/subagent can have its own baseURL even when using
+ // the same provider (e.g., main uses OpenRouter, subagent uses Cerebras, both via openai).
+ //
+ // If runtime state has baseURL → use it
+ // If runtime state has no baseURL → return undefined (provider uses default endpoint)
+ // NEVER read from provider instance - that violates stateless pattern and causes bugs
+ return this.runtimeState.baseUrl;
 }
 }
 /** Visible for Testing */
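Finally, a minimal sketch of the per-agent base URL idea behind this change; the types and example endpoints are illustrative, not the package's actual API.

    interface RuntimeState { baseUrl?: string }

    // The provider instance is never consulted: each agent's runtime state is the
    // single source of truth, so two agents can share a provider class yet hit
    // different OpenAI-compatible endpoints.
    function resolveBaseUrl(state: RuntimeState): string | undefined {
      return state.baseUrl; // undefined => the provider falls back to its default endpoint
    }

    resolveBaseUrl({ baseUrl: 'https://openrouter.ai/api/v1' }); // main agent
    resolveBaseUrl({ baseUrl: 'https://api.cerebras.ai/v1' });   // subagent, same provider class
    resolveBaseUrl({});                                          // default endpoint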