@librechat/agents 3.1.77-dev.1 → 3.1.77

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries, and is provided for informational purposes only.
@@ -141,6 +141,56 @@ type OpenAIChatCompletionRetry = (
   AsyncIterable<OpenAIChatCompletionStreamItem> | OpenAIChatCompletion
 >;
 
+function createUsageMetadata(
+  usage?: OpenAIClient.Completions.CompletionUsage
+): UsageMetadata {
+  const usageMetadata: UsageMetadata = {
+    input_tokens: usage?.prompt_tokens ?? 0,
+    output_tokens: usage?.completion_tokens ?? 0,
+    total_tokens: usage?.total_tokens ?? 0,
+  };
+
+  if (usage == null) {
+    return usageMetadata;
+  }
+
+  const inputTokenDetails: UsageMetadata['input_token_details'] = {};
+  const outputTokenDetails: UsageMetadata['output_token_details'] = {};
+  let hasInputTokenDetails = false;
+  let hasOutputTokenDetails = false;
+  const audioInputTokens = usage.prompt_tokens_details?.audio_tokens;
+  const cachedInputTokens = usage.prompt_tokens_details?.cached_tokens;
+  const audioOutputTokens = usage.completion_tokens_details?.audio_tokens;
+  const reasoningOutputTokens =
+    usage.completion_tokens_details?.reasoning_tokens;
+
+  if (audioInputTokens != null) {
+    inputTokenDetails.audio = audioInputTokens;
+    hasInputTokenDetails = true;
+  }
+  if (cachedInputTokens != null) {
+    inputTokenDetails.cache_read = cachedInputTokens;
+    hasInputTokenDetails = true;
+  }
+  if (audioOutputTokens != null) {
+    outputTokenDetails.audio = audioOutputTokens;
+    hasOutputTokenDetails = true;
+  }
+  if (reasoningOutputTokens != null) {
+    outputTokenDetails.reasoning = reasoningOutputTokens;
+    hasOutputTokenDetails = true;
+  }
+
+  if (hasInputTokenDetails) {
+    usageMetadata.input_token_details = inputTokenDetails;
+  }
+  if (hasOutputTokenDetails) {
+    usageMetadata.output_token_details = outputTokenDetails;
+  }
+
+  return usageMetadata;
+}
+
 function getExposedOpenAIClient(
   completions: OpenAIClientDelegate,
   responses: OpenAIClientDelegate,
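
The new `createUsageMetadata` helper folds OpenAI's `usage` block into LangChain's `UsageMetadata` shape, attaching `input_token_details`/`output_token_details` only when the API actually reported those counts, and zeroing the totals when `usage` is absent. A minimal sketch of the mapping, using an illustrative (not real) payload:

import type { UsageMetadata } from '@langchain/core/messages';

// Hypothetical OpenAI-style usage payload, for illustration only.
const usage = {
  prompt_tokens: 120,
  completion_tokens: 48,
  total_tokens: 168,
  prompt_tokens_details: { cached_tokens: 100 },
  completion_tokens_details: { reasoning_tokens: 32 },
};

// What createUsageMetadata(usage) would return for that payload:
const expected: UsageMetadata = {
  input_tokens: 120,
  output_tokens: 48,
  total_tokens: 168,
  input_token_details: { cache_read: 100 },
  output_token_details: { reasoning: 32 },
};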
@@ -1242,6 +1292,79 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
     return 'LibreChatDeepSeek';
   }
 
+  protected _convertDeepSeekMessages(
+    messages: BaseMessage[]
+  ): OpenAICompletionParam[] {
+    return _convertMessagesToOpenAIParams(messages, this.model, {
+      includeReasoningContent: true,
+    });
+  }
+
+  async _generate(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions'],
+    runManager?: CallbackManagerForLLMRun
+  ): Promise<ChatResult> {
+    options.signal?.throwIfAborted();
+    const params = this.invocationParams(options);
+
+    if (params.stream === true) {
+      return super._generate(messages, options, runManager);
+    }
+
+    const messagesMapped = this._convertDeepSeekMessages(messages);
+    const response = await this.completionWithRetry(
+      {
+        ...params,
+        stream: false,
+        messages: messagesMapped,
+      },
+      {
+        signal: options.signal,
+        ...options.options,
+      }
+    );
+
+    const usageMetadata = createUsageMetadata(response.usage);
+
+    const generations: ChatGeneration[] = response.choices.map((part) => {
+      const text = part.message.content ?? '';
+      const generation: ChatGeneration = {
+        text,
+        message: this._convertCompletionsMessageToBaseMessage(
+          part.message,
+          response
+        ),
+      };
+      generation.generationInfo = {
+        finish_reason: part.finish_reason,
+        ...(part.logprobs != null ? { logprobs: part.logprobs } : {}),
+      };
+      if (isAIMessage(generation.message)) {
+        generation.message.usage_metadata = usageMetadata;
+      }
+      generation.message = new AIMessage(
+        Object.fromEntries(
+          Object.entries(generation.message).filter(
+            ([key]) => !key.startsWith('lc_')
+          )
+        )
+      );
+      return generation;
+    });
+
+    return {
+      generations,
+      llmOutput: {
+        tokenUsage: {
+          promptTokens: usageMetadata.input_tokens,
+          completionTokens: usageMetadata.output_tokens,
+          totalTokens: usageMetadata.total_tokens,
+        },
+      },
+    };
+  }
+
   _getClientOptions(
     options?: OpenAICoreRequestOptions
   ): OpenAICoreRequestOptions {
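
The `_generate` override takes over the non-streaming path so reasoning content survives message conversion (`includeReasoningContent: true`) and token usage is mirrored into both `usage_metadata` and `llmOutput.tokenUsage`. A rough usage sketch, assuming `ChatDeepSeek` is re-exported from the package root; the import specifier, model id, and environment variable below are placeholders:

import { HumanMessage } from '@langchain/core/messages';
import { ChatDeepSeek } from '@librechat/agents'; // assumed export location

const llm = new ChatDeepSeek({
  model: 'deepseek-reasoner', // placeholder model id
  apiKey: process.env.DEEPSEEK_API_KEY,
  streaming: false, // exercises the new non-streaming _generate path
});

const result = await llm.invoke([new HumanMessage('What changed in 3.1.77?')]);
// usage_metadata is populated via createUsageMetadata(response.usage):
console.log(result.usage_metadata);
// e.g. { input_tokens: 12, output_tokens: 40, total_tokens: 52 }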
@@ -1276,10 +1399,370 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
     runManager?: CallbackManagerForLLMRun
   ): AsyncGenerator<ChatGenerationChunk> {
     yield* delayStreamChunks(
-      super._streamResponseChunks(messages, options, runManager),
+      this._streamResponseChunksWithReasoning(messages, options, runManager),
       this._lc_stream_delay
     );
   }
+
+  /** Parses raw `<think>` fallback tags across chunks and emits sanitized DeepSeek stream chunks. */
+  protected async *_streamResponseChunksWithReasoning(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions'],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    const stream = this._streamResponseChunksFromReasoningMessages(
+      messages,
+      options
+    );
+    const thinkStartTag = '<think>';
+    const thinkEndTag = '</think>';
+    let tokensBuffer = '';
+    let isThinking = false;
+
+    for await (const chunk of stream) {
+      if (options.signal?.aborted === true) {
+        throw new Error('AbortError');
+      }
+
+      const reasoningContent =
+        chunk.message.additional_kwargs.reasoning_content;
+      if (reasoningContent != null && reasoningContent !== '') {
+        yield* this._yieldDeepSeekStreamChunk(chunk, runManager);
+        continue;
+      }
+
+      const text = chunk.text;
+      if (text === '') {
+        yield* this._yieldDeepSeekStreamChunk(chunk, runManager);
+        continue;
+      }
+
+      tokensBuffer += text;
+
+      while (tokensBuffer !== '') {
+        if (isThinking) {
+          const thinkEndIndex = tokensBuffer.indexOf(thinkEndTag);
+          if (thinkEndIndex !== -1) {
+            const thoughtContent = tokensBuffer.substring(0, thinkEndIndex);
+            if (thoughtContent !== '') {
+              yield* this._yieldDeepSeekReasoningText(
+                chunk,
+                thoughtContent,
+                runManager
+              );
+            }
+
+            tokensBuffer = tokensBuffer.substring(
+              thinkEndIndex + thinkEndTag.length
+            );
+            isThinking = false;
+            continue;
+          }
+
+          const splitIndex = this._getDeepSeekPartialTagSplitIndex(
+            tokensBuffer,
+            thinkEndTag
+          );
+          if (splitIndex !== -1) {
+            const safeToYield = tokensBuffer.substring(0, splitIndex);
+            if (safeToYield !== '') {
+              yield* this._yieldDeepSeekReasoningText(
+                chunk,
+                safeToYield,
+                runManager
+              );
+            }
+            tokensBuffer = tokensBuffer.substring(splitIndex);
+            break;
+          }
+
+          yield* this._yieldDeepSeekReasoningText(
+            chunk,
+            tokensBuffer,
+            runManager
+          );
+          tokensBuffer = '';
+          break;
+        }
+
+        const thinkStartIndex = tokensBuffer.indexOf(thinkStartTag);
+        if (thinkStartIndex !== -1) {
+          const beforeThink = tokensBuffer.substring(0, thinkStartIndex);
+          if (beforeThink !== '') {
+            yield* this._yieldDeepSeekStreamChunk(
+              this._createDeepSeekStreamChunk(chunk, beforeThink),
+              runManager
+            );
+          }
+
+          tokensBuffer = tokensBuffer.substring(
+            thinkStartIndex + thinkStartTag.length
+          );
+          isThinking = true;
+          continue;
+        }
+
+        const splitIndex = this._getDeepSeekPartialTagSplitIndex(
+          tokensBuffer,
+          thinkStartTag
+        );
+        if (splitIndex !== -1) {
+          const safeToYield = tokensBuffer.substring(0, splitIndex);
+          if (safeToYield !== '') {
+            yield* this._yieldDeepSeekStreamChunk(
+              this._createDeepSeekStreamChunk(chunk, safeToYield),
+              runManager
+            );
+          }
+          tokensBuffer = tokensBuffer.substring(splitIndex);
+          break;
+        }
+
+        yield* this._yieldDeepSeekStreamChunk(
+          this._createDeepSeekStreamChunk(chunk, tokensBuffer),
+          runManager
+        );
+        tokensBuffer = '';
+        break;
+      }
+    }
+
+    if (tokensBuffer === '') {
+      return;
+    }
+
+    if (isThinking) {
+      yield* this._yieldDeepSeekStreamChunk(
+        new ChatGenerationChunk({
+          message: new AIMessageChunk({
+            content: '',
+            additional_kwargs: {
+              reasoning_content: tokensBuffer,
+            },
+          }),
+          text: '',
+        }),
+        runManager
+      );
+      return;
+    }
+
+    yield* this._yieldDeepSeekStreamChunk(
+      new ChatGenerationChunk({
+        message: new AIMessageChunk({
+          content: tokensBuffer,
+        }),
+        text: tokensBuffer,
+      }),
+      runManager
+    );
+  }
+
+  protected async *_streamResponseChunksFromReasoningMessages(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions']
+  ): AsyncGenerator<ChatGenerationChunk> {
+    const params = {
+      ...this.invocationParams(options, { streaming: true }),
+      stream: true as const,
+    };
+    const messagesMapped = this._convertDeepSeekMessages(messages);
+    const streamIterable = await this.completionWithRetry(
+      {
+        ...params,
+        messages: messagesMapped,
+      },
+      {
+        signal: options.signal,
+        ...options.options,
+      }
+    );
+
+    let defaultRole:
+      | OpenAIClient.Chat.Completions.ChatCompletionRole
+      | undefined;
+    let usage: OpenAIClient.Completions.CompletionUsage | undefined;
+
+    for await (const data of streamIterable) {
+      if (options.signal?.aborted === true) {
+        throw new Error('AbortError');
+      }
+
+      if (data.usage != null) {
+        usage = data.usage;
+      }
+
+      if (data.choices.length === 0) {
+        continue;
+      }
+
+      const choice = data.choices[0];
+      const { delta } = choice;
+      const messageChunk = this._convertCompletionsDeltaToBaseMessageChunk(
+        delta,
+        data,
+        defaultRole
+      );
+      defaultRole = delta.role ?? defaultRole;
+
+      if (typeof messageChunk.content !== 'string') {
+        continue;
+      }
+
+      const messageText = messageChunk.content;
+      const newTokenIndices = {
+        prompt: options.promptIndex ?? 0,
+        completion: choice.index,
+      };
+      const generationInfo = { ...newTokenIndices };
+      if (choice.finish_reason != null) {
+        Object.assign(generationInfo, {
+          finish_reason: choice.finish_reason,
+          system_fingerprint: data.system_fingerprint,
+          model_name: data.model,
+          service_tier: data.service_tier,
+        });
+      }
+      if (this.logprobs === true) {
+        Object.assign(generationInfo, { logprobs: choice.logprobs });
+      }
+
+      const generationChunk = new ChatGenerationChunk({
+        message: messageChunk,
+        text: messageText,
+        generationInfo,
+      });
+
+      yield generationChunk;
+    }
+
+    if (usage != null) {
+      const usageMetadata = createUsageMetadata(usage);
+
+      const generationChunk = new ChatGenerationChunk({
+        message: new AIMessageChunk({
+          content: '',
+          response_metadata: {
+            usage: { ...usage },
+          },
+          usage_metadata: usageMetadata,
+        }),
+        text: '',
+        generationInfo: {
+          prompt: 0,
+          completion: 0,
+        },
+      });
+
+      yield generationChunk;
+    }
+
+    if (options.signal?.aborted === true) {
+      throw new Error('AbortError');
+    }
+  }
+
+  protected _createDeepSeekStreamChunk(
+    chunk: ChatGenerationChunk,
+    content: string,
+    additionalKwargs?: AIMessageChunk['additional_kwargs'],
+    text = content
+  ): ChatGenerationChunk {
+    if (!(chunk.message instanceof AIMessageChunk)) {
+      return new ChatGenerationChunk({
+        message: new AIMessageChunk({
+          content,
+          additional_kwargs:
+            additionalKwargs ?? chunk.message.additional_kwargs,
+          response_metadata: chunk.message.response_metadata,
+          id: chunk.message.id,
+        }),
+        text,
+        generationInfo: chunk.generationInfo,
+      });
+    }
+
+    const message = chunk.message;
+    return new ChatGenerationChunk({
+      message: new AIMessageChunk({
+        content,
+        additional_kwargs: additionalKwargs ?? message.additional_kwargs,
+        response_metadata: message.response_metadata,
+        tool_calls: message.tool_calls,
+        tool_call_chunks: message.tool_call_chunks,
+        id: message.id,
+      }),
+      text,
+      generationInfo: chunk.generationInfo,
+    });
+  }
+
+  protected _createDeepSeekReasoningStreamChunk(
+    chunk: ChatGenerationChunk,
+    reasoningContent: string
+  ): ChatGenerationChunk {
+    return this._createDeepSeekStreamChunk(
+      chunk,
+      '',
+      {
+        ...chunk.message.additional_kwargs,
+        reasoning_content: reasoningContent,
+      },
+      ''
+    );
+  }
+
+  protected async *_yieldDeepSeekReasoningText(
+    chunk: ChatGenerationChunk,
+    reasoningContent: string,
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    yield* this._yieldDeepSeekStreamChunk(
+      this._createDeepSeekReasoningStreamChunk(chunk, reasoningContent),
+      runManager
+    );
+  }
+
+  protected async *_yieldDeepSeekStreamChunk(
+    chunk: ChatGenerationChunk,
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    yield chunk;
+    await runManager?.handleLLMNewToken(
+      chunk.text,
+      this._getDeepSeekTokenIndices(chunk),
+      undefined,
+      undefined,
+      undefined,
+      { chunk }
+    );
+  }
+
+  protected _getDeepSeekTokenIndices(
+    chunk: ChatGenerationChunk
+  ): { prompt: number; completion: number } | undefined {
+    const prompt = chunk.generationInfo?.prompt;
+    const completion = chunk.generationInfo?.completion;
+
+    if (typeof prompt === 'number' && typeof completion === 'number') {
+      return { prompt, completion };
+    }
+
+    return undefined;
+  }
+
+  protected _getDeepSeekPartialTagSplitIndex(
+    text: string,
+    tag: string
+  ): number {
+    for (let i = tag.length - 1; i >= 1; i--) {
+      if (text.endsWith(tag.substring(0, i))) {
+        return text.length - i;
+      }
+    }
+
+    return -1;
+  }
 }
 
 /** xAI-specific usage metadata type */
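
Most of the streaming machinery above exists to handle `<think>` tags that arrive split across chunk boundaries. The key check is `_getDeepSeekPartialTagSplitIndex`: if the buffer ends with a proper prefix of a tag, everything before that prefix is safe to emit, and the prefix is held back until the next chunk resolves it into a full tag or plain text. A standalone sketch of the same logic with illustrative inputs:

// Mirrors _getDeepSeekPartialTagSplitIndex from the diff above.
function partialTagSplitIndex(text: string, tag: string): number {
  // Try the longest possible dangling prefix first, down to one character.
  for (let i = tag.length - 1; i >= 1; i--) {
    if (text.endsWith(tag.substring(0, i))) {
      return text.length - i;
    }
  }
  return -1; // no dangling prefix; the whole buffer can be emitted
}

partialTagSplitIndex('step one<th', '<think>'); // 8: emit 'step one', hold '<th'
partialTagSplitIndex('done</think', '</think>'); // 4: emit 'done', hold '</think'
partialTagSplitIndex('plain text', '<think>'); // -1: emit everything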