@librechat/agents 3.1.77-dev.1 → 3.1.77
- package/dist/cjs/llm/openai/index.cjs +317 -1
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +318 -2
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/types/llm/openai/index.d.ts +17 -0
- package/package.json +1 -1
- package/src/llm/openai/deepseek.test.ts +479 -0
- package/src/llm/openai/index.ts +484 -1
package/src/llm/openai/index.ts
CHANGED
@@ -141,6 +141,56 @@ type OpenAIChatCompletionRetry = (
   AsyncIterable<OpenAIChatCompletionStreamItem> | OpenAIChatCompletion
 >;
 
+function createUsageMetadata(
+  usage?: OpenAIClient.Completions.CompletionUsage
+): UsageMetadata {
+  const usageMetadata: UsageMetadata = {
+    input_tokens: usage?.prompt_tokens ?? 0,
+    output_tokens: usage?.completion_tokens ?? 0,
+    total_tokens: usage?.total_tokens ?? 0,
+  };
+
+  if (usage == null) {
+    return usageMetadata;
+  }
+
+  const inputTokenDetails: UsageMetadata['input_token_details'] = {};
+  const outputTokenDetails: UsageMetadata['output_token_details'] = {};
+  let hasInputTokenDetails = false;
+  let hasOutputTokenDetails = false;
+  const audioInputTokens = usage.prompt_tokens_details?.audio_tokens;
+  const cachedInputTokens = usage.prompt_tokens_details?.cached_tokens;
+  const audioOutputTokens = usage.completion_tokens_details?.audio_tokens;
+  const reasoningOutputTokens =
+    usage.completion_tokens_details?.reasoning_tokens;
+
+  if (audioInputTokens != null) {
+    inputTokenDetails.audio = audioInputTokens;
+    hasInputTokenDetails = true;
+  }
+  if (cachedInputTokens != null) {
+    inputTokenDetails.cache_read = cachedInputTokens;
+    hasInputTokenDetails = true;
+  }
+  if (audioOutputTokens != null) {
+    outputTokenDetails.audio = audioOutputTokens;
+    hasOutputTokenDetails = true;
+  }
+  if (reasoningOutputTokens != null) {
+    outputTokenDetails.reasoning = reasoningOutputTokens;
+    hasOutputTokenDetails = true;
+  }
+
+  if (hasInputTokenDetails) {
+    usageMetadata.input_token_details = inputTokenDetails;
+  }
+  if (hasOutputTokenDetails) {
+    usageMetadata.output_token_details = outputTokenDetails;
+  }
+
+  return usageMetadata;
+}
+
 function getExposedOpenAIClient(
   completions: OpenAIClientDelegate,
   responses: OpenAIClientDelegate,
@@ -1242,6 +1292,79 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
     return 'LibreChatDeepSeek';
   }
 
+  protected _convertDeepSeekMessages(
+    messages: BaseMessage[]
+  ): OpenAICompletionParam[] {
+    return _convertMessagesToOpenAIParams(messages, this.model, {
+      includeReasoningContent: true,
+    });
+  }
+
+  async _generate(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions'],
+    runManager?: CallbackManagerForLLMRun
+  ): Promise<ChatResult> {
+    options.signal?.throwIfAborted();
+    const params = this.invocationParams(options);
+
+    if (params.stream === true) {
+      return super._generate(messages, options, runManager);
+    }
+
+    const messagesMapped = this._convertDeepSeekMessages(messages);
+    const response = await this.completionWithRetry(
+      {
+        ...params,
+        stream: false,
+        messages: messagesMapped,
+      },
+      {
+        signal: options.signal,
+        ...options.options,
+      }
+    );
+
+    const usageMetadata = createUsageMetadata(response.usage);
+
+    const generations: ChatGeneration[] = response.choices.map((part) => {
+      const text = part.message.content ?? '';
+      const generation: ChatGeneration = {
+        text,
+        message: this._convertCompletionsMessageToBaseMessage(
+          part.message,
+          response
+        ),
+      };
+      generation.generationInfo = {
+        finish_reason: part.finish_reason,
+        ...(part.logprobs != null ? { logprobs: part.logprobs } : {}),
+      };
+      if (isAIMessage(generation.message)) {
+        generation.message.usage_metadata = usageMetadata;
+      }
+      generation.message = new AIMessage(
+        Object.fromEntries(
+          Object.entries(generation.message).filter(
+            ([key]) => !key.startsWith('lc_')
+          )
+        )
+      );
+      return generation;
+    });
+
+    return {
+      generations,
+      llmOutput: {
+        tokenUsage: {
+          promptTokens: usageMetadata.input_tokens,
+          completionTokens: usageMetadata.output_tokens,
+          totalTokens: usageMetadata.total_tokens,
+        },
+      },
+    };
+  }
+
   _getClientOptions(
     options?: OpenAICoreRequestOptions
   ): OpenAICoreRequestOptions {
@@ -1276,10 +1399,370 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
     runManager?: CallbackManagerForLLMRun
   ): AsyncGenerator<ChatGenerationChunk> {
     yield* delayStreamChunks(
-      super._streamResponseChunks(messages, options, runManager),
+      this._streamResponseChunksWithReasoning(messages, options, runManager),
       this._lc_stream_delay
     );
   }
+
+  /** Parses raw `<think>` fallback tags across chunks and emits sanitized DeepSeek stream chunks. */
+  protected async *_streamResponseChunksWithReasoning(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions'],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    const stream = this._streamResponseChunksFromReasoningMessages(
+      messages,
+      options
+    );
+    const thinkStartTag = '<think>';
+    const thinkEndTag = '</think>';
+    let tokensBuffer = '';
+    let isThinking = false;
+
+    for await (const chunk of stream) {
+      if (options.signal?.aborted === true) {
+        throw new Error('AbortError');
+      }
+
+      const reasoningContent =
+        chunk.message.additional_kwargs.reasoning_content;
+      if (reasoningContent != null && reasoningContent !== '') {
+        yield* this._yieldDeepSeekStreamChunk(chunk, runManager);
+        continue;
+      }
+
+      const text = chunk.text;
+      if (text === '') {
+        yield* this._yieldDeepSeekStreamChunk(chunk, runManager);
+        continue;
+      }
+
+      tokensBuffer += text;
+
+      while (tokensBuffer !== '') {
+        if (isThinking) {
+          const thinkEndIndex = tokensBuffer.indexOf(thinkEndTag);
+          if (thinkEndIndex !== -1) {
+            const thoughtContent = tokensBuffer.substring(0, thinkEndIndex);
+            if (thoughtContent !== '') {
+              yield* this._yieldDeepSeekReasoningText(
+                chunk,
+                thoughtContent,
+                runManager
+              );
+            }
+
+            tokensBuffer = tokensBuffer.substring(
+              thinkEndIndex + thinkEndTag.length
+            );
+            isThinking = false;
+            continue;
+          }
+
+          const splitIndex = this._getDeepSeekPartialTagSplitIndex(
+            tokensBuffer,
+            thinkEndTag
+          );
+          if (splitIndex !== -1) {
+            const safeToYield = tokensBuffer.substring(0, splitIndex);
+            if (safeToYield !== '') {
+              yield* this._yieldDeepSeekReasoningText(
+                chunk,
+                safeToYield,
+                runManager
+              );
+            }
+            tokensBuffer = tokensBuffer.substring(splitIndex);
+            break;
+          }
+
+          yield* this._yieldDeepSeekReasoningText(
+            chunk,
+            tokensBuffer,
+            runManager
+          );
+          tokensBuffer = '';
+          break;
+        }
+
+        const thinkStartIndex = tokensBuffer.indexOf(thinkStartTag);
+        if (thinkStartIndex !== -1) {
+          const beforeThink = tokensBuffer.substring(0, thinkStartIndex);
+          if (beforeThink !== '') {
+            yield* this._yieldDeepSeekStreamChunk(
+              this._createDeepSeekStreamChunk(chunk, beforeThink),
+              runManager
+            );
+          }
+
+          tokensBuffer = tokensBuffer.substring(
+            thinkStartIndex + thinkStartTag.length
+          );
+          isThinking = true;
+          continue;
+        }
+
+        const splitIndex = this._getDeepSeekPartialTagSplitIndex(
+          tokensBuffer,
+          thinkStartTag
+        );
+        if (splitIndex !== -1) {
+          const safeToYield = tokensBuffer.substring(0, splitIndex);
+          if (safeToYield !== '') {
+            yield* this._yieldDeepSeekStreamChunk(
+              this._createDeepSeekStreamChunk(chunk, safeToYield),
+              runManager
+            );
+          }
+          tokensBuffer = tokensBuffer.substring(splitIndex);
+          break;
+        }
+
+        yield* this._yieldDeepSeekStreamChunk(
+          this._createDeepSeekStreamChunk(chunk, tokensBuffer),
+          runManager
+        );
+        tokensBuffer = '';
+        break;
+      }
+    }
+
+    if (tokensBuffer === '') {
+      return;
+    }
+
+    if (isThinking) {
+      yield* this._yieldDeepSeekStreamChunk(
+        new ChatGenerationChunk({
+          message: new AIMessageChunk({
+            content: '',
+            additional_kwargs: {
+              reasoning_content: tokensBuffer,
+            },
+          }),
+          text: '',
+        }),
+        runManager
+      );
+      return;
+    }
+
+    yield* this._yieldDeepSeekStreamChunk(
+      new ChatGenerationChunk({
+        message: new AIMessageChunk({
+          content: tokensBuffer,
+        }),
+        text: tokensBuffer,
+      }),
+      runManager
+    );
+  }
+
+  protected async *_streamResponseChunksFromReasoningMessages(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions']
+  ): AsyncGenerator<ChatGenerationChunk> {
+    const params = {
+      ...this.invocationParams(options, { streaming: true }),
+      stream: true as const,
+    };
+    const messagesMapped = this._convertDeepSeekMessages(messages);
+    const streamIterable = await this.completionWithRetry(
+      {
+        ...params,
+        messages: messagesMapped,
+      },
+      {
+        signal: options.signal,
+        ...options.options,
+      }
+    );
+
+    let defaultRole:
+      | OpenAIClient.Chat.Completions.ChatCompletionRole
+      | undefined;
+    let usage: OpenAIClient.Completions.CompletionUsage | undefined;
+
+    for await (const data of streamIterable) {
+      if (options.signal?.aborted === true) {
+        throw new Error('AbortError');
+      }
+
+      if (data.usage != null) {
+        usage = data.usage;
+      }
+
+      if (data.choices.length === 0) {
+        continue;
+      }
+
+      const choice = data.choices[0];
+      const { delta } = choice;
+      const messageChunk = this._convertCompletionsDeltaToBaseMessageChunk(
+        delta,
+        data,
+        defaultRole
+      );
+      defaultRole = delta.role ?? defaultRole;
+
+      if (typeof messageChunk.content !== 'string') {
+        continue;
+      }
+
+      const messageText = messageChunk.content;
+      const newTokenIndices = {
+        prompt: options.promptIndex ?? 0,
+        completion: choice.index,
+      };
+      const generationInfo = { ...newTokenIndices };
+      if (choice.finish_reason != null) {
+        Object.assign(generationInfo, {
+          finish_reason: choice.finish_reason,
+          system_fingerprint: data.system_fingerprint,
+          model_name: data.model,
+          service_tier: data.service_tier,
+        });
+      }
+      if (this.logprobs === true) {
+        Object.assign(generationInfo, { logprobs: choice.logprobs });
+      }
+
+      const generationChunk = new ChatGenerationChunk({
+        message: messageChunk,
+        text: messageText,
+        generationInfo,
+      });
+
+      yield generationChunk;
+    }
+
+    if (usage != null) {
+      const usageMetadata = createUsageMetadata(usage);
+
+      const generationChunk = new ChatGenerationChunk({
+        message: new AIMessageChunk({
+          content: '',
+          response_metadata: {
+            usage: { ...usage },
+          },
+          usage_metadata: usageMetadata,
+        }),
+        text: '',
+        generationInfo: {
+          prompt: 0,
+          completion: 0,
+        },
+      });
+
+      yield generationChunk;
+    }
+
+    if (options.signal?.aborted === true) {
+      throw new Error('AbortError');
+    }
+  }
+
+  protected _createDeepSeekStreamChunk(
+    chunk: ChatGenerationChunk,
+    content: string,
+    additionalKwargs?: AIMessageChunk['additional_kwargs'],
+    text = content
+  ): ChatGenerationChunk {
+    if (!(chunk.message instanceof AIMessageChunk)) {
+      return new ChatGenerationChunk({
+        message: new AIMessageChunk({
+          content,
+          additional_kwargs:
+            additionalKwargs ?? chunk.message.additional_kwargs,
+          response_metadata: chunk.message.response_metadata,
+          id: chunk.message.id,
+        }),
+        text,
+        generationInfo: chunk.generationInfo,
+      });
+    }
+
+    const message = chunk.message;
+    return new ChatGenerationChunk({
+      message: new AIMessageChunk({
+        content,
+        additional_kwargs: additionalKwargs ?? message.additional_kwargs,
+        response_metadata: message.response_metadata,
+        tool_calls: message.tool_calls,
+        tool_call_chunks: message.tool_call_chunks,
+        id: message.id,
+      }),
+      text,
+      generationInfo: chunk.generationInfo,
+    });
+  }
+
+  protected _createDeepSeekReasoningStreamChunk(
+    chunk: ChatGenerationChunk,
+    reasoningContent: string
+  ): ChatGenerationChunk {
+    return this._createDeepSeekStreamChunk(
+      chunk,
+      '',
+      {
+        ...chunk.message.additional_kwargs,
+        reasoning_content: reasoningContent,
+      },
+      ''
+    );
+  }
+
+  protected async *_yieldDeepSeekReasoningText(
+    chunk: ChatGenerationChunk,
+    reasoningContent: string,
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    yield* this._yieldDeepSeekStreamChunk(
+      this._createDeepSeekReasoningStreamChunk(chunk, reasoningContent),
+      runManager
+    );
+  }
+
+  protected async *_yieldDeepSeekStreamChunk(
+    chunk: ChatGenerationChunk,
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    yield chunk;
+    await runManager?.handleLLMNewToken(
+      chunk.text,
+      this._getDeepSeekTokenIndices(chunk),
+      undefined,
+      undefined,
+      undefined,
+      { chunk }
+    );
+  }
+
+  protected _getDeepSeekTokenIndices(
+    chunk: ChatGenerationChunk
+  ): { prompt: number; completion: number } | undefined {
+    const prompt = chunk.generationInfo?.prompt;
+    const completion = chunk.generationInfo?.completion;
+
+    if (typeof prompt === 'number' && typeof completion === 'number') {
+      return { prompt, completion };
+    }
+
+    return undefined;
+  }
+
+  protected _getDeepSeekPartialTagSplitIndex(
+    text: string,
+    tag: string
+  ): number {
+    for (let i = tag.length - 1; i >= 1; i--) {
+      if (text.endsWith(tag.substring(0, i))) {
+        return text.length - i;
+      }
+    }
+
+    return -1;
+  }
 }
 
 /** xAI-specific usage metadata type */