@smythos/sre 1.7.18 → 1.7.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +120 -82
- package/dist/index.js.map +1 -1
- package/dist/types/Components/DataSourceIndexer.class.d.ts +4 -12
- package/dist/types/Components/GenAILLM.class.d.ts +5 -5
- package/dist/types/Components/RAG/DataSourceCleaner.class.d.ts +37 -0
- package/dist/types/Components/RAG/DataSourceComponent.class.d.ts +30 -0
- package/dist/types/Components/RAG/DataSourceIndexer.class.d.ts +14 -0
- package/dist/types/Components/RAG/DataSourceLookup.class.d.ts +36 -0
- package/dist/types/Components/index.d.ts +3 -3
- package/dist/types/helpers/Conversation.helper.d.ts +3 -0
- package/dist/types/index.d.ts +3 -3
- package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +11 -4
- package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +5 -0
- package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +10 -3
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +4 -2
- package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.d.ts +35 -0
- package/dist/types/subsystems/Security/Account.service/AccountConnector.d.ts +2 -2
- package/dist/types/subsystems/Security/ManagedVault.service/connectors/SecretManagerManagedVault.d.ts +10 -0
- package/dist/types/subsystems/Security/Vault.service/connectors/SecretsManager.class.d.ts +6 -2
- package/dist/types/types/LLM.types.d.ts +2 -0
- package/dist/types/types/VectorDB.types.d.ts +4 -0
- package/dist/types/utils/array.utils.d.ts +4 -0
- package/dist/types/utils/string.utils.d.ts +1 -0
- package/package.json +3 -3
- package/src/Components/APIEndpoint.class.ts +1 -6
- package/src/Components/Component.class.ts +14 -1
- package/src/Components/DataSourceIndexer.class.ts +148 -34
- package/src/Components/GenAILLM.class.ts +21 -11
- package/src/Components/RAG/DataSourceCleaner.class.ts +178 -0
- package/src/Components/RAG/DataSourceComponent.class.ts +111 -0
- package/src/Components/RAG/DataSourceIndexer.class.ts +254 -0
- package/src/Components/{DataSourceLookup.class.ts → RAG/DataSourceLookup.class.ts} +92 -3
- package/src/Components/ServerlessCode.class.ts +1 -4
- package/src/Components/index.ts +3 -3
- package/src/helpers/AWSLambdaCode.helper.ts +40 -45
- package/src/helpers/Conversation.helper.ts +14 -10
- package/src/helpers/S3Cache.helper.ts +2 -1
- package/src/index.ts +212 -212
- package/src/index.ts.bak +212 -212
- package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +3 -1
- package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +145 -19
- package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +56 -22
- package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +2 -1
- package/src/subsystems/IO/VectorDB.service/embed/index.ts +18 -0
- package/src/subsystems/LLMManager/LLM.inference.ts +63 -47
- package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +35 -10
- package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +12 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +4 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +105 -23
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +17 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +18 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +14 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +6 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +5 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +8 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +9 -8
- package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +126 -28
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +38 -6
- package/src/subsystems/Security/Account.service/AccountConnector.ts +3 -3
- package/src/subsystems/Security/ManagedVault.service/connectors/SecretManagerManagedVault.ts +111 -48
- package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +41 -66
- package/src/types/LLM.types.ts +5 -0
- package/src/types/VectorDB.types.ts +4 -0
- package/src/utils/array.utils.ts +11 -0
- package/src/utils/base64.utils.ts +1 -1
- package/src/utils/string.utils.ts +3 -192
- package/src/Components/DataSourceCleaner.class.ts +0 -92
|
@@ -139,6 +139,7 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
139
139
|
? toolCall.functionCall?.args
|
|
140
140
|
: JSON.stringify(toolCall.functionCall?.args ?? {}),
|
|
141
141
|
role: TLLMMessageRole.Assistant,
|
|
142
|
+
thoughtSignature: (toolCall as any).thoughtSignature, // Preserve Google AI's reasoning context
|
|
142
143
|
}));
|
|
143
144
|
useTool = true;
|
|
144
145
|
}
|
|
@@ -185,9 +186,11 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
185
186
|
(async () => {
|
|
186
187
|
try {
|
|
187
188
|
for await (const chunk of stream) {
|
|
189
|
+
emitter.emit(TLLMEvent.Data, chunk);
|
|
190
|
+
|
|
188
191
|
const chunkText = chunk.text ?? '';
|
|
189
192
|
if (chunkText) {
|
|
190
|
-
emitter.emit(
|
|
193
|
+
emitter.emit(TLLMEvent.Content, chunkText);
|
|
191
194
|
}
|
|
192
195
|
|
|
193
196
|
const toolCalls = chunk.candidates?.[0]?.content?.parts?.filter((part) => part.functionCall);
|
|
@@ -202,6 +205,7 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
202
205
|
? toolCall.functionCall?.args
|
|
203
206
|
: JSON.stringify(toolCall.functionCall?.args ?? {}),
|
|
204
207
|
role: TLLMMessageRole.Assistant,
|
|
208
|
+
thoughtSignature: (toolCall as any).thoughtSignature, // Preserve Google AI's reasoning context
|
|
205
209
|
}));
|
|
206
210
|
emitter.emit(TLLMEvent.ToolInfo, toolsData);
|
|
207
211
|
}
|
|
@@ -211,21 +215,28 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
211
215
|
}
|
|
212
216
|
}
|
|
213
217
|
|
|
218
|
+
const finishReason = 'stop'; // GoogleAI doesn't provide finishReason in streaming
|
|
219
|
+
const reportedUsage: any[] = [];
|
|
220
|
+
|
|
214
221
|
if (usage) {
|
|
215
|
-
this.reportUsage(usage, {
|
|
222
|
+
const reported = this.reportUsage(usage, {
|
|
216
223
|
modelEntryName: context.modelEntryName,
|
|
217
224
|
keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
|
|
218
225
|
agentId: context.agentId,
|
|
219
226
|
teamId: context.teamId,
|
|
220
227
|
});
|
|
228
|
+
reportedUsage.push(reported);
|
|
221
229
|
}
|
|
222
230
|
|
|
231
|
+
// Note: GoogleAI stream doesn't provide explicit finish reasons
|
|
232
|
+
// If we had a non-stop finish reason, we would emit Interrupted here
|
|
233
|
+
|
|
223
234
|
setTimeout(() => {
|
|
224
|
-
emitter.emit(
|
|
235
|
+
emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
|
|
225
236
|
}, 100);
|
|
226
237
|
} catch (error) {
|
|
227
238
|
logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
|
|
228
|
-
emitter.emit(
|
|
239
|
+
emitter.emit(TLLMEvent.Error, error);
|
|
229
240
|
}
|
|
230
241
|
})();
|
|
231
242
|
|
|
@@ -426,6 +437,13 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
426
437
|
if (params.stopSequences?.length) config.stopSequences = params.stopSequences;
|
|
427
438
|
if (responseMimeType) config.responseMimeType = responseMimeType;
|
|
428
439
|
|
|
440
|
+
// #region Gemini 3 specific fields
|
|
441
|
+
const isGemini3Model = params.modelEntryName?.includes('gemini-3');
|
|
442
|
+
|
|
443
|
+
if (isGemini3Model) {
|
|
444
|
+
if (params?.reasoningEffort) config.thinkingConfig = { thinkingLevel: params.reasoningEffort };
|
|
445
|
+
}
|
|
446
|
+
|
|
429
447
|
if (systemInstruction) body.systemInstruction = systemInstruction;
|
|
430
448
|
if (Object.keys(config).length > 0) {
|
|
431
449
|
body.generationConfig = config;
|
|
@@ -505,36 +523,76 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
505
523
|
) {
|
|
506
524
|
// SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
|
|
507
525
|
const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
|
|
508
|
-
|
|
526
|
+
|
|
527
|
+
// Initially, all input tokens – such as text, audio, image, video, document, etc. – were included in promptTokenCount.
|
|
528
|
+
let inputTokens = usage?.promptTokenCount || 0;
|
|
529
|
+
|
|
530
|
+
// The pricing is the same for output and thinking tokens, so we can add them together.
|
|
531
|
+
const outputTokens = (usage?.candidatesTokenCount || 0) + (usage?.thoughtsTokenCount || 0);
|
|
532
|
+
|
|
533
|
+
// If cached input tokens are available, we need to subtract them from the input tokens.
|
|
534
|
+
let cachedInputTokens = usage?.cachedContentTokenCount || 0;
|
|
535
|
+
|
|
536
|
+
if (cachedInputTokens) {
|
|
537
|
+
inputTokens = inputTokens - cachedInputTokens;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
// #region Find matching model and set tier based on threshold
|
|
509
541
|
const tierThresholds = {
|
|
510
542
|
'gemini-1.5-pro': 128_000,
|
|
511
543
|
'gemini-2.5-pro': 200_000,
|
|
544
|
+
'gemini-3-pro': 200_000,
|
|
512
545
|
};
|
|
513
546
|
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
547
|
+
let inTier = '';
|
|
548
|
+
let outTier = '';
|
|
549
|
+
let crTier = '';
|
|
517
550
|
|
|
518
|
-
// Find matching model and set tier based on threshold
|
|
519
551
|
const modelWithTier = Object.keys(tierThresholds).find((model) => modelName.includes(model));
|
|
520
552
|
if (modelWithTier) {
|
|
521
|
-
|
|
553
|
+
inTier = inputTokens <= tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
|
|
554
|
+
outTier = outputTokens <= tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
|
|
555
|
+
crTier = cachedInputTokens <= tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
|
|
522
556
|
}
|
|
557
|
+
// #endregion
|
|
558
|
+
|
|
559
|
+
// #region Calculate audio input tokens
|
|
560
|
+
// Since Gemini 2.5 Flash has a different pricing model for audio input tokens, we need to report audio input tokens separately.
|
|
561
|
+
let audioInputTokens = 0;
|
|
562
|
+
let cachedAudioInputTokens = 0;
|
|
563
|
+
const isFlashModel = ['gemini-2.5-flash'].includes(modelName);
|
|
564
|
+
|
|
565
|
+
if (isFlashModel) {
|
|
566
|
+
// There is no concept of different pricing for Flash models based on token tiers (e.g., less than or greater than 200k),
|
|
567
|
+
// so we don't need to provide tier information for audio input tokens.
|
|
568
|
+
audioInputTokens = usage?.promptTokensDetails?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;
|
|
569
|
+
|
|
570
|
+
// Subtract the cached audio input tokens from both the audio input tokens and the total cached input tokens.
|
|
571
|
+
cachedAudioInputTokens = usage?.cacheTokensDetails?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;
|
|
572
|
+
if (cachedAudioInputTokens) {
|
|
573
|
+
audioInputTokens = audioInputTokens - cachedAudioInputTokens;
|
|
574
|
+
cachedInputTokens = cachedInputTokens - cachedAudioInputTokens;
|
|
575
|
+
}
|
|
523
576
|
|
|
577
|
+
inputTokens = inputTokens - audioInputTokens;
|
|
578
|
+
}
|
|
524
579
|
// #endregion
|
|
525
580
|
|
|
526
581
|
const usageData = {
|
|
527
582
|
sourceId: `llm:${modelName}`,
|
|
528
|
-
input_tokens:
|
|
529
|
-
output_tokens:
|
|
583
|
+
input_tokens: inputTokens,
|
|
584
|
+
output_tokens: outputTokens,
|
|
530
585
|
input_tokens_audio: audioInputTokens,
|
|
531
|
-
input_tokens_cache_read:
|
|
586
|
+
input_tokens_cache_read: cachedInputTokens,
|
|
587
|
+
input_tokens_cache_read_audio: cachedAudioInputTokens,
|
|
532
588
|
input_tokens_cache_write: 0,
|
|
533
|
-
reasoning_tokens: usage?.thoughtsTokenCount,
|
|
589
|
+
// reasoning_tokens: usage?.thoughtsTokenCount, // * reasoning tokens are included in the output tokens.
|
|
534
590
|
keySource: metadata.keySource,
|
|
535
591
|
agentId: metadata.agentId,
|
|
536
592
|
teamId: metadata.teamId,
|
|
537
|
-
|
|
593
|
+
inTier,
|
|
594
|
+
outTier,
|
|
595
|
+
crTier,
|
|
538
596
|
};
|
|
539
597
|
SystemEvents.emit('USAGE:LLM', usageData);
|
|
540
598
|
|
|
@@ -665,12 +723,17 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
665
723
|
}
|
|
666
724
|
|
|
667
725
|
if (part.functionCall) {
|
|
668
|
-
|
|
726
|
+
const functionCallPart: any = {
|
|
669
727
|
functionCall: {
|
|
670
728
|
name: part.functionCall.name,
|
|
671
729
|
args: parseFunctionArgs(part.functionCall.args),
|
|
672
730
|
},
|
|
673
|
-
}
|
|
731
|
+
};
|
|
732
|
+
// Preserve thoughtSignature if present for Google AI reasoning context
|
|
733
|
+
if ((part as any).thoughtSignature) {
|
|
734
|
+
functionCallPart.thoughtSignature = (part as any).thoughtSignature;
|
|
735
|
+
}
|
|
736
|
+
content.push(functionCallPart);
|
|
674
737
|
continue;
|
|
675
738
|
}
|
|
676
739
|
|
|
@@ -699,12 +762,17 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
699
762
|
const hasFunctionCall = content.some((part) => part.functionCall);
|
|
700
763
|
if (!hasFunctionCall && toolsData.length > 0) {
|
|
701
764
|
toolsData.forEach((toolCall) => {
|
|
702
|
-
|
|
765
|
+
const functionCallPart: any = {
|
|
703
766
|
functionCall: {
|
|
704
767
|
name: toolCall.name,
|
|
705
768
|
args: parseFunctionArgs(toolCall.arguments),
|
|
706
769
|
},
|
|
707
|
-
}
|
|
770
|
+
};
|
|
771
|
+
// Preserve thoughtSignature if present for Google AI reasoning context
|
|
772
|
+
if (toolCall.thoughtSignature) {
|
|
773
|
+
functionCallPart.thoughtSignature = toolCall.thoughtSignature;
|
|
774
|
+
}
|
|
775
|
+
content.push(functionCallPart);
|
|
708
776
|
});
|
|
709
777
|
}
|
|
710
778
|
|
|
@@ -811,6 +879,10 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
811
879
|
name: part.functionCall.name,
|
|
812
880
|
args: parseFunctionArgs(part.functionCall.args),
|
|
813
881
|
};
|
|
882
|
+
// Preserve thoughtSignature if present for Google AI reasoning context
|
|
883
|
+
if ((part as any).thoughtSignature) {
|
|
884
|
+
normalizedPart.thoughtSignature = (part as any).thoughtSignature;
|
|
885
|
+
}
|
|
814
886
|
}
|
|
815
887
|
|
|
816
888
|
if (part.functionResponse) {
|
|
@@ -839,12 +911,17 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
839
911
|
pushTextPart(normalizedParts, contentPart.text);
|
|
840
912
|
} else if ('functionCall' in contentPart && (contentPart as any).functionCall) {
|
|
841
913
|
const functionCallPart = (contentPart as any).functionCall;
|
|
842
|
-
|
|
914
|
+
const normalizedFunctionCall: any = {
|
|
843
915
|
functionCall: {
|
|
844
916
|
name: functionCallPart.name,
|
|
845
917
|
args: parseFunctionArgs(functionCallPart.args),
|
|
846
918
|
},
|
|
847
|
-
}
|
|
919
|
+
};
|
|
920
|
+
// Preserve thoughtSignature if present for Google AI reasoning context
|
|
921
|
+
if ((contentPart as any).thoughtSignature) {
|
|
922
|
+
normalizedFunctionCall.thoughtSignature = (contentPart as any).thoughtSignature;
|
|
923
|
+
}
|
|
924
|
+
normalizedParts.push(normalizedFunctionCall);
|
|
848
925
|
} else if ('functionResponse' in contentPart && (contentPart as any).functionResponse) {
|
|
849
926
|
const functionResponsePart = (contentPart as any).functionResponse;
|
|
850
927
|
normalizedParts.push({
|
|
@@ -882,12 +959,17 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
882
959
|
for (const toolCall of message.tool_calls) {
|
|
883
960
|
if (!toolCall?.function?.name) continue;
|
|
884
961
|
|
|
885
|
-
|
|
962
|
+
const normalizedFunctionCall: any = {
|
|
886
963
|
functionCall: {
|
|
887
964
|
name: toolCall.function.name,
|
|
888
965
|
args: parseFunctionArgs(toolCall.function.arguments),
|
|
889
966
|
},
|
|
890
|
-
}
|
|
967
|
+
};
|
|
968
|
+
// Preserve thoughtSignature if present for Google AI reasoning context
|
|
969
|
+
if ((toolCall as any).thoughtSignature) {
|
|
970
|
+
normalizedFunctionCall.thoughtSignature = (toolCall as any).thoughtSignature;
|
|
971
|
+
}
|
|
972
|
+
normalizedParts.push(normalizedFunctionCall);
|
|
891
973
|
}
|
|
892
974
|
}
|
|
893
975
|
|
|
@@ -108,6 +108,7 @@ export class GroqConnector extends LLMConnector {
|
|
|
108
108
|
const stream = await groq.chat.completions.create({ ...body, stream: true, stream_options: { include_usage: true } });
|
|
109
109
|
|
|
110
110
|
let toolsData: ToolData[] = [];
|
|
111
|
+
let finishReason = 'stop';
|
|
111
112
|
|
|
112
113
|
(async () => {
|
|
113
114
|
for await (const chunk of stream as any) {
|
|
@@ -117,10 +118,10 @@ export class GroqConnector extends LLMConnector {
|
|
|
117
118
|
if (usage) {
|
|
118
119
|
usage_data.push(usage);
|
|
119
120
|
}
|
|
120
|
-
emitter.emit(
|
|
121
|
+
emitter.emit(TLLMEvent.Data, delta);
|
|
121
122
|
|
|
122
123
|
if (delta?.content) {
|
|
123
|
-
emitter.emit(
|
|
124
|
+
emitter.emit(TLLMEvent.Content, delta.content);
|
|
124
125
|
}
|
|
125
126
|
|
|
126
127
|
if (delta?.tool_calls) {
|
|
@@ -139,24 +140,35 @@ export class GroqConnector extends LLMConnector {
|
|
|
139
140
|
}
|
|
140
141
|
});
|
|
141
142
|
}
|
|
143
|
+
|
|
144
|
+
// Capture finish reason
|
|
145
|
+
if (chunk.choices[0]?.finish_reason) {
|
|
146
|
+
finishReason = chunk.choices[0].finish_reason;
|
|
147
|
+
}
|
|
142
148
|
}
|
|
143
149
|
|
|
144
150
|
if (toolsData.length > 0) {
|
|
145
151
|
emitter.emit(TLLMEvent.ToolInfo, toolsData);
|
|
146
152
|
}
|
|
147
153
|
|
|
154
|
+
const reportedUsage: any[] = [];
|
|
148
155
|
usage_data.forEach((usage) => {
|
|
149
|
-
|
|
150
|
-
this.reportUsage(usage, {
|
|
156
|
+
const reported = this.reportUsage(usage, {
|
|
151
157
|
modelEntryName: context.modelEntryName,
|
|
152
158
|
keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
|
|
153
159
|
agentId: context.agentId,
|
|
154
160
|
teamId: context.teamId,
|
|
155
161
|
});
|
|
162
|
+
reportedUsage.push(reported);
|
|
156
163
|
});
|
|
157
164
|
|
|
165
|
+
// Emit interrupted event if finishReason is not 'stop'
|
|
166
|
+
if (finishReason !== 'stop') {
|
|
167
|
+
emitter.emit(TLLMEvent.Interrupted, finishReason);
|
|
168
|
+
}
|
|
169
|
+
|
|
158
170
|
setTimeout(() => {
|
|
159
|
-
emitter.emit(
|
|
171
|
+
emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
|
|
160
172
|
}, 100);
|
|
161
173
|
})();
|
|
162
174
|
|
|
@@ -138,14 +138,17 @@ export class OllamaConnector extends LLMConnector {
|
|
|
138
138
|
|
|
139
139
|
let toolsData: ToolData[] = [];
|
|
140
140
|
let fullContent = '';
|
|
141
|
+
let finishReason = 'stop';
|
|
141
142
|
|
|
142
143
|
(async () => {
|
|
143
144
|
for await (const chunk of stream) {
|
|
145
|
+
emitter.emit(TLLMEvent.Data, chunk);
|
|
146
|
+
|
|
144
147
|
// Emit content deltas
|
|
145
148
|
if (chunk.message?.content) {
|
|
146
149
|
const content = chunk.message.content;
|
|
147
150
|
fullContent += content;
|
|
148
|
-
emitter.emit(
|
|
151
|
+
emitter.emit(TLLMEvent.Content, content);
|
|
149
152
|
}
|
|
150
153
|
|
|
151
154
|
// Handle tool calls accumulation
|
|
@@ -181,6 +184,11 @@ export class OllamaConnector extends LLMConnector {
|
|
|
181
184
|
};
|
|
182
185
|
usage_data.push(usage);
|
|
183
186
|
}
|
|
187
|
+
|
|
188
|
+
// Capture finish reason from Ollama's done_reason
|
|
189
|
+
if (chunk.done_reason) {
|
|
190
|
+
finishReason = chunk.done_reason;
|
|
191
|
+
}
|
|
184
192
|
}
|
|
185
193
|
|
|
186
194
|
// Emit tool info if tools were requested
|
|
@@ -189,18 +197,25 @@ export class OllamaConnector extends LLMConnector {
|
|
|
189
197
|
}
|
|
190
198
|
|
|
191
199
|
// Report usage
|
|
200
|
+
const reportedUsage: any[] = [];
|
|
192
201
|
usage_data.forEach((usage) => {
|
|
193
|
-
this.reportUsage(usage, {
|
|
202
|
+
const reported = this.reportUsage(usage, {
|
|
194
203
|
modelEntryName: context.modelEntryName,
|
|
195
204
|
keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
|
|
196
205
|
agentId: context.agentId,
|
|
197
206
|
teamId: context.teamId,
|
|
198
207
|
});
|
|
208
|
+
reportedUsage.push(reported);
|
|
199
209
|
});
|
|
200
210
|
|
|
211
|
+
// Emit interrupted event if finishReason is not 'stop'
|
|
212
|
+
if (finishReason !== 'stop') {
|
|
213
|
+
emitter.emit(TLLMEvent.Interrupted, finishReason);
|
|
214
|
+
}
|
|
215
|
+
|
|
201
216
|
// Final end event
|
|
202
217
|
setTimeout(() => {
|
|
203
|
-
emitter.emit(
|
|
218
|
+
emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
|
|
204
219
|
}, 100);
|
|
205
220
|
})();
|
|
206
221
|
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
TLLMChatResponse,
|
|
15
15
|
ILLMRequestContext,
|
|
16
16
|
TLLMPreparedParams,
|
|
17
|
+
TLLMEvent,
|
|
17
18
|
} from '@sre/types/LLM.types';
|
|
18
19
|
import { LLMHelper } from '@sre/LLMManager/LLM.helper';
|
|
19
20
|
|
|
@@ -97,6 +98,8 @@ export class PerplexityConnector extends LLMConnector {
|
|
|
97
98
|
//fallback to chatRequest
|
|
98
99
|
const emitter = new EventEmitter();
|
|
99
100
|
|
|
101
|
+
// TODO: need to implement proper streaming for Perplexity
|
|
102
|
+
|
|
100
103
|
setTimeout(() => {
|
|
101
104
|
try {
|
|
102
105
|
logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
|
|
@@ -105,17 +108,23 @@ export class PerplexityConnector extends LLMConnector {
|
|
|
105
108
|
const finishReason = respose.finishReason;
|
|
106
109
|
const usage = respose.usage;
|
|
107
110
|
|
|
108
|
-
emitter.emit(
|
|
109
|
-
emitter.emit(
|
|
110
|
-
|
|
111
|
+
emitter.emit(TLLMEvent.Data, respose);
|
|
112
|
+
emitter.emit(TLLMEvent.Content, respose.content);
|
|
113
|
+
|
|
114
|
+
// Only emit Interrupted if finishReason is not 'stop'
|
|
115
|
+
if (finishReason !== 'stop') {
|
|
116
|
+
emitter.emit(TLLMEvent.Interrupted, finishReason);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
emitter.emit(TLLMEvent.End, [], [usage], finishReason);
|
|
111
120
|
})
|
|
112
121
|
.catch((error) => {
|
|
113
|
-
emitter.emit(
|
|
122
|
+
emitter.emit(TLLMEvent.Error, error.message || error.toString());
|
|
114
123
|
});
|
|
115
124
|
//emitter.emit('finishReason', respose.finishReason);
|
|
116
125
|
} catch (error) {
|
|
117
126
|
logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
|
|
118
|
-
emitter.emit(
|
|
127
|
+
emitter.emit(TLLMEvent.Error, error.message || error.toString());
|
|
119
128
|
}
|
|
120
129
|
}, 100);
|
|
121
130
|
|
|
@@ -138,12 +138,14 @@ export class VertexAIConnector extends LLMConnector {
|
|
|
138
138
|
for await (const chunk of streamResult.stream) {
|
|
139
139
|
const chunkText = chunk.candidates?.[0]?.content?.parts?.[0]?.text || '';
|
|
140
140
|
if (chunkText) {
|
|
141
|
-
emitter.emit(
|
|
141
|
+
emitter.emit(TLLMEvent.Content, chunkText);
|
|
142
142
|
}
|
|
143
143
|
}
|
|
144
144
|
|
|
145
145
|
const aggregatedResponse = await streamResult.response;
|
|
146
146
|
|
|
147
|
+
emitter.emit(TLLMEvent.Data, aggregatedResponse);
|
|
148
|
+
|
|
147
149
|
// Check for function calls in the final response (like Anthropic does)
|
|
148
150
|
const functionCalls = aggregatedResponse.candidates?.[0]?.content?.parts?.filter((part) => part.functionCall);
|
|
149
151
|
if (functionCalls && functionCalls.length > 0) {
|
|
@@ -176,15 +178,15 @@ export class VertexAIConnector extends LLMConnector {
|
|
|
176
178
|
const finishReason = (aggregatedResponse.candidates?.[0]?.finishReason || 'stop').toLowerCase();
|
|
177
179
|
|
|
178
180
|
if (finishReason !== 'stop') {
|
|
179
|
-
emitter.emit(
|
|
181
|
+
emitter.emit(TLLMEvent.Interrupted, finishReason);
|
|
180
182
|
}
|
|
181
183
|
|
|
182
184
|
setTimeout(() => {
|
|
183
|
-
emitter.emit(
|
|
185
|
+
emitter.emit(TLLMEvent.End, toolsData, usageData, finishReason);
|
|
184
186
|
}, 100);
|
|
185
187
|
} catch (error) {
|
|
186
188
|
logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
|
|
187
|
-
emitter.emit(
|
|
189
|
+
emitter.emit(TLLMEvent.Error, error);
|
|
188
190
|
}
|
|
189
191
|
}, 100);
|
|
190
192
|
|
|
@@ -73,7 +73,7 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
|
|
|
73
73
|
// Step 3: Emit final events
|
|
74
74
|
this.emitFinalEvents(emitter, finalToolsData, reportedUsage, finishReason);
|
|
75
75
|
} catch (error) {
|
|
76
|
-
emitter.emit(
|
|
76
|
+
emitter.emit(TLLMEvent.Error, error);
|
|
77
77
|
}
|
|
78
78
|
})();
|
|
79
79
|
|
|
@@ -267,11 +267,11 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
|
|
|
267
267
|
}
|
|
268
268
|
|
|
269
269
|
// Emit data event for delta
|
|
270
|
-
emitter.emit(
|
|
270
|
+
emitter.emit(TLLMEvent.Data, delta);
|
|
271
271
|
|
|
272
272
|
// Handle content deltas
|
|
273
273
|
if (!delta?.tool_calls && delta?.content) {
|
|
274
|
-
emitter.emit(
|
|
274
|
+
emitter.emit(TLLMEvent.Content, delta?.content, delta?.role);
|
|
275
275
|
}
|
|
276
276
|
|
|
277
277
|
// Handle tool calls
|
|
@@ -350,12 +350,12 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
|
|
|
350
350
|
|
|
351
351
|
// Emit interrupted event if finishReason is not 'stop'
|
|
352
352
|
if (finishReason !== 'stop') {
|
|
353
|
-
emitter.emit(
|
|
353
|
+
emitter.emit(TLLMEvent.Interrupted, finishReason);
|
|
354
354
|
}
|
|
355
355
|
|
|
356
356
|
// Emit end event with setImmediate to ensure proper event ordering
|
|
357
357
|
setImmediate(() => {
|
|
358
|
-
emitter.emit(
|
|
358
|
+
emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
|
|
359
359
|
});
|
|
360
360
|
}
|
|
361
361
|
|
|
@@ -119,7 +119,7 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
|
|
|
119
119
|
// Step 3: Emit final events
|
|
120
120
|
this.emitFinalEvents(emitter, finalToolsData, reportedUsage, finishReason);
|
|
121
121
|
} catch (error) {
|
|
122
|
-
emitter.emit(
|
|
122
|
+
emitter.emit(TLLMEvent.Error, error);
|
|
123
123
|
}
|
|
124
124
|
})();
|
|
125
125
|
|
|
@@ -362,8 +362,10 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
|
|
|
362
362
|
role: 'assistant',
|
|
363
363
|
content: part.delta,
|
|
364
364
|
};
|
|
365
|
-
|
|
366
|
-
|
|
365
|
+
|
|
366
|
+
// TODO: the Data event payload here is inconsistent with the one emitted by the Chat Completions API interface; check and fix it.
|
|
367
|
+
emitter.emit(TLLMEvent.Data, deltaMsg);
|
|
368
|
+
emitter.emit(TLLMEvent.Content, part.delta, 'assistant');
|
|
367
369
|
}
|
|
368
370
|
} catch (error) {
|
|
369
371
|
console.warn('Error handling output text delta:', error);
|
|
@@ -412,6 +414,7 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
|
|
|
412
414
|
}
|
|
413
415
|
|
|
414
416
|
if (addingNew) {
|
|
417
|
+
// TODO: Check whether this event is being used.
|
|
415
418
|
emitter.emit('tool_call_started', {
|
|
416
419
|
id: callId,
|
|
417
420
|
name: functionName || '',
|
|
@@ -458,6 +461,7 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
|
|
|
458
461
|
}
|
|
459
462
|
|
|
460
463
|
const entry = existingIndex === -1 ? updated[finalIndex] : updated[finalIndex];
|
|
464
|
+
// TODO: Check whether this event is being used.
|
|
461
465
|
emitter.emit('tool_call_progress', {
|
|
462
466
|
id: entry.callId || itemId,
|
|
463
467
|
name: entry.name,
|
|
@@ -489,6 +493,7 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
|
|
|
489
493
|
const updated = toolsData.map((t, idx) => (idx === toolIndex ? { ...t, arguments: finalArguments } : t));
|
|
490
494
|
|
|
491
495
|
const updatedEntry = updated[toolIndex];
|
|
496
|
+
// TODO: Check whether this event is being used.
|
|
492
497
|
emitter.emit('tool_call_completed', {
|
|
493
498
|
id: updatedEntry.callId || itemId,
|
|
494
499
|
name: updatedEntry.name,
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
ILLMRequestFuncParams,
|
|
15
15
|
TLLMChatResponse,
|
|
16
16
|
ILLMRequestContext,
|
|
17
|
+
TLLMEvent,
|
|
17
18
|
} from '@sre/types/LLM.types';
|
|
18
19
|
import { LLMHelper } from '@sre/LLMManager/LLM.helper';
|
|
19
20
|
|
|
@@ -202,10 +203,10 @@ export class xAIConnector extends LLMConnector {
|
|
|
202
203
|
}
|
|
203
204
|
|
|
204
205
|
if (delta) {
|
|
205
|
-
emitter.emit(
|
|
206
|
+
emitter.emit(TLLMEvent.Data, delta);
|
|
206
207
|
|
|
207
208
|
if (delta.content) {
|
|
208
|
-
emitter.emit(
|
|
209
|
+
emitter.emit(TLLMEvent.Content, delta.content, delta.role);
|
|
209
210
|
}
|
|
210
211
|
|
|
211
212
|
if (delta.tool_calls) {
|
|
@@ -238,11 +239,11 @@ export class xAIConnector extends LLMConnector {
|
|
|
238
239
|
if (citations && citations.length > 0) {
|
|
239
240
|
const citationsText = '\n\n**Sources:**\n' + citations.map((url, index) => `${index + 1}. ${url}`).join('\n');
|
|
240
241
|
|
|
241
|
-
emitter.emit(
|
|
242
|
+
emitter.emit(TLLMEvent.Content, citationsText, 'assistant');
|
|
242
243
|
}
|
|
243
244
|
|
|
244
245
|
if (toolsData.length > 0) {
|
|
245
|
-
emitter.emit(
|
|
246
|
+
emitter.emit(TLLMEvent.ToolInfo, toolsData);
|
|
246
247
|
}
|
|
247
248
|
|
|
248
249
|
// Report usage if available
|
|
@@ -257,20 +258,20 @@ export class xAIConnector extends LLMConnector {
|
|
|
257
258
|
}
|
|
258
259
|
|
|
259
260
|
if (finishReason !== 'stop') {
|
|
260
|
-
emitter.emit(
|
|
261
|
+
emitter.emit(TLLMEvent.Interrupted, finishReason);
|
|
261
262
|
}
|
|
262
263
|
|
|
263
264
|
setTimeout(() => {
|
|
264
|
-
emitter.emit(
|
|
265
|
+
emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
|
|
265
266
|
}, 100);
|
|
266
267
|
});
|
|
267
268
|
|
|
268
269
|
response.data.on('error', (error) => {
|
|
269
|
-
emitter.emit(
|
|
270
|
+
emitter.emit(TLLMEvent.Error, error);
|
|
270
271
|
});
|
|
271
272
|
} catch (error) {
|
|
272
273
|
logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
|
|
273
|
-
emitter.emit(
|
|
274
|
+
emitter.emit(TLLMEvent.Error, error);
|
|
274
275
|
}
|
|
275
276
|
|
|
276
277
|
return emitter;
|