@smythos/sre 1.7.41 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/CHANGELOG +136 -64
  2. package/dist/index.js +65 -50
  3. package/dist/index.js.map +1 -1
  4. package/dist/types/Components/Async.class.d.ts +11 -5
  5. package/dist/types/index.d.ts +2 -0
  6. package/dist/types/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.d.ts +45 -0
  7. package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +32 -1
  8. package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +25 -2
  9. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.d.ts +22 -2
  10. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.d.ts +2 -2
  11. package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +27 -2
  12. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +22 -2
  13. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Ollama.class.d.ts +22 -2
  14. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.d.ts +3 -3
  15. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.d.ts +23 -3
  16. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +2 -2
  17. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.d.ts +2 -2
  18. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +2 -2
  19. package/dist/types/subsystems/LLMManager/LLM.service/connectors/xAI.class.d.ts +3 -3
  20. package/dist/types/subsystems/MemoryManager/LLMContext.d.ts +10 -3
  21. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.d.ts +24 -0
  22. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.d.ts +49 -0
  23. package/dist/types/types/LLM.types.d.ts +30 -1
  24. package/package.json +4 -3
  25. package/src/Components/APICall/OAuth.helper.ts +16 -1
  26. package/src/Components/APIEndpoint.class.ts +11 -4
  27. package/src/Components/Async.class.ts +38 -5
  28. package/src/Components/GenAILLM.class.ts +13 -7
  29. package/src/Components/LLMAssistant.class.ts +3 -1
  30. package/src/Components/LogicAND.class.ts +13 -0
  31. package/src/Components/LogicAtLeast.class.ts +18 -0
  32. package/src/Components/LogicAtMost.class.ts +19 -0
  33. package/src/Components/LogicOR.class.ts +12 -2
  34. package/src/Components/LogicXOR.class.ts +11 -0
  35. package/src/constants.ts +1 -1
  36. package/src/helpers/Conversation.helper.ts +10 -8
  37. package/src/index.ts +2 -0
  38. package/src/index.ts.bak +2 -0
  39. package/src/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.ts +190 -0
  40. package/src/subsystems/AgentManager/AgentData.service/index.ts +2 -0
  41. package/src/subsystems/LLMManager/LLM.helper.ts +117 -1
  42. package/src/subsystems/LLMManager/LLM.inference.ts +136 -67
  43. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +13 -6
  44. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +157 -33
  45. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +9 -8
  46. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +121 -83
  47. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +125 -62
  48. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +168 -76
  49. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +18 -8
  50. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +8 -4
  51. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +50 -8
  52. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +30 -16
  53. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +2 -2
  54. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +29 -15
  55. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +10 -8
  56. package/src/subsystems/MemoryManager/LLMContext.ts +27 -8
  57. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +467 -120
  58. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.ts +203 -0
  59. package/src/types/LLM.types.ts +31 -1
  60. package/src/types/node-sqlite.d.ts +45 -0
package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts
@@ -27,6 +27,7 @@ import {
     ILLMRequestContext,
     TLLMPreparedParams,
     LLMInterface,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 
@@ -39,18 +40,6 @@ import { hookAsync } from '@sre/Core/HookService';
 
 const logger = Logger('GoogleAIConnector');
 
-const MODELS_SUPPORT_SYSTEM_INSTRUCTION = [
-    'gemini-1.5-pro-exp-0801',
-    'gemini-1.5-pro-latest',
-    'gemini-1.5-pro-latest',
-    'gemini-1.5-pro',
-    'gemini-1.5-pro-001',
-    'gemini-1.5-flash-latest',
-    'gemini-1.5-flash-001',
-    'gemini-1.5-flash',
-];
-const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;
-
 // Supported file MIME types for Google AI's Gemini models
 const VALID_MIME_TYPES = [
     ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
@@ -84,7 +73,7 @@ export class GoogleAIConnector extends LLMConnector {
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
 
@@ -94,6 +83,7 @@ export class GoogleAIConnector extends LLMConnector {
                 generationConfig: body.generationConfig,
                 systemInstruction: body.systemInstruction,
                 promptConfig,
+                abortSignal,
             });
 
             const genAI = await this.getClient(context);
@@ -108,7 +98,7 @@ export class GoogleAIConnector extends LLMConnector {
 
             const response = await genAI.models.generateContent(requestPayload as any);
             const content = response.text ?? '';
-            const finishReason = (response.candidates?.[0]?.finishReason || 'stop').toLowerCase();
+            const finishReason = LLMHelper.normalizeFinishReason(response.candidates?.[0]?.finishReason || TLLMFinishReason.Stop);
             const usage = response.usageMetadata as UsageMetadataWithThoughtsToken | undefined;
 
             if (usage) {
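
LLMHelper.normalizeFinishReason is new in this release (part of the +117 lines in LLM.helper.ts, which this diff does not show) and replaces per-connector string handling such as the deleted .toLowerCase() call. Since its implementation is not visible here, the following TypeScript sketch is illustrative only: a plausible mapping from provider-specific finish strings onto the shared TLLMFinishReason enum, of which only the Stop, Abort, and Error members are confirmed by these hunks.

// Illustrative sketch only; the real normalizeFinishReason lives in LLM.helper.ts.
enum TLLMFinishReason {
    Stop = 'stop',
    Abort = 'abort',
    Error = 'error',
    // the real enum likely has more members (length/tool-call limits, etc.)
}

function normalizeFinishReason(raw?: string | null): TLLMFinishReason {
    switch ((raw ?? '').toLowerCase()) {
        case 'stop':
        case 'end_turn': // hypothetical provider-specific spelling
            return TLLMFinishReason.Stop;
        case 'abort':
        case 'aborted': // hypothetical provider-specific spelling
            return TLLMFinishReason.Abort;
        case 'error':
            return TLLMFinishReason.Error;
        default:
            return TLLMFinishReason.Stop; // the call sites in this diff fall back to Stop
    }
}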
@@ -166,8 +156,28 @@ export class GoogleAIConnector extends LLMConnector {
         }
     }
 
+    /**
+     * Stream request implementation.
+     *
+     * **Error Handling Pattern:**
+     * - Always returns emitters, never throws errors - ensures consistent error handling
+     * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+     * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+     *
+     * **Why setImmediate?**
+     * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+     * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+     * listeners are attached. This prevents race conditions where synchronous event emission
+     * would occur before listeners can be registered.
+     *
+     * @param acRequest - Access request for authorization
+     * @param body - Request body parameters
+     * @param context - LLM request context
+     * @param abortSignal - AbortSignal for cancellation
+     * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+     */
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
         logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
         const emitter = new EventEmitter();
 
@@ -177,6 +187,7 @@ export class GoogleAIConnector extends LLMConnector {
                 generationConfig: body.generationConfig,
                 systemInstruction: body.systemInstruction,
                 promptConfig,
+                abortSignal,
             });
 
             const genAI = await this.getClient(context);
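
The "Why setImmediate?" note in the JSDoc above is the heart of the new streaming contract. A minimal standalone Node.js sketch (not SRE code) reproduces the race it guards against; a plain 'end' event is used here to avoid EventEmitter's throw-on-unhandled 'error' semantics:

import { EventEmitter } from 'events';

// An async factory that emits synchronously loses the event: by the time the
// caller has awaited the promise and attached a listener, 'end' already fired.
async function emitsTooEarly(): Promise<EventEmitter> {
    const emitter = new EventEmitter();
    emitter.emit('end', 'lost');
    return emitter;
}

// Deferring with setImmediate pushes emission to the next event-loop tick,
// after the await has resolved and the listener is registered.
async function emitsAfterListeners(): Promise<EventEmitter> {
    const emitter = new EventEmitter();
    setImmediate(() => emitter.emit('end', 'delivered'));
    return emitter;
}

(async () => {
    (await emitsTooEarly()).on('end', (msg) => console.log(msg)); // never logs
    (await emitsAfterListeners()).on('end', (msg) => console.log(msg)); // logs 'delivered'
})();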
@@ -251,7 +262,7 @@ export class GoogleAIConnector extends LLMConnector {
                         emitter.emit(TLLMEvent.ToolInfo, toolsData);
                     }
 
-                    const finishReason = 'stop'; // GoogleAI doesn't provide finishReason in streaming
+                    const finishReason: TLLMFinishReason = TLLMFinishReason.Stop; // GoogleAI doesn't provide finishReason in streaming
                     const reportedUsage: any[] = [];
 
                     if (usage) {
@@ -267,20 +278,51 @@ export class GoogleAIConnector extends LLMConnector {
                     // Note: GoogleAI stream doesn't provide explicit finish reasons
                     // If we had a non-stop finish reason, we would emit Interrupted here
 
-                    setTimeout(() => {
-                        emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
-                    }, 100);
-                } catch (error) {
-                    logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
+                    setTimeout(() => {
+                        emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
+                    }, 100);
+                } catch (error) {
+                    const isAbort = (error as any)?.name === 'AbortError' || abortSignal?.aborted;
+                    if (isAbort) {
+                        logger.debug(`streamRequest ${this.name} aborted`, error, acRequest.candidate);
+                        // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                        const abortError = new DOMException('Request aborted', 'AbortError');
+                        setImmediate(() => {
+                            emitter.emit(TLLMEvent.Abort, abortError);
+                            emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                        });
+                    } else {
+                        logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
+                        setImmediate(() => {
                             emitter.emit(TLLMEvent.Error, error);
-                }
+                            emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+                        });
+                    }
+                }
             })();
         });
 
         return emitter;
     } catch (error: any) {
+        const isAbort = error?.name === 'AbortError' || abortSignal?.aborted;
+
+        if (isAbort) {
+            // Always use DOMException with name 'AbortError' per Web API standards for consistency
+            const abortError = new DOMException('Request aborted', 'AbortError');
+            logger.debug(`streamRequest ${this.name} aborted`, abortError, acRequest.candidate);
+            setImmediate(() => {
+                emitter.emit(TLLMEvent.Abort, abortError);
+                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+            });
+            return emitter;
+        }
+
         logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-        throw error;
+        setImmediate(() => {
+            emitter.emit(TLLMEvent.Error, error);
+            emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+        });
+        return emitter;
     }
 }
 // #region Image Generation, will be moved to a different subsystem/service
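
Taken together, the two abort branches above give callers a predictable shutdown sequence: Abort (carrying a standard DOMException named 'AbortError'), then End with TLLMFinishReason.Abort. The sketch below shows the consumer side under stated assumptions: streamRequest is protected, so llm.promptStream is a hypothetical public wrapper; the event names and the abortSignal parameter come from the hunks above.

const controller = new AbortController();

// llm.promptStream(...) is a hypothetical wrapper around streamRequest.
const emitter = await llm.promptStream({ body, abortSignal: controller.signal });

emitter.on(TLLMEvent.Content, (text: string) => process.stdout.write(text));
emitter.on(TLLMEvent.Abort, (err: DOMException) => console.warn(err.message)); // 'Request aborted'
emitter.on(TLLMEvent.Error, (err: Error) => console.error(err));
emitter.on(TLLMEvent.End, (toolsData, usage, finishReason) => {
    // End fires after normal completion and also after Error or Abort,
    // so teardown can live in one handler.
    console.log('finished with', finishReason);
});

// Cancelling mid-stream now surfaces as Abort + End(TLLMFinishReason.Abort)
// instead of a rejected promise.
controller.abort();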
@@ -452,6 +494,18 @@ export class GoogleAIConnector extends LLMConnector {
             }
         }
 
+        // Extract system messages before preparing messages
+        // All modern Gemini models (2.0+, 2.5, 3.0) support native system instruction
+        let systemInstruction = '';
+        const originalMessages = params?.messages || [];
+
+        if (LLMHelper.hasSystemMessage(originalMessages)) {
+            const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(originalMessages);
+            systemInstruction = this.extractMessageContent(systemMessage as TLLMMessageBlock);
+            // Pass only non-system messages to prepareMessages
+            params = { ...params, messages: otherMessages };
+        }
+
         const messages = await this.prepareMessages(params);
 
         const body: TGoogleAIRequestBody = {
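
Both LLMHelper calls used here are defined in LLM.helper.ts, which this diff only lists as +117 -1. From the call sites alone, a plausible shape would be the following illustrative-only sketch (assuming the TLLMMessageBlock and TLLMMessageRole types from '@sre/types/LLM.types'):

// Inferred from usage above; the real implementations may differ.
function hasSystemMessage(messages: TLLMMessageBlock[]): boolean {
    return messages.some((m) => m.role === TLLMMessageRole.System);
}

function separateSystemMessages(messages: TLLMMessageBlock[]): {
    systemMessage: TLLMMessageBlock | undefined;
    otherMessages: TLLMMessageBlock[];
} {
    return {
        systemMessage: messages.find((m) => m.role === TLLMMessageRole.System),
        otherMessages: messages.filter((m) => m.role !== TLLMMessageRole.System),
    };
}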
@@ -461,14 +515,11 @@ export class GoogleAIConnector extends LLMConnector {
 
         const responseFormat = params?.responseFormat || '';
         let responseMimeType = '';
-        let systemInstruction = '';
 
         if (responseFormat === 'json') {
             systemInstruction += JSON_RESPONSE_INSTRUCTION;
 
-            if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
-                responseMimeType = 'application/json';
-            }
+            responseMimeType = 'application/json';
         }
 
         const config: Record<string, any> = {};
@@ -528,10 +579,12 @@ export class GoogleAIConnector extends LLMConnector {
         generationConfig,
         systemInstruction,
         promptConfig,
+        abortSignal,
     }: {
         generationConfig?: TGoogleAIRequestBody['generationConfig'];
         systemInstruction?: TGoogleAIRequestBody['systemInstruction'];
         promptConfig?: Record<string, any>;
+        abortSignal?: AbortSignal;
     }): Record<string, any> | undefined {
         const config: Record<string, any> = {};
 
@@ -557,6 +610,10 @@ export class GoogleAIConnector extends LLMConnector {
             config.systemInstruction = systemInstruction;
         }
 
+        if (abortSignal) {
+            config.abortSignal = abortSignal;
+        }
+
         return Object.keys(config).length > 0 ? config : undefined;
     }
 
@@ -888,12 +945,16 @@ export class GoogleAIConnector extends LLMConnector {
             let functionCallCount = 0; // Track function call parts for thoughtSignature handling
 
             // Map roles to valid Google AI roles
+            // Note: System role is preserved so it can be extracted as systemInstruction later
             switch (_message.role) {
                 case TLLMMessageRole.Assistant:
-                case TLLMMessageRole.System:
                 case TLLMMessageRole.Model:
                     _message.role = TLLMMessageRole.Model;
                     break;
+                case TLLMMessageRole.System:
+                    // Keep system role as-is for later extraction to systemInstruction
+                    _message.role = TLLMMessageRole.System;
+                    break;
                 case TLLMMessageRole.Function:
                 case TLLMMessageRole.Tool:
                     _message.role = TLLMMessageRole.Function;
@@ -1030,6 +1091,31 @@ export class GoogleAIConnector extends LLMConnector {
         });
     }
 
+    /**
+     * Extracts text content from a message block, handling multiple formats (.parts, .content as string/array)
+     * This ensures compatibility with messages that have been normalized by getConsistentMessages or come in various formats
+     */
+    private extractMessageContent(message: TLLMMessageBlock | any): string {
+        if (!message) return '';
+
+        // Handle .parts array format (Google AI native format)
+        if (message.parts && Array.isArray(message.parts)) {
+            return message.parts.map((part) => part?.text || '').join(' ');
+        }
+
+        // Handle .content as string
+        if (typeof message.content === 'string') {
+            return message.content;
+        }
+
+        // Handle .content as array
+        if (Array.isArray(message.content)) {
+            return message.content.map((part) => (typeof part === 'string' ? part : part?.text || '')).join(' ');
+        }
+
+        return '';
+    }
+
     private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | TGoogleAIToolPrompt> {
         let messages: string | TLLMMessageBlock[] | TGoogleAIToolPrompt = (params?.messages as any) || '';
 
@@ -1050,7 +1136,6 @@ export class GoogleAIConnector extends LLMConnector {
         const model = params.model;
 
         let messages: string | TLLMMessageBlock[] = params?.messages || '';
-        let systemInstruction = '';
         const files: BinaryInput[] = params?.files || [];
 
         // #region Upload files
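
With the extractMessageContent helper added above, system prompts survive regardless of how the message block was shaped. Reading the helper's branches directly, each of the following calls returns 'Be brief', and .parts takes precedence when both .parts and .content are present:

extractMessageContent({ role: 'system', parts: [{ text: 'Be' }, { text: 'brief' }] }); // .parts format
extractMessageContent({ role: 'system', content: 'Be brief' }); // string content
extractMessageContent({ role: 'system', content: ['Be', { text: 'brief' }] }); // mixed array content
extractMessageContent(undefined); // '' (null-safe)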
@@ -1101,12 +1186,7 @@ export class GoogleAIConnector extends LLMConnector {
         const fileData = this.getFileData(uploadedFiles);
 
         const userMessage: TLLMMessageBlock = Array.isArray(messages) ? messages.pop() : { role: TLLMMessageRole.User, content: '' };
-        let prompt = userMessage?.content || '';
-
-        // if the the model does not support system instruction, we will add it to the prompt
-        if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
-            prompt = `${prompt}\n${systemInstruction}`;
-        }
+        let prompt = this.extractMessageContent(userMessage);
         //#endregion Separate system message and add JSON response instruction if needed
 
         // Adjust input structure handling for multiple image files to accommodate variations.
@@ -1116,30 +1196,12 @@ export class GoogleAIConnector extends LLMConnector {
     }
 
     private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<TGoogleAIToolPrompt> {
-        let formattedMessages: TLLMMessageBlock[];
-        let systemInstruction = '';
-
-        let messages = params?.messages || [];
-
-        const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
-
-        if (hasSystemMessage) {
-            const separateMessages = LLMHelper.separateSystemMessages(messages);
-            const systemMessageContent = (separateMessages.systemMessage as TLLMMessageBlock)?.content;
-            systemInstruction = typeof systemMessageContent === 'string' ? systemMessageContent : '';
-            formattedMessages = separateMessages.otherMessages;
-        } else {
-            formattedMessages = messages;
-        }
+        const messages = params?.messages || [];
 
         const toolsPrompt: TGoogleAIToolPrompt = {
-            contents: formattedMessages as any,
+            contents: messages as any,
         };
 
-        if (systemInstruction) {
-            toolsPrompt.systemInstruction = systemInstruction;
-        }
-
         if (params?.toolsConfig?.tools) toolsPrompt.tools = params?.toolsConfig?.tools as any;
         if (params?.toolsConfig?.tool_choice) {
             // Map tool choice to valid Google AI function calling modes
@@ -1172,37 +1234,13 @@ export class GoogleAIConnector extends LLMConnector {
         }
     }
 
     private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
-        const model = params.model;
-        let systemInstruction = '';
+        const messages = (params?.messages as TLLMMessageBlock[]) || [];
         let prompt = '';
 
-        const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);
-
-        if ('content' in systemMessage) {
-            systemInstruction = systemMessage.content as string;
-        }
-
-        const responseFormat = params?.responseFormat || '';
-        let responseMimeType = '';
-
-        if (responseFormat === 'json') {
-            systemInstruction += JSON_RESPONSE_INSTRUCTION;
-
-            if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
-                responseMimeType = 'application/json';
-            }
-        }
-
-        if (otherMessages?.length > 0) {
-            // Concatenate messages with prompt and remove messages from params as it's not supported
-            prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
-        }
-
-        // if the the model does not support system instruction, we will add it to the prompt
-        if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
-            prompt = `${prompt}\n${systemInstruction}`;
+        if (messages?.length > 0) {
+            // Concatenate messages using the helper method
+            prompt = messages.map((message) => this.extractMessageContent(message)).join('\n');
         }
-        //#endregion Separate system message and add JSON response instruction if needed
 
         return prompt;
     }
package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts
@@ -14,6 +14,7 @@ import {
     ILLMRequestContext,
     TLLMPreparedParams,
     TLLMToolResultMessageBlock,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 
@@ -52,13 +53,13 @@ export class GroqConnector extends LLMConnector {
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
             const groq = await this.getClient(context);
-            const result = await groq.chat.completions.create(body);
+            const result = await groq.chat.completions.create(body, { signal: abortSignal });
             const message = result?.choices?.[0]?.message;
-            const finishReason = result?.choices?.[0]?.finish_reason;
+            const finishReason = LLMHelper.normalizeFinishReason(result?.choices?.[0]?.finish_reason);
             const toolCalls = message?.tool_calls;
             const usage = result.usage;
             this.reportUsage(usage, {
@@ -97,85 +98,147 @@ export class GroqConnector extends LLMConnector {
         }
     }
 
+    /**
+     * Stream request implementation.
+     *
+     * **Error Handling Pattern:**
+     * - Always returns emitters, never throws errors - ensures consistent error handling
+     * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+     * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+     *
+     * **Why setImmediate?**
+     * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+     * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+     * listeners are attached. This prevents race conditions where synchronous event emission
+     * would occur before listeners can be registered.
+     *
+     * @param acRequest - Access request for authorization
+     * @param body - Request body parameters
+     * @param context - LLM request context
+     * @param abortSignal - AbortSignal for cancellation
+     * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+     */
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
+        const emitter = new EventEmitter();
+
         try {
             logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
-            const emitter = new EventEmitter();
             const usage_data = [];
 
             const groq = await this.getClient(context);
-            const stream = await groq.chat.completions.create({ ...body, stream: true, stream_options: { include_usage: true } });
+            const stream = await groq.chat.completions.create(
+                { ...body, stream: true, stream_options: { include_usage: true } },
+                { signal: abortSignal }
+            );
 
             let toolsData: ToolData[] = [];
-            let finishReason = 'stop';
+            let finishReason: TLLMFinishReason = TLLMFinishReason.Stop;
 
-            (async () => {
-                for await (const chunk of stream as any) {
-                    const delta = chunk.choices[0]?.delta;
-                    const usage = chunk['x_groq']?.usage || chunk['usage'];
+            setImmediate(() => {
+                (async () => {
+                    try {
+                        for await (const chunk of stream as any) {
+                            const delta = chunk.choices[0]?.delta;
+                            const usage = chunk['x_groq']?.usage || chunk['usage'];
 
-                    if (usage) {
-                        usage_data.push(usage);
-                    }
-                    emitter.emit(TLLMEvent.Data, delta);
+                            if (usage) {
+                                usage_data.push(usage);
+                            }
+                            emitter.emit(TLLMEvent.Data, delta);
 
-                    if (delta?.content) {
-                        emitter.emit(TLLMEvent.Content, delta.content);
-                    }
+                            if (delta?.content) {
+                                emitter.emit(TLLMEvent.Content, delta.content);
+                            }
+
+                            if (delta?.tool_calls) {
+                                delta.tool_calls.forEach((toolCall, index) => {
+                                    if (!toolsData[index]) {
+                                        toolsData[index] = {
+                                            index,
+                                            id: toolCall.id,
+                                            type: toolCall.type,
+                                            name: toolCall.function?.name,
+                                            arguments: toolCall.function?.arguments,
+                                            role: 'assistant',
+                                        };
+                                    } else {
+                                        toolsData[index].arguments += toolCall.function?.arguments || '';
+                                    }
+                                });
+                            }
 
-                    if (delta?.tool_calls) {
-                        delta.tool_calls.forEach((toolCall, index) => {
-                            if (!toolsData[index]) {
-                                toolsData[index] = {
-                                    index,
-                                    id: toolCall.id,
-                                    type: toolCall.type,
-                                    name: toolCall.function?.name,
-                                    arguments: toolCall.function?.arguments,
-                                    role: 'assistant',
-                                };
-                            } else {
-                                toolsData[index].arguments += toolCall.function?.arguments || '';
+                            // Capture finish reason
+                            if (chunk.choices[0]?.finish_reason) {
+                                finishReason = LLMHelper.normalizeFinishReason(chunk.choices[0].finish_reason);
                             }
+                        }
+
+                        if (toolsData.length > 0) {
+                            emitter.emit(TLLMEvent.ToolInfo, toolsData);
+                        }
+
+                        const reportedUsage: any[] = [];
+                        usage_data.forEach((usage) => {
+                            const reported = this.reportUsage(usage, {
+                                modelEntryName: context.modelEntryName,
+                                keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+                                agentId: context.agentId,
+                                teamId: context.teamId,
+                            });
+                            reportedUsage.push(reported);
                         });
-                    }
 
-                    // Capture finish reason
-                    if (chunk.choices[0]?.finish_reason) {
-                        finishReason = chunk.choices[0].finish_reason;
+                        // Emit interrupted event if finishReason is not 'stop'
+                        if (finishReason !== TLLMFinishReason.Stop) {
+                            emitter.emit(TLLMEvent.Interrupted, finishReason);
+                        }
+
+                        setTimeout(() => {
+                            emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
+                        }, 100);
+                    } catch (error: any) {
+                        const isAbort = error?.name === 'AbortError' || abortSignal?.aborted;
+                        if (isAbort) {
+                            logger.debug(`streamRequest ${this.name} aborted`, error, acRequest.candidate);
+                            // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                            const abortError = new DOMException('Request aborted', 'AbortError');
+                            emitter.emit(TLLMEvent.Abort, abortError);
+                            setImmediate(() => {
+                                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                            });
+                        } else {
+                            logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
+                            emitter.emit(TLLMEvent.Error, error);
+                            setImmediate(() => {
+                                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+                            });
+                        }
                     }
-                }
-
-                if (toolsData.length > 0) {
-                    emitter.emit(TLLMEvent.ToolInfo, toolsData);
-                }
-
-                const reportedUsage: any[] = [];
-                usage_data.forEach((usage) => {
-                    const reported = this.reportUsage(usage, {
-                        modelEntryName: context.modelEntryName,
-                        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
-                        agentId: context.agentId,
-                        teamId: context.teamId,
-                    });
-                    reportedUsage.push(reported);
-                });
-
-                // Emit interrupted event if finishReason is not 'stop'
-                if (finishReason !== 'stop') {
-                    emitter.emit(TLLMEvent.Interrupted, finishReason);
-                }
-
-                setTimeout(() => {
-                    emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
-                }, 100);
-            })();
+                })();
+            });
 
             return emitter;
         } catch (error: any) {
+            const isAbort = error?.name === 'AbortError' || abortSignal?.aborted;
+
+            if (isAbort) {
+                // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                const abortError = new DOMException('Request aborted', 'AbortError');
+                logger.debug(`streamRequest ${this.name} aborted`, abortError, acRequest.candidate);
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Abort, abortError);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                });
+                return emitter;
+            }
+
             logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-            throw error;
+            setImmediate(() => {
+                emitter.emit(TLLMEvent.Error, error);
+                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+            });
+            return emitter;
        }
    }
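
One detail worth calling out in the Groq stream loop (present in both the old and new code): tool-call arguments arrive as JSON fragments keyed by index, and only the concatenation is valid JSON. A standalone TypeScript sketch of the same accumulation logic, fed three made-up deltas:

type ToolCallDelta = { index: number; id?: string; function?: { name?: string; arguments?: string } };

const toolsData: { index: number; id?: string; name?: string; arguments: string }[] = [];

const deltas: ToolCallDelta[] = [
    { index: 0, id: 'call_1', function: { name: 'get_weather', arguments: '{"city":' } },
    { index: 0, function: { arguments: '"Paris"' } },
    { index: 0, function: { arguments: '}' } },
];

for (const toolCall of deltas) {
    if (!toolsData[toolCall.index]) {
        // First fragment carries the id and function name
        toolsData[toolCall.index] = {
            index: toolCall.index,
            id: toolCall.id,
            name: toolCall.function?.name,
            arguments: toolCall.function?.arguments ?? '',
        };
    } else {
        // Later fragments only append argument text
        toolsData[toolCall.index].arguments += toolCall.function?.arguments || '';
    }
}

console.log(toolsData[0].arguments); // '{"city":"Paris"}' - parseable once the stream ends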