@smythos/sre 1.7.41 → 1.8.0

Files changed (60)
  1. package/CHANGELOG +136 -64
  2. package/dist/index.js +65 -50
  3. package/dist/index.js.map +1 -1
  4. package/dist/types/Components/Async.class.d.ts +11 -5
  5. package/dist/types/index.d.ts +2 -0
  6. package/dist/types/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.d.ts +45 -0
  7. package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +32 -1
  8. package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +25 -2
  9. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.d.ts +22 -2
  10. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.d.ts +2 -2
  11. package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +27 -2
  12. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +22 -2
  13. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Ollama.class.d.ts +22 -2
  14. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.d.ts +3 -3
  15. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.d.ts +23 -3
  16. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +2 -2
  17. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.d.ts +2 -2
  18. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +2 -2
  19. package/dist/types/subsystems/LLMManager/LLM.service/connectors/xAI.class.d.ts +3 -3
  20. package/dist/types/subsystems/MemoryManager/LLMContext.d.ts +10 -3
  21. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.d.ts +24 -0
  22. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.d.ts +49 -0
  23. package/dist/types/types/LLM.types.d.ts +30 -1
  24. package/package.json +4 -3
  25. package/src/Components/APICall/OAuth.helper.ts +16 -1
  26. package/src/Components/APIEndpoint.class.ts +11 -4
  27. package/src/Components/Async.class.ts +38 -5
  28. package/src/Components/GenAILLM.class.ts +13 -7
  29. package/src/Components/LLMAssistant.class.ts +3 -1
  30. package/src/Components/LogicAND.class.ts +13 -0
  31. package/src/Components/LogicAtLeast.class.ts +18 -0
  32. package/src/Components/LogicAtMost.class.ts +19 -0
  33. package/src/Components/LogicOR.class.ts +12 -2
  34. package/src/Components/LogicXOR.class.ts +11 -0
  35. package/src/constants.ts +1 -1
  36. package/src/helpers/Conversation.helper.ts +10 -8
  37. package/src/index.ts +2 -0
  38. package/src/index.ts.bak +2 -0
  39. package/src/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.ts +190 -0
  40. package/src/subsystems/AgentManager/AgentData.service/index.ts +2 -0
  41. package/src/subsystems/LLMManager/LLM.helper.ts +117 -1
  42. package/src/subsystems/LLMManager/LLM.inference.ts +136 -67
  43. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +13 -6
  44. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +157 -33
  45. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +9 -8
  46. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +121 -83
  47. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +125 -62
  48. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +168 -76
  49. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +18 -8
  50. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +8 -4
  51. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +50 -8
  52. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +30 -16
  53. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +2 -2
  54. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +29 -15
  55. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +10 -8
  56. package/src/subsystems/MemoryManager/LLMContext.ts +27 -8
  57. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +467 -120
  58. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.ts +203 -0
  59. package/src/types/LLM.types.ts +31 -1
  60. package/src/types/node-sqlite.d.ts +45 -0
package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts

@@ -15,6 +15,7 @@ import {
     TLLMToolResultMessageBlock,
     TLLMRequestBody,
     BasicCredentials,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 
@@ -42,7 +43,7 @@ type OllamaChatRequest = {
 export class OllamaConnector extends LLMConnector {
     public name = 'LLM:Ollama';
 
-    private getClient(context: ILLMRequestContext): Ollama {
+    private getClient(context: ILLMRequestContext, abortSignal?: AbortSignal): Ollama {
         // Extract baseURL and sanitize it for Ollama SDK
         let host = 'http://localhost:11434';
 
@@ -55,7 +56,7 @@ export class OllamaConnector extends LLMConnector {
             host = url.origin;
         }
 
-        const config: { host: string; headers?: { Authorization?: string } } = { host };
+        const config: { host: string; headers?: { Authorization?: string }; fetch?: typeof fetch } = { host };
 
         if (apiKey) {
             config.headers = {
@@ -63,15 +64,27 @@ export class OllamaConnector extends LLMConnector {
             };
         }
 
+        // Pass abortSignal through custom fetch function
+        // Best practice: Respect existing signal in init if present, otherwise use our abortSignal
+        if (abortSignal) {
+            config.fetch = (url: RequestInfo | URL, init?: RequestInit) => {
+                return fetch(url, {
+                    ...init,
+                    // Use abortSignal if no signal exists in init, otherwise respect the existing signal
+                    signal: init?.signal || abortSignal,
+                });
+            };
+        }
+
         // No API key validation required for Ollama (local by default)
         return new Ollama(config);
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
-            const ollama = this.getClient(context);
+            const ollama = this.getClient(context, abortSignal);
 
             const result = (await ollama.chat({
                 ...body,
@@ -79,7 +92,7 @@ export class OllamaConnector extends LLMConnector {
             })) as unknown as ChatResponse;
 
             const message = result.message;
-            const finishReason = result.done_reason || 'stop';
+            const finishReason = LLMHelper.normalizeFinishReason(result.done_reason || TLLMFinishReason.Stop);
             const usage = {
                 prompt_tokens: result.prompt_eval_count || 0,
                 completion_tokens: result.eval_count || 0,
@@ -117,112 +130,191 @@ export class OllamaConnector extends LLMConnector {
                 message: message as any,
                 usage,
             };
-        } catch (error) {
+        } catch (error: any) {
+            // Handle AbortError specifically - this is expected when abortSignal is triggered
+            if (error?.name === 'AbortError' || abortSignal?.aborted) {
+                logger.debug(`request ${this.name} aborted`, acRequest.candidate);
+                throw error;
+            }
             logger.error(`request ${this.name}`, error, acRequest.candidate);
             throw error;
         }
     }
 
+    /**
+     * Stream request implementation.
+     *
+     * **Error Handling Pattern:**
+     * - Always returns emitters, never throws errors - ensures consistent error handling
+     * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+     * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+     *
+     * **Why setImmediate?**
+     * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+     * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+     * listeners are attached. This prevents race conditions where synchronous event emission
+     * would occur before listeners can be registered.
+     *
+     * @param acRequest - Access request for authorization
+     * @param body - Request body parameters
+     * @param context - LLM request context
+     * @param abortSignal - AbortSignal for cancellation
+     * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+     */
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
+        const emitter = new EventEmitter();
+
         try {
             logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
-            const emitter = new EventEmitter();
             const usage_data = [];
 
-            const ollama = this.getClient(context);
+            const ollama = this.getClient(context, abortSignal);
             const stream = (await ollama.chat({
                 ...body,
                 stream: true,
             })) as AsyncIterable<ChatResponse>;
 
+            if (abortSignal) {
+                abortSignal.addEventListener('abort', () => {
+                    // Abort the stream if it supports abort
+                    if (typeof (stream as any)?.abort === 'function') {
+                        (stream as any).abort();
+                    }
+                    // Emit abort event on the emitter for proper cleanup
+                    const abortError = new DOMException('Request aborted', 'AbortError');
+                    setImmediate(() => {
+                        emitter.emit(TLLMEvent.Abort, abortError);
+                        emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                    });
+                });
+            }
+
             let toolsData: ToolData[] = [];
             let fullContent = '';
-            let finishReason = 'stop';
+            let finishReason: TLLMFinishReason = TLLMFinishReason.Stop;
 
             (async () => {
-                for await (const chunk of stream) {
-                    emitter.emit(TLLMEvent.Data, chunk);
-
-                    // Emit content deltas
-                    if (chunk.message?.content) {
-                        const content = chunk.message.content;
-                        fullContent += content;
-                        emitter.emit(TLLMEvent.Content, content);
-                    }
+                try {
+                    for await (const chunk of stream) {
+                        // Check if aborted before processing chunk
+                        if (abortSignal?.aborted) {
+                            break;
+                        }
+                        emitter.emit(TLLMEvent.Data, chunk);
 
-                    // Handle tool calls accumulation
-                    if (chunk.message?.tool_calls) {
-                        chunk.message.tool_calls.forEach((toolCall, index) => {
-                            if (!toolsData[index]) {
-                                toolsData[index] = {
-                                    index,
-                                    id: toolCall.function?.name || `tool_${index}`,
-                                    type: 'function',
-                                    name: toolCall.function?.name,
-                                    arguments: toolCall.function?.arguments || '',
-                                    role: 'assistant',
-                                };
-                            } else {
-                                // Merge arguments across chunks for string arguments
-                                if (typeof toolsData[index].arguments === 'string' && typeof toolCall.function?.arguments === 'string') {
-                                    toolsData[index].arguments += toolCall.function.arguments;
+                        // Emit content deltas
+                        if (chunk.message?.content) {
+                            const content = chunk.message.content;
+                            fullContent += content;
+                            emitter.emit(TLLMEvent.Content, content);
+                        }
+
+                        // Handle tool calls accumulation
+                        if (chunk.message?.tool_calls) {
+                            chunk.message.tool_calls.forEach((toolCall, index) => {
+                                if (!toolsData[index]) {
+                                    toolsData[index] = {
+                                        index,
+                                        id: toolCall.function?.name || `tool_${index}`,
+                                        type: 'function',
+                                        name: toolCall.function?.name,
+                                        arguments: toolCall.function?.arguments || '',
+                                        role: 'assistant',
+                                    };
                                 } else {
-                                    // For object arguments, merge them properly
-                                    toolsData[index].arguments = { ...(toolsData[index].arguments as any), ...toolCall.function?.arguments };
+                                    // Merge arguments across chunks for string arguments
+                                    if (typeof toolsData[index].arguments === 'string' && typeof toolCall.function?.arguments === 'string') {
+                                        toolsData[index].arguments += toolCall.function.arguments;
+                                    } else {
+                                        // For object arguments, merge them properly
+                                        toolsData[index].arguments = { ...(toolsData[index].arguments as any), ...toolCall.function?.arguments };
+                                    }
                                 }
-                            }
-                        });
-                    }
+                            });
+                        }
 
-                    // Capture usage data when available
-                    if (chunk.prompt_eval_count !== undefined || chunk.eval_count !== undefined) {
-                        const usage = {
-                            prompt_tokens: chunk.prompt_eval_count || 0,
-                            completion_tokens: chunk.eval_count || 0,
-                            total_tokens: (chunk.prompt_eval_count || 0) + (chunk.eval_count || 0),
-                        };
-                        usage_data.push(usage);
-                    }
+                        // Capture usage data when available
+                        if (chunk.prompt_eval_count !== undefined || chunk.eval_count !== undefined) {
+                            const usage = {
+                                prompt_tokens: chunk.prompt_eval_count || 0,
+                                completion_tokens: chunk.eval_count || 0,
+                                total_tokens: (chunk.prompt_eval_count || 0) + (chunk.eval_count || 0),
+                            };
+                            usage_data.push(usage);
+                        }
 
-                    // Capture finish reason from Ollama's done_reason
-                    if (chunk.done_reason) {
-                        finishReason = chunk.done_reason;
+                        // Capture finish reason from Ollama's done_reason
+                        if (chunk.done_reason) {
+                            finishReason = LLMHelper.normalizeFinishReason(chunk.done_reason);
+                        }
                     }
-                }
 
-                // Emit tool info if tools were requested
-                if (toolsData.length > 0) {
-                    emitter.emit(TLLMEvent.ToolInfo, toolsData);
-                }
+                    // Emit tool info if tools were requested
+                    if (toolsData.length > 0) {
+                        emitter.emit(TLLMEvent.ToolInfo, toolsData);
+                    }
 
-                // Report usage
-                const reportedUsage: any[] = [];
-                usage_data.forEach((usage) => {
-                    const reported = this.reportUsage(usage, {
-                        modelEntryName: context.modelEntryName,
-                        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
-                        agentId: context.agentId,
-                        teamId: context.teamId,
+                    // Report usage
+                    const reportedUsage: any[] = [];
+                    usage_data.forEach((usage) => {
+                        const reported = this.reportUsage(usage, {
+                            modelEntryName: context.modelEntryName,
+                            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+                            agentId: context.agentId,
+                            teamId: context.teamId,
+                        });
+                        reportedUsage.push(reported);
                     });
-                    reportedUsage.push(reported);
-                });
 
-                // Emit interrupted event if finishReason is not 'stop'
-                if (finishReason !== 'stop') {
-                    emitter.emit(TLLMEvent.Interrupted, finishReason);
-                }
+                    // Emit interrupted event if finishReason is not 'stop'
+                    if (finishReason !== TLLMFinishReason.Stop) {
+                        emitter.emit(TLLMEvent.Interrupted, finishReason);
+                    }
 
-                // Final end event
-                setTimeout(() => {
-                    emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
-                }, 100);
+                    // Final end event
+                    setTimeout(() => {
+                        emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
+                    }, 100);
+                } catch (error: any) {
+                    // Handle AbortError specifically - this is expected when abortSignal is triggered
+                    if (error?.name === 'AbortError' || abortSignal?.aborted) {
+                        logger.debug(`streamRequest ${this.name} aborted`, acRequest.candidate);
+                        // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                        const abortError = new DOMException('Request aborted', 'AbortError');
+                        setImmediate(() => {
+                            emitter.emit(TLLMEvent.Abort, abortError);
+                            emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                        });
+                    } else {
+                        logger.error(`streamRequest ${this.name} error`, error, acRequest.candidate);
+                        setImmediate(() => {
+                            emitter.emit(TLLMEvent.Error, error);
+                            emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+                        });
+                    }
+                }
             })();
 
             return emitter;
         } catch (error: any) {
+            // Handle AbortError specifically - this is expected when abortSignal is triggered
+            if (error?.name === 'AbortError' || abortSignal?.aborted) {
+                logger.debug(`streamRequest ${this.name} aborted`, acRequest.candidate);
+                // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                const abortError = new DOMException('Request aborted', 'AbortError');
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Abort, abortError);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                });
+                return emitter;
+            }
             logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-            throw error;
+            setImmediate(() => {
+                emitter.emit(TLLMEvent.Error, error);
+                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+            });
+            return emitter;
        }
    }
 
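Note: the abort wiring above is easiest to read from the consumer side, since the connector always returns the emitter and reports cancellation as events rather than as a rejected promise. Below is a minimal TypeScript sketch of that contract, with hypothetical string event names standing in for the TLLMEvent values; in practice the connector is driven by the SRE runtime, not called like this.

    import EventEmitter from 'events';

    // Hypothetical stand-ins for the TLLMEvent values used in the diff.
    const Content = 'content', Abort = 'abort', ErrorEv = 'error', End = 'end';

    async function consume(streamRequest: (signal: AbortSignal) => Promise<EventEmitter>) {
        const controller = new AbortController();
        // streamRequest never throws, so one set of listeners covers
        // success, provider failures, and user-initiated aborts.
        const emitter = await streamRequest(controller.signal);

        emitter.on(Content, (delta: string) => process.stdout.write(delta));
        emitter.on(Abort, (err: DOMException) => console.log('aborted:', err.name));
        emitter.on(ErrorEv, (err: Error) => console.error('failed:', err.message));
        // End fires after Error and Abort too, so cleanup lives in one place.
        emitter.on(End, (_tools: any[], _usage: any[], finishReason: string) => {
            console.log('finish reason:', finishReason);
        });

        setTimeout(() => controller.abort(), 5000); // cancel after 5 s
    }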
package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts

@@ -15,6 +15,7 @@ import {
     ILLMRequestContext,
     TLLMPreparedParams,
     TLLMEvent,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 
@@ -61,14 +62,14 @@ export class PerplexityConnector extends LLMConnector {
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
             const perplexity = await this.getClient(context);
-            const response = await perplexity.post('/chat/completions', body);
+            const response = await perplexity.post('/chat/completions', body, { signal: abortSignal });
 
             const content = response?.data?.choices?.[0]?.message.content;
-            const finishReason = response?.data?.choices?.[0]?.finish_reason;
+            const finishReason = LLMHelper.normalizeFinishReason(response?.data?.choices?.[0]?.finish_reason);
             const usage = response?.data?.usage as any;
 
             this.reportUsage(usage, {
@@ -87,13 +88,15 @@ export class PerplexityConnector extends LLMConnector {
                 usage,
             };
         } catch (error) {
+            // set the actual error message from the response
+            error.message = error?.response?.data?.error?.message || error?.message || 'Unknown error';
             logger.error(`request ${this.name}`, error, acRequest.candidate);
             throw error;
         }
     }
 
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
         //throw new Error('Multimodal request is not supported for Perplexity.');
         //fallback to chatRequest
         const emitter = new EventEmitter();
@@ -103,7 +106,7 @@ export class PerplexityConnector extends LLMConnector {
         setTimeout(() => {
             try {
                 logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
-                this.request({ acRequest, body, context })
+                this.request({ acRequest, body, context, abortSignal })
                     .then((respose) => {
                         const finishReason = respose.finishReason;
                         const usage = respose.usage;
@@ -112,7 +115,7 @@ export class PerplexityConnector extends LLMConnector {
                         emitter.emit(TLLMEvent.Content, respose.content);
 
                         // Only emit Interrupted if finishReason is not 'stop'
-                        if (finishReason !== 'stop') {
+                        if (finishReason !== TLLMFinishReason.Stop) {
                             emitter.emit(TLLMEvent.Interrupted, finishReason);
                         }
 
@@ -158,8 +161,15 @@ export class PerplexityConnector extends LLMConnector {
         if (params?.temperature !== undefined) body.temperature = params.temperature;
         if (params?.topP !== undefined) body.top_p = params.topP;
         if (params?.topK !== undefined) body.top_k = params.topK;
-        if (params?.frequencyPenalty) body.frequency_penalty = params.frequencyPenalty;
-        if (params?.presencePenalty !== undefined) body.presence_penalty = params.presencePenalty;
+
+        // Perplexity API does not allow both presence_penalty and frequency_penalty to be set simultaneously.
+        // A value of 0 means no penalty (same as default), so we only include these parameters when they have a non-zero value.
+        // Apply either frequencyPenalty or presencePenalty, prioritizing frequencyPenalty
+        if (params?.frequencyPenalty) {
+            body.frequency_penalty = params.frequencyPenalty;
+        } else if (params?.presencePenalty) {
+            body.presence_penalty = params.presencePenalty;
+        }
 
         if (params.responseFormat) {
             body.response_format = params.responseFormat;
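Note: the penalty handling above reduces to a small rule: send at most one penalty field, and treat 0 as unset. A self-contained sketch of the same logic, with a hypothetical helper name and simplified types:

    type PenaltyParams = { frequencyPenalty?: number; presencePenalty?: number };

    // Perplexity rejects requests carrying both penalties, and 0 is the
    // no-penalty default, so emit at most one non-zero field.
    function applyPenalties(params: PenaltyParams, body: Record<string, unknown>): void {
        if (params.frequencyPenalty) {
            body.frequency_penalty = params.frequencyPenalty;
        } else if (params.presencePenalty) {
            body.presence_penalty = params.presencePenalty;
        }
    }

    const body: Record<string, unknown> = {};
    applyPenalties({ frequencyPenalty: 0, presencePenalty: 0.5 }, body);
    // 0 is falsy, so frequencyPenalty is skipped and only presence_penalty is sent.
    console.log(body); // { presence_penalty: 0.5 }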
package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts

@@ -16,6 +16,7 @@ import {
     TLLMMessageRole,
     TLLMChatResponse,
     TLLMEvent,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
@@ -69,7 +70,7 @@ export class VertexAIConnector extends LLMConnector {
         const response = await result.response;
 
         const content = response.candidates?.[0]?.content?.parts?.[0]?.text || '';
-        const finishReason = response.candidates?.[0]?.finishReason || 'stop';
+        const finishReason = LLMHelper.normalizeFinishReason(response.candidates?.[0]?.finishReason || 'stop');
         const usage = response.usageMetadata;
 
         let toolsData: ToolData[] = [];
@@ -175,9 +176,9 @@ export class VertexAIConnector extends LLMConnector {
                     usageData.push(reportedUsage);
                 }
 
-                const finishReason = (aggregatedResponse.candidates?.[0]?.finishReason || 'stop').toLowerCase();
+                const finishReason = LLMHelper.normalizeFinishReason(aggregatedResponse.candidates?.[0]?.finishReason || 'stop');
 
-                if (finishReason !== 'stop') {
+                if (finishReason !== TLLMFinishReason.Stop) {
                     emitter.emit(TLLMEvent.Interrupted, finishReason);
                 }
 
@@ -186,7 +187,10 @@ export class VertexAIConnector extends LLMConnector {
                 }, 100);
             } catch (error) {
                 logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-                emitter.emit(TLLMEvent.Error, error);
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Error, error);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+                });
             }
         }, 100);
 
package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts

@@ -20,6 +20,8 @@ import {
     TLLMToolResultMessageBlock,
     ToolData,
     TOpenAIRequestBody,
+    TLLMEvent,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 
 import { ConnectorService } from '@sre/Core/ConnectorsService';
@@ -85,7 +87,7 @@ export class OpenAIConnector extends LLMConnector {
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
             const _body = body as OpenAI.ChatCompletionCreateParams;
@@ -107,15 +109,15 @@ export class OpenAIConnector extends LLMConnector {
             const responseInterface = this.getInterfaceType(context);
             const apiInterface = this.getApiInterface(responseInterface, context);
 
-            const result = await apiInterface.createRequest(body, context);
+            const result = await apiInterface.createRequest(body, context, abortSignal);
 
             const message = result?.choices?.[0]?.message || { content: result?.output_text };
-            const finishReason = result?.choices?.[0]?.finish_reason || result?.incomplete_details || 'stop';
+            const finishReason = LLMHelper.normalizeFinishReason(result?.choices?.[0]?.finish_reason || result?.incomplete_details || TLLMFinishReason.Stop);
 
             let toolsData: ToolData[] = [];
             let useTool = false;
 
-            if (finishReason === 'tool_calls') {
+            if (finishReason === TLLMFinishReason.ToolCalls) {
                 toolsData =
                     message?.tool_calls?.map((tool, index) => ({
                         index,
@@ -151,8 +153,30 @@ export class OpenAIConnector extends LLMConnector {
         }
     }
 
+    /**
+     * Stream request implementation.
+     *
+     * **Error Handling Pattern:**
+     * - Always returns emitters, never throws errors - ensures consistent error handling
+     * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+     * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+     *
+     * **Why setImmediate?**
+     * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+     * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+     * listeners are attached. This prevents race conditions where synchronous event emission
+     * would occur before listeners can be registered.
+     *
+     * @param acRequest - Access request for authorization
+     * @param body - Request body parameters
+     * @param context - LLM request context
+     * @param abortSignal - AbortSignal for cancellation
+     * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+     */
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
+        let emitter: EventEmitter = new EventEmitter();
+
         try {
             logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
 
@@ -173,14 +197,32 @@ export class OpenAIConnector extends LLMConnector {
             const responseInterface = this.getInterfaceType(context);
             const apiInterface = this.getApiInterface(responseInterface, context);
 
-            const stream = await apiInterface.createStream(body, context);
+            const stream = await apiInterface.createStream(body, context, abortSignal);
 
-            const emitter = apiInterface.handleStream(stream, context);
+            emitter = apiInterface.handleStream(stream, context);
 
             return emitter;
         } catch (error) {
+            const isAbort = (error as any)?.name === 'AbortError' || abortSignal?.aborted;
+
+            if (isAbort) {
+                // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                const abortError = new DOMException('Request aborted', 'AbortError');
+                logger.debug(`streamRequest ${this.name} aborted`, abortError, acRequest.candidate);
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Abort, abortError);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                });
+                return emitter;
+            }
+
             logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-            throw error;
+            setImmediate(() => {
+                emitter.emit(TLLMEvent.Error, error);
+                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+            });
+
+            return emitter;
         }
     }
 
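Note: the "Why setImmediate?" doc comment above describes a race that is easy to demonstrate in isolation. A minimal sketch using only Node's events module, no SRE types:

    import EventEmitter from 'events';

    async function makeEmitter(): Promise<EventEmitter> {
        const emitter = new EventEmitter();

        // A synchronous emit here would be lost: the caller is still parked on
        // `await makeEmitter()` and has not attached any listeners yet.
        // emitter.emit('error', new Error('dropped'));

        // Deferring with setImmediate queues the emit behind the caller's
        // continuation, so listeners are attached before the event fires.
        setImmediate(() => emitter.emit('error', new Error('delivered')));
        return emitter;
    }

    async function main() {
        const emitter = await makeEmitter();
        emitter.on('error', (err: Error) => console.log(err.message)); // "delivered"
    }
    main();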
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts

@@ -2,7 +2,8 @@ import EventEmitter from 'events';
 import OpenAI from 'openai';
 import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
 import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
-import { TLLMParams, TLLMPreparedParams, ILLMRequestContext, ToolData, TLLMMessageRole, APIKeySource, TLLMEvent } from '@sre/types/LLM.types';
+import { TLLMParams, TLLMPreparedParams, ILLMRequestContext, ToolData, TLLMMessageRole, APIKeySource, TLLMEvent, TLLMFinishReason } from '@sre/types/LLM.types';
+import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 import { OpenAIApiInterface, ToolConfig } from './OpenAIApiInterface';
 import { HandlerDependencies } from '../types';
 import { JSON_RESPONSE_INSTRUCTION, SUPPORTED_MIME_TYPES_MAP, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
@@ -32,24 +33,35 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
         this.deps = deps;
     }
 
-    public async createRequest(body: OpenAI.ChatCompletionCreateParams, context: ILLMRequestContext): Promise<OpenAI.ChatCompletion> {
+    public async createRequest(
+        body: OpenAI.ChatCompletionCreateParams,
+        context: ILLMRequestContext,
+        abortSignal?: AbortSignal
+    ): Promise<OpenAI.ChatCompletion> {
         const openai = await this.deps.getClient(context);
-        return await openai.chat.completions.create({
-            ...body,
-            stream: false,
-        });
+        return await openai.chat.completions.create(
+            {
+                ...body,
+                stream: false,
+            },
+            { signal: abortSignal }
+        );
     }
 
     public async createStream(
         body: OpenAI.ChatCompletionCreateParams,
-        context: ILLMRequestContext
+        context: ILLMRequestContext,
+        abortSignal?: AbortSignal
     ): Promise<AsyncIterable<OpenAI.ChatCompletionChunk>> {
         const openai = await this.deps.getClient(context);
-        return await openai.chat.completions.create({
-            ...body,
-            stream: true,
-            stream_options: { include_usage: true },
-        });
+        return await openai.chat.completions.create(
+            {
+                ...body,
+                stream: true,
+                stream_options: { include_usage: true },
+            },
+            { signal: abortSignal }
+        );
     }
 
     public handleStream(stream: AsyncIterable<OpenAI.ChatCompletionChunk>, context: ILLMRequestContext): EventEmitter {
@@ -342,20 +354,22 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
     /**
      * Emit final events
      */
-    private emitFinalEvents(emitter: EventEmitter, toolsData: ToolData[], reportedUsage: any[], finishReason: string): void {
+    private emitFinalEvents(emitter: EventEmitter, toolsData: ToolData[], reportedUsage: any[], finishReason: string | TLLMFinishReason): void {
+        const normalizedFinishReason = typeof finishReason === 'string' ? LLMHelper.normalizeFinishReason(finishReason) : finishReason;
+
         // Emit tool info event if tools were called
         if (toolsData.length > 0) {
             emitter.emit(TLLMEvent.ToolInfo, toolsData);
         }
 
         // Emit interrupted event if finishReason is not 'stop'
-        if (finishReason !== 'stop') {
-            emitter.emit(TLLMEvent.Interrupted, finishReason);
+        if (normalizedFinishReason !== TLLMFinishReason.Stop) {
+            emitter.emit(TLLMEvent.Interrupted, normalizedFinishReason);
         }
 
         // Emit end event with setImmediate to ensure proper event ordering
         setImmediate(() => {
-            emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
+            emitter.emit(TLLMEvent.End, toolsData, reportedUsage, normalizedFinishReason);
         });
     }
 
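Note: the `{ signal: abortSignal }` second argument above is the openai-node SDK's per-request options bag. A standalone usage sketch of the same mechanism; the model name and timeout are illustrative:

    import OpenAI from 'openai';

    const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

    async function main() {
        const controller = new AbortController();
        setTimeout(() => controller.abort(), 10_000); // illustrative 10 s cutoff

        // Second argument = per-request options; this is the hook the
        // createRequest/createStream changes above rely on.
        const stream = await openai.chat.completions.create(
            { model: 'gpt-4o-mini', messages: [{ role: 'user', content: 'Hello' }], stream: true },
            { signal: controller.signal }
        );

        try {
            for await (const chunk of stream) {
                process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
            }
        } catch (err) {
            // The SDK surfaces cancellation as a thrown error; check the signal
            // rather than the error name, which varies by SDK version.
            if (controller.signal.aborted) console.log('\nstream aborted');
            else throw err;
        }
    }
    main();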
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts

@@ -33,14 +33,14 @@ export abstract class OpenAIApiInterface {
      * @param body - The request body prepared for this API
      * @param context - The request context
      */
-    abstract createRequest(body: any, context: ILLMRequestContext): Promise<any>;
+    abstract createRequest(body: any, context: ILLMRequestContext, abortSignal?: AbortSignal): Promise<any>;
 
     /**
      * Create a stream for this API interface
      * @param body - The request body prepared for this API
      * @param context - The request context
      */
-    abstract createStream(body: any, context: ILLMRequestContext): Promise<any>;
+    abstract createStream(body: any, context: ILLMRequestContext, abortSignal?: AbortSignal): Promise<any>;
 
     /**
      * Handle the stream response from this API interface
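
Note: LLMHelper.normalizeFinishReason itself is outside this excerpt; its changes live in package/src/subsystems/LLMManager/LLM.helper.ts (+117 -1 in the file list). The sketch below is a plausible shape for such a normalizer, not the package's actual table: it maps provider-specific finish strings onto one shared enum, which is what lets every connector compare against TLLMFinishReason.Stop.

    // Mirrors the TLLMFinishReason enum referenced throughout the diff
    // (member names taken from the diff; string values are assumed).
    enum TLLMFinishReason {
        Stop = 'stop',
        ToolCalls = 'tool_calls',
        Length = 'length',
        ContentFilter = 'content_filter',
        Abort = 'abort',
        Error = 'error',
    }

    // Illustrative provider-to-enum mapping; the real table in LLM.helper.ts
    // may differ.
    const FINISH_REASON_MAP: Record<string, TLLMFinishReason> = {
        stop: TLLMFinishReason.Stop,
        end_turn: TLLMFinishReason.Stop, // Anthropic
        length: TLLMFinishReason.Length,
        max_tokens: TLLMFinishReason.Length, // Anthropic / Bedrock
        tool_calls: TLLMFinishReason.ToolCalls,
        tool_use: TLLMFinishReason.ToolCalls, // Anthropic
        content_filter: TLLMFinishReason.ContentFilter,
    };

    function normalizeFinishReason(raw: unknown): TLLMFinishReason {
        if (typeof raw !== 'string') return TLLMFinishReason.Stop;
        // Vertex AI reports upper-case reasons such as 'STOP' or 'MAX_TOKENS'.
        return FINISH_REASON_MAP[raw.toLowerCase()] ?? TLLMFinishReason.Stop;
    }

    console.log(normalizeFinishReason('MAX_TOKENS')); // 'length'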