@smythos/sre 1.7.42 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG +448 -66
  2. package/dist/index.js +65 -50
  3. package/dist/index.js.map +1 -1
  4. package/dist/types/Components/Async.class.d.ts +11 -5
  5. package/dist/types/index.d.ts +2 -0
  6. package/dist/types/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.d.ts +45 -0
  7. package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +32 -1
  8. package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +25 -2
  9. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.d.ts +22 -2
  10. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.d.ts +2 -2
  11. package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +27 -2
  12. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +22 -2
  13. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Ollama.class.d.ts +22 -2
  14. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.d.ts +3 -3
  15. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.d.ts +23 -3
  16. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +2 -2
  17. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.d.ts +2 -2
  18. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +2 -2
  19. package/dist/types/subsystems/LLMManager/LLM.service/connectors/xAI.class.d.ts +3 -3
  20. package/dist/types/subsystems/MemoryManager/LLMContext.d.ts +10 -3
  21. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.d.ts +24 -0
  22. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.d.ts +49 -0
  23. package/dist/types/types/LLM.types.d.ts +30 -1
  24. package/package.json +4 -3
  25. package/src/Components/APICall/OAuth.helper.ts +16 -1
  26. package/src/Components/APIEndpoint.class.ts +11 -4
  27. package/src/Components/Async.class.ts +38 -5
  28. package/src/Components/GenAILLM.class.ts +13 -7
  29. package/src/Components/ImageGenerator.class.ts +32 -13
  30. package/src/Components/LLMAssistant.class.ts +3 -1
  31. package/src/Components/LogicAND.class.ts +13 -0
  32. package/src/Components/LogicAtLeast.class.ts +18 -0
  33. package/src/Components/LogicAtMost.class.ts +19 -0
  34. package/src/Components/LogicOR.class.ts +12 -2
  35. package/src/Components/LogicXOR.class.ts +11 -0
  36. package/src/constants.ts +1 -1
  37. package/src/helpers/Conversation.helper.ts +10 -8
  38. package/src/index.ts +2 -0
  39. package/src/index.ts.bak +2 -0
  40. package/src/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.ts +190 -0
  41. package/src/subsystems/AgentManager/AgentData.service/index.ts +2 -0
  42. package/src/subsystems/LLMManager/LLM.helper.ts +117 -1
  43. package/src/subsystems/LLMManager/LLM.inference.ts +136 -67
  44. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +22 -6
  45. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +157 -33
  46. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +9 -8
  47. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +124 -90
  48. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +125 -62
  49. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +168 -76
  50. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +18 -8
  51. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +8 -4
  52. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +50 -8
  53. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +30 -16
  54. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +2 -2
  55. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +29 -15
  56. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +10 -8
  57. package/src/subsystems/MemoryManager/LLMContext.ts +27 -8
  58. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +313 -85
  59. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.ts +203 -0
  60. package/src/types/LLM.types.ts +31 -1
  61. package/src/types/node-sqlite.d.ts +45 -0
package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts

@@ -112,6 +112,7 @@ export abstract class LLMConnector extends Connector {
  const response = await this.request({
      acRequest: candidate.readRequest,
      body: preparedParams.body,
+     abortSignal: preparedParams.abortSignal,
      context: {
          modelEntryName: preparedParams.modelEntryName,
          agentId: preparedParams.agentId,
@@ -137,6 +138,7 @@ export abstract class LLMConnector extends Connector {
  const requestParams = {
      acRequest: candidate.readRequest,
      body: preparedParams.body,
+     abortSignal: preparedParams.abortSignal,
      context: {
          modelEntryName: preparedParams.modelEntryName,
          agentId: preparedParams.agentId,
@@ -262,15 +264,18 @@ export abstract class LLMConnector extends Connector {

  private async prepareParams(candidate: AccessCandidate, params: TLLMConnectorParams): Promise<TLLMPreparedParams> {
      const modelsProvider: ModelsProviderConnector = ConnectorService.getModelsProviderConnector();
-     // Assign file from the original parameters to avoid overwriting the original constructor
-     const files = params?.files;
-     delete params?.files; // need to remove files to avoid any issues during JSON.stringify() especially when we have large files
+     // Extract files and abortSignal from the original parameters to avoid overwriting the original constructor
+     const { files, abortSignal, ...restParams } = params;

-     const clonedParams = JSON.parse(JSON.stringify(params)); // Avoid mutation of the original params
+     const clonedParams = JSON.parse(JSON.stringify(restParams)); // Avoid mutation of the original params

      // Format the parameters to ensure proper type of values
      const _params: TLLMPreparedParams = this.formatParamValues(clonedParams);

+     // Re-attach non-serializable properties ignored before cloning
+     _params.abortSignal = abortSignal;
+     _params.files = files;
+
      const model = _params.model;
      const teamId = await this.getTeamId(candidate);

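Note: the switch from delete-and-reattach to destructuring matters because JSON.parse(JSON.stringify(...)) silently destroys anything that is not plain data. A minimal standalone sketch of the failure mode (values are illustrative, not from the package):

    // AbortSignal serializes to {}; its abort state and listeners are lost.
    // Large Buffers survive only as bloated { type: 'Buffer', data: [...] } objects.
    const controller = new AbortController();
    const params = {
        model: 'claude-4-sonnet',
        abortSignal: controller.signal,
        files: [Buffer.from('...')],
    };

    const { files, abortSignal, ...restParams } = params;
    const clonedParams = JSON.parse(JSON.stringify(restParams)); // only plain data left

    console.log(clonedParams.model);            // 'claude-4-sonnet'
    console.log('abortSignal' in clonedParams); // false; hence the re-attach step above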
@@ -281,6 +286,15 @@
  const modelProviderCandidate = modelsProvider.requester(candidate);
  const modelInfo: TLLMModel | TCustomLLMModel = await modelProviderCandidate.getModelInfo(model);

+ // If the model entry has an alias, it means this entry forwards to another model.
+ // Usage must be reported against the alias (the actual model being billed),
+ // not the forwarding entry (which may have stale/different pricing).
+ // Guard: skip for custom/enterprise LLMs — they are not billed and should
+ // retain their own entry name (enterprise models use alias only for config inheritance).
+ if (modelInfo?.alias && !(modelInfo as TCustomLLMModel)?.isCustomLLM) {
+     _params.modelEntryName = modelInfo.alias;
+ }
+
  //if the model has default params make sure to set them if they are not present
  if (modelInfo.params) {
      for (let key in modelInfo.params) {
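Note: to make the alias rule concrete, a hedged sketch with hypothetical registry entries (names and shape are illustrative; the real registry lives behind the ModelsProvider connector):

    type ModelEntry = { alias?: string; isCustomLLM?: boolean };

    // Hypothetical entries, illustrative only:
    const entries: Record<string, ModelEntry> = {
        'claude-3.7-sonnet': { alias: 'claude-4-sonnet' },               // forwarding entry: billed as the alias
        'acme-private': { alias: 'claude-4-sonnet', isCustomLLM: true }, // enterprise entry: alias is config inheritance only
    };

    function billingEntryName(name: string, info: ModelEntry): string {
        // Mirrors the guard above: follow the alias unless the entry is a custom LLM.
        return info.alias && !info.isCustomLLM ? info.alias : name;
    }

    console.log(billingEntryName('claude-3.7-sonnet', entries['claude-3.7-sonnet'])); // 'claude-4-sonnet'
    console.log(billingEntryName('acme-private', entries['acme-private']));           // 'acme-private'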
@@ -307,8 +321,6 @@
  }

  _params.model = await modelProviderCandidate.getModelId(model);
- // Attach the files again after formatting the parameters
- _params.files = files;

  const features = modelInfo?.features || [];

@@ -327,6 +339,9 @@
      xai: await this.prepareXAIToolsInfo(_params),
  };

+ // Filter out default and system-specific outputs (e.g., _debug, _error) to isolate custom outputs for structured response
+ _params.structuredOutputs = _params?.outputs?.filter((output) => !output.default && !['_debug', '_error'].includes(output.name)) || [];
+
  // The input adapter transforms the standardized parameters into the specific format required by the target LLM provider
  _params.agentId = candidate.id;
  const body = await this.reqBodyAdapter(_params);
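Note: what the new filter keeps, shown with a hypothetical outputs array (the non-system output names are invented):

    const outputs = [
        { name: 'Reply', default: false },  // kept: custom output
        { name: 'Output', default: true },  // dropped: default output
        { name: '_debug', default: false }, // dropped: system-specific
        { name: '_error', default: false }, // dropped: system-specific
    ];

    const structuredOutputs = outputs.filter(
        (o) => !o.default && !['_debug', '_error'].includes(o.name),
    );
    console.log(structuredOutputs.map((o) => o.name)); // ['Reply']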
@@ -461,6 +476,7 @@
  }

  //FIXME: to revisit by Alaa-eddine
+ // TODO: This part is a bit confusing. We send “consistent” messages to the LLM, but they still aren’t truly consistent. For example, we send { role: 'system', content: 'You are a helpful assistant.' }, which isn’t compatible with Google AI. However, we still need to mark it as `system` because we later convert it to `systemInstruction`. We should revisit the architecture later and make the flow simpler and more straightforward.
  if (key === 'messages') {
      _value = this.getConsistentMessages(_value);
  }
package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts

@@ -1,6 +1,8 @@
  import EventEmitter from 'events';
+ import z from 'zod';
  import Anthropic from '@anthropic-ai/sdk';
  import type { MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream';
+ import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod';

  import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
  import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
@@ -18,6 +20,7 @@ import {
      TAnthropicRequestBody,
      ILLMRequestContext,
      TLLMPreparedParams,
+     TLLMFinishReason,
  } from '@sre/types/LLM.types';

  import { LLMHelper } from '@sre/LLMManager/LLM.helper';
@@ -32,10 +35,17 @@ import { hookAsync } from '@sre/Core/HookService';
  const logger = Logger('AnthropicConnector');

  const PREFILL_TEXT_FOR_JSON_RESPONSE = '{';
- const LEGACY_THINKING_MODELS = ['smythos/claude-3.7-sonnet-thinking', 'claude-3.7-sonnet-thinking'];
+ const LEGACY_MODELS = [
+     'claude-4-sonnet',
+     'claude-4-opus',
+     'claude-opus-4-1',
+     'smythos/claude-4-sonnet',
+     'smythos/claude-4-opus',
+     'smythos/claude-opus-4-1',
+ ];
+ const MODELS_SUPPORTING_REASONING_EFFORT = ['claude-opus-4-6', 'claude-opus-4-5', 'smythos/claude-opus-4-6', 'smythos/claude-opus-4-5'];

  // Type aliases
- type AnthropicMessageParams = Anthropic.MessageCreateParamsNonStreaming | Anthropic.Messages.MessageStreamParams;
  type AnthropicStreamEventType = keyof MessageStreamEvents;

  // Event names automatically validated against MessageStreamEvents type
@@ -73,21 +83,21 @@ export class AnthropicConnector extends LLMConnector {
  }

  @hookAsync('LLMConnector.request')
- protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+ protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
      try {
          logger.debug(`request ${this.name}`, acRequest.candidate);
          const anthropic = await this.getClient(context);
-         const result = await anthropic.messages.create(body);
+         const result = await anthropic.messages.create(body, { signal: abortSignal });
          const message: Anthropic.MessageParam = {
              role: (result?.role || TLLMMessageRole.User) as Anthropic.MessageParam['role'],
              content: result?.content || '',
          };
-         const stopReason = result?.stop_reason;
+         const finishReason = LLMHelper.normalizeFinishReason(result?.stop_reason);

          let toolsData: ToolData[] = [];
          let useTool = false;

-         if ((stopReason as 'tool_use') === 'tool_use') {
+         if (finishReason === TLLMFinishReason.ToolCalls) {
              const toolUseContentBlocks = result?.content?.filter((c) => (c.type as 'tool_use') === 'tool_use');

              if (toolUseContentBlocks?.length === 0) return;
@@ -124,7 +134,7 @@

      return {
          content,
-         finishReason: result?.stop_reason,
+         finishReason,
          useTool,
          toolsData,
          message,
@@ -136,15 +146,49 @@
          }
      }

+     /**
+      * Stream request implementation.
+      *
+      * **Error Handling Pattern:**
+      * - Always returns emitters, never throws errors - ensures consistent error handling
+      * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+      * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+      *
+      * **Why setImmediate?**
+      * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+      * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+      * listeners are attached. This prevents race conditions where synchronous event emission
+      * would occur before listeners can be registered.
+      *
+      * @param acRequest - Access request for authorization
+      * @param body - Request body parameters
+      * @param context - LLM request context
+      * @param abortSignal - AbortSignal for cancellation
+      * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+      */
      @hookAsync('LLMConnector.streamRequest')
-     protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+     protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
+         const emitter = new EventEmitter();
+
          try {
              logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
-             const emitter = new EventEmitter();
+
+             // Pre-flight: already aborted before we start — emit Abort immediately.
+             // This is especially important for Anthropic because if we try to start the stream
+             // with an already-aborted signal, the SDK may never emit abort/error, leaving callers hanging.
+             if (abortSignal?.aborted) {
+                 const abortError = new DOMException('Request aborted', 'AbortError');
+                 setImmediate(() => {
+                     emitter.emit(TLLMEvent.Abort, abortError);
+                     emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                 });
+                 return emitter;
+             }
+
              const usage_data = [];

              const anthropic = await this.getClient(context);
-             let stream = anthropic.messages.stream(body);
+             let stream = anthropic.messages.stream(body, { signal: abortSignal });

              let toolsData: ToolData[] = [];
              let thinkingBlocks: any[] = []; // To preserve thinking blocks
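Note: the setImmediate rationale in the docstring is easiest to see from the call site. A hedged consumer sketch (the connector variable is hypothetical; event names and the End argument order are taken from the emissions in this diff):

    const emitter = await connector.streamRequest({ acRequest, body, context, abortSignal });
    // Without setImmediate inside the connector, a pre-flight Abort could fire
    // in the gap between the await resolving and these listeners attaching.
    emitter.on(TLLMEvent.Content, (chunk) => process.stdout.write(chunk));
    emitter.on(TLLMEvent.Abort, (err) => console.warn('aborted:', err.message));
    emitter.on(TLLMEvent.Error, (err) => console.error('failed:', err));
    emitter.on(TLLMEvent.End, (toolsData, thinkingBlocks, finishReason) => {
        console.log('finished with', finishReason); // End always fires, even after Abort/Error
    });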
@@ -160,9 +204,22 @@
      });

      stream.on(AnthropicStreamEvent.error, (error) => {
-         //console.log('error', error);
+         logger.debug(`streamRequest ${this.name} stream error`, error);
+         setImmediate(() => {
+             emitter.emit(TLLMEvent.Error, error);
+             emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+         });
+     });

-         emitter.emit(TLLMEvent.Error, error);
+     // Anthropic emits a dedicated abort event; translate it to our Abort signal
+     stream.on(AnthropicStreamEvent.abort, (error) => {
+         logger.debug(`streamRequest ${this.name} stream abort`, error);
+         // Always use DOMException with name 'AbortError' per Web API standards for consistency
+         const abortError = new DOMException('Request aborted', 'AbortError');
+         setImmediate(() => {
+             emitter.emit(TLLMEvent.Abort, abortError);
+             emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+         });
      });

      stream.on(AnthropicStreamEvent.message, (message) => {
@@ -184,8 +241,25 @@
          emitter.emit(TLLMEvent.Thinking, thinking);
      });

+     if (abortSignal) {
+         // Catch mid-flight cancellations even if the Anthropic stream never emits its own abort
+         // (e.g., aborted during setup before stream listeners attach).
+         abortSignal.addEventListener(
+             'abort',
+             () => {
+                 logger.debug(`streamRequest ${this.name} abortSignal triggered`, acRequest.candidate);
+                 const abortError = new DOMException('Request aborted', 'AbortError');
+                 setImmediate(() => {
+                     emitter.emit(TLLMEvent.Abort, abortError);
+                     emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                 });
+             },
+             { once: true },
+         );
+     }
+
      stream.on(AnthropicStreamEvent.finalMessage, (finalMessage) => {
-         let finishReason = 'stop';
+         let finishReason: TLLMFinishReason = TLLMFinishReason.Stop;
          // Preserve thinking blocks for subsequent tool interactions
          thinkingBlocks = finalMessage.content.filter((block) => block.type === 'thinking' || block.type === 'redacted_thinking');

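Note: and the mid-flight cancellation path the listener above covers, again as a hypothetical call site. Whichever fires first, the SDK's abort event or this signal listener, the caller sees the same Abort then End sequence:

    const controller = new AbortController();
    const emitter = await connector.streamRequest({ acRequest, body, context, abortSignal: controller.signal });

    // e.g. enforce a 30-second budget on the stream:
    const timer = setTimeout(() => controller.abort(), 30_000);
    emitter.on(TLLMEvent.End, () => clearTimeout(timer));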
@@ -206,7 +280,7 @@

      emitter.emit(TLLMEvent.ToolInfo, toolsData, thinkingBlocks);
  } else {
-     finishReason = finalMessage.stop_reason;
+     finishReason = LLMHelper.normalizeFinishReason(finalMessage.stop_reason);
  }

  if (finalMessage?.usage) {
@@ -221,7 +295,7 @@

      usage_data.push(reportedUsage);
  }
- if (finishReason !== 'stop' && finishReason !== 'end_turn') {
+ if (finishReason !== TLLMFinishReason.Stop) {
      emitter.emit(TLLMEvent.Interrupted, finishReason);
  }

@@ -233,8 +307,27 @@

          return emitter;
      } catch (error: any) {
+         // #region Safety net for aborts that happen while creating the stream (before stream events/listeners exist).
+         const isAbort = error?.name === 'AbortError' || abortSignal?.aborted;
+         if (isAbort) {
+             // Always use DOMException with name 'AbortError' per Web API standards for consistency
+             const abortError = new DOMException('Request aborted', 'AbortError');
+             logger.debug(`streamRequest ${this.name} aborted`, abortError, acRequest.candidate);
+             setImmediate(() => {
+                 emitter.emit(TLLMEvent.Abort, abortError);
+                 emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+             });
+             return emitter;
+         }
+         // #endregion Abort error handling
+
          logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-         throw error;
+         setImmediate(() => {
+             emitter.emit(TLLMEvent.Error, error);
+             emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+         });
+
+         return emitter;
      }
  }

@@ -255,7 +348,7 @@

  protected reportUsage(
      usage: Anthropic.Messages.Usage & { cache_creation_input_tokens?: number; cache_read_input_tokens?: number },
-     metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
+     metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string },
  ) {
      // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
      const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
@@ -386,7 +479,7 @@
  } else if (Array.isArray(message?.content)) {
      if (Array.isArray(message.content)) {
          const toolBlocks = message.content.filter(
-             (item) => typeof item === 'object' && 'type' in item && (item.type === 'tool_use' || item.type === 'tool_result')
+             (item) => typeof item === 'object' && 'type' in item && (item.type === 'tool_use' || item.type === 'tool_result'),
          );

          if (toolBlocks?.length > 0) {
@@ -455,11 +548,26 @@
  }
  messages = otherMessages;

- const responseFormat = params?.responseFormat || '';
- if (responseFormat === 'json') {
-     body.system = body.system ? `${body.system} ${JSON_RESPONSE_INSTRUCTION}` : JSON_RESPONSE_INSTRUCTION;
+ // For backward compatibility, we keep the prefill text with JSON response instruction for legacy models
+ if (LEGACY_MODELS.includes(params?.modelEntryName)) {
+     const responseFormat = params?.responseFormat || '';
+     if (responseFormat === 'json') {
+         body.system = body.system ? `${body.system} ${JSON_RESPONSE_INSTRUCTION}` : JSON_RESPONSE_INSTRUCTION;

-     messages.push({ role: TLLMMessageRole.Assistant, content: PREFILL_TEXT_FOR_JSON_RESPONSE });
+         messages.push({ role: TLLMMessageRole.Assistant, content: PREFILL_TEXT_FOR_JSON_RESPONSE });
+     }
+ }
+ // For new models, we use the structured output feature
+ else {
+     if (params?.structuredOutputs?.length > 0) {
+         // Note: We only support string type output for our components for now
+         const schemaShape = Object.fromEntries(params?.structuredOutputs?.map((output) => [output.name, z.string()]));
+         const ResponseSchema = z.object(schemaShape);
+
+         body.output_config = {
+             format: zodOutputFormat(ResponseSchema),
+         };
+     }
  }

  const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
  const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
@@ -475,13 +583,28 @@ export class AnthropicConnector extends LLMConnector {
475
583
  }
476
584
  //#endregion Prepare system message and add JSON response instruction if needed
477
585
 
478
- const isReasoningModel = params?.capabilities?.reasoning;
586
+ // Temperature and top_p are mutually exclusive for Anthropic API.
587
+ // Temperature takes precedence. Guard ensures only one is ever set.
588
+ if (params?.temperature !== undefined && params.temperature >= 0) {
589
+ body.temperature = params.temperature;
590
+ delete body.top_p;
591
+ } else if (params?.topP !== undefined && params.topP >= 0) {
592
+ body.top_p = params.topP;
593
+ delete body.temperature;
594
+ }
479
595
 
480
- if (params?.temperature !== undefined && !isReasoningModel) body.temperature = params.temperature;
481
- if (params?.topP !== undefined && !isReasoningModel) body.top_p = params.topP;
482
- if (params?.topK !== undefined && !isReasoningModel) body.top_k = params.topK;
596
+ if (params?.topK !== undefined) body.top_k = params.topK;
483
597
  if (params?.stopSequences?.length) body.stop_sequences = params.stopSequences;
484
598
 
599
+ // #region Reasoning effort, only supported by specific models
600
+ if (params?.reasoningEffort && MODELS_SUPPORTING_REASONING_EFFORT.includes(params.modelEntryName)) {
601
+ body.output_config = {
602
+ ...(body.output_config || {}),
603
+ effort: params.reasoningEffort as Anthropic.OutputConfig['effort'],
604
+ };
605
+ }
606
+ // #endregion Reasoning effort
607
+
485
608
  // #region Tools
486
609
  if (params?.toolsConfig?.tools && params?.toolsConfig?.tools.length > 0) {
487
610
  body.tools = params?.toolsConfig?.tools as unknown as Anthropic.Tool[];
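Note: the net effect of the new sampling guard, with illustrative values:

    // { temperature: 0.7, topP: 0.9 } -> body gets temperature: 0.7 only (top_p deleted)
    // { topP: 0.9 }                   -> body gets top_p: 0.9 only (temperature deleted)
    // { topK: 40 }                    -> top_k is set independently; it is no longer
    //                                    suppressed for reasoning models as it was before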
@@ -506,13 +629,13 @@
      maxThinkingTokens,
      toolChoice = null,
  }: {
-     body: AnthropicMessageParams;
+     body: Anthropic.MessageCreateParamsNonStreaming;
      maxThinkingTokens: number;
      toolChoice?: Anthropic.ToolChoice;
  }): Promise<Anthropic.MessageCreateParamsNonStreaming> {
      // Remove the assistant message with the prefill text for JSON response, it's not supported with thinking
      let messages = body.messages.filter(
-         (message) => !(message?.role === TLLMMessageRole.Assistant && message?.content === PREFILL_TEXT_FOR_JSON_RESPONSE)
+         (message) => !(message?.role === TLLMMessageRole.Assistant && message?.content === PREFILL_TEXT_FOR_JSON_RESPONSE),
      );

      let budget_tokens = Math.min(maxThinkingTokens, body.max_tokens);
@@ -591,7 +714,7 @@

  private async prepareSystemPrompt(
      systemMessage: TLLMMessageBlock,
-     params: TLLMPreparedParams
+     params: TLLMPreparedParams,
  ): Promise<string | Array<Anthropic.TextBlockParam>> {
      let systemPrompt = systemMessage?.content;

@@ -622,9 +745,10 @@
   */
  private async shouldUseThinkingMode(params: TLLMPreparedParams): Promise<boolean> {
      // Legacy thinking models always use thinking mode
-     if (LEGACY_THINKING_MODELS.includes(params.modelEntryName)) {
-         return true;
-     }
+     // Legacy thinking models retired and replaced with new models
+     // if (LEGACY_THINKING_MODELS.includes(params.modelEntryName)) {
+     //     return true;
+     // }

      // Check if reasoning is explicitly requested and model supports it
      const useReasoning = params?.useReasoning && params.capabilities?.reasoning === true;
@@ -650,7 +774,7 @@

  private async getImageData(
      files: BinaryInput[],
-     agentId: string
+     agentId: string,
  ): Promise<
      {
          type: string;
package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts

@@ -17,6 +17,7 @@ import {
      TLLMMessageRole,
      APIKeySource,
      TLLMEvent,
+     TLLMFinishReason,
      BedrockCredentials,
      ILLMRequestFuncParams,
      TLLMChatResponse,
@@ -53,12 +54,12 @@ export class BedrockConnector extends LLMConnector {
  }

  @hookAsync('LLMConnector.request')
- protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+ protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
      try {
          logger.debug(`request ${this.name}`, acRequest.candidate);
          const bedrock = await this.getClient(context);
          const command = new ConverseCommand(body);
-         const response: ConverseCommandOutput = await bedrock.send(command);
+         const response: ConverseCommandOutput = await bedrock.send(command, { abortSignal });

          const usage = response.usage;
          this.reportUsage(usage as any, {
@@ -69,12 +70,12 @@
          });

          const message = response.output?.message;
-         const finishReason = response.stopReason;
+         const finishReason = LLMHelper.normalizeFinishReason(response.stopReason);

          let toolsData: ToolData[] = [];
          let useTool = false;

-         if (finishReason === 'tool_use') {
+         if (finishReason === TLLMFinishReason.ToolCalls) {
              const toolUseBlocks = message?.content?.filter((block) => block?.toolUse) || [];

              toolsData = toolUseBlocks.map((block, index) => ({
@@ -102,14 +103,14 @@
      }
  }
  @hookAsync('LLMConnector.streamRequest')
- protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+ protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
      const emitter = new EventEmitter();

      try {
          logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
          const bedrock = await this.getClient(context);
          const command = new ConverseStreamCommand(body);
-         const response: ConverseStreamCommandOutput = await bedrock.send(command);
+         const response: ConverseStreamCommandOutput = await bedrock.send(command, { abortSignal });
          const stream = response.stream;

          if (stream) {
@@ -175,14 +176,14 @@

      // Handle message completion
      if (chunk.messageStop) {
-         const finishReason = chunk.messageStop.stopReason || 'stop';
+         const finishReason = LLMHelper.normalizeFinishReason(chunk.messageStop.stopReason);

          if (currentMessage.toolCalls.length > 0) {
              emitter.emit(TLLMEvent.ToolInfo, currentMessage.toolCalls);
          }

          // Emit interrupted event if finishReason is not 'stop'
-         if (finishReason !== 'stop' && finishReason !== 'end_turn') {
+         if (finishReason !== TLLMFinishReason.Stop) {
              emitter.emit(TLLMEvent.Interrupted, finishReason);
          }
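Note: the repeated LLMHelper.normalizeFinishReason calls across the Anthropic and Bedrock connectors suggest the helper's shape. A hedged sketch of what such a normalizer plausibly does (the real mapping ships in LLM.helper.ts, +117 lines in this release, and may differ):

    // Assumes the package's TLLMFinishReason enum (values seen in this diff).
    function normalizeFinishReason(reason?: string): TLLMFinishReason {
        switch (reason) {
            case 'end_turn': // Anthropic
            case 'stop':     // OpenAI-style / Bedrock
                return TLLMFinishReason.Stop;
            case 'tool_use':   // Anthropic / Bedrock
            case 'tool_calls': // OpenAI-style
                return TLLMFinishReason.ToolCalls;
            default:
                return TLLMFinishReason.Stop; // illustrative fallback; the real helper may differ
        }
    }

This is why the connectors can now compare against TLLMFinishReason.Stop alone instead of checking both 'stop' and 'end_turn'.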