@smythos/sre 1.7.42 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG +448 -66
- package/dist/index.js +65 -50
- package/dist/index.js.map +1 -1
- package/dist/types/Components/Async.class.d.ts +11 -5
- package/dist/types/index.d.ts +2 -0
- package/dist/types/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.d.ts +45 -0
- package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +32 -1
- package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +25 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.d.ts +22 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.d.ts +2 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +27 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +22 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Ollama.class.d.ts +22 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.d.ts +3 -3
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.d.ts +23 -3
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +2 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.d.ts +2 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +2 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/xAI.class.d.ts +3 -3
- package/dist/types/subsystems/MemoryManager/LLMContext.d.ts +10 -3
- package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.d.ts +24 -0
- package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.d.ts +49 -0
- package/dist/types/types/LLM.types.d.ts +30 -1
- package/package.json +4 -3
- package/src/Components/APICall/OAuth.helper.ts +16 -1
- package/src/Components/APIEndpoint.class.ts +11 -4
- package/src/Components/Async.class.ts +38 -5
- package/src/Components/GenAILLM.class.ts +13 -7
- package/src/Components/ImageGenerator.class.ts +32 -13
- package/src/Components/LLMAssistant.class.ts +3 -1
- package/src/Components/LogicAND.class.ts +13 -0
- package/src/Components/LogicAtLeast.class.ts +18 -0
- package/src/Components/LogicAtMost.class.ts +19 -0
- package/src/Components/LogicOR.class.ts +12 -2
- package/src/Components/LogicXOR.class.ts +11 -0
- package/src/constants.ts +1 -1
- package/src/helpers/Conversation.helper.ts +10 -8
- package/src/index.ts +2 -0
- package/src/index.ts.bak +2 -0
- package/src/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.ts +190 -0
- package/src/subsystems/AgentManager/AgentData.service/index.ts +2 -0
- package/src/subsystems/LLMManager/LLM.helper.ts +117 -1
- package/src/subsystems/LLMManager/LLM.inference.ts +136 -67
- package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +22 -6
- package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +157 -33
- package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +9 -8
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +124 -90
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +125 -62
- package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +168 -76
- package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +18 -8
- package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +8 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +50 -8
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +30 -16
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +2 -2
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +29 -15
- package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +10 -8
- package/src/subsystems/MemoryManager/LLMContext.ts +27 -8
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +313 -85
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.ts +203 -0
- package/src/types/LLM.types.ts +31 -1
- package/src/types/node-sqlite.d.ts +45 -0
@@ -112,6 +112,7 @@ export abstract class LLMConnector extends Connector {
         const response = await this.request({
             acRequest: candidate.readRequest,
             body: preparedParams.body,
+            abortSignal: preparedParams.abortSignal,
             context: {
                 modelEntryName: preparedParams.modelEntryName,
                 agentId: preparedParams.agentId,
@@ -137,6 +138,7 @@ export abstract class LLMConnector extends Connector {
         const requestParams = {
             acRequest: candidate.readRequest,
             body: preparedParams.body,
+            abortSignal: preparedParams.abortSignal,
             context: {
                 modelEntryName: preparedParams.modelEntryName,
                 agentId: preparedParams.agentId,
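Both hunks thread an `abortSignal` from the prepared params into the provider request. A minimal sketch of the standard `AbortController` wiring a caller would use to drive this (the `requestWithAbort` helper is hypothetical, not part of the SRE API):

```ts
// Hypothetical helper showing plain AbortController/AbortSignal semantics.
async function requestWithAbort(url: string, abortSignal?: AbortSignal): Promise<Response> {
    // fetch rejects with a DOMException named 'AbortError' once the signal fires
    return fetch(url, { signal: abortSignal });
}

const controller = new AbortController();
const pending = requestWithAbort('https://example.com', controller.signal);

controller.abort();                              // cancel the in-flight request
pending.catch((err) => console.log(err.name));   // 'AbortError'
```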
@@ -262,15 +264,18 @@ export abstract class LLMConnector extends Connector {
 
     private async prepareParams(candidate: AccessCandidate, params: TLLMConnectorParams): Promise<TLLMPreparedParams> {
         const modelsProvider: ModelsProviderConnector = ConnectorService.getModelsProviderConnector();
-        //
-        const files = params
-        delete params?.files; // need to remove files to avoid any issues during JSON.stringify() especially when we have large files
+        // Extract files and abortSignal from the original parameters to avoid overwriting the original constructor
+        const { files, abortSignal, ...restParams } = params;
 
-        const clonedParams = JSON.parse(JSON.stringify(
+        const clonedParams = JSON.parse(JSON.stringify(restParams)); // Avoid mutation of the original params
 
         // Format the parameters to ensure proper type of values
         const _params: TLLMPreparedParams = this.formatParamValues(clonedParams);
 
+        // Re-attach non-serializable properties ignored before cloning
+        _params.abortSignal = abortSignal;
+        _params.files = files;
+
         const model = _params.model;
         const teamId = await this.getTeamId(candidate);
 
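The reorder above matters because `JSON.parse(JSON.stringify(...))` silently degrades anything non-serializable: an `AbortSignal` turns into a useless plain object, and large binary files are expensive to stringify. A minimal illustration (values are made up):

```ts
// AbortSignal and binary file handles do not survive a JSON round-trip.
const controller = new AbortController();
const params = { model: 'some-model', temperature: 0.5, abortSignal: controller.signal, files: [] as unknown[] };

const { files, abortSignal, ...rest } = params;
const cloned = JSON.parse(JSON.stringify(rest)); // safe: only plain data is cloned

// Re-attach the non-serializable values after cloning/formatting
const prepared = { ...cloned, files, abortSignal };
```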
@@ -281,6 +286,15 @@ export abstract class LLMConnector extends Connector {
         const modelProviderCandidate = modelsProvider.requester(candidate);
         const modelInfo: TLLMModel | TCustomLLMModel = await modelProviderCandidate.getModelInfo(model);
 
+        // If the model entry has an alias, it means this entry forwards to another model.
+        // Usage must be reported against the alias (the actual model being billed),
+        // not the forwarding entry (which may have stale/different pricing).
+        // Guard: skip for custom/enterprise LLMs — they are not billed and should
+        // retain their own entry name (enterprise models use alias only for config inheritance).
+        if (modelInfo?.alias && !(modelInfo as TCustomLLMModel)?.isCustomLLM) {
+            _params.modelEntryName = modelInfo.alias;
+        }
+
         //if the model has default params make sure to set them if they are not present
         if (modelInfo.params) {
             for (let key in modelInfo.params) {
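A worked illustration of that guard, with hypothetical model entries:

```ts
// Hypothetical entries: a forwarding entry is billed as its alias,
// while a custom/enterprise entry keeps its own name.
type Entry = { alias?: string; isCustomLLM?: boolean };

function resolveEntryName(entryName: string, info: Entry): string {
    return info?.alias && !info?.isCustomLLM ? info.alias : entryName;
}

resolveEntryName('claude-latest', { alias: 'claude-sonnet-4-5' });         // 'claude-sonnet-4-5'
resolveEntryName('acme-llm', { alias: 'base-config', isCustomLLM: true }); // 'acme-llm'
```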
@@ -307,8 +321,6 @@ export abstract class LLMConnector extends Connector {
         }
 
         _params.model = await modelProviderCandidate.getModelId(model);
-        // Attach the files again after formatting the parameters
-        _params.files = files;
 
         const features = modelInfo?.features || [];
 
@@ -327,6 +339,9 @@ export abstract class LLMConnector extends Connector {
             xai: await this.prepareXAIToolsInfo(_params),
         };
 
+        // Filter out default and system-specific outputs (e.g., _debug, _error) to isolate custom outputs for structured response
+        _params.structuredOutputs = _params?.outputs?.filter((output) => !output.default && !['_debug', '_error'].includes(output.name)) || [];
+
         // The input adapter transforms the standardized parameters into the specific format required by the target LLM provider
         _params.agentId = candidate.id;
         const body = await this.reqBodyAdapter(_params);
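What the filter keeps, shown on hypothetical component outputs:

```ts
// Default and system-specific outputs are dropped; only custom outputs remain.
const outputs = [
    { name: 'Reply', default: false },
    { name: '_debug', default: false },
    { name: '_error', default: false },
    { name: 'Output', default: true },
];

const structuredOutputs = outputs.filter(
    (o) => !o.default && !['_debug', '_error'].includes(o.name),
); // -> [{ name: 'Reply', default: false }]
```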
@@ -461,6 +476,7 @@ export abstract class LLMConnector extends Connector {
         }
 
         //FIXME: to revisit by Alaa-eddine
+        // TODO: This part is a bit confusing. We send “consistent” messages to the LLM, but they still aren’t truly consistent. For example, we send { role: 'system', content: 'You are a helpful assistant.' }, which isn’t compatible with Google AI. However, we still need to mark it as `system` because we later convert it to `systemInstruction`. We should revisit the architecture later and make the flow simpler and more straightforward.
         if (key === 'messages') {
             _value = this.getConsistentMessages(_value);
         }
@@ -1,6 +1,8 @@
 import EventEmitter from 'events';
+import z from 'zod';
 import Anthropic from '@anthropic-ai/sdk';
 import type { MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream';
+import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod';
 
 import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
 import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
@@ -18,6 +20,7 @@ import {
     TAnthropicRequestBody,
     ILLMRequestContext,
     TLLMPreparedParams,
+    TLLMFinishReason,
 } from '@sre/types/LLM.types';
 
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
@@ -32,10 +35,17 @@ import { hookAsync } from '@sre/Core/HookService';
 const logger = Logger('AnthropicConnector');
 
 const PREFILL_TEXT_FOR_JSON_RESPONSE = '{';
-const
+const LEGACY_MODELS = [
+    'claude-4-sonnet',
+    'claude-4-opus',
+    'claude-opus-4-1',
+    'smythos/claude-4-sonnet',
+    'smythos/claude-4-opus',
+    'smythos/claude-opus-4-1',
+];
+const MODELS_SUPPORTING_REASONING_EFFORT = ['claude-opus-4-6', 'claude-opus-4-5', 'smythos/claude-opus-4-6', 'smythos/claude-opus-4-5'];
 
 // Type aliases
-type AnthropicMessageParams = Anthropic.MessageCreateParamsNonStreaming | Anthropic.Messages.MessageStreamParams;
 type AnthropicStreamEventType = keyof MessageStreamEvents;
 
 // Event names automatically validated against MessageStreamEvents type
@@ -73,21 +83,21 @@ export class AnthropicConnector extends LLMConnector {
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
             const anthropic = await this.getClient(context);
-            const result = await anthropic.messages.create(body);
+            const result = await anthropic.messages.create(body, { signal: abortSignal });
             const message: Anthropic.MessageParam = {
                 role: (result?.role || TLLMMessageRole.User) as Anthropic.MessageParam['role'],
                 content: result?.content || '',
             };
-            const
+            const finishReason = LLMHelper.normalizeFinishReason(result?.stop_reason);
 
             let toolsData: ToolData[] = [];
             let useTool = false;
 
-            if (
+            if (finishReason === TLLMFinishReason.ToolCalls) {
                 const toolUseContentBlocks = result?.content?.filter((c) => (c.type as 'tool_use') === 'tool_use');
 
                 if (toolUseContentBlocks?.length === 0) return;
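`LLMHelper.normalizeFinishReason` (added in `LLM.helper.ts`, +117 lines in this release) maps each provider's raw stop reason onto the shared `TLLMFinishReason` enum so every connector can branch uniformly. The mapping below is an assumption sketched only from reasons visible in this diff; the real implementation lives in `LLM.helper.ts`:

```ts
// Assumed shape — enum member names mirror those used in the diff.
enum TLLMFinishReason { Stop = 'stop', ToolCalls = 'tool_calls', Abort = 'abort', Error = 'error' }

function normalizeFinishReason(raw?: string | null): TLLMFinishReason {
    switch (raw) {
        case 'tool_use':   // Anthropic and Bedrock
        case 'tool_calls': // OpenAI-compatible providers
            return TLLMFinishReason.ToolCalls;
        case 'end_turn':
        case 'stop':
        default:
            return TLLMFinishReason.Stop; // fallback is an assumption
    }
}
```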
@@ -124,7 +134,7 @@ export class AnthropicConnector extends LLMConnector {
 
             return {
                 content,
-                finishReason
+                finishReason,
                 useTool,
                 toolsData,
                 message,
@@ -136,15 +146,49 @@ export class AnthropicConnector extends LLMConnector {
         }
     }
 
+    /**
+     * Stream request implementation.
+     *
+     * **Error Handling Pattern:**
+     * - Always returns emitters, never throws errors - ensures consistent error handling
+     * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+     * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+     *
+     * **Why setImmediate?**
+     * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+     * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+     * listeners are attached. This prevents race conditions where synchronous event emission
+     * would occur before listeners can be registered.
+     *
+     * @param acRequest - Access request for authorization
+     * @param body - Request body parameters
+     * @param context - LLM request context
+     * @param abortSignal - AbortSignal for cancellation
+     * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+     */
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
+        const emitter = new EventEmitter();
+
         try {
             logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
-
+
+            // Pre-flight: already aborted before we start — emit Abort immediately.
+            // This is especially important for Anthropic because if we try to start the stream
+            // with an already-aborted signal, the SDK may never emit abort/error, leaving callers hanging.
+            if (abortSignal?.aborted) {
+                const abortError = new DOMException('Request aborted', 'AbortError');
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Abort, abortError);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                });
+                return emitter;
+            }
+
             const usage_data = [];
 
             const anthropic = await this.getClient(context);
-            let stream = anthropic.messages.stream(body);
+            let stream = anthropic.messages.stream(body, { signal: abortSignal });
 
             let toolsData: ToolData[] = [];
             let thinkingBlocks: any[] = []; // To preserve thinking blocks
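The race the docblock describes is easy to reproduce: if an async factory emits synchronously, the event fires before the awaiting caller can attach a listener. A standalone sketch of the pattern:

```ts
import EventEmitter from 'events';

async function makeEmitter(): Promise<EventEmitter> {
    const emitter = new EventEmitter();
    // emitter.emit('error', new Error('lost'));                       // BAD: fires before any listener exists
    setImmediate(() => emitter.emit('error', new Error('observed'))); // GOOD: deferred one tick
    return emitter;
}

const emitter = await makeEmitter();                    // top-level await (ESM)
emitter.on('error', (err) => console.log(err.message)); // logs 'observed'
```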
@@ -160,9 +204,22 @@ export class AnthropicConnector extends LLMConnector {
             });
 
             stream.on(AnthropicStreamEvent.error, (error) => {
-
+                logger.debug(`streamRequest ${this.name} stream error`, error);
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Error, error);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+                });
+            });
 
-
+            // Anthropic emits a dedicated abort event; translate it to our Abort signal
+            stream.on(AnthropicStreamEvent.abort, (error) => {
+                logger.debug(`streamRequest ${this.name} stream abort`, error);
+                // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                const abortError = new DOMException('Request aborted', 'AbortError');
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Abort, abortError);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                });
             });
 
             stream.on(AnthropicStreamEvent.message, (message) => {
@@ -184,8 +241,25 @@ export class AnthropicConnector extends LLMConnector {
                 emitter.emit(TLLMEvent.Thinking, thinking);
             });
 
+            if (abortSignal) {
+                // Catch mid-flight cancellations even if the Anthropic stream never emits its own abort
+                // (e.g., aborted during setup before stream listeners attach).
+                abortSignal.addEventListener(
+                    'abort',
+                    () => {
+                        logger.debug(`streamRequest ${this.name} abortSignal triggered`, acRequest.candidate);
+                        const abortError = new DOMException('Request aborted', 'AbortError');
+                        setImmediate(() => {
+                            emitter.emit(TLLMEvent.Abort, abortError);
+                            emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                        });
+                    },
+                    { once: true },
+                );
+            }
+
             stream.on(AnthropicStreamEvent.finalMessage, (finalMessage) => {
-                let finishReason =
+                let finishReason: TLLMFinishReason = TLLMFinishReason.Stop;
                 // Preserve thinking blocks for subsequent tool interactions
                 thinkingBlocks = finalMessage.content.filter((block) => block.type === 'thinking' || block.type === 'redacted_thinking');
 
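From the consumer side, all three abort paths (pre-flight check, SDK abort event, signal listener) surface identically. A hypothetical caller, going through whatever public API fronts `streamRequest` (which is `protected` here), might look like:

```ts
// `llm.promptStream` is a hypothetical stand-in for the public wrapper;
// TLLMEvent member names come from the diff.
const controller = new AbortController();
const emitter = await llm.promptStream({ abortSignal: controller.signal /* plus the usual prompt params */ });

emitter.on(TLLMEvent.Abort, (err: DOMException) => console.warn(err.name)); // 'AbortError'
emitter.on(TLLMEvent.End, () => console.log('stream finished'));            // End always follows Abort/Error

setTimeout(() => controller.abort(), 5_000); // cancel mid-stream after 5 seconds
```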
@@ -206,7 +280,7 @@ export class AnthropicConnector extends LLMConnector {
 
                     emitter.emit(TLLMEvent.ToolInfo, toolsData, thinkingBlocks);
                 } else {
-                    finishReason = finalMessage.stop_reason;
+                    finishReason = LLMHelper.normalizeFinishReason(finalMessage.stop_reason);
                 }
 
                 if (finalMessage?.usage) {
@@ -221,7 +295,7 @@ export class AnthropicConnector extends LLMConnector {
 
                     usage_data.push(reportedUsage);
                 }
-                if (finishReason !==
+                if (finishReason !== TLLMFinishReason.Stop) {
                     emitter.emit(TLLMEvent.Interrupted, finishReason);
                 }
 
@@ -233,8 +307,27 @@ export class AnthropicConnector extends LLMConnector {
 
             return emitter;
         } catch (error: any) {
+            // #region Safety net for aborts that happen while creating the stream (before stream events/listeners exist).
+            const isAbort = error?.name === 'AbortError' || abortSignal?.aborted;
+            if (isAbort) {
+                // Always use DOMException with name 'AbortError' per Web API standards for consistency
+                const abortError = new DOMException('Request aborted', 'AbortError');
+                logger.debug(`streamRequest ${this.name} aborted`, abortError, acRequest.candidate);
+                setImmediate(() => {
+                    emitter.emit(TLLMEvent.Abort, abortError);
+                    emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                });
+                return emitter;
+            }
+            // #endregion Abort error handling
+
             logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-
+            setImmediate(() => {
+                emitter.emit(TLLMEvent.Error, error);
+                emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+            });
+
+            return emitter;
         }
     }
 
@@ -255,7 +348,7 @@ export class AnthropicConnector extends LLMConnector {
 
     protected reportUsage(
         usage: Anthropic.Messages.Usage & { cache_creation_input_tokens?: number; cache_read_input_tokens?: number },
-        metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
+        metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string },
     ) {
         // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
         const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
@@ -386,7 +479,7 @@ export class AnthropicConnector extends LLMConnector {
         } else if (Array.isArray(message?.content)) {
             if (Array.isArray(message.content)) {
                 const toolBlocks = message.content.filter(
-                    (item) => typeof item === 'object' && 'type' in item && (item.type === 'tool_use' || item.type === 'tool_result')
+                    (item) => typeof item === 'object' && 'type' in item && (item.type === 'tool_use' || item.type === 'tool_result'),
                 );
 
                 if (toolBlocks?.length > 0) {
@@ -455,11 +548,26 @@ export class AnthropicConnector extends LLMConnector {
         }
         messages = otherMessages;
 
-
-        if (
-
+        // For backward compatibility, we keep the prefill text with JSON response instruction for legacy models
+        if (LEGACY_MODELS.includes(params?.modelEntryName)) {
+            const responseFormat = params?.responseFormat || '';
+            if (responseFormat === 'json') {
+                body.system = body.system ? `${body.system} ${JSON_RESPONSE_INSTRUCTION}` : JSON_RESPONSE_INSTRUCTION;
 
-
+                messages.push({ role: TLLMMessageRole.Assistant, content: PREFILL_TEXT_FOR_JSON_RESPONSE });
+            }
+        }
+        // For new models, we use the structured output feature
+        else {
+            if (params?.structuredOutputs?.length > 0) {
+                // Note: We only support string type output for our components for now
+                const schemaShape = Object.fromEntries(params?.structuredOutputs?.map((output) => [output.name, z.string()]));
+                const ResponseSchema = z.object(schemaShape);
+
+                body.output_config = {
+                    format: zodOutputFormat(ResponseSchema),
+                };
+            }
         }
 
         const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
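The new path builds a Zod schema from the component's custom outputs and hands it to the Anthropic SDK's `zodOutputFormat` helper (both imports appear at the top of this file in the diff). A condensed, self-contained version with hypothetical outputs:

```ts
import z from 'zod';
import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod';

// Hypothetical component outputs; each becomes a required string field.
const structuredOutputs = [{ name: 'summary' }, { name: 'sentiment' }];

const shape = Object.fromEntries(structuredOutputs.map((o) => [o.name, z.string()]));
const ResponseSchema = z.object(shape);

const body: Record<string, unknown> = {};
body.output_config = { format: zodOutputFormat(ResponseSchema) };
```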
@@ -475,13 +583,28 @@ export class AnthropicConnector extends LLMConnector {
         }
         //#endregion Prepare system message and add JSON response instruction if needed
 
-
+        // Temperature and top_p are mutually exclusive for Anthropic API.
+        // Temperature takes precedence. Guard ensures only one is ever set.
+        if (params?.temperature !== undefined && params.temperature >= 0) {
+            body.temperature = params.temperature;
+            delete body.top_p;
+        } else if (params?.topP !== undefined && params.topP >= 0) {
+            body.top_p = params.topP;
+            delete body.temperature;
+        }
 
-        if (params?.
-        if (params?.topP !== undefined && !isReasoningModel) body.top_p = params.topP;
-        if (params?.topK !== undefined && !isReasoningModel) body.top_k = params.topK;
+        if (params?.topK !== undefined) body.top_k = params.topK;
         if (params?.stopSequences?.length) body.stop_sequences = params.stopSequences;
 
+        // #region Reasoning effort, only supported by specific models
+        if (params?.reasoningEffort && MODELS_SUPPORTING_REASONING_EFFORT.includes(params.modelEntryName)) {
+            body.output_config = {
+                ...(body.output_config || {}),
+                effort: params.reasoningEffort as Anthropic.OutputConfig['effort'],
+            };
+        }
+        // #endregion Reasoning effort
+
         // #region Tools
         if (params?.toolsConfig?.tools && params?.toolsConfig?.tools.length > 0) {
             body.tools = params?.toolsConfig?.tools as unknown as Anthropic.Tool[];
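The effect of the exclusivity guard on a body that starts with both sampling knobs set (values are made up):

```ts
const body: { temperature?: number; top_p?: number } = { temperature: 0.2, top_p: 0.9 };
const params = { temperature: 0.7, topP: 0.95 }; // hypothetical caller input

if (params.temperature !== undefined && params.temperature >= 0) {
    body.temperature = params.temperature; // temperature wins
    delete body.top_p;
} else if (params.topP !== undefined && params.topP >= 0) {
    body.top_p = params.topP;
    delete body.temperature;
}
// body -> { temperature: 0.7 }
```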
@@ -506,13 +629,13 @@ export class AnthropicConnector extends LLMConnector {
         maxThinkingTokens,
         toolChoice = null,
     }: {
-        body:
+        body: Anthropic.MessageCreateParamsNonStreaming;
         maxThinkingTokens: number;
         toolChoice?: Anthropic.ToolChoice;
     }): Promise<Anthropic.MessageCreateParamsNonStreaming> {
         // Remove the assistant message with the prefill text for JSON response, it's not supported with thinking
         let messages = body.messages.filter(
-            (message) => !(message?.role === TLLMMessageRole.Assistant && message?.content === PREFILL_TEXT_FOR_JSON_RESPONSE)
+            (message) => !(message?.role === TLLMMessageRole.Assistant && message?.content === PREFILL_TEXT_FOR_JSON_RESPONSE),
         );
 
         let budget_tokens = Math.min(maxThinkingTokens, body.max_tokens);
@@ -591,7 +714,7 @@ export class AnthropicConnector extends LLMConnector {
 
     private async prepareSystemPrompt(
         systemMessage: TLLMMessageBlock,
-        params: TLLMPreparedParams
+        params: TLLMPreparedParams,
     ): Promise<string | Array<Anthropic.TextBlockParam>> {
         let systemPrompt = systemMessage?.content;
 
@@ -622,9 +745,10 @@ export class AnthropicConnector extends LLMConnector {
      */
     private async shouldUseThinkingMode(params: TLLMPreparedParams): Promise<boolean> {
         // Legacy thinking models always use thinking mode
-
-
-
+        // Legacy thinking models retired and replaced with new models
+        // if (LEGACY_THINKING_MODELS.includes(params.modelEntryName)) {
+        //     return true;
+        // }
 
         // Check if reasoning is explicitly requested and model supports it
         const useReasoning = params?.useReasoning && params.capabilities?.reasoning === true;
@@ -650,7 +774,7 @@ export class AnthropicConnector extends LLMConnector {
 
     private async getImageData(
         files: BinaryInput[],
-        agentId: string
+        agentId: string,
     ): Promise<
         {
             type: string;
@@ -17,6 +17,7 @@ import {
     TLLMMessageRole,
     APIKeySource,
     TLLMEvent,
+    TLLMFinishReason,
     BedrockCredentials,
     ILLMRequestFuncParams,
     TLLMChatResponse,
@@ -53,12 +54,12 @@ export class BedrockConnector extends LLMConnector {
     }
 
     @hookAsync('LLMConnector.request')
-    protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+    protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
         try {
             logger.debug(`request ${this.name}`, acRequest.candidate);
             const bedrock = await this.getClient(context);
             const command = new ConverseCommand(body);
-            const response: ConverseCommandOutput = await bedrock.send(command);
+            const response: ConverseCommandOutput = await bedrock.send(command, { abortSignal });
 
             const usage = response.usage;
             this.reportUsage(usage as any, {
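For Bedrock, the AWS SDK v3 accepts the signal through the second argument of `send`, so no extra plumbing is needed. A minimal sketch (region/credentials assumed to resolve from the environment; the model id is a placeholder):

```ts
import { BedrockRuntimeClient, ConverseCommand } from '@aws-sdk/client-bedrock-runtime';

const bedrock = new BedrockRuntimeClient({}); // config resolved from the environment
const controller = new AbortController();

const pending = bedrock.send(new ConverseCommand({ modelId: 'model-id', messages: [] }), {
    abortSignal: controller.signal, // per-call HTTP option
});

controller.abort();                            // `pending` now rejects instead of waiting on the model
pending.catch((err) => console.log(err.name)); // 'AbortError'
```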
@@ -69,12 +70,12 @@ export class BedrockConnector extends LLMConnector {
             });
 
             const message = response.output?.message;
-            const finishReason = response.stopReason;
+            const finishReason = LLMHelper.normalizeFinishReason(response.stopReason);
 
             let toolsData: ToolData[] = [];
             let useTool = false;
 
-            if (finishReason ===
+            if (finishReason === TLLMFinishReason.ToolCalls) {
                 const toolUseBlocks = message?.content?.filter((block) => block?.toolUse) || [];
 
                 toolsData = toolUseBlocks.map((block, index) => ({
@@ -102,14 +103,14 @@ export class BedrockConnector extends LLMConnector {
         }
     }
     @hookAsync('LLMConnector.streamRequest')
-    protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+    protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
         const emitter = new EventEmitter();
 
         try {
             logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
             const bedrock = await this.getClient(context);
             const command = new ConverseStreamCommand(body);
-            const response: ConverseStreamCommandOutput = await bedrock.send(command);
+            const response: ConverseStreamCommandOutput = await bedrock.send(command, { abortSignal });
             const stream = response.stream;
 
             if (stream) {
@@ -175,14 +176,14 @@ export class BedrockConnector extends LLMConnector {
 
                     // Handle message completion
                     if (chunk.messageStop) {
-                        const finishReason = chunk.messageStop.stopReason
+                        const finishReason = LLMHelper.normalizeFinishReason(chunk.messageStop.stopReason);
 
                         if (currentMessage.toolCalls.length > 0) {
                             emitter.emit(TLLMEvent.ToolInfo, currentMessage.toolCalls);
                         }
 
                         // Emit interrupted event if finishReason is not 'stop'
-                        if (finishReason !==
+                        if (finishReason !== TLLMFinishReason.Stop) {
                             emitter.emit(TLLMEvent.Interrupted, finishReason);
                         }
 