@smythos/sre 1.7.42 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG +448 -66
  2. package/dist/index.js +65 -50
  3. package/dist/index.js.map +1 -1
  4. package/dist/types/Components/Async.class.d.ts +11 -5
  5. package/dist/types/index.d.ts +2 -0
  6. package/dist/types/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.d.ts +45 -0
  7. package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +32 -1
  8. package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +25 -2
  9. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.d.ts +22 -2
  10. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.d.ts +2 -2
  11. package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +27 -2
  12. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +22 -2
  13. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Ollama.class.d.ts +22 -2
  14. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.d.ts +3 -3
  15. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.d.ts +23 -3
  16. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +2 -2
  17. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.d.ts +2 -2
  18. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +2 -2
  19. package/dist/types/subsystems/LLMManager/LLM.service/connectors/xAI.class.d.ts +3 -3
  20. package/dist/types/subsystems/MemoryManager/LLMContext.d.ts +10 -3
  21. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.d.ts +24 -0
  22. package/dist/types/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.d.ts +49 -0
  23. package/dist/types/types/LLM.types.d.ts +30 -1
  24. package/package.json +4 -3
  25. package/src/Components/APICall/OAuth.helper.ts +16 -1
  26. package/src/Components/APIEndpoint.class.ts +11 -4
  27. package/src/Components/Async.class.ts +38 -5
  28. package/src/Components/GenAILLM.class.ts +13 -7
  29. package/src/Components/ImageGenerator.class.ts +32 -13
  30. package/src/Components/LLMAssistant.class.ts +3 -1
  31. package/src/Components/LogicAND.class.ts +13 -0
  32. package/src/Components/LogicAtLeast.class.ts +18 -0
  33. package/src/Components/LogicAtMost.class.ts +19 -0
  34. package/src/Components/LogicOR.class.ts +12 -2
  35. package/src/Components/LogicXOR.class.ts +11 -0
  36. package/src/constants.ts +1 -1
  37. package/src/helpers/Conversation.helper.ts +10 -8
  38. package/src/index.ts +2 -0
  39. package/src/index.ts.bak +2 -0
  40. package/src/subsystems/AgentManager/AgentData.service/connectors/SQLiteAgentDataConnector.class.ts +190 -0
  41. package/src/subsystems/AgentManager/AgentData.service/index.ts +2 -0
  42. package/src/subsystems/LLMManager/LLM.helper.ts +117 -1
  43. package/src/subsystems/LLMManager/LLM.inference.ts +136 -67
  44. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +22 -6
  45. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +157 -33
  46. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +9 -8
  47. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +124 -90
  48. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +125 -62
  49. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +168 -76
  50. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +18 -8
  51. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +8 -4
  52. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +50 -8
  53. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +30 -16
  54. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +2 -2
  55. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +29 -15
  56. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +10 -8
  57. package/src/subsystems/MemoryManager/LLMContext.ts +27 -8
  58. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +313 -85
  59. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.redaction.helper.ts +203 -0
  60. package/src/types/LLM.types.ts +31 -1
  61. package/src/types/node-sqlite.d.ts +45 -0
package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts

@@ -112,6 +112,7 @@ export abstract class LLMConnector extends Connector {
  const response = await this.request({
      acRequest: candidate.readRequest,
      body: preparedParams.body,
+     abortSignal: preparedParams.abortSignal,
      context: {
          modelEntryName: preparedParams.modelEntryName,
          agentId: preparedParams.agentId,
@@ -137,6 +138,7 @@ export abstract class LLMConnector extends Connector {
  const requestParams = {
      acRequest: candidate.readRequest,
      body: preparedParams.body,
+     abortSignal: preparedParams.abortSignal,
      context: {
          modelEntryName: preparedParams.modelEntryName,
          agentId: preparedParams.agentId,
@@ -262,15 +264,18 @@ export abstract class LLMConnector extends Connector {

  private async prepareParams(candidate: AccessCandidate, params: TLLMConnectorParams): Promise<TLLMPreparedParams> {
      const modelsProvider: ModelsProviderConnector = ConnectorService.getModelsProviderConnector();
-     // Assign file from the original parameters to avoid overwriting the original constructor
-     const files = params?.files;
-     delete params?.files; // need to remove files to avoid any issues during JSON.stringify() especially when we have large files
+     // Extract files and abortSignal from the original parameters to avoid overwriting the original constructor
+     const { files, abortSignal, ...restParams } = params;

-     const clonedParams = JSON.parse(JSON.stringify(params)); // Avoid mutation of the original params
+     const clonedParams = JSON.parse(JSON.stringify(restParams)); // Avoid mutation of the original params

      // Format the parameters to ensure proper type of values
      const _params: TLLMPreparedParams = this.formatParamValues(clonedParams);

+     // Re-attach non-serializable properties ignored before cloning
+     _params.abortSignal = abortSignal;
+     _params.files = files;
+
      const model = _params.model;
      const teamId = await this.getTeamId(candidate);

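Note: the switch from delete-and-reattach to destructuring matters because JSON.parse(JSON.stringify(...)) silently destroys anything that is not plain data. A minimal standalone sketch of the failure mode (values are illustrative, not from the package):

    // AbortSignal serializes to {}; its abort state and listeners are lost.
    // Large Buffers survive only as bloated { type: 'Buffer', data: [...] } objects.
    const controller = new AbortController();
    const params = {
        model: 'claude-4-sonnet',
        abortSignal: controller.signal,
        files: [Buffer.from('...')],
    };

    const { files, abortSignal, ...restParams } = params;
    const clonedParams = JSON.parse(JSON.stringify(restParams)); // only plain data left

    console.log(clonedParams.model);            // 'claude-4-sonnet'
    console.log('abortSignal' in clonedParams); // false; hence the re-attach step above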
@@ -281,6 +286,15 @@
  const modelProviderCandidate = modelsProvider.requester(candidate);
  const modelInfo: TLLMModel | TCustomLLMModel = await modelProviderCandidate.getModelInfo(model);

+ // If the model entry has an alias, it means this entry forwards to another model.
+ // Usage must be reported against the alias (the actual model being billed),
+ // not the forwarding entry (which may have stale/different pricing).
+ // Guard: skip for custom/enterprise LLMs — they are not billed and should
+ // retain their own entry name (enterprise models use alias only for config inheritance).
+ if (modelInfo?.alias && !(modelInfo as TCustomLLMModel)?.isCustomLLM) {
+     _params.modelEntryName = modelInfo.alias;
+ }
+
  //if the model has default params make sure to set them if they are not present
  if (modelInfo.params) {
      for (let key in modelInfo.params) {
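Note: to make the alias rule concrete, a hedged sketch with hypothetical registry entries (names and shape are illustrative; the real registry lives behind the ModelsProvider connector):

    type ModelEntry = { alias?: string; isCustomLLM?: boolean };

    // Hypothetical entries, illustrative only:
    const entries: Record<string, ModelEntry> = {
        'claude-3.7-sonnet': { alias: 'claude-4-sonnet' },               // forwarding entry: billed as the alias
        'acme-private': { alias: 'claude-4-sonnet', isCustomLLM: true }, // enterprise entry: alias is config inheritance only
    };

    function billingEntryName(name: string, info: ModelEntry): string {
        // Mirrors the guard above: follow the alias unless the entry is a custom LLM.
        return info.alias && !info.isCustomLLM ? info.alias : name;
    }

    console.log(billingEntryName('claude-3.7-sonnet', entries['claude-3.7-sonnet'])); // 'claude-4-sonnet'
    console.log(billingEntryName('acme-private', entries['acme-private']));           // 'acme-private'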
@@ -307,8 +321,6 @@
  }

  _params.model = await modelProviderCandidate.getModelId(model);
- // Attach the files again after formatting the parameters
- _params.files = files;

  const features = modelInfo?.features || [];

@@ -327,6 +339,9 @@
      xai: await this.prepareXAIToolsInfo(_params),
  };

+ // Filter out default and system-specific outputs (e.g., _debug, _error) to isolate custom outputs for structured response
+ _params.structuredOutputs = _params?.outputs?.filter((output) => !output.default && !['_debug', '_error'].includes(output.name)) || [];
+
  // The input adapter transforms the standardized parameters into the specific format required by the target LLM provider
  _params.agentId = candidate.id;
  const body = await this.reqBodyAdapter(_params);
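Note: what the new filter keeps, shown with a hypothetical outputs array (the non-system output names are invented):

    const outputs = [
        { name: 'Reply', default: false },  // kept: custom output
        { name: 'Output', default: true },  // dropped: default output
        { name: '_debug', default: false }, // dropped: system-specific
        { name: '_error', default: false }, // dropped: system-specific
    ];

    const structuredOutputs = outputs.filter(
        (o) => !o.default && !['_debug', '_error'].includes(o.name),
    );
    console.log(structuredOutputs.map((o) => o.name)); // ['Reply']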
@@ -461,6 +476,7 @@
  }

  //FIXME: to revisit by Alaa-eddine
+ // TODO: This part is a bit confusing. We send “consistent” messages to the LLM, but they still aren’t truly consistent. For example, we send { role: 'system', content: 'You are a helpful assistant.' }, which isn’t compatible with Google AI. However, we still need to mark it as `system` because we later convert it to `systemInstruction`. We should revisit the architecture later and make the flow simpler and more straightforward.
  if (key === 'messages') {
      _value = this.getConsistentMessages(_value);
  }
package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts

@@ -1,6 +1,8 @@
  import EventEmitter from 'events';
+ import z from 'zod';
  import Anthropic from '@anthropic-ai/sdk';
  import type { MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream';
+ import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod';

  import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
  import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
@@ -18,6 +20,7 @@ import {
      TAnthropicRequestBody,
      ILLMRequestContext,
      TLLMPreparedParams,
+     TLLMFinishReason,
  } from '@sre/types/LLM.types';

  import { LLMHelper } from '@sre/LLMManager/LLM.helper';
@@ -32,10 +35,17 @@ import { hookAsync } from '@sre/Core/HookService';
  const logger = Logger('AnthropicConnector');

  const PREFILL_TEXT_FOR_JSON_RESPONSE = '{';
- const LEGACY_THINKING_MODELS = ['smythos/claude-3.7-sonnet-thinking', 'claude-3.7-sonnet-thinking'];
+ const LEGACY_MODELS = [
+     'claude-4-sonnet',
+     'claude-4-opus',
+     'claude-opus-4-1',
+     'smythos/claude-4-sonnet',
+     'smythos/claude-4-opus',
+     'smythos/claude-opus-4-1',
+ ];
+ const MODELS_SUPPORTING_REASONING_EFFORT = ['claude-opus-4-6', 'claude-opus-4-5', 'smythos/claude-opus-4-6', 'smythos/claude-opus-4-5'];

  // Type aliases
- type AnthropicMessageParams = Anthropic.MessageCreateParamsNonStreaming | Anthropic.Messages.MessageStreamParams;
  type AnthropicStreamEventType = keyof MessageStreamEvents;

  // Event names automatically validated against MessageStreamEvents type
@@ -73,21 +83,21 @@ export class AnthropicConnector extends LLMConnector {
  }

  @hookAsync('LLMConnector.request')
- protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+ protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
      try {
          logger.debug(`request ${this.name}`, acRequest.candidate);
          const anthropic = await this.getClient(context);
-         const result = await anthropic.messages.create(body);
+         const result = await anthropic.messages.create(body, { signal: abortSignal });
          const message: Anthropic.MessageParam = {
              role: (result?.role || TLLMMessageRole.User) as Anthropic.MessageParam['role'],
              content: result?.content || '',
          };
-         const stopReason = result?.stop_reason;
+         const finishReason = LLMHelper.normalizeFinishReason(result?.stop_reason);

          let toolsData: ToolData[] = [];
          let useTool = false;

-         if ((stopReason as 'tool_use') === 'tool_use') {
+         if (finishReason === TLLMFinishReason.ToolCalls) {
              const toolUseContentBlocks = result?.content?.filter((c) => (c.type as 'tool_use') === 'tool_use');

              if (toolUseContentBlocks?.length === 0) return;
@@ -124,7 +134,7 @@

      return {
          content,
-         finishReason: result?.stop_reason,
+         finishReason,
          useTool,
          toolsData,
          message,
@@ -136,15 +146,49 @@
          }
      }

+     /**
+      * Stream request implementation.
+      *
+      * **Error Handling Pattern:**
+      * - Always returns emitters, never throws errors - ensures consistent error handling
+      * - Uses setImmediate for event emission - prevents race conditions where events fire before listeners attach
+      * - Emits End after terminal events (Error, Abort) - ensures cleanup code always runs
+      *
+      * **Why setImmediate?**
+      * Since streamRequest is async, callers must await to get the emitter, creating a timing gap.
+      * setImmediate defers event emission to the next event loop tick, ensuring events fire AFTER
+      * listeners are attached. This prevents race conditions where synchronous event emission
+      * would occur before listeners can be registered.
+      *
+      * @param acRequest - Access request for authorization
+      * @param body - Request body parameters
+      * @param context - LLM request context
+      * @param abortSignal - AbortSignal for cancellation
+      * @returns EventEmitter that emits TLLMEvent events (Data, Content, Error, Abort, End, etc.)
+      */
      @hookAsync('LLMConnector.streamRequest')
-     protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+     protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
+         const emitter = new EventEmitter();
+
          try {
              logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
-             const emitter = new EventEmitter();
+
+             // Pre-flight: already aborted before we start — emit Abort immediately.
+             // This is especially important for Anthropic because if we try to start the stream
+             // with an already-aborted signal, the SDK may never emit abort/error, leaving callers hanging.
+             if (abortSignal?.aborted) {
+                 const abortError = new DOMException('Request aborted', 'AbortError');
+                 setImmediate(() => {
+                     emitter.emit(TLLMEvent.Abort, abortError);
+                     emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                 });
+                 return emitter;
+             }
+
              const usage_data = [];

              const anthropic = await this.getClient(context);
-             let stream = anthropic.messages.stream(body);
+             let stream = anthropic.messages.stream(body, { signal: abortSignal });

              let toolsData: ToolData[] = [];
              let thinkingBlocks: any[] = []; // To preserve thinking blocks
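Note: the setImmediate rationale in the docstring is easiest to see from the call site. A hedged consumer sketch (the connector variable is hypothetical; event names and the End argument order are taken from the emissions in this diff):

    const emitter = await connector.streamRequest({ acRequest, body, context, abortSignal });
    // Without setImmediate inside the connector, a pre-flight Abort could fire
    // in the gap between the await resolving and these listeners attaching.
    emitter.on(TLLMEvent.Content, (chunk) => process.stdout.write(chunk));
    emitter.on(TLLMEvent.Abort, (err) => console.warn('aborted:', err.message));
    emitter.on(TLLMEvent.Error, (err) => console.error('failed:', err));
    emitter.on(TLLMEvent.End, (toolsData, thinkingBlocks, finishReason) => {
        console.log('finished with', finishReason); // End always fires, even after Abort/Error
    });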
@@ -160,9 +204,22 @@
      });

      stream.on(AnthropicStreamEvent.error, (error) => {
-         //console.log('error', error);
+         logger.debug(`streamRequest ${this.name} stream error`, error);
+         setImmediate(() => {
+             emitter.emit(TLLMEvent.Error, error);
+             emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+         });
+     });

-         emitter.emit(TLLMEvent.Error, error);
+     // Anthropic emits a dedicated abort event; translate it to our Abort signal
+     stream.on(AnthropicStreamEvent.abort, (error) => {
+         logger.debug(`streamRequest ${this.name} stream abort`, error);
+         // Always use DOMException with name 'AbortError' per Web API standards for consistency
+         const abortError = new DOMException('Request aborted', 'AbortError');
+         setImmediate(() => {
+             emitter.emit(TLLMEvent.Abort, abortError);
+             emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+         });
      });

      stream.on(AnthropicStreamEvent.message, (message) => {
@@ -184,8 +241,25 @@
          emitter.emit(TLLMEvent.Thinking, thinking);
      });

+     if (abortSignal) {
+         // Catch mid-flight cancellations even if the Anthropic stream never emits its own abort
+         // (e.g., aborted during setup before stream listeners attach).
+         abortSignal.addEventListener(
+             'abort',
+             () => {
+                 logger.debug(`streamRequest ${this.name} abortSignal triggered`, acRequest.candidate);
+                 const abortError = new DOMException('Request aborted', 'AbortError');
+                 setImmediate(() => {
+                     emitter.emit(TLLMEvent.Abort, abortError);
+                     emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+                 });
+             },
+             { once: true },
+         );
+     }
+
      stream.on(AnthropicStreamEvent.finalMessage, (finalMessage) => {
-         let finishReason = 'stop';
+         let finishReason: TLLMFinishReason = TLLMFinishReason.Stop;
          // Preserve thinking blocks for subsequent tool interactions
          thinkingBlocks = finalMessage.content.filter((block) => block.type === 'thinking' || block.type === 'redacted_thinking');

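Note: and the mid-flight cancellation path the listener above covers, again as a hypothetical call site. Whichever fires first, the SDK's abort event or this signal listener, the caller sees the same Abort then End sequence:

    const controller = new AbortController();
    const emitter = await connector.streamRequest({ acRequest, body, context, abortSignal: controller.signal });

    // e.g. enforce a 30-second budget on the stream:
    const timer = setTimeout(() => controller.abort(), 30_000);
    emitter.on(TLLMEvent.End, () => clearTimeout(timer));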
@@ -206,7 +280,7 @@

      emitter.emit(TLLMEvent.ToolInfo, toolsData, thinkingBlocks);
  } else {
-     finishReason = finalMessage.stop_reason;
+     finishReason = LLMHelper.normalizeFinishReason(finalMessage.stop_reason);
  }

  if (finalMessage?.usage) {
@@ -221,7 +295,7 @@

      usage_data.push(reportedUsage);
  }
- if (finishReason !== 'stop' && finishReason !== 'end_turn') {
+ if (finishReason !== TLLMFinishReason.Stop) {
      emitter.emit(TLLMEvent.Interrupted, finishReason);
  }

@@ -233,8 +307,27 @@

          return emitter;
      } catch (error: any) {
+         // #region Safety net for aborts that happen while creating the stream (before stream events/listeners exist).
+         const isAbort = error?.name === 'AbortError' || abortSignal?.aborted;
+         if (isAbort) {
+             // Always use DOMException with name 'AbortError' per Web API standards for consistency
+             const abortError = new DOMException('Request aborted', 'AbortError');
+             logger.debug(`streamRequest ${this.name} aborted`, abortError, acRequest.candidate);
+             setImmediate(() => {
+                 emitter.emit(TLLMEvent.Abort, abortError);
+                 emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Abort);
+             });
+             return emitter;
+         }
+         // #endregion Abort error handling
+
          logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
-         throw error;
+         setImmediate(() => {
+             emitter.emit(TLLMEvent.Error, error);
+             emitter.emit(TLLMEvent.End, [], [], TLLMFinishReason.Error);
+         });
+
+         return emitter;
      }
  }

@@ -255,7 +348,7 @@

  protected reportUsage(
      usage: Anthropic.Messages.Usage & { cache_creation_input_tokens?: number; cache_read_input_tokens?: number },
-     metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
+     metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string },
  ) {
      // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
      const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
@@ -386,7 +479,7 @@
  } else if (Array.isArray(message?.content)) {
      if (Array.isArray(message.content)) {
          const toolBlocks = message.content.filter(
-             (item) => typeof item === 'object' && 'type' in item && (item.type === 'tool_use' || item.type === 'tool_result')
+             (item) => typeof item === 'object' && 'type' in item && (item.type === 'tool_use' || item.type === 'tool_result'),
          );

          if (toolBlocks?.length > 0) {
@@ -455,11 +548,26 @@
  }
  messages = otherMessages;

- const responseFormat = params?.responseFormat || '';
- if (responseFormat === 'json') {
-     body.system = body.system ? `${body.system} ${JSON_RESPONSE_INSTRUCTION}` : JSON_RESPONSE_INSTRUCTION;
+ // For backward compatibility, we keep the prefill text with JSON response instruction for legacy models
+ if (LEGACY_MODELS.includes(params?.modelEntryName)) {
+     const responseFormat = params?.responseFormat || '';
+     if (responseFormat === 'json') {
+         body.system = body.system ? `${body.system} ${JSON_RESPONSE_INSTRUCTION}` : JSON_RESPONSE_INSTRUCTION;

-     messages.push({ role: TLLMMessageRole.Assistant, content: PREFILL_TEXT_FOR_JSON_RESPONSE });
+         messages.push({ role: TLLMMessageRole.Assistant, content: PREFILL_TEXT_FOR_JSON_RESPONSE });
+     }
+ }
+ // For new models, we use the structured output feature
+ else {
+     if (params?.structuredOutputs?.length > 0) {
+         // Note: We only support string type output for our components for now
+         const schemaShape = Object.fromEntries(params?.structuredOutputs?.map((output) => [output.name, z.string()]));
+         const ResponseSchema = z.object(schemaShape);
+
+         body.output_config = {
+             format: zodOutputFormat(ResponseSchema),
+         };
+     }
  }

  const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
  const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
@@ -475,13 +583,28 @@ export class AnthropicConnector extends LLMConnector {
475
583
  }
476
584
  //#endregion Prepare system message and add JSON response instruction if needed
477
585
 
478
- const isReasoningModel = params?.capabilities?.reasoning;
586
+ // Temperature and top_p are mutually exclusive for Anthropic API.
587
+ // Temperature takes precedence. Guard ensures only one is ever set.
588
+ if (params?.temperature !== undefined && params.temperature >= 0) {
589
+ body.temperature = params.temperature;
590
+ delete body.top_p;
591
+ } else if (params?.topP !== undefined && params.topP >= 0) {
592
+ body.top_p = params.topP;
593
+ delete body.temperature;
594
+ }
479
595
 
480
- if (params?.temperature !== undefined && !isReasoningModel) body.temperature = params.temperature;
481
- if (params?.topP !== undefined && !isReasoningModel) body.top_p = params.topP;
482
- if (params?.topK !== undefined && !isReasoningModel) body.top_k = params.topK;
596
+ if (params?.topK !== undefined) body.top_k = params.topK;
483
597
  if (params?.stopSequences?.length) body.stop_sequences = params.stopSequences;
484
598
 
599
+ // #region Reasoning effort, only supported by specific models
600
+ if (params?.reasoningEffort && MODELS_SUPPORTING_REASONING_EFFORT.includes(params.modelEntryName)) {
601
+ body.output_config = {
602
+ ...(body.output_config || {}),
603
+ effort: params.reasoningEffort as Anthropic.OutputConfig['effort'],
604
+ };
605
+ }
606
+ // #endregion Reasoning effort
607
+
485
608
  // #region Tools
486
609
  if (params?.toolsConfig?.tools && params?.toolsConfig?.tools.length > 0) {
487
610
  body.tools = params?.toolsConfig?.tools as unknown as Anthropic.Tool[];
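Note: the net effect of the new sampling guard, with illustrative values:

    // { temperature: 0.7, topP: 0.9 } -> body gets temperature: 0.7 only (top_p deleted)
    // { topP: 0.9 }                   -> body gets top_p: 0.9 only (temperature deleted)
    // { topK: 40 }                    -> top_k is set independently; it is no longer
    //                                    suppressed for reasoning models as it was before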
@@ -506,13 +629,13 @@
      maxThinkingTokens,
      toolChoice = null,
  }: {
-     body: AnthropicMessageParams;
+     body: Anthropic.MessageCreateParamsNonStreaming;
      maxThinkingTokens: number;
      toolChoice?: Anthropic.ToolChoice;
  }): Promise<Anthropic.MessageCreateParamsNonStreaming> {
      // Remove the assistant message with the prefill text for JSON response, it's not supported with thinking
      let messages = body.messages.filter(
-         (message) => !(message?.role === TLLMMessageRole.Assistant && message?.content === PREFILL_TEXT_FOR_JSON_RESPONSE)
+         (message) => !(message?.role === TLLMMessageRole.Assistant && message?.content === PREFILL_TEXT_FOR_JSON_RESPONSE),
      );

      let budget_tokens = Math.min(maxThinkingTokens, body.max_tokens);
@@ -591,7 +714,7 @@

  private async prepareSystemPrompt(
      systemMessage: TLLMMessageBlock,
-     params: TLLMPreparedParams
+     params: TLLMPreparedParams,
  ): Promise<string | Array<Anthropic.TextBlockParam>> {
      let systemPrompt = systemMessage?.content;

@@ -622,9 +745,10 @@
   */
  private async shouldUseThinkingMode(params: TLLMPreparedParams): Promise<boolean> {
      // Legacy thinking models always use thinking mode
-     if (LEGACY_THINKING_MODELS.includes(params.modelEntryName)) {
-         return true;
-     }
+     // Legacy thinking models retired and replaced with new models
+     // if (LEGACY_THINKING_MODELS.includes(params.modelEntryName)) {
+     //     return true;
+     // }

      // Check if reasoning is explicitly requested and model supports it
      const useReasoning = params?.useReasoning && params.capabilities?.reasoning === true;
@@ -650,7 +774,7 @@

  private async getImageData(
      files: BinaryInput[],
-     agentId: string
+     agentId: string,
  ): Promise<
      {
          type: string;
package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts

@@ -17,6 +17,7 @@ import {
      TLLMMessageRole,
      APIKeySource,
      TLLMEvent,
+     TLLMFinishReason,
      BedrockCredentials,
      ILLMRequestFuncParams,
      TLLMChatResponse,
@@ -53,12 +54,12 @@ export class BedrockConnector extends LLMConnector {
  }

  @hookAsync('LLMConnector.request')
- protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
+ protected async request({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
      try {
          logger.debug(`request ${this.name}`, acRequest.candidate);
          const bedrock = await this.getClient(context);
          const command = new ConverseCommand(body);
-         const response: ConverseCommandOutput = await bedrock.send(command);
+         const response: ConverseCommandOutput = await bedrock.send(command, { abortSignal });

          const usage = response.usage;
          this.reportUsage(usage as any, {
@@ -69,12 +70,12 @@
          });

          const message = response.output?.message;
-         const finishReason = response.stopReason;
+         const finishReason = LLMHelper.normalizeFinishReason(response.stopReason);

          let toolsData: ToolData[] = [];
          let useTool = false;

-         if (finishReason === 'tool_use') {
+         if (finishReason === TLLMFinishReason.ToolCalls) {
              const toolUseBlocks = message?.content?.filter((block) => block?.toolUse) || [];

              toolsData = toolUseBlocks.map((block, index) => ({
@@ -102,14 +103,14 @@
      }
  }
  @hookAsync('LLMConnector.streamRequest')
- protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
+ protected async streamRequest({ acRequest, body, context, abortSignal }: ILLMRequestFuncParams): Promise<EventEmitter> {
      const emitter = new EventEmitter();

      try {
          logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
          const bedrock = await this.getClient(context);
          const command = new ConverseStreamCommand(body);
-         const response: ConverseStreamCommandOutput = await bedrock.send(command);
+         const response: ConverseStreamCommandOutput = await bedrock.send(command, { abortSignal });
          const stream = response.stream;

          if (stream) {
@@ -175,14 +176,14 @@

      // Handle message completion
      if (chunk.messageStop) {
-         const finishReason = chunk.messageStop.stopReason || 'stop';
+         const finishReason = LLMHelper.normalizeFinishReason(chunk.messageStop.stopReason);

          if (currentMessage.toolCalls.length > 0) {
              emitter.emit(TLLMEvent.ToolInfo, currentMessage.toolCalls);
          }

          // Emit interrupted event if finishReason is not 'stop'
-         if (finishReason !== 'stop' && finishReason !== 'end_turn') {
+         if (finishReason !== TLLMFinishReason.Stop) {
              emitter.emit(TLLMEvent.Interrupted, finishReason);
          }
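Note: the repeated LLMHelper.normalizeFinishReason calls across the Anthropic and Bedrock connectors suggest the helper's shape. A hedged sketch of what such a normalizer plausibly does (the real mapping ships in LLM.helper.ts, +117 lines in this release, and may differ):

    // Assumes the package's TLLMFinishReason enum (values seen in this diff).
    function normalizeFinishReason(reason?: string): TLLMFinishReason {
        switch (reason) {
            case 'end_turn': // Anthropic
            case 'stop':     // OpenAI-style / Bedrock
                return TLLMFinishReason.Stop;
            case 'tool_use':   // Anthropic / Bedrock
            case 'tool_calls': // OpenAI-style
                return TLLMFinishReason.ToolCalls;
            default:
                return TLLMFinishReason.Stop; // illustrative fallback; the real helper may differ
        }
    }

This is why the connectors can now compare against TLLMFinishReason.Stop alone instead of checking both 'stop' and 'end_turn'.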