@smythos/sre 1.7.1 → 1.7.7

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (53)
  1. package/dist/index.js +33 -31
  2. package/dist/index.js.map +1 -1
  3. package/dist/types/Core/ConnectorsService.d.ts +2 -1
  4. package/dist/types/Core/SmythRuntime.class.d.ts +1 -0
  5. package/dist/types/helpers/BinaryInput.helper.d.ts +1 -1
  6. package/dist/types/helpers/LocalCache.helper.d.ts +18 -0
  7. package/dist/types/helpers/TemplateString.helper.d.ts +2 -1
  8. package/dist/types/subsystems/IO/VectorDB.service/VectorDBConnector.d.ts +4 -4
  9. package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +2 -2
  10. package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +2 -2
  11. package/dist/types/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.d.ts +2 -2
  12. package/dist/types/subsystems/IO/VectorDB.service/embed/BaseEmbedding.d.ts +16 -9
  13. package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +4 -1
  14. package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +36 -2
  15. package/dist/types/types/LLM.types.d.ts +54 -31
  16. package/dist/types/types/VectorDB.types.d.ts +6 -3
  17. package/dist/types/utils/string.utils.d.ts +0 -4
  18. package/package.json +1 -1
  19. package/src/Components/Classifier.class.ts +8 -2
  20. package/src/Components/GenAILLM.class.ts +11 -7
  21. package/src/Components/LLMAssistant.class.ts +12 -3
  22. package/src/Components/ScrapflyWebScrape.class.ts +8 -1
  23. package/src/Components/TavilyWebSearch.class.ts +4 -1
  24. package/src/Core/ConnectorsService.ts +12 -2
  25. package/src/Core/SmythRuntime.class.ts +32 -17
  26. package/src/helpers/BinaryInput.helper.ts +8 -8
  27. package/src/helpers/Conversation.helper.ts +11 -1
  28. package/src/helpers/LocalCache.helper.ts +18 -0
  29. package/src/helpers/TemplateString.helper.ts +20 -9
  30. package/src/index.ts +208 -208
  31. package/src/index.ts.bak +208 -208
  32. package/src/subsystems/AgentManager/Agent.class.ts +2 -0
  33. package/src/subsystems/AgentManager/AgentData.service/AgentDataConnector.ts +6 -5
  34. package/src/subsystems/AgentManager/AgentLogger.class.ts +1 -1
  35. package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +15 -4
  36. package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +31 -10
  37. package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +27 -10
  38. package/src/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.ts +25 -9
  39. package/src/subsystems/IO/VectorDB.service/embed/BaseEmbedding.ts +182 -12
  40. package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -1
  41. package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +1 -1
  42. package/src/subsystems/IO/VectorDB.service/embed/index.ts +12 -2
  43. package/src/subsystems/LLMManager/LLM.inference.ts +76 -17
  44. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +3 -2
  45. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +2 -2
  46. package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +2 -2
  47. package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +4 -1
  48. package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +12 -0
  49. package/src/types/LLM.types.ts +66 -38
  50. package/src/types/VectorDB.types.ts +7 -3
  51. package/src/utils/string.utils.ts +193 -191
  52. package/dist/bundle-analysis-lazy.html +0 -4949
  53. package/dist/bundle-analysis.html +0 -4949
package/src/subsystems/LLMManager/LLM.inference.ts

@@ -15,7 +15,7 @@ import { IModelsProviderRequest, ModelsProviderConnector } from './ModelsProvide
 
 const console = Logger('LLMInference');
 
-type TPromptParams = { query?: string; contextWindow?: any[]; files?: any[]; params: TLLMParams };
+type TPromptParams = { query?: string; contextWindow?: any[]; files?: any[]; params: TLLMParams; onFallback?: (data: any) => void };
 
 export class LLMInference {
     private model: string | TLLMModel;
@@ -52,12 +52,11 @@ export class LLMInference {
 
     public static user(candidate: AccessCandidate): any {}
 
-
     public get connector(): LLMConnector {
         return this.llmConnector;
     }
 
-    public async prompt({ query, contextWindow, files, params }: TPromptParams, isInFallback: boolean = false) {
+    public async prompt({ query, contextWindow, files, params, onFallback = () => {} }: TPromptParams, isInFallback: boolean = false) {
         let messages = contextWindow || [];
 
         if (query) {
@@ -71,6 +70,11 @@
         params.messages = messages;
         params.files = files;
 
+        // If a fallback model is used, trigger the onFallback callback to notify the caller.
+        if (isInFallback && typeof onFallback === 'function') {
+            onFallback({ model: this.model });
+        }
+
         try {
             let response: TLLMChatResponse = await this.llmConnector.requester(AccessCandidate.agent(params.agentId)).request(params);
 
@@ -89,8 +93,9 @@
             // Attempt fallback for custom models (only if not already in fallback)
             if (!isInFallback) {
                 try {
-                    const fallbackResult = await this.executeFallback('prompt', { query, contextWindow, files, params });
-
+                    const fallbackParams = await this.getSafeFallbackParams(params);
+                    const fallbackResult = await this.executeFallback('prompt', { query, contextWindow, files, params: fallbackParams, onFallback });
+
                     // If fallback succeeded, return the result
                     if (fallbackResult !== null) {
                         return fallbackResult;
@@ -107,7 +112,7 @@
         }
     }
 
-    public async promptStream({ query, contextWindow, files, params }: TPromptParams, isInFallback: boolean = false) {
+    public async promptStream({ query, contextWindow, files, params, onFallback = () => {} }: TPromptParams, isInFallback: boolean = false) {
         let messages = contextWindow || [];
 
         if (query) {
@@ -121,14 +126,26 @@
         params.messages = messages;
         params.files = files;
 
+        // If a fallback model is used, trigger the onFallback callback to notify the caller.
+        if (isInFallback && typeof onFallback === 'function') {
+            onFallback({ model: this.model });
+        }
+
         try {
             return await this.llmConnector.user(AccessCandidate.agent(params.agentId)).streamRequest(params);
         } catch (error) {
             // Attempt fallback for custom models (only if not already in fallback)
             if (!isInFallback) {
                 try {
-                    const fallbackResult = await this.executeFallback('promptStream', { query, contextWindow, files, params });
-
+                    const fallbackParams = await this.getSafeFallbackParams(params);
+                    const fallbackResult = await this.executeFallback('promptStream', {
+                        query,
+                        contextWindow,
+                        files,
+                        params: fallbackParams,
+                        onFallback,
+                    });
+
                     // If fallback succeeded, return the result
                     if (fallbackResult !== null) {
                         return fallbackResult;
@@ -151,38 +168,80 @@
         }
     }
 
+    /**
+     * Creates a safe, minimal set of parameters when switching to a fallback LLM provider.
+     *
+     * **Why this exists:**
+     * Model settings persist in the component's configuration data, even when you switch models.
+     * This can cause issues when fallback models run with settings the user can't see or track.
+     *
+     * **Real-world scenario:**
+     * 1. User configures a GPT-5 model and sets `reasoning_effort: "high"`
+     * 2. This setting gets saved to the component's configuration
+     * 3. User switches to a custom model (e.g., for cost savings)
+     * 4. The UI now shows custom model options - GPT-5 options are hidden
+     * 5. **BUT**: `reasoning_effort: "high"` is STILL in the config data!
+     * 6. Custom model has GPT-5 as its fallback
+     * 7. Primary custom model fails → automatically switches to GPT-5 fallback
+     * 8. GPT-5 fallback runs with the hidden `reasoning_effort: "high"` setting
+     * 9. `reasoning_effort: "high"` requires a high `max_tokens` value
+     * 10. If `max_tokens` is too low → the request fails
+     *
+     * **The impact:**
+     * Users can't track response quality properly because they don't know what configuration
+     * the fallback model is using. The UI doesn't show fallback model settings, so users have
+     * no visibility into how responses are being generated.
+     *
+     * **What this function does:**
+     * Strips out provider-specific settings when falling back, using only universal parameters.
+     * This ensures predictable behavior. (Note: A more robust solution would be showing fallback
+     * configuration in the UI, but for now this handles it at the parameter level.)
+     *
+     * @param params - The full set of LLM parameters from the original request
+     * @returns A filtered parameter object with only provider-agnostic, safe parameters
+     */
+    private async getSafeFallbackParams(params: TLLMParams): Promise<TLLMParams> {
+        const fallbackParams = {
+            agentId: params.agentId,
+            model: params.model,
+            maxContextWindowLength: params.maxContextWindowLength,
+            maxTokens: params.maxTokens,
+            messages: params.messages,
+            passthrough: params.passthrough,
+            useContextWindow: params.useContextWindow,
+        };
+
+        return fallbackParams;
+    }
 
     /**
      * Executes fallback logic for custom models when the primary model fails.
      * This method checks if a fallback model is configured and invokes the appropriate LLM method.
      * Prevents infinite loops by passing a flag to indicate we're in a fallback attempt.
-     * 
+     *
      * @param methodName - The name of the method being called ('prompt' or 'promptStream')
     * @param args - The original arguments passed to the method
     * @returns The result from the fallback execution, or null if fallback should not be attempted
     */
-    private async executeFallback(
-        methodName: 'prompt' | 'promptStream',
-        args: TPromptParams
-    ): Promise<any> {
+    private async executeFallback(methodName: 'prompt' | 'promptStream', args: TPromptParams): Promise<any> {
        const isCustomModel = await this.modelProviderReq.isUserCustomLLM(this.model);
        const fallbackModel = await this.modelProviderReq.getFallbackLLM(this.model);
-
+
        // Only execute fallback if it's a custom model with a configured fallback
        if (!isCustomModel || !fallbackModel) {
            return null;
        }
 
        console.info(`Attempting fallback from ${this.model} to ${fallbackModel}`);
-
+
        // Mutate the model and connector to use fallback
        this.model = fallbackModel;
-
+
        const llmProvider = await this.modelProviderReq.getProvider(fallbackModel);
        if (llmProvider) {
            this.llmConnector = ConnectorService.getLLMConnector(llmProvider);
        }
-
+
        // Call the appropriate method with isInFallback=true to prevent further fallbacks
        if (methodName === 'prompt') {
            return await this.prompt(args, true);
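For illustration, a minimal sketch of how a caller might use the new `onFallback` hook added to `prompt`/`promptStream`; the `LLMInference` instance, agent id, and model name below are assumptions, not taken from this diff:

// Hypothetical caller-side sketch; identifiers outside this diff are assumptions.
const result = await llmInference.prompt({
    query: 'Summarize the latest release notes',
    params: {
        model: 'my-custom-model', // primary custom model (invented name)
        agentId: 'agent-123', // invented id
        maxTokens: 1024,
    },
    // Invoked only when the request is retried on the configured fallback model.
    onFallback: ({ model }) => {
        console.warn(`Falling back to model: ${model}`);
    },
});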
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts

@@ -389,9 +389,10 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
     * Upload files to storage
     */
    private async uploadFiles(files: BinaryInput[], agentId: string): Promise<BinaryInput[]> {
+        const candidate = AccessCandidate.agent(agentId);
        const promises = files.map((file) => {
-            const binaryInput = BinaryInput.from(file);
-            return binaryInput.upload(AccessCandidate.agent(agentId)).then(() => binaryInput);
+            const binaryInput = BinaryInput.from(file, null, null, candidate);
+            return binaryInput.upload(candidate).then(() => binaryInput);
        });
 
        return Promise.all(promises);
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts

@@ -270,12 +270,12 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
 
        // Emit interrupted event if finishReason is not 'stop'
        if (finishReason !== 'stop') {
-            emitter.emit('interrupted', finishReason);
+            emitter.emit(TLLMEvent.Interrupted, finishReason);
        }
 
        // Emit end event with setImmediate to ensure proper event ordering
        setImmediate(() => {
-            emitter.emit('end', toolsData, reportedUsage, finishReason);
+            emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
        });
    }
 
package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts

@@ -193,11 +193,11 @@ export abstract class ModelsProviderConnector extends SecureConnector {
            modelInfo = models?.[model as string];
        }
 
+        // TODO: We will clean up `keyOptions` in the future but keep it for legacy users.
        const aliasKeyOptions = aliasModelInfo && hasAPIKey ? aliasModelInfo?.keyOptions : null;
-
        const modelKeyOptions = modelInfo?.keyOptions || aliasKeyOptions;
 
-        return { ...aliasModelInfo, ...modelInfo, ...aliasKeyOptions, ...modelKeyOptions, modelId };
+        return { ...modelInfo, ...aliasModelInfo, ...aliasKeyOptions, ...modelKeyOptions, modelId };
    }
 
    protected async getModelId(acRequest: AccessRequest, models: TLLMModelsList, model: string | TLLMModel | TCustomLLMModel): Promise<string> {
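The functional change above is the spread order: with object spread, later sources win, so alias model info now takes precedence over the base model entry on shared keys. A minimal illustration with invented values:

// Later spreads override earlier ones, so the reorder flips which source wins on shared keys.
const modelInfo = { maxTokens: 4096, provider: 'openai' }; // invented values
const aliasModelInfo = { maxTokens: 8192 };

const before = { ...aliasModelInfo, ...modelInfo }; // { maxTokens: 4096, provider: 'openai' }
const after = { ...modelInfo, ...aliasModelInfo };  // { maxTokens: 8192, provider: 'openai' }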
package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts

@@ -170,11 +170,13 @@ export class JSONModelsProvider extends ModelsProviderConnector {
            }
        } else if (typeof modelData === 'object' && !Array.isArray(modelData)) {
            // Object of models case
+            let models = '';
            for (const [modelId, model] of Object.entries(modelData)) {
                try {
                    if (this.isValidSingleModel(model)) {
                        validModels[modelId] = model as TLLMModel;
-                        console.debug(`Loaded model: ${modelId}`);
+                        //console.debug(`Loaded model: ${modelId}`);
+                        models += `${modelId} `;
                    } else {
                        console.warn(`Invalid model format for model "${modelId}"`);
                    }
@@ -183,6 +185,7 @@
                    // Continue processing other models instead of failing the whole file
                }
            }
+            console.debug(`Loaded models: ${models}`);
        } else {
            console.warn(`Invalid format (not a model or object of models)`);
        }
package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts

@@ -111,6 +111,18 @@ export class RedisCache extends CacheConnector {
 
    @SecureConnector.AccessControl
    public async setMetadata(acRequest: AccessRequest, key: string, metadata: CacheMetadata): Promise<void> {
+        if (metadata.acl) {
+            //preserve the ownership of the metadata
+            const newACL = ACL.from(metadata.acl).addAccess(acRequest.candidate.role, acRequest.candidate.id, TAccessLevel.Owner).ACL;
+            metadata.acl = newACL;
+        }
+
+        //no ACL present ==> preserve the existing ACL
+        if (!metadata.acl) {
+            const curACL = await this.getACL(acRequest, key);
+            metadata.acl = curACL;
+        }
+
        await this.setMetadataWithTTL(acRequest, key, metadata);
    }
    private async setMetadataWithTTL(acRequest: AccessRequest, key: string, metadata: CacheMetadata, ttl?: number): Promise<void> {
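In effect, a caller-supplied ACL gets the requesting candidate re-added as owner, while omitting the ACL keeps whatever is already stored for the key. A rough sketch of the two call paths; the key name, ACL value, and extra metadata field are illustrative, not from this diff:

// Illustrative only: how the two branches above behave for a caller.
await redisCache.setMetadata(acRequest, 'chat:42', { acl: customAcl } as CacheMetadata); // candidate re-added as Owner on customAcl
await redisCache.setMetadata(acRequest, 'chat:42', { label: 'summary' } as CacheMetadata); // existing ACL on 'chat:42' is preserved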
package/src/types/LLM.types.ts

@@ -87,52 +87,65 @@ export type TToolsInfo = {
 
 export type TSearchContextSize = 'low' | 'medium' | 'high';
 
-export type TLLMParams = {
-    model: TLLMModel | string;
-
-    prompt?: string;
-    messages?: any[]; // TODO [Forhad]: apply proper typing
-    temperature?: number;
-    maxTokens?: number;
-    stopSequences?: string[];
-    topP?: number;
-    topK?: number;
-    frequencyPenalty?: number;
-    presencePenalty?: number;
-    responseFormat?: any; // TODO [Forhad]: apply proper typing
-    modelInfo?: TCustomLLMModel;
-    files?: BinaryInput[];
+type TLLMToolConfig = {
     toolsConfig?: {
         tools?: OpenAI.ChatCompletionTool[] | OpenAI.Responses.Tool[] | OpenAI.Responses.WebSearchTool[];
         tool_choice?: TLLMToolChoice;
     };
-    baseURL?: string;
-
-    size?: OpenAI.Images.ImageGenerateParams['size'] | OpenAI.Images.ImageEditParams['size']; // for image generation and image editing
-    quality?: 'standard' | 'hd'; // for image generation
-    n?: number; // for image generation
-    style?: 'vivid' | 'natural'; // for image generation
+};
 
-    cache?: boolean;
-    agentId?: string;
-    teamId?: string;
+type TLLMThinkingConfig = {
     thinking?: {
         // for Anthropic
         type: 'enabled' | 'disabled';
         budget_tokens: number;
     };
     maxThinkingTokens?: number;
+};
+
+type TLLMReasoningConfig = {
+    useReasoning?: boolean;
+
+    /**
+     * Controls the level of effort the model will put into reasoning
+     * For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
+     * For Qwen 3 32B: "none" | "default"
+     */
+    reasoningEffort?: 'none' | 'default' | OpenAIReasoningEffort;
+
+    max_output_tokens?: number;
+    verbosity?: OpenAI.Responses.ResponseCreateParams['text']['verbosity'];
+    abortSignal?: AbortSignal;
+};
 
-    // #region Search
-    // Web search parameters (will be organized into toolsInfo.webSearch internally)
+type TLLMTextGenConfig = {
+    model: TLLMModel | string;
+    prompt?: string;
+    messages?: any[]; // TODO [Forhad]: apply proper typing
+    temperature?: number;
+    maxTokens?: number;
+    stopSequences?: string[];
+    topP?: number;
+    topK?: number;
+    frequencyPenalty?: number;
+    presencePenalty?: number;
+    responseFormat?: any; // TODO [Forhad]: apply proper typing
+} & TLLMToolConfig &
+    TLLMThinkingConfig &
+    TLLMReasoningConfig;
+
+// OpenAI specific web search parameters
+type TLLMWebSearchConfig = {
     useWebSearch?: boolean;
     webSearchContextSize?: TSearchContextSize;
     webSearchCity?: string;
     webSearchCountry?: string;
     webSearchRegion?: string;
     webSearchTimezone?: string;
+};
 
-    // xAI specific search parameters (consider moving to toolsInfo.xaiSearch)
+// xAI specific search parameters
+type TLLMSearchConfig = {
     useSearch?: boolean;
     searchMode?: 'auto' | 'on' | 'off';
     returnCitations?: boolean;
@@ -149,20 +162,33 @@ export type TLLMParams = {
     safeSearch?: boolean;
     fromDate?: string;
     toDate?: string;
-    // #endregion
+} & TLLMWebSearchConfig;
 
-    useReasoning?: boolean;
-    /**
-     * Controls the level of effort the model will put into reasoning
-     * For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
-     * For Qwen 3 32B: "none" | "default"
-     */
-    reasoningEffort?: 'none' | 'default' | OpenAIReasoningEffort;
-    max_output_tokens?: number;
-    verbosity?: OpenAI.Responses.ResponseCreateParams['text']['verbosity'];
-    abortSignal?: AbortSignal;
+type TLLMMiscConfig = {
+    maxContextWindowLength?: number;
+    useContextWindow?: boolean;
+    passthrough?: boolean;
 };
 
+type TLLMRuntimeContext = {
+    modelInfo?: TCustomLLMModel;
+    files?: BinaryInput[];
+    baseURL?: string;
+
+    cache?: boolean;
+    agentId?: string;
+    teamId?: string;
+};
+
+type TLLMImageGenConfig = {
+    size?: OpenAI.Images.ImageGenerateParams['size'] | OpenAI.Images.ImageEditParams['size']; // for image generation and image editing
+    quality?: 'standard' | 'hd'; // for image generation
+    n?: number; // for image generation
+    style?: 'vivid' | 'natural'; // for image generation
+};
+
+export type TLLMParams = TLLMTextGenConfig & TLLMSearchConfig & TLLMImageGenConfig & TLLMMiscConfig & TLLMRuntimeContext;
+
 export type TLLMPreparedParams = TLLMParams & {
     body: any;
     modelEntryName?: string; // for usage reporting
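For consumers nothing changes shape-wise: TLLMParams is now an intersection of the smaller config slices above, so a flat params object still type-checks. A small sketch with invented values:

// Fields drawn from several of the new config slices, combined in one TLLMParams object.
const params: TLLMParams = {
    model: 'gpt-4o-mini', // TLLMTextGenConfig (model name invented)
    temperature: 0.2, // TLLMTextGenConfig
    useWebSearch: true, // TLLMWebSearchConfig via TLLMSearchConfig
    maxContextWindowLength: 32000, // TLLMMiscConfig
    agentId: 'agent-123', // TLLMRuntimeContext (invented id)
};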
@@ -471,6 +497,8 @@ export enum TLLMEvent {
     Usage = 'usage',
     /** Interrupted : emitted when the response is interrupted before completion */
     Interrupted = 'interrupted',
+    /** Fallback : emitted when the response is using a fallback model */
+    Fallback = 'fallback',
 }
 
 export interface ILLMRequestContext {
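A hedged listener sketch for the new event; the emitter wiring and the payload shape are assumptions inferred from the `onFallback({ model })` callback in LLM.inference.ts, not shown in this diff:

// Assumed usage: reacting to the new Fallback event next to the existing ones.
emitter.on(TLLMEvent.Fallback, (info: { model: string | TLLMModel }) => {
    console.warn('Response is being produced by a fallback model:', info.model);
});
emitter.on(TLLMEvent.Interrupted, (finishReason) => {
    console.warn('Response interrupted:', finishReason);
});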
package/src/types/VectorDB.types.ts

@@ -3,17 +3,19 @@ export type VectorDBMetadata = {
     datasourceId: string;
     datasourceLabel: string;
     acl: string;
-    user_metadata?: string;
+    user_metadata?: string | Record<string, any>;
     text?: string;
 };
 
-export type VectorsResultData = {
+export type VectorDBResult = {
     id: string;
     score?: number;
     values: number[];
     text: string;
     metadata?: Record<string, any>;
-}[];
+};
+
+export type VectorsResultData = VectorDBResult[];
 
 export interface NsKnownMetadata {
     isOnCustomStorage?: boolean;
@@ -58,6 +60,7 @@ export interface IStorageVectorDataSource {
     metadata: string;
     text?: string;
     vectorIds: string[];
+    vectorInfo?: VectorDBResult[];
     id: string;
     candidateId: string;
     candidateRole: string;
@@ -83,4 +86,5 @@ export interface DatasourceDto {
     chunkOverlap?: number;
     label?: string;
     id?: string;
+    returnFullVectorInfo?: boolean;
 }
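A small sketch of how the new VectorDB fields fit together; only the type shapes come from this diff, the surrounding flow and values are assumed:

// Assumed flow: opting into full vector info on a datasource and reading it back.
const dtoPatch: Partial<DatasourceDto> = {
    label: 'release-notes', // invented label
    chunkOverlap: 64,
    returnFullVectorInfo: true, // new flag from this diff
};

const printVectors = (ds: IStorageVectorDataSource) => {
    for (const v of ds.vectorInfo ?? []) {
        // VectorDBResult fields: id, score?, values, text, metadata?
        console.log(v.id, v.score, v.text);
    }
};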