@smythos/sre 1.7.1 → 1.7.7

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (53)
  1. package/dist/index.js +33 -31
  2. package/dist/index.js.map +1 -1
  3. package/dist/types/Core/ConnectorsService.d.ts +2 -1
  4. package/dist/types/Core/SmythRuntime.class.d.ts +1 -0
  5. package/dist/types/helpers/BinaryInput.helper.d.ts +1 -1
  6. package/dist/types/helpers/LocalCache.helper.d.ts +18 -0
  7. package/dist/types/helpers/TemplateString.helper.d.ts +2 -1
  8. package/dist/types/subsystems/IO/VectorDB.service/VectorDBConnector.d.ts +4 -4
  9. package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +2 -2
  10. package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +2 -2
  11. package/dist/types/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.d.ts +2 -2
  12. package/dist/types/subsystems/IO/VectorDB.service/embed/BaseEmbedding.d.ts +16 -9
  13. package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +4 -1
  14. package/dist/types/subsystems/LLMManager/LLM.inference.d.ts +36 -2
  15. package/dist/types/types/LLM.types.d.ts +54 -31
  16. package/dist/types/types/VectorDB.types.d.ts +6 -3
  17. package/dist/types/utils/string.utils.d.ts +0 -4
  18. package/package.json +1 -1
  19. package/src/Components/Classifier.class.ts +8 -2
  20. package/src/Components/GenAILLM.class.ts +11 -7
  21. package/src/Components/LLMAssistant.class.ts +12 -3
  22. package/src/Components/ScrapflyWebScrape.class.ts +8 -1
  23. package/src/Components/TavilyWebSearch.class.ts +4 -1
  24. package/src/Core/ConnectorsService.ts +12 -2
  25. package/src/Core/SmythRuntime.class.ts +32 -17
  26. package/src/helpers/BinaryInput.helper.ts +8 -8
  27. package/src/helpers/Conversation.helper.ts +11 -1
  28. package/src/helpers/LocalCache.helper.ts +18 -0
  29. package/src/helpers/TemplateString.helper.ts +20 -9
  30. package/src/index.ts +208 -208
  31. package/src/index.ts.bak +208 -208
  32. package/src/subsystems/AgentManager/Agent.class.ts +2 -0
  33. package/src/subsystems/AgentManager/AgentData.service/AgentDataConnector.ts +6 -5
  34. package/src/subsystems/AgentManager/AgentLogger.class.ts +1 -1
  35. package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +15 -4
  36. package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +31 -10
  37. package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +27 -10
  38. package/src/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.ts +25 -9
  39. package/src/subsystems/IO/VectorDB.service/embed/BaseEmbedding.ts +182 -12
  40. package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -1
  41. package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +1 -1
  42. package/src/subsystems/IO/VectorDB.service/embed/index.ts +12 -2
  43. package/src/subsystems/LLMManager/LLM.inference.ts +76 -17
  44. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +3 -2
  45. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +2 -2
  46. package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +2 -2
  47. package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +4 -1
  48. package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +12 -0
  49. package/src/types/LLM.types.ts +66 -38
  50. package/src/types/VectorDB.types.ts +7 -3
  51. package/src/utils/string.utils.ts +193 -191
  52. package/dist/bundle-analysis-lazy.html +0 -4949
  53. package/dist/bundle-analysis.html +0 -4949
package/src/subsystems/LLMManager/LLM.inference.ts

@@ -15,7 +15,7 @@ import { IModelsProviderRequest, ModelsProviderConnector } from './ModelsProvide
 
 const console = Logger('LLMInference');
 
-type TPromptParams = { query?: string; contextWindow?: any[]; files?: any[]; params: TLLMParams };
+type TPromptParams = { query?: string; contextWindow?: any[]; files?: any[]; params: TLLMParams; onFallback?: (data: any) => void };
 
 export class LLMInference {
     private model: string | TLLMModel;
@@ -52,12 +52,11 @@ export class LLMInference {
 
     public static user(candidate: AccessCandidate): any {}
 
-
     public get connector(): LLMConnector {
         return this.llmConnector;
     }
 
-    public async prompt({ query, contextWindow, files, params }: TPromptParams, isInFallback: boolean = false) {
+    public async prompt({ query, contextWindow, files, params, onFallback = () => {} }: TPromptParams, isInFallback: boolean = false) {
         let messages = contextWindow || [];
 
         if (query) {
@@ -71,6 +70,11 @@
         params.messages = messages;
         params.files = files;
 
+        // If a fallback model is used, trigger the onFallback callback to notify the caller.
+        if (isInFallback && typeof onFallback === 'function') {
+            onFallback({ model: this.model });
+        }
+
         try {
             let response: TLLMChatResponse = await this.llmConnector.requester(AccessCandidate.agent(params.agentId)).request(params);
 
@@ -89,8 +93,9 @@
             // Attempt fallback for custom models (only if not already in fallback)
             if (!isInFallback) {
                 try {
-                    const fallbackResult = await this.executeFallback('prompt', { query, contextWindow, files, params });
-
+                    const fallbackParams = await this.getSafeFallbackParams(params);
+                    const fallbackResult = await this.executeFallback('prompt', { query, contextWindow, files, params: fallbackParams, onFallback });
+
                     // If fallback succeeded, return the result
                     if (fallbackResult !== null) {
                         return fallbackResult;
@@ -107,7 +112,7 @@
         }
     }
 
-    public async promptStream({ query, contextWindow, files, params }: TPromptParams, isInFallback: boolean = false) {
+    public async promptStream({ query, contextWindow, files, params, onFallback = () => {} }: TPromptParams, isInFallback: boolean = false) {
         let messages = contextWindow || [];
 
         if (query) {
@@ -121,14 +126,26 @@
         params.messages = messages;
         params.files = files;
 
+        // If a fallback model is used, trigger the onFallback callback to notify the caller.
+        if (isInFallback && typeof onFallback === 'function') {
+            onFallback({ model: this.model });
+        }
+
         try {
             return await this.llmConnector.user(AccessCandidate.agent(params.agentId)).streamRequest(params);
         } catch (error) {
             // Attempt fallback for custom models (only if not already in fallback)
             if (!isInFallback) {
                 try {
-                    const fallbackResult = await this.executeFallback('promptStream', { query, contextWindow, files, params });
-
+                    const fallbackParams = await this.getSafeFallbackParams(params);
+                    const fallbackResult = await this.executeFallback('promptStream', {
+                        query,
+                        contextWindow,
+                        files,
+                        params: fallbackParams,
+                        onFallback,
+                    });
+
                     // If fallback succeeded, return the result
                     if (fallbackResult !== null) {
                         return fallbackResult;
@@ -151,38 +168,80 @@
         }
     }
 
+    /**
+     * Creates a safe, minimal set of parameters when switching to a fallback LLM provider.
+     *
+     * **Why this exists:**
+     * Model settings persist in the component's configuration data, even when you switch models.
+     * This can cause issues when fallback models run with settings the user can't see or track.
+     *
+     * **Real-world scenario:**
+     * 1. User configures a GPT-5 model and sets `reasoning_effort: "high"`
+     * 2. This setting gets saved to the component's configuration
+     * 3. User switches to a custom model (e.g., for cost savings)
+     * 4. The UI now shows custom model options - GPT-5 options are hidden
+     * 5. **BUT**: `reasoning_effort: "high"` is STILL in the config data!
+     * 6. Custom model has GPT-5 as its fallback
+     * 7. Primary custom model fails → automatically switches to GPT-5 fallback
+     * 8. GPT-5 fallback runs with the hidden `reasoning_effort: "high"` setting
+     * 9. `reasoning_effort: "high"` requires a high `max_tokens` value
+     * 10. If `max_tokens` is too low → the request fails
+     *
+     * **The impact:**
+     * Users can't track response quality properly because they don't know what configuration
+     * the fallback model is using. The UI doesn't show fallback model settings, so users have
+     * no visibility into how responses are being generated.
+     *
+     * **What this function does:**
+     * Strips out provider-specific settings when falling back, using only universal parameters.
+     * This ensures predictable behavior. (Note: A more robust solution would be showing fallback
+     * configuration in the UI, but for now this handles it at the parameter level.)
+     *
+     * @param params - The full set of LLM parameters from the original request
+     * @returns A filtered parameter object with only provider-agnostic, safe parameters
+     */
+    private async getSafeFallbackParams(params: TLLMParams): Promise<TLLMParams> {
+        const fallbackParams = {
+            agentId: params.agentId,
+            model: params.model,
+            maxContextWindowLength: params.maxContextWindowLength,
+            maxTokens: params.maxTokens,
+            messages: params.messages,
+            passthrough: params.passthrough,
+            useContextWindow: params.useContextWindow,
+        };
+
+        return fallbackParams;
+    }
 
     /**
      * Executes fallback logic for custom models when the primary model fails.
      * This method checks if a fallback model is configured and invokes the appropriate LLM method.
      * Prevents infinite loops by passing a flag to indicate we're in a fallback attempt.
-     * 
+     *
      * @param methodName - The name of the method being called ('prompt' or 'promptStream')
     * @param args - The original arguments passed to the method
     * @returns The result from the fallback execution, or null if fallback should not be attempted
     */
-    private async executeFallback(
-        methodName: 'prompt' | 'promptStream',
-        args: TPromptParams
-    ): Promise<any> {
+    private async executeFallback(methodName: 'prompt' | 'promptStream', args: TPromptParams): Promise<any> {
        const isCustomModel = await this.modelProviderReq.isUserCustomLLM(this.model);
        const fallbackModel = await this.modelProviderReq.getFallbackLLM(this.model);
-
+
        // Only execute fallback if it's a custom model with a configured fallback
        if (!isCustomModel || !fallbackModel) {
            return null;
        }
 
        console.info(`Attempting fallback from ${this.model} to ${fallbackModel}`);
-
+
        // Mutate the model and connector to use fallback
        this.model = fallbackModel;
-
+
        const llmProvider = await this.modelProviderReq.getProvider(fallbackModel);
        if (llmProvider) {
            this.llmConnector = ConnectorService.getLLMConnector(llmProvider);
        }
-
+
        // Call the appropriate method with isInFallback=true to prevent further fallbacks
        if (methodName === 'prompt') {
            return await this.prompt(args, true);
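For illustration, a minimal sketch of how a caller might use the new `onFallback` hook added to `prompt`/`promptStream`; the `LLMInference` instance, agent id, and model name below are assumptions, not taken from this diff:

// Hypothetical caller-side sketch; identifiers outside this diff are assumptions.
const result = await llmInference.prompt({
    query: 'Summarize the latest release notes',
    params: {
        model: 'my-custom-model', // primary custom model (invented name)
        agentId: 'agent-123', // invented id
        maxTokens: 1024,
    },
    // Invoked only when the request is retried on the configured fallback model.
    onFallback: ({ model }) => {
        console.warn(`Falling back to model: ${model}`);
    },
});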
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts

@@ -389,9 +389,10 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
     * Upload files to storage
     */
    private async uploadFiles(files: BinaryInput[], agentId: string): Promise<BinaryInput[]> {
+        const candidate = AccessCandidate.agent(agentId);
        const promises = files.map((file) => {
-            const binaryInput = BinaryInput.from(file);
-            return binaryInput.upload(AccessCandidate.agent(agentId)).then(() => binaryInput);
+            const binaryInput = BinaryInput.from(file, null, null, candidate);
+            return binaryInput.upload(candidate).then(() => binaryInput);
        });
 
        return Promise.all(promises);
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts

@@ -270,12 +270,12 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
 
        // Emit interrupted event if finishReason is not 'stop'
        if (finishReason !== 'stop') {
-            emitter.emit('interrupted', finishReason);
+            emitter.emit(TLLMEvent.Interrupted, finishReason);
        }
 
        // Emit end event with setImmediate to ensure proper event ordering
        setImmediate(() => {
-            emitter.emit('end', toolsData, reportedUsage, finishReason);
+            emitter.emit(TLLMEvent.End, toolsData, reportedUsage, finishReason);
        });
    }
 
package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts

@@ -193,11 +193,11 @@ export abstract class ModelsProviderConnector extends SecureConnector {
            modelInfo = models?.[model as string];
        }
 
+        // TODO: We will clean up `keyOptions` in the future but keep it for legacy users.
        const aliasKeyOptions = aliasModelInfo && hasAPIKey ? aliasModelInfo?.keyOptions : null;
-
        const modelKeyOptions = modelInfo?.keyOptions || aliasKeyOptions;
 
-        return { ...aliasModelInfo, ...modelInfo, ...aliasKeyOptions, ...modelKeyOptions, modelId };
+        return { ...modelInfo, ...aliasModelInfo, ...aliasKeyOptions, ...modelKeyOptions, modelId };
    }
 
    protected async getModelId(acRequest: AccessRequest, models: TLLMModelsList, model: string | TLLMModel | TCustomLLMModel): Promise<string> {
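The functional change above is the spread order: with object spread, later sources win, so alias model info now takes precedence over the base model entry on shared keys. A minimal illustration with invented values:

// Later spreads override earlier ones, so the reorder flips which source wins on shared keys.
const modelInfo = { maxTokens: 4096, provider: 'openai' }; // invented values
const aliasModelInfo = { maxTokens: 8192 };

const before = { ...aliasModelInfo, ...modelInfo }; // { maxTokens: 4096, provider: 'openai' }
const after = { ...modelInfo, ...aliasModelInfo };  // { maxTokens: 8192, provider: 'openai' }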
package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts

@@ -170,11 +170,13 @@ export class JSONModelsProvider extends ModelsProviderConnector {
            }
        } else if (typeof modelData === 'object' && !Array.isArray(modelData)) {
            // Object of models case
+            let models = '';
            for (const [modelId, model] of Object.entries(modelData)) {
                try {
                    if (this.isValidSingleModel(model)) {
                        validModels[modelId] = model as TLLMModel;
-                        console.debug(`Loaded model: ${modelId}`);
+                        //console.debug(`Loaded model: ${modelId}`);
+                        models += `${modelId} `;
                    } else {
                        console.warn(`Invalid model format for model "${modelId}"`);
                    }
@@ -183,6 +185,7 @@
                    // Continue processing other models instead of failing the whole file
                }
            }
+            console.debug(`Loaded models: ${models}`);
        } else {
            console.warn(`Invalid format (not a model or object of models)`);
        }
package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts

@@ -111,6 +111,18 @@ export class RedisCache extends CacheConnector {
 
    @SecureConnector.AccessControl
    public async setMetadata(acRequest: AccessRequest, key: string, metadata: CacheMetadata): Promise<void> {
+        if (metadata.acl) {
+            //preserve the ownership of the metadata
+            const newACL = ACL.from(metadata.acl).addAccess(acRequest.candidate.role, acRequest.candidate.id, TAccessLevel.Owner).ACL;
+            metadata.acl = newACL;
+        }
+
+        //no ACL present ==> preserve the existing ACL
+        if (!metadata.acl) {
+            const curACL = await this.getACL(acRequest, key);
+            metadata.acl = curACL;
+        }
+
        await this.setMetadataWithTTL(acRequest, key, metadata);
    }
    private async setMetadataWithTTL(acRequest: AccessRequest, key: string, metadata: CacheMetadata, ttl?: number): Promise<void> {
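In effect, a caller-supplied ACL gets the requesting candidate re-added as owner, while omitting the ACL keeps whatever is already stored for the key. A rough sketch of the two call paths; the key name, ACL value, and extra metadata field are illustrative, not from this diff:

// Illustrative only: how the two branches above behave for a caller.
await redisCache.setMetadata(acRequest, 'chat:42', { acl: customAcl } as CacheMetadata); // candidate re-added as Owner on customAcl
await redisCache.setMetadata(acRequest, 'chat:42', { label: 'summary' } as CacheMetadata); // existing ACL on 'chat:42' is preserved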
package/src/types/LLM.types.ts

@@ -87,52 +87,65 @@ export type TToolsInfo = {
 
 export type TSearchContextSize = 'low' | 'medium' | 'high';
 
-export type TLLMParams = {
-    model: TLLMModel | string;
-
-    prompt?: string;
-    messages?: any[]; // TODO [Forhad]: apply proper typing
-    temperature?: number;
-    maxTokens?: number;
-    stopSequences?: string[];
-    topP?: number;
-    topK?: number;
-    frequencyPenalty?: number;
-    presencePenalty?: number;
-    responseFormat?: any; // TODO [Forhad]: apply proper typing
-    modelInfo?: TCustomLLMModel;
-    files?: BinaryInput[];
+type TLLMToolConfig = {
     toolsConfig?: {
         tools?: OpenAI.ChatCompletionTool[] | OpenAI.Responses.Tool[] | OpenAI.Responses.WebSearchTool[];
         tool_choice?: TLLMToolChoice;
     };
-    baseURL?: string;
-
-    size?: OpenAI.Images.ImageGenerateParams['size'] | OpenAI.Images.ImageEditParams['size']; // for image generation and image editing
-    quality?: 'standard' | 'hd'; // for image generation
-    n?: number; // for image generation
-    style?: 'vivid' | 'natural'; // for image generation
+};
 
-    cache?: boolean;
-    agentId?: string;
-    teamId?: string;
+type TLLMThinkingConfig = {
     thinking?: {
         // for Anthropic
         type: 'enabled' | 'disabled';
         budget_tokens: number;
     };
     maxThinkingTokens?: number;
+};
+
+type TLLMReasoningConfig = {
+    useReasoning?: boolean;
+
+    /**
+     * Controls the level of effort the model will put into reasoning
+     * For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
+     * For Qwen 3 32B: "none" | "default"
+     */
+    reasoningEffort?: 'none' | 'default' | OpenAIReasoningEffort;
+
+    max_output_tokens?: number;
+    verbosity?: OpenAI.Responses.ResponseCreateParams['text']['verbosity'];
+    abortSignal?: AbortSignal;
+};
 
-    // #region Search
-    // Web search parameters (will be organized into toolsInfo.webSearch internally)
+type TLLMTextGenConfig = {
+    model: TLLMModel | string;
+    prompt?: string;
+    messages?: any[]; // TODO [Forhad]: apply proper typing
+    temperature?: number;
+    maxTokens?: number;
+    stopSequences?: string[];
+    topP?: number;
+    topK?: number;
+    frequencyPenalty?: number;
+    presencePenalty?: number;
+    responseFormat?: any; // TODO [Forhad]: apply proper typing
+} & TLLMToolConfig &
+    TLLMThinkingConfig &
+    TLLMReasoningConfig;
+
+// OpenAI specific web search parameters
+type TLLMWebSearchConfig = {
     useWebSearch?: boolean;
     webSearchContextSize?: TSearchContextSize;
     webSearchCity?: string;
     webSearchCountry?: string;
     webSearchRegion?: string;
     webSearchTimezone?: string;
+};
 
-    // xAI specific search parameters (consider moving to toolsInfo.xaiSearch)
+// xAI specific search parameters
+type TLLMSearchConfig = {
     useSearch?: boolean;
     searchMode?: 'auto' | 'on' | 'off';
     returnCitations?: boolean;
@@ -149,20 +162,33 @@ export type TLLMParams = {
     safeSearch?: boolean;
     fromDate?: string;
     toDate?: string;
-    // #endregion
+} & TLLMWebSearchConfig;
 
-    useReasoning?: boolean;
-    /**
-     * Controls the level of effort the model will put into reasoning
-     * For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
-     * For Qwen 3 32B: "none" | "default"
-     */
-    reasoningEffort?: 'none' | 'default' | OpenAIReasoningEffort;
-    max_output_tokens?: number;
-    verbosity?: OpenAI.Responses.ResponseCreateParams['text']['verbosity'];
-    abortSignal?: AbortSignal;
+type TLLMMiscConfig = {
+    maxContextWindowLength?: number;
+    useContextWindow?: boolean;
+    passthrough?: boolean;
 };
 
+type TLLMRuntimeContext = {
+    modelInfo?: TCustomLLMModel;
+    files?: BinaryInput[];
+    baseURL?: string;
+
+    cache?: boolean;
+    agentId?: string;
+    teamId?: string;
+};
+
+type TLLMImageGenConfig = {
+    size?: OpenAI.Images.ImageGenerateParams['size'] | OpenAI.Images.ImageEditParams['size']; // for image generation and image editing
+    quality?: 'standard' | 'hd'; // for image generation
+    n?: number; // for image generation
+    style?: 'vivid' | 'natural'; // for image generation
+};
+
+export type TLLMParams = TLLMTextGenConfig & TLLMSearchConfig & TLLMImageGenConfig & TLLMMiscConfig & TLLMRuntimeContext;
+
 export type TLLMPreparedParams = TLLMParams & {
     body: any;
     modelEntryName?: string; // for usage reporting
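For consumers nothing changes shape-wise: TLLMParams is now an intersection of the smaller config slices above, so a flat params object still type-checks. A small sketch with invented values:

// Fields drawn from several of the new config slices, combined in one TLLMParams object.
const params: TLLMParams = {
    model: 'gpt-4o-mini', // TLLMTextGenConfig (model name invented)
    temperature: 0.2, // TLLMTextGenConfig
    useWebSearch: true, // TLLMWebSearchConfig via TLLMSearchConfig
    maxContextWindowLength: 32000, // TLLMMiscConfig
    agentId: 'agent-123', // TLLMRuntimeContext (invented id)
};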
@@ -471,6 +497,8 @@ export enum TLLMEvent {
     Usage = 'usage',
     /** Interrupted : emitted when the response is interrupted before completion */
     Interrupted = 'interrupted',
+    /** Fallback : emitted when the response is using a fallback model */
+    Fallback = 'fallback',
 }
 
 export interface ILLMRequestContext {
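A hedged listener sketch for the new event; the emitter wiring and the payload shape are assumptions inferred from the `onFallback({ model })` callback in LLM.inference.ts, not shown in this diff:

// Assumed usage: reacting to the new Fallback event next to the existing ones.
emitter.on(TLLMEvent.Fallback, (info: { model: string | TLLMModel }) => {
    console.warn('Response is being produced by a fallback model:', info.model);
});
emitter.on(TLLMEvent.Interrupted, (finishReason) => {
    console.warn('Response interrupted:', finishReason);
});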
package/src/types/VectorDB.types.ts

@@ -3,17 +3,19 @@ export type VectorDBMetadata = {
     datasourceId: string;
     datasourceLabel: string;
     acl: string;
-    user_metadata?: string;
+    user_metadata?: string | Record<string, any>;
     text?: string;
 };
 
-export type VectorsResultData = {
+export type VectorDBResult = {
     id: string;
     score?: number;
     values: number[];
     text: string;
     metadata?: Record<string, any>;
-}[];
+};
+
+export type VectorsResultData = VectorDBResult[];
 
 export interface NsKnownMetadata {
     isOnCustomStorage?: boolean;
@@ -58,6 +60,7 @@ export interface IStorageVectorDataSource {
     metadata: string;
     text?: string;
     vectorIds: string[];
+    vectorInfo?: VectorDBResult[];
     id: string;
     candidateId: string;
     candidateRole: string;
@@ -83,4 +86,5 @@ export interface DatasourceDto {
     chunkOverlap?: number;
     label?: string;
     id?: string;
+    returnFullVectorInfo?: boolean;
 }
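A small sketch of how the new VectorDB fields fit together; only the type shapes come from this diff, the surrounding flow and values are assumed:

// Assumed flow: opting into full vector info on a datasource and reading it back.
const dtoPatch: Partial<DatasourceDto> = {
    label: 'release-notes', // invented label
    chunkOverlap: 64,
    returnFullVectorInfo: true, // new flag from this diff
};

const printVectors = (ds: IStorageVectorDataSource) => {
    for (const v of ds.vectorInfo ?? []) {
        // VectorDBResult fields: id, score?, values, text, metadata?
        console.log(v.id, v.score, v.text);
    }
};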