@smythos/sre 1.7.20 → 1.7.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/index.js +114 -76
  2. package/dist/index.js.map +1 -1
  3. package/dist/types/Components/DataSourceIndexer.class.d.ts +4 -12
  4. package/dist/types/Components/GenAILLM.class.d.ts +5 -5
  5. package/dist/types/Components/index.d.ts +3 -3
  6. package/dist/types/index.d.ts +3 -3
  7. package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +1 -0
  8. package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +11 -4
  9. package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +5 -0
  10. package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.d.ts +35 -0
  11. package/dist/types/subsystems/Security/Account.service/AccountConnector.d.ts +2 -2
  12. package/dist/types/subsystems/Security/Vault.service/connectors/SecretsManager.class.d.ts +2 -3
  13. package/dist/types/types/VectorDB.types.d.ts +4 -0
  14. package/dist/types/utils/string.utils.d.ts +1 -0
  15. package/package.json +3 -3
  16. package/src/Components/APIEndpoint.class.ts +1 -6
  17. package/src/Components/Component.class.ts +14 -1
  18. package/src/Components/DataSourceIndexer.class.ts +148 -34
  19. package/src/Components/GenAILLM.class.ts +21 -11
  20. package/src/Components/RAG/DataSourceCleaner.class.ts +178 -0
  21. package/src/Components/RAG/DataSourceComponent.class.ts +111 -0
  22. package/src/Components/RAG/DataSourceIndexer.class.ts +254 -0
  23. package/src/Components/{DataSourceLookup.class.ts → RAG/DataSourceLookup.class.ts} +92 -3
  24. package/src/Components/ServerlessCode.class.ts +1 -4
  25. package/src/Components/index.ts +3 -3
  26. package/src/helpers/S3Cache.helper.ts +2 -1
  27. package/src/index.ts +212 -212
  28. package/src/index.ts.bak +212 -212
  29. package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +3 -1
  30. package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +145 -19
  31. package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +56 -22
  32. package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -0
  33. package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +2 -1
  34. package/src/subsystems/IO/VectorDB.service/embed/index.ts +18 -0
  35. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +35 -10
  36. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +12 -4
  37. package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +4 -4
  38. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +13 -4
  39. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +17 -5
  40. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +18 -3
  41. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +14 -5
  42. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +6 -4
  43. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +5 -5
  44. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +8 -3
  45. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +9 -8
  46. package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +92 -1
  47. package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +32 -6
  48. package/src/subsystems/Security/Account.service/AccountConnector.ts +3 -3
  49. package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +8 -63
  50. package/src/types/VectorDB.types.ts +4 -0
  51. package/src/utils/array.utils.ts +11 -0
  52. package/src/utils/base64.utils.ts +1 -1
  53. package/src/utils/string.utils.ts +3 -192
  54. package/src/Components/DataSourceCleaner.class.ts +0 -92
@@ -20,8 +20,9 @@ import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http';
20
20
  import { IAgent } from '@sre/types/Agent.types';
21
21
  import { Conversation } from '@sre/helpers/Conversation.helper';
22
22
  import { TLLMEvent } from '@sre/types/LLM.types';
23
+ import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
23
24
 
24
- const outputLogger = Logger('OTelLog');
25
+ const outputLogger = Logger('OTel');
25
26
 
26
27
  export type OTelLogConfig = {
27
28
  endpoint: string;
@@ -50,7 +51,7 @@ export type OTelLogConfig = {
50
51
  */
51
52
  redactFields?: string[];
52
53
  };
53
-
54
+ const OTEL_DEBUG_LOGS = true;
54
55
  export class OTel extends TelemetryConnector {
55
56
  public name: string = 'OTel';
56
57
  public id: string;
@@ -208,6 +209,8 @@ export class OTel extends TelemetryConnector {
208
209
 
209
210
  const createToolInfoHandler = function (hookContext) {
210
211
  return function (toolInfo: any) {
212
+ const accessCandidate = AccessCandidate.agent(hookContext?.agentId);
213
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('createToolInfoHandler started', accessCandidate);
211
214
  if (!hookContext.curLLMGenSpan || !hookContext.convSpan) return;
212
215
 
213
216
  const modelId = toolInfo.model;
@@ -238,6 +241,7 @@ export class OTel extends TelemetryConnector {
238
241
 
239
242
  hookContext.curLLMGenSpan.end();
240
243
  delete hookContext.curLLMGenSpan;
244
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('createToolInfoHandler completed', accessCandidate);
241
245
  };
242
246
  };
243
247
 
@@ -245,6 +249,8 @@ export class OTel extends TelemetryConnector {
245
249
  return function (data: any, reqInfo: any) {
246
250
  if (!hookContext.convSpan) return;
247
251
  if (hookContext.curLLMGenSpan) return;
252
+ const accessCandidate = AccessCandidate.agent(hookContext?.agentId);
253
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('createDataHandler started', reqInfo?.requestId, accessCandidate);
248
254
 
249
255
  const modelId = reqInfo.model;
250
256
  const contextWindow = reqInfo.contextWindow;
@@ -285,13 +291,15 @@ export class OTel extends TelemetryConnector {
285
291
  'context.preview': JSON.stringify(lastContext).substring(0, 200),
286
292
  });
287
293
  hookContext.curLLMGenSpan = llmGenSpan;
294
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('createDataHandler completed', reqInfo?.requestId, accessCandidate);
288
295
  };
289
296
  };
290
297
 
291
298
  const createRequestedHandler = function (hookContext) {
292
299
  return function (reqInfo: any) {
293
300
  if (!hookContext.convSpan) return;
294
-
301
+ const accessCandidate = AccessCandidate.agent(hookContext?.agentId);
302
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('createRequestedHandler started', reqInfo?.requestId, accessCandidate);
295
303
  if (!hookContext.latencySpans) hookContext.latencySpans = {};
296
304
  const contextWindow = reqInfo.contextWindow;
297
305
 
@@ -317,6 +325,7 @@ export class OTel extends TelemetryConnector {
317
325
  'context.preview': JSON.stringify(lastContext).substring(0, 200),
318
326
  });
319
327
  hookContext.latencySpans[reqInfo.requestId] = llmGenLatencySpan;
328
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('createRequestedHandler completed', reqInfo?.requestId, accessCandidate);
320
329
  };
321
330
  };
322
331
  HookService.register(
@@ -332,6 +341,8 @@ export class OTel extends TelemetryConnector {
332
341
 
333
342
  return;
334
343
  }
344
+ const accessCandidate = AccessCandidate.agent(agentId);
345
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('Conversation.streamPrompt started', { processId, message }, accessCandidate);
335
346
 
336
347
  const modelId = typeof conversation?.model === 'string' ? conversation?.model : conversation?.model?.modelId;
337
348
 
@@ -349,15 +360,16 @@ export class OTel extends TelemetryConnector {
349
360
  },
350
361
  });
351
362
  hookContext.convSpan = convSpan;
363
+ hookContext.agentId = agentId;
364
+ hookContext.processId = processId;
352
365
 
353
366
  hookContext.dataHandler = createDataHandler(hookContext);
354
367
  conversation.on(TLLMEvent.Data, hookContext.dataHandler);
368
+
355
369
  hookContext.requestedHandler = createRequestedHandler(hookContext);
356
370
  conversation.on(TLLMEvent.Requested, hookContext.requestedHandler);
357
- hookContext.agentId = agentId;
358
- hookContext.processId = processId;
359
- hookContext.toolInfoHandler = createToolInfoHandler(hookContext);
360
371
 
372
+ hookContext.toolInfoHandler = createToolInfoHandler(hookContext);
361
373
  conversation.on(TLLMEvent.ToolInfo, hookContext.toolInfoHandler);
362
374
 
363
375
  // Add start event
@@ -409,6 +421,9 @@ export class OTel extends TelemetryConnector {
409
421
  const ctx = OTelContextRegistry.get(agentId, processId);
410
422
  if (!ctx) return;
411
423
 
424
+ const accessCandidate = AccessCandidate.agent(agentId);
425
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('Conversation.streamPrompt completed', { processId }, accessCandidate);
426
+
412
427
  if (hookContext.curLLMGenSpan) {
413
428
  hookContext.curLLMGenSpan.addEvent('llm.gen.content', {
414
429
  'content.size': JSON.stringify(result || {}).length,
@@ -461,6 +476,9 @@ export class OTel extends TelemetryConnector {
461
476
  const teamId = agent.teamId;
462
477
  const _hookContext: any = this.context;
463
478
 
479
+ const accessCandidate = AccessCandidate.agent(agentId);
480
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('SREAgent.process started', { processId, agentProcessId, endpointPath }, accessCandidate);
481
+
464
482
  const body = oTelInstance.prepareComponentData(agentRequest.body || {});
465
483
  const query = oTelInstance.prepareComponentData(agentRequest.query || {});
466
484
  const headers = oTelInstance.prepareComponentData(agentRequest.headers || {});
@@ -537,6 +555,9 @@ export class OTel extends TelemetryConnector {
537
555
 
538
556
  if (!agentSpan) return;
539
557
 
558
+ const accessCandidate = AccessCandidate.agent(agentId);
559
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('SREAgent.process completed', { agentProcessId }, accessCandidate);
560
+
540
561
  if (error) {
541
562
  agentSpan.recordException(error);
542
563
  agentSpan.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
@@ -603,6 +624,8 @@ export class OTel extends TelemetryConnector {
603
624
  const componentType = settings.name;
604
625
  const componentName = settings.displayName || settings.name;
605
626
  const eventId = settings.eventId; // specific event id attached to this component execution
627
+ const accessCandidate = AccessCandidate.agent(agentId);
628
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('Component.process started', { componentId }, accessCandidate);
606
629
 
607
630
  const ctx = OTelContextRegistry.get(agentId, processId);
608
631
  const parentSpan = ctx?.rootSpan;
@@ -677,6 +700,9 @@ export class OTel extends TelemetryConnector {
677
700
  const componentType = settings.name;
678
701
  const componentName = settings.displayName || settings.name;
679
702
 
703
+ const accessCandidate = AccessCandidate.agent(agentId);
704
+ if (OTEL_DEBUG_LOGS) outputLogger.debug('Component.process completed', { componentId }, accessCandidate);
705
+
680
706
  if (error) {
681
707
  // Capture error details
682
708
  span.recordException(error);
@@ -10,7 +10,7 @@ export interface ISmythAccountRequest {
10
10
  getCandidateTeam(): Promise<string | undefined>;
11
11
  getAllTeamSettings(): Promise<KeyValueObject>;
12
12
  getAllUserSettings(): Promise<KeyValueObject>;
13
- getTeamSetting(settingKey: string): Promise<string>;
13
+ getTeamSetting(settingKey: string, group?: string): Promise<string>;
14
14
  getUserSetting(settingKey: string): Promise<string>;
15
15
  getAgentSetting(settingKey: string): Promise<string>;
16
16
  getTeam(): Promise<string>;
@@ -25,7 +25,7 @@ export abstract class AccountConnector extends Connector {
25
25
  getAllUserSettings: async () => this.getAllUserSettings(candidate.readRequest, candidate.id),
26
26
  getUserSetting: async (settingKey: string) => this.getUserSetting(candidate.readRequest, candidate.id, settingKey),
27
27
  getAllTeamSettings: async () => this.getAllTeamSettings(candidate.readRequest, candidate.id),
28
- getTeamSetting: async (settingKey: string) => this.getTeamSetting(candidate.readRequest, candidate.id, settingKey),
28
+ getTeamSetting: async (settingKey: string, group?: string) => this.getTeamSetting(candidate.readRequest, candidate.id, settingKey, group),
29
29
  isTeamMember: async (teamId: string) => this.isTeamMember(teamId, candidate),
30
30
  getCandidateTeam: async () => this.getCandidateTeam(candidate),
31
31
  getTeam: async () => this.getCandidateTeam(candidate),
@@ -38,7 +38,7 @@ export abstract class AccountConnector extends Connector {
38
38
  public abstract getCandidateTeam(candidate: IAccessCandidate): Promise<string | undefined>;
39
39
  public abstract getAllTeamSettings(acRequest: AccessRequest, teamId: string): Promise<KeyValueObject>;
40
40
  public abstract getAllUserSettings(acRequest: AccessRequest, accountId: string): Promise<KeyValueObject>;
41
- public abstract getTeamSetting(acRequest: AccessRequest, teamId: string, settingKey: string): Promise<string>;
41
+ public abstract getTeamSetting(acRequest: AccessRequest, teamId: string, settingKey: string, group?: string): Promise<string>;
42
42
  public abstract getUserSetting(acRequest: AccessRequest, accountId: string, settingKey: string): Promise<string>;
43
43
  public abstract getAgentSetting(acRequest: AccessRequest, agentId: string, settingKey: string): Promise<string>;
44
44
  }
@@ -13,6 +13,7 @@ import {
13
13
  GetSecretValueCommandOutput,
14
14
  } from '@aws-sdk/client-secrets-manager';
15
15
 
16
+ const defaultPrefix = 'smythos';
16
17
  const console = Logger('SecretsManager');
17
18
 
18
19
  export type SecretsManagerConfig = {
@@ -28,8 +29,7 @@ export class SecretsManager extends VaultConnector {
28
29
 
29
30
  constructor(protected _settings: SecretsManagerConfig) {
30
31
  super(_settings);
31
- //if (!SmythRuntime.Instance) throw new Error('SRE not initialized');
32
- this.prefix = _settings.prefix || '';
32
+ this.prefix = _settings.prefix || defaultPrefix;
33
33
  this.secretsManager = new SecretsManagerClient({
34
34
  region: _settings.region,
35
35
  ...(_settings.awsAccessKeyId && _settings.awsSecretAccessKey
@@ -46,11 +46,7 @@ export class SecretsManager extends VaultConnector {
46
46
  try {
47
47
  const accountConnector = ConnectorService.getAccountConnector();
48
48
  const teamId = await accountConnector.getCandidateTeam(acRequest.candidate);
49
- // try fetchting by Id, if not found, try fetching by name
50
49
  let secret = await this.getSecretById(teamId, secretName);
51
- if (!secret) {
52
- secret = await this.getSecretByName(acRequest, secretName);
53
- }
54
50
  return secret;
55
51
  } catch (error) {
56
52
  console.error(error);
@@ -73,51 +69,18 @@ export class SecretsManager extends VaultConnector {
73
69
 
74
70
  do {
75
71
  const listResponse: ListSecretsCommandOutput = await this.secretsManager.send(
76
- new ListSecretsCommand({ NextToken: nextToken, Filters: [{ Key: 'tag-key', Values: ['smyth-vault'] }, { Key: 'name', Values: [this.getVaultKey(teamId, '')] }] })
72
+ new ListSecretsCommand({ NextToken: nextToken, Filters: [{ Key: 'name', Values: [this.getVaultKey(teamId, '')] }] })
77
73
  );
78
74
  if (listResponse.SecretList) {
79
75
  for (const secret of listResponse.SecretList) {
80
76
  if (secret.Name) {
81
- secrets.push({
82
- ARN: secret.ARN,
83
- Name: secret.Name,
84
- CreatedDate: secret.CreatedDate,
85
- });
77
+ secrets.push(this.extractSecretName(secret.Name, teamId, this.prefix));
86
78
  }
87
79
  }
88
80
  }
89
81
  nextToken = listResponse.NextToken;
90
82
  } while (nextToken);
91
-
92
- const $promises = [];
93
- for (const secret of secrets) {
94
- $promises.push(getSpecificSecret(secret, this.secretsManager));
95
- }
96
- const formattedSecrets = await Promise.all($promises);
97
- return formattedSecrets;
98
-
99
- async function getSpecificSecret(secret, secretsManager: SecretsManagerClient) {
100
- const data: GetSecretValueCommandOutput = await secretsManager.send(new GetSecretValueCommand({ SecretId: secret.ARN }));
101
- let secretString = data.SecretString;
102
-
103
- if (secretString) {
104
- try {
105
- let parsedSecret = JSON.parse(secretString);
106
- const secretId = secret.Name?.split('/').pop();
107
- const key = parsedSecret.key;
108
- const value = parsedSecret.value;
109
- const metadata = parsedSecret.metadata;
110
- return {
111
- id: secretId,
112
- key,
113
- value,
114
- metadata,
115
- };
116
- } catch (error) {
117
- }
118
- }
119
- return null;
120
- }
83
+ return secrets;
121
84
  }
122
85
 
123
86
  public async getResourceACL(resourceId: string, candidate: IAccessCandidate) {
@@ -133,17 +96,6 @@ export class SecretsManager extends VaultConnector {
133
96
  return acl;
134
97
  }
135
98
 
136
- private async getSecretByName(acRequest: AccessRequest, secretName: string) {
137
- try {
138
- const secrets = await this.listKeys(acRequest);
139
- const secret = secrets.find((s) => s.key === secretName);
140
- return secret?.value;
141
- } catch (error) {
142
- console.error(error);
143
- return null;
144
- }
145
- }
146
-
147
99
  private getVaultKey(teamId: string, secretName: string) {
148
100
  return `${this.prefix.length ? `${this.prefix}/` : ''}${teamId}/${secretName}`;
149
101
  }
@@ -151,20 +103,13 @@ export class SecretsManager extends VaultConnector {
151
103
  private async getSecretById(teamId: string, secretId: string) {
152
104
  try {
153
105
  const secret: GetSecretValueCommandOutput = await this.secretsManager.send(new GetSecretValueCommand({ SecretId: this.getVaultKey(teamId, secretId) }));
154
- return this.getSecretValue(secret);
106
+ return secret.SecretString;
155
107
  } catch (error) {
156
- console.error(error);
157
108
  return null;
158
109
  }
159
110
  }
160
111
 
161
- private getSecretValue(secret: GetSecretValueCommandOutput) {
162
- try {
163
- const parsedSecret = typeof secret.SecretString === 'string' ? JSON.parse(secret.SecretString) : secret.SecretString;
164
- return parsedSecret.value;
165
- } catch (error) {
166
- return null;
167
- }
168
-
112
+ private extractSecretName(secretKey: string, teamId: string, prefix: string) {
113
+ return secretKey.replace(`${prefix}/${teamId}/`, '');
169
114
  }
170
115
  }
@@ -64,6 +64,10 @@ export interface IStorageVectorDataSource {
64
64
  id: string;
65
65
  candidateId: string;
66
66
  candidateRole: string;
67
+ datasourceSizeMb?: number;
68
+ createdAt?: Date;
69
+ chunkSize?: number;
70
+ chunkOverlap?: number;
67
71
  }
68
72
 
69
73
  export interface IStorageVectorNamespace {
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Utility method to chunk arrays into smaller batches
3
+ */
4
+ export function chunkArr<T>(arr: T[], sizePerChunk: number): T[][] {
5
+ return arr.reduce((chunks, elem, index) => {
6
+ const chunkIndex = Math.floor(index / sizePerChunk);
7
+ const chunk = chunks[chunkIndex] || [];
8
+ chunks[chunkIndex] = chunk.concat([elem]);
9
+ return chunks;
10
+ }, [] as T[][]);
11
+ }
@@ -110,7 +110,7 @@ export function isBase64(str: string): boolean {
110
110
  const cleanedBase64Data = cleanBase64(str);
111
111
 
112
112
  // Sometimes words like 'male' and hashes like md5, sha1, sha256, sha512 are detected as base64
113
- if (cleanedBase64Data.length < 128) return false;
113
+ //if (cleanedBase64Data.length < 128) return false;
114
114
 
115
115
  try {
116
116
  const buffer = Buffer.from(cleanedBase64Data, 'base64');
@@ -222,195 +222,6 @@ export const identifyMimetypeFromString = (input: string) => {
222
222
  return 'text/plain';
223
223
  };
224
224
 
225
- // export function chunkText(
226
- // text: string,
227
- // {
228
- // chunkSize,
229
- // chunkOverlap,
230
- // }: {
231
- // chunkSize: number;
232
- // chunkOverlap: number;
233
- // }
234
- // ): string[] {
235
- // const textSplitter = new TextSplitter({
236
- // chunkSize,
237
- // chunkOverlap,
238
- // separators: ['\n\n', '\n', ' ', ''],
239
- // keepSeparator: true,
240
- // });
241
- // let output = textSplitter.splitText(text);
242
-
243
- // return output;
244
- // }
245
- // class TextSplitter {
246
- // private chunkSize: number;
247
- // private chunkOverlap: number;
248
- // private separators: string[] = ['\n\n', '\n', ' ', ''];
249
- // private keepSeparator: boolean = true;
250
-
251
- // constructor({
252
- // chunkSize = 1000,
253
- // chunkOverlap = 200,
254
- // separators,
255
- // keepSeparator,
256
- // }: {
257
- // chunkSize?: number;
258
- // chunkOverlap?: number;
259
- // separators?: string[];
260
- // keepSeparator?: boolean;
261
- // } = {}) {
262
- // this.chunkSize = chunkSize;
263
- // this.chunkOverlap = chunkOverlap;
264
-
265
- // if (separators) {
266
- // this.separators = separators;
267
- // }
268
-
269
- // if (keepSeparator !== undefined) {
270
- // this.keepSeparator = keepSeparator;
271
- // }
272
-
273
- // if (this.chunkOverlap >= this.chunkSize) {
274
- // throw new Error('Cannot have chunkOverlap >= chunkSize');
275
- // }
276
- // }
277
-
278
- // public splitText(text: string): string[] {
279
- // return this._splitText(text, this.separators);
280
- // }
281
-
282
- // private _splitText(text: string, separators: string[]): string[] {
283
- // const finalChunks: string[] = [];
284
-
285
- // // Get appropriate separator to use
286
- // let separator: string = separators[separators.length - 1];
287
- // let newSeparators: string[] | undefined;
288
-
289
- // for (let i = 0; i < separators.length; i += 1) {
290
- // const s = separators[i];
291
- // if (s === '') {
292
- // separator = s;
293
- // break;
294
- // }
295
- // if (text.includes(s)) {
296
- // separator = s;
297
- // newSeparators = separators.slice(i + 1);
298
- // break;
299
- // }
300
- // }
301
-
302
- // // Split the text using the identified separator
303
- // const splits = this.splitOnSeparator(text, separator);
304
-
305
- // // Process splits, recursively splitting longer texts
306
- // let goodSplits: string[] = [];
307
- // const _separator = this.keepSeparator ? '' : separator;
308
-
309
- // for (const s of splits) {
310
- // if (this.lengthFunction(s) < this.chunkSize) {
311
- // goodSplits.push(s);
312
- // } else {
313
- // if (goodSplits.length) {
314
- // const mergedText = this.mergeSplits(goodSplits, _separator);
315
- // finalChunks.push(...mergedText);
316
- // goodSplits = [];
317
- // }
318
-
319
- // if (!newSeparators) {
320
- // finalChunks.push(s);
321
- // } else {
322
- // const otherInfo = this._splitText(s, newSeparators);
323
- // finalChunks.push(...otherInfo);
324
- // }
325
- // }
326
- // }
327
-
328
- // if (goodSplits.length) {
329
- // const mergedText = this.mergeSplits(goodSplits, _separator);
330
- // finalChunks.push(...mergedText);
331
- // }
332
-
333
- // return finalChunks;
334
- // }
335
-
336
- // private splitOnSeparator(text: string, separator: string): string[] {
337
- // let splits: string[];
338
-
339
- // if (separator) {
340
- // if (this.keepSeparator) {
341
- // const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
342
- // splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
343
- // } else {
344
- // splits = text.split(separator);
345
- // }
346
- // } else {
347
- // splits = text.split('');
348
- // }
349
-
350
- // return splits.filter((s) => s !== '');
351
- // }
352
-
353
- // private lengthFunction(text: string): number {
354
- // return text.length;
355
- // }
356
-
357
- // private joinDocs(docs: string[], separator: string): string | null {
358
- // const text = docs.join(separator).trim();
359
- // return text === '' ? null : text;
360
- // }
361
-
362
- // private mergeSplits(splits: string[], separator: string): string[] {
363
- // const docs: string[] = [];
364
- // const currentDoc: string[] = [];
365
- // let total = 0;
366
-
367
- // for (const d of splits) {
368
- // const _len = this.lengthFunction(d);
369
-
370
- // if (total + _len + currentDoc.length * separator.length > this.chunkSize) {
371
- // if (total > this.chunkSize) {
372
- // console.warn(`Created a chunk of size ${total}, which is longer than the specified ${this.chunkSize}`);
373
- // }
374
-
375
- // if (currentDoc.length > 0) {
376
- // const doc = this.joinDocs(currentDoc, separator);
377
- // if (doc !== null) {
378
- // docs.push(doc);
379
- // }
380
-
381
- // // Keep popping if conditions are met
382
- // while (total > this.chunkOverlap || (total + _len + currentDoc.length * separator.length > this.chunkSize && total > 0)) {
383
- // total -= this.lengthFunction(currentDoc[0]);
384
- // currentDoc.shift();
385
- // }
386
- // }
387
- // }
388
-
389
- // currentDoc.push(d);
390
- // total += _len;
391
- // }
392
-
393
- // const doc = this.joinDocs(currentDoc, separator);
394
- // if (doc !== null) {
395
- // docs.push(doc);
396
- // }
397
-
398
- // return docs;
399
- // }
400
- // }
401
-
402
- // class RecursiveTextSplitter extends TextSplitter {
403
- // constructor({
404
- // chunkSize = 1000,
405
- // chunkOverlap = 200,
406
- // separators = ['\n\n', '\n', ' ', ''],
407
- // keepSeparator = true,
408
- // }: {
409
- // chunkSize?: number;
410
- // chunkOverlap?: number;
411
- // separators?: string[];
412
- // keepSeparator?: boolean;
413
- // } = {}) {
414
- // super({ chunkSize, chunkOverlap, separators, keepSeparator });
415
- // }
416
- // }
225
+ export function calcSizeMb(text: string): number {
226
+ return Buffer.byteLength(text, 'utf-8') / 1024 / 1024;
227
+ }
@@ -1,92 +0,0 @@
1
- import { IAgent as Agent } from '@sre/types/Agent.types';
2
- import { Component } from './Component.class';
3
- import { TemplateString } from '@sre/helpers/TemplateString.helper';
4
- import Joi from 'joi';
5
- import { validateCharacterSet } from '../utils';
6
- import { ConnectorService } from '@sre/Core/ConnectorsService';
7
- import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
8
-
9
- import { DataSourceIndexer } from './DataSourceIndexer.class';
10
-
11
- export class DataSourceCleaner extends Component {
12
- protected configSchema = Joi.object({
13
- namespaceId: Joi.string().max(50).allow('').label('namespace'),
14
- id: Joi.string().custom(validateCharacterSet, 'custom validation characterSet').allow('').label('source identifier'),
15
- });
16
- constructor() {
17
- super();
18
- }
19
- init() {}
20
- async process(input, config, agent: Agent) {
21
- await super.process(input, config, agent);
22
- const teamId = agent.teamId;
23
- const agentId = agent.id;
24
- let debugOutput = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
25
-
26
- try {
27
- const configSchema = this.validateConfigData(config.data);
28
- if (configSchema.error) {
29
- throw new Error(`Config data validation error: ${configSchema.error}\n EXITING...`);
30
- }
31
-
32
- const outputs = {};
33
- for (let con of config.outputs) {
34
- if (con.default) continue;
35
- outputs[con.name] = con?.description ? `<${con?.description}>` : '';
36
- }
37
-
38
- const inputSchema = this.validateInput(input);
39
- if (inputSchema.error) {
40
- throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
41
- }
42
-
43
- const namespaceId = configSchema.value.namespaceId.split('_')?.slice(1).join('_') || configSchema.value.namespaceId;
44
-
45
- let vectorDbConnector = ConnectorService.getVectorDBConnector();
46
-
47
- let existingnamespace = await vectorDbConnector.requester(AccessCandidate.team(teamId)).namespaceExists(namespaceId);
48
- if (!existingnamespace) {
49
- throw new Error(`Namespace ${namespaceId} does not exist`);
50
- }
51
-
52
- const providedId = TemplateString(config.data.id).parse(input).result;
53
- const idRegex = /^[a-zA-Z0-9\-\_\.]+$/;
54
- if (!idRegex.test(providedId)) {
55
- throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
56
- }
57
- debugOutput += `Searching for data source with id: ${providedId}\n`;
58
-
59
- const dsId = DataSourceIndexer.genDsId(providedId, teamId, namespaceId);
60
-
61
- await vectorDbConnector.requester(AccessCandidate.team(teamId)).deleteDatasource(namespaceId, dsId);
62
-
63
- debugOutput += `Deleted data source with id: ${providedId}\n`;
64
-
65
- return {
66
- _debug: debugOutput,
67
- Success: true,
68
- // _error,
69
- };
70
- } catch (err: any) {
71
- debugOutput += `Failed to delete data source: \n Error: ${err?.message}\n`;
72
-
73
- return {
74
- _debug: debugOutput,
75
- _error: err?.message || "Couldn't delete data source",
76
- };
77
- }
78
- }
79
-
80
- validateInput(input: any) {
81
- return Joi.object({}).unknown(true).validate(input);
82
- }
83
-
84
- validateConfigData(data: any) {
85
- return Joi.object({
86
- namespaceId: Joi.string().required(),
87
- id: Joi.string().optional().allow('').allow(null),
88
- })
89
- .unknown(true)
90
- .validate(data);
91
- }
92
- }