@smythos/sre 1.7.20 → 1.7.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +114 -76
- package/dist/index.js.map +1 -1
- package/dist/types/Components/DataSourceIndexer.class.d.ts +4 -12
- package/dist/types/Components/GenAILLM.class.d.ts +5 -5
- package/dist/types/Components/index.d.ts +3 -3
- package/dist/types/index.d.ts +3 -3
- package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +11 -4
- package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +5 -0
- package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.d.ts +35 -0
- package/dist/types/subsystems/Security/Account.service/AccountConnector.d.ts +2 -2
- package/dist/types/subsystems/Security/Vault.service/connectors/SecretsManager.class.d.ts +2 -3
- package/dist/types/types/VectorDB.types.d.ts +4 -0
- package/dist/types/utils/string.utils.d.ts +1 -0
- package/package.json +3 -3
- package/src/Components/APIEndpoint.class.ts +1 -6
- package/src/Components/Component.class.ts +14 -1
- package/src/Components/DataSourceIndexer.class.ts +148 -34
- package/src/Components/GenAILLM.class.ts +21 -11
- package/src/Components/RAG/DataSourceCleaner.class.ts +178 -0
- package/src/Components/RAG/DataSourceComponent.class.ts +111 -0
- package/src/Components/RAG/DataSourceIndexer.class.ts +254 -0
- package/src/Components/{DataSourceLookup.class.ts → RAG/DataSourceLookup.class.ts} +92 -3
- package/src/Components/ServerlessCode.class.ts +1 -4
- package/src/Components/index.ts +3 -3
- package/src/helpers/S3Cache.helper.ts +2 -1
- package/src/index.ts +212 -212
- package/src/index.ts.bak +212 -212
- package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +3 -1
- package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +145 -19
- package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +56 -22
- package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +2 -1
- package/src/subsystems/IO/VectorDB.service/embed/index.ts +18 -0
- package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +35 -10
- package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +12 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +4 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +13 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +17 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +18 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +14 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +6 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +5 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +8 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +9 -8
- package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +92 -1
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +32 -6
- package/src/subsystems/Security/Account.service/AccountConnector.ts +3 -3
- package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +8 -63
- package/src/types/VectorDB.types.ts +4 -0
- package/src/utils/array.utils.ts +11 -0
- package/src/utils/base64.utils.ts +1 -1
- package/src/utils/string.utils.ts +3 -192
- package/src/Components/DataSourceCleaner.class.ts +0 -92
|
@@ -20,8 +20,9 @@ import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http';
|
|
|
20
20
|
import { IAgent } from '@sre/types/Agent.types';
|
|
21
21
|
import { Conversation } from '@sre/helpers/Conversation.helper';
|
|
22
22
|
import { TLLMEvent } from '@sre/types/LLM.types';
|
|
23
|
+
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
23
24
|
|
|
24
|
-
const outputLogger = Logger('
|
|
25
|
+
const outputLogger = Logger('OTel');
|
|
25
26
|
|
|
26
27
|
export type OTelLogConfig = {
|
|
27
28
|
endpoint: string;
|
|
@@ -50,7 +51,7 @@ export type OTelLogConfig = {
|
|
|
50
51
|
*/
|
|
51
52
|
redactFields?: string[];
|
|
52
53
|
};
|
|
53
|
-
|
|
54
|
+
const OTEL_DEBUG_LOGS = true;
|
|
54
55
|
export class OTel extends TelemetryConnector {
|
|
55
56
|
public name: string = 'OTel';
|
|
56
57
|
public id: string;
|
|
@@ -208,6 +209,8 @@ export class OTel extends TelemetryConnector {
|
|
|
208
209
|
|
|
209
210
|
const createToolInfoHandler = function (hookContext) {
|
|
210
211
|
return function (toolInfo: any) {
|
|
212
|
+
const accessCandidate = AccessCandidate.agent(hookContext?.agentId);
|
|
213
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('createToolInfoHandler started', accessCandidate);
|
|
211
214
|
if (!hookContext.curLLMGenSpan || !hookContext.convSpan) return;
|
|
212
215
|
|
|
213
216
|
const modelId = toolInfo.model;
|
|
@@ -238,6 +241,7 @@ export class OTel extends TelemetryConnector {
|
|
|
238
241
|
|
|
239
242
|
hookContext.curLLMGenSpan.end();
|
|
240
243
|
delete hookContext.curLLMGenSpan;
|
|
244
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('createToolInfoHandler completed', accessCandidate);
|
|
241
245
|
};
|
|
242
246
|
};
|
|
243
247
|
|
|
@@ -245,6 +249,8 @@ export class OTel extends TelemetryConnector {
|
|
|
245
249
|
return function (data: any, reqInfo: any) {
|
|
246
250
|
if (!hookContext.convSpan) return;
|
|
247
251
|
if (hookContext.curLLMGenSpan) return;
|
|
252
|
+
const accessCandidate = AccessCandidate.agent(hookContext?.agentId);
|
|
253
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('createDataHandler started', reqInfo?.requestId, accessCandidate);
|
|
248
254
|
|
|
249
255
|
const modelId = reqInfo.model;
|
|
250
256
|
const contextWindow = reqInfo.contextWindow;
|
|
@@ -285,13 +291,15 @@ export class OTel extends TelemetryConnector {
|
|
|
285
291
|
'context.preview': JSON.stringify(lastContext).substring(0, 200),
|
|
286
292
|
});
|
|
287
293
|
hookContext.curLLMGenSpan = llmGenSpan;
|
|
294
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('createDataHandler completed', reqInfo?.requestId, accessCandidate);
|
|
288
295
|
};
|
|
289
296
|
};
|
|
290
297
|
|
|
291
298
|
const createRequestedHandler = function (hookContext) {
|
|
292
299
|
return function (reqInfo: any) {
|
|
293
300
|
if (!hookContext.convSpan) return;
|
|
294
|
-
|
|
301
|
+
const accessCandidate = AccessCandidate.agent(hookContext?.agentId);
|
|
302
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('createRequestedHandler started', reqInfo?.requestId, accessCandidate);
|
|
295
303
|
if (!hookContext.latencySpans) hookContext.latencySpans = {};
|
|
296
304
|
const contextWindow = reqInfo.contextWindow;
|
|
297
305
|
|
|
@@ -317,6 +325,7 @@ export class OTel extends TelemetryConnector {
|
|
|
317
325
|
'context.preview': JSON.stringify(lastContext).substring(0, 200),
|
|
318
326
|
});
|
|
319
327
|
hookContext.latencySpans[reqInfo.requestId] = llmGenLatencySpan;
|
|
328
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('createRequestedHandler completed', reqInfo?.requestId, accessCandidate);
|
|
320
329
|
};
|
|
321
330
|
};
|
|
322
331
|
HookService.register(
|
|
@@ -332,6 +341,8 @@ export class OTel extends TelemetryConnector {
|
|
|
332
341
|
|
|
333
342
|
return;
|
|
334
343
|
}
|
|
344
|
+
const accessCandidate = AccessCandidate.agent(agentId);
|
|
345
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('Conversation.streamPrompt started', { processId, message }, accessCandidate);
|
|
335
346
|
|
|
336
347
|
const modelId = typeof conversation?.model === 'string' ? conversation?.model : conversation?.model?.modelId;
|
|
337
348
|
|
|
@@ -349,15 +360,16 @@ export class OTel extends TelemetryConnector {
|
|
|
349
360
|
},
|
|
350
361
|
});
|
|
351
362
|
hookContext.convSpan = convSpan;
|
|
363
|
+
hookContext.agentId = agentId;
|
|
364
|
+
hookContext.processId = processId;
|
|
352
365
|
|
|
353
366
|
hookContext.dataHandler = createDataHandler(hookContext);
|
|
354
367
|
conversation.on(TLLMEvent.Data, hookContext.dataHandler);
|
|
368
|
+
|
|
355
369
|
hookContext.requestedHandler = createRequestedHandler(hookContext);
|
|
356
370
|
conversation.on(TLLMEvent.Requested, hookContext.requestedHandler);
|
|
357
|
-
hookContext.agentId = agentId;
|
|
358
|
-
hookContext.processId = processId;
|
|
359
|
-
hookContext.toolInfoHandler = createToolInfoHandler(hookContext);
|
|
360
371
|
|
|
372
|
+
hookContext.toolInfoHandler = createToolInfoHandler(hookContext);
|
|
361
373
|
conversation.on(TLLMEvent.ToolInfo, hookContext.toolInfoHandler);
|
|
362
374
|
|
|
363
375
|
// Add start event
|
|
@@ -409,6 +421,9 @@ export class OTel extends TelemetryConnector {
|
|
|
409
421
|
const ctx = OTelContextRegistry.get(agentId, processId);
|
|
410
422
|
if (!ctx) return;
|
|
411
423
|
|
|
424
|
+
const accessCandidate = AccessCandidate.agent(agentId);
|
|
425
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('Conversation.streamPrompt completed', { processId }, accessCandidate);
|
|
426
|
+
|
|
412
427
|
if (hookContext.curLLMGenSpan) {
|
|
413
428
|
hookContext.curLLMGenSpan.addEvent('llm.gen.content', {
|
|
414
429
|
'content.size': JSON.stringify(result || {}).length,
|
|
@@ -461,6 +476,9 @@ export class OTel extends TelemetryConnector {
|
|
|
461
476
|
const teamId = agent.teamId;
|
|
462
477
|
const _hookContext: any = this.context;
|
|
463
478
|
|
|
479
|
+
const accessCandidate = AccessCandidate.agent(agentId);
|
|
480
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('SREAgent.process started', { processId, agentProcessId, endpointPath }, accessCandidate);
|
|
481
|
+
|
|
464
482
|
const body = oTelInstance.prepareComponentData(agentRequest.body || {});
|
|
465
483
|
const query = oTelInstance.prepareComponentData(agentRequest.query || {});
|
|
466
484
|
const headers = oTelInstance.prepareComponentData(agentRequest.headers || {});
|
|
@@ -537,6 +555,9 @@ export class OTel extends TelemetryConnector {
|
|
|
537
555
|
|
|
538
556
|
if (!agentSpan) return;
|
|
539
557
|
|
|
558
|
+
const accessCandidate = AccessCandidate.agent(agentId);
|
|
559
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('SREAgent.process completed', { agentProcessId }, accessCandidate);
|
|
560
|
+
|
|
540
561
|
if (error) {
|
|
541
562
|
agentSpan.recordException(error);
|
|
542
563
|
agentSpan.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
|
|
@@ -603,6 +624,8 @@ export class OTel extends TelemetryConnector {
|
|
|
603
624
|
const componentType = settings.name;
|
|
604
625
|
const componentName = settings.displayName || settings.name;
|
|
605
626
|
const eventId = settings.eventId; // specific event id attached to this component execution
|
|
627
|
+
const accessCandidate = AccessCandidate.agent(agentId);
|
|
628
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('Component.process started', { componentId }, accessCandidate);
|
|
606
629
|
|
|
607
630
|
const ctx = OTelContextRegistry.get(agentId, processId);
|
|
608
631
|
const parentSpan = ctx?.rootSpan;
|
|
@@ -677,6 +700,9 @@ export class OTel extends TelemetryConnector {
|
|
|
677
700
|
const componentType = settings.name;
|
|
678
701
|
const componentName = settings.displayName || settings.name;
|
|
679
702
|
|
|
703
|
+
const accessCandidate = AccessCandidate.agent(agentId);
|
|
704
|
+
if (OTEL_DEBUG_LOGS) outputLogger.debug('Component.process completed', { componentId }, accessCandidate);
|
|
705
|
+
|
|
680
706
|
if (error) {
|
|
681
707
|
// Capture error details
|
|
682
708
|
span.recordException(error);
|
|
@@ -10,7 +10,7 @@ export interface ISmythAccountRequest {
|
|
|
10
10
|
getCandidateTeam(): Promise<string | undefined>;
|
|
11
11
|
getAllTeamSettings(): Promise<KeyValueObject>;
|
|
12
12
|
getAllUserSettings(): Promise<KeyValueObject>;
|
|
13
|
-
getTeamSetting(settingKey: string): Promise<string>;
|
|
13
|
+
getTeamSetting(settingKey: string, group?: string): Promise<string>;
|
|
14
14
|
getUserSetting(settingKey: string): Promise<string>;
|
|
15
15
|
getAgentSetting(settingKey: string): Promise<string>;
|
|
16
16
|
getTeam(): Promise<string>;
|
|
@@ -25,7 +25,7 @@ export abstract class AccountConnector extends Connector {
|
|
|
25
25
|
getAllUserSettings: async () => this.getAllUserSettings(candidate.readRequest, candidate.id),
|
|
26
26
|
getUserSetting: async (settingKey: string) => this.getUserSetting(candidate.readRequest, candidate.id, settingKey),
|
|
27
27
|
getAllTeamSettings: async () => this.getAllTeamSettings(candidate.readRequest, candidate.id),
|
|
28
|
-
getTeamSetting: async (settingKey: string) => this.getTeamSetting(candidate.readRequest, candidate.id, settingKey),
|
|
28
|
+
getTeamSetting: async (settingKey: string, group?: string) => this.getTeamSetting(candidate.readRequest, candidate.id, settingKey, group),
|
|
29
29
|
isTeamMember: async (teamId: string) => this.isTeamMember(teamId, candidate),
|
|
30
30
|
getCandidateTeam: async () => this.getCandidateTeam(candidate),
|
|
31
31
|
getTeam: async () => this.getCandidateTeam(candidate),
|
|
@@ -38,7 +38,7 @@ export abstract class AccountConnector extends Connector {
|
|
|
38
38
|
public abstract getCandidateTeam(candidate: IAccessCandidate): Promise<string | undefined>;
|
|
39
39
|
public abstract getAllTeamSettings(acRequest: AccessRequest, teamId: string): Promise<KeyValueObject>;
|
|
40
40
|
public abstract getAllUserSettings(acRequest: AccessRequest, accountId: string): Promise<KeyValueObject>;
|
|
41
|
-
public abstract getTeamSetting(acRequest: AccessRequest, teamId: string, settingKey: string): Promise<string>;
|
|
41
|
+
public abstract getTeamSetting(acRequest: AccessRequest, teamId: string, settingKey: string, group?: string): Promise<string>;
|
|
42
42
|
public abstract getUserSetting(acRequest: AccessRequest, accountId: string, settingKey: string): Promise<string>;
|
|
43
43
|
public abstract getAgentSetting(acRequest: AccessRequest, agentId: string, settingKey: string): Promise<string>;
|
|
44
44
|
}
|
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
GetSecretValueCommandOutput,
|
|
14
14
|
} from '@aws-sdk/client-secrets-manager';
|
|
15
15
|
|
|
16
|
+
const defaultPrefix = 'smythos';
|
|
16
17
|
const console = Logger('SecretsManager');
|
|
17
18
|
|
|
18
19
|
export type SecretsManagerConfig = {
|
|
@@ -28,8 +29,7 @@ export class SecretsManager extends VaultConnector {
|
|
|
28
29
|
|
|
29
30
|
constructor(protected _settings: SecretsManagerConfig) {
|
|
30
31
|
super(_settings);
|
|
31
|
-
|
|
32
|
-
this.prefix = _settings.prefix || '';
|
|
32
|
+
this.prefix = _settings.prefix || defaultPrefix;
|
|
33
33
|
this.secretsManager = new SecretsManagerClient({
|
|
34
34
|
region: _settings.region,
|
|
35
35
|
...(_settings.awsAccessKeyId && _settings.awsSecretAccessKey
|
|
@@ -46,11 +46,7 @@ export class SecretsManager extends VaultConnector {
|
|
|
46
46
|
try {
|
|
47
47
|
const accountConnector = ConnectorService.getAccountConnector();
|
|
48
48
|
const teamId = await accountConnector.getCandidateTeam(acRequest.candidate);
|
|
49
|
-
// try fetchting by Id, if not found, try fetching by name
|
|
50
49
|
let secret = await this.getSecretById(teamId, secretName);
|
|
51
|
-
if (!secret) {
|
|
52
|
-
secret = await this.getSecretByName(acRequest, secretName);
|
|
53
|
-
}
|
|
54
50
|
return secret;
|
|
55
51
|
} catch (error) {
|
|
56
52
|
console.error(error);
|
|
@@ -73,51 +69,18 @@ export class SecretsManager extends VaultConnector {
|
|
|
73
69
|
|
|
74
70
|
do {
|
|
75
71
|
const listResponse: ListSecretsCommandOutput = await this.secretsManager.send(
|
|
76
|
-
new ListSecretsCommand({ NextToken: nextToken, Filters: [{ Key: '
|
|
72
|
+
new ListSecretsCommand({ NextToken: nextToken, Filters: [{ Key: 'name', Values: [this.getVaultKey(teamId, '')] }] })
|
|
77
73
|
);
|
|
78
74
|
if (listResponse.SecretList) {
|
|
79
75
|
for (const secret of listResponse.SecretList) {
|
|
80
76
|
if (secret.Name) {
|
|
81
|
-
secrets.push(
|
|
82
|
-
ARN: secret.ARN,
|
|
83
|
-
Name: secret.Name,
|
|
84
|
-
CreatedDate: secret.CreatedDate,
|
|
85
|
-
});
|
|
77
|
+
secrets.push(this.extractSecretName(secret.Name, teamId, this.prefix));
|
|
86
78
|
}
|
|
87
79
|
}
|
|
88
80
|
}
|
|
89
81
|
nextToken = listResponse.NextToken;
|
|
90
82
|
} while (nextToken);
|
|
91
|
-
|
|
92
|
-
const $promises = [];
|
|
93
|
-
for (const secret of secrets) {
|
|
94
|
-
$promises.push(getSpecificSecret(secret, this.secretsManager));
|
|
95
|
-
}
|
|
96
|
-
const formattedSecrets = await Promise.all($promises);
|
|
97
|
-
return formattedSecrets;
|
|
98
|
-
|
|
99
|
-
async function getSpecificSecret(secret, secretsManager: SecretsManagerClient) {
|
|
100
|
-
const data: GetSecretValueCommandOutput = await secretsManager.send(new GetSecretValueCommand({ SecretId: secret.ARN }));
|
|
101
|
-
let secretString = data.SecretString;
|
|
102
|
-
|
|
103
|
-
if (secretString) {
|
|
104
|
-
try {
|
|
105
|
-
let parsedSecret = JSON.parse(secretString);
|
|
106
|
-
const secretId = secret.Name?.split('/').pop();
|
|
107
|
-
const key = parsedSecret.key;
|
|
108
|
-
const value = parsedSecret.value;
|
|
109
|
-
const metadata = parsedSecret.metadata;
|
|
110
|
-
return {
|
|
111
|
-
id: secretId,
|
|
112
|
-
key,
|
|
113
|
-
value,
|
|
114
|
-
metadata,
|
|
115
|
-
};
|
|
116
|
-
} catch (error) {
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
return null;
|
|
120
|
-
}
|
|
83
|
+
return secrets;
|
|
121
84
|
}
|
|
122
85
|
|
|
123
86
|
public async getResourceACL(resourceId: string, candidate: IAccessCandidate) {
|
|
@@ -133,17 +96,6 @@ export class SecretsManager extends VaultConnector {
|
|
|
133
96
|
return acl;
|
|
134
97
|
}
|
|
135
98
|
|
|
136
|
-
private async getSecretByName(acRequest: AccessRequest, secretName: string) {
|
|
137
|
-
try {
|
|
138
|
-
const secrets = await this.listKeys(acRequest);
|
|
139
|
-
const secret = secrets.find((s) => s.key === secretName);
|
|
140
|
-
return secret?.value;
|
|
141
|
-
} catch (error) {
|
|
142
|
-
console.error(error);
|
|
143
|
-
return null;
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
99
|
private getVaultKey(teamId: string, secretName: string) {
|
|
148
100
|
return `${this.prefix.length ? `${this.prefix}/` : ''}${teamId}/${secretName}`;
|
|
149
101
|
}
|
|
@@ -151,20 +103,13 @@ export class SecretsManager extends VaultConnector {
|
|
|
151
103
|
private async getSecretById(teamId: string, secretId: string) {
|
|
152
104
|
try {
|
|
153
105
|
const secret: GetSecretValueCommandOutput = await this.secretsManager.send(new GetSecretValueCommand({ SecretId: this.getVaultKey(teamId, secretId) }));
|
|
154
|
-
return
|
|
106
|
+
return secret.SecretString;
|
|
155
107
|
} catch (error) {
|
|
156
|
-
console.error(error);
|
|
157
108
|
return null;
|
|
158
109
|
}
|
|
159
110
|
}
|
|
160
111
|
|
|
161
|
-
private
|
|
162
|
-
|
|
163
|
-
const parsedSecret = typeof secret.SecretString === 'string' ? JSON.parse(secret.SecretString) : secret.SecretString;
|
|
164
|
-
return parsedSecret.value;
|
|
165
|
-
} catch (error) {
|
|
166
|
-
return null;
|
|
167
|
-
}
|
|
168
|
-
|
|
112
|
+
private extractSecretName(secretKey: string, teamId: string, prefix: string) {
|
|
113
|
+
return secretKey.replace(`${prefix}/${teamId}/`, '');
|
|
169
114
|
}
|
|
170
115
|
}
|
|
@@ -64,6 +64,10 @@ export interface IStorageVectorDataSource {
|
|
|
64
64
|
id: string;
|
|
65
65
|
candidateId: string;
|
|
66
66
|
candidateRole: string;
|
|
67
|
+
datasourceSizeMb?: number;
|
|
68
|
+
createdAt?: Date;
|
|
69
|
+
chunkSize?: number;
|
|
70
|
+
chunkOverlap?: number;
|
|
67
71
|
}
|
|
68
72
|
|
|
69
73
|
export interface IStorageVectorNamespace {
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility method to chunk arrays into smaller batches
|
|
3
|
+
*/
|
|
4
|
+
export function chunkArr<T>(arr: T[], sizePerChunk: number): T[][] {
|
|
5
|
+
return arr.reduce((chunks, elem, index) => {
|
|
6
|
+
const chunkIndex = Math.floor(index / sizePerChunk);
|
|
7
|
+
const chunk = chunks[chunkIndex] || [];
|
|
8
|
+
chunks[chunkIndex] = chunk.concat([elem]);
|
|
9
|
+
return chunks;
|
|
10
|
+
}, [] as T[][]);
|
|
11
|
+
}
|
|
@@ -110,7 +110,7 @@ export function isBase64(str: string): boolean {
|
|
|
110
110
|
const cleanedBase64Data = cleanBase64(str);
|
|
111
111
|
|
|
112
112
|
// Sometimes words like 'male' and hashes like md5, sha1, sha256, sha512 are detected as base64
|
|
113
|
-
if (cleanedBase64Data.length < 128) return false;
|
|
113
|
+
//if (cleanedBase64Data.length < 128) return false;
|
|
114
114
|
|
|
115
115
|
try {
|
|
116
116
|
const buffer = Buffer.from(cleanedBase64Data, 'base64');
|
|
@@ -222,195 +222,6 @@ export const identifyMimetypeFromString = (input: string) => {
|
|
|
222
222
|
return 'text/plain';
|
|
223
223
|
};
|
|
224
224
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
// chunkSize,
|
|
229
|
-
// chunkOverlap,
|
|
230
|
-
// }: {
|
|
231
|
-
// chunkSize: number;
|
|
232
|
-
// chunkOverlap: number;
|
|
233
|
-
// }
|
|
234
|
-
// ): string[] {
|
|
235
|
-
// const textSplitter = new TextSplitter({
|
|
236
|
-
// chunkSize,
|
|
237
|
-
// chunkOverlap,
|
|
238
|
-
// separators: ['\n\n', '\n', ' ', ''],
|
|
239
|
-
// keepSeparator: true,
|
|
240
|
-
// });
|
|
241
|
-
// let output = textSplitter.splitText(text);
|
|
242
|
-
|
|
243
|
-
// return output;
|
|
244
|
-
// }
|
|
245
|
-
// class TextSplitter {
|
|
246
|
-
// private chunkSize: number;
|
|
247
|
-
// private chunkOverlap: number;
|
|
248
|
-
// private separators: string[] = ['\n\n', '\n', ' ', ''];
|
|
249
|
-
// private keepSeparator: boolean = true;
|
|
250
|
-
|
|
251
|
-
// constructor({
|
|
252
|
-
// chunkSize = 1000,
|
|
253
|
-
// chunkOverlap = 200,
|
|
254
|
-
// separators,
|
|
255
|
-
// keepSeparator,
|
|
256
|
-
// }: {
|
|
257
|
-
// chunkSize?: number;
|
|
258
|
-
// chunkOverlap?: number;
|
|
259
|
-
// separators?: string[];
|
|
260
|
-
// keepSeparator?: boolean;
|
|
261
|
-
// } = {}) {
|
|
262
|
-
// this.chunkSize = chunkSize;
|
|
263
|
-
// this.chunkOverlap = chunkOverlap;
|
|
264
|
-
|
|
265
|
-
// if (separators) {
|
|
266
|
-
// this.separators = separators;
|
|
267
|
-
// }
|
|
268
|
-
|
|
269
|
-
// if (keepSeparator !== undefined) {
|
|
270
|
-
// this.keepSeparator = keepSeparator;
|
|
271
|
-
// }
|
|
272
|
-
|
|
273
|
-
// if (this.chunkOverlap >= this.chunkSize) {
|
|
274
|
-
// throw new Error('Cannot have chunkOverlap >= chunkSize');
|
|
275
|
-
// }
|
|
276
|
-
// }
|
|
277
|
-
|
|
278
|
-
// public splitText(text: string): string[] {
|
|
279
|
-
// return this._splitText(text, this.separators);
|
|
280
|
-
// }
|
|
281
|
-
|
|
282
|
-
// private _splitText(text: string, separators: string[]): string[] {
|
|
283
|
-
// const finalChunks: string[] = [];
|
|
284
|
-
|
|
285
|
-
// // Get appropriate separator to use
|
|
286
|
-
// let separator: string = separators[separators.length - 1];
|
|
287
|
-
// let newSeparators: string[] | undefined;
|
|
288
|
-
|
|
289
|
-
// for (let i = 0; i < separators.length; i += 1) {
|
|
290
|
-
// const s = separators[i];
|
|
291
|
-
// if (s === '') {
|
|
292
|
-
// separator = s;
|
|
293
|
-
// break;
|
|
294
|
-
// }
|
|
295
|
-
// if (text.includes(s)) {
|
|
296
|
-
// separator = s;
|
|
297
|
-
// newSeparators = separators.slice(i + 1);
|
|
298
|
-
// break;
|
|
299
|
-
// }
|
|
300
|
-
// }
|
|
301
|
-
|
|
302
|
-
// // Split the text using the identified separator
|
|
303
|
-
// const splits = this.splitOnSeparator(text, separator);
|
|
304
|
-
|
|
305
|
-
// // Process splits, recursively splitting longer texts
|
|
306
|
-
// let goodSplits: string[] = [];
|
|
307
|
-
// const _separator = this.keepSeparator ? '' : separator;
|
|
308
|
-
|
|
309
|
-
// for (const s of splits) {
|
|
310
|
-
// if (this.lengthFunction(s) < this.chunkSize) {
|
|
311
|
-
// goodSplits.push(s);
|
|
312
|
-
// } else {
|
|
313
|
-
// if (goodSplits.length) {
|
|
314
|
-
// const mergedText = this.mergeSplits(goodSplits, _separator);
|
|
315
|
-
// finalChunks.push(...mergedText);
|
|
316
|
-
// goodSplits = [];
|
|
317
|
-
// }
|
|
318
|
-
|
|
319
|
-
// if (!newSeparators) {
|
|
320
|
-
// finalChunks.push(s);
|
|
321
|
-
// } else {
|
|
322
|
-
// const otherInfo = this._splitText(s, newSeparators);
|
|
323
|
-
// finalChunks.push(...otherInfo);
|
|
324
|
-
// }
|
|
325
|
-
// }
|
|
326
|
-
// }
|
|
327
|
-
|
|
328
|
-
// if (goodSplits.length) {
|
|
329
|
-
// const mergedText = this.mergeSplits(goodSplits, _separator);
|
|
330
|
-
// finalChunks.push(...mergedText);
|
|
331
|
-
// }
|
|
332
|
-
|
|
333
|
-
// return finalChunks;
|
|
334
|
-
// }
|
|
335
|
-
|
|
336
|
-
// private splitOnSeparator(text: string, separator: string): string[] {
|
|
337
|
-
// let splits: string[];
|
|
338
|
-
|
|
339
|
-
// if (separator) {
|
|
340
|
-
// if (this.keepSeparator) {
|
|
341
|
-
// const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
|
|
342
|
-
// splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
|
|
343
|
-
// } else {
|
|
344
|
-
// splits = text.split(separator);
|
|
345
|
-
// }
|
|
346
|
-
// } else {
|
|
347
|
-
// splits = text.split('');
|
|
348
|
-
// }
|
|
349
|
-
|
|
350
|
-
// return splits.filter((s) => s !== '');
|
|
351
|
-
// }
|
|
352
|
-
|
|
353
|
-
// private lengthFunction(text: string): number {
|
|
354
|
-
// return text.length;
|
|
355
|
-
// }
|
|
356
|
-
|
|
357
|
-
// private joinDocs(docs: string[], separator: string): string | null {
|
|
358
|
-
// const text = docs.join(separator).trim();
|
|
359
|
-
// return text === '' ? null : text;
|
|
360
|
-
// }
|
|
361
|
-
|
|
362
|
-
// private mergeSplits(splits: string[], separator: string): string[] {
|
|
363
|
-
// const docs: string[] = [];
|
|
364
|
-
// const currentDoc: string[] = [];
|
|
365
|
-
// let total = 0;
|
|
366
|
-
|
|
367
|
-
// for (const d of splits) {
|
|
368
|
-
// const _len = this.lengthFunction(d);
|
|
369
|
-
|
|
370
|
-
// if (total + _len + currentDoc.length * separator.length > this.chunkSize) {
|
|
371
|
-
// if (total > this.chunkSize) {
|
|
372
|
-
// console.warn(`Created a chunk of size ${total}, which is longer than the specified ${this.chunkSize}`);
|
|
373
|
-
// }
|
|
374
|
-
|
|
375
|
-
// if (currentDoc.length > 0) {
|
|
376
|
-
// const doc = this.joinDocs(currentDoc, separator);
|
|
377
|
-
// if (doc !== null) {
|
|
378
|
-
// docs.push(doc);
|
|
379
|
-
// }
|
|
380
|
-
|
|
381
|
-
// // Keep popping if conditions are met
|
|
382
|
-
// while (total > this.chunkOverlap || (total + _len + currentDoc.length * separator.length > this.chunkSize && total > 0)) {
|
|
383
|
-
// total -= this.lengthFunction(currentDoc[0]);
|
|
384
|
-
// currentDoc.shift();
|
|
385
|
-
// }
|
|
386
|
-
// }
|
|
387
|
-
// }
|
|
388
|
-
|
|
389
|
-
// currentDoc.push(d);
|
|
390
|
-
// total += _len;
|
|
391
|
-
// }
|
|
392
|
-
|
|
393
|
-
// const doc = this.joinDocs(currentDoc, separator);
|
|
394
|
-
// if (doc !== null) {
|
|
395
|
-
// docs.push(doc);
|
|
396
|
-
// }
|
|
397
|
-
|
|
398
|
-
// return docs;
|
|
399
|
-
// }
|
|
400
|
-
// }
|
|
401
|
-
|
|
402
|
-
// class RecursiveTextSplitter extends TextSplitter {
|
|
403
|
-
// constructor({
|
|
404
|
-
// chunkSize = 1000,
|
|
405
|
-
// chunkOverlap = 200,
|
|
406
|
-
// separators = ['\n\n', '\n', ' ', ''],
|
|
407
|
-
// keepSeparator = true,
|
|
408
|
-
// }: {
|
|
409
|
-
// chunkSize?: number;
|
|
410
|
-
// chunkOverlap?: number;
|
|
411
|
-
// separators?: string[];
|
|
412
|
-
// keepSeparator?: boolean;
|
|
413
|
-
// } = {}) {
|
|
414
|
-
// super({ chunkSize, chunkOverlap, separators, keepSeparator });
|
|
415
|
-
// }
|
|
416
|
-
// }
|
|
225
|
+
export function calcSizeMb(text: string): number {
|
|
226
|
+
return Buffer.byteLength(text, 'utf-8') / 1024 / 1024;
|
|
227
|
+
}
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import { IAgent as Agent } from '@sre/types/Agent.types';
|
|
2
|
-
import { Component } from './Component.class';
|
|
3
|
-
import { TemplateString } from '@sre/helpers/TemplateString.helper';
|
|
4
|
-
import Joi from 'joi';
|
|
5
|
-
import { validateCharacterSet } from '../utils';
|
|
6
|
-
import { ConnectorService } from '@sre/Core/ConnectorsService';
|
|
7
|
-
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
8
|
-
|
|
9
|
-
import { DataSourceIndexer } from './DataSourceIndexer.class';
|
|
10
|
-
|
|
11
|
-
export class DataSourceCleaner extends Component {
|
|
12
|
-
protected configSchema = Joi.object({
|
|
13
|
-
namespaceId: Joi.string().max(50).allow('').label('namespace'),
|
|
14
|
-
id: Joi.string().custom(validateCharacterSet, 'custom validation characterSet').allow('').label('source identifier'),
|
|
15
|
-
});
|
|
16
|
-
constructor() {
|
|
17
|
-
super();
|
|
18
|
-
}
|
|
19
|
-
init() {}
|
|
20
|
-
async process(input, config, agent: Agent) {
|
|
21
|
-
await super.process(input, config, agent);
|
|
22
|
-
const teamId = agent.teamId;
|
|
23
|
-
const agentId = agent.id;
|
|
24
|
-
let debugOutput = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
|
|
25
|
-
|
|
26
|
-
try {
|
|
27
|
-
const configSchema = this.validateConfigData(config.data);
|
|
28
|
-
if (configSchema.error) {
|
|
29
|
-
throw new Error(`Config data validation error: ${configSchema.error}\n EXITING...`);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
const outputs = {};
|
|
33
|
-
for (let con of config.outputs) {
|
|
34
|
-
if (con.default) continue;
|
|
35
|
-
outputs[con.name] = con?.description ? `<${con?.description}>` : '';
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
const inputSchema = this.validateInput(input);
|
|
39
|
-
if (inputSchema.error) {
|
|
40
|
-
throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
const namespaceId = configSchema.value.namespaceId.split('_')?.slice(1).join('_') || configSchema.value.namespaceId;
|
|
44
|
-
|
|
45
|
-
let vectorDbConnector = ConnectorService.getVectorDBConnector();
|
|
46
|
-
|
|
47
|
-
let existingnamespace = await vectorDbConnector.requester(AccessCandidate.team(teamId)).namespaceExists(namespaceId);
|
|
48
|
-
if (!existingnamespace) {
|
|
49
|
-
throw new Error(`Namespace ${namespaceId} does not exist`);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
const providedId = TemplateString(config.data.id).parse(input).result;
|
|
53
|
-
const idRegex = /^[a-zA-Z0-9\-\_\.]+$/;
|
|
54
|
-
if (!idRegex.test(providedId)) {
|
|
55
|
-
throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
|
|
56
|
-
}
|
|
57
|
-
debugOutput += `Searching for data source with id: ${providedId}\n`;
|
|
58
|
-
|
|
59
|
-
const dsId = DataSourceIndexer.genDsId(providedId, teamId, namespaceId);
|
|
60
|
-
|
|
61
|
-
await vectorDbConnector.requester(AccessCandidate.team(teamId)).deleteDatasource(namespaceId, dsId);
|
|
62
|
-
|
|
63
|
-
debugOutput += `Deleted data source with id: ${providedId}\n`;
|
|
64
|
-
|
|
65
|
-
return {
|
|
66
|
-
_debug: debugOutput,
|
|
67
|
-
Success: true,
|
|
68
|
-
// _error,
|
|
69
|
-
};
|
|
70
|
-
} catch (err: any) {
|
|
71
|
-
debugOutput += `Failed to delete data source: \n Error: ${err?.message}\n`;
|
|
72
|
-
|
|
73
|
-
return {
|
|
74
|
-
_debug: debugOutput,
|
|
75
|
-
_error: err?.message || "Couldn't delete data source",
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
validateInput(input: any) {
|
|
81
|
-
return Joi.object({}).unknown(true).validate(input);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
validateConfigData(data: any) {
|
|
85
|
-
return Joi.object({
|
|
86
|
-
namespaceId: Joi.string().required(),
|
|
87
|
-
id: Joi.string().optional().allow('').allow(null),
|
|
88
|
-
})
|
|
89
|
-
.unknown(true)
|
|
90
|
-
.validate(data);
|
|
91
|
-
}
|
|
92
|
-
}
|