@smythos/sre 1.7.20 → 1.7.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +134 -89
- package/dist/index.js.map +1 -1
- package/dist/types/Components/AgentPlugin.class.d.ts +1 -1
- package/dist/types/Components/DataSourceIndexer.class.d.ts +4 -12
- package/dist/types/Components/GenAILLM.class.d.ts +5 -5
- package/dist/types/Components/RAG/DataSourceCleaner.class.d.ts +4 -4
- package/dist/types/Components/RAG/DataSourceComponent.class.d.ts +5 -1
- package/dist/types/Components/index.d.ts +3 -3
- package/dist/types/config.d.ts +1 -0
- package/dist/types/helpers/Conversation.helper.d.ts +10 -13
- package/dist/types/helpers/TemplateString.helper.d.ts +1 -1
- package/dist/types/index.d.ts +4 -3
- package/dist/types/subsystems/IO/VectorDB.service/VectorDBConnector.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +11 -4
- package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +5 -0
- package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +19 -0
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +15 -10
- package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.d.ts +35 -0
- package/dist/types/subsystems/Security/Account.service/AccountConnector.d.ts +2 -2
- package/dist/types/subsystems/Security/Vault.service/connectors/SecretsManager.class.d.ts +2 -3
- package/dist/types/types/LLM.types.d.ts +23 -0
- package/dist/types/types/VectorDB.types.d.ts +4 -0
- package/dist/types/utils/string.utils.d.ts +1 -0
- package/package.json +3 -3
- package/src/Components/APIEndpoint.class.ts +1 -6
- package/src/Components/AgentPlugin.class.ts +20 -3
- package/src/Components/Classifier.class.ts +79 -16
- package/src/Components/Component.class.ts +14 -1
- package/src/Components/ForEach.class.ts +34 -6
- package/src/Components/GenAILLM.class.ts +75 -34
- package/src/Components/LLMAssistant.class.ts +56 -21
- package/src/Components/RAG/DataSourceCleaner.class.ts +180 -0
- package/src/Components/RAG/DataSourceComponent.class.ts +137 -0
- package/src/Components/RAG/DataSourceIndexer.class.ts +260 -0
- package/src/Components/{DataSourceLookup.class.ts → RAG/DataSourceLookup.class.ts} +96 -3
- package/src/Components/ScrapflyWebScrape.class.ts +7 -0
- package/src/Components/ServerlessCode.class.ts +1 -4
- package/src/Components/index.ts +3 -3
- package/src/config.ts +1 -0
- package/src/helpers/Conversation.helper.ts +112 -26
- package/src/helpers/S3Cache.helper.ts +2 -1
- package/src/helpers/TemplateString.helper.ts +6 -5
- package/src/index.ts +213 -212
- package/src/index.ts.bak +213 -212
- package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +3 -1
- package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +145 -19
- package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +67 -22
- package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +2 -1
- package/src/subsystems/IO/VectorDB.service/embed/index.ts +16 -0
- package/src/subsystems/LLMManager/LLM.helper.ts +25 -0
- package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +1 -1
- package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +35 -10
- package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +12 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +4 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +192 -139
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +17 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +18 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +14 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +6 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +5 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +8 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +1 -1
- package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +9 -8
- package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +92 -1
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +260 -17
- package/src/subsystems/Security/Account.service/AccountConnector.ts +3 -3
- package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +8 -63
- package/src/types/LLM.types.ts +24 -0
- package/src/types/VectorDB.types.ts +4 -0
- package/src/utils/array.utils.ts +11 -0
- package/src/utils/base64.utils.ts +1 -1
- package/src/utils/data.utils.ts +6 -4
- package/src/utils/string.utils.ts +3 -192
- package/src/Components/DataSourceCleaner.class.ts +0 -92
- package/src/Components/DataSourceIndexer.class.ts +0 -181
|
package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts
CHANGED
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
GetSecretValueCommandOutput,
|
|
14
14
|
} from '@aws-sdk/client-secrets-manager';
|
|
15
15
|
|
|
16
|
+
const defaultPrefix = 'smythos';
|
|
16
17
|
const console = Logger('SecretsManager');
|
|
17
18
|
|
|
18
19
|
export type SecretsManagerConfig = {
|
|
@@ -28,8 +29,7 @@ export class SecretsManager extends VaultConnector {
|
|
|
28
29
|
|
|
29
30
|
constructor(protected _settings: SecretsManagerConfig) {
|
|
30
31
|
super(_settings);
|
|
31
|
-
|
|
32
|
-
this.prefix = _settings.prefix || '';
|
|
32
|
+
this.prefix = _settings.prefix || defaultPrefix;
|
|
33
33
|
this.secretsManager = new SecretsManagerClient({
|
|
34
34
|
region: _settings.region,
|
|
35
35
|
...(_settings.awsAccessKeyId && _settings.awsSecretAccessKey
|
|
@@ -46,11 +46,7 @@ export class SecretsManager extends VaultConnector {
|
|
|
46
46
|
try {
|
|
47
47
|
const accountConnector = ConnectorService.getAccountConnector();
|
|
48
48
|
const teamId = await accountConnector.getCandidateTeam(acRequest.candidate);
|
|
49
|
-
// try fetchting by Id, if not found, try fetching by name
|
|
50
49
|
let secret = await this.getSecretById(teamId, secretName);
|
|
51
|
-
if (!secret) {
|
|
52
|
-
secret = await this.getSecretByName(acRequest, secretName);
|
|
53
|
-
}
|
|
54
50
|
return secret;
|
|
55
51
|
} catch (error) {
|
|
56
52
|
console.error(error);
|
|
@@ -73,51 +69,18 @@ export class SecretsManager extends VaultConnector {
|
|
|
73
69
|
|
|
74
70
|
do {
|
|
75
71
|
const listResponse: ListSecretsCommandOutput = await this.secretsManager.send(
|
|
76
|
-
new ListSecretsCommand({ NextToken: nextToken, Filters: [{ Key: '
|
|
72
|
+
new ListSecretsCommand({ NextToken: nextToken, Filters: [{ Key: 'name', Values: [this.getVaultKey(teamId, '')] }] })
|
|
77
73
|
);
|
|
78
74
|
if (listResponse.SecretList) {
|
|
79
75
|
for (const secret of listResponse.SecretList) {
|
|
80
76
|
if (secret.Name) {
|
|
81
|
-
secrets.push(
|
|
82
|
-
ARN: secret.ARN,
|
|
83
|
-
Name: secret.Name,
|
|
84
|
-
CreatedDate: secret.CreatedDate,
|
|
85
|
-
});
|
|
77
|
+
secrets.push(this.extractSecretName(secret.Name, teamId, this.prefix));
|
|
86
78
|
}
|
|
87
79
|
}
|
|
88
80
|
}
|
|
89
81
|
nextToken = listResponse.NextToken;
|
|
90
82
|
} while (nextToken);
|
|
91
|
-
|
|
92
|
-
const $promises = [];
|
|
93
|
-
for (const secret of secrets) {
|
|
94
|
-
$promises.push(getSpecificSecret(secret, this.secretsManager));
|
|
95
|
-
}
|
|
96
|
-
const formattedSecrets = await Promise.all($promises);
|
|
97
|
-
return formattedSecrets;
|
|
98
|
-
|
|
99
|
-
async function getSpecificSecret(secret, secretsManager: SecretsManagerClient) {
|
|
100
|
-
const data: GetSecretValueCommandOutput = await secretsManager.send(new GetSecretValueCommand({ SecretId: secret.ARN }));
|
|
101
|
-
let secretString = data.SecretString;
|
|
102
|
-
|
|
103
|
-
if (secretString) {
|
|
104
|
-
try {
|
|
105
|
-
let parsedSecret = JSON.parse(secretString);
|
|
106
|
-
const secretId = secret.Name?.split('/').pop();
|
|
107
|
-
const key = parsedSecret.key;
|
|
108
|
-
const value = parsedSecret.value;
|
|
109
|
-
const metadata = parsedSecret.metadata;
|
|
110
|
-
return {
|
|
111
|
-
id: secretId,
|
|
112
|
-
key,
|
|
113
|
-
value,
|
|
114
|
-
metadata,
|
|
115
|
-
};
|
|
116
|
-
} catch (error) {
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
return null;
|
|
120
|
-
}
|
|
83
|
+
return secrets;
|
|
121
84
|
}
|
|
122
85
|
|
|
123
86
|
public async getResourceACL(resourceId: string, candidate: IAccessCandidate) {
|
|
@@ -133,17 +96,6 @@ export class SecretsManager extends VaultConnector {
|
|
|
133
96
|
return acl;
|
|
134
97
|
}
|
|
135
98
|
|
|
136
|
-
private async getSecretByName(acRequest: AccessRequest, secretName: string) {
|
|
137
|
-
try {
|
|
138
|
-
const secrets = await this.listKeys(acRequest);
|
|
139
|
-
const secret = secrets.find((s) => s.key === secretName);
|
|
140
|
-
return secret?.value;
|
|
141
|
-
} catch (error) {
|
|
142
|
-
console.error(error);
|
|
143
|
-
return null;
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
99
|
private getVaultKey(teamId: string, secretName: string) {
|
|
148
100
|
return `${this.prefix.length ? `${this.prefix}/` : ''}${teamId}/${secretName}`;
|
|
149
101
|
}
|
|
@@ -151,20 +103,13 @@ export class SecretsManager extends VaultConnector {
|
|
|
151
103
|
private async getSecretById(teamId: string, secretId: string) {
|
|
152
104
|
try {
|
|
153
105
|
const secret: GetSecretValueCommandOutput = await this.secretsManager.send(new GetSecretValueCommand({ SecretId: this.getVaultKey(teamId, secretId) }));
|
|
154
|
-
return
|
|
106
|
+
return secret.SecretString;
|
|
155
107
|
} catch (error) {
|
|
156
|
-
console.error(error);
|
|
157
108
|
return null;
|
|
158
109
|
}
|
|
159
110
|
}
|
|
160
111
|
|
|
161
|
-
private
|
|
162
|
-
|
|
163
|
-
const parsedSecret = typeof secret.SecretString === 'string' ? JSON.parse(secret.SecretString) : secret.SecretString;
|
|
164
|
-
return parsedSecret.value;
|
|
165
|
-
} catch (error) {
|
|
166
|
-
return null;
|
|
167
|
-
}
|
|
168
|
-
|
|
112
|
+
private extractSecretName(secretKey: string, teamId: string, prefix: string) {
|
|
113
|
+
return secretKey.replace(`${prefix}/${teamId}/`, '');
|
|
169
114
|
}
|
|
170
115
|
}
|
package/src/types/LLM.types.ts
CHANGED
|
@@ -237,6 +237,7 @@ export type TLLMModel = {
|
|
|
237
237
|
isCustomLLM?: boolean;
|
|
238
238
|
isUserCustomLLM?: boolean;
|
|
239
239
|
modelId?: string;
|
|
240
|
+
modelEntryName?: string;
|
|
240
241
|
tokens?: number;
|
|
241
242
|
completionTokens?: number;
|
|
242
243
|
components?: string[];
|
|
@@ -446,11 +447,34 @@ export type TLLMInputMessage = {
|
|
|
446
447
|
};
|
|
447
448
|
|
|
448
449
|
export interface ILLMContextStore {
|
|
450
|
+
id: string;
|
|
449
451
|
save(messages: any[]): Promise<void>;
|
|
450
452
|
load(count?: number): Promise<any[]>;
|
|
451
453
|
getMessage(message_id: string): Promise<any[]>;
|
|
452
454
|
}
|
|
453
455
|
|
|
456
|
+
/**
|
|
457
|
+
* Configuration options for Conversation helper
|
|
458
|
+
*/
|
|
459
|
+
export interface IConversationSettings {
|
|
460
|
+
maxContextSize?: number;
|
|
461
|
+
maxOutputTokens?: number;
|
|
462
|
+
systemPrompt?: string;
|
|
463
|
+
toolChoice?: string;
|
|
464
|
+
store?: ILLMContextStore;
|
|
465
|
+
experimentalCache?: boolean;
|
|
466
|
+
toolsStrategy?: (toolsConfig: any) => any;
|
|
467
|
+
agentId?: string;
|
|
468
|
+
agentVersion?: string;
|
|
469
|
+
baseUrl?: string;
|
|
470
|
+
/**
|
|
471
|
+
* Maximum number of tool calls allowed in a single conversation session.
|
|
472
|
+
* Prevents infinite loops in tool calling scenarios.
|
|
473
|
+
* @default 100
|
|
474
|
+
*/
|
|
475
|
+
maxToolCalls?: number;
|
|
476
|
+
}
|
|
477
|
+
|
|
454
478
|
export enum APIKeySource {
|
|
455
479
|
Smyth = 'smyth-managed',
|
|
456
480
|
User = 'user-managed',
|
|
package/src/types/VectorDB.types.ts
CHANGED
|
@@ -64,6 +64,10 @@ export interface IStorageVectorDataSource {
|
|
|
64
64
|
id: string;
|
|
65
65
|
candidateId: string;
|
|
66
66
|
candidateRole: string;
|
|
67
|
+
datasourceSizeMb?: number;
|
|
68
|
+
createdAt?: Date;
|
|
69
|
+
chunkSize?: number;
|
|
70
|
+
chunkOverlap?: number;
|
|
67
71
|
}
|
|
68
72
|
|
|
69
73
|
export interface IStorageVectorNamespace {
|
|
package/src/utils/array.utils.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility method to chunk arrays into smaller batches
|
|
3
|
+
*/
|
|
4
|
+
export function chunkArr<T>(arr: T[], sizePerChunk: number): T[][] {
|
|
5
|
+
return arr.reduce((chunks, elem, index) => {
|
|
6
|
+
const chunkIndex = Math.floor(index / sizePerChunk);
|
|
7
|
+
const chunk = chunks[chunkIndex] || [];
|
|
8
|
+
chunks[chunkIndex] = chunk.concat([elem]);
|
|
9
|
+
return chunks;
|
|
10
|
+
}, [] as T[][]);
|
|
11
|
+
}
|
|
package/src/utils/base64.utils.ts
CHANGED
|
@@ -110,7 +110,7 @@ export function isBase64(str: string): boolean {
|
|
|
110
110
|
const cleanedBase64Data = cleanBase64(str);
|
|
111
111
|
|
|
112
112
|
// Sometimes words like 'male' and hashes like md5, sha1, sha256, sha512 are detected as base64
|
|
113
|
-
if (cleanedBase64Data.length < 128) return false;
|
|
113
|
+
//if (cleanedBase64Data.length < 128) return false;
|
|
114
114
|
|
|
115
115
|
try {
|
|
116
116
|
const buffer = Buffer.from(cleanedBase64Data, 'base64');
|
package/src/utils/data.utils.ts
CHANGED
|
@@ -161,12 +161,12 @@ function isValidPathFormat(path: string): boolean {
|
|
|
161
161
|
const windowsAbsolute = /^[a-zA-Z]:[\\\/]/; // C:\ or C:/
|
|
162
162
|
const windowsUNC = /^\\\\[^\\]+\\[^\\]+/; // \\server\share
|
|
163
163
|
const windowsRelative = /^\.{1,2}[\\\/]/; // .\ or ..\ or ./ or ../
|
|
164
|
-
|
|
164
|
+
|
|
165
165
|
// Unix path patterns
|
|
166
166
|
const unixAbsolute = /^\//; // /path/to/file
|
|
167
167
|
const unixHome = /^~[\/]/; // ~/path/to/file
|
|
168
168
|
const unixRelative = /^\.{1,2}\//; // ./ or ../
|
|
169
|
-
|
|
169
|
+
|
|
170
170
|
// Relative paths without leading ./ or .\
|
|
171
171
|
const genericRelative = /^[^\\\/]/; // path/to/file or path\to\file
|
|
172
172
|
|
|
@@ -259,12 +259,14 @@ export async function formatDataForDebug(data: any, candidate: IAccessCandidate)
|
|
|
259
259
|
}
|
|
260
260
|
|
|
261
261
|
try {
|
|
262
|
-
|
|
262
|
+
// We use .includes() instead of === because constructor.name can be 'BinaryInput$1', 'FormData$1', etc.
|
|
263
|
+
// This happens when the same class is loaded in different module contexts (bundling, HMR, circular deps)
|
|
264
|
+
if (data.constructor?.name.includes('BinaryInput')) {
|
|
263
265
|
const jsonData = await data.getJsonData(candidate);
|
|
264
266
|
dataForDebug = `[BinaryInput size=${jsonData?.size}]`;
|
|
265
267
|
} else if (isBuffer(data)) {
|
|
266
268
|
dataForDebug = `[Buffer size=${data.byteLength}]`;
|
|
267
|
-
} else if (data.constructor?.name
|
|
269
|
+
} else if (data.constructor?.name.includes('FormData')) {
|
|
268
270
|
dataForDebug = `[FormData]`;
|
|
269
271
|
} else if (isBase64(data) || isBase64DataUrl(data)) {
|
|
270
272
|
dataForDebug = `[Base64 size=${getBase64FileSize(data)}]`;
|
|
package/src/utils/string.utils.ts
CHANGED
|
@@ -222,195 +222,6 @@ export const identifyMimetypeFromString = (input: string) => {
|
|
|
222
222
|
return 'text/plain';
|
|
223
223
|
};
|
|
224
224
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
// chunkSize,
|
|
229
|
-
// chunkOverlap,
|
|
230
|
-
// }: {
|
|
231
|
-
// chunkSize: number;
|
|
232
|
-
// chunkOverlap: number;
|
|
233
|
-
// }
|
|
234
|
-
// ): string[] {
|
|
235
|
-
// const textSplitter = new TextSplitter({
|
|
236
|
-
// chunkSize,
|
|
237
|
-
// chunkOverlap,
|
|
238
|
-
// separators: ['\n\n', '\n', ' ', ''],
|
|
239
|
-
// keepSeparator: true,
|
|
240
|
-
// });
|
|
241
|
-
// let output = textSplitter.splitText(text);
|
|
242
|
-
|
|
243
|
-
// return output;
|
|
244
|
-
// }
|
|
245
|
-
// class TextSplitter {
|
|
246
|
-
// private chunkSize: number;
|
|
247
|
-
// private chunkOverlap: number;
|
|
248
|
-
// private separators: string[] = ['\n\n', '\n', ' ', ''];
|
|
249
|
-
// private keepSeparator: boolean = true;
|
|
250
|
-
|
|
251
|
-
// constructor({
|
|
252
|
-
// chunkSize = 1000,
|
|
253
|
-
// chunkOverlap = 200,
|
|
254
|
-
// separators,
|
|
255
|
-
// keepSeparator,
|
|
256
|
-
// }: {
|
|
257
|
-
// chunkSize?: number;
|
|
258
|
-
// chunkOverlap?: number;
|
|
259
|
-
// separators?: string[];
|
|
260
|
-
// keepSeparator?: boolean;
|
|
261
|
-
// } = {}) {
|
|
262
|
-
// this.chunkSize = chunkSize;
|
|
263
|
-
// this.chunkOverlap = chunkOverlap;
|
|
264
|
-
|
|
265
|
-
// if (separators) {
|
|
266
|
-
// this.separators = separators;
|
|
267
|
-
// }
|
|
268
|
-
|
|
269
|
-
// if (keepSeparator !== undefined) {
|
|
270
|
-
// this.keepSeparator = keepSeparator;
|
|
271
|
-
// }
|
|
272
|
-
|
|
273
|
-
// if (this.chunkOverlap >= this.chunkSize) {
|
|
274
|
-
// throw new Error('Cannot have chunkOverlap >= chunkSize');
|
|
275
|
-
// }
|
|
276
|
-
// }
|
|
277
|
-
|
|
278
|
-
// public splitText(text: string): string[] {
|
|
279
|
-
// return this._splitText(text, this.separators);
|
|
280
|
-
// }
|
|
281
|
-
|
|
282
|
-
// private _splitText(text: string, separators: string[]): string[] {
|
|
283
|
-
// const finalChunks: string[] = [];
|
|
284
|
-
|
|
285
|
-
// // Get appropriate separator to use
|
|
286
|
-
// let separator: string = separators[separators.length - 1];
|
|
287
|
-
// let newSeparators: string[] | undefined;
|
|
288
|
-
|
|
289
|
-
// for (let i = 0; i < separators.length; i += 1) {
|
|
290
|
-
// const s = separators[i];
|
|
291
|
-
// if (s === '') {
|
|
292
|
-
// separator = s;
|
|
293
|
-
// break;
|
|
294
|
-
// }
|
|
295
|
-
// if (text.includes(s)) {
|
|
296
|
-
// separator = s;
|
|
297
|
-
// newSeparators = separators.slice(i + 1);
|
|
298
|
-
// break;
|
|
299
|
-
// }
|
|
300
|
-
// }
|
|
301
|
-
|
|
302
|
-
// // Split the text using the identified separator
|
|
303
|
-
// const splits = this.splitOnSeparator(text, separator);
|
|
304
|
-
|
|
305
|
-
// // Process splits, recursively splitting longer texts
|
|
306
|
-
// let goodSplits: string[] = [];
|
|
307
|
-
// const _separator = this.keepSeparator ? '' : separator;
|
|
308
|
-
|
|
309
|
-
// for (const s of splits) {
|
|
310
|
-
// if (this.lengthFunction(s) < this.chunkSize) {
|
|
311
|
-
// goodSplits.push(s);
|
|
312
|
-
// } else {
|
|
313
|
-
// if (goodSplits.length) {
|
|
314
|
-
// const mergedText = this.mergeSplits(goodSplits, _separator);
|
|
315
|
-
// finalChunks.push(...mergedText);
|
|
316
|
-
// goodSplits = [];
|
|
317
|
-
// }
|
|
318
|
-
|
|
319
|
-
// if (!newSeparators) {
|
|
320
|
-
// finalChunks.push(s);
|
|
321
|
-
// } else {
|
|
322
|
-
// const otherInfo = this._splitText(s, newSeparators);
|
|
323
|
-
// finalChunks.push(...otherInfo);
|
|
324
|
-
// }
|
|
325
|
-
// }
|
|
326
|
-
// }
|
|
327
|
-
|
|
328
|
-
// if (goodSplits.length) {
|
|
329
|
-
// const mergedText = this.mergeSplits(goodSplits, _separator);
|
|
330
|
-
// finalChunks.push(...mergedText);
|
|
331
|
-
// }
|
|
332
|
-
|
|
333
|
-
// return finalChunks;
|
|
334
|
-
// }
|
|
335
|
-
|
|
336
|
-
// private splitOnSeparator(text: string, separator: string): string[] {
|
|
337
|
-
// let splits: string[];
|
|
338
|
-
|
|
339
|
-
// if (separator) {
|
|
340
|
-
// if (this.keepSeparator) {
|
|
341
|
-
// const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
|
|
342
|
-
// splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
|
|
343
|
-
// } else {
|
|
344
|
-
// splits = text.split(separator);
|
|
345
|
-
// }
|
|
346
|
-
// } else {
|
|
347
|
-
// splits = text.split('');
|
|
348
|
-
// }
|
|
349
|
-
|
|
350
|
-
// return splits.filter((s) => s !== '');
|
|
351
|
-
// }
|
|
352
|
-
|
|
353
|
-
// private lengthFunction(text: string): number {
|
|
354
|
-
// return text.length;
|
|
355
|
-
// }
|
|
356
|
-
|
|
357
|
-
// private joinDocs(docs: string[], separator: string): string | null {
|
|
358
|
-
// const text = docs.join(separator).trim();
|
|
359
|
-
// return text === '' ? null : text;
|
|
360
|
-
// }
|
|
361
|
-
|
|
362
|
-
// private mergeSplits(splits: string[], separator: string): string[] {
|
|
363
|
-
// const docs: string[] = [];
|
|
364
|
-
// const currentDoc: string[] = [];
|
|
365
|
-
// let total = 0;
|
|
366
|
-
|
|
367
|
-
// for (const d of splits) {
|
|
368
|
-
// const _len = this.lengthFunction(d);
|
|
369
|
-
|
|
370
|
-
// if (total + _len + currentDoc.length * separator.length > this.chunkSize) {
|
|
371
|
-
// if (total > this.chunkSize) {
|
|
372
|
-
// console.warn(`Created a chunk of size ${total}, which is longer than the specified ${this.chunkSize}`);
|
|
373
|
-
// }
|
|
374
|
-
|
|
375
|
-
// if (currentDoc.length > 0) {
|
|
376
|
-
// const doc = this.joinDocs(currentDoc, separator);
|
|
377
|
-
// if (doc !== null) {
|
|
378
|
-
// docs.push(doc);
|
|
379
|
-
// }
|
|
380
|
-
|
|
381
|
-
// // Keep popping if conditions are met
|
|
382
|
-
// while (total > this.chunkOverlap || (total + _len + currentDoc.length * separator.length > this.chunkSize && total > 0)) {
|
|
383
|
-
// total -= this.lengthFunction(currentDoc[0]);
|
|
384
|
-
// currentDoc.shift();
|
|
385
|
-
// }
|
|
386
|
-
// }
|
|
387
|
-
// }
|
|
388
|
-
|
|
389
|
-
// currentDoc.push(d);
|
|
390
|
-
// total += _len;
|
|
391
|
-
// }
|
|
392
|
-
|
|
393
|
-
// const doc = this.joinDocs(currentDoc, separator);
|
|
394
|
-
// if (doc !== null) {
|
|
395
|
-
// docs.push(doc);
|
|
396
|
-
// }
|
|
397
|
-
|
|
398
|
-
// return docs;
|
|
399
|
-
// }
|
|
400
|
-
// }
|
|
401
|
-
|
|
402
|
-
// class RecursiveTextSplitter extends TextSplitter {
|
|
403
|
-
// constructor({
|
|
404
|
-
// chunkSize = 1000,
|
|
405
|
-
// chunkOverlap = 200,
|
|
406
|
-
// separators = ['\n\n', '\n', ' ', ''],
|
|
407
|
-
// keepSeparator = true,
|
|
408
|
-
// }: {
|
|
409
|
-
// chunkSize?: number;
|
|
410
|
-
// chunkOverlap?: number;
|
|
411
|
-
// separators?: string[];
|
|
412
|
-
// keepSeparator?: boolean;
|
|
413
|
-
// } = {}) {
|
|
414
|
-
// super({ chunkSize, chunkOverlap, separators, keepSeparator });
|
|
415
|
-
// }
|
|
416
|
-
// }
|
|
225
|
+
export function calcSizeMb(text: string): number {
|
|
226
|
+
return Buffer.byteLength(text, 'utf-8') / 1024 / 1024;
|
|
227
|
+
}
|
|
package/src/Components/DataSourceCleaner.class.ts
REMOVED
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import { IAgent as Agent } from '@sre/types/Agent.types';
|
|
2
|
-
import { Component } from './Component.class';
|
|
3
|
-
import { TemplateString } from '@sre/helpers/TemplateString.helper';
|
|
4
|
-
import Joi from 'joi';
|
|
5
|
-
import { validateCharacterSet } from '../utils';
|
|
6
|
-
import { ConnectorService } from '@sre/Core/ConnectorsService';
|
|
7
|
-
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
8
|
-
|
|
9
|
-
import { DataSourceIndexer } from './DataSourceIndexer.class';
|
|
10
|
-
|
|
11
|
-
export class DataSourceCleaner extends Component {
|
|
12
|
-
protected configSchema = Joi.object({
|
|
13
|
-
namespaceId: Joi.string().max(50).allow('').label('namespace'),
|
|
14
|
-
id: Joi.string().custom(validateCharacterSet, 'custom validation characterSet').allow('').label('source identifier'),
|
|
15
|
-
});
|
|
16
|
-
constructor() {
|
|
17
|
-
super();
|
|
18
|
-
}
|
|
19
|
-
init() {}
|
|
20
|
-
async process(input, config, agent: Agent) {
|
|
21
|
-
await super.process(input, config, agent);
|
|
22
|
-
const teamId = agent.teamId;
|
|
23
|
-
const agentId = agent.id;
|
|
24
|
-
let debugOutput = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
|
|
25
|
-
|
|
26
|
-
try {
|
|
27
|
-
const configSchema = this.validateConfigData(config.data);
|
|
28
|
-
if (configSchema.error) {
|
|
29
|
-
throw new Error(`Config data validation error: ${configSchema.error}\n EXITING...`);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
const outputs = {};
|
|
33
|
-
for (let con of config.outputs) {
|
|
34
|
-
if (con.default) continue;
|
|
35
|
-
outputs[con.name] = con?.description ? `<${con?.description}>` : '';
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
const inputSchema = this.validateInput(input);
|
|
39
|
-
if (inputSchema.error) {
|
|
40
|
-
throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
const namespaceId = configSchema.value.namespaceId.split('_')?.slice(1).join('_') || configSchema.value.namespaceId;
|
|
44
|
-
|
|
45
|
-
let vectorDbConnector = ConnectorService.getVectorDBConnector();
|
|
46
|
-
|
|
47
|
-
let existingnamespace = await vectorDbConnector.requester(AccessCandidate.team(teamId)).namespaceExists(namespaceId);
|
|
48
|
-
if (!existingnamespace) {
|
|
49
|
-
throw new Error(`Namespace ${namespaceId} does not exist`);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
const providedId = TemplateString(config.data.id).parse(input).result;
|
|
53
|
-
const idRegex = /^[a-zA-Z0-9\-\_\.]+$/;
|
|
54
|
-
if (!idRegex.test(providedId)) {
|
|
55
|
-
throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
|
|
56
|
-
}
|
|
57
|
-
debugOutput += `Searching for data source with id: ${providedId}\n`;
|
|
58
|
-
|
|
59
|
-
const dsId = DataSourceIndexer.genDsId(providedId, teamId, namespaceId);
|
|
60
|
-
|
|
61
|
-
await vectorDbConnector.requester(AccessCandidate.team(teamId)).deleteDatasource(namespaceId, dsId);
|
|
62
|
-
|
|
63
|
-
debugOutput += `Deleted data source with id: ${providedId}\n`;
|
|
64
|
-
|
|
65
|
-
return {
|
|
66
|
-
_debug: debugOutput,
|
|
67
|
-
Success: true,
|
|
68
|
-
// _error,
|
|
69
|
-
};
|
|
70
|
-
} catch (err: any) {
|
|
71
|
-
debugOutput += `Failed to delete data source: \n Error: ${err?.message}\n`;
|
|
72
|
-
|
|
73
|
-
return {
|
|
74
|
-
_debug: debugOutput,
|
|
75
|
-
_error: err?.message || "Couldn't delete data source",
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
validateInput(input: any) {
|
|
81
|
-
return Joi.object({}).unknown(true).validate(input);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
validateConfigData(data: any) {
|
|
85
|
-
return Joi.object({
|
|
86
|
-
namespaceId: Joi.string().required(),
|
|
87
|
-
id: Joi.string().optional().allow('').allow(null),
|
|
88
|
-
})
|
|
89
|
-
.unknown(true)
|
|
90
|
-
.validate(data);
|
|
91
|
-
}
|
|
92
|
-
}
|