@smythos/sre 1.7.20 → 1.7.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +134 -89
- package/dist/index.js.map +1 -1
- package/dist/types/Components/AgentPlugin.class.d.ts +1 -1
- package/dist/types/Components/DataSourceIndexer.class.d.ts +4 -12
- package/dist/types/Components/GenAILLM.class.d.ts +5 -5
- package/dist/types/Components/RAG/DataSourceCleaner.class.d.ts +4 -4
- package/dist/types/Components/RAG/DataSourceComponent.class.d.ts +5 -1
- package/dist/types/Components/index.d.ts +3 -3
- package/dist/types/config.d.ts +1 -0
- package/dist/types/helpers/Conversation.helper.d.ts +10 -13
- package/dist/types/helpers/TemplateString.helper.d.ts +1 -1
- package/dist/types/index.d.ts +4 -3
- package/dist/types/subsystems/IO/VectorDB.service/VectorDBConnector.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +11 -4
- package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +5 -0
- package/dist/types/subsystems/LLMManager/LLM.helper.d.ts +19 -0
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +15 -10
- package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.d.ts +35 -0
- package/dist/types/subsystems/Security/Account.service/AccountConnector.d.ts +2 -2
- package/dist/types/subsystems/Security/Vault.service/connectors/SecretsManager.class.d.ts +2 -3
- package/dist/types/types/LLM.types.d.ts +23 -0
- package/dist/types/types/VectorDB.types.d.ts +4 -0
- package/dist/types/utils/string.utils.d.ts +1 -0
- package/package.json +3 -3
- package/src/Components/APIEndpoint.class.ts +1 -6
- package/src/Components/AgentPlugin.class.ts +20 -3
- package/src/Components/Classifier.class.ts +79 -16
- package/src/Components/Component.class.ts +14 -1
- package/src/Components/ForEach.class.ts +34 -6
- package/src/Components/GenAILLM.class.ts +75 -34
- package/src/Components/LLMAssistant.class.ts +56 -21
- package/src/Components/RAG/DataSourceCleaner.class.ts +180 -0
- package/src/Components/RAG/DataSourceComponent.class.ts +137 -0
- package/src/Components/RAG/DataSourceIndexer.class.ts +260 -0
- package/src/Components/{DataSourceLookup.class.ts → RAG/DataSourceLookup.class.ts} +96 -3
- package/src/Components/ScrapflyWebScrape.class.ts +7 -0
- package/src/Components/ServerlessCode.class.ts +1 -4
- package/src/Components/index.ts +3 -3
- package/src/config.ts +1 -0
- package/src/helpers/Conversation.helper.ts +112 -26
- package/src/helpers/S3Cache.helper.ts +2 -1
- package/src/helpers/TemplateString.helper.ts +6 -5
- package/src/index.ts +213 -212
- package/src/index.ts.bak +213 -212
- package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +3 -1
- package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +145 -19
- package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +67 -22
- package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +2 -1
- package/src/subsystems/IO/VectorDB.service/embed/index.ts +16 -0
- package/src/subsystems/LLMManager/LLM.helper.ts +25 -0
- package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +1 -1
- package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +35 -10
- package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +12 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +4 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +192 -139
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +17 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +18 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +14 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +6 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +5 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +8 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +1 -1
- package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +9 -8
- package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +92 -1
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +260 -17
- package/src/subsystems/Security/Account.service/AccountConnector.ts +3 -3
- package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +8 -63
- package/src/types/LLM.types.ts +24 -0
- package/src/types/VectorDB.types.ts +4 -0
- package/src/utils/array.utils.ts +11 -0
- package/src/utils/base64.utils.ts +1 -1
- package/src/utils/data.utils.ts +6 -4
- package/src/utils/string.utils.ts +3 -192
- package/src/Components/DataSourceCleaner.class.ts +0 -92
- package/src/Components/DataSourceIndexer.class.ts +0 -181
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import { IAgent as Agent } from '@sre/types/Agent.types';
|
|
2
|
+
import { Component } from '../Component.class';
|
|
3
|
+
import { TemplateString } from '@sre/helpers/TemplateString.helper';
|
|
4
|
+
import Joi from 'joi';
|
|
5
|
+
import { validateCharacterSet } from '../../utils';
|
|
6
|
+
import { ConnectorService } from '@sre/Core/ConnectorsService';
|
|
7
|
+
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
8
|
+
|
|
9
|
+
import { DataSourceIndexer } from './DataSourceIndexer.class';
|
|
10
|
+
import { DataSourceComponent, NsRecord } from './DataSourceComponent.class';
|
|
11
|
+
import { VectorDBConnector } from '@sre/IO/VectorDB.service/VectorDBConnector';
|
|
12
|
+
import envConfig from '@sre/config';
|
|
13
|
+
|
|
14
|
+
/**
 * Component that deletes a previously indexed data source from a vector database namespace.
 *
 * `process` dispatches to the legacy flow (`processV1`) or to the RAG v2 flow (`processV2`)
 * depending on the `ROLLOUT_RAG_V2` environment flag. Errors raised inside either flow's
 * `try` block are caught and reported through the `_error` field of the returned object.
 */
export class DataSourceCleaner extends DataSourceComponent {
    protected configSchema = Joi.object({
        namespaceId: Joi.string().max(50).allow('').label('namespace'),
        id: Joi.string().custom(validateCharacterSet, 'custom validation characterSet').allow('').label('source identifier'),
        version: Joi.string().valid('v1', 'v2').default('v1'),
    });

    constructor() {
        super();
    }

    init() {}

    /**
     * Runs the base component processing, then the flow selected by `ROLLOUT_RAG_V2`.
     *
     * @returns `{ _debug, Success: true }` on success, `{ _debug, _error }` on failure.
     */
    async process(input, config, agent: Agent) {
        await super.process(input, config, agent);

        if (envConfig.env.ROLLOUT_RAG_V2) {
            return await this.processV2(input, config, agent);
        }
        return await this.processV1(input, config, agent);
    }

    /**
     * Legacy flow: uses the default vector DB connector and requires the namespace to exist.
     *
     * @param input  Component input, used to resolve template variables in `config.data.id`.
     * @param config Component configuration; `config.data.namespaceId` and `config.data.id` are read.
     * @param agent  Running agent; provides the team id and the debug switch.
     */
    async processV1(input, config, agent: Agent) {
        const teamId = agent.teamId;

        // The log stays `null` when debugging is off; appending must not turn it into "null...".
        let debugOutput: string | null = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
        const log = (text: string) => {
            if (debugOutput !== null) debugOutput += text;
        };

        try {
            const configSchema = this.validateConfigData(config.data);
            if (configSchema.error) {
                throw new Error(`Config data validation error: ${configSchema.error}\n EXITING...`);
            }

            const inputSchema = this.validateInput(input);
            if (inputSchema.error) {
                throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
            }

            // Ids that start with 'c' + 24 lowercase alphanumerics are treated as prefixed:
            // everything up to the first underscore is dropped. If nothing is left after
            // stripping (no underscore in the id), fall back to the id as configured.
            const configuredNamespace: string = configSchema.value.namespaceId;
            const namespaceId = /^c[a-z0-9]{24}.+$/.test(configuredNamespace)
                ? configuredNamespace.split('_').slice(1).join('_') || configuredNamespace
                : configuredNamespace;

            const vectorDbClient = ConnectorService.getVectorDBConnector().requester(AccessCandidate.team(teamId));

            const namespaceFound = await vectorDbClient.namespaceExists(namespaceId);
            if (!namespaceFound) {
                throw new Error(`Namespace ${namespaceId} does not exist`);
            }

            const providedId = TemplateString(config.data.id).parse(input).result;
            if (!/^[a-zA-Z0-9\-\_\.]+$/.test(providedId)) {
                throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
            }
            log(`Searching for data source with id: ${providedId}\n`);

            const dsId = DataSourceComponent.normalizeDsId(providedId, teamId, namespaceId);
            await vectorDbClient.deleteDatasource(namespaceId, dsId);
            log(`Deleted data source with id: ${providedId}\n`);

            return {
                _debug: debugOutput,
                Success: true,
            };
        } catch (err: any) {
            log(`Failed to delete data source: \n Error: ${err?.message}\n`);

            return {
                _debug: debugOutput,
                _error: err?.message || "Couldn't delete data source",
            };
        }
    }

    /**
     * RAG v2 flow: resolves the namespace record and a dedicated vector DB connector through
     * `resolveVectorDbConnector`, then deletes the data source under the record's label.
     *
     * @param input  Component input, used to resolve template variables in `config.data.id`.
     * @param config Component configuration; `config.data.namespaceId` (label or legacy id) and `config.data.id` are read.
     * @param agent  Running agent; provides the team id and the debug switch.
     */
    async processV2(input, config, agent: Agent) {
        const teamId = agent.teamId;

        // The log stays `null` when debugging is off; appending must not turn it into "null...".
        let debugOutput: string | null = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
        const log = (text: string) => {
            if (debugOutput !== null) debugOutput += text;
        };

        try {
            const configSchema = this.validateConfigData(config.data);
            if (configSchema.error) {
                throw new Error(`Config data validation error: ${configSchema.error}\n EXITING...`);
            }

            const inputSchema = this.validateInput(input);
            if (inputSchema.error) {
                throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
            }

            const namespaceLabelOrId = configSchema.value.namespaceId;

            let resolved: { vecDbConnector: VectorDBConnector; namespaceRecord: NsRecord };
            try {
                resolved = await this.resolveVectorDbConnector(namespaceLabelOrId, teamId);
            } catch (err: any) {
                // Connector resolution failures are reported with their own fallback message.
                const reason = err?.message || "Couldn't get vector database connector";
                log(`Error: ${reason}\n\n`);
                return {
                    _debug: debugOutput,
                    _error: reason,
                };
            }
            const { vecDbConnector, namespaceRecord } = resolved;

            const providedId = TemplateString(config.data.id).parse(input).result;
            if (!/^[a-zA-Z0-9\-\_\.]+$/.test(providedId)) {
                throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
            }
            log(`Searching for data source with id: ${providedId}\n`);

            const dsId = DataSourceComponent.normalizeDsId(providedId, teamId, namespaceRecord.label);
            await vecDbConnector.requester(AccessCandidate.team(teamId)).deleteDatasource(namespaceRecord.label, dsId);
            log(`Deleted data source with id: ${providedId}\n`);

            return {
                _debug: debugOutput,
                Success: true,
            };
        } catch (err: any) {
            log(`Failed to delete data source: \n Error: ${err?.message}\n`);

            return {
                _debug: debugOutput,
                _error: err?.message || "Couldn't delete data source",
            };
        }
    }

    /** Accepts any object-shaped input; no specific input field is required by this component. */
    validateInput(input: any) {
        return Joi.object({}).unknown(true).validate(input);
    }

    /** Requires a string `namespaceId`; `id` may be a string, empty, or null. Unknown keys are allowed. */
    validateConfigData(data: any) {
        return Joi.object({
            namespaceId: Joi.string().required(),
            id: Joi.string().optional().allow('').allow(null),
        })
            .unknown(true)
            .validate(data);
    }
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// Base class for the RAG components that handle the shared logic for the RAG components
|
|
2
|
+
import { EmbeddingsFactory } from '@sre/IO/VectorDB.service/embed';
|
|
3
|
+
import { Component } from '../Component.class';
|
|
4
|
+
import { ConnectorService } from '@sre/Core/ConnectorsService';
|
|
5
|
+
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
6
|
+
import { getLLMCredentials } from '@sre/LLMManager/LLM.service/LLMCredentials.helper';
|
|
7
|
+
import { TEmbeddings } from '@sre/IO/VectorDB.service/embed/BaseEmbedding';
|
|
8
|
+
import { TemplateString } from '@sre/helpers/TemplateString.helper';
|
|
9
|
+
import { VectorDBConnector } from '@sre/IO/VectorDB.service/VectorDBConnector';
|
|
10
|
+
import { TLLMCredentials } from '@sre/types/LLM.types';
|
|
11
|
+
|
|
12
|
+
/**
 * Stored description of a vector database namespace.
 */
export type NsRecord = {
    /** Key of the team setting (group `vector_db_creds`) that holds the vector DB credentials. */
    credentialId: string;
    /** Embedding settings; `dimensions` is kept as a string and parsed to an integer when used. */
    embeddings: {
        dimensions: string;
        modelId: string;
    };
    /** Namespace label; used as the namespace name in vector DB calls and in normalized data source ids. */
    label: string;
    /** Creation timestamp (format not visible here - presumably an ISO string; confirm against the writer). */
    createdAt: string;
};
|
|
18
|
+
|
|
19
|
+
/**
 * Numeric codes attached to `DataSourceCompError` so callers can tell failure causes apart.
 */
export enum TDataSourceCompErrorCodes {
    /** No namespace record matched the requested label or legacy id. */
    NAMESPACE_NOT_FOUND = 1,

    /** The credential referenced by the namespace record could not be loaded. */
    CREDENTIAL_NOT_FOUND = 2,

    /** The embedding configuration is unusable. */
    EMBEDDING_CONFIG_ERROR = 3,
}
|
|
24
|
+
/**
 * Error raised by the data source components; carries a machine-readable `code`
 * next to the human-readable message.
 */
export class DataSourceCompError extends Error {
    /**
     * @param message Human-readable description of the failure.
     * @param code    Failure category.
     */
    constructor(
        message: string,
        public code: TDataSourceCompErrorCodes
    ) {
        super(message);
        this.name = 'DataSourceCompError';
    }
}
|
|
32
|
+
/**
 * Base class for the RAG components; holds the logic they share: resolving a namespace
 * record, building a vector DB connector for it, and normalizing data source ids.
 */
export class DataSourceComponent extends Component {
    constructor() {
        super();
    }

    /**
     * Builds a vector DB connector instance for a namespace.
     *
     * @param namespace Namespace record, or a string (label or legacy id) to be resolved to one.
     * @param teamId    Team that owns the namespace and its credentials.
     * @returns The configured connector together with the resolved namespace record.
     * @throws DataSourceCompError `NAMESPACE_NOT_FOUND` when a string namespace cannot be resolved,
     *         `CREDENTIAL_NOT_FOUND` when the referenced credential record is missing or malformed,
     *         `EMBEDDING_CONFIG_ERROR` when the record has no embeddings configuration.
     */
    public async resolveVectorDbConnector(
        namespace: string | NsRecord,
        teamId: string
    ): Promise<{ vecDbConnector: VectorDBConnector; namespaceRecord: NsRecord }> {
        const namespaceRecord: NsRecord = typeof namespace === 'string' ? await this.resolveNamespaceRecord(namespace, teamId) : namespace;

        const accountClient = ConnectorService.getAccountConnector().requester(AccessCandidate.team(teamId));
        const rawCredRecord = await accountClient.getTeamSetting(namespaceRecord.credentialId, 'vector_db_creds');
        if (!rawCredRecord) {
            throw new DataSourceCompError(
                `Credential ${namespaceRecord.credentialId} does not exist`,
                TDataSourceCompErrorCodes.CREDENTIAL_NOT_FOUND
            );
        }

        // A stored record that is not valid JSON, or that has no `credentials` map, would
        // otherwise surface as an opaque SyntaxError/TypeError; report it as a credential
        // problem instead (same code as a missing record: no usable credential was found).
        let credRecord: any = null;
        try {
            credRecord = JSON.parse(rawCredRecord);
        } catch {
            credRecord = null;
        }
        if (!credRecord || typeof credRecord.credentials !== 'object' || credRecord.credentials === null) {
            throw new DataSourceCompError(
                `Credential ${namespaceRecord.credentialId} is malformed`,
                TDataSourceCompErrorCodes.CREDENTIAL_NOT_FOUND
            );
        }

        // String credential values may contain team-key templates; resolve them in parallel.
        const credentials = credRecord.credentials;
        await Promise.all(
            Object.entries(credentials).map(async ([key, value]) => {
                if (typeof value !== 'string') return;
                credentials[key] = await TemplateString(value).parseTeamKeysAsync(teamId).asyncResult;
            })
        );

        const providerConnector = ConnectorService.getVectorDBConnector(credRecord.provider);
        const vecDbConnector = providerConnector.instance({
            credentials,
            embeddings: await this.buildEmbeddingConfig(namespaceRecord.embeddings, teamId),
        });

        return { vecDbConnector, namespaceRecord };
    }

    /**
     * Looks up a namespace record in the team's NKV store.
     *
     * First tries the given string as the record key; if nothing is stored under it, lists
     * all namespace records and picks the one whose `__legacy_id` equals the string.
     *
     * @throws DataSourceCompError `NAMESPACE_NOT_FOUND` when neither lookup matches.
     */
    private async resolveNamespaceRecord(namespace: string, teamId: string): Promise<NsRecord> {
        const nkvClient = ConnectorService.getNKVConnector().requester(AccessCandidate.team(teamId));

        // TRY 1) direct lookup using the provided label as key
        const rawNsRecord = await nkvClient.get(`vectorDB:namespaces`, namespace);
        if (rawNsRecord) {
            return JSON.parse(rawNsRecord.toString()) as NsRecord;
        }
        console.warn(`Namespace ${namespace} does not exist using the label ${namespace}. Trying to get namespace by mapping legacy id to V2 id`);

        // TRY 2) scan every stored namespace for a matching legacy id; unparsable entries are skipped
        const storedEntries = await nkvClient.list(`vectorDB:namespaces`);
        const parsedRecords = storedEntries
            .map((entry) => {
                try {
                    return JSON.parse(entry.data.toString());
                } catch {
                    return null;
                }
            })
            .filter((record) => record !== null);

        const matchingNamespace = parsedRecords.find((record) => record.__legacy_id === namespace);
        if (!matchingNamespace) {
            console.warn(`Namespace ${namespace} does not exist using the legacy id ${namespace}.`);
            throw new DataSourceCompError(`Namespace ${namespace} does not exist`, TDataSourceCompErrorCodes.NAMESPACE_NOT_FOUND);
        }
        return matchingNamespace;
    }

    /**
     * Converts the embedding settings stored on a namespace record into a `TEmbeddings` object,
     * including the LLM credentials resolved for the team.
     *
     * @param embedding Stored settings; `dimensions` is a base-10 integer in string form.
     * @param teamId    Team whose credentials are used.
     * @throws DataSourceCompError `EMBEDDING_CONFIG_ERROR` when no settings object is provided.
     */
    public async buildEmbeddingConfig(embedding: { dimensions: string; modelId: string }, teamId: string): Promise<TEmbeddings> {
        if (!embedding) {
            throw new DataSourceCompError('Embedding configuration is missing', TDataSourceCompErrorCodes.EMBEDDING_CONFIG_ERROR);
        }

        const provider = EmbeddingsFactory.getProviderByModel(embedding.modelId as any);

        // the provider determines which credentials have to be retrieved
        const llmCreds = await getLLMCredentials(AccessCandidate.team(teamId), {
            provider,
            modelId: embedding.modelId,
            credentials: [TLLMCredentials.Vault, TLLMCredentials.Internal],
        });

        const dimensions = parseInt(embedding.dimensions, 10);

        return {
            provider,
            model: embedding.modelId,
            credentials: llmCreds,
            dimensions, // passed both here and in params for backwards compatibility
            params: {
                dimensions,
            },
        };
    }

    /** Builds the storage id of a data source: `<teamId>::<namespaceId>::<providedId>`. */
    public static normalizeDsId(providedId: string, teamId: string, namespaceId: string) {
        return `${teamId}::${namespaceId}::${providedId}`;
    }
}
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import { IAgent as Agent } from '@sre/types/Agent.types';
|
|
2
|
+
import { DataSourceComponent, NsRecord } from './DataSourceComponent.class';
|
|
3
|
+
import Joi from 'joi';
|
|
4
|
+
import { validateCharacterSet } from '@sre/utils/validation.utils';
|
|
5
|
+
import { TemplateString } from '@sre/helpers/TemplateString.helper';
|
|
6
|
+
import { isUrl, detectURLSourceType } from '../../utils';
|
|
7
|
+
import { SmythFS } from '@sre/IO/Storage.service/SmythFS.class';
|
|
8
|
+
import { ConnectorService } from '@sre/Core/ConnectorsService';
|
|
9
|
+
|
|
10
|
+
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
11
|
+
import { TEmbeddings } from '@sre/IO/VectorDB.service/embed/BaseEmbedding';
|
|
12
|
+
import { VectorDBConnector } from '@sre/IO/VectorDB.service/VectorDBConnector';
|
|
13
|
+
import { JSONContentHelper } from '@sre/helpers/JsonContent.helper';
|
|
14
|
+
import envConfig from '@sre/config';
|
|
15
|
+
|
|
16
|
+
/**
 * Component that indexes a text source as a data source in a vector database namespace.
 *
 * `process` dispatches to the legacy flow (`processV1`) or to the RAG v2 flow (`processV2`)
 * depending on the `ROLLOUT_RAG_V2` environment flag. Errors raised inside either flow's
 * `try` block are caught and reported through the `_error` field of the returned object.
 */
export class DataSourceIndexer extends DataSourceComponent {
    private MAX_ALLOWED_URLS_PER_INPUT = 20;

    protected configSchema = Joi.object({
        namespace: Joi.string().max(50).allow(''),
        id: Joi.string().custom(validateCharacterSet, 'id custom validation').allow('').label('source identifier'),
        name: Joi.string().max(50).allow('').label('label'),
        metadata: Joi.string().allow(null).allow('').max(10000).label('metadata'),
        chunkSize: Joi.number().optional(),
        chunkOverlap: Joi.number().optional(),
        version: Joi.string().valid('v1', 'v2').default('v1'),
    });

    constructor() {
        super();
    }

    init() {}

    /**
     * Runs the base component processing, then the flow selected by `ROLLOUT_RAG_V2`.
     *
     * @returns `{ _debug, Success: { result, id } }` on success, `{ _debug, _error }` on failure.
     */
    async process(input, config, agent: Agent) {
        await super.process(input, config, agent);

        if (envConfig.env.ROLLOUT_RAG_V2) {
            return await this.processV2(input, config, agent);
        }
        return await this.processV1(input, config, agent);
    }

    /**
     * Legacy flow: uses the default vector DB connector and creates the namespace when missing.
     *
     * The request (input and id) is validated before the namespace is touched, so an invalid
     * request does not leave an empty namespace behind.
     */
    private async processV1(input, config, agent: Agent) {
        const teamId = agent.teamId;

        // The log stays `null` when debugging is off; appending must not turn it into "null...".
        let debugOutput: string | null = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
        const log = (text: string) => {
            if (debugOutput !== null) debugOutput += text;
        };

        try {
            const _config = {
                ...config.data,
                name: TemplateString(config.data.name).parse(input).result,
                id: TemplateString(config.data.id).parse(input).result,
                metadata: TemplateString(config.data.metadata).parse(input).result,
            };

            // Drop everything up to the first underscore; keep the full value when nothing is left.
            // NOTE(review): DataSourceCleaner's legacy flow only strips ids that match a cuid-like
            // prefix - confirm both components resolve the same namespace for unprefixed names.
            const namespaceId = _config.namespace.split('_').slice(1).join('_') || _config.namespace;
            log(`[Selected data space id] \n${namespaceId}\n\n`);

            const inputSchema = this.validateInput(input);
            if (inputSchema.error) {
                throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
            }

            const providedId = _config.id;
            if (!providedId) {
                throw new Error(`Id is required`);
            }
            if (!/^[a-zA-Z0-9\-\_\.]+$/.test(providedId)) {
                throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
            }

            const vectorDbClient = ConnectorService.getVectorDBConnector().requester(AccessCandidate.team(teamId));
            const nsExists = await vectorDbClient.namespaceExists(namespaceId);
            if (!nsExists) {
                const newNs = await vectorDbClient.createNamespace(namespaceId);
                log(`[Created namespace] \n${newNs}\n\n`);
            }

            const dsId = DataSourceIndexer.normalizeDsId(providedId, teamId, namespaceId);

            if (isUrl(inputSchema.value.Source)) {
                log(`STEP: Parsing input as url\n\n`);
                throw new Error('URLs are not supported yet');
            }

            log(`STEP: Parsing input as text\n\n`);
            const indexRes: any = await this.addDSFromText({
                teamId,
                namespaceId,
                text: inputSchema.value.Source,
                name: _config.name || 'Untitled',
                metadata: _config.metadata || null,
                sourceId: dsId,
            });

            log(`Created datasource successfully\n\n`);

            return {
                _debug: debugOutput,
                Success: {
                    result: indexRes?.data?.dataSource || true,
                    id: _config.id,
                },
            };
        } catch (err: any) {
            log(`Error: ${err?.message || "Couldn't index data source"}\n\n`);
            return {
                _debug: debugOutput,
                _error: err?.message || "Couldn't index data source",
            };
        }
    }

    /**
     * RAG v2 flow: resolves the namespace record and a dedicated vector DB connector through
     * `resolveVectorDbConnector`, refuses to overwrite an existing data source, then creates it.
     */
    private async processV2(input, config, agent: Agent) {
        const teamId = agent.teamId;

        // The log stays `null` when debugging is off; appending must not turn it into "null...".
        let debugOutput: string | null = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
        const log = (text: string) => {
            if (debugOutput !== null) debugOutput += text;
        };

        try {
            const _config = {
                ...config.data,
                name: TemplateString(config.data.name).parse(input).result,
                id: TemplateString(config.data.id).parse(input).result,
                metadata: TemplateString(config.data.metadata).parse(input).result,
            };

            // The configured value may be a namespace label or a legacy id; resolution handles both.
            const namespaceLabelOrId = _config.namespace;

            let resolved: { vecDbConnector: VectorDBConnector; namespaceRecord: NsRecord };
            try {
                resolved = await this.resolveVectorDbConnector(namespaceLabelOrId, teamId);
            } catch (err: any) {
                // Connector resolution failures are reported with their own fallback message.
                const reason = err?.message || "Couldn't get vector database connector";
                log(`Error: ${reason}\n\n`);
                return {
                    _debug: debugOutput,
                    _error: reason,
                };
            }
            const { vecDbConnector, namespaceRecord } = resolved;

            log(`[Selected data space] \n${namespaceRecord.label}\n\n`);

            const vecDbClient = vecDbConnector.requester(AccessCandidate.team(teamId));

            const inputSchema = this.validateInput(input);
            if (inputSchema.error) {
                throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
            }

            const providedId = _config.id;
            if (!providedId) {
                throw new Error(`Id is required`);
            }
            if (!/^[a-zA-Z0-9\-\_\.]+$/.test(providedId)) {
                throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
            }

            const dsId = DataSourceIndexer.normalizeDsId(providedId, teamId, namespaceRecord.label);

            // an existing data source with the same id is reported as an error, not overwritten
            const dsExists = await vecDbClient.getDatasource(namespaceRecord.label, dsId);
            if (dsExists) {
                log(`Datasource already exists\n\n`);
                return {
                    _debug: debugOutput,
                    _error: `Datasource already exists`,
                };
            }

            log(`STEP: Parsing input as text\n\n`);

            const response = await vecDbClient.createDatasource(namespaceRecord.label, {
                text: inputSchema.value.Source,
                metadata: JSONContentHelper.create(_config.metadata).tryParse() || null,
                id: dsId,
                label: _config.name || 'Untitled',
                chunkSize: _config.chunkSize ? parseInt(_config.chunkSize, 10) : undefined,
                chunkOverlap: _config.chunkOverlap ? parseInt(_config.chunkOverlap, 10) : undefined,
            });

            log(`Created datasource successfully\n\n`);

            return {
                _debug: debugOutput,
                Success: {
                    result: response || true,
                    id: _config.id,
                },
            };
        } catch (err: any) {
            log(`Error: ${err?.message || "Couldn't index data source"}\n\n`);
            return {
                _debug: debugOutput,
                _error: err?.message || "Couldn't index data source",
            };
        }
    }

    /** Requires a `Source` field of any type; unknown keys are allowed. */
    validateInput(input: any) {
        return Joi.object({
            Source: Joi.any().required(),
        })
            .unknown(true)
            .validate(input);
    }

    /**
     * Creates a text data source through the default vector DB connector (legacy flow).
     *
     * @returns Whatever the connector's `createDatasource` call resolves to.
     */
    private async addDSFromText({ teamId, sourceId, namespaceId, text, name, metadata }) {
        const vectorDbClient = ConnectorService.getVectorDBConnector().requester(AccessCandidate.team(teamId));

        return await vectorDbClient.createDatasource(namespaceId, {
            text,
            metadata,
            id: sourceId,
            label: name,
        });
    }
}
|