@smythos/sre 1.7.20 → 1.7.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +114 -76
- package/dist/index.js.map +1 -1
- package/dist/types/Components/DataSourceIndexer.class.d.ts +4 -12
- package/dist/types/Components/GenAILLM.class.d.ts +5 -5
- package/dist/types/Components/index.d.ts +3 -3
- package/dist/types/index.d.ts +3 -3
- package/dist/types/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.d.ts +1 -0
- package/dist/types/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.d.ts +11 -4
- package/dist/types/subsystems/IO/VectorDB.service/embed/index.d.ts +5 -0
- package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.d.ts +35 -0
- package/dist/types/subsystems/Security/Account.service/AccountConnector.d.ts +2 -2
- package/dist/types/subsystems/Security/Vault.service/connectors/SecretsManager.class.d.ts +2 -3
- package/dist/types/types/VectorDB.types.d.ts +4 -0
- package/dist/types/utils/string.utils.d.ts +1 -0
- package/package.json +3 -3
- package/src/Components/APIEndpoint.class.ts +1 -6
- package/src/Components/Component.class.ts +14 -1
- package/src/Components/DataSourceIndexer.class.ts +148 -34
- package/src/Components/GenAILLM.class.ts +21 -11
- package/src/Components/RAG/DataSourceCleaner.class.ts +178 -0
- package/src/Components/RAG/DataSourceComponent.class.ts +111 -0
- package/src/Components/RAG/DataSourceIndexer.class.ts +254 -0
- package/src/Components/{DataSourceLookup.class.ts → RAG/DataSourceLookup.class.ts} +92 -3
- package/src/Components/ServerlessCode.class.ts +1 -4
- package/src/Components/index.ts +3 -3
- package/src/helpers/S3Cache.helper.ts +2 -1
- package/src/index.ts +212 -212
- package/src/index.ts.bak +212 -212
- package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +3 -1
- package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +145 -19
- package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +56 -22
- package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +1 -0
- package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +2 -1
- package/src/subsystems/IO/VectorDB.service/embed/index.ts +18 -0
- package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +35 -10
- package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +12 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +4 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +13 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +17 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +18 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +14 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +6 -4
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +5 -5
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +8 -3
- package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +9 -8
- package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +92 -1
- package/src/subsystems/ObservabilityManager/Telemetry.service/connectors/OTel/OTel.class.ts +32 -6
- package/src/subsystems/Security/Account.service/AccountConnector.ts +3 -3
- package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +8 -63
- package/src/types/VectorDB.types.ts +4 -0
- package/src/utils/array.utils.ts +11 -0
- package/src/utils/base64.utils.ts +1 -1
- package/src/utils/string.utils.ts +3 -192
- package/src/Components/DataSourceCleaner.class.ts +0 -92
|
@@ -6,18 +6,10 @@ export declare class DataSourceIndexer extends Component {
|
|
|
6
6
|
protected configSchema: Joi.ObjectSchema<any>;
|
|
7
7
|
constructor();
|
|
8
8
|
init(): void;
|
|
9
|
-
process(input: any, config: any, agent: Agent): Promise<{
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
id: any;
|
|
14
|
-
};
|
|
15
|
-
_error?: undefined;
|
|
16
|
-
} | {
|
|
17
|
-
_debug: string;
|
|
18
|
-
_error: any;
|
|
19
|
-
Success?: undefined;
|
|
20
|
-
}>;
|
|
9
|
+
process(input: any, config: any, agent: Agent): Promise<any>;
|
|
10
|
+
private processV1;
|
|
11
|
+
private processV2;
|
|
12
|
+
private transformEmbedding;
|
|
21
13
|
validateInput(input: any): Joi.ValidationResult<any>;
|
|
22
14
|
private addDSFromText;
|
|
23
15
|
static genDsId(providedId: string, teamId: string, namespaceId: string): string;
|
|
@@ -262,19 +262,19 @@ export declare class GenAILLM extends Component {
|
|
|
262
262
|
process(input: any, config: any, agent: Agent): Promise<{
|
|
263
263
|
Reply: any;
|
|
264
264
|
} | {
|
|
265
|
-
|
|
265
|
+
Output: any;
|
|
266
266
|
_error: string;
|
|
267
267
|
_debug: string;
|
|
268
|
-
|
|
268
|
+
Reply?: undefined;
|
|
269
269
|
} | {
|
|
270
|
-
|
|
270
|
+
Reply: any;
|
|
271
271
|
_error: string;
|
|
272
272
|
_debug: string;
|
|
273
|
-
|
|
273
|
+
Output?: undefined;
|
|
274
274
|
} | {
|
|
275
275
|
_error: any;
|
|
276
276
|
_debug: string;
|
|
277
|
-
Reply?: undefined;
|
|
278
277
|
Output?: undefined;
|
|
278
|
+
Reply?: undefined;
|
|
279
279
|
}>;
|
|
280
280
|
}
|
|
@@ -8,9 +8,9 @@ import { FSleep } from './FSleep.class';
|
|
|
8
8
|
import { FHash } from './FHash.class';
|
|
9
9
|
import { FEncDec } from './FEncDec.class';
|
|
10
10
|
import { FTimestamp } from './FTimestamp.class';
|
|
11
|
-
import { DataSourceLookup } from './DataSourceLookup.class';
|
|
12
|
-
import { DataSourceIndexer } from './DataSourceIndexer.class';
|
|
13
|
-
import { DataSourceCleaner } from './DataSourceCleaner.class';
|
|
11
|
+
import { DataSourceLookup } from './RAG/DataSourceLookup.class';
|
|
12
|
+
import { DataSourceIndexer } from './RAG/DataSourceIndexer.class';
|
|
13
|
+
import { DataSourceCleaner } from './RAG/DataSourceCleaner.class';
|
|
14
14
|
import { JSONFilter } from './JSONFilter.class';
|
|
15
15
|
import { LogicAND } from './LogicAND.class';
|
|
16
16
|
import { LogicOR } from './LogicOR.class';
|
package/dist/types/index.d.ts
CHANGED
|
@@ -9,9 +9,6 @@ export * from './Components/Await.class';
|
|
|
9
9
|
export * from './Components/Classifier.class';
|
|
10
10
|
export * from './Components/Component.class';
|
|
11
11
|
export * from './Components/ComponentHost.class';
|
|
12
|
-
export * from './Components/DataSourceCleaner.class';
|
|
13
|
-
export * from './Components/DataSourceIndexer.class';
|
|
14
|
-
export * from './Components/DataSourceLookup.class';
|
|
15
12
|
export * from './Components/FEncDec.class';
|
|
16
13
|
export * from './Components/FHash.class';
|
|
17
14
|
export * from './Components/FileStore.class';
|
|
@@ -88,6 +85,9 @@ export * from './Components/APICall/parseHeaders';
|
|
|
88
85
|
export * from './Components/APICall/parseProxy';
|
|
89
86
|
export * from './Components/APICall/parseUrl';
|
|
90
87
|
export * from './Components/Image/imageSettings.config';
|
|
88
|
+
export * from './Components/RAG/DataSourceCleaner.class';
|
|
89
|
+
export * from './Components/RAG/DataSourceComponent.class';
|
|
90
|
+
export * from './Components/RAG/DataSourceLookup.class';
|
|
91
91
|
export * from './Components/Triggers/Gmail.trigger';
|
|
92
92
|
export * from './Components/Triggers/JobScheduler.trigger';
|
|
93
93
|
export * from './Components/Triggers/Trigger.class';
|
|
@@ -34,6 +34,7 @@ export declare class MilvusVectorDB extends VectorDBConnector {
|
|
|
34
34
|
embedder: BaseEmbedding;
|
|
35
35
|
private SCHEMA_DEFINITION;
|
|
36
36
|
private INDEX_PARAMS;
|
|
37
|
+
private nkvConnector;
|
|
37
38
|
constructor(_settings: MilvusConfig);
|
|
38
39
|
protected createNamespace(acRequest: AccessRequest, namespace: string, metadata?: {
|
|
39
40
|
[key: string]: any;
|
|
@@ -7,13 +7,20 @@ import { DatasourceDto, IStorageVectorDataSource, IVectorDataSourceDto, QueryOpt
|
|
|
7
7
|
import { BaseEmbedding, TEmbeddings } from '../embed/BaseEmbedding';
|
|
8
8
|
export type PineconeConfig = {
|
|
9
9
|
/**
|
|
10
|
-
* The Pinecone API key
|
|
10
|
+
* The Pinecone API key [LEGACY]
|
|
11
11
|
*/
|
|
12
|
-
apiKey: string;
|
|
12
|
+
apiKey?: string;
|
|
13
13
|
/**
|
|
14
|
-
* The Pinecone index name
|
|
14
|
+
* The Pinecone index name [LEGACY]
|
|
15
15
|
*/
|
|
16
|
-
indexName: string;
|
|
16
|
+
indexName?: string;
|
|
17
|
+
/**
|
|
18
|
+
* The Pinecone credentials [New unified format]
|
|
19
|
+
*/
|
|
20
|
+
credentials?: {
|
|
21
|
+
apiKey: string;
|
|
22
|
+
indexName: string;
|
|
23
|
+
};
|
|
17
24
|
/**
|
|
18
25
|
* The embeddings model to use
|
|
19
26
|
*/
|
|
@@ -20,5 +20,10 @@ export declare class EmbeddingsFactory {
|
|
|
20
20
|
static create(provider?: SupportedProviders, config?: TEmbeddings & {
|
|
21
21
|
model?: SupportedModels[SupportedProviders] | TLLMModel;
|
|
22
22
|
}): OpenAIEmbeds | GoogleEmbeds;
|
|
23
|
+
static getProviderByModel(model: SupportedModels): SupportedProviders;
|
|
24
|
+
static getModels(): {
|
|
25
|
+
provider: SupportedProviders;
|
|
26
|
+
model: SupportedModels[SupportedProviders];
|
|
27
|
+
}[];
|
|
23
28
|
}
|
|
24
29
|
export {};
|
|
@@ -35,6 +35,41 @@ export declare class JSONModelsProvider extends ModelsProviderConnector {
|
|
|
35
35
|
private getValidModels;
|
|
36
36
|
private isValidSingleModel;
|
|
37
37
|
private isValidModel;
|
|
38
|
+
/**
|
|
39
|
+
* Determines whether a file path should be ignored by the directory watcher.
|
|
40
|
+
*
|
|
41
|
+
* This method implements a sophisticated filtering strategy for dot-segment paths
|
|
42
|
+
* (paths containing directories that start with a dot, like .git, .env, .cache).
|
|
43
|
+
*
|
|
44
|
+
* **Filtering Strategy:**
|
|
45
|
+
* 1. Paths WITHOUT dot segments: Never ignored
|
|
46
|
+
* 2. Paths WITH dot segments:
|
|
47
|
+
* - If SMYTH_PATH is not configured: All ignored
|
|
48
|
+
* - If SMYTH_PATH is configured:
|
|
49
|
+
* - Allow the watched directory even if SMYTH_PATH contains dot-segments
|
|
50
|
+
* (e.g., /home/user/.smyth/models/OpenAI/default.json is allowed)
|
|
51
|
+
* - Ignore dot-segments INSIDE the models directory
|
|
52
|
+
* (e.g., /home/user/.smyth/models/.hidden/model.json is ignored)
|
|
53
|
+
* - Paths outside watched directory: Ignored
|
|
54
|
+
*
|
|
55
|
+
* @param filePath - The file path to check
|
|
56
|
+
* @param watchedDir - The absolute path of the directory being watched (models folder)
|
|
57
|
+
* @param smythPath - The resolved SMYTH_PATH, or null if not configured
|
|
58
|
+
* @returns true if the path should be ignored, false if it should be watched
|
|
59
|
+
*
|
|
60
|
+
* @example
|
|
61
|
+
* ```typescript
|
|
62
|
+
* // Path without dot-segment (allowed)
|
|
63
|
+
* shouldIgnorePath('/models/OpenAI/default.json', '/models', '/home/.smyth') // => false
|
|
64
|
+
*
|
|
65
|
+
* // Dot-segment inside models directory (ignored)
|
|
66
|
+
* shouldIgnorePath('/models/.git/config', '/models', '/home/.smyth') // => true
|
|
67
|
+
*
|
|
68
|
+
* // Dot-segment in parent path only (allowed)
|
|
69
|
+
* shouldIgnorePath('/home/.smyth/models/OpenAI/default.json', '/home/.smyth/models', '/home/.smyth') // => false
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
private shouldIgnorePath;
|
|
38
73
|
private initDirWatcher;
|
|
39
74
|
}
|
|
40
75
|
export {};
|
|
@@ -9,7 +9,7 @@ export interface ISmythAccountRequest {
|
|
|
9
9
|
getCandidateTeam(): Promise<string | undefined>;
|
|
10
10
|
getAllTeamSettings(): Promise<KeyValueObject>;
|
|
11
11
|
getAllUserSettings(): Promise<KeyValueObject>;
|
|
12
|
-
getTeamSetting(settingKey: string): Promise<string>;
|
|
12
|
+
getTeamSetting(settingKey: string, group?: string): Promise<string>;
|
|
13
13
|
getUserSetting(settingKey: string): Promise<string>;
|
|
14
14
|
getAgentSetting(settingKey: string): Promise<string>;
|
|
15
15
|
getTeam(): Promise<string>;
|
|
@@ -23,7 +23,7 @@ export declare abstract class AccountConnector extends Connector {
|
|
|
23
23
|
abstract getCandidateTeam(candidate: IAccessCandidate): Promise<string | undefined>;
|
|
24
24
|
abstract getAllTeamSettings(acRequest: AccessRequest, teamId: string): Promise<KeyValueObject>;
|
|
25
25
|
abstract getAllUserSettings(acRequest: AccessRequest, accountId: string): Promise<KeyValueObject>;
|
|
26
|
-
abstract getTeamSetting(acRequest: AccessRequest, teamId: string, settingKey: string): Promise<string>;
|
|
26
|
+
abstract getTeamSetting(acRequest: AccessRequest, teamId: string, settingKey: string, group?: string): Promise<string>;
|
|
27
27
|
abstract getUserSetting(acRequest: AccessRequest, accountId: string, settingKey: string): Promise<string>;
|
|
28
28
|
abstract getAgentSetting(acRequest: AccessRequest, agentId: string, settingKey: string): Promise<string>;
|
|
29
29
|
}
|
|
@@ -14,12 +14,11 @@ export declare class SecretsManager extends VaultConnector {
|
|
|
14
14
|
private secretsManager;
|
|
15
15
|
private prefix;
|
|
16
16
|
constructor(_settings: SecretsManagerConfig);
|
|
17
|
-
protected get(acRequest: AccessRequest, secretName: string): Promise<
|
|
17
|
+
protected get(acRequest: AccessRequest, secretName: string): Promise<string>;
|
|
18
18
|
protected exists(acRequest: AccessRequest, keyId: string): Promise<boolean>;
|
|
19
19
|
protected listKeys(acRequest: AccessRequest): Promise<any[]>;
|
|
20
20
|
getResourceACL(resourceId: string, candidate: IAccessCandidate): Promise<ACL>;
|
|
21
|
-
private getSecretByName;
|
|
22
21
|
private getVaultKey;
|
|
23
22
|
private getSecretById;
|
|
24
|
-
private
|
|
23
|
+
private extractSecretName;
|
|
25
24
|
}
|
|
@@ -42,6 +42,10 @@ export interface IStorageVectorDataSource {
|
|
|
42
42
|
id: string;
|
|
43
43
|
candidateId: string;
|
|
44
44
|
candidateRole: string;
|
|
45
|
+
datasourceSizeMb?: number;
|
|
46
|
+
createdAt?: Date;
|
|
47
|
+
chunkSize?: number;
|
|
48
|
+
chunkOverlap?: number;
|
|
45
49
|
}
|
|
46
50
|
export interface IStorageVectorNamespace {
|
|
47
51
|
namespace: string;
|
|
@@ -22,3 +22,4 @@ export declare const kebabToCapitalize: (input: any) => any;
|
|
|
22
22
|
* @param input
|
|
23
23
|
*/
|
|
24
24
|
export declare const identifyMimetypeFromString: (input: string) => "application/json" | "text/plain" | "application/xml" | "text/html" | "text/css" | "text/csv" | "text/markdown" | "" | "image/svg+xml" | "application/javascript" | "application/yaml" | "application/sql";
|
|
25
|
+
export declare function calcSizeMb(text: string): number;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smythos/sre",
|
|
3
|
-
"version": "1.7.20",
|
|
3
|
+
"version": "1.7.40",
|
|
4
4
|
"description": "Smyth Runtime Environment",
|
|
5
5
|
"author": "Alaa-eddine KADDOURI",
|
|
6
6
|
"license": "MIT",
|
|
@@ -68,7 +68,7 @@
|
|
|
68
68
|
"@pinecone-database/pinecone": "^3.0.0",
|
|
69
69
|
"@runware/sdk-js": "^1.1.36",
|
|
70
70
|
"@smithy/smithy-client": "^4.4.3",
|
|
71
|
-
"@zilliz/milvus2-sdk-node": "^2.6.
|
|
71
|
+
"@zilliz/milvus2-sdk-node": "^2.6.4",
|
|
72
72
|
"acorn": "^8.14.1",
|
|
73
73
|
"axios": "^1.7.2",
|
|
74
74
|
"chokidar": "^4.0.3",
|
|
@@ -117,7 +117,7 @@
|
|
|
117
117
|
"scripts": {
|
|
118
118
|
"gen:barrel": "ctix build",
|
|
119
119
|
"build:types": "tsc --emitDeclarationOnly --declaration --outDir dist/types -p tsconfig.dts.json",
|
|
120
|
-
"build:jsbundle": "cross-env rollup -c",
|
|
120
|
+
"build:jsbundle": "cross-env NODE_OPTIONS='--max-old-space-size=8192' rollup -c",
|
|
121
121
|
"build": "pnpm run build:jsbundle && pnpm run build:types",
|
|
122
122
|
"test:unit": "cd ../.. && vitest run packages/core/tests/unit",
|
|
123
123
|
"test:unit:watch": "cd ../.. && vitest watch packages/core/tests/unit",
|
|
@@ -246,12 +246,7 @@ async function resolveTemplateVariables(data: any, input: any, agent: Agent): Pr
|
|
|
246
246
|
if (isKeyTemplateVar(value as string)) {
|
|
247
247
|
data[key] = await parseKey(value as string, agent.teamId);
|
|
248
248
|
} else if (isTemplateVar(value as string)) {
|
|
249
|
-
|
|
250
|
-
// This correctly resolves cases where input values reference agent variables with the same name.
|
|
251
|
-
// Example: agent variables { user_id: "123" }, input { user_id: "{{user_id}}" }.
|
|
252
|
-
data[key] = TemplateString(value as string)
|
|
253
|
-
.parse(input)
|
|
254
|
-
.parse(agent.agentVariables).result;
|
|
249
|
+
data[key] = TemplateString(value as string).parse(input).result;
|
|
255
250
|
}
|
|
256
251
|
}
|
|
257
252
|
|
|
@@ -3,6 +3,7 @@ import { IAgent as Agent } from '@sre/types/Agent.types';
|
|
|
3
3
|
import { Logger } from '@sre/helpers/Log.helper';
|
|
4
4
|
import { performTypeInference } from '@sre/helpers/TypeChecker.helper';
|
|
5
5
|
import { hookableClass, hookAsync } from '@sre/Core/HookService';
|
|
6
|
+
import { TemplateString } from '@sre/helpers/TemplateString.helper';
|
|
6
7
|
|
|
7
8
|
export type TComponentSchema = {
|
|
8
9
|
name: string;
|
|
@@ -123,7 +124,19 @@ export class Component {
|
|
|
123
124
|
if (agent.isKilled()) {
|
|
124
125
|
throw new Error('Agent killed');
|
|
125
126
|
}
|
|
126
|
-
|
|
127
|
+
|
|
128
|
+
let _input = {};
|
|
129
|
+
|
|
130
|
+
// #region Resolve agent variables so that:
|
|
131
|
+
// - type inference works correctly
|
|
132
|
+
// - we don’t need a separate resolution step when the variable name
|
|
133
|
+
// matches the component input name
|
|
134
|
+
for (let [key, value] of Object.entries(input)) {
|
|
135
|
+
_input[key] = TemplateString(value as string).parse(agent.agentVariables).result;
|
|
136
|
+
}
|
|
137
|
+
// #endregion
|
|
138
|
+
|
|
139
|
+
_input = await performTypeInference(_input, config?.inputs, agent);
|
|
127
140
|
|
|
128
141
|
// modify the input object for component's process method
|
|
129
142
|
for (const [key, value] of Object.entries(_input)) {
|
|
@@ -8,6 +8,9 @@ import { SmythFS } from '@sre/IO/Storage.service/SmythFS.class';
|
|
|
8
8
|
import { ConnectorService } from '@sre/Core/ConnectorsService';
|
|
9
9
|
|
|
10
10
|
import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
|
|
11
|
+
import { TEmbeddings } from '@sre/IO/VectorDB.service/embed/BaseEmbedding';
|
|
12
|
+
import { EmbeddingsFactory, SupportedModels } from '@sre/IO/VectorDB.service/embed';
|
|
13
|
+
import { getLLMCredentials } from '@sre/LLMManager/LLM.service/LLMCredentials.helper';
|
|
11
14
|
|
|
12
15
|
export class DataSourceIndexer extends Component {
|
|
13
16
|
private MAX_ALLOWED_URLS_PER_INPUT = 20;
|
|
@@ -16,6 +19,9 @@ export class DataSourceIndexer extends Component {
|
|
|
16
19
|
id: Joi.string().custom(validateCharacterSet, 'id custom validation').allow('').label('source identifier'),
|
|
17
20
|
name: Joi.string().max(50).allow('').label('label'),
|
|
18
21
|
metadata: Joi.string().allow(null).allow('').max(10000).label('metadata'),
|
|
22
|
+
chunkSize: Joi.number().optional(),
|
|
23
|
+
chunkOverlap: Joi.number().optional(),
|
|
24
|
+
version: Joi.string().valid('v1', 'v2').default('v1'),
|
|
19
25
|
});
|
|
20
26
|
constructor() {
|
|
21
27
|
super();
|
|
@@ -23,6 +29,18 @@ export class DataSourceIndexer extends Component {
|
|
|
23
29
|
init() {}
|
|
24
30
|
async process(input, config, agent: Agent) {
|
|
25
31
|
await super.process(input, config, agent);
|
|
32
|
+
|
|
33
|
+
let response: any = null;
|
|
34
|
+
if (!config.data.version || config.data.version === 'v1') {
|
|
35
|
+
response = await this.processV1(input, config, agent);
|
|
36
|
+
} else if (config.data.version === 'v2') {
|
|
37
|
+
response = await this.processV2(input, config, agent);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return response;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
private async processV1(input, config, agent: Agent) {
|
|
26
44
|
const teamId = agent.teamId;
|
|
27
45
|
const agentId = agent.id;
|
|
28
46
|
let debugOutput = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
|
|
@@ -72,47 +90,13 @@ export class DataSourceIndexer extends Component {
|
|
|
72
90
|
throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
|
|
73
91
|
}
|
|
74
92
|
|
|
75
|
-
// check if the datasource already exists with the same id
|
|
76
|
-
// await this.checkForRecordDuplicate(dsId, token);
|
|
77
|
-
|
|
78
93
|
let indexRes: any = null;
|
|
79
94
|
let parsedUrlArray: string[] | null = null;
|
|
80
|
-
|
|
81
|
-
//! DISABLE URL ARRAY PARSING FOR NOW UNTIL WE HAVE A GOOD WAY TO HANDLE BULK INDEXING
|
|
82
|
-
// if ((parsedUrlArray = parseUrlArray(inputSchema.value.Source))) {
|
|
83
|
-
// debugOutput += `STEP: Parsing input as url array\n\n`;
|
|
84
|
-
// if (parsedUrlArray.length > this.MAX_ALLOWED_URLS_PER_INPUT) {
|
|
85
|
-
// throw new Error(`Too many urls in input. Max allowed: ${this.MAX_ALLOWED_URLS_PER_INPUT}`);
|
|
86
|
-
// }
|
|
87
|
-
|
|
88
|
-
// for (let url of parsedUrlArray) {
|
|
89
|
-
// indexRes = await this.addDSFromUrl({
|
|
90
|
-
// teamId,
|
|
91
|
-
// namespaceId,
|
|
92
|
-
// dsId, // WILL OVERRIDE EACH OTHER
|
|
93
|
-
// type: detectURLSourceType(url),
|
|
94
|
-
// url,
|
|
95
|
-
// name: _config.name || 'Untitled',
|
|
96
|
-
// });
|
|
97
|
-
|
|
98
|
-
// debugOutput += `STEP: Created datasource for url: ${url}\n\n`;
|
|
99
|
-
// }
|
|
100
|
-
// } else
|
|
101
|
-
|
|
102
95
|
const dsId = DataSourceIndexer.genDsId(providedId, teamId, namespaceId);
|
|
103
96
|
|
|
104
97
|
if (isUrl(inputSchema.value.Source)) {
|
|
105
98
|
debugOutput += `STEP: Parsing input as url\n\n`;
|
|
106
99
|
throw new Error('URLs are not supported yet');
|
|
107
|
-
// indexRes = await this.addDSFromUrl({
|
|
108
|
-
// teamId,
|
|
109
|
-
// namespaceId,
|
|
110
|
-
// dsId,
|
|
111
|
-
// type: detectURLSourceType(inputSchema.value.Source),
|
|
112
|
-
// url: inputSchema.value.Source,
|
|
113
|
-
// name: _config.name || 'Untitled',
|
|
114
|
-
// metadata: _config.metadata || null,
|
|
115
|
-
// });
|
|
116
100
|
} else {
|
|
117
101
|
debugOutput += `STEP: Parsing input as text\n\n`;
|
|
118
102
|
indexRes = await this.addDSFromText({
|
|
@@ -144,6 +128,136 @@ export class DataSourceIndexer extends Component {
|
|
|
144
128
|
}
|
|
145
129
|
}
|
|
146
130
|
|
|
131
|
+
private async processV2(input, config, agent: Agent) {
|
|
132
|
+
const teamId = agent.teamId;
|
|
133
|
+
const agentId = agent.id;
|
|
134
|
+
let debugOutput = agent.agentRuntime?.debug ? '== Source Indexer Log ==\n' : null;
|
|
135
|
+
|
|
136
|
+
try {
|
|
137
|
+
const _config = {
|
|
138
|
+
...config.data,
|
|
139
|
+
name: TemplateString(config.data.name).parse(input).result,
|
|
140
|
+
id: TemplateString(config.data.id).parse(input).result,
|
|
141
|
+
metadata: TemplateString(config.data.metadata).parse(input).result,
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
const outputs = {};
|
|
145
|
+
for (let con of config.outputs) {
|
|
146
|
+
if (con.default) continue;
|
|
147
|
+
outputs[con.name] = con?.description ? `<${con?.description}>` : '';
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// we try to get the namespace without the prefix teamId, if not exist, we use the full namespace id
|
|
151
|
+
const namespaceLabel = _config.namespace.split('_').slice(1).join('_') || _config.namespace;
|
|
152
|
+
const namespaceId = _config.namespace;
|
|
153
|
+
debugOutput += `[Selected namespace] \n${namespaceLabel}\n\n`;
|
|
154
|
+
|
|
155
|
+
// resolve the ns record, if not exist, throw an error (new in v2)
|
|
156
|
+
// then we also need to resolve the credentials
|
|
157
|
+
const nkvConnector = ConnectorService.getNKVConnector();
|
|
158
|
+
const nkvClient = nkvConnector.requester(AccessCandidate.team(teamId));
|
|
159
|
+
const rawNsRecord = await nkvClient.get(`vectorDB:namespaces`, namespaceId);
|
|
160
|
+
|
|
161
|
+
if (!rawNsRecord) {
|
|
162
|
+
return {
|
|
163
|
+
_debug: debugOutput,
|
|
164
|
+
_error: `Namespace ${namespaceLabel} does not exist`,
|
|
165
|
+
};
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// const { credentialId, embeddings: embeddingsOptions } = JSON.parse(rawNsRecord.toString());
|
|
169
|
+
const namespaceRecord = JSON.parse(rawNsRecord.toString());
|
|
170
|
+
const accountConnector = ConnectorService.getAccountConnector();
|
|
171
|
+
const accountClient = accountConnector.requester(AccessCandidate.team(teamId));
|
|
172
|
+
const rawCredRecord = await accountClient.getTeamSetting(namespaceRecord.credentialId, 'vector_db_creds');
|
|
173
|
+
if (!rawCredRecord) {
|
|
174
|
+
throw new Error(`Credential ${namespaceRecord.credentialId} does not exist`);
|
|
175
|
+
}
|
|
176
|
+
const credRecord = JSON.parse(rawCredRecord);
|
|
177
|
+
await Promise.all(
|
|
178
|
+
Object.keys(credRecord.credentials).map(async (key) => {
|
|
179
|
+
if (typeof credRecord.credentials[key] !== 'string') return;
|
|
180
|
+
credRecord.credentials[key] = await TemplateString(credRecord.credentials[key]).parseTeamKeysAsync(teamId).asyncResult;
|
|
181
|
+
})
|
|
182
|
+
);
|
|
183
|
+
|
|
184
|
+
const vecDbConnector = ConnectorService.getVectorDBConnector(credRecord.provider).instance({
|
|
185
|
+
credentials: credRecord.credentials,
|
|
186
|
+
embeddings: await this.transformEmbedding(namespaceRecord.embeddings, config.data, teamId),
|
|
187
|
+
});
|
|
188
|
+
const vecDbClient = vecDbConnector.requester(AccessCandidate.team(teamId));
|
|
189
|
+
|
|
190
|
+
const inputSchema = this.validateInput(input);
|
|
191
|
+
if (inputSchema.error) {
|
|
192
|
+
throw new Error(`Input validation error: ${inputSchema.error}\n EXITING...`);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
const providedId = _config.id;
|
|
196
|
+
// const isAutoId = _config.isAutoId;
|
|
197
|
+
const idRegex = /^[a-zA-Z0-9\-\_\.]+$/;
|
|
198
|
+
|
|
199
|
+
if (!providedId) {
|
|
200
|
+
// Assign a new ID if it's set to auto-generate or not provided
|
|
201
|
+
// _config.id = crypto.randomBytes(16).toString('hex');
|
|
202
|
+
throw new Error(`Id is required`);
|
|
203
|
+
} else if (!idRegex.test(providedId)) {
|
|
204
|
+
// Validate the provided ID if it's not auto-generated
|
|
205
|
+
throw new Error(`Invalid id. Accepted characters: 'a-z', 'A-Z', '0-9', '-', '_', '.'`);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
const dsId = DataSourceIndexer.genDsId(providedId, teamId, namespaceLabel);
|
|
209
|
+
|
|
210
|
+
debugOutput += `STEP: Parsing input as text\n\n`;
|
|
211
|
+
|
|
212
|
+
const response = await vecDbClient.createDatasource(namespaceLabel, {
|
|
213
|
+
text: inputSchema.value.Source,
|
|
214
|
+
metadata: _config.metadata || null,
|
|
215
|
+
id: dsId,
|
|
216
|
+
label: _config.name || 'Untitled',
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
debugOutput += `Created datasource successfully\n\n`;
|
|
220
|
+
|
|
221
|
+
return {
|
|
222
|
+
_debug: debugOutput,
|
|
223
|
+
Success: {
|
|
224
|
+
result: response || true,
|
|
225
|
+
id: _config.id,
|
|
226
|
+
},
|
|
227
|
+
// _error,
|
|
228
|
+
};
|
|
229
|
+
} catch (err: any) {
|
|
230
|
+
debugOutput += `Error: ${err?.message || "Couldn't index data source"}\n\n`;
|
|
231
|
+
return {
|
|
232
|
+
_debug: debugOutput,
|
|
233
|
+
_error: err?.message || "Couldn't index data source",
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
private async transformEmbedding(embedding: { dimensions: string; modelId: string }, data: any, teamId: string): Promise<TEmbeddings> {
|
|
239
|
+
// we need to take this and return a proper TEmbeddings object
|
|
240
|
+
|
|
241
|
+
const provider = EmbeddingsFactory.getProviderByModel(embedding.modelId as any);
|
|
242
|
+
|
|
243
|
+
// based on the provider, we should be able to retreive the correct credentials
|
|
244
|
+
const modelsProvider = ConnectorService.getModelsProviderConnector();
|
|
245
|
+
const modelProviderCandidate = modelsProvider.requester(AccessCandidate.team(teamId));
|
|
246
|
+
const modelInfo = await modelProviderCandidate.getModelInfo(embedding.modelId);
|
|
247
|
+
|
|
248
|
+
const llmCreds = await getLLMCredentials(AccessCandidate.team(teamId), modelInfo);
|
|
249
|
+
|
|
250
|
+
return {
|
|
251
|
+
provider,
|
|
252
|
+
model: embedding.modelId,
|
|
253
|
+
credentials: llmCreds,
|
|
254
|
+
params: {
|
|
255
|
+
dimensions: parseInt(embedding.dimensions),
|
|
256
|
+
chunkSize: data.chunkSize,
|
|
257
|
+
},
|
|
258
|
+
};
|
|
259
|
+
}
|
|
260
|
+
|
|
147
261
|
validateInput(input: any) {
|
|
148
262
|
return Joi.object({
|
|
149
263
|
Source: Joi.any().required(),
|
|
@@ -528,26 +528,36 @@ export class GenAILLM extends Component {
|
|
|
528
528
|
response = await contentPromise.catch((error) => {
|
|
529
529
|
return { error: error.message || error };
|
|
530
530
|
});
|
|
531
|
-
|
|
531
|
+
|
|
532
|
+
// #region Handle Response Errors
|
|
533
|
+
if (response?.error) {
|
|
534
|
+
const error = response?.error + ' ' + (response?.details || '');
|
|
535
|
+
logger.error(` LLM Error=`, error);
|
|
536
|
+
|
|
537
|
+
return { Output: response?.data, _error: error, _debug: logger.output };
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
const emptyResponseErrorMsg =
|
|
541
|
+
"Empty response. This is usually due to output token limit reached. Please try again with a higher 'Maximum Output Tokens'.";
|
|
542
|
+
|
|
543
|
+
// If the finish reason is not "stop", it means the model stopped before completing the response.
|
|
532
544
|
if (finishReason !== 'stop') {
|
|
545
|
+
let errMsg = `The model stopped before completing the response.
|
|
546
|
+
\nReason: ${finishReason}.
|
|
547
|
+
\n${!response ? emptyResponseErrorMsg : ''}`;
|
|
548
|
+
|
|
533
549
|
return {
|
|
534
550
|
Reply: response,
|
|
535
|
-
_error:
|
|
551
|
+
_error: errMsg,
|
|
536
552
|
_debug: logger.output,
|
|
537
553
|
};
|
|
538
554
|
}
|
|
539
555
|
|
|
540
|
-
//
|
|
556
|
+
// If the finish reason is "stop" but there is still no response, it is usually caused by reaching the output token limit.
|
|
541
557
|
if (!response) {
|
|
542
|
-
return { _error:
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
if (response?.error) {
|
|
546
|
-
const error = response?.error + ' ' + (response?.details || '');
|
|
547
|
-
logger.error(` LLM Error=`, error);
|
|
548
|
-
|
|
549
|
-
return { Output: response?.data, _error: error, _debug: logger.output };
|
|
558
|
+
return { _error: emptyResponseErrorMsg, _debug: logger.output };
|
|
550
559
|
}
|
|
560
|
+
// #endregion
|
|
551
561
|
|
|
552
562
|
const Reply = llmInference.connector.postProcess(response);
|
|
553
563
|
if (Reply.error) {
|