@forzalabs/remora 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +8 -0
- package/actions/automap.js +73 -0
- package/actions/compile.js +57 -0
- package/actions/debug.js +61 -0
- package/actions/deploy.js +95 -0
- package/actions/discover.js +36 -0
- package/actions/init.js +78 -0
- package/actions/run.js +51 -0
- package/auth/AdminManager.js +48 -0
- package/auth/ApiKeysManager.js +45 -0
- package/auth/JWTManager.js +56 -0
- package/core/Affirm.js +42 -0
- package/core/Algo.js +155 -0
- package/core/dste/DSTE.js +113 -0
- package/core/logger/DebugLogService.js +48 -0
- package/core/logger/DevelopmentLogService.js +70 -0
- package/core/logger/LocalLogService.js +70 -0
- package/core/logger/Logger.js +54 -0
- package/database/DatabaseEngine.js +119 -0
- package/database/DatabaseInitializer.js +80 -0
- package/database/DatabaseStructure.js +27 -0
- package/definitions/agents/DestinationDriver.js +2 -0
- package/definitions/agents/SourceDriver.js +2 -0
- package/definitions/cli.js +2 -0
- package/definitions/database/ApiKeys.js +2 -0
- package/definitions/database/Stored.js +7 -0
- package/definitions/database/UsageStat.js +2 -0
- package/definitions/database/User.js +2 -0
- package/definitions/json_schemas/consumer-schema.json +423 -0
- package/definitions/json_schemas/producer-schema.json +236 -0
- package/definitions/json_schemas/project-schema.json +59 -0
- package/definitions/json_schemas/source-schema.json +109 -0
- package/definitions/requests/ConsumerRequest.js +2 -0
- package/definitions/requests/Developer.js +2 -0
- package/definitions/requests/Mapping.js +2 -0
- package/definitions/requests/ProducerRequest.js +2 -0
- package/definitions/requests/Request.js +2 -0
- package/definitions/resources/Compiled.js +2 -0
- package/definitions/resources/Consumer.js +2 -0
- package/definitions/resources/Environment.js +2 -0
- package/definitions/resources/Library.js +2 -0
- package/definitions/resources/Producer.js +2 -0
- package/definitions/resources/Project.js +2 -0
- package/definitions/resources/Schema.js +2 -0
- package/definitions/resources/Source.js +2 -0
- package/documentation/README.md +123 -0
- package/documentation/default_resources/consumer.json +52 -0
- package/documentation/default_resources/producer.json +32 -0
- package/documentation/default_resources/project.json +14 -0
- package/documentation/default_resources/schema.json +36 -0
- package/documentation/default_resources/source.json +15 -0
- package/drivers/DriverFactory.js +56 -0
- package/drivers/LocalDriver.js +122 -0
- package/drivers/RedshiftDriver.js +179 -0
- package/drivers/S3Driver.js +47 -0
- package/drivers/S3SourceDriver.js +127 -0
- package/engines/CryptoEngine.js +46 -0
- package/engines/Environment.js +139 -0
- package/engines/ParseManager.js +38 -0
- package/engines/ProducerEngine.js +150 -0
- package/engines/UsageManager.js +61 -0
- package/engines/UserManager.js +43 -0
- package/engines/Validator.js +154 -0
- package/engines/ai/AutoMapperEngine.js +37 -0
- package/engines/ai/DeveloperEngine.js +70 -0
- package/engines/ai/LLM.js +299 -0
- package/engines/consumer/ConsumerEngine.js +204 -0
- package/engines/consumer/ConsumerManager.js +155 -0
- package/engines/consumer/PostProcessor.js +143 -0
- package/engines/deployment/DeploymentPlanner.js +46 -0
- package/engines/execution/ExecutionEnvironment.js +114 -0
- package/engines/execution/ExecutionPlanner.js +92 -0
- package/engines/execution/RequestExecutor.js +100 -0
- package/engines/file/FileCompiler.js +28 -0
- package/engines/file/FileExporter.js +116 -0
- package/engines/schema/SchemaEngine.js +33 -0
- package/engines/schema/SchemaValidator.js +67 -0
- package/engines/sql/SQLBuilder.js +96 -0
- package/engines/sql/SQLCompiler.js +140 -0
- package/engines/sql/SQLUtils.js +22 -0
- package/engines/validation/Validator.js +151 -0
- package/helper/Helper.js +64 -0
- package/helper/Settings.js +13 -0
- package/index.js +63 -0
- package/package.json +77 -0
package/engines/ai/DeveloperEngine.js
@@ -0,0 +1,70 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const Affirm_1 = __importDefault(require("../../core/Affirm"));
+const ProducerEngine_1 = __importDefault(require("../ProducerEngine"));
+const path_1 = __importDefault(require("path"));
+const promises_1 = __importDefault(require("fs/promises"));
+class DeveloperEngineClass {
+    constructor() {
+        this.discover = (producer) => __awaiter(this, void 0, void 0, function* () {
+            var _a;
+            (0, Affirm_1.default)(producer, 'Invalid producer');
+            const sampleData = yield ProducerEngine_1.default.readSampleData(producer);
+            (0, Affirm_1.default)(sampleData, 'Discover process failed: no result found');
+            const typeDefinitions = this.extractFieldTypes(sampleData);
+            const mappedProducer = {
+                name: producer.name,
+                description: producer.description,
+                source: producer.source,
+                settings: Object.assign({}, producer.settings),
+                dimensions: typeDefinitions.map(field => ({
+                    name: field.name,
+                    type: this.mapFieldTypeToProducerType(field.type),
+                    description: `Auto-mapped field: ${field.name}`
+                })),
+                measures: [],
+                _version: (_a = producer._version) !== null && _a !== void 0 ? _a : 1
+            };
+            // Save the mapped producer to file
+            const producerPath = path_1.default.join(process.cwd(), 'remora', 'producers', `${producer.name}.json`);
+            yield promises_1.default.writeFile(producerPath, JSON.stringify(mappedProducer, null, 4), 'utf-8');
+            return { producer: mappedProducer, fields: typeDefinitions };
+        });
+        this.mapFieldTypeToProducerType = (fieldType) => {
+            switch (fieldType) {
+                case 'number':
+                    return 'number';
+                case 'string':
+                    return 'string';
+                case 'date':
+                case 'datetime':
+                    return 'datetime';
+                default:
+                    return 'string';
+            }
+        };
+        this.extractFieldTypes = (data) => {
+            if (!data || data.length === 0)
+                return [];
+            const sample = data[0];
+            return Object.entries(sample).map(([key, value]) => ({
+                name: key,
+                type: Array.isArray(value) ? 'array' : typeof value
+            }));
+        };
+    }
+}
+const DeveloperEngine = new DeveloperEngineClass();
+exports.default = DeveloperEngine;
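
DeveloperEngine exposes a single discover entry point: it samples the producer's source through ProducerEngine.readSampleData, infers field types from the first row, and writes the mapped producer to remora/producers/<name>.json. The following is a minimal usage sketch; the require path, the producer definition, and the pre-existing remora/producers directory are illustrative assumptions, not documented behavior of the package.

// Usage sketch (assumptions: deep requires into the package are permitted and the
// remora/producers directory already exists; the producer object below is made up).
const DeveloperEngine = require('@forzalabs/remora/engines/ai/DeveloperEngine').default;

const producer = {
    name: 'claims',
    description: 'Raw claims data',
    source: 'warehouse', // must reference a source defined in the project
    settings: {}
};

DeveloperEngine.discover(producer)
    .then(({ producer: mapped, fields }) => {
        // discover() has also written remora/producers/claims.json with the inferred dimensions
        console.log(fields); // e.g. [{ name: 'id', type: 'string' }, ...]
    })
    .catch(console.error);
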
package/engines/ai/LLM.js
@@ -0,0 +1,299 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const openai_1 = __importDefault(require("openai"));
+const zod_1 = require("openai/helpers/zod");
+const zod_2 = require("zod");
+const baseProducersSystemPrompt = `
+# TASK
+You are an agent tasked with creating the mapping between an INPUT DATA SPEC and one or more OUTPUT DATA SPEC.
+The mapping between the two is made by creating PRODUCERS and CONSUMERS.
+You are tasked with creating the PRODUCER(S) that will then be used.
+
+# PRODUCERS
+A producer maps directly to a dataset and exposes it's dimensions.
+
+## FIELDS
+- alias: the reference to the column or property name if different from the desired name property
+- classification: make your best guess if the field falls under any of these regulations
+
+# RULES
+- Add only the required fields to comply with the OUTPUT DATA SPEC
+- Add fields that you think are important
+
+# FORMAT
+The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
+The OUTPUT DATA SPEC is a JSON of the desired output that the consumer needs to return.
+The result must be returned as a JSON object.
+
+# INPUT DATA SPEC
+{{input data spec}}
+
+# OUTPUT DATA SPEC
+{{output data spec}}
+`;
+const baseConsumersSystemPrompt = `
+# TASK
+You are an agent tasked with creating the mapping between a list of PRODUCERS and one or more OUTPUT DATA SPEC.
+You are going to receive a list of PRODUCERS that expose some dimensions, and you will create the CONSUMERS that comply with the provided OUTPUT DATA SPEC.
+
+# CONSUMERS
+A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
+
+## FIELDS
+- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
+- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
+
+# RULES
+- If a field is not needed, do not add it e.g. if there is no need for the "grouping" property, then don't add it.
+- Only import a producer once
+
+# FORMAT
+The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
+The OUTPUT DATA SPEC is a JSON of the desired output that the consumer needs to return.
+The result must be returned as a JSON object.
+
+# PRODUCERS
+{{producers}}
+
+# OUTPUT DATA SPEC
+{{output data spec}}
+
+# EXAMPLES
+producers: """
+{
+    "name": "claim",
+    "dimensions": [
+        {
+            "name": "id",
+            "type": "string",
+            "pk": true
+        },
+        {
+            "name": "amount",
+            "type": "number"
+        },
+        {
+            "name": "provider",
+            "type": "string"
+        },
+        {
+            "name": "date",
+            "type": "datetime"
+        }
+    ]
+}
+"""
+
+output data spec: """
+{
+    "name": "claim",
+    "fields": [
+        {
+            "name": "id",
+            "type": "string",
+            "pk": true
+        },
+        {
+            "name": "amount",
+            "type": "number"
+        },
+        {
+            "name": "provider",
+            "type": "string"
+        },
+        {
+            "name": "date",
+            "type": "datetime"
+        }
+    ]
+}
+"""
+
+resulting consumer: """
+{
+    "name": "claims",
+    "fields": [
+        {
+            "key": "id"
+        },
+        {
+            "key": "amount"
+        },
+        {
+            "key": "provider"
+        },
+        {
+            "key": "date",
+            "alias": "Creation date"
+        }
+    ],
+    "outputs": [
+        {
+            "format": "JSON"
+        }
+    ],
+    "producers": [
+        {
+            "name": "claim"
+        }
+    ]
+}
+"""
+`;
+const baseQASystemPrompt = `
+# TASK
+You are an agent tasked with ensuring that the CONSUMER(S) created follow the guidelines given.
+You are going to receive a list of CONSUMERS and you need to return in the correct JSON format the same CONSUMERS with the needed updates to ensure that they follow all the rules.
+
+# CONSUMER DEFINITION
+A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
+
+## FIELDS
+- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
+- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
+
+# RULES
+- If a field is not needed, do not add it e.g. if there is no need for the "grouping" property, then don't add it.
+- Only import a producer once
+
+# CONSUMERS
+{{consumers}}
+`;
+class LLM {
+    constructor() {
+        this.inferProducers = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
+            let systemPrompt = baseProducersSystemPrompt;
+            systemPrompt = systemPrompt.replace('{{input data spec}}', input.map(x => `- ${JSON.stringify(x)}`).join('\n'));
+            systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
+            const res = yield this._client.beta.chat.completions.parse({
+                model: 'o3-mini',
+                messages: [
+                    { role: 'system', content: systemPrompt }
+                ],
+                response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
+                    producers: zod_2.z.array(zod_2.z.object({
+                        name: zod_2.z.string(),
+                        description: zod_2.z.string(),
+                        dimensions: zod_2.z.array(zod_2.z.object({
+                            name: zod_2.z.string(),
+                            alias: zod_2.z.string().optional(),
+                            description: zod_2.z.string().optional(),
+                            type: zod_2.z.enum(['string', 'number', 'datetime']),
+                            pk: zod_2.z.boolean().optional(),
+                            classification: zod_2.z.array(zod_2.z.enum(['PHI', 'PII', 'GDPR'])).optional()
+                        }))
+                    }))
+                }), 'environment')
+            });
+            const msg = res.choices[0].message;
+            return msg.parsed;
+        });
+        this.inferConsumers = (producers, outputs) => __awaiter(this, void 0, void 0, function* () {
+            let systemPrompt = baseConsumersSystemPrompt;
+            systemPrompt = systemPrompt.replace('{{producers}}', producers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
+            systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
+            const item = {
+                model: 'o3-mini',
+                messages: [
+                    { role: 'system', content: systemPrompt }
+                ],
+                response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
+                    consumers: zod_2.z.array(zod_2.z.object({
+                        name: zod_2.z.string(),
+                        description: zod_2.z.string(),
+                        producers: zod_2.z.array(zod_2.z.object({
+                            name: zod_2.z.string().describe('References one of the producers. Must be unique, there can\'t be two entry with the same name.'),
+                            joins: zod_2.z.array(zod_2.z.object({
+                                otherName: zod_2.z.string(),
+                                relationship: zod_2.z.enum(['one-to-one', 'one-to-many', 'many-to-one']),
+                                sql: zod_2.z.string()
+                            })).optional().describe('Which other producer to join this one with. Omit if empty.')
+                        })),
+                        fields: zod_2.z.array(zod_2.z.object({
+                            key: zod_2.z.string(),
+                            from: zod_2.z.string().optional(),
+                            grouping: zod_2.z.object({
+                                groupingKey: zod_2.z.string(),
+                                subFields: zod_2.z.array(zod_2.z.lazy(() => zod_2.z.object({
+                                    key: zod_2.z.string(),
+                                    from: zod_2.z.string().optional()
+                                })))
+                            }).optional()
+                        })),
+                        outputs: zod_2.z.array(zod_2.z.object({
+                            format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
+                        }))
+                    }))
+                }), 'environment')
+            };
+            const res = yield this._client.beta.chat.completions.parse(item);
+            const msg = res.choices[0].message;
+            const firstDraft = msg.parsed;
+            const qaSystemPrompt = baseQASystemPrompt.replace('{{consumers}}', firstDraft.consumers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
+            const res2 = yield this._client.beta.chat.completions.parse({
+                model: 'o3-mini',
+                messages: [
+                    { role: 'system', content: qaSystemPrompt }
+                ],
+                response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
+                    consumers: zod_2.z.array(zod_2.z.object({
+                        name: zod_2.z.string(),
+                        description: zod_2.z.string(),
+                        producers: zod_2.z.array(zod_2.z.object({
+                            name: zod_2.z.string().describe('References one of the producers. Must be unique, there can\'t be two entry with the same name.'),
+                            joins: zod_2.z.array(zod_2.z.object({
+                                otherName: zod_2.z.string(),
+                                relationship: zod_2.z.enum(['one-to-one', 'one-to-many', 'many-to-one']),
+                                sql: zod_2.z.string()
+                            })).optional().describe('Which other producer to join this one with. Omit if empty.')
+                        })),
+                        fields: zod_2.z.array(zod_2.z.object({
+                            key: zod_2.z.string(),
+                            from: zod_2.z.string().optional(),
+                            grouping: zod_2.z.object({
+                                groupingKey: zod_2.z.string().optional(),
+                                subFields: zod_2.z.array(zod_2.z.lazy(() => zod_2.z.object({
+                                    key: zod_2.z.string(),
+                                    from: zod_2.z.string().optional()
+                                }))).optional()
+                            }).optional()
+                        })),
+                        outputs: zod_2.z.array(zod_2.z.object({
+                            format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
+                        }))
+                    }))
+                }), 'environment')
+            });
+            const msg2 = res2.choices[0].message;
+            const finalDraft = msg2.parsed;
+            // Do some manual adjustments cause some things still don't work...
+            if (finalDraft && finalDraft.consumers) {
+                for (const cons of finalDraft.consumers) {
+                    for (const field of cons.fields) {
+                        if (field.grouping) {
+                            if (!field.grouping.groupingKey || field.grouping.groupingKey.length === 0)
+                                field.grouping = undefined;
+                        }
+                    }
+                }
+            }
+            return finalDraft;
+        });
+        this._client = new openai_1.default({
+            apiKey: process.env.OPENAI_API_KEY
+        });
+    }
+}
+exports.default = LLM;
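
LLM wraps the OpenAI client with zod-validated structured outputs (zodResponseFormat with the o3-mini model): inferProducers derives producer definitions from sample rows plus the desired output spec, while inferConsumers drafts consumers and then runs a second QA pass before clearing empty grouping blocks. Below is a rough usage sketch with invented sample data and an assumed require path; OPENAI_API_KEY must be set because the constructor reads it from the environment.

// Usage sketch (assumption: the module is reachable at this path; the data is illustrative).
const LLM = require('@forzalabs/remora/engines/ai/LLM').default;

async function main() {
    const llm = new LLM(); // reads OPENAI_API_KEY from the environment

    // "Input data spec": the first rows of the dataset to be mapped.
    const sampleRows = [
        { id: 'c-1', amount: 120.5, provider: 'acme', date: '2024-01-01' }
    ];
    // "Output data spec": the shape the consumer should eventually return.
    const outputSpec = [
        { name: 'claims', fields: [{ name: 'id' }, { name: 'amount' }, { name: 'provider' }] }
    ];

    const { producers } = await llm.inferProducers(sampleRows, outputSpec);
    const { consumers } = await llm.inferConsumers(producers, outputSpec);
    console.log(JSON.stringify({ producers, consumers }, null, 2));
}

main().catch(console.error);
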
package/engines/consumer/ConsumerEngine.js
@@ -0,0 +1,204 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const Affirm_1 = __importDefault(require("../../core/Affirm"));
+const Algo_1 = __importDefault(require("../../core/Algo"));
+const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
+const Helper_1 = __importDefault(require("../../helper/Helper"));
+const DeploymentPlanner_1 = __importDefault(require("../deployment/DeploymentPlanner"));
+const Environment_1 = __importDefault(require("../Environment"));
+const ExecutionEnvironment_1 = __importDefault(require("../execution/ExecutionEnvironment"));
+const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
+const SQLUtils_1 = __importDefault(require("../sql/SQLUtils"));
+const UsageManager_1 = __importDefault(require("../UsageManager"));
+const ConsumerManager_1 = __importDefault(require("./ConsumerManager"));
+class ConsumerEngineClass {
+    constructor() {
+        this.compile = (consumer) => {
+            var _a, _b;
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const availableColumns = consumer.producers.flatMap((cProd) => {
+                var _a, _b;
+                const producer = Environment_1.default.getProducer(cProd.name);
+                if (!producer) {
+                    const subConsumer = Environment_1.default.getConsumer(cProd.name);
+                    (0, Affirm_1.default)(subConsumer, `No producer found with name "${cProd.name}"`);
+                    return this.compile(subConsumer);
+                }
+                else {
+                    const dims = producer.dimensions.map(x => ({
+                        consumerAlias: null,
+                        consumerKey: null,
+                        nameInProducer: x.name,
+                        aliasInProducer: x.alias,
+                        dimension: x,
+                        owner: cProd.name
+                    }));
+                    const meas = (_b = (_a = producer.measures) === null || _a === void 0 ? void 0 : _a.map(x => ({
+                        consumerAlias: null,
+                        consumerKey: null,
+                        nameInProducer: x.name,
+                        aliasInProducer: x.name,
+                        measure: x,
+                        owner: cProd.name
+                    }))) !== null && _b !== void 0 ? _b : [];
+                    return [...dims, ...meas];
+                }
+            });
+            const selectedColumns = [];
+            const flat = ConsumerManager_1.default.getConsumerFlatFields(consumer);
+            for (let i = 0; i < flat.length; i++) {
+                const field = flat[i];
+                // TODO: replace with the new funcitons in the consumermanager to reduce diplicate code
+                if (field.key === '*') {
+                    const from = (_a = field.from) !== null && _a !== void 0 ? _a : (consumer.producers.length === 1 ? consumer.producers[0].name : null);
+                    availableColumns.filter(x => x.owner === from).forEach((col) => {
+                        col.consumerKey = col.nameInProducer;
+                        col.consumerAlias = col.nameInProducer;
+                        selectedColumns.push(col);
+                    });
+                }
+                else if (field.grouping) {
+                    // This field should be ignored since it is only created when building the output for supported formats (json)
+                    continue;
+                }
+                else {
+                    const col = ConsumerManager_1.default.searchFieldInColumns(field, availableColumns, consumer);
+                    (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
+                    col.consumerKey = field.key;
+                    col.consumerAlias = (_b = field.alias) !== null && _b !== void 0 ? _b : field.key;
+                    selectedColumns.push(col);
+                }
+            }
+            const columnsWithNoAlias = selectedColumns.filter(x => !x.consumerAlias || !x.consumerKey);
+            (0, Affirm_1.default)(columnsWithNoAlias.length === 0, `Consumer "${consumer.name}" compilation error: some selected fields don't have a correct alias or key (${columnsWithNoAlias.map(x => x.nameInProducer).join(', ')})`);
+            return selectedColumns;
+        };
+        this.deploy = (consumer) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const firstProd = Environment_1.default.getFirstProducer(consumer.producers[0].name);
+            (0, Affirm_1.default)(firstProd, `Missing producer in consumer "${consumer.name}"`);
+            // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
+            const allSources = consumer.producers.map(x => Environment_1.default.getSource(Environment_1.default.getProducer(x.name).source));
+            const uniqEngines = Algo_1.default.uniqBy(allSources, 'engine');
+            (0, Affirm_1.default)(uniqEngines.length === 1, `Sources with different engines were used in a single consumer (${uniqEngines.join(', ')})`);
+            // For now we also only support consumers that have producers ALL having the same exact source
+            const uniqNames = Algo_1.default.uniqBy(allSources, 'name');
+            (0, Affirm_1.default)(uniqNames.length === 1, `Producers with different sources were used in a single consumer (${uniqNames.join(', ')})`);
+            const source = Environment_1.default.getSource(firstProd.source);
+            const driver = yield DriverFactory_1.default.instantiateSource(source);
+            const plan = DeploymentPlanner_1.default.planConsumer(consumer);
+            for (const planStep of plan) {
+                switch (planStep.type) {
+                    case 'create-materialized-view': {
+                        const sql = SQLCompiler_1.default.compileConsumer(consumer);
+                        (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for consumer "${consumer.name}"`);
+                        const internalSchema = Environment_1.default.get('schema');
+                        (0, Affirm_1.default)(internalSchema, `Invalid schema set on the authentication for source "${source.name}"`);
+                        // TODO When I want to update a materialize view there is no way except killing it and recreating it. The problem is that: 1) it is not said that it can be deleted since that materialize view could have some dependencies 2) we should find a way to update it without it going completely offline.
+                        const mvSQL = `
+                        DROP MATERIALIZED VIEW IF EXISTS "${internalSchema}"."${SQLUtils_1.default.acceleratedViewName(consumer.name)}";
+                        CREATE MATERIALIZED VIEW "${internalSchema}"."${SQLUtils_1.default.acceleratedViewName(consumer.name)}" AS ${sql}`;
+                        yield driver.execute(mvSQL);
+                        break;
+                    }
+                    case 'create-view': {
+                        const sql = SQLCompiler_1.default.compileConsumer(consumer);
+                        (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for consumer "${consumer.name}"`);
+                        const internalSchema = Environment_1.default.get('schema');
+                        (0, Affirm_1.default)(internalSchema, `Invalid schema set on the authentication for source "${source.name}"`);
+                        const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.sanitizeName(consumer.name)}" AS ${sql}`;
+                        yield driver.execute(vSQL);
+                        break;
+                    }
+                    default: throw new Error(`Invalid execution consumer plan step type "${planStep.type}"`);
+                }
+            }
+        });
+        this.execute = (consumer, options, user) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            (0, Affirm_1.default)(options, `Invalid execute consume options`);
+            const { usageId } = UsageManager_1.default.startUsage(consumer, user);
+            try {
+                const execution = new ExecutionEnvironment_1.default(consumer);
+                const result = yield execution.run(options);
+                UsageManager_1.default.endUsage(usageId, result.data.length);
+                return result;
+            }
+            catch (error) {
+                UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
+                throw error;
+            }
+        });
+        this.getOutputShape = (consumer) => {
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const compiled = this.compile(consumer);
+            const outDimensions = compiled.map(x => {
+                var _a;
+                return ({
+                    name: (_a = x.consumerAlias) !== null && _a !== void 0 ? _a : x.consumerKey,
+                    type: x.dimension.type,
+                    classification: x.dimension.classification,
+                    description: x.dimension.description,
+                    mask: x.dimension.mask,
+                    pk: x.dimension.pk
+                });
+            });
+            return {
+                _version: consumer._version,
+                name: consumer.name,
+                description: consumer.description,
+                metadata: consumer.metadata,
+                dimensions: outDimensions
+            };
+        };
+        /**
+         * Given a consumer, create the entire dependency chain of all the sub-consumers, producers and finally sources that are used by this consumer
+         */
+        this.getDependencyChain = (consumer, depth = 0) => {
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const chain = [];
+            for (let i = 0; i < consumer.producers.length; i++) {
+                const cProd = consumer.producers[i];
+                const producer = Environment_1.default.getProducer(cProd.name);
+                if (!producer) {
+                    const subConsumer = Environment_1.default.getConsumer(cProd.name);
+                    (0, Affirm_1.default)(subConsumer, `No producer found with name "${cProd.name}"`);
+                    chain.push({
+                        depth: depth,
+                        from: { name: consumer.name, type: 'consumer' },
+                        to: { name: subConsumer.name, type: 'consumer' }
+                    });
+                    if (subConsumer.producers && subConsumer.producers.length > 0)
+                        return [...chain, ...this.getDependencyChain(subConsumer, depth + 1)];
+                }
+                else {
+                    chain.push({
+                        depth: depth,
+                        from: { name: consumer.name, type: 'consumer' },
+                        to: { name: producer.name, type: 'producer' }
+                    });
+                    chain.push({
+                        depth: depth + 1,
+                        from: { name: producer.name, type: 'producer' },
+                        to: { name: producer.source, type: 'source' }
+                    });
+                }
+            }
+            return chain.sort((a, b) => a.depth - b.depth);
+        };
+    }
+}
+const ConsumerEngine = new ConsumerEngineClass();
+exports.default = ConsumerEngine;
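
ConsumerEngine ties the pieces together: compile resolves a consumer's fields against the dimensions and measures of its producers (recursing into nested consumers), deploy turns the compiled SQL into a view or materialized view through the source driver, and execute runs the consumer inside an ExecutionEnvironment with usage tracking. The sketch below is a usage illustration under stated assumptions: deep requires into the package are allowed, the project environment has already been loaded (the bootstrap call is not part of this diff), and the consumer name, options, and user object are placeholders.

// Usage sketch; see the assumptions stated above.
const Environment = require('@forzalabs/remora/engines/Environment').default;
const ConsumerEngine = require('@forzalabs/remora/engines/consumer/ConsumerEngine').default;

async function run() {
    const consumer = Environment.getConsumer('claims'); // placeholder consumer name

    // Shape exposed to callers: name, type, classification, mask and pk per dimension.
    console.log(ConsumerEngine.getOutputShape(consumer));

    // options is forwarded to ExecutionEnvironment.run(), user to UsageManager.startUsage();
    // both values here are placeholders.
    const result = await ConsumerEngine.execute(consumer, {}, { id: 'local-dev' });
    console.log(`${result.data.length} rows returned`);
}

run().catch(console.error);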