@forzalabs/remora 0.0.19 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,6 +52,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
52
52
  return (mod && mod.__esModule) ? mod : { "default": mod };
53
53
  };
54
54
  Object.defineProperty(exports, "__esModule", { value: true });
55
+ exports.LocalDriverDestination = void 0;
55
56
  const fs = __importStar(require("fs"));
56
57
  const path_1 = __importDefault(require("path"));
57
58
  const readline_1 = __importDefault(require("readline"));
@@ -60,9 +61,9 @@ class LocalDriver {
60
61
  constructor() {
61
62
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
62
63
  const fileURL = source.authentication['path'];
63
- (0, Affirm_1.default)(fileURL, 'file path is not entered in configuration');
64
+ (0, Affirm_1.default)(fileURL, `Missing file path in the authentication of source "${source.name}"`);
64
65
  const exist = fs.existsSync(fileURL);
65
- (0, Affirm_1.default)(exist, 'The path entered is incorrect: ' + fileURL);
66
+ (0, Affirm_1.default)(exist, `The path (${fileURL}) for source "${source.name}" does NOT exist.`);
66
67
  this._path = source.authentication['path'];
67
68
  return this;
68
69
  });
@@ -119,4 +120,71 @@ class LocalDriver {
119
120
  });
120
121
  }
121
122
  }
123
+ class LocalDriverDestination {
124
+ constructor() {
125
+ this.init = (source) => __awaiter(this, void 0, void 0, function* () {
126
+ (0, Affirm_1.default)(source, `Invalid source`);
127
+ const fileURL = source.authentication['path'];
128
+ (0, Affirm_1.default)(fileURL, `Missing file path in the authentication of source "${source.name}"`);
129
+ const exist = fs.existsSync(fileURL);
130
+ (0, Affirm_1.default)(exist, `The path (${fileURL}) for source "${source.name}" does NOT exist.`);
131
+ this._path = source.authentication['path'];
132
+ return this;
133
+ });
134
+ this.uploadFile = (options) => __awaiter(this, void 0, void 0, function* () {
135
+ (0, Affirm_1.default)(this._path, 'Path not initialized');
136
+ (0, Affirm_1.default)(options, 'Invalid upload options');
137
+ (0, Affirm_1.default)(options.name, 'File name is required');
138
+ (0, Affirm_1.default)(options.content != null, 'File content is required');
139
+ const folder = this._path;
140
+ try {
141
+ if (!fs.existsSync(folder))
142
+ fs.mkdirSync(folder, { recursive: true });
143
+ const filePath = path_1.default.join(folder, options.name);
144
+ fs.writeFileSync(filePath, options.content);
145
+ return { bucket: folder, key: filePath, res: true };
146
+ }
147
+ catch (error) {
148
+ throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
149
+ }
150
+ });
151
+ this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
152
+ (0, Affirm_1.default)(this._path, 'Path not initialized');
153
+ (0, Affirm_1.default)(options, 'Invalid upload options');
154
+ (0, Affirm_1.default)(options.name, 'File name is required');
155
+ (0, Affirm_1.default)(options.contents && Array.isArray(options.contents), 'Contents must be an array');
156
+ (0, Affirm_1.default)(options.contents.length > 0, 'Contents array cannot be empty');
157
+ const folder = this._path;
158
+ try {
159
+ if (!fs.existsSync(folder)) {
160
+ fs.mkdirSync(folder, { recursive: true });
161
+ }
162
+ const filePath = path_1.default.join(folder, options.name);
163
+ // Create or truncate the file first
164
+ fs.writeFileSync(filePath, '');
165
+ // Append each chunk
166
+ for (const chunk of options.contents) {
167
+ (0, Affirm_1.default)(typeof chunk === 'string', 'Each chunk must be a string');
168
+ fs.appendFileSync(filePath, chunk);
169
+ }
170
+ return { bucket: folder, key: filePath, res: true };
171
+ }
172
+ catch (error) {
173
+ // Clean up the partial file if it exists
174
+ const filePath = path_1.default.join(folder, options.name);
175
+ if (fs.existsSync(filePath)) {
176
+ try {
177
+ fs.unlinkSync(filePath);
178
+ }
179
+ catch (cleanupError) {
180
+ console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
181
+ throw cleanupError;
182
+ }
183
+ }
184
+ throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
185
+ }
186
+ });
187
+ }
188
+ }
189
+ exports.LocalDriverDestination = LocalDriverDestination;
122
190
  exports.default = LocalDriver;
@@ -45,6 +45,58 @@ class S3Driver {
45
45
  (0, Affirm_1.default)(res.$metadata.httpStatusCode === 200, `Failed to upload the file "${name}" to the bucket "${this._bucketName}": status code ${res.$metadata.httpStatusCode}`);
46
46
  return { res: true, key: name, bucket: this._bucketName };
47
47
  });
48
+ this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
49
+ (0, Affirm_1.default)(options, `Invalid upload options`);
50
+ (0, Affirm_1.default)(options.contents && options.contents.length > 0, 'No contents provided for multipart upload');
51
+ (0, Affirm_1.default)(options.name, 'No filename provided for multipart upload');
52
+ try {
53
+ // Create the multipart upload
54
+ const createMultipartUploadRes = yield this._client.send(new client_s3_1.CreateMultipartUploadCommand({
55
+ Bucket: this._bucketName,
56
+ Key: options.name
57
+ }));
58
+ const uploadId = createMultipartUploadRes.UploadId;
59
+ (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
60
+ // Upload each part
61
+ const uploadPromises = options.contents.map((content, index) => __awaiter(this, void 0, void 0, function* () {
62
+ const partNumber = index + 1;
63
+ const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
64
+ Bucket: this._bucketName,
65
+ Key: options.name,
66
+ UploadId: uploadId,
67
+ PartNumber: partNumber,
68
+ Body: Buffer.from(content)
69
+ }));
70
+ return {
71
+ PartNumber: partNumber,
72
+ ETag: uploadPartRes.ETag
73
+ };
74
+ }));
75
+ const uploadedParts = yield Promise.all(uploadPromises);
76
+ // Complete the multipart upload
77
+ const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
78
+ Bucket: this._bucketName,
79
+ Key: options.name,
80
+ UploadId: uploadId,
81
+ MultipartUpload: {
82
+ Parts: uploadedParts
83
+ }
84
+ }));
85
+ (0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${options.name}": status code ${completeRes.$metadata.httpStatusCode}`);
86
+ return { res: true, key: options.name, bucket: this._bucketName };
87
+ }
88
+ catch (error) {
89
+ // If anything fails, make sure to abort the multipart upload
90
+ if (error.UploadId) {
91
+ yield this._client.send(new client_s3_1.AbortMultipartUploadCommand({
92
+ Bucket: this._bucketName,
93
+ Key: options.name,
94
+ UploadId: error.UploadId
95
+ }));
96
+ }
97
+ throw error;
98
+ }
99
+ });
48
100
  }
49
101
  }
50
102
  exports.default = S3Driver;
@@ -0,0 +1,55 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const Algo_1 = __importDefault(require("../core/Algo"));
7
+ const Helper_1 = __importDefault(require("../helper/Helper"));
8
+ class DataframeManagerClass {
9
+ fill(points, from, to, onlyLastValue, maintainLastValue) {
10
+ const min = from !== null && from !== void 0 ? from : this.getMinDate(points);
11
+ const max = to !== null && to !== void 0 ? to : this.getMaxDate(points);
12
+ const orderPoints = points.length > 0 ? Algo_1.default.orderBy(points, 'x') : [];
13
+ const filledPoints = [];
14
+ const currentDate = new Date(min);
15
+ while (currentDate <= max) {
16
+ const monthKey = Helper_1.default.formatDateToYYYYMM(currentDate);
17
+ filledPoints.push({ x: monthKey, y: 0 });
18
+ currentDate.setMonth(currentDate.getMonth() + 1);
19
+ }
20
+ for (let i = 0; i < orderPoints.length; i++) {
21
+ const point = orderPoints[i];
22
+ const date = new Date(point.x);
23
+ const filledPoint = filledPoints.find(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
24
+ if (filledPoint) {
25
+ if (!onlyLastValue)
26
+ filledPoint.y += point.y;
27
+ else
28
+ filledPoint.y = point.y;
29
+ if (maintainLastValue) {
30
+ const index = filledPoints.findIndex(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
31
+ for (let k = index; k < filledPoints.length; k++) {
32
+ const nextFilledPoint = filledPoints[k];
33
+ nextFilledPoint.y = filledPoint.y;
34
+ }
35
+ }
36
+ }
37
+ }
38
+ return filledPoints;
39
+ }
40
+ getMinDate(points) {
41
+ if (!points || points.length === 0) {
42
+ const currentDate = new Date();
43
+ return new Date(currentDate.getFullYear() - 1, currentDate.getMonth(), currentDate.getDate());
44
+ }
45
+ return points.reduce((min, point) => (new Date(point.x) < min ? new Date(point === null || point === void 0 ? void 0 : point.x) : min), new Date(points[0].x));
46
+ }
47
+ getMaxDate(points) {
48
+ if (!points || points.length === 0) {
49
+ return new Date();
50
+ }
51
+ return points.reduce((max, point) => (new Date(point.x) > max ? new Date(point.x) : max), new Date(points[0].x));
52
+ }
53
+ }
54
+ const DataframeManager = new DataframeManagerClass();
55
+ exports.default = DataframeManager;
@@ -7,7 +7,8 @@ const fs_1 = __importDefault(require("fs"));
7
7
  const path_1 = __importDefault(require("path"));
8
8
  const Affirm_1 = __importDefault(require("../core/Affirm"));
9
9
  const SchemaValidator_1 = __importDefault(require("./schema/SchemaValidator"));
10
- const Validator_1 = __importDefault(require("./Validator"));
10
+ const Validator_1 = __importDefault(require("./validation/Validator"));
11
+ const Constants_1 = __importDefault(require("../Constants"));
11
12
  class EnvironmentClass {
12
13
  constructor() {
13
14
  this._env = null;
@@ -71,9 +72,16 @@ class EnvironmentClass {
71
72
  if (!SchemaValidator_1.default.validate('consumer-schema', consumer))
72
73
  throw new Error(`Invalid consumer configuration: ${consumer.name}`);
73
74
  });
75
+ // Load the project settings
76
+ const envSettings = new Map(Object.entries(Object.assign({}, projectConfig.settings))
77
+ .map(([key, value]) => [key, String(value)]));
78
+ if (!envSettings.has('SQL_MAX_QUERY_ROWS'))
79
+ envSettings.set('SQL_MAX_QUERY_ROWS', Constants_1.default.defaults.SQL_MAX_QUERY_ROWS.toString());
80
+ if (!envSettings.has('STRING_MAX_CHARACTERS_LENGTH'))
81
+ envSettings.set('STRING_MAX_CHARACTERS_LENGTH', Constants_1.default.defaults.STRING_MAX_CHARACTERS_LENGTH.toString());
74
82
  // Initialize environment
75
83
  this.init({
76
- settings: new Map(Object.entries(Object.assign({}, projectConfig.settings)).map(([key, value]) => [key, String(value)])),
84
+ settings: envSettings,
77
85
  sources,
78
86
  producers,
79
87
  consumers,
@@ -93,7 +101,7 @@ class EnvironmentClass {
93
101
  */
94
102
  this.getFirstProducer = (producerName) => {
95
103
  (0, Affirm_1.default)(producerName, `Invalid producer name`);
96
- const prod = this._env.producers.find(x => x.name === producerName);
104
+ const prod = this._env.producers.find(x => x.name.toLocaleLowerCase() === producerName.toLowerCase());
97
105
  if (!prod) {
98
106
  const consumer = this.getConsumer(producerName);
99
107
  (0, Affirm_1.default)(consumer, `Invalid producer name`);
@@ -103,11 +111,11 @@ class EnvironmentClass {
103
111
  };
104
112
  this.getProducer = (producerName) => {
105
113
  (0, Affirm_1.default)(producerName, `Invalid producer name`);
106
- return this._env.producers.find(x => x.name === producerName);
114
+ return this._env.producers.find(x => x.name.toLowerCase() === producerName.toLowerCase());
107
115
  };
108
116
  this.getConsumer = (consumerName) => {
109
117
  (0, Affirm_1.default)(consumerName, `Invalid consumer name`);
110
- return this._env.consumers.find(x => x.name === consumerName);
118
+ return this._env.consumers.find(x => x.name.toLowerCase() === consumerName.toLowerCase());
111
119
  };
112
120
  this.getSchema = (schemaName) => {
113
121
  (0, Affirm_1.default)(schemaName, 'Invalid schema name');
@@ -82,6 +82,7 @@ class ProducerEngineClass {
82
82
  }
83
83
  });
84
84
  this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
85
+ var _a;
85
86
  (0, Affirm_1.default)(producer, 'Invalid producer');
86
87
  (0, Affirm_1.default)(options, 'Invalid options');
87
88
  if (options.readmode === 'lines')
@@ -95,7 +96,7 @@ class ProducerEngineClass {
95
96
  lines = yield driver.readLinesInRange({ fileKey: producer.settings.fileKey, lineFrom: options.lines.from, lineTo: options.lines.to });
96
97
  else
97
98
  lines = [(yield driver.download({ fileKey: producer.settings.fileKey }))];
98
- switch (producer.settings.fileType.toUpperCase()) {
99
+ switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
99
100
  case 'CSV': {
100
101
  return { data: lines, dataType: 'lines-of-text' };
101
102
  }
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
16
+ const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
17
+ const DataframeManager_1 = __importDefault(require("./DataframeManager"));
18
+ class UsageDataManager {
19
+ getUsageDetails() {
20
+ return __awaiter(this, void 0, void 0, function* () {
21
+ const now = DSTE_1.default.now();
22
+ const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
23
+ const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
24
+ const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
25
+ const collection = 'usage';
26
+ // Aggregate status counts for current and previous month
27
+ const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
28
+ const results = yield DatabaseEngine_1.default.aggregate(collection, [
29
+ { $match: { startedAt: { $gte: start, $lte: end } } },
30
+ { $group: { _id: '$status', count: { $sum: 1 } } }
31
+ ]);
32
+ let success = 0, failed = 0, total = 0;
33
+ results.forEach((r) => {
34
+ total += r.count;
35
+ if (r._id === 'success')
36
+ success = r.count;
37
+ if (r._id === 'failed')
38
+ failed = r.count;
39
+ });
40
+ return { total, success, failed };
41
+ });
42
+ const statusesRequests = yield getStatusCounts(from, now);
43
+ const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
44
+ // Monthly success and fails for last 12 months
45
+ const monthlySuccessPipeline = [
46
+ { $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
47
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
48
+ { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
49
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
50
+ { $sort: { x: 1 } }
51
+ ];
52
+ const monthlyFailsPipeline = [
53
+ { $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
54
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
55
+ { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
56
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
57
+ { $sort: { x: 1 } }
58
+ ];
59
+ const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
60
+ const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
61
+ // Top lines per month for last 12 months
62
+ const topLinesPipeline = [
63
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
64
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
65
+ { $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
66
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
67
+ { $sort: { x: 1 } }
68
+ ];
69
+ const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
70
+ // Top times per month for last 12 months
71
+ const topTimePipeline = [
72
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
73
+ { $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
74
+ { $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
75
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
76
+ { $sort: { x: 1 } }
77
+ ];
78
+ const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
79
+ // Monthly consumers: for each consumer, per month count
80
+ const consumerPipeline = [
81
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
82
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
83
+ { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
84
+ { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
85
+ { $sort: { consumer: 1, x: 1 } }
86
+ ];
87
+ const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
88
+ // transform to consumer array
89
+ const consumerMap = {};
90
+ consumersData.forEach((r) => {
91
+ consumerMap[r.consumer] = consumerMap[r.consumer] || [];
92
+ consumerMap[r.consumer].push({ x: r.x, y: r.y });
93
+ });
94
+ const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
95
+ // Recent executions
96
+ const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
97
+ return {
98
+ statusesRequests,
99
+ prevStatusesRequests,
100
+ monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
101
+ monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
102
+ consumers: consumers,
103
+ topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
104
+ topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
105
+ recentExecution
106
+ };
107
+ });
108
+ }
109
+ }
110
+ exports.default = new UsageDataManager();
@@ -46,8 +46,8 @@ const DEV_USER = {
46
46
  const MOCK_USER = {
47
47
  _id: '__mock__',
48
48
  auth: { oid: '', provider: 'azure' },
49
- email: '',
50
- name: 'mock',
49
+ email: 'mock.user@email.com',
50
+ name: 'Mock User',
51
51
  roles: ['user'],
52
52
  _signature: '',
53
53
  lastLogin: new Date().toJSON()
@@ -120,7 +120,7 @@ class ValidatorClass {
120
120
  const groupingFields = fields.filter(x => x.grouping);
121
121
  if (groupingFields.length > 1)
122
122
  errors.push(`There can't be 2 fields with grouping defined at the same level (${groupingFields.map(x => x.key).join(', ')}). Level: ${level}`);
123
- groupingFields.forEach(field => {
123
+ groupingFields.forEach((field) => {
124
124
  if (field.grouping)
125
125
  errors = [...errors, ...validateGroupingLevels(field.grouping.subFields, level + 1)];
126
126
  });
@@ -20,11 +20,11 @@ class AutoMapperEngineClass {
20
20
  * input: the first ten lines of the uploaded file
21
21
  * outputs: the selected schemas
22
22
  */
23
- this.map = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
23
+ this.map = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
24
24
  (0, Affirm_1.default)(input, 'Invalid input');
25
25
  (0, Affirm_1.default)(outputs, 'Invalid outputs');
26
26
  const llm = new LLM_1.default();
27
- const producersRes = yield llm.inferProducers(input, outputs);
27
+ const producersRes = yield llm.inferProducers(input, outputs, fileName, sources);
28
28
  const consumersRes = yield llm.inferConsumers(producersRes.producers, outputs);
29
29
  return {
30
30
  consumers: consumersRes.consumers,
package/engines/ai/LLM.js CHANGED
@@ -25,12 +25,16 @@ You are tasked with creating the PRODUCER(S) that will then be used.
25
25
  A producer maps directly to a dataset and exposes its dimensions.
26
26
 
27
27
  ## FIELDS
28
- - alias: the reference to the column or property name if different from the desired name property
29
28
  - classification: make your best guess if the field falls under any of these regulations
30
29
 
31
30
  # RULES
32
31
  - Add only the required fields to comply with the OUTPUT DATA SPEC
33
32
  - Add fields that you think are important
33
+ - The name of the producer must be the same as the name of the dataset.
34
+ - Avoid creating multiple producers with similar data.
35
+ - Try to create the least number of producers
36
+ - Always include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/producer-schema.json"
37
+ - Based on the producer, select the source that makes the most sense to connect; otherwise leave the string "<source_name>"
34
38
 
35
39
  # FORMAT
36
40
  The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
@@ -42,6 +46,12 @@ The result must be returned as a JSON object.
42
46
 
43
47
  # OUTPUT DATA SPEC
44
48
  {{output data spec}}
49
+
50
+ # File name
51
+ {{file name}}
52
+
53
+ # SOURCES
54
+ {{sources}}
45
55
  `;
46
56
  const baseConsumersSystemPrompt = `
47
57
  # TASK
@@ -52,12 +62,14 @@ You are going to receive a list of PRODUCERS that expose some dimensions, and yo
52
62
  A consumer takes the data from one or more producers and changes its shape to transform it into the required output schema.
53
63
 
54
64
  ## FIELDS
55
- - fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
56
65
  - fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
57
66
 
58
67
  # RULES
59
- - If a field is not needed, do not add it e.g. if there is no need for the "grouping" property, then don't add it.
68
+ - If a field is not needed, do not add it e.g.
60
69
  - Only import a producer once
70
+ - Always include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
71
+ - Use "API" as the only valid output format.
72
+ - The "from" must contain only the name of the producer
61
73
 
62
74
  # FORMAT
63
75
  The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
@@ -141,7 +153,7 @@ resulting consumer: """
141
153
  ],
142
154
  "outputs": [
143
155
  {
144
- "format": "JSON"
156
+ "format": "API"
145
157
  }
146
158
  ],
147
159
  "producers": [
@@ -159,36 +171,45 @@ You are going to receive a list of CONSUMERS and you need to return in the corre
159
171
 
160
172
  # CONSUMER DEFINITION
161
173
  A consumer takes the data from one or more producers and changes its shape to transform it into the required output schema.
162
-
163
174
  ## FIELDS
164
- - fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
165
175
  - fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
166
176
 
167
177
  # RULES
168
+ - If a field is not needed, do not add it.
178
+ - If a field is not needed, do not add it e.g.
169
179
  - Only import a producer once
180
+ - Always include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
181
+ - Use "API" as the only valid output format.
182
+ - The "from" must contain only the name of the producer
170
183
 
171
184
  # CONSUMERS
172
185
  {{consumers}}
173
186
  `;
174
187
  class LLM {
175
188
  constructor() {
176
- this.inferProducers = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
189
+ this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
177
190
  let systemPrompt = baseProducersSystemPrompt;
178
191
  systemPrompt = systemPrompt.replace('{{input data spec}}', input.map(x => `- ${JSON.stringify(x)}`).join('\n'));
179
192
  systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
193
+ systemPrompt = systemPrompt.replace('{{file name}}', fileName);
194
+ systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
180
195
  const res = yield this._client.beta.chat.completions.parse({
181
- model: 'o3-mini',
196
+ model: 'gpt-4o',
182
197
  messages: [
183
198
  { role: 'system', content: systemPrompt }
184
199
  ],
185
200
  response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
186
201
  producers: zod_2.z.array(zod_2.z.object({
202
+ $schema: zod_2.z.string().describe('The schema of the producer. This should always be the same.'),
187
203
  name: zod_2.z.string(),
188
204
  description: zod_2.z.string(),
189
205
  dimensions: zod_2.z.array(zod_2.z.object({
190
206
  name: zod_2.z.string(),
191
- alias: zod_2.z.string().optional(),
207
+ // alias: z.string().optional(),
208
+ source: zod_2.z.string().describe('The name of the source linked to this producer.'),
209
+ settings: zod_2.z.object({
210
+ fileKey: zod_2.z.string().describe('The name of the file'),
211
+ fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
212
+ }),
192
213
  description: zod_2.z.string().optional(),
193
214
  type: zod_2.z.enum(['string', 'number', 'datetime']),
194
215
  pk: zod_2.z.boolean().optional(),
@@ -205,12 +226,13 @@ class LLM {
205
226
  systemPrompt = systemPrompt.replace('{{producers}}', producers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
206
227
  systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
207
228
  const item = {
208
- model: 'o3-mini',
229
+ model: 'gpt-4o',
209
230
  messages: [
210
231
  { role: 'system', content: systemPrompt }
211
232
  ],
212
233
  response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
213
234
  consumers: zod_2.z.array(zod_2.z.object({
235
+ $schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
214
236
  name: zod_2.z.string(),
215
237
  description: zod_2.z.string(),
216
238
  producers: zod_2.z.array(zod_2.z.object({
@@ -224,13 +246,14 @@ class LLM {
224
246
  fields: zod_2.z.array(zod_2.z.object({
225
247
  key: zod_2.z.string(),
226
248
  from: zod_2.z.string().optional(),
227
- grouping: zod_2.z.object({
228
- groupingKey: zod_2.z.string(),
229
- subFields: zod_2.z.array(zod_2.z.lazy(() => zod_2.z.object({
230
- key: zod_2.z.string(),
231
- from: zod_2.z.string().optional()
232
- })))
233
- }).optional()
249
+ alias: zod_2.z.string().optional()
250
+ // grouping: z.object({
251
+ // groupingKey: z.string(),
252
+ // subFields: z.array(z.lazy(() => z.object({
253
+ // key: z.string(),
254
+ // from: z.string().optional()
255
+ // })))
256
+ // }).optional()
234
257
  })),
235
258
  outputs: zod_2.z.array(zod_2.z.object({
236
259
  format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
@@ -243,12 +266,13 @@ class LLM {
243
266
  const firstDraft = msg.parsed;
244
267
  const qaSystemPrompt = baseQASystemPrompt.replace('{{consumers}}', firstDraft.consumers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
245
268
  const res2 = yield this._client.beta.chat.completions.parse({
246
- model: 'o3-mini',
269
+ model: 'gpt-4o',
247
270
  messages: [
248
271
  { role: 'system', content: qaSystemPrompt }
249
272
  ],
250
273
  response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
251
274
  consumers: zod_2.z.array(zod_2.z.object({
275
+ $schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
252
276
  name: zod_2.z.string(),
253
277
  description: zod_2.z.string(),
254
278
  producers: zod_2.z.array(zod_2.z.object({
@@ -262,13 +286,14 @@ class LLM {
262
286
  fields: zod_2.z.array(zod_2.z.object({
263
287
  key: zod_2.z.string(),
264
288
  from: zod_2.z.string().optional(),
265
- grouping: zod_2.z.object({
266
- groupingKey: zod_2.z.string().optional(),
267
- subFields: zod_2.z.array(zod_2.z.lazy(() => zod_2.z.object({
268
- key: zod_2.z.string(),
269
- from: zod_2.z.string().optional()
270
- }))).optional()
271
- }).optional()
289
+ alias: zod_2.z.string().optional()
290
+ // grouping: z.object({
291
+ // groupingKey: z.string().optional(),
292
+ // subFields: z.array(z.lazy(() => z.object({
293
+ // key: z.string(),
294
+ // from: z.string().optional()
295
+ // }))).optional()
296
+ // }).optional()
272
297
  })),
273
298
  outputs: zod_2.z.array(zod_2.z.object({
274
299
  format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
@@ -113,7 +113,8 @@ class ConsumerManagerClass {
113
113
  else {
114
114
  const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
115
115
  (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
116
- expandedFields.push(field);
116
+ // TODO: CHECK THIS FIX IS GOOD
117
+ expandedFields.push(Object.assign(Object.assign({}, field), { dimension: col.dimension, measure: col.measure }));
117
118
  }
118
119
  return expandedFields;
119
120
  };