@forzalabs/remora 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
- cliVersion: '1.0.1',
+ cliVersion: '1.0.14',
  backendVersion: 1,
  backendPort: 5088,
  workerVersion: 2,
@@ -12,6 +12,7 @@ const CONSTANTS = {
  */
  SOURCE_FILENAME_COLUMN: '$source_filename',
  defaults: {
+ REMORA_PATH: './remora',
  PRODUCER_TEMP_FOLDER: '.temp',
  SQL_MAX_QUERY_ROWS: 10000,
  STRING_MAX_CHARACTERS_LENGTH: 10000000,
@@ -20,17 +20,18 @@ class DatabaseEngineClass {
  this.MAX_TRY_CONNECTION = 3;
  this.db = () => this._db;
  this.connect = () => __awaiter(this, void 0, void 0, function* () {
- var _a;
  // WARNING: this was changed during the deployment to ECS...
  // I've reverted it, but maybe it needs to be changed or looked into...
+ var _a;
  this._uri = ((_a = process.env.MONGO_URI) !== null && _a !== void 0 ? _a : Helper_1.default.isDev())
  ? 'mongodb://mongo:27017/remora'
  : 'mongodb://localhost:27017/remora';
  this._client = new mongodb_1.MongoClient(this._uri);
  const errors = [];
+ this._client = new mongodb_1.MongoClient(this._uri);
  for (let i = 0; i < this.MAX_TRY_CONNECTION; i++) {
  try {
- console.log(`Attempting to connect to mongo: "${this._uri}" (${i})`);
+ console.log(`Attempting to connect to mongo "${this._uri}`);
  yield this._client.connect();
  this._db = this._client.db(Settings_1.default.db.name);
  this._connected = true;
@@ -57,29 +57,6 @@
  "additionalProperties": false
  }
  },
- "custom": {
- "type": "object",
- "description": "Custom code to extract fields from producers, transform them, and return results",
- "properties": {
- "language": {
- "type": "string",
- "enum": [
- "js",
- "python"
- ],
- "description": "The language of the custom code"
- },
- "code": {
- "type": "string",
- "description": "The custom code to execute"
- }
- },
- "required": [
- "language",
- "code"
- ],
- "additionalProperties": false
- },
  "union": {
  "type": "boolean",
  "description": "Merges the data from the various producers in a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged in a single dataset."
@@ -392,6 +369,45 @@
  "distinct": {
  "type": "boolean",
  "description": "If true, then the result set will only contain DISTINCT values"
+ },
+ "distinctOn": {
+ "type": "object",
+ "description": "Performs a distinct operation on specific key(s) and applies collision resolution rules to determine which record to keep when duplicates are found",
+ "properties": {
+ "keys": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "description": "The field(s) to use for determining uniqueness. Use the 'alias' if specified. Can be a single field or multiple fields for composite keys."
+ },
+ "resolution": {
+ "type": "object",
+ "description": "Rules to determine which record to keep when duplicates are found",
+ "properties": {
+ "strategy": {
+ "type": "string",
+ "enum": ["first", "last", "min", "max"],
+ "description": "Strategy for resolving which record to keep. 'first': Keep the first record based on orderBy field. 'last': Keep the last record based on orderBy field. 'min': Keep the record with the minimum value in orderBy field. 'max': Keep the record with the maximum value in orderBy field."
+ },
+ "orderBy": {
+ "type": "string",
+ "description": "Required for 'first', 'last', 'min', 'max' strategies. The field to use for ordering/comparison when selecting the record to keep."
+ },
+ "direction": {
+ "type": "string",
+ "enum": ["asc", "desc"],
+ "default": "asc",
+ "description": "For 'first' and 'last' strategies, the sort direction."
+ }
+ },
+ "required": ["strategy"],
+ "additionalProperties": false
+ }
+ },
+ "required": ["keys", "resolution"],
+ "additionalProperties": false
  }
  },
  "additionalProperties": false
@@ -177,8 +177,8 @@ class DeltaShareSourceDriver {
  .map(x => JSON.parse(x));
  return deltaLines;
  });
- this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
- void producer;
+ this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+ void request;
  throw new Error('DeltaShareSourceDriver.ready is not supported: Delta Sharing does not support readiness checks');
  });
  }
@@ -198,10 +198,10 @@ class HttpApiSourceDriver {
  }
  return itemsData;
  };
- this.ready = (producer) => {
- void producer;
+ this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+ void request;
  throw new Error('Not implemented yet');
- };
+ });
  }
  }
  exports.HttpApiSourceDriver = HttpApiSourceDriver;
@@ -175,8 +175,8 @@ class RedshiftDriver {
  }
  return records;
  };
- this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
- void producer;
+ this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+ void request;
  throw new Error('Not implemented yet');
  });
  }
@@ -57,7 +57,6 @@ const promises_1 = __importDefault(require("fs/promises"));
  const readline_1 = __importDefault(require("readline"));
  const path_1 = __importDefault(require("path"));
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
- const FileExporter_1 = __importDefault(require("../../engines/file/FileExporter"));
  const Logger_1 = __importDefault(require("../../helper/Logger"));
  class LocalDestinationDriver {
  constructor() {
@@ -70,57 +69,6 @@ class LocalDestinationDriver {
  this._path = source.authentication['path'];
  return this;
  });
- this.uploadFile = (options) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(this._path, 'Path not initialized');
- (0, Affirm_1.default)(options, 'Invalid upload options');
- (0, Affirm_1.default)(options.name, 'File name is required');
- (0, Affirm_1.default)(options.content != null, 'File content is required');
- const folder = this._path;
- try {
- if (!fs.existsSync(folder))
- fs.mkdirSync(folder, { recursive: true });
- const filePath = path_1.default.join(folder, options.name);
- fs.writeFileSync(filePath, options.content);
- return { bucket: folder, key: filePath, res: true };
- }
- catch (error) {
- throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
- }
- });
- this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(options, `Invalid upload options`);
- const { dataset, name, recordProjection } = options;
- (0, Affirm_1.default)(dataset, 'No streaming dataset');
- (0, Affirm_1.default)(name, 'No filename provided for upload stream');
- (0, Affirm_1.default)(recordProjection, 'No recordProjection for upload stream');
- const folder = this._path;
- try {
- if (!fs.existsSync(folder))
- fs.mkdirSync(folder, { recursive: true });
- const filePath = path_1.default.join(folder, options.name);
- fs.writeFileSync(filePath, '');
- yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
- const chunks = FileExporter_1.default.prepareBatch(batch, options);
- for (const chunk of chunks)
- fs.appendFileSync(filePath, chunk);
- }));
- return { bucket: folder, key: filePath, res: true };
- }
- catch (error) {
- // Clean up the partial file if it exists
- const filePath = path_1.default.join(folder, options.name);
- if (fs.existsSync(filePath)) {
- try {
- fs.unlinkSync(filePath);
- }
- catch (cleanupError) {
- console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
- throw cleanupError;
- }
- }
- throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
- }
- });
  this.saveFile = (fileKey, content) => {
  (0, Affirm_1.default)(this._path, 'Path not initialized');
  (0, Affirm_1.default)(fileKey, 'Invalid file key');
@@ -150,9 +98,6 @@ class LocalDestinationDriver {
  const fileContent = yield s3Driver.downloadFile(sourceFileKey);
  yield this.saveFile(destinationFileKey, fileContent);
  });
- this.ready = (destinationPath) => __awaiter(this, void 0, void 0, function* () {
- return fs.createWriteStream(destinationPath);
- });
  this.move = (fromPath, toName) => __awaiter(this, void 0, void 0, function* () {
  try {
  const toFilePath = path_1.default.join(this._path, toName);
@@ -374,8 +374,9 @@ class LocalSourceDriver {
  }
  fs.renameSync(sourceFilePath, destinationFilePath);
  };
- this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(producer, 'Invalid producer');
+ this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(request, 'Invalid ready request');
+ const { producer } = request;
  // TODO: extra logic for encoded files (xml, xls, ...) to be decoded and prepared locally as a plain CSV
  // then return the uri to this new temporary file
  const { fileKey } = producer.settings;
@@ -8,6 +8,26 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
  step((generator = generator.apply(thisArg, _arguments || [])).next());
  });
  };
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
+ var m = o[Symbol.asyncIterator], i;
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
+ };
+ var __await = (this && this.__await) || function (v) { return this instanceof __await ? (this.v = v, this) : new __await(v); }
+ var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
+ var g = generator.apply(thisArg, _arguments || []), i, q = [];
+ return i = Object.create((typeof AsyncIterator === "function" ? AsyncIterator : Object).prototype), verb("next"), verb("throw"), verb("return", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;
+ function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }
+ function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }
+ function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
+ function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
+ function fulfill(value) { resume("next", value); }
+ function reject(value) { resume("throw", value); }
+ function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
+ };
  var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
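The __asyncValues, __await, and __asyncGenerator blocks added here are TypeScript's standard downlevel helpers for async iteration, so the new code further down uses for await ... of loops and async generators. A minimal source-level sketch of the kind of code that causes the compiler to emit them (the names here are illustrative, not from the package):

// Illustrative only: source that compiles down to the __asyncValues/__await/__asyncGenerator helpers.
async function* withNewlines(lineReader) {
    for await (const line of lineReader) {  // lowered through __asyncValues / __await
        yield line + '\n';                  // lowered through __asyncGenerator
    }
}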
@@ -15,7 +35,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const client_s3_1 = require("@aws-sdk/client-s3");
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const SecretManager_1 = __importDefault(require("../../engines/SecretManager"));
- const FileExporter_1 = __importDefault(require("../../engines/file/FileExporter"));
+ const path_1 = __importDefault(require("path"));
+ const fs_1 = __importDefault(require("fs"));
+ const readline_1 = __importDefault(require("readline"));
  class S3DestinationDriver {
  constructor() {
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -33,32 +55,74 @@ class S3DestinationDriver {
  // TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
  return this;
  });
- this.uploadFile = (options) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(options, `Invalid upload options`);
- const { content, name } = options;
- const commandParams = {
+ this.copyFromS3 = (sourceBucket, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
+ (0, Affirm_1.default)(sourceBucket, 'Invalid source bucket');
+ (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
+ (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
+ yield this._client.send(new client_s3_1.CopyObjectCommand({
+ CopySource: `${sourceBucket}/${sourceFileKey}`,
+ Bucket: this._bucketName,
+ Key: destinationFileKey
+ }));
+ });
+ this.saveFile = (fileKey, content) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
+ (0, Affirm_1.default)(fileKey, 'Invalid file key');
+ (0, Affirm_1.default)(content, 'Invalid content');
+ yield this._client.send(new client_s3_1.PutObjectCommand({
  Bucket: this._bucketName,
- Key: name,
+ Key: fileKey,
  Body: content
+ }));
+ });
+ this.move = (fromPath, toName) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(fromPath, 'Invalid source path');
+ (0, Affirm_1.default)(toName, 'Invalid destination name');
+ (0, Affirm_1.default)(fs_1.default.existsSync(fromPath), `Source file does not exist: ${fromPath}`);
+ const readStream = fs_1.default.createReadStream(fromPath);
+ return this._multipartUpload(toName, readStream);
+ });
+ this.transformAndMove = (fromPath, transform, toName) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(fromPath, 'Invalid source path');
+ (0, Affirm_1.default)(transform, 'Invalid transform function');
+ (0, Affirm_1.default)(toName, 'Invalid destination name');
+ (0, Affirm_1.default)(fs_1.default.existsSync(fromPath), `Source file does not exist: ${fromPath}`);
+ const reader = fs_1.default.createReadStream(fromPath);
+ const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
+ const transformedLines = function () {
+ return __asyncGenerator(this, arguments, function* () {
+ var _a, e_1, _b, _c;
+ try {
+ for (var _d = true, lineReader_1 = __asyncValues(lineReader), lineReader_1_1; lineReader_1_1 = yield __await(lineReader_1.next()), _a = lineReader_1_1.done, !_a; _d = true) {
+ _c = lineReader_1_1.value;
+ _d = false;
+ const line = _c;
+ yield yield __await(transform(line) + '\n');
+ }
+ }
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
+ finally {
+ try {
+ if (!_d && !_a && (_b = lineReader_1.return)) yield __await(_b.call(lineReader_1));
+ }
+ finally { if (e_1) throw e_1.error; }
+ }
+ });
  };
- const command = new client_s3_1.PutObjectCommand(commandParams);
- const res = yield this._client.send(command);
- (0, Affirm_1.default)(res.$metadata.httpStatusCode === 200, `Failed to upload the file "${name}" to the bucket "${this._bucketName}": status code ${res.$metadata.httpStatusCode}`);
- return { res: true, key: name, bucket: this._bucketName };
+ return this._multipartUpload(toName, transformedLines());
  });
- this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(options, `Invalid upload options`);
- const { dataset, name, recordProjection } = options;
- (0, Affirm_1.default)(dataset, 'No streaming dataset');
- (0, Affirm_1.default)(name, 'No filename provided for upload stream');
- (0, Affirm_1.default)(recordProjection, 'No recordProjection for upload stream');
+ this._multipartUpload = (toName, dataSource) => __awaiter(this, void 0, void 0, function* () {
+ var _a, dataSource_1, dataSource_1_1;
+ var _b, e_2, _c, _d;
+ let uploadId;
  try {
  // Create the multipart upload
  const createMultipartUploadRes = yield this._client.send(new client_s3_1.CreateMultipartUploadCommand({
  Bucket: this._bucketName,
- Key: name
+ Key: toName
  }));
- const uploadId = createMultipartUploadRes.UploadId;
+ uploadId = createMultipartUploadRes.UploadId;
  (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
  const uploadedParts = [];
  let partNumber = 1;
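The compiled transformAndMove above is hard to follow through the emitted helpers. Roughly, it reads the local file line by line, applies the caller-supplied transform to each line, and pipes the transformed lines into the new _multipartUpload. A hedged source-level sketch (validation checks omitted; the package's original TypeScript is not published):

// Approximate source-level equivalent of the compiled transformAndMove shown above.
this.transformAndMove = async (fromPath, transform, toName) => {
    const reader = fs.createReadStream(fromPath);
    const lineReader = readline.createInterface({ input: reader, crlfDelay: Infinity });
    // Async generator yielding one transformed, newline-terminated line per source line
    const transformedLines = async function* () {
        for await (const line of lineReader) {
            yield transform(line) + '\n';
        }
    };
    return this._multipartUpload(toName, transformedLines());
};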
@@ -67,7 +131,7 @@ class S3DestinationDriver {
  const uploadPart = (buffer) => __awaiter(this, void 0, void 0, function* () {
  const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
  Bucket: this._bucketName,
- Key: name,
+ Key: toName,
  UploadId: uploadId,
  PartNumber: partNumber,
  Body: buffer
@@ -78,18 +142,28 @@ class S3DestinationDriver {
  });
  partNumber++;
  });
- yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
- const chunks = FileExporter_1.default.prepareBatch(batch, options);
- for (const chunk of chunks) {
- const chunkBuffer = Buffer.from(chunk);
+ try {
+ for (_a = true, dataSource_1 = __asyncValues(dataSource); dataSource_1_1 = yield dataSource_1.next(), _b = dataSource_1_1.done, !_b; _a = true) {
+ _d = dataSource_1_1.value;
+ _a = false;
+ const chunk = _d;
+ const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
  accumulatedBuffer = Buffer.concat([accumulatedBuffer, chunkBuffer]);
  // If accumulated buffer is at least 5MB, upload it as a part
- if (accumulatedBuffer.length >= MIN_PART_SIZE) {
- yield uploadPart(accumulatedBuffer);
- accumulatedBuffer = Buffer.alloc(0);
+ while (accumulatedBuffer.length >= MIN_PART_SIZE) {
+ const partBuffer = accumulatedBuffer.subarray(0, MIN_PART_SIZE);
+ accumulatedBuffer = accumulatedBuffer.subarray(MIN_PART_SIZE);
+ yield uploadPart(partBuffer);
  }
  }
- }));
+ }
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
+ finally {
+ try {
+ if (!_a && !_b && (_c = dataSource_1.return)) yield _c.call(dataSource_1);
+ }
+ finally { if (e_2) throw e_2.error; }
+ }
  // Upload any remaining data as the final part (even if smaller than 5MB)
  if (accumulatedBuffer.length > 0) {
  yield uploadPart(accumulatedBuffer);
@@ -97,63 +171,27 @@ class S3DestinationDriver {
  // Complete the multipart upload
  const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
  Bucket: this._bucketName,
- Key: options.name,
+ Key: toName,
  UploadId: uploadId,
  MultipartUpload: {
  Parts: uploadedParts
  }
  }));
- (0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${options.name}": status code ${completeRes.$metadata.httpStatusCode}`);
- return { res: true, key: options.name, bucket: this._bucketName };
+ (0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${toName}": status code ${completeRes.$metadata.httpStatusCode}`);
+ return { res: true, key: path_1.default.join(this._bucketName, toName), bucket: this._bucketName };
  }
  catch (error) {
  // If anything fails, make sure to abort the multipart upload
- if (error.UploadId) {
+ if (uploadId) {
  yield this._client.send(new client_s3_1.AbortMultipartUploadCommand({
  Bucket: this._bucketName,
- Key: options.name,
- UploadId: error.UploadId
+ Key: toName,
+ UploadId: uploadId
  }));
  }
  throw error;
  }
  });
- this.copyFromS3 = (sourceBucket, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
- (0, Affirm_1.default)(sourceBucket, 'Invalid source bucket');
- (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
- (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
- yield this._client.send(new client_s3_1.CopyObjectCommand({
- CopySource: `${sourceBucket}/${sourceFileKey}`,
- Bucket: this._bucketName,
- Key: destinationFileKey
- }));
- });
- this.saveFile = (fileKey, content) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
- (0, Affirm_1.default)(fileKey, 'Invalid file key');
- (0, Affirm_1.default)(content, 'Invalid content');
- yield this._client.send(new client_s3_1.PutObjectCommand({
- Bucket: this._bucketName,
- Key: fileKey,
- Body: content
- }));
- });
- this.ready = (destinationPath) => {
- void destinationPath;
- throw new Error('Not implemented yet');
- };
- this.move = (fromPath, toName) => {
- void fromPath;
- void toName;
- throw new Error('Not implemented yet');
- };
- this.transformAndMove = (fromPath, transform, toName) => {
- void fromPath;
- void toName;
- void transform;
- throw new Error('Not implemented yet');
- };
  }
  }
  exports.default = S3DestinationDriver;
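Taken together, these hunks replace the old uploadFile/uploadStream pair with a single _multipartUpload(toName, dataSource) that accepts any (async) iterable of chunks. A condensed sketch of the flow, with validation, part bookkeeping, and the @aws-sdk/client-s3 imports assumed rather than shown:

// Condensed sketch of the new _multipartUpload flow (details and error messages omitted).
this._multipartUpload = async (toName, dataSource) => {
    let uploadId;
    try {
        // 1. Start the multipart upload; keep the id so a failure can abort it.
        ({ UploadId: uploadId } = await this._client.send(
            new CreateMultipartUploadCommand({ Bucket: this._bucketName, Key: toName })));
        let accumulated = Buffer.alloc(0);
        for await (const chunk of dataSource) {
            accumulated = Buffer.concat([accumulated, Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)]);
            // 2. Flush exact MIN_PART_SIZE (5MB) parts while enough data has accumulated.
            while (accumulated.length >= MIN_PART_SIZE) {
                await uploadPart(accumulated.subarray(0, MIN_PART_SIZE));
                accumulated = accumulated.subarray(MIN_PART_SIZE);
            }
        }
        // 3. Upload whatever remains as the final, possibly short, part and complete.
        if (accumulated.length > 0) await uploadPart(accumulated);
        await this._client.send(new CompleteMultipartUploadCommand({
            Bucket: this._bucketName, Key: toName, UploadId: uploadId,
            MultipartUpload: { Parts: uploadedParts }
        }));
    }
    catch (error) {
        // 4. Abort the multipart upload if it was started, then rethrow.
        if (uploadId) await this._client.send(new AbortMultipartUploadCommand({
            Bucket: this._bucketName, Key: toName, UploadId: uploadId }));
        throw error;
    }
};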
@@ -22,8 +22,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const client_s3_1 = require("@aws-sdk/client-s3");
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const SecretManager_1 = __importDefault(require("../../engines/SecretManager"));
+ const promises_1 = require("stream/promises");
  const readline_1 = __importDefault(require("readline"));
  const path_1 = __importDefault(require("path"));
+ const fs_1 = __importDefault(require("fs"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const xlsx_1 = __importDefault(require("xlsx"));
  const XMLParser_1 = __importDefault(require("../../engines/parsing/XMLParser"));
@@ -33,6 +35,7 @@ const DriverHelper_1 = __importDefault(require("../DriverHelper"));
  const Logger_1 = __importDefault(require("../../helper/Logger"));
  const Constants_1 = __importDefault(require("../../Constants"));
  const XLSParser_1 = __importDefault(require("../../engines/parsing/XLSParser"));
+ const ExecutorScope_1 = __importDefault(require("../../executors/ExecutorScope"));
  class S3SourceDriver {
  constructor() {
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -446,10 +449,47 @@ class S3SourceDriver {
  Key: destinationFileKey
  }));
  });
- this.ready = (producer) => {
- void producer;
- throw new Error('Not implemented yet');
- };
+ this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(request, 'Invalid producer');
+ (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
+ const { producer, scope } = request;
+ const { fileKey } = producer.settings;
+ (0, Affirm_1.default)(fileKey, 'Invalid file key');
+ const streamToFile = (s3Key, localPath) => __awaiter(this, void 0, void 0, function* () {
+ const command = new client_s3_1.GetObjectCommand({
+ Bucket: this._bucketName,
+ Key: s3Key
+ });
+ const response = yield this._client.send(command);
+ (0, Affirm_1.default)(response.Body, `Failed to fetch object "${s3Key}" from S3`);
+ // Ensure the directory for the file exists
+ const fileDir = path_1.default.dirname(localPath);
+ if (!fs_1.default.existsSync(fileDir)) {
+ fs_1.default.mkdirSync(fileDir, { recursive: true });
+ }
+ const writeStream = fs_1.default.createWriteStream(localPath);
+ yield (0, promises_1.pipeline)(response.Body, writeStream);
+ });
+ if (fileKey.includes('%')) {
+ const allFileKeys = yield this.listFiles(fileKey);
+ Affirm_1.default.hasItems(allFileKeys, `The file key pattern "${fileKey}" doesn't have any matches in bucket "${this._bucketName}".`);
+ // Stream each file to local temp storage sequentially to avoid overwhelming the connection
+ const allFilePaths = [];
+ for (const s3Key of allFileKeys) {
+ const localPath = ExecutorScope_1.default.getProducerPath(scope, producer, s3Key);
+ ExecutorScope_1.default.ensurePath(localPath);
+ yield streamToFile(s3Key, localPath);
+ allFilePaths.push(localPath);
+ }
+ return { files: allFilePaths.map(x => ({ fullUri: x })) };
+ }
+ else {
+ const localPath = ExecutorScope_1.default.getProducerPath(scope, producer, fileKey);
+ ExecutorScope_1.default.ensurePath(localPath);
+ yield streamToFile(fileKey, localPath);
+ return { files: [{ fullUri: localPath }] };
+ }
+ });
  }
  }
  exports.default = S3SourceDriver;
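With this change, the source drivers' ready methods take a request object instead of a bare producer; this hunk shows it carrying at least { producer, scope }, with the result listing the locally staged files. A hedged illustration of the new contract (the driver variable and paths are invented):

// Illustration of the new ready() request/response shape, inferred from the hunk above.
const readiness = await s3SourceDriver.ready({
    producer,  // producer whose settings.fileKey names the S3 object ('%' acts as a wildcard pattern)
    scope      // execution scope used by ExecutorScope to build the local temp path
});
// readiness.files -> [{ fullUri: '/tmp/remora/producer/file.csv' }, ...]  (path invented)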
@@ -45,7 +45,7 @@ class EnvironmentClass {
  }
  }
  catch (error) {
- throw new Error(`Error loading configuration from ${configPath}: ${error.message}`);
+ throw new Error(`Error loading from ${path_1.default.resolve(remoraPath)} configuration from ${configPath}: ${error.message}`);
  }
  }
  return configs;
@@ -46,6 +46,25 @@ class LineParserClass {
  throw new Error(`File type ${fileType} not implemented yet.`);
  }
  };
+ /**
+ * Used ONLY to parse internal records (inside the .dataset) since I know they are already prepared in the correct way
+ */
+ this._internalParseCSV = (line, fields, delimiter) => {
+ var _a, _b;
+ const parts = CSVParser_1.default.parseRow(line, delimiter !== null && delimiter !== void 0 ? delimiter : ',');
+ const record = {};
+ for (const [index, field] of fields.entries()) {
+ const fieldKey = field.finalKey;
+ record[fieldKey] = TypeCaster_1.default.cast(parts[index], (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string');
+ }
+ return record;
+ };
+ /**
+ * Used ONLY to parse internal records (inside the .dataset) since I know they are already prepared in the correct way
+ */
+ this._internalParseJSON = (line) => {
+ return JSON.parse(line);
+ };
  }
  }
  const LineParser = new LineParserClass();
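A small worked illustration of the new _internalParseCSV helper, assuming CSVParser.parseRow splits the line on the delimiter and TypeCaster.cast converts each part to the dimension type (the field definitions and input line are invented):

// Illustration only: invented field definitions and input line.
const fields = [
    { finalKey: 'id', dimension: { type: 'number' } },
    { finalKey: 'name' }                               // no dimension -> cast falls back to 'string'
];
LineParser._internalParseCSV('42,Alice', fields, ',');
// -> { id: 42, name: 'Alice' }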
@@ -119,11 +119,11 @@ class ValidatorClass {
  // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
  const uniqEngines = Algo_1.default.uniqBy(sources, 'engine');
  if (uniqEngines.length !== 1)
- errors.push(`Sources with different engines were used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
+ errors.push(`Sources with different engines are used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
  // For now we also only support consumers that have producers ALL having the same exact source
  const uniqNames = Algo_1.default.uniqBy(sources, 'name');
  if (uniqNames.length !== 1)
- errors.push(`Producers with different sources were used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
+ errors.push(`Producers with different sources are used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
  if (consumer.filters && consumer.filters.length > 0) {
  if (consumer.filters.some(x => x.sql && x.rule))
  errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
@@ -188,6 +188,11 @@ class ValidatorClass {
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
  }
  }
+ // Validate distinct
+ if (consumer.options) {
+ if (Algo_1.default.hasVal(consumer.options.distinct) && Algo_1.default.hasVal(consumer.options.distinctOn))
+ errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
+ }
  }
  catch (e) {
  if (errors.length === 0)